Module `visualization.APCAnalysis`

Expand source code

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_score 
from sklearn.model_selection import KFold 
import numpy as np
import pandas as pd
from sklearn import model_selection
import plotly.graph_objs as go
from datetime import datetime
from dateutil.relativedelta import relativedelta
import plotly.offline as py
import sys
import math
sys.path.insert(1, '../../visualization/')
from adjustSlope import adjust_slope
start_date = datetime(2016,1,1)




def apc_analysis(data,isDark = False,colorVision ="None",error = "0",language = "English"):
    """
    This function takes in a pandas DataFrame and returns the Age-Period-Cohort analysis
    including the visualizations of the effects of age, period, and cohort.
    Parameters:
    data (pandas DataFrame): The input data for Age-Period-Cohort analysis.
    isDark (bool): If True, use colorblind-friendly colors in the visualizations. Default is False.
    error (str): a string indicating the error to add to the slope of the predicted data (default is "0")
    Returns:
    None
    """
    # Get the coefficients for age, period, and cohort
    para = get_effect(data)
    effect_list_predicted = para['effect_list_predicted']
    effect_list_real =para['effect_list_real']
    
    len_v = para['len_v']
    len_t = para['len_t']
    len_c = para['len_c']
    v = para['v']
    t = para['t']
    c = para['c']
    default_rate = para['default_rate']
    default_rate_real = para['default_rate_real']

    cohort_effect_predicted = effect_list_predicted.coef_[0:len_v-1]
    age_effect_predicted = effect_list_predicted.coef_[len_v :len_t + len_v - 1]
    period_effect_predicted = effect_list_predicted.coef_[len_t + len_v : len_t + len_v + len_c]
    cohort_effect_real = effect_list_real.coef_[0:len_v-1]
    age_effect_real = effect_list_real.coef_[len_v :len_t + len_v - 1]
    period_effect_real = effect_list_real.coef_[len_t + len_v : len_t + len_v + len_c]
    
    cohort_effect_predicted = np.array(cohort_effect_predicted).astype(float)
    cohort_effect_real = np.array(cohort_effect_real).astype(float)
    # Visualize the effects of age, period, and cohort
    visualize_effect("cohort",v,cohort_effect_predicted,cohort_effect_real,isDark,colorVision,error,language)
    visualize_effect("age",t,age_effect_predicted,age_effect_real,isDark,colorVision,error,language)
    visualize_effect("period",c,period_effect_predicted,period_effect_real,isDark,colorVision,error,language)
    
    # apply sinh scale
    visualize_effect("sinh_cohort",v,np.sinh(cohort_effect_predicted),np.sinh(cohort_effect_real),
                     isDark,colorVision,error,language)
    visualize_effect("sinh_age",t,np.sinh(age_effect_predicted),np.sinh(age_effect_real),
                     isDark,colorVision,error,language)
    visualize_effect("sinh_period",c,np.sinh(period_effect_predicted),np.sinh(period_effect_real),
                     isDark,colorVision,error,language)

    # apply exp scale
    visualize_effect("exp_cohort",v,np.exp(cohort_effect_predicted),
                     np.exp(cohort_effect_real),isDark,colorVision,error,language)
    visualize_effect("exp_age",t,np.exp(age_effect_predicted),
                     np.exp(age_effect_real),isDark,colorVision,error,language)
    visualize_effect("exp_period",c,np.exp(period_effect_predicted),
                     np.exp(period_effect_real),isDark,colorVision,error,language)
    
    
    
   
    # Visualize the Lexis diagrams
    default_rate = np.array(default_rate).astype(float)
    default_rate_real = np.array(default_rate_real).astype(float)
    theme = ["hot_r","YlGnBu","OrRd","greys",[[0, 'rgb(255,255,204)'], 
            [1, 'rgb(97,97,189)']], [[0, 'rgb(205,255,153)'], [1, 'rgb(51,102,0)']],
            [[0, 'rgb(153,255,255)'], [1, 'rgb(0,153,153)']],"Spectral","PiYG"]
    theme_name = ["hot_r","YlGnBu","OrRd","greys","yellows","greens","blues","Spectral","PiYG" ]
    for i in range(0,len(theme)):
        visualize_lexis_diagram(theme[i],theme_name[i],c,t,default_rate,"apc_",language,isDark)
        visualize_lexis_diagram(theme[i],theme_name[i],c,t,default_rate_real,"real_apc_",language,isDark)
        visualize_lexis_diagram(theme[i],theme_name[i],c,t,np.sinh(default_rate),"sinh_apc_",language,isDark)
        visualize_lexis_diagram(theme[i],theme_name[i],c,t,np.exp(default_rate),"exp_apc_",language,isDark)
        visualize_lexis_diagram(theme[i],theme_name[i],c,t,np.sinh(default_rate),"sinh_real_apc_",language,isDark)
        visualize_lexis_diagram(theme[i],theme_name[i],c,t,np.exp(default_rate_real),"exp_real_apc_",language,isDark)
    


def get_effect(data):        
    """
    This function calculates the effect of variables on the response variable and returns relevant information.
    Parameters:
    data (pandas DataFrame): The input data for Age-Period-Cohort analysis.
    """

    # Add a column to the DataFrame that combines 'v' and 't' columns
    data['c'] = data['v'] + data['t']
    # Group the data by 't', 'v', and 'c' columns, then aggregate data 
    # based on count, sum of 'y', and sum of 'pd'
    data = data.groupby(['t','v','c']).agg(number = ('y','count'),
                                        dr=('y','sum'),pd=('pd','sum'))
    
    # Compute the average of 'dr' and 'pd' for each group based on 'number'
    data["dr"] = data["dr"]/data["number"]
    data['pd'] = data['pd']/data['number']

    # Extract 't', 'v', and 'c' from the index of 'data'
    t_list,v_list,c_list = [],[],[]
    for (i,j,k) in data.index:
        t_list.append(i)
        v_list.append(j)
        c_list.append(k)
    

    data['t'] = t_list 
    data['v'] = v_list 
    data['c'] = c_list

    # Prepare 'yTrain1', 'yTrain2', and 'train_encode' based on 'data'
    yTrain1 = data['pd']
    yTrain2 = data['dr']

    # One-hot encode age, cohort, and 'c' values
    train_encode_simplify = data[['v','t','c']]
    train_encode = pd.get_dummies(train_encode_simplify,
                                columns = ['v','t','c'])
    
    # Create feature variable arrays for Ridge regression
    xTrain_encode = train_encode.values

    # Fit linear regression models for pd and dr
    effect_list_predicted = Ridge(alpha=0.001)
    effect_list_predicted.fit(xTrain_encode,yTrain1)

    effect_list_real = Ridge(alpha=2)
    effect_list_real.fit(xTrain_encode,yTrain2)

    # Create arrays of unique age, cohort, and 'c' values for creating Lexis diagrams
    v = data['v'].unique()
    len_v = len(v)
    t = data['t'].unique()
    len_t = len(t)
    c = data['c'].unique()
    len_c = len(c)

    # Create a 2D array of predicted pd values to create a Lexis diagram
    default_rate = get_predicted_default_rate(train_encode,t,c,effect_list_predicted)
    
    # Create a 2D array of predicted dr values to create a Lexis diagram
    default_rate_real = get_predicted_default_rate(train_encode,t,c,effect_list_real)

        

    return { 'len_t': len_t, "len_v": len_v, "len_c":len_c,"effect_list_predicted": 
            effect_list_predicted,"effect_list_real":effect_list_real,
            'v':v, 't':t, 'c':c, "default_rate":default_rate,
            "default_rate_real":default_rate_real}



def get_predicted_default_rate(train_encode,t,c,effect_list_model):
    """
    Create a 2D array of predicted default rate values to create a Lexis diagram
    Parameter: 
    train_encode: encoded a,p,c value
    t: age list
    c: period list
    effect_list_model: model to predict default rate
    """
    # Create a 2D array of predicted pd values to create a Lexis diagram
    default_rate = []
    for i in t:
        templist = []
        for j in c:
            try:
                encode = train_encode.loc[i].loc[j-i]
                y = effect_list_model.predict(encode.values)[0]
            except:
                y = np.nan
            templist.append(y)
        default_rate.append(templist)
    return default_rate



def visualize_effect(graphName,x,predicty,realy,isDark = False,colorVision ="None",error = "0",language = "English"):
    """
    visualize apc effect
    Parameters:
    - graphName: a string indicating the name of the graph
    - x: an array representing the input variable
    - predicty: an array representing the predicted output variable
    - realy: an array representing the actual output variable
    - isDark: a boolean indicating whether the color scheme should be adjusted for color blindness (default is False)
    - error: a string indicating the error to add to the slope of the predicted data (default is "0")
    """
    # Create a list of tuples, each containing a value from x, predicty, and realy
    list_to_sort = list(zip(x.tolist(), predicty.tolist(), realy.tolist()))
    # Sort the list of tuples by the first value (x value)
    list_to_sort.sort(key = lambda a: a[0])
    # Separate the sorted values back into individual lists
    sorted_x = [x[0] for x in list_to_sort]
    sorted_predicty = [x[1] for x in list_to_sort]
    sorted_realy = [x[2] for x in list_to_sort]
    xTitle = ""
    
    if(language == "Chinese"):
        if(graphName == "age"):
            xTitle = "年龄"
        elif(graphName == "period"):
            xTitle = "时期"
        elif(graphName == "cohort"):
            xTitle = "队列"
        yTitle = xTitle + " 效应"
        trace0_name = "基于预测数据的APC"
        trace1_name = "基于真实数据的APC"
    else:
        xTitle = graphName
        yTitle = graphName + " effect"
        trace0_name = "APC by predicted data"
        trace1_name = "APC by real data"

    # Set the layout of the graph with the specified options
    layout = go.Layout(
        paper_bgcolor='rgb(233,233,233)',
        title=
            {
                'y':0.9,
                'x':0.45,
                'xanchor': 'center', 
                'yanchor': 'top'
            },
            xaxis=dict(
                title= xTitle,
                range = [x.min(),x.max()]
            ),
            yaxis=dict(
                title= yTitle,
            ),
        font=dict(
            size=20
        ),
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.02,
            xanchor="right",
            x=1
        )
    )

    
    

    # Create a scatter plot trace using the sorted x and predicted y values
    trace0 = go.Scatter(
        x = sorted_x,
        # y = [i + float(error) for i in clf_v],
        # y = clf_v,
        y = adjust_slope(sorted_x,sorted_predicty,error),
        mode = 'lines',
        name = trace0_name
    )

    # Create a scatter plot trace using the sorted x and real y values
    trace1 = go.Scatter(
        x = sorted_x,
        # y = [i + float(error) for i in clf_v],
        # y = clf_v,
        y = adjust_slope(sorted_x,sorted_realy,error),
        mode = 'lines',
        name = trace1_name
    )
    

    if(colorVision == "redGreen"):
        trace1.update(marker_color = "green")
    elif(colorVision =="blueYellow"):
        trace0.update(marker_color = "green")

    # Create the figure object and add the traces and layout
    figure = go.Figure({'data': [trace0,trace1],
                            'layout': layout})

    figure.update_yaxes(exponentformat= "power")

    if(colorVision == "full"):
        figure.update_traces(marker_color="grey")

    # If isDark is True, update the layout
    if(isDark):
        figure.update_layout(
            plot_bgcolor='rgb(63, 71, 79)',
            paper_bgcolor ='rgb(63, 71, 79)',
            font_color="white")
    # Plot the figure and save the file to the specified location with the specified filename
    py.plot(figure, filename='../../../res/' + graphName + 'effect.html',
            auto_open = False)
    


def visualize_lexis_diagram(theme,theme_name,c,t,z,prefix,language = "English",isDark =False):
    """
    Creates a Lexis diagram visualization using the plotly library.
    Parameter:
    - theme: a string representing the color theme of the visualization
    - c: a list of strings representing the calendar time
    - t: a list of strings representing the age
    - z: a 2D list of numerical values representing the heatmap data
    - prefix: a string representing the prefix of the output file name
    """
    # Create a new Figure object with a Heatmap trace using the provided data
    figure = go.Figure(data=go.Heatmap(
                    z=z,                # the heatmap values to be plotted
                    x=c,                # the x-axis values (calendar time)
                    y=t,                # the y-axis values (age)
                    zsmooth="best",     # the smoothing algorithm to use. Other possible value: "fast" | "best" | False
                    colorscale=theme,   # the color scheme to use
                    hoverongaps = True, # whether to display hover information for missing data
                    connectgaps=False))  # whether to connect gaps in the heatmap

    figure.update_layout(
                paper_bgcolor='rgb(233,233,233)',  # the background color of the plot
                title="lexis diagram",             # the title of the plot
                title_x=0.5,                       # the horizontal alignment of the title
                xaxis_title = "calendar time",     # the label for the x-axis
                yaxis_title = "age",               # the label for the y-axis
                font=dict(size=20),                 # the font size of the plot
                )
    if(language == "Chinese"):
        figure.update_layout(
            xaxis_title = "日期",     # the label for the x-axis
            yaxis_title = "年龄",               # the label for the y-axis
            title = "lexis 图"         # the title of the plot
        )
    
    figure.update_traces(colorbar_exponentformat="SI", selector=dict(type='heatmap'))
    figure.update_traces(colorbar_showexponent="first", selector=dict(type='heatmap'))

    # figure.show()
    if(isDark):
        figure.update_layout(
            plot_bgcolor='rgb(63, 71, 79)',
            paper_bgcolor ='rgb(63, 71, 79)',
            font_color="white")
        
    # Save the plot to an HTML file using Plotly and the provided file path and prefix
    py.plot(figure, filename='../../../res/'+ prefix + 'lexis_diagram_' + theme_name +'.html',
           auto_open = False)                # whether to automatically open the plot in a browser

Functions

def apc_analysis(data, isDark=False, colorVision='None', error='0', language='English')

This function takes in a pandas DataFrame and returns the Age-Period-Cohort analysis including the visualizations of the effects of age, period, and cohort. Parameters: data (pandas DataFrame): The input data for Age-Period-Cohort analysis. isDark (bool): If True, use colorblind-friendly colors in the visualizations. Default is False. error (str): a string indicating the error to add to the slope of the predicted data (default is "0") Returns: None

Expand source code

def apc_analysis(data,isDark = False,colorVision ="None",error = "0",language = "English"):
    """
    This function takes in a pandas DataFrame and returns the Age-Period-Cohort analysis
    including the visualizations of the effects of age, period, and cohort.
    Parameters:
    data (pandas DataFrame): The input data for Age-Period-Cohort analysis.
    isDark (bool): If True, use colorblind-friendly colors in the visualizations. Default is False.
    error (str): a string indicating the error to add to the slope of the predicted data (default is "0")
    Returns:
    None
    """
    # Get the coefficients for age, period, and cohort
    para = get_effect(data)
    effect_list_predicted = para['effect_list_predicted']
    effect_list_real =para['effect_list_real']
    
    len_v = para['len_v']
    len_t = para['len_t']
    len_c = para['len_c']
    v = para['v']
    t = para['t']
    c = para['c']
    default_rate = para['default_rate']
    default_rate_real = para['default_rate_real']

    cohort_effect_predicted = effect_list_predicted.coef_[0:len_v-1]
    age_effect_predicted = effect_list_predicted.coef_[len_v :len_t + len_v - 1]
    period_effect_predicted = effect_list_predicted.coef_[len_t + len_v : len_t + len_v + len_c]
    cohort_effect_real = effect_list_real.coef_[0:len_v-1]
    age_effect_real = effect_list_real.coef_[len_v :len_t + len_v - 1]
    period_effect_real = effect_list_real.coef_[len_t + len_v : len_t + len_v + len_c]
    
    cohort_effect_predicted = np.array(cohort_effect_predicted).astype(float)
    cohort_effect_real = np.array(cohort_effect_real).astype(float)
    # Visualize the effects of age, period, and cohort
    visualize_effect("cohort",v,cohort_effect_predicted,cohort_effect_real,isDark,colorVision,error,language)
    visualize_effect("age",t,age_effect_predicted,age_effect_real,isDark,colorVision,error,language)
    visualize_effect("period",c,period_effect_predicted,period_effect_real,isDark,colorVision,error,language)
    
    # apply sinh scale
    visualize_effect("sinh_cohort",v,np.sinh(cohort_effect_predicted),np.sinh(cohort_effect_real),
                     isDark,colorVision,error,language)
    visualize_effect("sinh_age",t,np.sinh(age_effect_predicted),np.sinh(age_effect_real),
                     isDark,colorVision,error,language)
    visualize_effect("sinh_period",c,np.sinh(period_effect_predicted),np.sinh(period_effect_real),
                     isDark,colorVision,error,language)

    # apply exp scale
    visualize_effect("exp_cohort",v,np.exp(cohort_effect_predicted),
                     np.exp(cohort_effect_real),isDark,colorVision,error,language)
    visualize_effect("exp_age",t,np.exp(age_effect_predicted),
                     np.exp(age_effect_real),isDark,colorVision,error,language)
    visualize_effect("exp_period",c,np.exp(period_effect_predicted),
                     np.exp(period_effect_real),isDark,colorVision,error,language)
    
    
    
   
    # Visualize the Lexis diagrams
    default_rate = np.array(default_rate).astype(float)
    default_rate_real = np.array(default_rate_real).astype(float)
    theme = ["hot_r","YlGnBu","OrRd","greys",[[0, 'rgb(255,255,204)'], 
            [1, 'rgb(97,97,189)']], [[0, 'rgb(205,255,153)'], [1, 'rgb(51,102,0)']],
            [[0, 'rgb(153,255,255)'], [1, 'rgb(0,153,153)']],"Spectral","PiYG"]
    theme_name = ["hot_r","YlGnBu","OrRd","greys","yellows","greens","blues","Spectral","PiYG" ]
    for i in range(0,len(theme)):
        visualize_lexis_diagram(theme[i],theme_name[i],c,t,default_rate,"apc_",language,isDark)
        visualize_lexis_diagram(theme[i],theme_name[i],c,t,default_rate_real,"real_apc_",language,isDark)
        visualize_lexis_diagram(theme[i],theme_name[i],c,t,np.sinh(default_rate),"sinh_apc_",language,isDark)
        visualize_lexis_diagram(theme[i],theme_name[i],c,t,np.exp(default_rate),"exp_apc_",language,isDark)
        visualize_lexis_diagram(theme[i],theme_name[i],c,t,np.sinh(default_rate),"sinh_real_apc_",language,isDark)
        visualize_lexis_diagram(theme[i],theme_name[i],c,t,np.exp(default_rate_real),"exp_real_apc_",language,isDark)

def get_effect(data)

This function calculates the effect of variables on the response variable and returns relevant information. Parameters: data (pandas DataFrame): The input data for Age-Period-Cohort analysis.

Expand source code

def get_effect(data):        
    """
    This function calculates the effect of variables on the response variable and returns relevant information.
    Parameters:
    data (pandas DataFrame): The input data for Age-Period-Cohort analysis.
    """

    # Add a column to the DataFrame that combines 'v' and 't' columns
    data['c'] = data['v'] + data['t']
    # Group the data by 't', 'v', and 'c' columns, then aggregate data 
    # based on count, sum of 'y', and sum of 'pd'
    data = data.groupby(['t','v','c']).agg(number = ('y','count'),
                                        dr=('y','sum'),pd=('pd','sum'))
    
    # Compute the average of 'dr' and 'pd' for each group based on 'number'
    data["dr"] = data["dr"]/data["number"]
    data['pd'] = data['pd']/data['number']

    # Extract 't', 'v', and 'c' from the index of 'data'
    t_list,v_list,c_list = [],[],[]
    for (i,j,k) in data.index:
        t_list.append(i)
        v_list.append(j)
        c_list.append(k)
    

    data['t'] = t_list 
    data['v'] = v_list 
    data['c'] = c_list

    # Prepare 'yTrain1', 'yTrain2', and 'train_encode' based on 'data'
    yTrain1 = data['pd']
    yTrain2 = data['dr']

    # One-hot encode age, cohort, and 'c' values
    train_encode_simplify = data[['v','t','c']]
    train_encode = pd.get_dummies(train_encode_simplify,
                                columns = ['v','t','c'])
    
    # Create feature variable arrays for Ridge regression
    xTrain_encode = train_encode.values

    # Fit linear regression models for pd and dr
    effect_list_predicted = Ridge(alpha=0.001)
    effect_list_predicted.fit(xTrain_encode,yTrain1)

    effect_list_real = Ridge(alpha=2)
    effect_list_real.fit(xTrain_encode,yTrain2)

    # Create arrays of unique age, cohort, and 'c' values for creating Lexis diagrams
    v = data['v'].unique()
    len_v = len(v)
    t = data['t'].unique()
    len_t = len(t)
    c = data['c'].unique()
    len_c = len(c)

    # Create a 2D array of predicted pd values to create a Lexis diagram
    default_rate = get_predicted_default_rate(train_encode,t,c,effect_list_predicted)
    
    # Create a 2D array of predicted dr values to create a Lexis diagram
    default_rate_real = get_predicted_default_rate(train_encode,t,c,effect_list_real)

        

    return { 'len_t': len_t, "len_v": len_v, "len_c":len_c,"effect_list_predicted": 
            effect_list_predicted,"effect_list_real":effect_list_real,
            'v':v, 't':t, 'c':c, "default_rate":default_rate,
            "default_rate_real":default_rate_real}

def get_predicted_default_rate(train_encode, t, c, effect_list_model)

Create a 2D array of predicted default rate values to create a Lexis diagram Parameter: train_encode: encoded a,p,c value t: age list c: period list effect_list_model: model to predict default rate

Expand source code

def get_predicted_default_rate(train_encode,t,c,effect_list_model):
    """
    Create a 2D array of predicted default rate values to create a Lexis diagram
    Parameter: 
    train_encode: encoded a,p,c value
    t: age list
    c: period list
    effect_list_model: model to predict default rate
    """
    # Create a 2D array of predicted pd values to create a Lexis diagram
    default_rate = []
    for i in t:
        templist = []
        for j in c:
            try:
                encode = train_encode.loc[i].loc[j-i]
                y = effect_list_model.predict(encode.values)[0]
            except:
                y = np.nan
            templist.append(y)
        default_rate.append(templist)
    return default_rate

def visualize_effect(graphName, x, predicty, realy, isDark=False, colorVision='None', error='0', language='English')

visualize apc effect Parameters: - graphName: a string indicating the name of the graph - x: an array representing the input variable - predicty: an array representing the predicted output variable - realy: an array representing the actual output variable - isDark: a boolean indicating whether the color scheme should be adjusted for color blindness (default is False) - error: a string indicating the error to add to the slope of the predicted data (default is "0")

Expand source code

def visualize_effect(graphName,x,predicty,realy,isDark = False,colorVision ="None",error = "0",language = "English"):
    """
    visualize apc effect
    Parameters:
    - graphName: a string indicating the name of the graph
    - x: an array representing the input variable
    - predicty: an array representing the predicted output variable
    - realy: an array representing the actual output variable
    - isDark: a boolean indicating whether the color scheme should be adjusted for color blindness (default is False)
    - error: a string indicating the error to add to the slope of the predicted data (default is "0")
    """
    # Create a list of tuples, each containing a value from x, predicty, and realy
    list_to_sort = list(zip(x.tolist(), predicty.tolist(), realy.tolist()))
    # Sort the list of tuples by the first value (x value)
    list_to_sort.sort(key = lambda a: a[0])
    # Separate the sorted values back into individual lists
    sorted_x = [x[0] for x in list_to_sort]
    sorted_predicty = [x[1] for x in list_to_sort]
    sorted_realy = [x[2] for x in list_to_sort]
    xTitle = ""
    
    if(language == "Chinese"):
        if(graphName == "age"):
            xTitle = "年龄"
        elif(graphName == "period"):
            xTitle = "时期"
        elif(graphName == "cohort"):
            xTitle = "队列"
        yTitle = xTitle + " 效应"
        trace0_name = "基于预测数据的APC"
        trace1_name = "基于真实数据的APC"
    else:
        xTitle = graphName
        yTitle = graphName + " effect"
        trace0_name = "APC by predicted data"
        trace1_name = "APC by real data"

    # Set the layout of the graph with the specified options
    layout = go.Layout(
        paper_bgcolor='rgb(233,233,233)',
        title=
            {
                'y':0.9,
                'x':0.45,
                'xanchor': 'center', 
                'yanchor': 'top'
            },
            xaxis=dict(
                title= xTitle,
                range = [x.min(),x.max()]
            ),
            yaxis=dict(
                title= yTitle,
            ),
        font=dict(
            size=20
        ),
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.02,
            xanchor="right",
            x=1
        )
    )

    
    

    # Create a scatter plot trace using the sorted x and predicted y values
    trace0 = go.Scatter(
        x = sorted_x,
        # y = [i + float(error) for i in clf_v],
        # y = clf_v,
        y = adjust_slope(sorted_x,sorted_predicty,error),
        mode = 'lines',
        name = trace0_name
    )

    # Create a scatter plot trace using the sorted x and real y values
    trace1 = go.Scatter(
        x = sorted_x,
        # y = [i + float(error) for i in clf_v],
        # y = clf_v,
        y = adjust_slope(sorted_x,sorted_realy,error),
        mode = 'lines',
        name = trace1_name
    )
    

    if(colorVision == "redGreen"):
        trace1.update(marker_color = "green")
    elif(colorVision =="blueYellow"):
        trace0.update(marker_color = "green")

    # Create the figure object and add the traces and layout
    figure = go.Figure({'data': [trace0,trace1],
                            'layout': layout})

    figure.update_yaxes(exponentformat= "power")

    if(colorVision == "full"):
        figure.update_traces(marker_color="grey")

    # If isDark is True, update the layout
    if(isDark):
        figure.update_layout(
            plot_bgcolor='rgb(63, 71, 79)',
            paper_bgcolor ='rgb(63, 71, 79)',
            font_color="white")
    # Plot the figure and save the file to the specified location with the specified filename
    py.plot(figure, filename='../../../res/' + graphName + 'effect.html',
            auto_open = False)

def visualize_lexis_diagram(theme, theme_name, c, t, z, prefix, language='English', isDark=False)

Creates a Lexis diagram visualization using the plotly library. Parameter: - theme: a string representing the color theme of the visualization - c: a list of strings representing the calendar time - t: a list of strings representing the age - z: a 2D list of numerical values representing the heatmap data - prefix: a string representing the prefix of the output file name

Expand source code

def visualize_lexis_diagram(theme,theme_name,c,t,z,prefix,language = "English",isDark =False):
    """
    Creates a Lexis diagram visualization using the plotly library.
    Parameter:
    - theme: a string representing the color theme of the visualization
    - c: a list of strings representing the calendar time
    - t: a list of strings representing the age
    - z: a 2D list of numerical values representing the heatmap data
    - prefix: a string representing the prefix of the output file name
    """
    # Create a new Figure object with a Heatmap trace using the provided data
    figure = go.Figure(data=go.Heatmap(
                    z=z,                # the heatmap values to be plotted
                    x=c,                # the x-axis values (calendar time)
                    y=t,                # the y-axis values (age)
                    zsmooth="best",     # the smoothing algorithm to use. Other possible value: "fast" | "best" | False
                    colorscale=theme,   # the color scheme to use
                    hoverongaps = True, # whether to display hover information for missing data
                    connectgaps=False))  # whether to connect gaps in the heatmap

    figure.update_layout(
                paper_bgcolor='rgb(233,233,233)',  # the background color of the plot
                title="lexis diagram",             # the title of the plot
                title_x=0.5,                       # the horizontal alignment of the title
                xaxis_title = "calendar time",     # the label for the x-axis
                yaxis_title = "age",               # the label for the y-axis
                font=dict(size=20),                 # the font size of the plot
                )
    if(language == "Chinese"):
        figure.update_layout(
            xaxis_title = "日期",     # the label for the x-axis
            yaxis_title = "年龄",               # the label for the y-axis
            title = "lexis 图"         # the title of the plot
        )
    
    figure.update_traces(colorbar_exponentformat="SI", selector=dict(type='heatmap'))
    figure.update_traces(colorbar_showexponent="first", selector=dict(type='heatmap'))

    # figure.show()
    if(isDark):
        figure.update_layout(
            plot_bgcolor='rgb(63, 71, 79)',
            paper_bgcolor ='rgb(63, 71, 79)',
            font_color="white")
        
    # Save the plot to an HTML file using Plotly and the provided file path and prefix
    py.plot(figure, filename='../../../res/'+ prefix + 'lexis_diagram_' + theme_name +'.html',
           auto_open = False)                # whether to automatically open the plot in a browser