Skip to content
Snippets Groups Projects
visualize.py 5.57 KiB
Newer Older
DIANE's avatar
DIANE committed

DIANE's avatar
DIANE committed
import streamlit as st
DIANE's avatar
DIANE committed
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
DIANE's avatar
DIANE committed
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ predictions histogram ~~~~~~~~~~~~~~~~~~~~~~~~~~
@st.cache_data
def pred_hist(pred):
    """Build a 12-bin histogram of ``pred`` and return the matplotlib figure.

    The axes are drawn without a frame or tick marks, over a light
    dash-dot grid.

    Args:
        pred: Values handed unchanged to ``Axes.hist``.

    Returns:
        The figure that holds the histogram.
    """
    figure, axis = plt.subplots(nrows=1, ncols=1, figsize=(15, 3),
                                tight_layout=True)

    # Faint grid behind the bars.
    axis.grid(color='grey', linestyle='-.', linewidth=0.5, alpha=0.6)

    # Hide the four border lines of the axes.
    for side in ('top', 'bottom', 'left', 'right'):
        axis.spines[side].set_visible(False)

    # No tick marks on either axis; only the label padding differs (x: 5, y: 10).
    for tick_axis, padding in ((axis.xaxis, 5), (axis.yaxis, 10)):
        tick_axis.set_ticks_position('none')
        tick_axis.set_tick_params(pad=padding)

    axis.hist(pred, bins=12)
    return figure


# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ predictions histogram ~~~~~~~~~~~~~~~~~~~~~~~~~~
@st.cache_data
def plot_spectra(specdf, xunits, yunits):
    """Plot every row of ``specdf`` as one line and return the figure.

    Args:
        specdf: Table of spectra; expected to behave like a pandas DataFrame
            (``.columns`` and ``.T.plot`` are used). It is transposed before
            plotting, so the column labels end up on the x axis.
        xunits: Text used as the x-axis label.
        yunits: Text used as the y-axis label.

    Returns:
        The matplotlib figure containing the plot.

    Raises:
        IndexError: If ``specdf`` has no columns.
    """
    # String column labels keep the natural left-to-right order; any other
    # label type gets a reversed x axis.
    # NOTE(review): presumably non-string labels are numeric wavenumbers or
    # wavelengths that are conventionally shown decreasing -- confirm.
    reverse_x = not isinstance(specdf.columns[0], str)

    fig, ax = plt.subplots(figsize=(30, 7))
    specdf.T.plot(legend=False, ax=ax, color='#2474b4')
    if reverse_x:
        ax.invert_xaxis()

    ax.set_xlabel(xunits, fontsize=30)
    ax.set_ylabel(yunits, fontsize=30)
    # Act on the objects created above rather than on pyplot's "current" state.
    ax.margins(x=0)
    fig.tight_layout()
    return fig


# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Cal/val hist ~~~~~~~~~~~~~~~~~~~~~~~~~~
@st.cache_data
def hist(y, y_train, y_test, target_name = 'y'):
    """Overlay the distributions of the full, calibration and validation targets.

    Three seaborn histograms with KDE curves are drawn on the same axes.

    Args:
        y: Values plotted under the plain ``target_name`` label.
        y_train: Values plotted under the "<target_name> (Cal)" label.
        y_test: Values plotted under the "<target_name> (Val)" label.
        target_name: Name used for the legend entries and the x-axis label;
            converted with ``str``.

    Returns:
        The matplotlib figure containing the three histograms.
    """
    name = str(target_name)
    fig, ax = plt.subplots(figsize=(12, 3))

    # (values, colour, legend label), drawn in this order.
    layers = (
        (y, "#004e9e", name),
        (y_train, "#2C6B6F", name + " (Cal)"),
        (y_test, "#d0f7be", name + " (Val)"),
    )
    for values, colour, legend_label in layers:
        sns.histplot(values, color=colour, kde=True, label=legend_label,
                     ax=ax, fill=True)

    ax.set_xlabel(name)
    ax.legend()
    fig.tight_layout()
    return fig



@st.cache_data
def reg_plot(meas, pred, train_idx, test_idx):
    """Plot predicted against measured values for calibration and validation.

    For each set a straight line ``Predicted = a0 + a1 x Measured`` is fitted
    and its rounded coefficients are shown in the legend. A black identity
    line is drawn over the range of the calibration measurements, and points
    whose residual (measured - predicted) lies more than three standard
    deviations from the mean residual of their set are annotated.

    Args:
        meas: Sequence with the calibration measurements at index 0 and the
            validation measurements at index 1. Each entry is flattened to
            1-D; the caller's sequence is not modified.
        pred: Predictions, laid out like ``meas``.
        train_idx: Labels for the calibration samples, in the same order as
            ``meas[0]``; used as annotation text.
        test_idx: Labels for the validation samples, in the same order as
            ``meas[1]``.

    Returns:
        The matplotlib figure containing the plot.
    """
    # Local import: scikit-learn is only needed by this function.
    from sklearn.linear_model import LinearRegression

    # Flattened working copies, so the caller's lists are left untouched.
    measured = [np.asarray(m).reshape(-1) for m in meas]
    predicted = [np.asarray(p).reshape(-1) for p in pred]

    # Fit predicted = intercept + slope * measured for each set.
    intercepts, slopes = [], []
    for x, y in zip(measured, predicted):
        model = LinearRegression().fit(x.reshape(-1, 1), y)
        slopes.append(round(float(model.coef_[0]), 2))
        intercepts.append(round(float(model.intercept_), 2))

    fig, ax = plt.subplots(figsize=(12, 4))
    sns.regplot(x=measured[0], y=predicted[0], color="#2C6B6F",
                label=f'Cal (Predicted = {intercepts[0]} + {slopes[0]} x Measured)',
                scatter_kws={'edgecolor': 'black'}, ax=ax)
    sns.regplot(x=measured[1], y=predicted[1], color='#d0f7be',
                label=f'Val (Predicted = {intercepts[1]} + {slopes[1]} x Measured)',
                scatter_kws={'edgecolor': 'black'}, ax=ax)

    # Identity line, spanning the calibration measurements plus a small pad.
    low = np.min(measured[0]) - 0.05
    high = np.max(measured[0]) + 0.05
    ax.plot([low, high], [low, high], color='black')

    # Annotate points whose residual deviates from the set's mean residual by
    # more than three standard deviations.
    for labels, x, y in ((train_idx, measured[0], predicted[0]),
                         (test_idx, measured[1], predicted[1])):
        resid = x - y
        centre = np.mean(resid)
        cutoff = 3 * np.std(resid)
        for i, label in enumerate(labels):
            if np.abs(resid[i] - centre) > cutoff:
                ax.annotate(label, (x[i], y[i]))

    ax.set_ylabel('Predicted values')
    ax.set_xlabel('Measured values')
    ax.legend()
    ax.margins(0)
    return fig

# Resid plot
@st.cache_data
def resid_plot(meas, pred, train_idx, test_idx):
    """Plot residuals (measured - predicted) against the predicted values.

    Calibration and validation points are drawn as two scatter series around
    a dotted zero line, with a y range symmetric about zero. Points whose
    residual lies more than three standard deviations from the mean residual
    of their set are annotated.

    Args:
        meas: Sequence with the calibration measurements at index 0 and the
            validation measurements at index 1. Each entry is flattened to
            1-D before use.
        pred: Predictions, laid out like ``meas``.
        train_idx: Labels for the calibration samples, in the same order as
            ``meas[0]``; used as annotation text.
        test_idx: Labels for the validation samples, in the same order as
            ``meas[1]``.

    Returns:
        The matplotlib figure containing the plot.
    """
    # Flatten first so residuals are computed position by position.
    measured = [np.asarray(m).reshape(-1) for m in meas]
    predicted = [np.asarray(p).reshape(-1) for p in pred]
    resid = [m - p for m, p in zip(measured, predicted)]

    fig, ax = plt.subplots(figsize=(12, 4))
    sns.scatterplot(x=predicted[0], y=resid[0], color="#2C6B6F",
                    label='Cal', edgecolor="black", ax=ax)
    sns.scatterplot(x=predicted[1], y=resid[1], color="#d0f7be",
                    label='Val', edgecolor="black", ax=ax)
    ax.axhline(y=0, c='black', linestyle=':')

    # Symmetric y range with 10 % headroom over the largest absolute residual.
    lim = np.max(np.abs(np.concatenate([resid[0], resid[1]]))) * 1.1
    ax.set_ylim(-lim, lim)

    # Annotate points whose residual deviates from the set's mean residual by
    # more than three standard deviations.
    for labels, x, err in ((train_idx, predicted[0], resid[0]),
                           (test_idx, predicted[1], resid[1])):
        centre = np.mean(err)
        cutoff = 3 * np.std(err)
        for i, label in enumerate(labels):
            if np.abs(err[i] - centre) > cutoff:
                ax.annotate(label, (x[i], err[i]))

    ax.set_ylabel('Residuals')
    ax.set_xlabel('Predicted values')
    ax.legend()
    ax.margins(0)
    return fig