# visualize.py

import streamlit as st
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ predictions histogram ~~~~~~~~~~~~~~~~~~~~~~~~~~
@st.cache_data
def pred_hist(pred):
    """Plot a histogram of the predicted values."""
    # Create the figure
    fig, ax = plt.subplots(1, 1, figsize=(15, 3), tight_layout=True)

    # Add x, y gridlines
    ax.grid(color='grey', linestyle='-.', linewidth=0.5, alpha=0.6)
    # Remove axes spines
    for s in ['top', 'bottom', 'left', 'right']:
        ax.spines[s].set_visible(False)
    # Remove x, y ticks
    ax.xaxis.set_ticks_position('none')
    ax.yaxis.set_ticks_position('none')
    # Add padding between axes and labels
    ax.xaxis.set_tick_params(pad=5)
    ax.yaxis.set_tick_params(pad=10)
    # Draw the histogram
    ax.hist(pred, bins=12)
    return fig
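

# Illustrative usage sketch (not called by the app): `demo_pred` is a synthetic
# array standing in for model predictions.
def _demo_pred_hist():
    demo_pred = np.random.default_rng(0).normal(loc=10, scale=2, size=200)
    return pred_hist(demo_pred)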


# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ spectra plot ~~~~~~~~~~~~~~~~~~~~~~~~~~
@st.cache_data
def plot_spectra(specdf=None, color=None, cmap=None, xunits=None, yunits=None):
    """Plot spectra (one curve per row of specdf), optionally coloured by category."""
    fig, ax = plt.subplots(figsize = (30,7))

    if color is None or cmap is None:
        # No grouping: plot every spectrum in a single colour
        specdf.T.plot(legend=False, ax=ax, color="blue")
    else:
        # One legend entry per category, added via empty proxy lines
        for key, value in cmap.items():
            ax.plot([], [], color=value, label=str(key))
        ax.legend()

        # Plot the spectra of each category with its assigned colour
        for key, value in cmap.items():
            idx = color.index[color == key].tolist()
            specdf.loc[idx].T.plot(legend=False, ax=ax, color=value)

    ax.set_xlabel(xunits, fontsize=30)
    ax.set_ylabel(yunits, fontsize=30)
    plt.margins(x = 0)
    plt.tight_layout()
    return fig
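

# Illustrative usage sketch (not called by the app): it assumes a wide DataFrame of
# spectra (one row per sample) and a `color` Series of categories sharing that index;
# the names `demo_spectra`, `demo_groups` and `demo_cmap` are invented for the example.
def _demo_plot_spectra():
    import pandas as pd
    rng = np.random.default_rng(0)
    demo_spectra = pd.DataFrame(rng.normal(size=(6, 100)))   # 6 spectra, 100 wavelengths
    demo_groups = pd.Series(['A', 'A', 'A', 'B', 'B', 'B'])  # one category per sample
    demo_cmap = {'A': '#2C6B6F', 'B': '#d0f7be'}              # category -> colour
    return plot_spectra(demo_spectra, color=demo_groups, cmap=demo_cmap,
                        xunits='Wavelength', yunits='Absorbance')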


@st.cache_data
def barhplot(metadf, cmap):
    # Count samples per category, ordered as in the colour map
    counts = metadf.groupby(metadf.columns[0]).size()
    counts = counts.loc[list(cmap.keys())]
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.barh(counts.index, counts.values, color=list(cmap.values()))
    ax.invert_yaxis()
    ax.set_xlabel('Count')
    ax.set_ylabel(str(metadf.columns[0]).capitalize())
    return fig
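

# Illustrative usage sketch (not called by the app): `demo_meta` and `demo_cmap` are
# invented; the first column of the metadata frame is the grouping variable, and the
# colour-map keys fix the bar order.
def _demo_barhplot():
    import pandas as pd
    demo_meta = pd.DataFrame({'variety': ['A', 'A', 'B', 'B', 'B', 'C']})
    demo_cmap = {'A': '#004e9e', 'B': '#2C6B6F', 'C': '#d0f7be'}
    return barhplot(demo_meta, demo_cmap)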


# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Cal/val hist ~~~~~~~~~~~~~~~~~~~~~~~~~~
@st.cache_data
def hist(y, y_train, y_test, target_name = 'y'):
    fig, ax = plt.subplots(figsize = (5,2))
    sns.histplot(y, color = "#004e9e", kde = True, label = str(target_name), ax = ax, fill = True)
    sns.histplot(y_train, color = "#2C6B6F", kde = True, label = str(target_name)+" (Cal)", ax = ax, fill = True)
    sns.histplot(y_test, color = "#d0f7be", kde = True, label = str(target_name)+" (Val)", ax = ax, fill = True)
    ax.set_xlabel(str(target_name))
    plt.legend()
    plt.tight_layout()
    return fig
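

# Illustrative usage sketch (not called by the app): `demo_y` is a synthetic target
# split 70/30 into calibration and validation subsets; the target name is arbitrary.
def _demo_hist():
    rng = np.random.default_rng(0)
    demo_y = rng.normal(loc=10, scale=2, size=200)
    return hist(demo_y, y_train=demo_y[:140], y_test=demo_y[140:], target_name='protein')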



@st.cache_data
def reg_plot( meas, pred, train_idx, test_idx):
    a0 = np.ones(2)
    a1 = np.ones(2)
    
    # Fit a least-squares line to each subset (0 = calibration, 1 = validation)
    for i in range(len(meas)):
        meas[i] = np.array(meas[i]).reshape(-1, 1)
        pred[i] = np.array(pred[i]).reshape(-1, 1)

        M = LinearRegression()
        M.fit(meas[i], pred[i])
        a1[i] = np.round(M.coef_[0][0], 2)
        a0[i] = np.round(M.intercept_[0], 2)

    ec = np.subtract(np.array(meas[0]).reshape(-1), np.array(pred[0]).reshape(-1))
    et = np.subtract(np.array(meas[1]).reshape(-1), np.array(pred[1]).reshape(-1))

    fig, ax = plt.subplots(figsize = (12,4))
    sns.regplot(x = meas[0] , y = pred[0], color="#2C6B6F", label = f'Cal (Predicted = {a0[0]} + {a1[0]} x Measured)', scatter_kws={'edgecolor': 'black'})
    sns.regplot(x = meas[1], y = pred[1], color='#d0f7be', label = f'Val (Predicted = {a0[1]} + {a1[1]} x Measured)', scatter_kws={'edgecolor': 'black'})
    plt.plot([np.min(meas[0]) - 0.05, np.max([meas[0]]) + 0.05], [np.min(meas[0]) - 0.05, np.max([meas[0]]) + 0.05], color = 'black')

    # Annotate calibration samples whose error exceeds mean + 3*std
    for i, txt in enumerate(train_idx):
        if np.abs(ec[i]) > np.mean(ec) + 3 * np.std(ec):
            plt.annotate(txt, (np.array(meas[0]).reshape(-1)[i], np.array(pred[0]).reshape(-1)[i]))

    # Annotate validation samples whose error exceeds mean + 3*std
    for i, txt in enumerate(test_idx):
        if np.abs(et[i]) > np.mean(et) + 3 * np.std(et):
            plt.annotate(txt, (np.array(meas[1]).reshape(-1)[i], np.array(pred[1]).reshape(-1)[i]))

    ax.set_ylabel('Predicted values')
    ax.set_xlabel('Measured values')
    plt.legend()
    plt.margins(0)
    # fig.savefig('./report/figures/measured_vs_predicted.png')
    return fig
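

# Illustrative usage sketch (not called by the app): `meas` and `pred` are two-element
# lists [calibration, validation] of measured and predicted values, and `train_idx` /
# `test_idx` hold the sample labels used to annotate outliers. All `demo_*` names are
# invented; resid_plot takes exactly the same arguments.
def _demo_reg_plot():
    import pandas as pd
    rng = np.random.default_rng(0)
    y_cal = rng.normal(10, 2, size=80)
    y_val = rng.normal(10, 2, size=40)
    demo_meas = [y_cal, y_val]
    demo_pred = [y_cal + rng.normal(0, 0.5, size=80), y_val + rng.normal(0, 0.5, size=40)]
    cal_idx = pd.Index([f'cal{i}' for i in range(80)])
    val_idx = pd.Index([f'val{i}' for i in range(40)])
    return reg_plot(demo_meas, demo_pred, cal_idx, val_idx)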

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ residuals plot ~~~~~~~~~~~~~~~~~~~~~~~~~~
@st.cache_data
def resid_plot( meas, pred, train_idx, test_idx):
    a0 = np.ones(2)
    a1 = np.ones(2)
    e = [np.subtract(meas[0] ,pred[0]), np.subtract(meas[1], pred[1])]

    # Regress the residuals on the measured values for each subset
    for i in range(len(meas)):
        M = LinearRegression()
        M.fit(np.array(meas[i]).reshape(-1, 1), np.array(e[i]).reshape(-1, 1))
        a1[i] = np.round(M.coef_[0][0], 2)
        a0[i] = np.round(M.intercept_[0], 2)
    

    fig, ax = plt.subplots(figsize = (12,4))
    sns.scatterplot(x=pred[0], y=e[0], color="#2C6B6F", label='Cal', edgecolor="black")
    sns.scatterplot(x=pred[1], y=e[1], color="#d0f7be", label='Val', edgecolor="black")

    # sns.scatterplot(x = pred[0], y = e[0], color='blue', label = f'Cal (Residual = {a0[0]} + {a1[0]} * Predicted)')
    # sns.scatterplot(x = pred[1], y = e[1], color='green', label = f'Val (Residual = {a0[1]} + {a1[1]} * Predicted)')
    plt.axhline(y= 0, c ='black', linestyle = ':')
    lim = np.max(abs(np.concatenate([e[0], e[1]], axis = 0)))*1.1
    plt.ylim(- lim, lim )    
    

    for i in range(2):
        e[i] = np.array(e[i]).reshape(-1)

    # Annotate calibration samples whose residual exceeds mean + 3*std
    for i, txt in enumerate(train_idx):
        if np.abs(e[0][i]) > np.mean(e[0]) + 3 * np.std(e[0]):
            plt.annotate(txt, (np.array(pred[0]).reshape(-1)[i], e[0][i]))

    # Annotate validation samples whose residual exceeds mean + 3*std
    for i, txt in enumerate(test_idx):
        if np.abs(e[1][i]) > np.mean(e[1]) + 3 * np.std(e[1]):
            plt.annotate(txt, (np.array(pred[1]).reshape(-1)[i], e[1][i]))
    ax.set_ylabel('Residuals')
    ax.set_xlabel('Predicted values')
    plt.legend()
    plt.margins(0)
    # fig.savefig('./report/figures/residuals_plot.png')
    return fig