Skip to content
Snippets Groups Projects
visualize.py 6.22 KiB
Newer Older
  • Learn to ignore specific revisions
  • DIANE's avatar
    DIANE committed
    
    
    DIANE's avatar
    DIANE committed
    import streamlit as st
    
    DIANE's avatar
    DIANE committed
    import numpy as np
    import matplotlib.pyplot as plt
    import seaborn as sns
    
    DIANE's avatar
    DIANE committed
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ predictions histogram ~~~~~~~~~~~~~~~~~~~~~~~~~~
    @st.cache_data
    def pred_hist(pred):
        """Draw a 12-bin histogram of ``pred`` on a wide, frameless axes.

        Parameters
        ----------
        pred : array-like
            Values handed unchanged to ``Axes.hist``.

        Returns
        -------
        matplotlib.figure.Figure
            The newly created 15 x 3 inch figure that holds the histogram.
        """
        figure, axis = plt.subplots(nrows=1, ncols=1, figsize=(15, 3), tight_layout=True)

        # Faint dash-dot grid on both axes.
        axis.grid(color='grey', linestyle='-.', linewidth=0.5, alpha=0.6)

        # Hide the frame on all four sides.
        for side in ('top', 'bottom', 'left', 'right'):
            axis.spines[side].set_visible(False)

        # No tick marks; tick labels are pushed away from the axes (5 pt on x, 10 pt on y).
        for axis_line, label_pad in ((axis.xaxis, 5), (axis.yaxis, 10)):
            axis_line.set_ticks_position('none')
            axis_line.set_tick_params(pad=label_pad)

        axis.hist(pred, bins=12)
        return figure
    
    
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ predictions histogram ~~~~~~~~~~~~~~~~~~~~~~~~~~
    @st.cache_data
    def plot_spectra(specdf = None, color = None, cmap =None, xunits = None, yunits = None):
        """Plot every row of ``specdf`` as one line on a single wide axes.

        Parameters
        ----------
        specdf : pandas.DataFrame
            One spectrum per row. The frame is transposed before plotting, so
            its column labels end up on the x axis. Required despite the
            ``None`` default.
        color : pandas.Series, optional
            Group label of each spectrum. Its index labels are used to select
            rows of ``specdf`` with ``.loc``.
        cmap : dict, optional
            Maps each group label to the colour used for that group's lines.
        xunits, yunits : str, optional
            Text used as the x and y axis labels.

        Returns
        -------
        matplotlib.figure.Figure
            The newly created 30 x 7 inch figure.

        Raises
        ------
        ValueError
            If ``specdf`` is None.

        Notes
        -----
        When either ``color`` or ``cmap`` is None, all spectra are drawn in
        blue and no legend is added.
        """
        if specdf is None:
            raise ValueError("plot_spectra() requires a spectra DataFrame (specdf)")

        fig, ax = plt.subplots(figsize = (30,7))

        if color is None or cmap is None:
            specdf.T.plot(legend=False, ax = ax, color = "blue")
        else:
            # Empty proxy lines yield exactly one legend entry per group,
            # instead of one entry per plotted spectrum.
            for key, value in cmap.items():
                ax.plot([], [], color=value, label = str(key))
            if cmap:
                # Build the legend once, after all proxies exist.
                ax.legend()

            for key, value in cmap.items():
                idx = color.index[color == key].tolist()
                if not idx:
                    # pandas raises "no numeric data to plot" on an empty
                    # selection, so groups without spectra are skipped.
                    continue
                specdf.loc[idx].T.plot(legend=False, ax = ax, color = value)

        ax.set_xlabel(xunits, fontsize=30)
        ax.set_ylabel(yunits, fontsize=30)
        ax.margins(x = 0)
        fig.tight_layout()
        return fig
    
    
    @st.cache_data
    def barhplot(metadf, cmap):
        """Horizontal bar chart of row counts per category of the first column.

        Parameters
        ----------
        metadf : pandas.DataFrame
            Rows are grouped by the values of its first column.
        cmap : dict
            Maps each category to its bar colour. The key order defines the
            top-to-bottom order of the bars.

        Returns
        -------
        matplotlib.figure.Figure
            The newly created 10 x 5 inch figure.

        Notes
        -----
        A category that appears in ``cmap`` but not in the data is drawn with a
        count of 0 instead of raising ``KeyError``.
        """
        group_col = metadf.columns[0]
        categories = list(cmap.keys())
        bar_colors = list(cmap.values())

        # reindex (unlike .loc) tolerates categories missing from the data.
        counts = metadf.groupby(group_col).size().reindex(categories, fill_value=0)

        fig, ax = plt.subplots(figsize = (10,5))
        ax.barh(counts.index, counts.values, color=bar_colors)
        # barh stacks bottom-up; invert so the first cmap key is on top.
        ax.invert_yaxis()
        ax.set_xlabel('Count')
        ax.set_ylabel(str(group_col).capitalize())
        return fig
    
    
    
    DIANE's avatar
    DIANE committed
    
    
    
    DIANE's avatar
    DIANE committed
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Cal/val hist ~~~~~~~~~~~~~~~~~~~~~~~~~~
    @st.cache_data
    def hist(y, y_train, y_test, target_name = 'y'):
        """Overlay KDE histograms of the full, calibration and validation targets.

        Parameters
        ----------
        y, y_train, y_test : array-like
            The values drawn as three filled histograms on one axes, in that
            order; labelled ``target_name``, ``target_name (Cal)`` and
            ``target_name (Val)`` respectively.
        target_name : str, default 'y'
            Used for the legend labels and as the x axis label.

        Returns
        -------
        matplotlib.figure.Figure
            The newly created 5 x 2 inch figure.
        """
        base_label = str(target_name)
        fig, ax = plt.subplots(figsize = (5,2))

        # (data, colour, legend label) for each layer, drawn in this order.
        layers = (
            (y, "#004e9e", base_label),
            (y_train, "#2C6B6F", base_label + " (Cal)"),
            (y_test, "#d0f7be", base_label + " (Val)"),
        )
        for values, shade, legend_text in layers:
            sns.histplot(values, color = shade, kde = True, label = legend_text, ax = ax, fill = True)

        ax.set_xlabel(base_label)
        ax.legend()
        fig.tight_layout()
        return fig
    
    
    
    @st.cache_data
    def reg_plot( meas, pred, train_idx, test_idx):
        """Measured-vs-predicted scatter with regression lines for two sample sets.

        Parameters
        ----------
        meas, pred : sequence of array-like
            ``meas[0]`` / ``pred[0]`` are plotted as "Cal" and ``meas[1]`` /
            ``pred[1]`` as "Val". The inputs are not modified.
        train_idx, test_idx : iterable
            Labels for the points of set 0 and set 1, in the same order as the
            data. They are used to annotate outlying points.

        Returns
        -------
        matplotlib.figure.Figure
            The newly created 12 x 4 inch figure.

        Notes
        -----
        For each set, ``pred`` is regressed on ``meas`` and the rounded
        intercept and slope are shown in the legend. A black identity line is
        drawn over the range of ``meas[0]`` padded by 0.05.
        """
        # Imported once per call rather than once per loop iteration.
        from sklearn.linear_model import LinearRegression

        # Flattened local copies: the caller's lists must not be rewritten.
        meas_flat = [np.array(m).reshape(-1) for m in meas]
        pred_flat = [np.array(p).reshape(-1) for p in pred]

        a0 = np.ones(2)  # intercepts (Cal, Val)
        a1 = np.ones(2)  # slopes (Cal, Val)
        for i, (m, p) in enumerate(zip(meas_flat, pred_flat)):
            M = LinearRegression()
            # Column vectors make coef_ 2-D and intercept_ 1-D, as indexed below.
            M.fit(m.reshape(-1, 1), p.reshape(-1, 1))
            a1[i] = np.round(M.coef_[0][0],2)
            a0[i] = np.round(M.intercept_[0],2)

        # Prediction errors (measured - predicted) of each set.
        ec = np.subtract(meas_flat[0], pred_flat[0])
        et = np.subtract(meas_flat[1], pred_flat[1])

        fig, ax = plt.subplots(figsize = (12,4))
        sns.regplot(x = meas_flat[0], y = pred_flat[0], color="#2C6B6F", label = f'Cal (Predicted = {a0[0]} + {a1[0]} x Measured)', scatter_kws={'edgecolor': 'black'}, ax = ax)
        sns.regplot(x = meas_flat[1], y = pred_flat[1], color='#d0f7be', label = f'Val (Predicted = {a0[1]} + {a1[1]} x Measured)', scatter_kws={'edgecolor': 'black'}, ax = ax)

        # Identity line (perfect prediction).
        low = np.min(meas_flat[0]) - 0.05
        high = np.max(meas_flat[0]) + 0.05
        ax.plot([low, high], [low, high], color = 'black')

        # Label points whose absolute error exceeds mean + 3 * std of the set.
        # NOTE(review): the threshold is not centred (|e| vs mean + 3*std rather
        # than |e - mean| vs 3*std); kept as in the original - confirm intent.
        annotated_sets = (
            (train_idx, meas_flat[0], pred_flat[0], ec),
            (test_idx, meas_flat[1], pred_flat[1], et),
        )
        for labels, x_vals, y_vals, errors in annotated_sets:
            threshold = np.mean(errors) + 3*np.std(errors)  # computed once per set
            for i, txt in enumerate(labels):
                if np.abs(errors[i]) > threshold:
                    ax.annotate(txt, (x_vals[i], y_vals[i]))

        ax.set_ylabel('Predicted values')
        ax.set_xlabel('Measured values')
        ax.legend()
        ax.margins(0)
        return fig
    
    # Resid plot
    @st.cache_data
    def resid_plot( meas, pred, train_idx, test_idx):
        """Residuals-vs-predicted scatter for two sample sets.

        Parameters
        ----------
        meas, pred : sequence of array-like
            ``meas[0]`` / ``pred[0]`` are plotted as "Cal" and ``meas[1]`` /
            ``pred[1]`` as "Val". Residuals are ``meas - pred``.
        train_idx, test_idx : iterable
            Labels for the points of set 0 and set 1, in the same order as the
            data. They are used to annotate outlying points.

        Returns
        -------
        matplotlib.figure.Figure
            The newly created 12 x 4 inch figure.

        Notes
        -----
        The y axis is symmetric around zero, spanning 1.1 times the largest
        absolute residual of both sets.
        """
        e = [np.subtract(meas[0] ,pred[0]), np.subtract(meas[1], pred[1])]

        fig, ax = plt.subplots(figsize = (12,4))
        sns.scatterplot(x = pred[0], y = e[0], color="#2C6B6F", label = 'Cal', edgecolor="black", ax = ax)
        sns.scatterplot(x = pred[1], y = e[1], color="#d0f7be", label = 'Val', edgecolor="black", ax = ax)
        ax.axhline(y= 0, c ='black', linestyle = ':')

        # 1-D copies so the comparisons and annotation coordinates are scalars.
        resid = [np.asarray(err).reshape(-1) for err in e]
        lim = np.max(np.abs(np.concatenate(resid)))*1.1
        ax.set_ylim(- lim, lim )

        # Label points whose absolute residual exceeds mean + 3 * std of the set.
        # NOTE(review): the threshold is not centred (|e| vs mean + 3*std rather
        # than |e - mean| vs 3*std); kept as in the original - confirm intent.
        annotated_sets = (
            (train_idx, pred[0], resid[0]),
            (test_idx, pred[1], resid[1]),
        )
        for labels, predicted, errors in annotated_sets:
            x_vals = np.array(predicted).reshape(-1)
            threshold = np.mean(errors) + 3*np.std(errors)  # computed once per set
            for i, txt in enumerate(labels):
                if np.abs(errors[i]) > threshold:
                    ax.annotate(txt, (x_vals[i], errors[i]))

        ax.set_ylabel('Residuals')
        ax.set_xlabel('Predicted values')
        ax.legend()
        ax.margins(0)
        return fig