Skip to content
Snippets Groups Projects
Miscellaneous.py 5.64 KiB
Newer Older
  • Learn to ignore specific revisions
  • from Packages import *
    
    # local CSS
    ## load the custom CSS in the style folder
    @st.cache_data
    def local_css(file_name):
        """Inject the stylesheet stored in ``file_name`` into the Streamlit page.

        The file content is wrapped in a ``<style>`` tag and emitted through
        ``st.markdown`` with raw HTML rendering enabled.
        """
        with open(file_name) as stylesheet:
            css_rules = stylesheet.read()
            st.markdown(f"<style>{css_rules}</style>", unsafe_allow_html=True)
    
    # predict module
    def prediction(NIRS_csv, qsep, qhdr, model):
        """Read a CSV file and return ``model``'s predictions for its content.

        NIRS_csv -- path or file-like object handed to ``pd.read_csv``
        qsep     -- field separator used in the CSV
        qhdr     -- 'yes' makes the first column the index; any other value
                    passes ``index_col=False`` to ``pd.read_csv``
        model    -- object exposing a ``predict`` method
        """
        index_column = 0 if qhdr == 'yes' else False
        spectra = pd.read_csv(NIRS_csv, sep=qsep, index_col=index_column)
        return model.predict(spectra)
    
    
    @st.cache_data
    def reg_plot(meas, pred, train_idx, test_idx):
        """Plot predicted against measured values for the calibration and test sets.

        Parameters
        ----------
        meas, pred : indexable holding two array-likes
            ``meas[0]`` / ``pred[0]`` are the calibration values and
            ``meas[1]`` / ``pred[1]`` the test values.  They are read only;
            the function works on flattened copies.
        train_idx, test_idx : iterables
            Sample labels, in the same order as the calibration and test
            values.  A label is written next to its point when the absolute
            residual exceeds ``mean + 3 * std`` of the residuals of its set.

        Returns
        -------
        The matplotlib figure containing the plot.
        """
        # Flattened copies: the caller's containers must not be modified,
        # especially since this function is wrapped by a cache decorator.
        x = [np.array(meas[i]).reshape(-1) for i in (0, 1)]
        y = [np.array(pred[i]).reshape(-1) for i in (0, 1)]

        # Intercept (a0) and slope (a1) of "predicted ~ measured" for each set.
        a0 = np.ones(2)
        a1 = np.ones(2)
        for i in (0, 1):
            M = LinearRegression()
            M.fit(x[i].reshape(-1, 1), y[i].reshape(-1, 1))
            a1[i] = np.round(M.coef_[0][0], 2)
            a0[i] = np.round(M.intercept_[0], 2)

        # Residuals (measured - predicted) of the calibration and test sets.
        ec = np.subtract(x[0], y[0])
        et = np.subtract(x[1], y[1])

        fig, ax = plt.subplots(figsize=(12, 4))
        # Draw on `ax` explicitly instead of relying on pyplot's current axes.
        sns.regplot(x=x[0], y=y[0], color='blue', ax=ax,
                    label=f'Calib (Predicted = {a0[0]} + {a1[0]} x Measured)')
        sns.regplot(x=x[1], y=y[1], color='green', ax=ax,
                    label=f'Test (Predicted = {a0[1]} + {a1[1]} x Measured)')

        # Identity line spanning the calibration range.
        low = np.min(x[0]) - 0.05
        high = np.max(x[0]) + 0.05
        ax.plot([low, high], [low, high], color='black')

        # Label outlying samples; the threshold is computed once per set.
        for resid, labels, xs, ys in ((ec, train_idx, x[0], y[0]),
                                      (et, test_idx, x[1], y[1])):
            threshold = np.mean(resid) + 3 * np.std(resid)
            for i, txt in enumerate(labels):
                if np.abs(resid[i]) > threshold:
                    ax.annotate(txt, (xs[i], ys[i]))

        ax.set_ylabel('Predicted values')
        ax.set_xlabel('Measured values')
        ax.legend()
        ax.margins(0)

        return fig
    
    DIANE's avatar
    DIANE committed
    def resid_plot(meas, pred, train_idx, test_idx):
        """Plot residuals against predicted values for the calibration and test sets.

        Parameters
        ----------
        meas, pred : indexable holding two array-likes
            ``meas[0]`` / ``pred[0]`` are the calibration values and
            ``meas[1]`` / ``pred[1]`` the test values.  They are read only.
        train_idx, test_idx : iterables
            Sample labels, in the same order as the calibration and test
            values.  A label is written next to its point when the absolute
            residual exceeds ``mean + 3 * std`` of the residuals of its set.

        Returns
        -------
        The matplotlib figure containing the plot.
        """
        # Flatten everything once so that every later step handles 1-D arrays.
        measured = [np.array(meas[i]).reshape(-1) for i in (0, 1)]
        predicted = [np.array(pred[i]).reshape(-1) for i in (0, 1)]
        # Residuals are measured - predicted.
        e = [np.subtract(measured[i], predicted[i]) for i in (0, 1)]

        # Intercept (a0) and slope (a1) of "residual ~ predicted", matching the
        # legend text and the x-axis of the plot.
        a0 = np.ones(2)
        a1 = np.ones(2)
        for i in (0, 1):
            M = LinearRegression()
            M.fit(predicted[i].reshape(-1, 1), e[i].reshape(-1, 1))
            # With a 2-D target, coef_ is (1, 1) and intercept_ is (1,):
            # extract the scalars explicitly.
            a1[i] = np.round(M.coef_[0][0], 2)
            a0[i] = np.round(M.intercept_[0], 2)

        fig, ax = plt.subplots(figsize=(12, 4))
        sns.scatterplot(x=predicted[0], y=e[0], color='blue', ax=ax,
                        label=f'Calib (Residual = {a0[0]} + {a1[0]} * Predicted)')
        sns.scatterplot(x=predicted[1], y=e[1], color='green', ax=ax,
                        label=f'Test (Residual = {a0[1]} + {a1[1]} * Predicted)')
        ax.axhline(y=0, c='black', linestyle=':')

        # Symmetric y-range with 10 % headroom around the largest residual.
        lim = np.max(np.abs(np.concatenate([e[0], e[1]], axis=0))) * 1.1
        ax.set_ylim(-lim, lim)

        # Label outlying samples at the position where their point is drawn
        # (predicted value, residual); the threshold is computed once per set.
        for resid, labels, xs in ((e[0], train_idx, predicted[0]),
                                  (e[1], test_idx, predicted[1])):
            threshold = np.mean(resid) + 3 * np.std(resid)
            for i, txt in enumerate(labels):
                if np.abs(resid[i]) > threshold:
                    ax.annotate(txt, (xs[i], resid[i]))

        ax.set_xlabel('Predicted values')
        ax.set_ylabel('Residuals')
        ax.margins(0)

        return fig
    
    DIANE's avatar
    DIANE committed
    
    
    DIANE's avatar
    DIANE committed
    
    
    # Function that creates a download button; it needs the path of the file to offer and the file name to export it as
    def download_results(data, export_name):
        """Show a 'Download Results' button serving the file located at ``data``.

        data        -- path of the file to open and offer for download
        export_name -- file name passed to the download button
        """
        with open(data) as results_file:
            st.download_button(
                label='Download Results',
                data=results_file,
                file_name=export_name,
            )
    
    @st.cache_resource
    def plot_spectra(df, xunits, yunits):
        """Draw every row of ``df`` as a blue line and return the figure.

        Parameters
        ----------
        df : DataFrame
            One row per curve; the column labels give the x positions.
        xunits, yunits : str
            Axis labels.

        Returns
        -------
        The matplotlib figure containing the plot.

        When the first column label is not a string the x-axis is inverted;
        string labels are plotted in their original order.
        """
        fig, ax = plt.subplots(figsize=(30, 7))

        # Plot exactly once, whatever the type of the column labels.
        df.T.plot(legend=False, ax=ax, color='blue')
        if not isinstance(df.columns[0], str):
            # NOTE(review): presumably numeric wavelengths/wavenumbers shown in
            # decreasing order -- confirm against the callers.
            ax.invert_xaxis()

        ax.set_xlabel(xunits, fontsize=18)
        ax.set_ylabel(yunits, fontsize=18)
        fig.tight_layout()

        # Return the figure so the cached call yields something usable.
        return fig
    
    DIANE's avatar
    DIANE committed
    
    
    ## descriptive stat
    def desc_stats(x):
        """Return a dictionary of descriptive statistics computed on ``x``.

        Keys, in order: 'N samples' (``x.shape[0]``), 'Min', 'Max', 'Mean',
        'Median', 'S' (``np.std``), 'RSD' (``np.std * 100 / np.mean``),
        'Skew' and 'Kurt' (``skew`` / ``kurtosis`` along axis 0 with
        ``bias=True``).
        """
        centre = np.mean(x)
        spread = np.std(x)
        return {
            'N samples': x.shape[0],
            'Min': np.min(x),
            'Max': np.max(x),
            'Mean': centre,
            'Median': np.median(x),
            'S': spread,
            'RSD': spread * 100 / centre,
            'Skew': skew(x, axis=0, bias=True),
            'Kurt': kurtosis(x, axis=0, bias=True),
        }
    
    
    def hash_data(data):
        """Return the MD5 hex digest of ``data``.

        Supported inputs and the bytes that get hashed:

        * ``bytes``                      -- the raw bytes, unchanged
        * ``pd.DataFrame`` / ``pd.Series`` -- ``data.to_string()``
        * ``np.ndarray``                 -- ``np.array2string`` with ',' as
          separator and summarisation disabled
        * ``dict``                       -- ``str(sorted(data.items()))``
        * ``list``, ``tuple``, ``int``, ``float``, ``str``, ``bool`` -- ``str(data)``

        Text renderings are encoded with the default ``str.encode`` codec.

        Raises
        ------
        TypeError
            If ``data`` is of any other type.
        """
        if isinstance(data, bytes):
            # Hash the bytes directly: decoding with errors ignored would drop
            # invalid sequences and make distinct binary inputs collide.
            payload = data
        elif isinstance(data, (pd.DataFrame, pd.Series)):
            payload = data.to_string().encode()
        elif isinstance(data, np.ndarray):
            # A threshold above the element count prevents the "..." summary
            # that would give different large arrays the same rendering.
            payload = np.array2string(
                data, separator=',', threshold=data.size + 1
            ).encode()
        elif isinstance(data, dict):
            # Sort the items so the digest does not depend on insertion order.
            payload = str(sorted(data.items())).encode()
        elif isinstance(data, (list, tuple, int, float, str, bool)):
            payload = str(data).encode()
        else:
            raise TypeError(f"Unsupported data type: {type(data)}")

        return hashlib.md5(payload).hexdigest()