# Miscellaneous.py

from Packages import *

# local CSS
## load the custom CSS in the style folder
@st.cache_data
def local_css(file_name):
    """Inject the contents of a CSS file into the Streamlit page.

    The file at ``file_name`` is read in full and emitted through
    ``st.markdown`` wrapped in a ``<style>`` tag, with raw HTML enabled.
    """
    with open(file_name) as css_file:
        css_rules = css_file.read()
    st.markdown(f"<style>{css_rules}</style>", unsafe_allow_html=True)

# predict module
def prediction(NIRS_csv, qsep, qhdr, model):
    """Load a CSV of spectra and return the model's predictions for it.

    Parameters
    ----------
    NIRS_csv : path or file-like object handed to ``pd.read_csv``.
    qsep : field separator used in the CSV.
    qhdr : when equal to ``'yes'`` the first column is used as the row
        index; for any other value ``index_col=False`` is passed, so no
        column is used as the index.
    model : object exposing a ``predict`` method.

    Returns
    -------
    Whatever ``model.predict`` returns for the loaded table.
    """
    index_col = 0 if qhdr == 'yes' else False
    spectra = pd.read_csv(NIRS_csv, sep=qsep, index_col=index_col)
    return model.predict(spectra)


@st.cache_data
def reg_plot(meas, pred, train_idx, test_idx):
    """Draw a predicted-vs-measured regression plot for calibration and test sets.

    Parameters
    ----------
    meas : indexable pair; ``meas[0]`` holds the measured calibration values
        and ``meas[1]`` the measured test values.
    pred : indexable pair laid out like ``meas`` but holding predictions.
    train_idx : iterable of labels for the calibration samples, in the same
        order as ``meas[0]``.
    test_idx : iterable of labels for the test samples, in the same order as
        ``meas[1]``.

    The figure is created with ``plt.subplots`` and is not returned.
    NOTE(review): callers presumably render the current pyplot figure after
    calling this function - confirm.
    """
    # Flatten once; the original re-flattened these arrays on every iteration.
    meas_cal = np.array(meas[0]).reshape(-1)
    pred_cal = np.array(pred[0]).reshape(-1)
    meas_test = np.array(meas[1]).reshape(-1)
    pred_test = np.array(pred[1]).reshape(-1)
    ec = np.subtract(meas_cal, pred_cal)
    et = np.subtract(meas_test, pred_test)

    fig, ax = plt.subplots(figsize=(12, 4))
    # Draw on the axes created above explicitly rather than through the
    # implicit "current axes" of pyplot.
    sns.regplot(x=meas[0], y=pred[0], color='blue', label='Calib', ax=ax)
    sns.regplot(x=meas[1], y=pred[1], color='green', label='Test', ax=ax)

    # Identity line spanning the range of the measured calibration values.
    low = np.min(meas[0]) - 0.05
    high = np.max([meas[0]]) + 0.05
    ax.plot([low, high], [low, high], color='black')

    # Label samples whose absolute residual exceeds mean + 3 * std of the
    # residuals; the threshold is loop-invariant, so compute it once per set.
    ec_threshold = np.mean(ec) + 3 * np.std(ec)
    for i, txt in enumerate(train_idx):
        if np.abs(ec[i]) > ec_threshold:
            ax.annotate(txt, (meas_cal[i], pred_cal[i]))

    et_threshold = np.mean(et) + 3 * np.std(et)
    for i, txt in enumerate(test_idx):
        if np.abs(et[i]) > et_threshold:
            ax.annotate(txt, (meas_test[i], pred_test[i]))

    ax.set_ylabel('Predicted values')
    ax.set_xlabel('Measured values')
    ax.legend()
    ax.margins(0)

@st.cache_data
def resid_plot(meas, pred, train_idx, test_idx):
    """Draw a residuals-vs-measured scatter plot for calibration and test sets.

    Parameters
    ----------
    meas : indexable pair; ``meas[0]`` holds the measured calibration values
        and ``meas[1]`` the measured test values.
    pred : indexable pair laid out like ``meas`` but holding predictions.
    train_idx : iterable of labels for the calibration samples, in the same
        order as ``meas[0]``.
    test_idx : iterable of labels for the test samples, in the same order as
        ``meas[1]``.

    Residuals are computed as measured minus predicted. The figure is created
    with ``plt.subplots`` and is not returned.
    NOTE(review): callers presumably render the current pyplot figure after
    calling this function - confirm.
    """
    # Flatten once; the original re-flattened these arrays on every iteration.
    meas_cal = np.array(meas[0]).reshape(-1)
    meas_test = np.array(meas[1]).reshape(-1)
    ec = np.subtract(meas_cal, np.array(pred[0]).reshape(-1))
    et = np.subtract(meas_test, np.array(pred[1]).reshape(-1))

    fig, ax = plt.subplots(figsize=(12, 4))
    # Draw on the axes created above explicitly rather than through the
    # implicit "current axes" of pyplot.
    sns.scatterplot(x=meas[0], y=ec, color='blue', label='Calib', ax=ax)
    sns.scatterplot(x=meas[1], y=et, color='green', label='Test', ax=ax)
    ax.axhline(y=0, c='black', linestyle=':')

    # Symmetric y-range around zero with 10 % headroom over the largest residual.
    lim = np.max(abs(np.concatenate([ec, et], axis=0))) * 1.1
    ax.set_ylim(-lim, lim)

    # Label samples whose absolute residual exceeds mean + 3 * std of the
    # residuals; the threshold is loop-invariant, so compute it once per set.
    ec_threshold = np.mean(ec) + 3 * np.std(ec)
    for i, txt in enumerate(train_idx):
        if np.abs(ec[i]) > ec_threshold:
            ax.annotate(txt, (meas_cal[i], ec[i]))

    et_threshold = np.mean(et) + 3 * np.std(et)
    for i, txt in enumerate(test_idx):
        if np.abs(et[i]) > et_threshold:
            ax.annotate(txt, (meas_test[i], et[i]))

    # The former debug x-label built from ``train_idx.shape`` was overwritten
    # immediately and failed for indices without a ``shape`` attribute, so it
    # has been dropped.
    ax.set_ylabel('Residuals')
    ax.set_xlabel('Measured values')
    ax.legend()
    ax.margins(0)


# Function that creates a download button - needs the path of the file to offer and the file name to store it as
def download_results(data, export_name):
    """Show a Streamlit download button serving the file at ``data``.

    ``data`` is opened with ``open`` and the open file object is handed to
    ``st.download_button``; ``export_name`` is passed as the file name.
    """
    with open(data) as results_file:
        st.download_button(
            label='Download Results',
            data=results_file,
            file_name=export_name,
        )

@st.cache_resource
def plot_spectra(df, xunits, yunits):
    """Plot every row of ``df`` as a line and return the matplotlib figure.

    Parameters
    ----------
    df : DataFrame whose rows are plotted against its column labels.
    xunits : label for the x axis.
    yunits : label for the y axis.

    Returns
    -------
    The matplotlib figure holding the plot.
    """
    fig, ax = plt.subplots(figsize=(30, 7))
    if isinstance(df.columns[0], str):
        # String column labels: plot as-is and anchor the annotation at x = 0.
        df.T.plot(legend=False, ax=ax, color='blue')
        label_x = 0
    else:
        # Non-string column labels: the x axis is inverted, so the annotation
        # is anchored at the largest column label.
        label_x = np.max(df.columns)
        df.T.plot(legend=False, ax=ax, color='blue').invert_xaxis()

    # Overall maximum as a scalar. ``np.max(df)`` yields a per-column Series
    # on pandas < 2.0, whereas ``df.max().max()`` is a scalar on every version.
    label_y = df.max().max()
    ax.annotate(
        text=f'The total number of spectra is {df.shape[0]}',
        xy=(label_x, label_y),
        size=20,
        color='black',
        backgroundcolor='red',
    )
    ax.set_xlabel(xunits, fontsize=18)
    ax.set_ylabel(yunits, fontsize=18)
    ax.margins(x=0)

    return fig


## descriptive stat
def desc_stats(x):
    """Return a dict of descriptive statistics for ``x``.

    The keys are, in order: 'N samples', 'Min', 'Max', 'Mean', 'Median',
    'S' (``np.std``), 'RSD(%)' (``np.std`` as a percentage of the mean),
    'Skewness' and 'Kurtosis' (both computed along axis 0 with ``bias=True``).
    ``x`` must expose a ``shape`` attribute.
    """
    n_samples = x.shape[0]
    mean_value = np.mean(x)
    std_value = np.std(x)
    return {
        'N samples': n_samples,
        'Min': np.min(x),
        'Max': np.max(x),
        'Mean': mean_value,
        'Median': np.median(x),
        'S': std_value,
        'RSD(%)': std_value * 100 / mean_value,
        'Skewness': skew(x, axis=0, bias=True),
        'Kurtosis': kurtosis(x, axis=0, bias=True),
    }