# 3-prediction.py

from Packages import *
# Configure the browser tab (title, icon) and use the wide layout.
# NOTE(review): this call sits between the star-imports, presumably because
# Streamlit expects set_page_config to be the first Streamlit command and the
# following imports may issue Streamlit calls -- confirm before reordering.
st.set_page_config(page_title="NIRS Utils", page_icon=":goat:", layout="wide")
from Modules import *
from Class_Mod.DATA_HANDLING import *
# HTML for the "CEFE - CNRS" banner
# bandeau_html = """
# <div style="width: 100%; background-color: #4682B4; padding: 10px; margin-bottom: 10px;">
#   <h1 style="text-align: center; color: white;">CEFE - CNRS</h1>
# </div>
# """
# # Inject the banner HTML code
# st.markdown(bandeau_html, unsafe_allow_html=True)
# Page chrome: header, sidebar and page-specific stylesheet.
# add_header, add_sidebar, local_css, pages_folder and css_file are not defined
# in this file; they presumably come from the star-imports above.
add_header()
add_sidebar(pages_folder)

local_css(css_file / "style_model.css")

# Title row: two columns with a 2:1 width ratio. M10 shows the illustration,
# M20 hosts the upload widgets created further down.
st.title("Prediction making using a previously developed model")
M10, M20= st.columns([2, 1])
M10.image("./images/prediction making.png", use_column_width=True)

# M1, M2= st.columns([2, 1])


# st.header("Prediction making", divider='blue')
# M5, M6 = st.columns([2, 0.01])


# --- Spectral data upload -------------------------------------------------
# Accepted upload extensions: [0] is a csv matrix, [1] is a .dx file parsed by read_dx.
files_format = ['.csv', '.dx']
file = M20.file_uploader("Select NIRS Data to predict", type = files_format, help=" :mushroom: select a csv matrix with samples as rows and lambdas as columns")
export_folder = './data/predictions/'
export_name = 'Predictions_of_'   # completed below with the data file stem (and later the model stem)
reg_algo = ["Interval-PLS"]
pred_data = pd.DataFrame()        # spectra to predict; stays empty until a file is parsed
loaded_model = None               # set by the model-loading section once a model is chosen

if not file:
    M20.warning('Insert your spectral data file here!')
else:
    # splitext splits at the LAST dot, so "run.v2.csv" -> ("run.v2", ".csv").
    # Slicing at the first dot would yield ".v2.csv" and match no known format.
    file_stem, file_ext = os.path.splitext(file.name)
    file_ext = file_ext.lower()
    export_name += file_stem

    if file_ext == files_format[0]:
        separators = [";", ","]
        index_choices = ["no", "yes"]
        # NOTE(review): detection inspects 'data/<name>' on disk, not the uploaded
        # buffer -- confirm the uploaded file is expected to exist at that path.
        detected_sep = str(find_delimiter('data/'+file.name))
        detected_idx = str(find_col_index('data/'+file.name))
        # Pre-select the detected value; fall back to the first option instead of
        # raising ValueError when detection returns something outside the list.
        sep_default = separators.index(detected_sep) if detected_sep in separators else 0
        idx_default = index_choices.index(detected_idx) if detected_idx in index_choices else 0
        qsep = M20.selectbox("Select csv separator - _detected_: " + detected_sep, options=separators, index=sep_default, key=2)
        qhdr = M20.selectbox("indexes column in csv? - _detected_: " + detected_idx, options=index_choices, index=idx_default, key=3)
        # index_col=0 uses the first column as row labels; False tells pandas not to use one.
        col = 0 if qhdr == 'yes' else False
        pred_data = pd.read_csv(file, sep=qsep, index_col=col)

    elif file_ext == files_format[1]:
        # read_dx takes a path, so the upload is spilled to a temporary file first.
        # The file is closed (and therefore flushed) BEFORE read_dx opens it, and it
        # is removed even if parsing fails.
        with NamedTemporaryFile(delete=False, suffix=".dx") as tmp:
            tmp.write(file.read())
            tmp_path = tmp.name
        try:
            chem_data, spectra, meta_data, _ = read_dx(file =  tmp_path)
        finally:
            os.unlink(tmp_path)

        M20.success("The data have been loaded successfully", icon="✅")
        if chem_data.to_numpy().shape[1]>0:
            yname = M20.selectbox('Select target', options=chem_data.columns)
            # NOTE(review): keeps the rows whose target equals 0 -- presumably the
            # samples without a reference measurement; confirm the intent.
            measured = chem_data.loc[:,yname] == 0
            y = chem_data.loc[:,yname].loc[measured]
            pred_data = spectra.loc[measured]
        else:
            pred_data = spectra


# Section I: show the raw spectra once a data file has been parsed.
st.header("I - Spectral data preprocessing & visualization", divider='blue')
if not pred_data.empty:  # nothing to draw until spectra have been loaded
    # Two columns (2:1): M1 shows the plot, M2 receives the preprocessing-params uploader below.
    M1, M2= st.columns([2, 1])
    M1.write('Raw spectra')
    # NOTE(review): yunits is a literal string that contains an expression, not the
    # evaluated value; meta_data is only assigned for .dx uploads -- confirm this is intentional.
    fig = plot_spectra(pred_data, xunits = 'lab', yunits = "meta_data.loc[:,'yunits'][0]")
    M1.pyplot(fig)

### preprocessing
# Apply the preprocessing described by an uploaded .json file to pred_data.
# Keys read from the file: 'normalization', 'window_length', 'polyorder', 'deriv'.
#
# Start from an empty DataFrame *instance* (the bare class `pd.DataFrame` has no
# boolean `.empty`; on the class it is a property object), and give `params` a
# defined value even when no spectra are loaded.
preprocessed = pd.DataFrame()
params = None
if not pred_data.empty:
    params = M2.file_uploader("Load preprocessings params", type = '.json', help=" .json file")
    if params:
        prep = json.load(params)

        # Optional normalization; `norm` is the label displayed in the summary below the plot.
        if prep['normalization'] == 'Snv':
            x1 = Snv(pred_data)
            norm = 'Standard Normal Variate'
        else:
            norm = 'No Normalization was applied'
            x1 = pred_data

        # Savitzky-Golay filtering along the last axis (axis=-1), using the
        # window length, polynomial order and derivative order from the params file.
        x2 = savgol_filter(
            x1,
            window_length=prep["window_length"],
            polyorder=prep["polyorder"],
            deriv=prep["deriv"],
            delta=1.0,
            axis=-1,
            mode="interp",
            cval=0.0,
        )
        # Re-attach the original sample index and column labels to the filtered values.
        preprocessed = pd.DataFrame(x2, index = pred_data.index, columns = pred_data.columns)

################################################################################################
## plot preprocessed spectra
# Shown only after the preprocessing step has produced a non-empty frame.
if not preprocessed.empty:
    # Two columns (2:1): M3 shows the plot, M4 the summary of the applied preprocessing.
    M3, M4= st.columns([2, 1])
    M3.write('Preprocessed spectra')
    fig2 = plot_spectra(preprocessed, xunits = 'lab', yunits = "meta_data.loc[:,'yunits'][0]")
    M3.pyplot(fig2)
    # The summary strings use double quotes outside so the dict keys can use single
    # quotes: reusing the enclosing quote inside a replacement field is a
    # SyntaxError before Python 3.12. "\\:" emits the same backslash + colon as
    # before, without relying on an invalid escape sequence.
    SG = (
        "- Savitzky-Golay derivative parameters \\:"
        f"(Window_length:{prep['window_length']};  polynomial order: {prep['polyorder']};  Derivative order : {prep['deriv']})"
    )
    Norm = f"- Spectral Normalization \\: {norm}"
    M4.info('The spectra were preprocessed using:\n'+SG+"\n"+Norm)

################### Predictions making  ##########################
# Section II: pick a saved model from data/models/ and, for interval-PLS models,
# restrict the preprocessed spectra to the wavelength intervals used for training.
st.header("II - Prediction making", divider='blue')
if not pred_data.empty and params:  # needs both the spectra and the preprocessing params
    M5, M6 = st.columns([2, 1])
    # Leading '' entry means "nothing selected yet"; format_func renders it as "<Select>".
    model_files = [''] + os.listdir('data/models/')
    model_name = M6.selectbox("Select your model from the dropdown list:", options = model_files, key = 21, format_func=lambda x: x if x else "<Select>")

    if model_name:
        # splitext drops only the final extension, so multi-dot model names keep their stem.
        export_name += '_with_' + os.path.splitext(model_name)[0]
        # NOTE(review): joblib.load unpickles arbitrary objects -- only trusted
        # files should ever be placed in data/models/.
        with open('data/models/'+ model_name,'rb') as f:
            loaded_model = joblib.load(f)

        # Explicit None check: some estimator types define __len__, so plain
        # truthiness is not a reliable "was a model loaded" test.
        if loaded_model is not None:
            ncols = loaded_model.n_features_in_   # number of predictors the model was fitted on
            M6.success("The model has been loaded successfully", icon="✅")
            s = M6.checkbox('the model is of ipls type?')
            if s:
                index = M6.file_uploader("select wavelengths index file", type="csv")
                if index:
                    # Each row's first two values are read as inclusive [start, stop] column positions.
                    intervalls = pd.read_csv(index, sep=';', index_col=0).to_numpy()
                    idx = []
                    for row in range(intervalls.shape[0]):
                        idx.extend(np.arange(intervalls[row,0], intervalls[row,1]+1))
                    # iloc positions are 0-based, so the largest valid one is shape[1]-1;
                    # also report (rather than crash) when the file yields no positions.
                    if idx and max(idx) < preprocessed.shape[1]:
                        preprocessed = preprocessed.iloc[:,idx] ### get predictors
                    else:
                        M6.error("Error: The number of columns in your data does not match the number of columns used to train the model. Please ensure they are the same.")


# Run the prediction on demand and display the results.
# Explicit None check: some estimator types define __len__, so plain truthiness
# is not a reliable "was a model loaded" test.
if loaded_model is not None:
    if M6.button('Predict', type='primary'):
        # The model can only score data with the same number of predictors it was fitted on.
        if ncols == preprocessed.shape[1]:
            result = pd.DataFrame(loaded_model.predict(preprocessed), index = preprocessed.index)

            #############################
            if preprocessed.shape[1]>1:
                M5.write('Predicted values distribution')
                # Histogram of the predicted values
                hist_fig, axs = plt.subplots(1, 1, figsize =(15, 3),
                                             tight_layout = True)

                # Add x, y gridlines
                axs.grid( color ='grey', linestyle ='-.', linewidth = 0.5, alpha = 0.6)
                # Remove axes spines (dedicated loop name so the ipls checkbox value `s` is not overwritten)
                for side in ['top', 'bottom', 'left', 'right']:
                    axs.spines[side].set_visible(False)
                # Remove x, y ticks
                axs.xaxis.set_ticks_position('none')
                axs.yaxis.set_ticks_position('none')
                # Add padding between axes and labels
                axs.xaxis.set_tick_params(pad = 5)
                axs.yaxis.set_tick_params(pad = 10)
                # Draw the histogram: N = counts per bin, patches = the bar artists
                N, bins, patches = axs.hist(result, bins = 12)
                # Colour each bar by a compressed (5th-root) count scaled by the largest count
                fracs = ((N**(1 / 5)) / N.max())
                # Separate name so the normalization label `norm` used in the summary is not replaced
                color_scale = colors.Normalize(fracs.min(), fracs.max())

                for thisfrac, thispatch in zip(fracs, patches):
                    thispatch.set_facecolor(plt.cm.viridis(color_scale(thisfrac)))

                M5.pyplot(hist_fig)
            st.write('Predicted values table')
            st.dataframe(result.T)
            ##################################

            # result.to_csv(export_folder + export_name + '.csv', sep = ';')
            # NOTE(review): the export above is commented out, yet the download below
            # is handed that same path -- confirm download_results does not need the
            # file to exist on disk.
            # export to local drive - Download
            download_results(export_folder + export_name + '.csv', export_name + '.csv')
            # create a report with information on the prediction
            ## see https://stackoverflow.com/a/59578663
        else:
            M6.error(f'Error: The model was trained with {ncols} wavelengths, but you provided {preprocessed.shape[1]} wavelengths for prediction. Please ensure they match.')