"""Streamlit page "NIRS Utils".

This part of the script builds the page header, loads a NIRS spectra CSV,
offers an exploratory analysis of it (PCA scores/loadings/influence plots with
optional K-means clustering) and trains/exports a calibration model.
The prediction section follows further down in the file.
"""
#from Modules_manager.PCA_ import pca_maker
from Packages import *
# Streamlit requires set_page_config to be the first Streamlit command of the
# script, which is why it sits between the import statements.
st.set_page_config(page_title="NIRS Utils", page_icon=":goat:", layout="wide")
from Modules import *
from Packages import *
from Class_Mod.DATA_HANDLING import *

# graphical delimiter
st.write("---")

# load images for web interface
# Forward slashes are used on purpose: "images\sselect.JPG" contained invalid
# escape sequences ("\s", "\g", "\p") and only resolved on Windows.
img_sselect = Image.open("images/sselect.JPG")
img_general = Image.open("images/general.JPG")
img_predict = Image.open("images/predict.JPG")

# TOC menu on the left
with st.sidebar:
    st.markdown("[Sample Selection](#sample-selection)")
    st.markdown("[Model Development](#create-a-model)")
    st.markdown("[Predictions Making](#predict)")

# Page header
with st.container():
    st.subheader("Plateforme d'Analyses Chimiques pour l'Ecologie-PACE :goat:")
    st.title("NIRS Utils")
    st.write("Samples selection, Predictive Modelling, and Predictions making using [Pinard](https://github.com/GBeurier/pinard) and PACE NIRS Database.")
    #st.image(img_general)

################################### Data Loading and Visualization ########################################
# Choices offered by the "separator" and "index column" select lists.
csv_separators = [";", ","]
index_col_choices = ["no", "yes"]

container1 = st.container(border=True)
col2, col1 = st.columns([3, 1])
container2 = st.container(border=True)
container2.header("Exploratory Data Analysis-Multivariable Data Analysis", divider='blue')
scores, loadings, pc = st.columns([2, 2, 0.5])
influence, hotelling, qexp = st.columns([2, 2, 1])

with container1:
    col1.header("NIRS Data Loading", divider='blue')
    col2.header("Spectral Data Visualization", divider='blue')

    with col1:
        # loader for csv file containing NIRS spectra
        sselectx_csv = st.file_uploader("Load NIRS Data", type="csv", help=" :mushroom: select a csv matrix with samples as rows and lambdas as columns", key=5)
        if sselectx_csv is not None:
            # NOTE(review): detection reads 'data/<uploaded name>', i.e. it
            # presumably expects a copy of the upload in ./data - confirm.
            detected_sep = str(find_delimiter('data/'+sselectx_csv.name))
            detected_idx = str(find_col_index('data/'+sselectx_csv.name))
            # Fall back to the first option when detection yields a value that
            # is not offered (list.index would raise ValueError otherwise).
            sep_pos = csv_separators.index(detected_sep) if detected_sep in csv_separators else 0
            idx_pos = index_col_choices.index(detected_idx) if detected_idx in index_col_choices else 0

            # Select list for CSV delimiter
            psep = st.selectbox("Select csv separator - _detected_: " + detected_sep, options=csv_separators, index=sep_pos, key=9)
            # Select list for CSV header True / False
            phdr = st.selectbox("indexes column in csv? - _detected_: " + detected_idx, options=index_col_choices, index=idx_pos, key=31)
            # index_col=0 uses the first column as index, index_col=False uses none.
            col = 0 if phdr == 'yes' else False

            data_import = pd.read_csv(sselectx_csv, sep=psep, index_col=col)
            st.success("The data have been loaded successfully", icon="✅")

    ## Visualize spectra
    if sselectx_csv is not None:
        with col2:
            fig, ax = plt.subplots(figsize=(30, 7))
            data_import.T.plot(legend=False, ax=ax, color='blue')
            ax.set_xlabel('Wavelength/Wavenumber', fontsize=18)
            ax.set_ylabel('Signal', fontsize=18)
            plt.margins(x=0)
            st.pyplot(fig)
            # The script is re-executed on every interaction; close the figure
            # so matplotlib figures do not pile up in memory.
            plt.close(fig)

            st.write("Summary")
            # Reduce over the raw array so Min/Max are single scalars regardless
            # of how the installed pandas handles np.min on a DataFrame.
            spectra_values = data_import.to_numpy()
            info = pd.DataFrame({'N': [data_import.shape[0]],
                                 'Min': [np.min(spectra_values)],
                                 'Max': [np.max(spectra_values)]},
                                index=['Values']).T
            # rename_axis returns a new frame; the result has to be kept.
            info = info.rename_axis('information')
            st.table(data=info)

######################################################################################

############################## Exploratory data analysis ###############################
with container2:
    if sselectx_csv is not None:
        plot_type = ['', 'PCA', 'UMAP', 'NMF']
        cluster_methods = ['', 'Kmeans', 'UMAP', 'AP']
        # Stay None unless the selected technique is actually implemented, so
        # the plotting code below never touches an undefined name.
        model = None
        cl = None

        with pc:
            type_plot = st.selectbox("Dimensionality reduction techniques: ", options=plot_type, key=37)
            type_cluster = st.selectbox("Clustering techniques: ", options=cluster_methods, key=38)
            # Only PCA is wired in so far; UMAP and NMF have no model yet.
            if type_plot == 'PCA':
                model = LinearPCA(data_import, Ncomp=5)

        if model is not None:
            # select lists to choose which components to plot
            axis1 = pc.selectbox("x-axis", options=model.scores_.columns, index=0)
            axis2 = pc.selectbox("y-axis", options=model.scores_.columns, index=1)
            axis3 = pc.selectbox("z-axis", options=model.scores_.columns, index=2)

            if type_cluster == 'Kmeans':
                cl = Sk_Kmeans(model.scores_.loc[:, [axis1, axis2, axis3]], max_clusters=30)
            elif type_cluster != '':
                pc.warning(type_cluster + ' clustering is not implemented yet; showing unclustered scores.')

            with scores:
                t = model.scores_
                if cl is not None:
                    st.write('Scree plot')
                    fig2 = px.scatter(cl.inertia_.T, y='inertia')
                    st.plotly_chart(fig2)

                    ncluster = st.number_input(min_value=2, max_value=30, value=3, label='Select the desired number of clusters')
                    data, colors = cl.fit_optimal(nclusters=ncluster)
                    #fig = px.scatter(data, x=axis1, y=axis2, color= colors)
                    st.write('Scores plot')
                    fig = px.scatter_3d(data, x=axis1, y=axis2, z=axis3, color=colors)
                else:
                    fig = px.scatter_3d(t, x=axis1, y=axis2, z=axis3)
                st.plotly_chart(fig)

            with loadings:
                st.write('Loadings plot')
                p = model.loadings_
                # Add a running integer column 'wl' to serve as x-axis, then
                # reshape to long format: one line per loading column.
                pp = pd.concat([p, pd.DataFrame(np.arange(p.shape[0]), index=p.index, columns=['wl'])], axis=1)
                df1 = pp.melt(id_vars="wl")
                fig = px.line(df1, x='wl', y='value', color='variable')
                fig.update_layout(
                    legend=dict(x=1, y=0,
                                font=dict(family="Courier", size=12, color="black"),
                                bordercolor="Black", borderwidth=2)
                )
                st.plotly_chart(fig)

            with influence:
                st.write('Influence plot')
                ax1 = st.selectbox("Component", options=model.scores_.columns, index=3)
                leverage = model.leverage_
                residuals = model.residuals
                fig = px.scatter(x=leverage[ax1], y=residuals[ax1], color=leverage[ax1]*residuals[ax1])
                st.plotly_chart(fig)

            with hotelling:
                st.write('T²-Hotelling vs Q residuals plot')
                # NOTE(review): ax2 is collected but not used yet; the chart
                # below plots scores, not T²/Q statistics.
                ax2 = st.selectbox("Component", options=model.scores_.columns, index=4)
                t = model.scores_
                fig = px.scatter(t, x=axis1, y=t.columns[1])
                st.plotly_chart(fig)

            with qexp:
                pass

        elif type_plot != '':
            st.warning(type_plot + ' is not implemented yet; please select PCA.')
        else:
            st.markdown('Select a dimensionality reduction technique from the dropdown list')

########################################################################################
# Model creation module
container2 = st.container(border=True)

M1, M2, M3 = st.columns([2,2,2])
M4, M5 = st.columns([6,2])
container3 = st.container(border=True)
M7, M8 = st.columns([2,2])

available_regression_algo = ["","SciKitLearn PLSR", "Jchemo Local Weighted PLSR", "Intervalle Selection PLSR"]

with container2:
    st.header("Calibration Model Development", divider='blue')
    st.write("Create a predictive model, then use it for predicting your target variable(chemical values) from NIRS spectra")
    # CSV files loader
    xcal_csv = M3.file_uploader("Select NIRS Data", type="csv", help=" :mushroom: select a csv matrix with samples as rows and lambdas as columns")
    ycal_csv = M3.file_uploader("Select corresponding Chemical Data", type="csv", help=" :mushroom: select a csv matrix with samples as rows and chemical values as a column")

    if xcal_csv is not None and ycal_csv is not None:
        cal_separators = [";", ","]
        cal_index_choices = ["no", "yes"]
        cal_detected_sep = str(find_delimiter('data/'+xcal_csv.name))
        cal_detected_idx = str(find_col_index('data/'+xcal_csv.name))
        # Same fallback as for the spectra loader: never let .index() raise.
        cal_sep_pos = cal_separators.index(cal_detected_sep) if cal_detected_sep in cal_separators else 0
        cal_idx_pos = cal_index_choices.index(cal_detected_idx) if cal_detected_idx in cal_index_choices else 0

        # Select list for CSV delimiter
        sep = M3.selectbox("Select csv separator - _detected_: " + cal_detected_sep, options=cal_separators, index=cal_sep_pos, key=0)
        # Select list for CSV header True / False
        hdr = M3.selectbox("indexes column in csv? - _detected_: " + cal_detected_idx, options=cal_index_choices, index=cal_idx_pos, key=1)
        col = 0 if hdr == 'yes' else False

        rd_seed = M1.slider("Choose seed", min_value=1, max_value=1212, value=42, format="%i")
        x, y = utils.load_csv(xcal_csv, ycal_csv, autoremove_na=True, sep=sep, x_hdr=0, y_hdr=0, x_index_col=col, y_index_col=col)
        # Split data into training and test sets using the kennard_stone method and correlation metric, 25% of data is used for testing
        train_index, test_index = train_test_split_idx(x, y=y, method="kennard_stone", metric="correlation", test_size=0.25, random_state=rd_seed)
        # Assign data to training and test sets
        X_train, y_train = pd.DataFrame(x[train_index]), pd.DataFrame(y[train_index])
        X_test, y_test = pd.DataFrame(x[test_index]), pd.DataFrame(y[test_index])

        #############################
        regression_algo = M1.selectbox("Choose the algorithm for regression", options=available_regression_algo, key=12)

        # Reg carries predictions/metrics and is only produced by the
        # SciKitLearn PLSR branch; reg_model is what gets exported.
        Reg = None
        reg_model = None

        if regression_algo == 'SciKitLearn PLSR':
            # Train model with model function from application_functions.py
            Reg = PinardPlsr(x_train=X_train, x_test=X_test, y_train=y_train, y_test=y_test)
            reg_model = Reg.model_
            #M2.dataframe(Pin.pred_data_)

        elif regression_algo == 'Jchemo Local Weighted PLSR':
            # The uploads may already have been read by utils.load_csv above;
            # rewinding is harmless and lets a second reader start at byte 0.
            xcal_csv.seek(0)
            ycal_csv.seek(0)
            reg_model = model_LWPLSR(xcal_csv, ycal_csv, sep, hdr)

        elif regression_algo == "Intervalle Selection PLSR":
            s = M2.number_input(label='Enter the maximum number of intervalls', min_value=1, max_value=6, value="min")
            # Use the number entered above instead of a hard-coded 3.
            reg_model = TpeIpls(x_train=X_train, y_train=y_train, x_test=X_test, y_test=y_test, Kfold=3, scale=True, n_intervall=s)
            reg_model.tune(n_iter=10)

        if Reg is not None:
            with container3:
                st.header("Model Diagnosis", divider='blue')
                # pred_data_ entries 0/1/2 are plotted against the training,
                # training and test targets respectively.
                yc = Reg.pred_data_[0]
                ycv = Reg.pred_data_[1]
                yt = Reg.pred_data_[2]

                M7.write('Predicted vs Measured values')
                M7.pyplot(reg_plot([y_train, y_train, y_test],[yc, ycv, yt]))
                M8.write('Residuals plot')
                M8.pyplot(resid_plot([y_train, y_train, y_test],[yc, ycv, yt]))
        elif regression_algo != '':
            # The other algorithms do not provide a Reg object here, so there
            # is nothing to diagnose; say so instead of raising NameError.
            container3.info("Model diagnosis is only available for 'SciKitLearn PLSR' at the moment.")

        # Export the model with pickle or joblib
        if regression_algo != '':
            if Reg is not None:
                M1.write("-- Performance metrics --")
                M1.dataframe(Reg.metrics_)
            M1.write("-- Save the model --")
            #model_export = M1.selectbox("Choose way to export", options=["pickle", "joblib"], key=20)
            model_name = M1.text_input('Give it a name')
            if M1.button('Export Model'):
                #export_package = __import__(model_export)
                with open('data/models/model_' + model_name + '_on_' + xcal_csv.name + '_and_' + ycal_csv.name + '_data_' + '.pkl','wb') as f:
                    joblib.dump(reg_model, f)
                st.write('Model Exported')

                # create a report with information on the model
                ## see https://stackoverflow.com/a/59578663
        #M4.pyplot(reg_plot(meas==(ycal_csv,ycal_csv,ycal_csv], pred=[ycal_csv,ycal_csv,ycal_csv]))

# graphical delimiter
st.write("---")
#M9, M10, M11 = st.columns([2,2,2])

# Prediction module - TO BE DONE !!!!!
# Prediction section: upload spectra, pick a saved model from data/models/,
# predict and write the result to ./data/predictions/ as CSV.
with st.container():
    st.header("Predictions making")
    st.write("---")
    st.write("Predict chemical values from NIRS")
    model_column, space, file_column = st.columns((2, 1, 1))

    NIRS_csv = file_column.file_uploader("Select NIRS Data to predict", type="csv", help=" :mushroom: select a csv matrix with samples as rows and lambdas as columns")
    export_name = './data/predictions/Predictions_of_'

    # Defined up front so the "Predict" handler can check them instead of
    # hitting a NameError when no file / no model has been provided.
    qsep = None
    qhdr = None
    model_loaded = None

    if NIRS_csv is not None:
        export_name += os.path.splitext(NIRS_csv.name)[0]

        pred_separators = [";", ","]
        pred_index_choices = ["no", "yes"]
        # NOTE(review): detection reads 'data/<uploaded name>', i.e. it
        # presumably expects a copy of the upload in ./data - confirm.
        pred_detected_sep = str(find_delimiter('data/'+NIRS_csv.name))
        pred_detected_idx = str(find_col_index('data/'+NIRS_csv.name))
        # Fall back to the first option if detection returns something that
        # is not offered (list.index would raise ValueError otherwise).
        pred_sep_pos = pred_separators.index(pred_detected_sep) if pred_detected_sep in pred_separators else 0
        pred_idx_pos = pred_index_choices.index(pred_detected_idx) if pred_detected_idx in pred_index_choices else 0

        qsep = file_column.selectbox("Select csv separator - _detected_: " + pred_detected_sep, options=pred_separators, index=pred_sep_pos, key=2)
        qhdr = file_column.selectbox("indexes column in csv? - _detected_: " + pred_detected_idx, options=pred_index_choices, index=pred_idx_pos, key=3)

    # Load the model with pickle or joblib
    model_column.write("Load your saved predictive model")
    models_dir = 'data/models/'
    # A missing folder is treated like an empty one instead of crashing.
    model_files = sorted(os.listdir(models_dir)) if os.path.isdir(models_dir) else []
    model_name_import = model_column.selectbox('Choose file:', options=model_files, key=21)

    # The selectbox returns None when there is nothing to choose from.
    if model_name_import and model_name_import != ' ':
        export_name += '_with_' + os.path.splitext(model_name_import)[0]
        # SECURITY NOTE: joblib.load unpickles the file and can execute
        # arbitrary code; only keep trusted files in data/models/.
        with open(models_dir + model_name_import, 'rb') as f:
            model_loaded = joblib.load(f)
        if model_loaded is not None:
            model_column.success("The model has been loaded successfully", icon="✅")
    else:
        model_column.warning("No saved model found in " + models_dir)

    result = ''
    if st.button("Predict"):
        if NIRS_csv is None:
            st.error("Please upload the NIRS data to predict first.")
        elif model_loaded is None:
            st.error("Please select a saved model first.")
        else:
            # use prediction function from application_functions.py to predict chemical values
            result = prediction(NIRS_csv, qsep, qhdr, model_loaded)
            st.write('Predicted values are: ')
            st.dataframe(result.T)

            # export to local drive
            os.makedirs(os.path.dirname(export_name), exist_ok=True)
            pd.DataFrame(result).to_csv(export_name + '.csv')
            st.write('Predictions exported to ' + export_name + '.csv')
            # The former urlretrieve('http://localhost:8501/...') call was
            # dropped: it re-downloaded into the very file written above,
            # replacing the CSV with whatever the web server answered.

            # create a report with information on the prediction
            ## see https://stackoverflow.com/a/59578663

    if isinstance(result, list):
        st.write(result)