# Streamlit page "Calibration Model Development".
#
# Workflow implemented below:
#   1. load NIRS spectra (X) and chemical reference values (y) from two CSV
#      files or from a single .dx file;
#   2. split the samples into train / test sets (Kennard-Stone);
#   3. fit the regression algorithm chosen by the user;
#   4. show performance metrics and diagnostic plots;
#   5. export the fitted model to ``data/models/``.
#
# NOTE: plain comments are used instead of a module docstring so that nothing
# here can be picked up as page content by Streamlit.
from Packages import *

# Kept ahead of the remaining project imports, as in the original script
# (presumably because those modules may issue Streamlit commands on import).
st.set_page_config(page_title="NIRS Utils", page_icon=":goat:", layout="wide")
from Modules import *
from Class_Mod.DATA_HANDLING import *

add_header()

st.session_state["interface"] = st.session_state.get('interface')
if st.session_state["interface"] == 'simple':
    hide_pages("Predictions")

####################################### page Design #######################################
st.header("Calibration Model Development", divider='blue')
st.write("Create a predictive model, then use it for predicting your target variable (chemical values) from NIRS spectra")
M1, M2, M3 = st.columns([2, 3, 2])
M4, M5 = st.columns([6, 2])
st.write("---")
st.header("Model Diagnosis", divider='blue')

M7, M8 = st.columns([2, 2])
M7.write('Predicted vs Measured values')
M8.write('Residuals plot')
M9, M10 = st.columns([2, 2])
M9.write("-- Save the model --")
######################################################################

# The first entry is the empty "nothing selected yet" choice; the branches
# below refer to the algorithms by their position in this list.
reg_algo = ["", "Full-PLSR", "Locally Weighted PLSR", "Interval-PLSR", "Full-PLSR-sklearn", "PrePLStester"]

####################################### Data loading #######################################
files_format = ['.csv', '.dx']
file = M3.radio('select data file format:', options=files_format)

# Start from empty *instances* so that the ``.empty`` checks further down are
# well defined even when nothing has been uploaded yet.
spectra = pd.DataFrame()
y = pd.DataFrame()

# ---- load .csv files (one for X, one for y)
if file == files_format[0]:
    xcal_csv = M3.file_uploader("Select NIRS Data", type="csv", help=" :mushroom: select a csv matrix with samples as rows and lambdas as columns")
    if xcal_csv:
        # The detection helpers are given a path under data/ built from the
        # uploaded file's name; each one is called once and its result reused.
        x_path = 'data/' + xcal_csv.name
        sep_options = [";", ","]
        hdr_options = ["no", "yes"]
        detected_sep = str(find_delimiter(x_path))
        detected_hdr = str(find_col_index(x_path))

        # Fall back to the first option when the detected value is not one of
        # the offered choices instead of failing on list.index().
        sepx = M3.radio("Select separator (X file) - _detected_: " + detected_sep,
                        options=sep_options,
                        index=sep_options.index(detected_sep) if detected_sep in sep_options else 0,
                        key=0)
        hdrx = M3.radio("samples name (X file)? - _detected_: " + detected_hdr,
                        options=hdr_options,
                        index=hdr_options.index(detected_hdr) if detected_hdr in hdr_options else 0,
                        key=1)
        # index_col for the X file: first column holds sample names, or none.
        col_x = 0 if hdrx == "yes" else False

    ycal_csv = M3.file_uploader("Select corresponding Chemical Data", type="csv", help=" :mushroom: select a csv matrix with samples as rows and chemical values as a column")
    if ycal_csv:
        sepy = M3.radio("separator (Y file): ", options=[";", ","], key=2)
        hdry = M3.radio("samples name (Y file)?: ", options=["no", "yes"], key=3)
        # Kept separate from col_x so the Y choice does not override the X one.
        col_y = 0 if hdry == "yes" else False

    if xcal_csv and ycal_csv:
        spectra, meta_data = col_cat(pd.read_csv(xcal_csv, decimal='.', sep=sepx, index_col=col_x, header=0))
        y, _ = col_cat(pd.read_csv(ycal_csv, decimal='.', sep=sepy, index_col=col_y))
        # Only the first column of the Y table is used as the target.
        y = pd.DataFrame(y).astype(float).iloc[:, 0]
        spectra = pd.DataFrame(spectra).astype(float)

        if not meta_data.empty:
            st.write(meta_data)

        if spectra.shape[0] != y.shape[0]:
            M3.warning('The number of samples is different in X and Y')
            # Discard both tables so the modelling section below is skipped.
            y = pd.DataFrame()
            spectra = pd.DataFrame()

# ---- load .dx file (spectra and chemical data in one file)
elif file == files_format[1]:
    data_file = M3.file_uploader("Select Data", type=".dx", help=" :mushroom: select a dx file")
    if data_file:
        # read_dx takes a path, so the upload is written to a temporary file.
        # The file is closed (hence flushed) before it is read, and it is
        # removed even when parsing fails.
        with NamedTemporaryFile(delete=False, suffix=".dx") as tmp:
            tmp.write(data_file.read())
            tmp_path = tmp.name
        try:
            chem_data, spectra, meta_data = read_dx(file=tmp_path)
        finally:
            os.unlink(tmp_path)

        M3.success("The data have been loaded successfully", icon="✅")
        if chem_data.shape[1] > 0:
            yname = M3.selectbox('Select target', options=chem_data.columns)
            # Keep only the samples whose selected target value is > 0.
            measured = chem_data.loc[:, yname] > 0
            y = chem_data.loc[:, yname].loc[measured]
            spectra = spectra.loc[measured]
        else:
            M3.warning('Warning: Chemical data are not included in your file !', icon="⚠️")

####################################### Modelling #######################################
if not spectra.empty and not y.empty:
    rd_seed = M1.slider("Customize Train-test split", min_value=1, max_value=100, value=42, format="%i")
    # Split data into training and test sets using the kennard_stone method
    # and correlation metric, 25% of data is used for testing.
    train_index, test_index = train_test_split_idx(spectra, y=y, method="kennard_stone", metric="correlation", test_size=0.25, random_state=rd_seed)

    # Assign data to training and test sets.
    X_train, y_train = pd.DataFrame(spectra.iloc[train_index, :]), y.iloc[train_index]
    X_test, y_test = pd.DataFrame(spectra.iloc[test_index, :]), y.iloc[test_index]

    # ---- summary statistics of the target for each subset
    M2.write('Loaded data summary')
    M2.write(f'The loaded spectra consist of {spectra.shape[1]} wavelengths')
    subsets = {'Train': y_train, 'Test': y_test, 'Total': y}
    datainf = pd.DataFrame(
        {
            'N samples': [X_train.shape[0], X_test.shape[0], spectra.shape[0]],
            'Mean': [values.mean() for values in subsets.values()],
            'SD': [values.std() for values in subsets.values()],
            # Coefficient of variation, in percent.
            'CV(%)': [values.std() * 100 / values.mean() for values in subsets.values()],
            'Skewness': [skew(values, axis=0, bias=True) for values in subsets.values()],
            'Kurtosis': [kurtosis(values, axis=0, bias=True) for values in subsets.values()],
        },
        index=list(subsets),
    )
    M2.write(datainf.round(3))

    ####################################### Regression #######################################
    regression_algo = M1.selectbox("Choose the algorithm for regression", options=reg_algo, key=12)

    if regression_algo == reg_algo[1]:
        # Full-PLSR
        Reg = PinardPlsr(x_train=X_train, x_test=X_test, y_train=y_train, y_test=y_test)
        reg_model = Reg.model_

    elif regression_algo == reg_algo[2]:
        # Locally Weighted PLSR, fed with NumPy arrays.
        # NOTE: this rebinds y_train / y_test to arrays, which is what the
        # diagnostic plots below receive for this algorithm.
        x_train, y_train, x_test, y_test = X_train.to_numpy(), y_train.to_numpy(), X_test.to_numpy(), y_test.to_numpy()
        Reg = LWPLSR(x_train, y_train, x_test, y_test)
        LWPLSR.Jchemo_lwplsr(Reg)
        reg_model = Reg.model_

    elif regression_algo == reg_algo[3]:
        # Interval-PLSR
        s = M1.number_input(label='Enter the maximum number of intervals', min_value=1, max_value=6, value=3)
        it = M1.number_input(label='Enter the number of iterations', min_value=50, max_value=1000, value=100)

        Reg = TpeIpls(x_train=X_train, x_test=X_test, y_train=y_train, y_test=y_test, scale=False, Kfold=3, n_intervall=s)
        pro = M1.progress(0, text="The model is being created. Please wait!")
        # rega[0] is shown as the table of selected wavelengths; rega[1] is
        # written out as the wavelength index file on export.
        rega = Reg.BandSelect(n_iter=it)
        pro.empty()
        M1.progress(100, text="The model has successfully been created!")
        time.sleep(1)
        reg_model = Reg.model_
        M2.write('-- Table of selected wavelengths --')
        M2.table(rega[0])

    elif regression_algo == reg_algo[4]:
        # Full-PLSR-sklearn
        Reg = PlsR(x_train=X_train, x_test=X_test, y_train=y_train, y_test=y_test)
        reg_model = Reg.model_

    elif regression_algo == reg_algo[5]:
        # PrePLStester
        Reg = PlsProcess(x_train=X_train, x_test=X_test, y_train=y_train, y_test=y_test, scale=False, Kfold=3)
        Reg.tune(n_iter=500)
        reg_model = Reg.model_

    ####################################### Model analysis #######################################
    if regression_algo in reg_algo[1:]:
        # pred_data_ entries are plotted against (y_train, y_train, y_test),
        # in that order.
        yc = Reg.pred_data_[0]
        ycv = Reg.pred_data_[1]
        yt = Reg.pred_data_[2]

        M2.write('-- Spectral preprocessing info --')
        M2.write(Reg.best_hyperparams)

        M2.write("-- Performance metrics --")
        M2.dataframe(Reg.metrics_)

        M7.pyplot(reg_plot([y_train, y_train, y_test], [yc, ycv, yt], train_idx=train_index, test_idx=test_index))
        M8.pyplot(resid_plot([y_train, y_train, y_test], [yc, ycv, yt], train_idx=train_index, test_idx=test_index))

        ####################################### Export #######################################
        model_name = M9.text_input('Give it a name')
        date_time = datetime.date.today().strftime('_%Y_%m_%d_')

        if M9.button('Export Model'):
            path = 'data/models/model_'
            # Make sure the target directory exists before writing into it.
            os.makedirs(os.path.dirname(path), exist_ok=True)

            if file == files_format[0]:
                # File names without their extension (text before the first dot).
                x_stem = xcal_csv.name.partition(".")[0]
                y_stem = ycal_csv.name.partition(".")[0]
                with open(f"{path}{model_name}{date_time}_created_on_{x_stem}_and_{y_stem}_data_.pkl", 'wb') as f:
                    joblib.dump(reg_model, f)
                if regression_algo == reg_algo[3]:
                    rega[1].sort()
                    pd.DataFrame(rega[1]).to_csv(f"{path}{model_name}{date_time}_on_{x_stem}_and_{y_stem}_data_Wavelengths_index.csv", sep=';')

            elif file == files_format[1]:
                dx_stem = data_file.name.partition(".")[0]
                # NOTE(review): the .dx file names are kept exactly as they
                # were (no date in the .pkl name, different component order in
                # the .csv name) in case other pages rely on them - confirm
                # before harmonising with the .csv branch.
                with open(f"{path}{model_name}_on_{dx_stem}_data_.pkl", 'wb') as f:
                    joblib.dump(reg_model, f)
                if regression_algo == reg_algo[3]:
                    rega[1].sort()
                    pd.DataFrame(rega[1]).to_csv(f"{path}{dx_stem}{model_name}{date_time}_on__data_Wavelengths_index.csv", sep=';')

            st.write('Model Exported ')

        # TODO: create a report with information on the model,
        # see https://stackoverflow.com/a/59578663

        if st.session_state['interface'] == 'simple':
            # Forward slash so the page path resolves on every platform.
            st.page_link('pages/3-prediction.py', label='Keep on keepin\' on to predict your values !')