# Streamlit page: calibration model development.
#
# The page lets the user upload a spectra CSV and a matching chemical-values
# CSV, splits the samples into training and test sets, fits the selected
# regression algorithm, displays diagnostic plots and metrics, and exports the
# fitted model to 'data/models/'.
from Packages import *

# Issued before the remaining project imports.
# NOTE(review): presumably `st` comes from `Packages` and this call must stay
# the first Streamlit command on the page - confirm before reordering.
st.set_page_config(page_title="NIRS Utils", page_icon=":goat:", layout="wide")
from Modules import *
from Class_Mod.DATA_HANDLING import *


def nn(x):
    """Return True when *x* is not None."""
    return x is not None


########################################################################################
# Selectable regression algorithms; the empty first entry means "nothing chosen".
reg_algo = ["", "Full-PLS", "Locally Weighted PLS", "Interval-PLS"]

# Model creation module: static page layout
st.header("Calibration Model Development", divider='blue')
st.write("Create a predictive model, then use it for predicting your target variable(chemical values) from NIRS spectra")
M1, M2, M3 = st.columns([2, 2, 2])
M1.write("-- Performance metrics --")
M4, M5 = st.columns([6, 2])
st.write("---")
st.header("Model Diagnosis", divider='blue')

M7, M8 = st.columns([2, 2])
M7.write('Predicted vs Measured values')
M8.write('Residuals plot')
M9, M10 = st.columns([2, 2])
M9.write("-- Save the model --")

# CSV files loader
xcal_csv = M3.file_uploader("Select NIRS Data", type="csv", help=" :mushroom: select a csv matrix with samples as rows and lambdas as columns")
ycal_csv = M3.file_uploader("Select corresponding Chemical Data", type="csv", help=" :mushroom: select a csv matrix with samples as rows and chemical values as a column")

if xcal_csv is not None and ycal_csv is not None:
    # Select list for CSV delimiter. The detector is called once, and an
    # unexpected result (anything other than ';' or ',') falls back to the
    # first option instead of raising ValueError from list.index().
    sep_options = [";", ","]
    detected_sep = str(find_delimiter('data/' + xcal_csv.name))
    sep_default = sep_options.index(detected_sep) if detected_sep in sep_options else 0
    sep = M3.selectbox("Select csv separator - _detected_: " + detected_sep,
                       options=sep_options, index=sep_default, key=0)

    # Select list for the presence of an index column, with the same fallback.
    hdr_options = ["no", "yes"]
    detected_hdr = str(find_col_index('data/' + xcal_csv.name))
    hdr_default = hdr_options.index(detected_hdr) if detected_hdr in hdr_options else 0
    hdr = M3.selectbox("indexes column in csv? - _detected_: " + detected_hdr,
                       options=hdr_options, index=hdr_default, key=1)

    # Value forwarded as x_index_col / y_index_col: first column, or False.
    col = 0 if hdr == 'yes' else False

    rd_seed = M1.slider("Change Train-test split", min_value=1, max_value=1212, value=42, format="%i")
    x, y = utils.load_csv(xcal_csv, ycal_csv, autoremove_na=True, sep=sep,
                          x_hdr=0, y_hdr=0, x_index_col=col, y_index_col=col)

    # Split data into training and test sets using the kennard_stone method
    # and correlation metric; 25% of the data is used for testing.
    train_index, test_index = train_test_split_idx(x, y=y, method="kennard_stone",
                                                   metric="correlation", test_size=0.25,
                                                   random_state=rd_seed)

    # Assign data to training and test sets
    X_train, X_test = pd.DataFrame(x[train_index]), pd.DataFrame(x[test_index])
    y_train, y_test = pd.DataFrame(y[train_index]), pd.DataFrame(y[test_index])
    # Keep only the first column of the target frames.
    y_train = y_train.iloc[:, 0]
    y_test = y_test.iloc[:, 0]

    ############################# Regression modelling ##########################################
    # `Reg` is the fitted wrapper exposing pred_data_ / metrics_ / model_.
    # It stays None for algorithms that only return a model object, so the
    # analysis section below can tell the two cases apart.
    Reg = None
    reg_model = None
    regression_algo = M1.selectbox("Choose the algorithm for regression", options=reg_algo, key=12)

    if regression_algo == reg_algo[1]:
        # Full-PLS
        Reg = PinardPlsr(x_train=X_train, x_test=X_test, y_train=y_train, y_test=y_test)
        reg_model = Reg.model_

    elif regression_algo == reg_algo[2]:
        # Locally Weighted PLS: only a model object is returned here.
        reg_model = model_LWPLSR(xcal_csv, ycal_csv, sep, hdr)

    elif regression_algo == reg_algo[3]:
        # Interval-PLS
        s = M2.number_input(label='Enter the maximum number of intervalls', min_value=1, max_value=6, value="min")
        it = M2.number_input(label='Enter the maximum number of iteration', min_value=50, max_value=1000, value="min")
        # Pass the user-selected number of intervals (previously hard-coded to 6,
        # which silently ignored the input above).
        Reg = TpeIpls(x_train=X_train, x_test=X_test, y_train=y_train, y_test=y_test,
                      scale=False, Kfold=3, n_intervall=s)
        # Called for its effect on `Reg`; the return value is not used.
        Reg.BandSelect(n_iter=it)
        reg_model = Reg.model_

    ################# Model analysis ############
    if regression_algo in reg_algo[1:]:
        if Reg is not None:
            # pred_data_ entries 0, 1, 2 are plotted against y_train, y_train
            # and y_test respectively.
            yc = Reg.pred_data_[0]
            ycv = Reg.pred_data_[1]
            yt = Reg.pred_data_[2]

            M7.pyplot(reg_plot([y_train, y_train, y_test], [yc, ycv, yt]))
            M8.pyplot(resid_plot([y_train, y_train, y_test], [yc, ycv, yt]))
            M1.dataframe(Reg.metrics_)
        else:
            # No wrapper object is available for this algorithm, so there is
            # nothing to plot; avoid the NameError the page used to raise here.
            M1.info("Diagnostic plots and metrics are not available for this algorithm.")

        # TODO: let the user choose the export backend (pickle / joblib).
        model_name = M9.text_input('Give it a name')
        if M9.button('Export Model'):
            # NOTE(review): model_name is free text used unsanitized in the
            # file path - consider restricting it to a plain file-name.
            with open('data/models/model_' + model_name + '_on_' + xcal_csv.name + '_and_' + ycal_csv.name + '_data_' + '.pkl', 'wb') as f:
                joblib.dump(reg_model, f)
            st.write('Model Exported')

            # TODO: create a report with information on the model
            ## see https://stackoverflow.com/a/59578663