Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
from Packages import *
st.set_page_config(page_title="NIRS Utils", page_icon=":goat:", layout="wide")
from Modules import *
from Class_Mod.DATA_HANDLING import *
########################################################################################
# Model creation module
#
# Streamlit page script: upload NIRS spectra and the matching chemical values,
# split them into calibration/test sets, fit the selected regression algorithm,
# display diagnostics when the fitted object provides them, and export the model.
import os  # local import: only needed to make sure the export folder exists

# Page layout. Streamlit renders elements in creation order, so the declaration
# order below is the on-screen order.
container2 = st.container(border=True)
M1, M2, M3 = st.columns([2, 2, 2])
M4, M5 = st.columns([6, 2])
container3 = st.container(border=True)
M7, M8 = st.columns([2, 2])

# The empty first entry means "no algorithm selected yet".
available_regression_algo = ["", "SciKitLearn PLSR", "Jchemo Local Weighted PLSR", "Intervalle Selection PLSR"]

with container2:
    st.header("Calibration Model Development", divider='blue')
    st.write("Create a predictive model, then use it for predicting your target variable(chemical values) from NIRS spectra")

    # CSV files loader
    xcal_csv = M3.file_uploader("Select NIRS Data", type="csv", help=" :mushroom: select a csv matrix with samples as rows and lambdas as columns")
    ycal_csv = M3.file_uploader("Select corresponding Chemical Data", type="csv", help=" :mushroom: select a csv matrix with samples as rows and chemical values as a column")

    if xcal_csv is not None and ycal_csv is not None:
        # NOTE(review): detection reads 'data/<uploaded file name>' from disk, so it
        # presumes a copy of the uploaded file exists in data/ -- confirm.
        # Each detector is called once and its result reused for label and default.
        sep_options = [";", ","]
        detected_sep = str(find_delimiter('data/' + xcal_csv.name))
        # Select list for CSV delimiter. Fall back to the first option when the
        # detected value is not one of the offered choices (list.index would raise).
        sep = M3.selectbox(
            "Select csv separator - _detected_: " + detected_sep,
            options=sep_options,
            index=sep_options.index(detected_sep) if detected_sep in sep_options else 0,
            key=0,
        )

        hdr_options = ["no", "yes"]
        detected_hdr = str(find_col_index('data/' + xcal_csv.name))
        # Select list for the presence of an index column, with the same fallback.
        hdr = M3.selectbox(
            "indexes column in csv? - _detected_: " + detected_hdr,
            options=hdr_options,
            index=hdr_options.index(detected_hdr) if detected_hdr in hdr_options else 0,
            key=1,
        )
        # Passed as x_index_col / y_index_col to the loader: column 0 or no index column.
        col = 0 if hdr == 'yes' else False

        rd_seed = M1.slider("Choose seed", min_value=1, max_value=1212, value=42, format="%i")
        x, y = utils.load_csv(xcal_csv, ycal_csv, autoremove_na=True, sep=sep, x_hdr=0, y_hdr=0, x_index_col=col, y_index_col=col)

        # Split data into training and test sets using the kennard_stone method and
        # correlation metric, 25% of data is used for testing
        train_index, test_index = train_test_split_idx(x, y=y, method="kennard_stone", metric="correlation", test_size=0.25, random_state=rd_seed)

        # Assign data to training and test sets
        X_train, y_train = pd.DataFrame(x[train_index]), pd.DataFrame(y[train_index])
        X_test, y_test = pd.DataFrame(x[test_index]), pd.DataFrame(y[test_index])

        #############################
        regression_algo = M1.selectbox("Choose the algorithm for regression", options=available_regression_algo, key=12)

        # Reg is the wrapper exposing pred_data_ / metrics_; reg_model is the object
        # that gets exported. Only the SciKitLearn branch produces a Reg, so both
        # start as None to keep the other branches from hitting a NameError below.
        Reg = None
        reg_model = None

        if regression_algo == 'SciKitLearn PLSR':
            Reg = PinardPlsr(x_train=X_train, x_test=X_test, y_train=y_train, y_test=y_test)
            reg_model = Reg.model_

        elif regression_algo == 'Jchemo Local Weighted PLSR':
            reg_model = model_LWPLSR(xcal_csv, ycal_csv, sep, hdr)

        elif regression_algo == "Intervalle Selection PLSR":
            # The user's choice is now forwarded to the model (it used to be ignored
            # in favour of a hard-coded 3); the default of 3 keeps the previous behaviour.
            s = M2.number_input(label='Enter the maximum number of intervalls', min_value=1, max_value=6, value=3)
            reg_model = TpeIpls(x_train=X_train, y_train=y_train, x_test=X_test, y_test=y_test, Kfold=3, scale=True, n_intervall=s)
            reg_model.tune(n_iter=10)

        if regression_algo:
            if Reg is not None:
                with container3:
                    st.header("Model Diagnosis", divider='blue')
                    # pred_data_ is read as [calibration, cross-validation, test] predictions,
                    # matching the [y_train, y_train, y_test] references passed to the plots.
                    yc = Reg.pred_data_[0]
                    ycv = Reg.pred_data_[1]
                    yt = Reg.pred_data_[2]
                    M7.write('Predicted vs Measured values')
                    M7.pyplot(reg_plot([y_train, y_train, y_test], [yc, ycv, yt]))
                    M8.write('Residuals plot')
                    M8.pyplot(resid_plot([y_train, y_train, y_test], [yc, ycv, yt]))

                M1.write("-- Performance metrics --")
                M1.dataframe(Reg.metrics_)
            else:
                # No object exposing pred_data_ / metrics_ exists for this algorithm.
                container3.info("Diagnostic plots and performance metrics are not available for this algorithm yet.")

            # Export the fitted model with joblib
            M1.write("-- Save the model --")
            model_name = M1.text_input('Give it a name')
            if M1.button('Export Model'):
                # Create the target folder on first export instead of failing in open().
                os.makedirs('data/models', exist_ok=True)
                model_path = 'data/models/model_' + model_name + '_on_' + xcal_csv.name + '_and_' + ycal_csv.name + '_data_' + '.pkl'
                with open(model_path, 'wb') as f:
                    joblib.dump(reg_model, f)
                st.write('Model Exported')
                # TODO: create a report with information on the model
                ## see https://stackoverflow.com/a/59578663

# graphical delimiter
st.write("---")