from Packages import *
st.set_page_config(page_title="NIRS Utils", page_icon=":goat:", layout="wide")
from Modules import *
from Class_Mod.DATA_HANDLING import *
st.session_state["interface"] = st.session_state.get('interface')
if st.session_state["interface"] == 'simple':
    hide_pages("Predictions")

# small predicate helper: True when a value has been provided
def nn(x):
    return x is not None
########################################################################################
reg_algo = ["","Full-PLS", "Locally Weighted PLS", "Interval-PLS"]
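# The empty first entry keeps the algorithm selectbox below blank until the user explicitly picks a regression method.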
st.header("Calibration Model Development", divider='blue')
st.write("Create a predictive model, then use it for predicting your target variable(chemical values) from NIRS spectra")
M1, M2, M3 = st.columns([2,2,2])
M4, M5 = st.columns([6,2])
st.write("---")
st.header("Model Diagnosis", divider='blue')
M7, M8 = st.columns([2,2])
M7.write('Predicted vs Measured values')
M8.write('Residuals plot')
M9, M10 = st.columns([2,2])
M9.write("-- Save the model --")
files_format = ['.csv', '.dx']
file = M3.radio('select data file format:', options = files_format)
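# Assumption: initialise empty containers so the "split the data" check below
# (spectra.empty / y.empty) does not fail before any file has been loaded.
spectra = pd.DataFrame()
y = pd.DataFrame()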
# load .csv file
if file == files_format[0]:
    xcal_csv = M3.file_uploader("Select NIRS Data", type="csv", help=" :mushroom: select a csv matrix with samples as rows and lambdas as columns")
    ycal_csv = M3.file_uploader("Select corresponding Chemical Data", type="csv", help=" :mushroom: select a csv matrix with samples as rows and chemical values as a column")
    if xcal_csv and ycal_csv:
        # Select list for CSV delimiter
        sep = M3.radio("Select csv separator - _detected_: " + str(find_delimiter('data/'+xcal_csv.name)),
                       options=[";", ","], index=[";", ","].index(str(find_delimiter('data/'+xcal_csv.name))), key=0)
        # Select list for CSV header True / False
        hdr = M3.radio("indexes column in csv? - _detected_: " + str(find_col_index('data/'+xcal_csv.name)),
                       options=["no", "yes"], index=["no", "yes"].index(str(find_col_index('data/'+xcal_csv.name))), key=1)
        ###############
        if hdr == 'yes':
            col = 0
        else:
            col = False
        ###############
        spectra, y = utils.load_csv(xcal_csv, ycal_csv, autoremove_na=True, sep=sep, x_hdr=0, y_hdr=0, x_index_col=col, y_index_col=col)
        spectra = pd.DataFrame(spectra)
        y = pd.DataFrame(y)
## Load .dx file
elif file == files_format[1]:
    data_file = M3.file_uploader("Select Data", type=".dx", help=" :mushroom: select a dx file")
    if data_file:
        # persist the uploaded bytes to a temporary file so read_dx can open it from disk
        with NamedTemporaryFile(delete=False, suffix=".dx") as tmp:
            tmp.write(data_file.read())
            tmp_path = tmp.name
        chem_data, spectra, meta_data = read_dx(file = tmp_path)
        M3.success("The data have been loaded successfully", icon="✅")
        yname = M3.selectbox('Select target', options=chem_data.columns)
        spectra = pd.DataFrame(spectra)
        y = chem_data.loc[:, yname]
        os.unlink(tmp_path)
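# Whichever format was loaded, `spectra` now holds the NIRS matrix (samples x wavelengths)
# and `y` the corresponding chemical reference values.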
### split the data
if not spectra.empty and not y.empty:
    rd_seed = M1.slider("Customize the seed of the random train-test split", min_value=1, max_value=100, value=42, format="%i")
    # Split data into training and test sets using the Kennard-Stone method and correlation metric; 25% of the data is held out for testing
    train_index, test_index = train_test_split_idx(spectra, y=y, method="kennard_stone", metric="correlation", test_size=0.25, random_state=rd_seed)
    # Assign data to training and test sets
    X_train, y_train, X_test, y_test = pd.DataFrame(spectra.iloc[train_index,:]), pd.DataFrame(y.iloc[train_index]), pd.DataFrame(spectra.iloc[test_index,:]), pd.DataFrame(y.iloc[test_index])
    # keep the targets as 1-D Series rather than single-column DataFrames
    y_train = y_train.iloc[:,0]
    y_test = y_test.iloc[:,0]
    #######################################
    regression_algo = M1.selectbox("Choose the algorithm for regression", options=reg_algo, key=12)
    if regression_algo == reg_algo[1]:
        # Train model with model function from application_functions.py
        Reg = PinardPlsr(x_train=X_train, x_test=X_test, y_train=y_train, y_test=y_test)
        reg_model = Reg.model_
        #M2.dataframe(Pin.pred_data_)
    elif regression_algo == reg_algo[2]:
        reg_model = model_LWPLSR(xcal_csv, ycal_csv, sep, hdr)
    elif regression_algo == reg_algo[3]:
        s = M1.number_input(label='Enter the maximum number of intervals', min_value=1, max_value=6, value=3)
        it = M1.number_input(label='Enter the number of iterations', min_value=50, max_value=1000, value=100)
        progress_text = "The model is being created. Please wait."
        Reg = TpeIpls(x_train=X_train, x_test=X_test, y_train=y_train, y_test=y_test, scale=False, Kfold=3, n_intervall=s)
        pro = M1.progress(0, text=progress_text)
        rega = Reg.BandSelect(n_iter=it)
        pro.empty()
        M1.progress(100, text="The model has successfully been created!")
        time.sleep(1)
        reg_model = Reg.model_
        M2.write('-- Table of selected wavelengths --')
        M2.table(rega[0])
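    # NOTE: the analysis below reads Reg.pred_data_ and Reg.metrics_, so it assumes an
    # algorithm exposing those attributes (Full-PLS or Interval-PLS); model_LWPLSR only
    # returns a fitted model here.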
    ################# Model analysis ############
    if regression_algo in reg_algo[1:]:
        yc = Reg.pred_data_[0]   # predictions on the calibration (training) set
        ycv = Reg.pred_data_[1]  # cross-validation predictions
        yt = Reg.pred_data_[2]   # predictions on the test set
        M2.write("-- Performance metrics --")
        M2.dataframe(Reg.metrics_)
        M7.pyplot(reg_plot([y_train, y_train, y_test], [yc, ycv, yt]))
        M8.pyplot(resid_plot([y_train, y_train, y_test], [yc, ycv, yt]))
        #model_export = M1.selectbox("Choose way to export", options=["pickle", "joblib"], key=20)
        model_name = M9.text_input('Give it a name')
        if M9.button('Export Model'):
            path = 'data/models/model_'
            if file == files_format[0]:
                #export_package = __import__(model_export)
                with open(path + model_name + '_on_' + xcal_csv.name + '_and_' + ycal_csv.name + '_data_' + '.pkl', 'wb') as f:
                    joblib.dump(reg_model, f)
                if regression_algo == reg_algo[3]:
                    rega[1].sort()
                    pd.DataFrame(rega[1]).to_csv(path + model_name + '_on_' + xcal_csv.name + '_and_' + ycal_csv.name + '_data_' + 'Wavelengths_index.csv', sep=';')
            elif file == files_format[1]:
                #export_package = __import__(model_export)
                with open(path + model_name + '_on_' + '_data_' + '.pkl', 'wb') as f:
                    joblib.dump(reg_model, f)
                if regression_algo == reg_algo[3]:
                    rega[1].sort()
                    pd.DataFrame(rega[1]).to_csv(path + model_name + '_on_' + '_data_' + 'Wavelengths_index.csv', sep=';')
            st.write('Model Exported')
            if regression_algo == reg_algo[3]:
                st.write('Model Exported')
# create a report with information on the model
## see https://stackoverflow.com/a/59578663
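# A minimal sketch of what such a report could look like, assuming we only reuse
# objects already defined above (model_name, regression_algo, X_train, X_test);
# the field names and the output path are illustrative, not part of the original app:
#
#   report = {'model_name': model_name,
#             'algorithm': regression_algo,
#             'n_train_samples': X_train.shape[0],
#             'n_test_samples': X_test.shape[0]}
#   pd.DataFrame([report]).to_csv('data/models/' + model_name + '_report.csv', sep=';', index=False)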