Newer
Older
from Packages import *
# Configure the browser tab title, icon and wide layout for this page.
# Streamlit requires set_page_config to be the first Streamlit command executed
# on a page; presumably that is why it sits before the remaining imports
# (TODO confirm that those modules issue Streamlit calls at import time).
st.set_page_config(page_title="NIRS Utils", page_icon=":goat:", layout="wide")
from Modules import *
from Class_Mod.DATA_HANDLING import *
# HTML for the "CEFE - CNRS" banner displayed at the top of the page
bandeau_html = """
<div style="width: 100%; background-color: #4682B4; padding: 10px; margin-bottom: 10px;">
<h1 style="text-align: center; color: white;">CEFE - CNRS</h1>
</div>
"""
# Inject the banner markup; unsafe_allow_html is passed so the HTML is rendered
st.markdown(bandeau_html, unsafe_allow_html=True)

# Make sure the "interface" key exists in the session state (None when it was
# never set), then hide the Predictions page when the simple interface is active.
interface_mode = st.session_state.get('interface')
st.session_state["interface"] = interface_mode
if interface_mode == 'simple':
    hide_pages("Predictions")
def nn(x):
    """Return True when *x* is anything other than ``None`` ("not None")."""
    return not (x is None)
########################################################################################
# Regression algorithms offered in the selector. The leading empty entry is the
# "nothing chosen yet" option: the training branches further down only match
# indices 1 to 3.
reg_algo = ["","Full-PLS", "Locally Weighted PLS", "Interval-PLS"]
# Page design. Streamlit renders elements in call order, so the statements below
# fix the vertical layout: title, two rows of columns, a separator, then the
# "Model Diagnosis" section with its own two rows of columns.
st.header("Calibration Model Development", divider='blue')
st.write("Create a predictive model, then use it for predicting your target variable(chemical values) from NIRS spectra")
# First row: M1 hosts the split/algorithm controls, M2 the tables and metrics,
# M3 the data upload widgets.
M1, M2, M3 = st.columns([2,2,2])
# Second row: M4 and M5 are created but not referenced in the visible part of this script.
M4, M5 = st.columns([6,2])
st.write("---")
st.header("Model Diagnosis", divider='blue')
# Diagnosis row: M7 receives the predicted-vs-measured plot, M8 the residuals plot.
M7, M8 = st.columns([2,2])
M7.write('Predicted vs Measured values')
M8.write('Residuals plot')
# Export row: M9 receives the model name input and the export button;
# M10 is not referenced in the visible part of this script.
M9, M10 = st.columns([2,2])
M9.write("-- Save the model --")
# Supported input formats; the radio choice decides which loader runs below.
files_format = ['.csv', '.dx']
file = M3.radio('select data file format:', options = files_format)
# ---- Data loading ----
# Start from empty containers: Streamlit re-executes the script on every
# interaction, and until files are uploaded the split step further down must
# still be able to evaluate `spectra.empty` / `y.empty` without a NameError.
spectra = pd.DataFrame()
y = pd.DataFrame()

# load .csv file
if file == files_format[0]:
    xcal_csv = M3.file_uploader("Select NIRS Data", type="csv", help=" :mushroom: select a csv matrix with samples as rows and lambdas as columns")
    ycal_csv = M3.file_uploader("Select corresponding Chemical Data", type="csv", help=" :mushroom: select a csv matrix with samples as rows and chemical values as a column")
    if xcal_csv and ycal_csv:
        # Run each detection once and reuse the result for both the label and
        # the default selection.
        # NOTE(review): detection opens 'data/<uploaded file name>' on disk, not
        # the uploaded buffer - confirm the file is expected to exist there.
        csv_path = 'data/'+xcal_csv.name
        detected_sep = str(find_delimiter(csv_path))
        detected_idx = str(find_col_index(csv_path))
        sep_options = [";", ","]
        idx_options = ["no", "yes"]

        # Select list for CSV delimiter; fall back to the first option when the
        # detected value is not one of the offered choices.
        sep = M3.radio("Select csv separator - _detected_: " + detected_sep,
                       options=sep_options,
                       index=sep_options.index(detected_sep) if detected_sep in sep_options else 0,
                       key=0)
        # Select list for CSV index column yes / no (same fallback rule)
        hdr = M3.radio("indexes column in csv? - _detected_: " + detected_idx,
                       options=idx_options,
                       index=idx_options.index(detected_idx) if detected_idx in idx_options else 0,
                       key=1)

        # 'yes' -> column 0 is passed as the index column, otherwise False
        col = 0 if hdr == 'yes' else False

        spectra, y = utils.load_csv(xcal_csv, ycal_csv, autoremove_na=True, sep=sep, x_hdr=0, y_hdr=0, x_index_col=col, y_index_col=col)
        spectra = pd.DataFrame(spectra)
        # The split step relies on `y.empty` and `y.iloc`, so y must be a pandas
        # object just like spectra.
        y = pd.DataFrame(y)

## Load .dx file
elif file == files_format[1]:
    data_file = M3.file_uploader("Select Data", type=".dx", help=" :mushroom: select a dx file")
    if data_file:
        # read_dx takes a path, so the upload is copied to a temporary file.
        # Leaving the `with` block closes (and therefore flushes) the file
        # before it is parsed.
        with NamedTemporaryFile(delete=False, suffix=".dx") as tmp:
            tmp.write(data_file.read())
            tmp_path = tmp.name
        try:
            chem_data, spectra, meta_data = read_dx(file = tmp_path)
        finally:
            # delete=False means the file must be removed by hand, even when
            # parsing fails.
            os.unlink(tmp_path)

        M3.success("The data have been loaded successfully", icon="✅")
        yname = M3.selectbox('Select target', options=chem_data.columns)
        # Keep only the samples whose selected target value is strictly positive
        measured = chem_data.loc[:,yname] > 0
        y = chem_data.loc[:,yname].loc[measured]
        spectra = spectra.loc[measured]
### split the data
# Everything below runs only once both the spectra and the target values are loaded.
if not spectra.empty and not y.empty:
    rd_seed = M1.slider("Customize Train-test split", min_value=1, max_value=100, value=42, format="%i")
    # Split data into training and test sets using the kennard_stone method and correlation metric, 25% of data is used for testing
    train_index, test_index = train_test_split_idx(spectra, y = y, method="kennard_stone", metric="correlation", test_size=0.25, random_state=rd_seed)
    # Assign data to training and test sets
    X_train, y_train = pd.DataFrame(spectra.iloc[train_index,:]), y.iloc[train_index]
    X_test, y_test = pd.DataFrame(spectra.iloc[test_index,:]), y.iloc[test_index]

    #######################################
    regression_algo = M1.selectbox("Choose the algorithm for regression", options=reg_algo, key = 12)

    # Reg is the fitted wrapper exposing model_/pred_data_/metrics_; it stays None
    # for branches that do not build one, so the diagnostics can be skipped
    # instead of failing on an undefined name.
    Reg = None
    reg_model = None

    if regression_algo == reg_algo[1]:
        # Full-PLS
        Reg = PinardPlsr(x_train = X_train, x_test = X_test,y_train = y_train, y_test = y_test)
        reg_model = Reg.model_

    elif regression_algo == reg_algo[2]:
        # Locally Weighted PLS: model_LWPLSR works from the uploaded csv files
        # and their parsing options, which only exist in csv mode.
        if file == files_format[0]:
            reg_model = model_LWPLSR(xcal_csv, ycal_csv, sep, hdr)
        else:
            M1.warning("Locally Weighted PLS is only available with csv input files")

    elif regression_algo == reg_algo[3]:
        # Interval-PLS
        s = M1.number_input(label='Enter the maximum number of intervalls', min_value=1, max_value=6, value=3)
        it = M1.number_input(label='Enter the number of iterations', min_value=50, max_value=1000, value=100)
        Reg = TpeIpls(x_train = X_train, x_test=X_test, y_train = y_train, y_test = y_test, scale = False, Kfold = 3, n_intervall = s)
        pro = M1.progress(0, text="The model is being created. Please wait!")
        rega = Reg.BandSelect(n_iter=it)
        pro.empty()
        M1.progress(100, text = "The model has successfully been created!")
        time.sleep(1)
        reg_model = Reg.model_
        # rega[0] is displayed as the table of selected wavelengths; rega[1] is
        # exported as the wavelength indexes when the model is saved.
        M2.write('-- Table of selected wavelengths --')
        M2.table(rega[0])

    ################# Model analysis ############
    # Diagnostics need the wrapper's predictions and metrics, so they are shown
    # only when a wrapper was actually built (Full-PLS and Interval-PLS).
    if Reg is not None:
        # pred_data_ is read as: [0] calibration, [1] cross-validation, [2] test predictions
        yc = Reg.pred_data_[0]
        ycv = Reg.pred_data_[1]
        yt = Reg.pred_data_[2]

        M2.write("-- Performance metrics --")
        M2.dataframe(Reg.metrics_)

        M7.pyplot(reg_plot([y_train, y_train, y_test],[yc, ycv, yt]))
        M8.pyplot(resid_plot([y_train, y_train, y_test],[yc, ycv, yt]))

        model_name = M9.text_input('Give it a name')
        if M9.button('Export Model'):
            path = 'data/models/model_'
            # Create the export folder when it does not exist yet
            os.makedirs(os.path.dirname(path), exist_ok=True)

            # NOTE(review): model_name is typed by the user and goes straight
            # into the file name - consider restricting it to safe characters.
            if file == files_format[0]:
                export_stem = path + model_name + '_on_' + xcal_csv.name + '_and_' + ycal_csv.name + '_data_'
            else:
                export_stem = path + model_name + '_on_' + '_data_'

            # Serialise the fitted model for both input formats
            with open(export_stem + '.pkl','wb') as f:
                joblib.dump(reg_model, f)

            # Interval-PLS additionally exports the sorted selected wavelength indexes
            if regression_algo == reg_algo[3]:
                rega[1].sort()
                pd.DataFrame(rega[1]).to_csv(export_stem + 'Wavelengths_index.csv', sep = ';')

            st.write('Model Exported')
# create a report with information on the model
## see https://stackoverflow.com/a/59578663