Skip to content
Snippets Groups Projects
Commit 29ea9c4a authored by Nicolas Barthes
Browse files

Merge remote-tracking branch 'origin/master'

parents a05d597b fc727f39
No related branches found
No related tags found
No related merge requests found
...@@ -17,6 +17,7 @@ pages_folder = Path("pages/") ...@@ -17,6 +17,7 @@ pages_folder = Path("pages/")
image_path = Path('./images/img-sky.jpg') image_path = Path('./images/img-sky.jpg')
import os import os
import sys
from shutil import rmtree from shutil import rmtree
from pandas import DataFrame, concat from pandas import DataFrame, concat
from PIL import Image from PIL import Image
......
...@@ -54,24 +54,58 @@ y = DataFrame() # preallocate the target(s) data block ...@@ -54,24 +54,58 @@ y = DataFrame() # preallocate the target(s) data block
match file: match file:
# load csv file # load csv file
case 'csv': case 'csv':
from utils.data_parsing import CsvParser
def read_csv(file = file, change = None, dec = None, sep= None, names = None, hdr = None):
    """Parse one CSV input with the chosen dialect settings and return its parts.

    Args:
        file: Object handed to ``CsvParser`` as ``file``. The default is
            whatever ``file`` is bound to in the enclosing scope at the
            moment this ``def`` statement runs.
        change: Not read inside the body. Callers pass the running ``hash_``
            value -- presumably a change marker for caching; TODO confirm.
        dec: Forwarded to ``CsvParser.parse`` as ``decimal``.
        sep: Forwarded to ``CsvParser.parse`` as ``separator``.
        names: Forwarded to ``CsvParser.parse`` as ``index_col``.
        hdr: Forwarded to ``CsvParser.parse`` as ``header``.

    Returns:
        A 4-tuple of the parser attributes ``float``, ``meta_data``,
        ``meta_data_st_`` and ``df``, in that order.
    """
    # Cleanup runs before anything else; the exact effect of delete_files is
    # defined elsewhere (NOTE(review): presumably removes stale generated
    # files while keeping the listed extensions -- confirm).
    delete_files(keep = ['.py', '.pyc','.bib'])

    # Imported locally so the function does not rely on a module-level import.
    from utils.data_parsing import CsvParser

    parser = CsvParser(file = file)
    parser.parse(decimal = dec, separator = sep, index_col = names, header = hdr)

    parsed_parts = (parser.float, parser.meta_data, parser.meta_data_st_, parser.df)
    return parsed_parts
with c1: with c1:
# Load X-block data # Load X-block data
xcal_csv = st.file_uploader("Select NIRS Data", type = "csv", help = " :mushroom: select a csv matrix with samples as rows and lambdas as columns") xcal_csv = st.file_uploader("Select NIRS Data", type = "csv", help = " :mushroom: select a csv matrix with samples as rows and lambdas as columns")
if xcal_csv: if xcal_csv:
sepx = st.radio("Select separator (X file): " , options = [";", ","], key = 0,horizontal = True) c1_1, c2_2 = st.columns([.5, .5])
hdrx = st.checkbox("Samples name (X file): ") with c1_1:
colx = 0 if hdrx else False decx = st.radio('decimal(x):', options= [".", ","], horizontal = True)
sepx = st.radio("separator(x):", options = [";", ","], horizontal = True)
with c2_2:
phdrx = st.radio("header(x): ", options = ["yes", "no"], horizontal = True)
pnamesx = st.radio("samples name(x):", options = ["yes", "no"], horizontal = True)
hdrx = 0 if phdrx =="yes" else None
namesx = 0 if pnamesx =="yes" else None
try:
spectra, meta_data, md_df_st_, xfile = read_csv(file= xcal_csv, change = hash_, dec = decx, sep = sepx, names =namesx, hdr = hdrx)
st.success('xfile has been loaded successfully')
except:
st.error('Error: The xfile has not been loaded successfully, please consider tuning the dialect settings!')
else: else:
st.info('Info: Insert your spectral data file above!') st.info('Info: Insert your spectral data file above!')
# Load Y-block data # Load Y-block data
ycal_csv = st.file_uploader("Select corresponding Chemical Data", type = "csv", help = " :mushroom: select a csv matrix with samples as rows and chemical values as a column") ycal_csv = st.file_uploader("Select corresponding Chemical Data", type = "csv", help = " :mushroom: select a csv matrix with samples as rows and chemical values as a column")
if ycal_csv: if ycal_csv:
sepy = st.radio("Select separator (Y file): ", options = [";", ","], key = 2, horizontal = True) c1_1, c2_2 = st.columns([.5, .5])
hdry = st.checkbox("samples name (Y file)?: ") with c1_1:
coly = 0 if hdry else False decy = st.radio('decimal(y):', options= [".", ","], horizontal = True)
sepy = st.radio("separator(y):", options = [";", ","], horizontal = True)
with c2_2:
phdry = st.radio("header(y): ", options = ["yes", "no"], horizontal = True)
pnamesy = st.radio("samples name(y):", options = ["yes", "no"], horizontal = True)
hdry = 0 if phdry =="yes" else None
namesy = 0 if pnamesy =="yes" else None
try:
chem_data, meta_data, md_df_st_, yfile = read_csv(file= ycal_csv, change = hash_, dec = decy, sep = sepy, names =namesy, hdr = hdry)
st.success('yfile has been loaded successfully')
except:
st.error('Error: The yfile has not been loaded successfully, please consider tuning the dialect settings!')
else: else:
st.info('Info: Insert your target data file above!') st.info('Info: Insert your target data file above!')
...@@ -86,37 +120,27 @@ match file: ...@@ -86,37 +120,27 @@ match file:
stringio = StringIO(eval(f'{i}.getvalue().decode("utf-8")')) stringio = StringIO(eval(f'{i}.getvalue().decode("utf-8")'))
xy_str += str(stringio.read()) xy_str += str(stringio.read())
# p_hash([xy_str + str(xcal_csv.name) + str(ycal_csv.name), hdrx, sepx, hdry, sepy]) # p_hash([xy_str + str(xcal_csv.name) + str(ycal_csv.name), hdrx, sepx, hdry, sepy])
# p_hash(add = ) hash_ = ObjectHash(current=hash_,add = xy_str)
@st.cache_data
def csv_loader(change):
    """Read the X and Y uploads and build a combined display name.

    Args:
        change: Not read inside the body; it is only an argument of the
            ``st.cache_data``-decorated function (the caller passes
            ``hash_``). NOTE(review): presumably used so the cache is
            refreshed when the hash changes -- confirm.

    Returns:
        A 3-tuple ``(xfile, yfile, file_name)``: the two objects returned by
        ``read_csv`` and the string ``"<x name> and <y name>"``.

    The uploads (``xcal_csv``, ``ycal_csv``) and the dialect settings
    (``sepx``, ``colx``, ``sepy``, ``coly``) are taken from the enclosing
    scope, not from parameters.
    """
    # Cleanup happens first; delete_files is defined elsewhere.
    delete_files(keep = ['.py', '.pyc','.bib'])

    combined_name = ' and '.join((str(xcal_csv.name), str(ycal_csv.name)))

    # The X file is always read with its first row as header; the Y file
    # uses read_csv's own default for the header.
    x_table = read_csv(xcal_csv, decimal = '.', sep = sepx, index_col = colx, header = 0)
    y_table = read_csv(ycal_csv, decimal = '.', sep = sepy, index_col = coly)

    return x_table, y_table, combined_name
xfile, yfile, file_name = csv_loader(change = hash_)
if yfile.shape[1]>0 and xfile.shape[1]>0 :
# prepare x data
try:
spectra, meta_data = col_cat(xfile)
except:
st.error('Error: The format of the X-file does not correspond to the expected dialect settings. To read the file correctly, please adjust the separator parameters.')
spectra = DataFrame(spectra).astype(float)
# prepare y data
try:
chem_data, idx = col_cat(yfile)
except:
st.error('Error: The format of the Y-file does not correspond to the expected dialect settings. To read the file correctly, please adjust the separator parameters.')
# xfile, yfile, file_name = csv_loader(change = hash_)
# yfile = read_csv(file= ycal_csv, change = hash_)
if yfile.shape[1]>0 and xfile.shape[1]>0 :
if 'chem_data' in globals(): if 'chem_data' in globals():
if chem_data.shape[1]>1: if chem_data.shape[1] > 1:
yname = c1.selectbox('Select a target', options = ['']+chem_data.columns.tolist(), format_func = lambda x: x if x else "<Select>") yname = c1.selectbox('Select a target', options = ['']+chem_data.columns.tolist(), format_func = lambda x: x if x else "<Select>")
if yname: if yname:
y = chem_data.loc[:, yname] y = chem_data.loc[:, yname]
...@@ -312,6 +336,7 @@ if not spectra.empty and not y.empty: ...@@ -312,6 +336,7 @@ if not spectra.empty and not y.empty:
data_to_work_with.append("xte_fold{0}".format(i+1)) data_to_work_with.append("xte_fold{0}".format(i+1))
data_to_work_with.append("yte_fold{0}".format(i+1)) data_to_work_with.append("yte_fold{0}".format(i+1))
# check best pre-treatment with a global PLSR model # check best pre-treatment with a global PLSR model
from utils.regress import Plsr
preReg = Plsr(train = [X_train, y_train], test = [X_test, y_test], n_iter=100) preReg = Plsr(train = [X_train, y_train], test = [X_test, y_test], n_iter=100)
temp_path = Path('temp/') temp_path = Path('temp/')
with open(temp_path / "lwplsr_preTreatments.json", "w+") as outfile: with open(temp_path / "lwplsr_preTreatments.json", "w+") as outfile:
...@@ -328,7 +353,7 @@ if not spectra.empty and not y.empty: ...@@ -328,7 +353,7 @@ if not spectra.empty and not y.empty:
# run Julia Jchemo as subprocess # run Julia Jchemo as subprocess
import subprocess import subprocess
subprocess_path = Path("utils/") subprocess_path = Path("utils/")
subprocess.run([f"{sys.executable}", subprocess_path / "LWPLSR_Call.py"]) subprocess.run([f"{sys.executable}", subprocess_path / "lwplsr_call.py"])
# retrieve json results from Julia JChemo # retrieve json results from Julia JChemo
try: try:
with open(temp_path / "lwplsr_outputs.json", "r") as outfile: with open(temp_path / "lwplsr_outputs.json", "r") as outfile:
...@@ -380,12 +405,13 @@ if not spectra.empty and not y.empty: ...@@ -380,12 +405,13 @@ if not spectra.empty and not y.empty:
Reg.best_hyperparams_print = {**preReg.best_hyperparams_, **Reg.best_hyperparams_} Reg.best_hyperparams_print = {**preReg.best_hyperparams_, **Reg.best_hyperparams_}
Reg.best_hyperparams_ = {**preReg.best_hyperparams_, **Reg.best_hyperparams_} Reg.best_hyperparams_ = {**preReg.best_hyperparams_, **Reg.best_hyperparams_}
Reg.__hash__ = hash_data(Reg.best_hyperparams_print) Reg.__hash__ = ObjectHash(current = hash_,add = Reg.best_hyperparams_print)
except FileNotFoundError as e: except FileNotFoundError as e:
Reg = None Reg = None
for i in data_to_work_with: os.unlink(temp_path / str(i + ".csv")) for i in data_to_work_with: os.unlink(temp_path / str(i + ".csv"))
case 'TPE-iPLS': case 'TPE-iPLS':
from utils.regress import TpeIpls
Reg = TpeIpls(train = [X_train, y_train], test=[X_test, y_test], n_intervall = s, n_iter=it, cv = nb_folds) Reg = TpeIpls(train = [X_train, y_train], test=[X_test, y_test], n_intervall = s, n_iter=it, cv = nb_folds)
# reg_model = Reg.model_ # reg_model = Reg.model_
...@@ -413,10 +439,10 @@ if not spectra.empty and not y.empty: ...@@ -413,10 +439,10 @@ if not spectra.empty and not y.empty:
it = st.number_input(label = 'Enter the number of iterations', min_value = 2, max_value = 500, value = 250) it = st.number_input(label = 'Enter the number of iterations', min_value = 2, max_value = 500, value = 250)
else: else:
s, it = None, None s, it = None, None
# p_hash(str(s)+str(it)) hash_ = ObjectHash( current = hash_,add = str(s)+str(it))
remodel_button = st.button('re-model the data', key=4, help=None, type="primary", use_container_width=True, on_click=increment) remodel_button = st.button('re-model the data', key=4, help=None, type="primary", use_container_width=True, on_click=increment)
# p_hash(st.session_state.counter) hash_ = ObjectHash(current = hash_, add = st.session_state.counter)
Reg = RequestingModelCreation(change = hash_) Reg = RequestingModelCreation(change = hash_)
reg_model = Reg.model_ reg_model = Reg.model_
hash_ = hash(Reg) hash_ = hash(Reg)
......
...@@ -25,7 +25,7 @@ def data_split(x, y): ...@@ -25,7 +25,7 @@ def data_split(x, y):
from kennard_stone import train_test_split from kennard_stone import train_test_split
# Split data into training and test sets using the kennard_stone method and correlation metric, 25% of data is used for testing # Split data into training and test sets using the kennard_stone method and correlation metric, 25% of data is used for testing
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size = 0.25, random_state = 42) X_train, X_test, y_train, y_test = train_test_split(x, y, test_size = 0.25, random_state = 42)
train_index, test_index = X_train.index, X_test.index train_index, test_index = np.array(X_train.index), np.array(X_test.index)
return X_train, X_test, y_train, y_test, train_index, test_index return X_train, X_test, y_train, y_test, train_index, test_index
## descriptive stat ## descriptive stat
......
...@@ -70,6 +70,7 @@ def reg_plot( meas, pred, train_idx, test_idx): ...@@ -70,6 +70,7 @@ def reg_plot( meas, pred, train_idx, test_idx):
meas[i] = np.array(meas[i]).reshape(-1, 1) meas[i] = np.array(meas[i]).reshape(-1, 1)
pred[i] = np.array(pred[i]).reshape(-1, 1) pred[i] = np.array(pred[i]).reshape(-1, 1)
from sklearn.linear_model import LinearRegression
M = LinearRegression() M = LinearRegression()
M.fit(meas[i], pred[i]) M.fit(meas[i], pred[i])
a1[i] = np.round(M.coef_[0][0],2) a1[i] = np.round(M.coef_[0][0],2)
...@@ -107,6 +108,7 @@ def resid_plot( meas, pred, train_idx, test_idx): ...@@ -107,6 +108,7 @@ def resid_plot( meas, pred, train_idx, test_idx):
e = [np.subtract(meas[0] ,pred[0]), np.subtract(meas[1], pred[1])] e = [np.subtract(meas[0] ,pred[0]), np.subtract(meas[1], pred[1])]
for i in range(len(meas)): for i in range(len(meas)):
from sklearn.linear_model import LinearRegression
M = LinearRegression() M = LinearRegression()
M.fit( np.array(meas[i]).reshape(-1,1), np.array(e[i]).reshape(-1,1)) M.fit( np.array(meas[i]).reshape(-1,1), np.array(e[i]).reshape(-1,1))
a1[i] = np.round(M.coef_[0],2) a1[i] = np.round(M.coef_[0],2)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment