Skip to content
Snippets Groups Projects
Commit 29ea9c4a authored by Nicolas Barthes
Browse files

Merge remote-tracking branch 'origin/master'

parents a05d597b fc727f39
No related branches found
No related tags found
No related merge requests found
......@@ -17,6 +17,7 @@ pages_folder = Path("pages/")
image_path = Path('./images/img-sky.jpg')
import os
import sys
from shutil import rmtree
from pandas import DataFrame, concat
from PIL import Image
......
......@@ -54,24 +54,58 @@ y = DataFrame() # preallocate the target(s) data block
match file:
# load csv file
case 'csv':
from utils.data_parsing import CsvParser
def read_csv(file = file, change = None, dec = None, sep= None, names = None, hdr = None):
    """Parse a csv file through ``CsvParser`` and hand back its parsed parts.

    Parameters
    ----------
    file : object given to ``CsvParser(file=...)``. The default is whatever
        the enclosing-scope name ``file`` is bound to at the moment this
        ``def`` statement runs.
    change : not read inside the body.
        NOTE(review): presumably a cache/invalidation key supplied by the
        caller (callers pass ``hash_``) -- confirm.
    dec : forwarded to ``CsvParser.parse`` as ``decimal``.
    sep : forwarded to ``CsvParser.parse`` as ``separator``.
    names : forwarded to ``CsvParser.parse`` as ``index_col``.
    hdr : forwarded to ``CsvParser.parse`` as ``header``.

    Returns
    -------
    tuple
        ``(float, meta_data, meta_data_st_, df)`` attributes of the parser,
        in that order.
    """
    # Clean-up runs before anything is parsed; only the listed extensions
    # are passed as ``keep``.
    delete_files(keep = ['.py', '.pyc','.bib'])

    from utils.data_parsing import CsvParser

    parser = CsvParser(file = file)
    parser.parse(decimal = dec, separator = sep, index_col = names, header = hdr)

    parsed_parts = (parser.float, parser.meta_data, parser.meta_data_st_, parser.df)
    return parsed_parts
with c1:
# Load X-block data
xcal_csv = st.file_uploader("Select NIRS Data", type = "csv", help = " :mushroom: select a csv matrix with samples as rows and lambdas as columns")
if xcal_csv:
sepx = st.radio("Select separator (X file): " , options = [";", ","], key = 0,horizontal = True)
hdrx = st.checkbox("Samples name (X file): ")
colx = 0 if hdrx else False
c1_1, c2_2 = st.columns([.5, .5])
with c1_1:
decx = st.radio('decimal(x):', options= [".", ","], horizontal = True)
sepx = st.radio("separator(x):", options = [";", ","], horizontal = True)
with c2_2:
phdrx = st.radio("header(x): ", options = ["yes", "no"], horizontal = True)
pnamesx = st.radio("samples name(x):", options = ["yes", "no"], horizontal = True)
hdrx = 0 if phdrx =="yes" else None
namesx = 0 if pnamesx =="yes" else None
try:
spectra, meta_data, md_df_st_, xfile = read_csv(file= xcal_csv, change = hash_, dec = decx, sep = sepx, names =namesx, hdr = hdrx)
st.success('xfile has been loaded successfully')
except:
st.error('Error: The xfile has not been loaded successfully, please consider tuning the dialect settings!')
else:
st.info('Info: Insert your spectral data file above!')
# Load Y-block data
ycal_csv = st.file_uploader("Select corresponding Chemical Data", type = "csv", help = " :mushroom: select a csv matrix with samples as rows and chemical values as a column")
if ycal_csv:
sepy = st.radio("Select separator (Y file): ", options = [";", ","], key = 2, horizontal = True)
hdry = st.checkbox("samples name (Y file)?: ")
coly = 0 if hdry else False
c1_1, c2_2 = st.columns([.5, .5])
with c1_1:
decy = st.radio('decimal(y):', options= [".", ","], horizontal = True)
sepy = st.radio("separator(y):", options = [";", ","], horizontal = True)
with c2_2:
phdry = st.radio("header(y): ", options = ["yes", "no"], horizontal = True)
pnamesy = st.radio("samples name(y):", options = ["yes", "no"], horizontal = True)
hdry = 0 if phdry =="yes" else None
namesy = 0 if pnamesy =="yes" else None
try:
chem_data, meta_data, md_df_st_, yfile = read_csv(file= ycal_csv, change = hash_, dec = decy, sep = sepy, names =namesy, hdr = hdry)
st.success('yfile has been loaded successfully')
except:
st.error('Error: The yfile has not been loaded successfully, please consider tuning the dialect settings!')
else:
st.info('Info: Insert your target data file above!')
......@@ -86,37 +120,27 @@ match file:
stringio = StringIO(eval(f'{i}.getvalue().decode("utf-8")'))
xy_str += str(stringio.read())
# p_hash([xy_str + str(xcal_csv.name) + str(ycal_csv.name), hdrx, sepx, hdry, sepy])
# p_hash(add = )
@st.cache_data
def csv_loader(change):
    """Load the X and Y csv uploads and build a combined display name.

    ``change`` is not read in the body; with ``@st.cache_data`` it is the
    only argument, so it is what distinguishes one cached call from another.

    Reads ``xcal_csv``, ``ycal_csv``, ``sepx``, ``sepy``, ``colx`` and
    ``coly`` from the enclosing scope.

    Returns ``(xfile, yfile, file_name)``.
    """
    # NOTE(review): the keywords used below (decimal, index_col, header) look
    # like pandas.read_csv's, not those of the local read_csv(file, change,
    # dec, sep, names, hdr) helper -- confirm which read_csv is in scope here.
    delete_files(keep = ['.py', '.pyc','.bib'])

    x_name = str(xcal_csv.name)
    y_name = str(ycal_csv.name)
    file_name = x_name +' and '+ y_name

    xfile = read_csv(xcal_csv, decimal = '.', sep = sepx, index_col = colx, header = 0)
    yfile = read_csv(ycal_csv, decimal = '.', sep = sepy, index_col = coly)

    loaded = (xfile, yfile, file_name)
    return loaded
hash_ = ObjectHash(current=hash_,add = xy_str)
xfile, yfile, file_name = csv_loader(change = hash_)
if yfile.shape[1]>0 and xfile.shape[1]>0 :
# prepare x data
try:
spectra, meta_data = col_cat(xfile)
except:
st.error('Error: The format of the X-file does not correspond to the expected dialect settings. To read the file correctly, please adjust the separator parameters.')
spectra = DataFrame(spectra).astype(float)
# prepare y data
try:
chem_data, idx = col_cat(yfile)
except:
st.error('Error: The format of the Y-file does not correspond to the expected dialect settings. To read the file correctly, please adjust the separator parameters.')
# xfile, yfile, file_name = csv_loader(change = hash_)
# yfile = read_csv(file= ycal_csv, change = hash_)
if yfile.shape[1]>0 and xfile.shape[1]>0 :
if 'chem_data' in globals():
if chem_data.shape[1]>1:
if chem_data.shape[1] > 1:
yname = c1.selectbox('Select a target', options = ['']+chem_data.columns.tolist(), format_func = lambda x: x if x else "<Select>")
if yname:
y = chem_data.loc[:, yname]
......@@ -312,6 +336,7 @@ if not spectra.empty and not y.empty:
data_to_work_with.append("xte_fold{0}".format(i+1))
data_to_work_with.append("yte_fold{0}".format(i+1))
# check best pre-treatment with a global PLSR model
from utils.regress import Plsr
preReg = Plsr(train = [X_train, y_train], test = [X_test, y_test], n_iter=100)
temp_path = Path('temp/')
with open(temp_path / "lwplsr_preTreatments.json", "w+") as outfile:
......@@ -328,7 +353,7 @@ if not spectra.empty and not y.empty:
# run Julia Jchemo as subprocess
import subprocess
subprocess_path = Path("utils/")
subprocess.run([f"{sys.executable}", subprocess_path / "LWPLSR_Call.py"])
subprocess.run([f"{sys.executable}", subprocess_path / "lwplsr_call.py"])
# retrieve JSON results from Julia Jchemo
try:
with open(temp_path / "lwplsr_outputs.json", "r") as outfile:
......@@ -380,12 +405,13 @@ if not spectra.empty and not y.empty:
Reg.best_hyperparams_print = {**preReg.best_hyperparams_, **Reg.best_hyperparams_}
Reg.best_hyperparams_ = {**preReg.best_hyperparams_, **Reg.best_hyperparams_}
Reg.__hash__ = hash_data(Reg.best_hyperparams_print)
Reg.__hash__ = ObjectHash(current = hash_,add = Reg.best_hyperparams_print)
except FileNotFoundError as e:
Reg = None
for i in data_to_work_with: os.unlink(temp_path / str(i + ".csv"))
case 'TPE-iPLS':
from utils.regress import TpeIpls
Reg = TpeIpls(train = [X_train, y_train], test=[X_test, y_test], n_intervall = s, n_iter=it, cv = nb_folds)
# reg_model = Reg.model_
......@@ -413,10 +439,10 @@ if not spectra.empty and not y.empty:
it = st.number_input(label = 'Enter the number of iterations', min_value = 2, max_value = 500, value = 250)
else:
s, it = None, None
# p_hash(str(s)+str(it))
hash_ = ObjectHash( current = hash_,add = str(s)+str(it))
remodel_button = st.button('re-model the data', key=4, help=None, type="primary", use_container_width=True, on_click=increment)
# p_hash(st.session_state.counter)
hash_ = ObjectHash(current = hash_, add = st.session_state.counter)
Reg = RequestingModelCreation(change = hash_)
reg_model = Reg.model_
hash_ = hash(Reg)
......
......@@ -25,7 +25,7 @@ def data_split(x, y):
from kennard_stone import train_test_split
# Split data into training and test sets using the kennard_stone method and correlation metric, 25% of data is used for testing
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size = 0.25, random_state = 42)
train_index, test_index = X_train.index, X_test.index
train_index, test_index = np.array(X_train.index), np.array(X_test.index)
return X_train, X_test, y_train, y_test, train_index, test_index
## descriptive stat
......
......@@ -70,6 +70,7 @@ def reg_plot( meas, pred, train_idx, test_idx):
meas[i] = np.array(meas[i]).reshape(-1, 1)
pred[i] = np.array(pred[i]).reshape(-1, 1)
from sklearn.linear_model import LinearRegression
M = LinearRegression()
M.fit(meas[i], pred[i])
a1[i] = np.round(M.coef_[0][0],2)
......@@ -107,6 +108,7 @@ def resid_plot( meas, pred, train_idx, test_idx):
e = [np.subtract(meas[0] ,pred[0]), np.subtract(meas[1], pred[1])]
for i in range(len(meas)):
from sklearn.linear_model import LinearRegression
M = LinearRegression()
M.fit( np.array(meas[i]).reshape(-1,1), np.array(e[i]).reshape(-1,1))
a1[i] = np.round(M.coef_[0],2)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment