Skip to content
Snippets Groups Projects
Commit cce2ab51 authored by DIANE's avatar DIANE
Browse files

commit all

parent 8a11ddaa
No related branches found
No related tags found
No related merge requests found
Showing
with 760 additions and 624 deletions
File moved
File moved
File moved
File moved
File moved
File moved
File moved
# Clustering Methods # Clustering Methods
## K-Means clustering ## K-Means clustering
::: src.utils.KMEANS_.Sk_Kmeans ::: src.utils.KMEANS_.SkKmeans
## HDBSCAN clustering ## HDBSCAN clustering
::: src.utils.HDBSCAN_Clustering.Hdbscan ::: src.utils.HDBSCAN_Clustering.Hdbscan
# Models creation # Models creation
## PLSR from Pinard (scikit learn) ## PLSR from Pinard (scikit learn)
::: src.utils.KMEANS_.Sk_Kmeans ::: src.utils.KMEANS_.SkKmeans
## lwPlsR from Jchemo (Julia) ## lwPlsR from Jchemo (Julia)
::: src.utils.LWPLSR_.LWPLSR ::: src.utils.LWPLSR_.LWPLSR
\ No newline at end of file
This diff is collapsed.
...@@ -35,5 +35,5 @@ from utils.data_parsing import * ...@@ -35,5 +35,5 @@ from utils.data_parsing import *
from utils.hash import * from utils.hash import *
from utils.visualize import * from utils.visualize import *
from utils.miscellaneous import ObjectHash from utils.miscellaneous import ObjectHash
from utils.samsel import RDM, KS from utils.samsel import Samplers
from report import report from report import report
\ No newline at end of file
{"meta_project": "cs", "meta_sample_species": "cs", "meta_sample_category": "Animal", "meta_sample_pretreatment": "Powder", "meta_machine_ID": "cs", "meta_sample_sub_category": "Leaf litter", "meta_sample_humidity": "Fresh", "meta_scan_place": "Pace"} {"meta_project": "ds", "meta_sample_species": "ds", "meta_sample_category": "Other", "meta_sample_pretreatment": "Pastile", "meta_machine_ID": "ds", "meta_sample_sub_category": "Animal part", "meta_sample_humidity": "Wet", "meta_scan_place": "Pace"}
\ No newline at end of file \ No newline at end of file
This diff is collapsed.
...@@ -9,7 +9,6 @@ st.set_page_config(page_title = "NIRS Utils", page_icon = ":goat:", layout = "wi ...@@ -9,7 +9,6 @@ st.set_page_config(page_title = "NIRS Utils", page_icon = ":goat:", layout = "wi
UiComponents(pagespath = pages_folder, csspath= css_file,imgpath=image_path , UiComponents(pagespath = pages_folder, csspath= css_file,imgpath=image_path ,
header=True, sidebar= True, bgimg=False, colborders=True) header=True, sidebar= True, bgimg=False, colborders=True)
hash_ = '' hash_ = ''
# Initialize the variable in session state if it doesn't exist for st.cache_data # Initialize the variable in session state if it doesn't exist for st.cache_data
if 'counter' not in st.session_state: if 'counter' not in st.session_state:
st.session_state.counter = 0 st.session_state.counter = 0
...@@ -18,13 +17,6 @@ def increment(): ...@@ -18,13 +17,6 @@ def increment():
# #################################### Methods ############################################## # #################################### Methods ##############################################
def delete_files(keep):
supp = []
# Walk through the directory
for root, dirs, files in os.walk('report/', topdown=False):
for file in files:
if file != 'logo_cefe.png' and not any(file.endswith(ext) for ext in keep):
os.remove(os.path.join(root, file))
class lw: class lw:
def __init__(self, Reg_json, pred): def __init__(self, Reg_json, pred):
...@@ -35,9 +27,9 @@ class lw: ...@@ -35,9 +27,9 @@ class lw:
################ clean the results dir ############# ################ clean the results dir #############
delete_files(keep = ['.py', '.pyc','.bib']) HandleItems.delete_files(keep = ['.py', '.pyc','.bib'])
for i in ['model', 'dataset', 'figures']: for i in ['model', 'dataset', 'figures']:
dirpath = Path('./report/out/')/i dirpath = Path('./report/results/')
if not dirpath.exists(): if not dirpath.exists():
dirpath.mkdir(parents=True, exist_ok=True) dirpath.mkdir(parents=True, exist_ok=True)
# ####################################### page preamble ####################################### # ####################################### page preamble #######################################
...@@ -48,7 +40,7 @@ c0.image("./images/model_creation.png", use_column_width = True) # graphical abs ...@@ -48,7 +40,7 @@ c0.image("./images/model_creation.png", use_column_width = True) # graphical abs
################################################################# Begin : I- Data loading and preparation ###################################### ################################################################# Begin : I- Data loading and preparation ######################################
files_format = ['csv', 'dx'] # Supported files format files_format = ['csv', 'dx'] # Supported files format
file = c1.radio('Select files format:', options = files_format,horizontal = True) # Select a file format file = c1.radio('Select files format:', options = files_format, horizontal = True) # Select a file format
spectra = DataFrame() # preallocate the spectral data block spectra = DataFrame() # preallocate the spectral data block
y = DataFrame() # preallocate the target(s) data block y = DataFrame() # preallocate the target(s) data block
...@@ -57,12 +49,13 @@ match file: ...@@ -57,12 +49,13 @@ match file:
# load csv file # load csv file
case 'csv': case 'csv':
from utils.data_parsing import CsvParser from utils.data_parsing import CsvParser
# @st.cache_data
def read_csv(file = file, change = None, dec = None, sep= None, names = None, hdr = None): def read_csv(file = file, change = None, dec = None, sep= None, names = None, hdr = None):
delete_files(keep = ['.py', '.pyc','.bib']) HandleItems.delete_files(keep = ['.py', '.pyc','.bib'])
from utils.data_parsing import CsvParser from utils.data_parsing import CsvParser
par = CsvParser(file= file) par = CsvParser(file= file)
par.parse(decimal = dec, separator = sep, index_col = names, header = hdr) par.parse(decimal = dec, separator = sep, index_col = names, header = hdr)
return par.float, par.meta_data, par.meta_data_st_, par.df return par.float, par.meta_data, par.meta_data_st_, par.df, par.rownames
with c1: with c1:
# Load X-block data # Load X-block data
...@@ -79,9 +72,8 @@ match file: ...@@ -79,9 +72,8 @@ match file:
hdrx = 0 if phdrx =="yes" else None hdrx = 0 if phdrx =="yes" else None
namesx = 0 if pnamesx =="yes" else None namesx = 0 if pnamesx =="yes" else None
try: try:
spectra, _, _, xfile = read_csv(file= xcal_csv, change = hash_, dec = decx, sep = sepx, names =namesx, hdr = hdrx) spectra, _, meta_spec, xfile, spec_labels = read_csv(file = xcal_csv, change = hash_, dec = decx, sep = sepx, names = namesx, hdr = hdrx)
N,P = spectra.shape N_specs, nwls = spectra.shape
st.success('xfile has been loaded successfully')
except: except:
st.error('Error: The xfile has not been loaded successfully, please consider tuning the dialect settings!') st.error('Error: The xfile has not been loaded successfully, please consider tuning the dialect settings!')
...@@ -105,8 +97,7 @@ match file: ...@@ -105,8 +97,7 @@ match file:
hdry = 0 if phdry =="yes" else None hdry = 0 if phdry =="yes" else None
namesy = 0 if pnamesy =="yes" else None namesy = 0 if pnamesy =="yes" else None
try: try:
chem_data, _, _, yfile = read_csv(file= ycal_csv, change = hash_, dec = decy, sep = sepy, names =namesy, hdr = hdry) chem_data, _, _, yfile, y_labels = read_csv(file= ycal_csv, change = hash_, dec = decy, sep = sepy, names =namesy, hdr = hdry)
st.success('yfile has been loaded successfully')
except: except:
st.error('Error: The yfile has not been loaded successfully, please consider tuning the dialect settings!') st.error('Error: The yfile has not been loaded successfully, please consider tuning the dialect settings!')
...@@ -124,27 +115,18 @@ match file: ...@@ -124,27 +115,18 @@ match file:
xy_str += str(stringio.read()) xy_str += str(stringio.read())
# p_hash([xy_str + str(xcal_csv.name) + str(ycal_csv.name), hdrx, sepx, hdry, sepy]) # p_hash([xy_str + str(xcal_csv.name) + str(ycal_csv.name), hdrx, sepx, hdry, sepy])
hash_ = ObjectHash(current=hash_,add = xy_str) hash_ = ObjectHash(current=hash_,add = xy_str)
file_name = str(xcal_csv.name) + str(ycal_csv.name) file_name = str(xcal_csv.name) + str(ycal_csv.name)
# yfile = read_csv(file= ycal_csv, change = hash_) # yfile = read_csv(file= ycal_csv, change = hash_)
if yfile.shape[1]>0 and xfile.shape[1]>0 : if yfile.shape[1]>0 and xfile.shape[1]>0 :
if 'chem_data' in globals(): if 'chem_data' in globals():
if chem_data.shape[1] > 1: if chem_data.shape[1] > 1:
yname = c1.selectbox('Select a target', options = ['']+chem_data.columns.tolist(), format_func = lambda x: x if x else "<Select>") yname = c1.selectbox('Select a target', options = [''] + chem_data.columns.tolist(), format_func = lambda x: x if x else "<Select>")
if yname: if yname:
y = chem_data.loc[:, yname] y = chem_data.loc[:, yname]
else: else:
...@@ -155,8 +137,27 @@ match file: ...@@ -155,8 +137,27 @@ match file:
### warning ### warning
if not y.empty: if not y.empty:
if spectra.shape[0] != y.shape[0]: y.index = y.index.astype(str)
st.error('Error: X and Y have different sample size') duplicate_indices = y.index
st.write(duplicate_indices)
if not y.empty:
if spectra.shape[0] == y.shape[0]:
st.info('Info: X and Y have different number of rows')
else:
st.info('Info: X and Y have different number of rows')
if spectra.shape[0] >= y.shape[0]:
if namesy == 0:
pass
else :
st.warning('No labels are provided for target, therefore, both target and spectra are considered well organized!')
if spectra.shape[0] < y.shape[0]:
st.write('The number of samples chemically analyzed exceeds the number of scanned samples!')
y = DataFrame y = DataFrame
spectra = DataFrame spectra = DataFrame
...@@ -178,7 +179,7 @@ match file: ...@@ -178,7 +179,7 @@ match file:
# p_hash(str(dxdata)+str(data_file.name)) # p_hash(str(dxdata)+str(data_file.name))
## load and parse the temp dx file ## load and parse the temp dx file
@st.cache_data # @st.cache_data
def read_dx(tmp_path): def read_dx(tmp_path):
M = JcampParser(path = tmp_path) M = JcampParser(path = tmp_path)
M.parse() M.parse()
...@@ -238,21 +239,21 @@ if not spectra.empty and not y.empty: ...@@ -238,21 +239,21 @@ if not spectra.empty and not y.empty:
st.pyplot(spectra_plot) ######## Loaded graph st.pyplot(spectra_plot) ######## Loaded graph
if st.session_state.interface =='advanced': if st.session_state.interface =='advanced':
with st.container(): with st.container():
values = st.slider('Select a range of values', min_value = 0, max_value = 100, value = (0, P)) values = st.slider('Select a range of values', min_value = 0, max_value = nwls, value = (0, nwls))
hash_ = ObjectHash(current=hash_, add= values)
spectra = spectra.iloc[:,values[0]:values[1]] hash_ = ObjectHash(current= hash_, add= values)
spectra = spectra.iloc[:, values[0]:values[1]]
nwl = spectra.shape nwl = spectra.shape
st.pyplot(plot_spectra(spectra, xunits = 'Wavelength/Wavenumber', yunits = "Signal intensity"))
if np.array(spectra.columns).dtype.kind in ['i', 'f']:
colnames = spectra.columns
else:
colnames = np.arange(spectra.shape[1])
if np.array(spectra.columns).dtype.kind in ['i', 'f']:
colnames = spectra.columns
else:
colnames = np.arange(spectra.shape[1])
hash_ = ObjectHash(current= hash_, add=values)
st.pyplot(plot_spectra(spectra, xunits = 'Wavelength/Wavenumber', yunits = "Signal intensity"))
from utils.miscellaneous import data_split from utils.miscellaneous import data_split
X_train, X_test, y_train, y_test, train_index, test_index = data_split(x=spectra, y=y) X_train, X_test, y_train, y_test, train_index, test_index = data_split(x=spectra, y=y)
...@@ -510,7 +511,7 @@ if Reg: ...@@ -510,7 +511,7 @@ if Reg:
st.write(Reg.best_hyperparams_print) st.write(Reg.best_hyperparams_print)
@st.cache_data(show_spinner =False) @st.cache_data(show_spinner =False)
def preprocessings(change): def preprocessings(change):
with open('report/out/Preprocessing.json', "w") as outfile: with open('report/results/Preprocessing.json', "w") as outfile:
json.dump(Reg.best_hyperparams_, outfile) json.dump(Reg.best_hyperparams_, outfile)
preprocessings(change=hash_) preprocessings(change=hash_)
...@@ -670,17 +671,17 @@ if Reg: ...@@ -670,17 +671,17 @@ if Reg:
match file: match file:
# load csv file # load csv file
case 'csv': case 'csv':
xfile.to_csv('report/out/dataset/'+ xcal_csv.name, sep = ';', encoding = 'utf-8', mode = 'a') xfile.to_csv('report/results/dataset/'+ xcal_csv.name, sep = ';', encoding = 'utf-8', mode = 'a')
yfile.to_csv('report/out/dataset/'+ ycal_csv.name, sep = ';', encoding = 'utf-8', mode = 'a') yfile.to_csv('report/results/dataset/'+ ycal_csv.name, sep = ';', encoding = 'utf-8', mode = 'a')
case 'dx': case 'dx':
with open('report/out/dataset/'+data_file.name, 'w') as dd: with open('report/results/dataset/'+data_file.name, 'w') as dd:
dd.write(dxdata) dd.write(dxdata)
with open('./report/out/model/'+ model_type + '.pkl','wb') as f:# export model with open('./report/results/model/'+ model_type + '.pkl','wb') as f:# export model
from joblib import dump from joblib import dump
dump(reg_model, f) dump(reg_model, f)
figpath =Path('./report/out/figures/') figpath =Path('./report/results/figures/')
spectra_plot.savefig(figpath / "spectra_plot.png") spectra_plot.savefig(figpath / "spectra_plot.png")
target_plot.savefig(figpath / "histogram.png") target_plot.savefig(figpath / "histogram.png")
imp_fig.savefig(figpath / "variable_importance.png") imp_fig.savefig(figpath / "variable_importance.png")
...@@ -688,11 +689,11 @@ if Reg: ...@@ -688,11 +689,11 @@ if Reg:
fig0.write_image(figpath / "meas_vs_pred_cv_onebyone.png") fig0.write_image(figpath / "meas_vs_pred_cv_onebyone.png")
measured_vs_predicted.savefig(figpath / 'measured_vs_predicted.png') measured_vs_predicted.savefig(figpath / 'measured_vs_predicted.png')
residuals_plot.savefig(figpath / 'residuals_plot.png') residuals_plot.savefig(figpath / 'residuals_plot.png')
# with open('report/out/Preprocessing.json', "w") as outfile: # with open('report/results/Preprocessing.json', "w") as outfile:
# json.dump(Reg.best_hyperparams_, outfile) # json.dump(Reg.best_hyperparams_, outfile)
if model_type == 'TPE-iPLS': # export selected wavelengths if model_type == 'TPE-iPLS': # export selected wavelengths
wlfilename = './report/out/model/'+ model_type+'-selected_wavelengths.xlsx' wlfilename = './report/results/model/'+ model_type+'-selected_wavelengths.xlsx'
all = concat([intervalls_with_cols.T, Reg.selected_features_], axis = 0, ignore_index=True).T all = concat([intervalls_with_cols.T, Reg.selected_features_], axis = 0, ignore_index=True).T
all.columns=['wl_from','wl_to','idx_from', 'idx_to'] all.columns=['wl_from','wl_to','idx_from', 'idx_to']
all.to_excel(wlfilename) all.to_excel(wlfilename)
...@@ -701,7 +702,7 @@ if Reg: ...@@ -701,7 +702,7 @@ if Reg:
if Path("./report/report.tex").exists(): if Path("./report/report.tex").exists():
report.generate_report(change = hash_) report.generate_report(change = hash_)
if Path("./report/report.pdf").exists(): if Path("./report/report.pdf").exists():
move("./report/report.pdf", "./report/out/report.pdf") move("./report/report.pdf", "./report/results/report.pdf")
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~# # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
# pklfile = {'model_': Reg.model_,"model_type" : model_type, 'training_data':{'raw-spectra':spectra,'target':y, }, # pklfile = {'model_': Reg.model_,"model_type" : model_type, 'training_data':{'raw-spectra':spectra,'target':y, },
...@@ -720,7 +721,7 @@ if Reg: ...@@ -720,7 +721,7 @@ if Reg:
else: else:
pklfile['selected-wls'] = {'idx':None, "wls":None } pklfile['selected-wls'] = {'idx':None, "wls":None }
with open('./report/out/file_system.pkl', "wb") as pkl: with open('./report/results/file_system.pkl', "wb") as pkl:
dump(pklfile, pkl) dump(pklfile, pkl)
return change return change
...@@ -733,7 +734,7 @@ if Reg: ...@@ -733,7 +734,7 @@ if Reg:
if len(os.listdir('./report/out/figures/'))>2: if len(os.listdir('./report/results/figures/'))>2:
make_archive(base_name="./report/Results", format="zip", base_dir="out", root_dir = "./report")# create a zip file make_archive(base_name="./report/Results", format="zip", base_dir="out", root_dir = "./report")# create a zip file
move("./report/Results.zip", f"./report/{tempdirname}/Results.zip")# put the inside the temp dir move("./report/Results.zip", f"./report/{tempdirname}/Results.zip")# put the inside the temp dir
with open(f"./report/{tempdirname}/Results.zip", "rb") as f: with open(f"./report/{tempdirname}/Results.zip", "rb") as f:
...@@ -746,4 +747,4 @@ if Reg: ...@@ -746,4 +747,4 @@ if Reg:
args = None, kwargs = None,type = "primary",use_container_width = True, disabled = disabled_down) args = None, kwargs = None,type = "primary",use_container_width = True, disabled = disabled_down)
delete_files(keep = ['.py', '.pyc','.bib']) HandleItems.delete_files(keep = ['.py', '.pyc','.bib'])
...@@ -25,23 +25,14 @@ hash_ = '' ...@@ -25,23 +25,14 @@ hash_ = ''
# hash_ = hash_data(hash_+str(add)) # hash_ = hash_data(hash_+str(add))
# return hash_ # return hash_
dirpath = Path('report/out/model') dirpath = Path('report/results/model')
if dirpath.exists() and dirpath.is_dir(): if dirpath.exists() and dirpath.is_dir():
rmtree(dirpath) rmtree(dirpath)
if 'Predict' not in st.session_state: if 'Predict' not in st.session_state:
st.session_state['Predict'] = False st.session_state['Predict'] = False
# #################################### Methods ##############################################
# empty temp figures #################################### Methods ##############################################
def delete_files(keep):
supp = []
# Walk through the directory
for root, dirs, files in os.walk('report/', topdown=False):
for file in files:
if file != 'logo_cefe.png' and not any(file.endswith(ext) for ext in keep):
os.remove(os.path.join(root, file))
###################################################################
st.header("Prediction making using a previously developed model") st.header("Prediction making using a previously developed model")
c1, c2 = st.columns([2, 1]) c1, c2 = st.columns([2, 1])
c1.image("./images/prediction making.png", use_column_width=True) c1.image("./images/prediction making.png", use_column_width=True)
...@@ -140,7 +131,7 @@ with c2: ...@@ -140,7 +131,7 @@ with c2:
def read_csv(file = None, change = None, dec = None, sep= None, names = None, hdr = None): def read_csv(file = None, change = None, dec = None, sep= None, names = None, hdr = None):
delete_files(keep = ['.py', '.pyc','.bib']) HandleItems.delete_files(keep = ['.py', '.pyc','.bib'])
from utils.data_parsing import CsvParser from utils.data_parsing import CsvParser
if file is not None: if file is not None:
par = CsvParser(file= file) par = CsvParser(file= file)
...@@ -354,15 +345,15 @@ if not pred_data.empty:# Load the model with joblib ...@@ -354,15 +345,15 @@ if not pred_data.empty:# Load the model with joblib
match test: match test:
# load csv file # load csv file
case 'csv': case 'csv':
df.to_csv('report/out/dataset/'+ new_data.name, sep = ';', encoding = 'utf-8', mode = 'a') df.to_csv('report/results/dataset/'+ new_data.name, sep = ';', encoding = 'utf-8', mode = 'a')
case 'dx': case 'dx':
with open('report/out/dataset/'+new_data.name, 'w') as dd: with open('report/results/dataset/'+new_data.name, 'w') as dd:
dd.write(dxdata) dd.write(dxdata)
prepspectraplot.savefig('./report/out/figures/raw_spectra.png') prepspectraplot.savefig('./report/results/figures/raw_spectra.png')
rawspectraplot.savefig('./report/out/figures/preprocessed_spectra.png') rawspectraplot.savefig('./report/results/figures/preprocessed_spectra.png')
hist.savefig('./report/out/figures/histogram.png') hist.savefig('./report/results/figures/histogram.png')
result.round(4).to_csv('./report/out/The_analysis_result.csv', sep = ";") result.round(4).to_csv('./report/results/The_analysis_result.csv', sep = ";")
return change return change
preparing_results_for_downloading(change = hash_) preparing_results_for_downloading(change = hash_)
...@@ -372,7 +363,7 @@ if not pred_data.empty:# Load the model with joblib ...@@ -372,7 +363,7 @@ if not pred_data.empty:# Load the model with joblib
from tempfile import TemporaryDirectory from tempfile import TemporaryDirectory
with TemporaryDirectory( prefix="results", dir="./report") as temp_dir:# create a temp directory with TemporaryDirectory( prefix="results", dir="./report") as temp_dir:# create a temp directory
tempdirname = os.path.split(temp_dir)[1] tempdirname = os.path.split(temp_dir)[1]
if len(os.listdir('./report/out/figures/'))==3: if len(os.listdir('./report/results/figures/'))==3:
make_archive(base_name="./report/Results", format="zip", base_dir="out", root_dir = "./report")# create a zip file make_archive(base_name="./report/Results", format="zip", base_dir="out", root_dir = "./report")# create a zip file
move("./report/Results.zip", f"./report/{tempdirname}/Results.zip")# put the inside the temp dir move("./report/Results.zip", f"./report/{tempdirname}/Results.zip")# put the inside the temp dir
with open(f"./report/{tempdirname}/Results.zip", "rb") as f: with open(f"./report/{tempdirname}/Results.zip", "rb") as f:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment