diff --git a/src/Packages.py b/src/Packages.py index cc8331c26ef0e400517186565811efbd7919c45a..f2f58a261b99ef60271b408c833ff09af1d8ab74 100644 --- a/src/Packages.py +++ b/src/Packages.py @@ -1,6 +1,7 @@ ## Data loading, handling, and preprocessing import os import json +import glob import sys from pathlib import Path import csv @@ -31,7 +32,6 @@ from pandas.api.types import is_float_dtype from plotly.subplots import make_subplots from matplotlib.cm import ScalarMappable import streamlit.components.v1 as components - # Clustering from sklearn.cluster import KMeans, HDBSCAN,AffinityPropagation from scipy.spatial.distance import euclidean, cdist diff --git a/src/Report/report.py b/src/Report/report.py index 9dc48ee30beefb0d790f28b5133be6c08260cbe4..e7e5e9ec2815bdfe41526d85d78ee259a3970f80 100644 --- a/src/Report/report.py +++ b/src/Report/report.py @@ -4,6 +4,7 @@ import os import pandas as pd import os.path import re +import streamlit as st def intersect(l1, l2): return l1.intersection(set(l2)) @@ -405,10 +406,14 @@ def report(*args): # create the Tex file - sections in args will be displayed: {'sample':'Sample Selection';'model':'Model Creation';'predict':'Predictions';'help':'LaTEX help for figs and tables';} # latex_report = report('sample', 'predict',) +import shutil +@st.cache_data +def compile_latex(change): + my = Path("./Report/report.pdf") + if my.is_file(): + os.remove("./Report/report.pdf") -def compile_latex(): # path to pdflatex - # pdflatex_path = Path("C:/Users/maimouni/AppData/Local/Programs/MiKTeX/miktex/bin/x64/") from config.config import pdflatex_path filename_path = Path("Report/") filename = 'report.tex' @@ -426,6 +431,7 @@ def compile_latex(): #for ext in extensions: #os.unlink(str(filename_path / filename[:-4]) + ext) # open the report - proc = subprocess.Popen([str(filename[:-4]) + '.pdf'], cwd = filename_path, shell=True) + # proc = subprocess.Popen([str(filename[:-4]) + '.pdf'], cwd = "./results", shell=True) proc.communicate() + # compile_latex() \ No newline at end of file diff --git a/src/images/model_creation.png b/src/images/model_creation.png index 9a0e22a44405e3b222d4370a42067cb8b4b6ee4d..8d7a0d71f2ad355d402042ebdb7a379f997e1067 100644 Binary files a/src/images/model_creation.png and b/src/images/model_creation.png differ diff --git a/src/images/prediction making.PNG b/src/images/prediction making.PNG new file mode 100644 index 0000000000000000000000000000000000000000..e981a4c776675a2d368db7472441c61cf9db75e0 Binary files /dev/null and b/src/images/prediction making.PNG differ diff --git a/src/images/sample selection.PNG b/src/images/sample selection.PNG new file mode 100644 index 0000000000000000000000000000000000000000..287bd9d305e3fcd72abc95482147703a554b5a25 Binary files /dev/null and b/src/images/sample selection.PNG differ diff --git a/src/pages/1-samples_selection.py b/src/pages/1-samples_selection.py index d42908aedc26553752a8d694dcef4b9211d070ea..f46e65d3cfe9ee25ee7045edc4c384e88e4b13c8 100644 --- a/src/pages/1-samples_selection.py +++ b/src/pages/1-samples_selection.py @@ -3,14 +3,16 @@ st.set_page_config(page_title="NIRS Utils", page_icon=":goat:", layout="wide") from Modules import * # empty temp figures -repertoire_a_vider = Path('Report/figures') -if os.path.exists(repertoire_a_vider): - for fichier in os.listdir(repertoire_a_vider): - chemin_fichier = os.path.join(repertoire_a_vider, fichier) - if os.path.isfile(chemin_fichier) or os.path.islink(chemin_fichier): - os.unlink(chemin_fichier) - elif os.path.isdir(chemin_fichier): - shutil.rmtree(chemin_fichier) +for i in ['Report/figures','Report/datasets']: + repertoire_a_vider = Path(i) + if os.path.exists(repertoire_a_vider): + for fichier in os.listdir(repertoire_a_vider): + chemin_fichier = os.path.join(repertoire_a_vider, fichier) + if os.path.isfile(chemin_fichier) or os.path.islink(chemin_fichier): + os.unlink(chemin_fichier) + elif os.path.isdir(chemin_fichier): + shutil.rmtree(chemin_fichier) + # HTML pour le bandeau "CEFE - CNRS" add_header() #load specific model page css @@ -47,9 +49,10 @@ match st.session_state["interface"]: default_sample_selection_option = 0 ################################### I - Data Loading and Visualization ######################################## +date_time = datetime.datetime.now().strftime('_%y_%m_%d_%H_%M_') st.title("Calibration Subset Selection") col2, col1 = st.columns([3, 1]) -col2.image("./images/graphical_abstract.jpg", use_column_width=True) +col2.image("./images/sample selection.png", use_column_width=True) ## Preallocation of data structure spectra = pd.DataFrame() meta_data = pd.DataFrame() @@ -69,15 +72,19 @@ selection_number = None # loader for datafile data_file = col1.file_uploader("Data file", type=["csv","dx"], help=" :mushroom: select a csv matrix with samples as rows and lambdas as columns", key=5) - if not data_file: col1.warning('âš ï¸ Please load data file !') else: # Retrieve the extension of the file - test = data_file.name[data_file.name.find('.'):] - match test: + # test = data_file.name[data_file.name.find('.'):] + + + extension = data_file.name.split(".")[-1] + userfilename = data_file.name.replace(f".{extension}", '') + + match extension: ## Load .csv file - case '.csv': + case 'csv': with col1: # Select list for CSV delimiter psep = st.radio("Select csv separator - _detected_: " + str(find_delimiter('data/'+data_file.name)), options=[";", ","], index=[";", ","].index(str(find_delimiter('data/'+data_file.name))),horizontal=True, key=9) @@ -88,16 +95,22 @@ else: else: col = False imp = pd.read_csv(data_file, sep=psep, index_col=col) + imp.to_csv("./Report/datasets/"+data_file.name,sep = ';', encoding='utf-8', mode='a') + # spectra = col_cat(imp)[0] # meta_data = col_cat(imp)[1] spectra, md_df_st_ = col_cat(imp) meta_data = md_df_st_ st.success("The data have been loaded successfully", icon="✅") ## Load .dx file - case '.dx': + case 'dx': # Create a temporary file to save the uploaded file with NamedTemporaryFile(delete=False, suffix=".dx") as tmp: tmp.write(data_file.read()) + with open(tmp.name, 'r') as dd: + dxdata = dd.read() + with open('Report/datasets/'+data_file.name, 'w') as dd: + dd.write(dxdata) tmp_path = tmp.name with col1: _, spectra, meta_data, md_df_st_ = read_dx(file = tmp_path) @@ -118,7 +131,7 @@ if not spectra.empty: col2, col1 = st.columns([3, 1]) with col2: fig, ax = plt.subplots(figsize = (30,7)) - if test =='.dx': + if extension =='dx': lab = ['Wavenumber (1/cm)' if meta_data.loc[:,'xunits'][0] == '1/cm' else 'Wavelength (nm)'] if lab[0] =='Wavenumber (1/cm)': spectra.T.plot(legend=False, ax = ax).invert_xaxis() @@ -171,7 +184,11 @@ if not spectra.empty: bb1.warning('âš ï¸ Please choose an algorithm !') case "PCA": - dr_model = LinearPCA(xc, Ncomp=8) + @st.cache_data + def dr_model_(change): + dr_model = LinearPCA(xc, Ncomp=8) + return dr_model + dr_model = dr_model_(change = hash_data(xc)) case "UMAP": if not meta_data.empty: @@ -184,10 +201,20 @@ if not spectra.empty: supervised = md_df_st_[col] else: supervised = None - dr_model = Umap(numerical_data = MinMaxScale(spectra), cat_data = supervised) + @st.cache_data + def dr_model_(change): + dr_model = Umap(numerical_data = MinMaxScale(spectra), cat_data = supervised) + return dr_model + dr_model = dr_model_(change = hash_data(spectra)) + case 'NMF': - dr_model = Nmf(spectra, Ncomp= 3) + @st.cache_data + def dr_model_(change): + dr_model = Nmf(spectra, Ncomp= 3) + return dr_model + dr_model = dr_model_(change = hash_data(spectra)) + if dr_model: axis1 = bb3.selectbox("x-axis", options = dr_model.scores_.columns, index=0) @@ -382,7 +409,7 @@ if not spectra.empty: st.write('Loadings plot') p = dr_model.loadings_ freq = pd.DataFrame(colnames, index=p.index) - if test =='.dx': + if extension =='dx': if meta_data.loc[:,'xunits'][0] == '1/cm': freq.columns = ['Wavenumber (1/cm)'] xlab = "Wavenumber (1/cm)" @@ -566,47 +593,79 @@ if labels: sel.write(sam) - +# st.write(hash_data(change = './Report/report.tex')) +with open('./Report/report.tex') as myfile: + filehash = hash_data(myfile.read()) # figs_list = os.listdir("./Report/figures") if data_file: Nb_ech = str(n_samples) nb_clu = str(sam1.shape[0]) - ############################### - st.header('Download Analysis Results', divider='blue') - M9, M10 = st.columns([1,1]) - M10.info('The results are automatically converted into LaTeX code, a strong typesetting system noted for its remarkable document formatting.\ - The comprehensive capabilities of LaTeX ensure that your data and findings are cleanly and properly presented,\ - with accurate formatting and organizing.') - - items_download = M9.selectbox('To proceed, please choose the file or files you want to download from the list below:', - options = ['','Selected Subset', 'Report', 'Both Selected Subset & Report'], index=0, format_func=lambda x: x if x else "<Select>", - key=None, help=None, on_change=None, args=None, kwargs=None, placeholder="Choose an option", disabled=False, label_visibility="visible") - - - ## Save model and download report - - # st.session_state.a = "Please wait while your LaTeX report is being compiled..." - date_time = datetime.datetime.strftime(datetime.date.today(), '_%Y_%m_%d_') - # match items_download: - # case '': - - if items_download: - if M9.button('Download', type="primary"): - match items_download: - case '': - M9.warning('Please select an item from the dropdown list!') - case 'Selected Subset': - sam.to_csv('./data/subset/seleced subset.csv', sep = ";") + ################################################### + ## generate report + latex_report = report.report('Representative subset selection', data_file.name, dim_red_method, + clus_method, Nb_ech, ncluster, selection, selection_number, nb_clu,tcr, sam) + + @st.cache_data + def download_res(file,sam): + zipname = f'results{date_time}subset_selection_{file.name.split('.')[0]}.zip' # name of the zipfile + with open('./temp/fname.json', 'w') as f: # dump filename and save it as a .json file + json.dump(zipname, f) + shutil.make_archive(base_name = zipname.split('.')[0],format = "zip",root_dir = "./Report", base_dir = "figures")# create zip containing figures and report + + file_path = Path("./temp/"+zipname) + sam.to_csv("./"+zipname,sep = ';', + encoding='utf-8', mode='a', + compression=dict(method='zip',archive_name=f"selected subset for reference analysis_{userfilename}_{date_time}_.csv")) + + with zipfile.ZipFile("./"+zipname, 'a') as newzip: + newzip.write("./Report/report.pdf", arcname="report.pdf") + newzip.write("./Report/datasets/"+os.listdir("./Report/datasets")[0], arcname=os.listdir("./Report/datasets")[0]) - case 'Report': - # M9.info("Please wait while your LaTeX report is being compiled...") - latex_report = report.report('Representative subset selection', data_file.name, dim_red_method, clus_method, Nb_ech, ncluster, selection, selection_number, nb_clu,tcr, sam) - report.compile_latex() - - case 'Both Selected Subset & Report': - sam.to_csv('./data/subset/seleced subset.csv', sep = ";") - latex_report = report.report('Representative subset selection', data_file.name, dim_red_method, clus_method, Nb_ech, ncluster, selection, selection_number, nb_clu,tcr, sam) - report.compile_latex() - M9.success('The selected item has been exported successfully!') + + # #### add data to zip + # match data_file.name: + # case 'csv': + # with open(data_file.name, 'wb') as cs: + # st.write(data_file.getbuffer()) + # case 'dx': + # st.write(4) + ### move the .zip file to the temp directory + shutil.move('./'+zipname,'./temp/'+ zipname) + + a ='' + for i in (data_file.name, dim_red_method,clus_method, Nb_ech, tcr.astype(str)): + a += str(i) + + myfilepdf = Path("./Report/report.pdf") + if 'htest' not in st.session_state: + st.session_state.htest = '0' + report.compile_latex(change =hash_data(a)) + st.write(hash_data(a)) + if myfilepdf.is_file(): + download_res(file = data_file, sam = sam) + + elif st.session_state['htest'] != hash_data(a): + st.session_state['htest'] = hash_data(a) + report.compile_latex(change =hash_data(a)) + st.write(hash_data(a)) + if myfilepdf.is_file(): + download_res(file = data_file, sam = sam) + else: + pass + + + list_of_files = glob.glob(r"./temp/*.zip") + if len(list_of_files) >3: + oldest_file = min(list_of_files, key=os.path.getctime) + os.remove(oldest_file) + list_of_files = glob.glob(r"./temp/*.zip") + recent_file = max(list_of_files, key=os.path.getctime) + + with open('./temp/fname.json', 'r') as f: + zipname = json.load(f) + if os.path.split(recent_file)[1] == os.path.split(zipname)[1]: + with open("./temp/"+zipname, "rb") as fp: + st.download_button('Download', data = fp, file_name=zipname, mime="application/zip", + args=None, kwargs=None,type="primary",use_container_width=True) diff --git a/src/pages/3-prediction.py b/src/pages/3-prediction.py index 25897fda0a5d92303ecedad13d941882469e27bc..8600dee93c0ea6b3d83d84485eed64ec05f04495 100644 --- a/src/pages/3-prediction.py +++ b/src/pages/3-prediction.py @@ -17,7 +17,7 @@ local_css(css_file / "style_model.css") st.title("Prediction making using a previously developed model") M10, M20= st.columns([2, 1]) -M10.image("./images/graphical_abstract.jpg", use_column_width=True) +M10.image("./images/prediction making.png", use_column_width=True) # M1, M2= st.columns([2, 1]) @@ -43,8 +43,8 @@ else: if test == files_format[0]: # - qsep = M2.selectbox("Select csv separator - _detected_: " + str(find_delimiter('data/'+file.name)), options=[";", ","], index=[";", ","].index(str(find_delimiter('data/'+file.name))), key=2) - qhdr = M2.selectbox("indexes column in csv? - _detected_: " + str(find_col_index('data/'+file.name)), options=["no", "yes"], index=["no", "yes"].index(str(find_col_index('data/'+file.name))), key=3) + qsep = M20.selectbox("Select csv separator - _detected_: " + str(find_delimiter('data/'+file.name)), options=[";", ","], index=[";", ","].index(str(find_delimiter('data/'+file.name))), key=2) + qhdr = M20.selectbox("indexes column in csv? - _detected_: " + str(find_col_index('data/'+file.name)), options=["no", "yes"], index=["no", "yes"].index(str(find_col_index('data/'+file.name))), key=3) if qhdr == 'yes': col = 0 else: @@ -56,9 +56,9 @@ else: tmp.write(file.read()) tmp_path = tmp.name chem_data, spectra, meta_data, _ = read_dx(file = tmp_path) - M2.success("The data have been loaded successfully", icon="✅") + M20.success("The data have been loaded successfully", icon="✅") if chem_data.to_numpy().shape[1]>0: - yname = M2.selectbox('Select target', options=chem_data.columns) + yname = M20.selectbox('Select target', options=chem_data.columns) measured = chem_data.loc[:,yname] == 0 y = chem_data.loc[:,yname].loc[measured] pred_data = spectra.loc[measured] @@ -69,7 +69,7 @@ else: # Load parameters -st.header("I - Spectral data visualization", divider='blue') +st.header("I - Spectral data preprocessing & visualization", divider='blue') if not pred_data.empty:# Load the model with joblib M1, M2= st.columns([2, 1]) M1.write('Raw spectra') @@ -79,7 +79,7 @@ if not pred_data.empty:# Load the model with joblib ### preprocessing preprocessed = pd.DataFrame if not pred_data.empty: - params = M4.file_uploader("Load preprocessings params", type = '.json', help=" .json file") + params = M2.file_uploader("Load preprocessings params", type = '.json', help=" .json file") if params: prep = json.load(params) # M4.write(ProcessLookupError) @@ -99,7 +99,6 @@ if not pred_data.empty: ################################################################################################ ## plot preprocessed spectra -st.header('II - Spectral data preprocessing', divider='blue') if not preprocessed.empty: M3, M4= st.columns([2, 1]) M3.write('Preprocessed spectra') @@ -107,16 +106,16 @@ if not preprocessed.empty: M3.pyplot(fig2) SG = f'- Savitzky-Golay derivative parameters \:(Window_length:{prep['window_length']}; polynomial order: {prep['polyorder']}; Derivative order : {prep['deriv']})' Norm = f'- Spectral Normalization \: {norm}' - M4.write('The spectra were preprocessed using:\n'+SG+"\n"+Norm) + M4.info('The spectra were preprocessed using:\n'+SG+"\n"+Norm) ################### Predictions making ########################## -st.header("III - Prediction making", divider='blue') -if not pred_data.empty:# Load the model with joblib - M5, M6 = st.columns([2, 0.01]) +st.header("II - Prediction making", divider='blue') +if not pred_data.empty and params:# Load the model with joblib + M5, M6 = st.columns([2, 1]) #dir = os.listdir('data/models/')[1:] dir = os.listdir('data/models/') dir.insert(0,'') - model_name = M5.selectbox("Select your model from the dropdown list:", options = dir, key = 21) + model_name = M6.selectbox("Select your model from the dropdown list:", options = dir, key = 21) if model_name and model_name !='': export_name += '_with_' + model_name[:model_name.find('.')] @@ -124,10 +123,10 @@ if not pred_data.empty:# Load the model with joblib loaded_model = joblib.load(f) if loaded_model: - M5.success("The model has been loaded successfully", icon="✅") - s = M5.checkbox('the model is of ipls type?') + M6.success("The model has been loaded successfully", icon="✅") + s = M6.checkbox('the model is of ipls type?') if s: - index = M5.file_uploader("select wavelengths index file", type="csv") + index = M6.file_uploader("select wavelengths index file", type="csv") if index: intervalls = pd.read_csv(index, sep=';', index_col=0).to_numpy() idx = [] @@ -135,7 +134,7 @@ if not pred_data.empty:# Load the model with joblib idx.extend(np.arange(intervalls[i,0], intervalls[i,1]+1)) if loaded_model: - if M5.button('Predict'): + if M6.button('Predict', type='primary'): if s: result = loaded_model.predict(preprocessed.iloc[:,idx]) else: @@ -171,8 +170,8 @@ if loaded_model: thispatch.set_facecolor(color) M5.pyplot(fig) - M6.write('Predicted values table') - M6.dataframe(result.T) + st.write('Predicted values table') + st.dataframe(result.T) ################################## result.to_csv(export_folder + export_name + '.csv', sep = ';')