diff --git a/src/Class_Mod/KMEANS_.py b/src/Class_Mod/KMEANS_.py
index 9e69ba5b1d187ecad370ccb5d6783323892db942..8c67f1d8eeccc528d54afe61a88814705c12746a 100644
--- a/src/Class_Mod/KMEANS_.py
+++ b/src/Class_Mod/KMEANS_.py
@@ -39,7 +39,7 @@ class Sk_Kmeans:
             idxidx.append(f'{i+1}_clust')
             values.append((s[i] - s[i+1])*100 / s[i])
 
-        id = np.max(np.where(np.array(values) > 20))+2
+        id = np.max(np.where(np.array(values) > 10))+2
         return id
     
     def fit_optimal(self, nclusters):
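Context for the threshold change above: `suggested_n_clusters_` keeps adding clusters as long as the next step still cuts inertia by more than 10% (previously 20%). A minimal sketch of this elbow heuristic, assuming scikit-learn's KMeans supplies the inertias; data and names are illustrative, not the module's own:

import numpy as np
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs

X, _ = make_blobs(n_samples=200, centers=4, random_state=0)

# inertia for 1..10 clusters
s = [KMeans(n_clusters=k, n_init=10, random_state=0).fit(X).inertia_ for k in range(1, 11)]

# relative inertia drop (%) between consecutive cluster counts, as in Sk_Kmeans
values = [(s[i] - s[i + 1]) * 100 / s[i] for i in range(len(s) - 1)]

# suggested count: the largest k whose step from k-1 to k clusters still drops > 10%
suggested = int(np.max(np.where(np.array(values) > 10))) + 2
print(suggested)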
diff --git a/src/Class_Mod/KennardStone.py b/src/Class_Mod/KennardStone.py
new file mode 100644
index 0000000000000000000000000000000000000000..0e5c5cbe32150e3fd7cf4cdfe860db2766888043
--- /dev/null
+++ b/src/Class_Mod/KennardStone.py
@@ -0,0 +1,26 @@
+from Packages import *
+from typing import Union
+
+class KS:
+    def __init__(self, x: Union[np.ndarray, pd.DataFrame], rset: Union[float, int]):
+        self.x = x
+        self.ratio = rset  # percentage of samples kept for calibration
+        self._train, self._test = ks.train_test_split(self.x, train_size = self.ratio/100)
+    
+    @property
+    def calset(self):
+        clu = self._train.index.tolist()
+        
+        return self.x, clu
+    
+class RDM:
+    def __init__(self, x: Union[np.ndarray, pd.DataFrame], rset: Union[float, int]):
+        self.x = x
+        self.ratio = rset  # percentage of samples kept, drawn at random
+        self._train, self._test = train_test_split(self.x, train_size = self.ratio/100)
+    
+    @property
+    def calset(self):
+        clu = self._train.index.tolist()
+        
+        return self.x, clu
\ No newline at end of file
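A short usage sketch of the two samplers above, as they might be exercised below the classes in this module. It assumes the wildcard import from Packages exposes the kennard_stone package as ks (implied by the ks.train_test_split call) and scikit-learn's train_test_split; the scores frame is illustrative:

import numpy as np
import pandas as pd
import kennard_stone as ks                       # assumed: what Packages exposes as ks
from sklearn.model_selection import train_test_split

scores = pd.DataFrame(np.random.rand(100, 3), columns=['axis1', 'axis2', 'axis3'])

# Kennard-Stone: keep 20% of the samples, spread uniformly over feature space
x, ks_idx = KS(x=scores, rset=20).calset         # ks_idx: row labels of the kept subset

# random baseline with the same ratio
x, rdm_idx = RDM(x=scores, rset=20).calset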
diff --git a/src/Class_Mod/RegModels.py b/src/Class_Mod/RegModels.py
index ce07a07e6bf541d8e078dfe12846d96d4868e28a..6e4cc01aa69614f1e85eb20165c68063a440d026 100644
--- a/src/Class_Mod/RegModels.py
+++ b/src/Class_Mod/RegModels.py
@@ -4,7 +4,7 @@ from Class_Mod import metrics, Snv, No_transformation, KF_CV, sel_ratio
 
 class Regmodel(object):
     
-    def __init__(self, train, test, n_iter, add_hyperparams = None, nfolds = 5, **kwargs):
+    def __init__(self, train, test, n_iter, add_hyperparams = None, nfolds = 3, **kwargs):
         self.SCORE = 100000000
         self._xc, self._xt, self._ytrain, self._ytest = train[0], test[0], train[1], test[1]
         self._nc, self._nt, self._p = train[0].shape[0], test[0].shape[0], train[0].shape[1]
@@ -152,7 +152,7 @@ class TpeIpls(Regmodel):
         self.n_arrets = self.n_intervall*2
         
         
-        r = {'n_components': hp.randint('n_components', 2,20)}
+        r = {'n_components': hp.randint('n_components', 2,10)}
         r.update({f'v{i}': hp.randint(f'v{i}', 0, train[0].shape[1]) for i in range(1,self.n_arrets+1)})
 
         super().__init__(train, test, n_iter, add_hyperparams = r)
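The hunk above shrinks the TPE-iPLS search space (n_components now 2-10; the default CV folds drop from 5 to 3). A hedged sketch of how such a hyperopt space is built and searched; objective is a stand-in for the class's cross-validated RMSE, not the actual method:

from hyperopt import hp, fmin, tpe, Trials

n_arrets = 3 * 2                       # two interval bounds per interval
n_features = 200                       # illustrative spectrum width

r = {'n_components': hp.randint('n_components', 2, 10)}
r.update({f'v{i}': hp.randint(f'v{i}', 0, n_features) for i in range(1, n_arrets + 1)})

def objective(params):
    # stand-in: return the CV RMSE of a PLS model restricted to these intervals
    return abs(params['n_components'] - 5)

best = fmin(fn=objective, space=r, algo=tpe.suggest, max_evals=30, trials=Trials())
print(best)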
diff --git a/src/Class_Mod/__init__.py b/src/Class_Mod/__init__.py
index 82330cbd9b33dc9a70d5ae8cb47b6fd7b01fdafc..82bee16645d433af12604ea26fdb7dfbeb26457f 100644
--- a/src/Class_Mod/__init__.py
+++ b/src/Class_Mod/__init__.py
@@ -15,4 +15,5 @@ from .SK_PLSR_ import PlsR
 from .PLSR_Preprocess import PlsProcess
 from .NMF_ import Nmf
 from .Ap import AP
-from .RegModels import Plsr, TpeIpls
\ No newline at end of file
+from .RegModels import Plsr, TpeIpls
+from .KennardStone import KS, RDM
\ No newline at end of file
diff --git a/src/Modules.py b/src/Modules.py
index ba9e78454bea329013c893e864d5b1042b72ef1a..f440038abae3c75c9085ec85ff607bf4c7ffd893 100644
--- a/src/Modules.py
+++ b/src/Modules.py
@@ -10,4 +10,4 @@ pages_folder = Path("pages/")
 from style.header import add_header, add_sidebar
 from config.config import pdflatex_path
 local_css(css_file / "style.css")
-
+from Class_Mod import KS, RDM
diff --git a/src/Report/report.py b/src/Report/report.py
index dfbdab5b5a78ac9e5bdb9c84c6c8dfd2d2f3d5b0..4a0b309ae62b78f29e6dafa4fd95146926e648ab 100644
--- a/src/Report/report.py
+++ b/src/Report/report.py
@@ -10,17 +10,21 @@ def intersect(l1, l2):
 def check(file):
     return os.path.isfile(file)
 def report(*args):
-    signal_preprocess = {'Snv':'Standard Normal Variate (SNV)'}
-    dim_red_methods= {'PCA':'Principal Components Analysis (PCA)',
-                      'UMAP':'Uniform Manifold Approximation and Projection (UMAP)',
-                        'NMF':'Non-negative Matrix Factorization (NMF)'}  # List of dimensionality reduction algos
-    cluster_methods = {'Kmeans':'Kmeans',
-                       'HDBSCAN':'Hierarchical Density-Based Spatial Clustering of Applications with Noise (HDBSCAN)',
-                        'AP':'Affinity Propagation (AP)'} # List of clustering algos
+    signal_preprocess = {'Snv': r'Standard Normal Variate (SNV) \cite{barnes1989standard}',
+                         'SG': r'Savitzky-Golay (SG) \cite{savitzky1964smoothing}'}
+    dim_red_methods= {'PCA': r'Principal Components Analysis (PCA) \cite{wold1987principal,ringner2008principal,greenacre2022principal,JMLR:v12:pedregosa11a}',
+                      'UMAP': r'Uniform Manifold Approximation and Projection (UMAP) \cite{ghojogh2021uniform,JMLR:v12:pedregosa11a}',
+                        'NMF': r'Non-negative Matrix Factorization (NMF) \cite{lopes2015non}'}  # List of dimensionality reduction algos
+    cluster_methods = {'Kmeans': r'Kmeans \cite{chong2021k,JMLR:v12:pedregosa11a}',
+                       'HDBSCAN': r'Hierarchical Density-Based Spatial Clustering of Applications with Noise (HDBSCAN) \cite{mcinnes2017hdbscan}',
+                        'AP': r'Affinity Propagation (AP) \cite{dueck2009affinity,JMLR:v12:pedregosa11a}',
+                        'KS': 'Kennard-Stone algorithm (KS)',
+                        'RDM': 'random sampling'} # List of clustering/sampling algos
+    
     selec_strategy = {'center':'PCA','random':'PCA'}
-    reg_algo ={"Full-PLSR":'full Partial Least Squares (PLS)',
-                "Locally Weighted PLSR": 'Locally Weighted Partial Least Squares (LWPLS)',
-                "Interval-PLSR": "Tree-structured Parzen estimator-interval Partial Least Squares (TPE-iPLS)"} 
+    reg_algo ={"PLS":'Partial Least Squares (PLS) \cite{Wold2001,JMLR:v12:pedregosa11a}',
+                "LW-PLS": 'Locally Weighted-Partial Least Squares (LW-PLS) \cite{Lesnoff2020}',
+                "TPE-iPLS": "Tree-structured Parzen estimator-interval Partial Least Squares (TPE-iPLS)"} 
 
     to_report=[]
     j=0
@@ -35,26 +39,30 @@ def report(*args):
             globals()[df_name] = arg.select_dtypes(include=['float64', 'int64'])
     
     latex_report = ""
-    latex_report += r"""\documentclass[a4paper,10pt]{article}
+    latex_report += r"""\documentclass[11pt]{article}
     \usepackage{fancyhdr}
     \usepackage{graphicx}
     \usepackage{geometry}
+    \geometry{a4paper, left=2cm, right=2cm, top=1.5cm, bottom=5cm,
+      headheight=0.05cm, footskip=1.7cm}
+
     \usepackage{changepage}
-    \geometry{a4paper, left=2cm, right=2cm, top=1.5cm, bottom=3cm,
-      headheight=0.05cm, footskip=1cm}
-    \usepackage{caption, subcaption}
-    \usepackage{hyperref}
     \usepackage{hyphenat}
     \usepackage{booktabs}
     \usepackage{times}
-    \usepackage{etoolbox,fancyhdr,xcolor}
+    \usepackage{parskip}
+    \usepackage{float}
+    \setlength{\parskip}{\baselineskip} % blank line between paragraphs
+    \usepackage{cite}     % For citing with range compression
+    \usepackage{etoolbox}
+    \usepackage{xcolor}
     \newcommand{\headrulecolor}[1]{\patchcmd{\headrule}{\hrule}{\color{#1}\hrule}{}{}}
     \newcommand{\footrulecolor}[1]{\patchcmd{\footrule}{\hrule}{\color{#1}\hrule}{}{}}
     \renewcommand{\headrulewidth}{1pt}
     \headrulecolor{red!100}%
     \renewcommand{\footrulewidth}{1pt}
     \footrulecolor{red!100}%
-    \graphicspath{{images/}, {Figures/}}    
+    \graphicspath{{images/}, {Figures/}}
     \fancyhead[R]{\includegraphics[width=0.1\textwidth]{logo_cefe.png}}
     \fancyhead[L]{PACE - NIRS Analysis Report}
     \fancyfoot[L]{Project Name to fill}
@@ -64,7 +72,20 @@ def report(*args):
     \addtolength{\topmargin}{-9.2942pt}
     \pagestyle{fancy}
 
-    \DeclareCaptionLabelFormat{myfigureformat}{\textbf{Fig. #2.}}
+    % Customize appearance of figure references
+    \usepackage{xcolor}   % For defining colors    
+    \definecolor{myblue}{RGB}{0,0,128} % RGB values for blue
+
+    \usepackage{hyperref}
+    \hypersetup{colorlinks=true,linkcolor=myblue,citecolor=myblue,urlcolor=myblue}
+    \usepackage{cleveref} % For clever references
+    
+
+    \usepackage{subcaption}
+    \usepackage{caption}
+    % Redefine cref formats for figures and tables
+
+    \DeclareCaptionLabelFormat{myfigureformat}{\textbf{Fig. #2}}
     \captionsetup[figure]{
         labelformat=myfigureformat, % Apply the custom format
         justification=centering, % Justify the caption text
@@ -74,11 +95,15 @@ def report(*args):
     \DeclareCaptionLabelFormat{mytableformat}{\textbf{Table #2}}
     \captionsetup[table]{
         labelformat=mytableformat, % Apply the custom format
-        justification=justified, % Justify the caption text
+        justification=centering, % Center the caption text
         singlelinecheck=false, % Allow the caption to occupy multiple lines
         skip=0pt, % Vertical space between caption and table
         position=top % Position the caption at the top of the table
     }
+    \crefformat{figure}{\textcolor{myblue}{Fig.~#2#1#3}}
+    \Crefformat{figure}{\textcolor{myblue}{Fig.~#2#1#3}} % Capitalized version for beginning of sentence
+    \crefformat{table}{\textcolor{myblue}{table~#2#1#3}}
+    \Crefformat{table}{\textcolor{myblue}{Table~#2#1#3}} % Capitalized version for beginning of sentence
 
     
     \begin{document}
@@ -90,10 +115,11 @@ def report(*args):
     latex_report += r"""\noindent
     \textbf{QUERY MADE: }{"""+ re.sub(r'([_%])', r'\\\1',to_report[0])+ r"""}.\\
     \noindent
-    \textbf{ENTERED INPUTS: }{"""+ re.sub(r'([_%])', r"\\\1", to_report[1])+ r"""}.\\"""
+    \textbf{INPUT DATA: }{"""+ re.sub(r'([_%])', r"\\\1", to_report[1])+ r"""}.\\"""
     latex_report += r"""\section*{Results}"""
+    
     latex_report += r"""\subsection*{Spectral data visualization}"""
-    latex_report += r"""Acquired spectra were visualized in fig.\ref{raw_spectra} by plotting the signal of the samples captured in the specific spectral range 
+    latex_report += r"""Acquired spectra were visualized in (\cref{raw_spectra}) by plotting the signal of the samples captured in the specific spectral range.
     This helps observe general patterns and trends in the spectra, and understand the variability within the data.
     \begin{figure}[h]
     \centering
@@ -104,54 +130,107 @@ def report(*args):
 
     if 'Representative subset selection' in to_report:
         latex_report += r"""\subsection*{Multivariable Data Analysis}"""
-        latex_report += r"""\indent For optimal selection of subset of the samples to analyze through the \cite{Lesnoff2020}
-          reference method, a pipeline consisting of consecutively applying features extraction/dimensionality\cite{BellonMaurel2010,scikit-learn}
-            reduction and clustering analysis was developed. Features extraction was performed by means of {"""+dim_red_methods[to_report[2]] + r"""} 
-            technique which helps represent the high dimensional spectra in a reduced perceptible 3D
-            subspace spanned by a few number of features (three features in our case), while clustering analysis was performed
-              using the {"""+cluster_methods[to_report[3]] + r"""} technique which
-              helps group the data into groups of spectra that share the same carachteristics. """
+        latex_report += r""" Multivariable calibration models have widely been used for quantitative analysis and chemical analysis fields.
+            Different multivariable modelling techniques are used for calibration models developement, ranging from linear to non linear techniques, and the
+            performance of models developed using these techniques depends heavily on the overall quality of the data and the degree of representativeness
+            of calibration set, interchangeably called training set, used for its development, i.e, how much the training set captures the characteristics
+            and diversity of the entire population or dataset from which it is drawn \cite{li2016strategy}.\par"""
+        
+        latex_report += r""" For optimal selection of a reprentative subset of the samples to analyze through the 
+        reference method and use for calibration models development, a pipeline consisting of consecutively applying features extraction (or dimensionality
+        reduction) and"""
+        
+
+        if 'KS' in to_report or 'RDM' in to_report:
+            latex_report += r""" samples subset selection was developed."""
+        else:
+            latex_report += r""" clustering analysis was developed."""
+       
+        latex_report += r""" Features extraction was performed by means of {"""+dim_red_methods[to_report[2]] + r"""} technique that helps 
+            represent the high dimensional spectra in a reduced perceptible 3D subspace spanned by a few number of features (three features in our case), while """
+        
+
+        if 'KS' in to_report or 'RDM' in to_report:
+            latex_report += r""" samples subset selection was performed using the {"""+cluster_methods[to_report[3]] + r"""} technique.\par"""
+        else:
+            latex_report += r""" clustering analysis was performed using the {"""+cluster_methods[to_report[3]] + r"""} technique that helps group the data into groups of spectra
+            that share the same carachteristics.\par"""
         
-        latex_report += r"""After applying the pipeline, a subset sampling method, consisting of"""
-        if 'center' in to_report:
-            latex_report += r""" selecting {"""+to_report[7]+ r"""} samples, each from a distict cluster, with the least euclidian distance to the center of the cluster identified by {"""+to_report[3]+ r"""} and to which it the sample belongs."""
-        if 'random' in to_report:
-              latex_report += r""" fitting a second clustering model, specifically kmeans, to each individual data cluster and selecting {"""+to_report[7]+ r"""}
-                 samples or less from each subcluster (if a subcluster contains less than {"""+to_report[7]+ r"""} samples, then all samples included
-                  in this subcluster are selected), was applied.\\"""
+
+        if 'KS' not in to_report and 'RDM' not in to_report:
+            latex_report += r""" After implementing the pipeline, a subset sampling method, consisting of"""
+            if 'center' in to_report:
+                latex_report += r""" selecting {"""+to_report[7]+ r"""} samples, each from a distict cluster, with the least euclidian distance to the center of the cluster identified by {"""+to_report[3]+ r"""} and to which it the sample belongs."""
+            if 'random' in to_report:
+                latex_report += r""" fitting a second clustering model, specifically kmeans, to each individual data cluster and selecting {"""+to_report[7]+ r"""}
+                    samples or less from each subcluster (if a subcluster contains less than {"""+to_report[7]+ r"""} samples, then all samples included
+                    in this subcluster are selected), was applied.\par"""
+                
                   
         if "PCA" in to_report:
-            latex_report += r"""\indent To detect the presence of any spectral outliers, the influence and residuals plots were constructed,
+            latex_report += r"""\indent To detect potential spectral outliers, the influence and residuals plots \cite{Mejia2017} were constructed,
               with outlyingness limits established at the 95\% confidence level. Together, these plots helps distinguish regular observations,
                 which form a homogeneous group near the subspace generated by the PCs; good leverage points,
                   which are at the same plane as the subspace but distant from the ROs; orthogonal observations, which have a
                     large residual distance to the subspace, but whose projection is on the subspace; and, finally, bad leverage
-                      points, which have a large residual distance such that the projection on the subspace is away from regular observations.\\*"""
+                      points, which have a large residual distance such that the projection on the subspace is away from regular observations.\par"""
               
-        latex_report += """\indent Results of applying this workflow are displayed in fig. 2. Based of the features extracted using
-         {"""+to_report[2]+ r"""}, {"""+to_report[3]+ r"""} revealed the  existance of {"""+to_report[5] + r"""}
-            data clusters that are visualized with different colors.
-        \begin{figure}[h!]
+        latex_report += """ Results of applying this workflow are displayed in""" 
+        if 'PCA' in to_report:
+            latex_report += """ (\cref{pcaplots,hotelling_and_influence,loadings})."""
+        elif 'NMF' in to_report:
+            latex_report += """ (\cref{pcaplots,loadings})."""
+        else:
+            latex_report += """ (\cref{pcaplots})."""
+
+        if 'KS' in to_report or 'RDM' in to_report:
+            latex_report += """ Based on the features extracted using {"""+to_report[2]+ r"""}, """
+        
+        else:
+            latex_report += """ Based on the features extracted using {"""+to_report[2]+ r"""},
+            {"""+to_report[3]+ r"""} revealed the existence of {"""+to_report[5] + r"""} data clusters, visualized with different colors, from which """
+        
+        latex_report += r"""a subset of {"""+to_report[8]+ r"""} samples was selected"""
+        if 'KS' in to_report or 'RDM' in to_report:
+            latex_report += r""", by the {"""+cluster_methods[to_report[3]] + r"""},"""
+
+        latex_report +=  r""" and extracted to be representative of the whole data set, i.e, to reflect the variation included in the whole data set.
+              This subset of samples is suggested to be used for a robust NIR calibration developement,
+                therefore should to be analyzed by adequate reference analytical procedures (generally requiring destructive sample preparation) to collect data for the target variable to be modelled.\par"""
+            
+        latex_report += r"""
+        \begin{figure}[h]
         \captionsetup{justification=centering}
-            \centering
-            \begin{minipage}[b]{0.33\textwidth}
+        \centering
+        \begin{minipage}[b]{0.33\textwidth}
                 \includegraphics[width=\linewidth]{scores_pc1_pc2.png}
-            \end{minipage}%
-            \begin{minipage}[b]{0.33\textwidth}
+        \end{minipage}%
+        \begin{minipage}[b]{0.33\textwidth}
                 \includegraphics[width=\linewidth]{scores_pc1_pc3.png}
-            \end{minipage}%
-            \begin{minipage}[b]{0.33\textwidth}
+        \end{minipage}%
+        \begin{minipage}[b]{0.33\textwidth}
                 \includegraphics[width=\linewidth]{scores_pc2_pc3.png}
-            \end{minipage}
-            \centering
-            \caption{Illustration of the pairwise projection of spectra onto the reduced 3 dimensional subspace, clustering, and sample selection
+        \end{minipage}
+        \centering
+        \caption{Illustration of the pairwise projection of spectra onto the reduced 3 dimensional subspace, clustering, and sample selection
             results: data points with the same color belong to the same cluster and data points colored in black correspond to the samples to be
             analyzed by a standard reference analytical procedure}
-            \label{pcaplots}
+        \label{pcaplots}
         \end{figure} """
-    
+
+        if 'PCA' in to_report or 'NMF' in to_report:
+            latex_report += r"""
+            \begin{figure}[h!]
+            \centering
+            \includegraphics[width=.6\linewidth]{loadings_plot.png}
+            \caption{Loadings plot}
+            \label{loadings}
+            \end{figure}
+            """
         if 'PCA' in to_report:
             latex_report += r"""
+            \newpage
+            \begin{raggedbottom}            
             \begin{figure}[h!]
             \centering
             \begin{minipage}[b]{0.33\textwidth}
@@ -165,31 +244,60 @@ def report(*args):
         \caption{Outliers detection plots;(a) and (b) , respectively, correspond to the hotelling and influence plots}
         \label{hotelling_and_influence}
         \end{figure}
+        \end{raggedbottom}
         """
-        latex_report += r"""A subset of {"""+to_report[8]+ r"""} samples were identified and selected to be representative and were suggested to be used for robust NIR calibration developement
-         , i.e, to be analyzed by adequate reference analytical procedures (generally requiring destructive sample preparation).
-         """
+
 
     elif 'Predictive model development' in to_report:
         latex_report += r"""\paragraph{}To develop a robust NIR calibration that formally correlates the spectral signature of the samples in the NIR region
           with the corresponding reference data obtained by analyzing the samples using a suitable reference analytical procedure,
-            a pipeline consisting of consecutively performing spectral signal correction followed by multivariable predictive modelling was applied.
-              Signal correction was performed by """
+            a pipeline consisting of consecutively performing spectral signal preprocessing followed by multivariable predictive modelling was applied.
+              Signal preprocessing was performed by """
+        
         if 'No_transformation' not in to_report:
             latex_report += r"""normalizing the raw spectra using {"""+signal_preprocess[to_report[3]]+ r""", then """
         
         if to_report[3] !="No_derivation":
             latex_report += r"""taking the {"""+to_report[2]+ r"""}-order derivative of a the {"""+to_report[4]+ r"""}-order Savitzky-Golay (SG)
             polynomial estimated over a moving window of {"""+to_report[5]+ r"""} data points"""
-        latex_report += r""". Subequently, the obtained data was split into two subsets using Kennard-Stone (KS) algorithm; a calibration (Cal) and Validation
-          (Val) subsets, the former ,consisting of 80\% of the data, was used for multivarible calibration development while the latter ,consisting of
-            the remaining 20\% of the data, was used for evaluating the predictive and the generalizability performance of the developed calibration."""
-        latex_report += r""" To optimally select hyperparameters of the model and the signal preprocessing methods, prevent that the model overfit the data,
-            and optimize the predictive performance of the model, 5-folds Cross Validation (CV) was performed."""
-        latex_report += r"""\paragraph{} Fig 5, and table 6 display descriptive summary of the input data, trainset, and testset."""
+
+        latex_report += r""". The obtained preprocessed spectra were appropriately matched with the reference values, then Kennard-Stone (KS) algorithm \cite{ferreira2021kennard} was used for 
+        to split the dataset into two data subsets (\cref{fig:Histogram} and \cref{table:desc_stats}) for regression modeling; training and testing subsets, the former, consisting of 80\% of the data, was used to
+          develop a {"""+reg_algo[to_report[6]]+ r"""} predictive model, while the latter, consisting of the remaining 20\% of the data, was used to evaluate its
+            predictive and generalizability performance.\par"""
+        
+        if any(i in to_report for i in ('PLS', 'TPE-iPLS')):
+            latex_report += r""" The latent variables for the {"""+to_report[6]+ r"""}-based model were estimated using the Non-linear Iterative Partial Least Squares (NIPALS) algorithm, first introduced by 
+            the econometrician and statistician Herman Ole Andreas Wold \cite{wold1975path}."""
+
+        latex_report += r""" The evaluation of the model performance was performed by measuring its scores on a set of agnostic statistical metrics widely used to evaluate NIR calibration models, 
+        specifically, the correlation coefficient (r), the coefficient of determination (R2), the Root Mean Squared Error (RMSE), the Mean Absolute Error (MAE), the Ratio of Performance to Deviation (RPD), the Ratio of 
+          performance to inter-quartile (RPIQ) \cite{BellonMaurel2010}.\par"""
+        latex_report += r""" To optimize the performance of the calibration, the hyperparameters of predictive model and the selection of signal preprocessing methods were
+         performed simultaneously and automatically using the Tree-Structured Parzen Estimator (TPE) as an optimization algorithm. The optimal preprocessing-hyperparameters combination
+           was assumed to minimize the RMSE of 5-folds Cross-Validation (CV).\par"""
+        
+        
     
         latex_report += r"""
-        \begin{figure}[h]
+        \begin{figure}[H]
         \centering
         \includegraphics[width=1\linewidth]{Histogram.png}
         \caption{Kde plot visualizing the distribution of the target variable, a subset of training, and testing sets}
@@ -197,12 +305,14 @@ def report(*args):
         \end{figure}
         """ + df0.style.format("${:.2f}$").to_latex( position_float = 'centering', hrules = True,
                                                      caption = 'Descriptive statistics of the target variable, subsets used to develop and validate the predictive model',
-                                                     label= 'reg_perf') +r""""""
+                                                     label= 'table:desc_stats') + r""""""
 
         
-        latex_report += r"""Predictive modelling development was performed using the {"""+reg_algo[to_report[6]]+ r"""} regression method."""
         latex_report += r"""
-        For fig.\ref{fig:CV}
+            \cref{fig:CV} and \cref{table:CV} show the CV results achieved with the best hyperparameters-preprocessing combination found by the optimization algorithm.
+            These results are important for evaluating the bias-variance tradeoff. This best combination was then
+            used to fit a predictive model whose explanatory (train) and predictive (test) performance was evaluated (\cref{table:reg_perf}).\par
+
             \begin{figure}[h]
             \captionsetup{justification=centering}
             \centering
@@ -212,38 +322,44 @@ def report(*args):
             \begin{minipage}[c]{0.5\textwidth}
                 \includegraphics[width=\linewidth]{meas_vs_pred_cv_all.png}
             \end{minipage}%
-            \caption{ Visualization of measured vs predicted values scatter plot for cross-validation }
-        \label{CV}
-        \end{figure}"""
-
-        if "Full-PLSR" in to_report:
-            latex_report += r"""the most important and influential spectral regions in the model, were visualized in fig.5"""
-        elif "Locally Weighted PLSR" in to_report:
-            """"""
-        elif "Interval-PLSR" in to_report:
-            latex_report += r"""Three intervalls were selected by the TPE-iPLS"""
-        
-        latex_report += r"""The calibration, CV, and prediction performance achieved by the developed model was evaluated
-          by measuring its scores on a set of agnostic statistical metrics widely used to evaluate NIR calibration models. 
-          specifically, the Root Mean Squared Error (RMSE), the Ratio of Performance to Deviation (RPD), the Ratio of 
-          performance to inter-quartile (RPIQ). A table summarizing the model performance is shown bellow(Table. 4).\par""""""
-        """ + df1.style.format("${:.2f}$").to_latex(position_float = 'centering', hrules = True, caption = 'Model performances summary', label= 'reg_perf') + r""""""
+            \caption{Visualization of measured vs predicted values for cross-validation}
+        \label{fig:CV}
+        \end{figure}
+        """ + df2.style.format("${:.2f}$").to_latex(position_float = 'centering', hrules = True, caption = 'Cross-Validation summary', label= 'table:CV') + r"""
+        """
+        latex_report += df1.style.format("${:.2f}$").to_latex(position_float = 'centering', hrules = True, caption = 'Model performances summary', label= 'table:reg_perf')
 
-        if "Full-PLSR" in to_report:
-        
-            latex_report += r""" To identify the important variables in the model, Variable Importance in Projection (VIP) test applied, and the important variables in the model were 
-            visualized in Fig.8 \par
+        if "PLS" in to_report:
+            latex_report += r"""To identify the most important and influential spectral regions in the model, Selectivity ratio (SR) \cite{kvalheim2020variable, farres2015comparison} test applied, and the important variables in the model were 
+            visualized in \cref{fig:importance}. \par
             
             \begin{figure}[h]
             \centering
             \includegraphics[width=1\linewidth]{Variable_importance.png}
             \caption{Visualizing important spectral regions identified in the PLS model on the raw and preprocessed average spectrum}
-            \label{fig:Histogram}
+            \label{fig:importance}
             \end{figure}
             """
-        
-        latex_report += r"""After numerically analyzing the performance of the model, a visual investigation (figs 7 and 8) of goodness of model fit was performed to identify potential
-          issues such as a pattern, that has not been captured by the model, or outliers.\par.
+
+        elif "LW-PLS " in to_report:
+            """"""
+        elif "TPE-iPLS" in to_report:
+            latex_report += r"""
+            Many studies have shown that interval selection methods, with different numbers of intervals, help reduce noise and model overfitting,
+              increase computational efficiency and the interpretability of the results, and maximize the model's predictive accuracy. For the current analysis, the selected spectral 
+              intervals or regions used for predictive model development are visualized in \cref{fig:importanceipls}. \par
+
+            \begin{figure}[h]
+            \centering
+            \includegraphics[width=1\linewidth]{Variable_importance.png}
+            \caption{Visualizing spectral regions used for TPE-iPLS model development on the raw and preprocessed average spectrum}
+            \label{fig:importanceipls}
+            \end{figure}
+            """
+
+        latex_report += r"""  Following a numerical analysis of the model performance, measured against predicted values \cite{pauwels2019evaluating} and residuals against measured \cite{belloto1985residual} plots (\cref{fig:diagnosis}) were analysed to 
+        visually assess the goodness of model fit and to detect potential flaws such as a pattern that the model failed to capture or outliers.\par
 
         \begin{figure}[h]
         \captionsetup{justification=centering}
@@ -255,11 +371,20 @@ def report(*args):
                 \includegraphics[width=\linewidth]{residuals_plot.png}
             \end{minipage}%
             \caption{Post-hoc analysis of the developed predictive model; measured vs predicted values (a) and measured vs residuals (b) plots }
-        \label{pcaplots}
+        \label{fig:diagnosis}
         \end{figure}"""            
-    latex_report += r"""
+        
+    latex_report += r"""
+    \clearpage
+    \section*{ACKNOWLEDGEMENTS}
+    This tool is provided by the Chemical Analysis Platform for Ecology - Montpellier, France.\\
+            Thanks to Abderrahim DIANE, Mouhcine MAIMOUNI, Alexandre GRANIER, Remy BEUGNON, Vincent NEGRE and Nicolas BARTHES.\\
+            Source code available at \href{https://src.koda.cnrs.fr/cefe/pace/nirs_workflow}{CNRS forge}.
+
     \fontsize{8}{9}\selectfont
-    \bibliographystyle{apalike}
+    \bibliographystyle{IEEEtran}
     % \bibliographystyle{abbrv}
     \bibliography{refs.bib}
     \clearpage
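The evaluation paragraph added above lists r, R2, RMSE, MAE, RPD and RPIQ. A sketch of those metrics under their usual chemometric definitions (RPD = SD/RMSE, RPIQ = IQR/RMSE); this is an assumption about what the module's metrics class computes, not its actual code:

import numpy as np
from scipy.stats import iqr, pearsonr
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

def nir_scores(y_true, y_pred):
    rmse = mean_squared_error(y_true, y_pred) ** 0.5
    return {'r': pearsonr(y_true, y_pred)[0],
            'R2': r2_score(y_true, y_pred),
            'RMSE': rmse,
            'MAE': mean_absolute_error(y_true, y_pred),
            'RPD': np.std(y_true, ddof=1) / rmse,   # SD of reference values / RMSE
            'RPIQ': iqr(y_true) / rmse}             # inter-quartile range / RMSE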
diff --git a/src/app.py b/src/app.py
index cc8f226ce4197e976a0f9ac28544e70fb3e25cf0..f7ae082d532b038faa680874f503df23b29cd538 100644
--- a/src/app.py
+++ b/src/app.py
@@ -5,23 +5,90 @@ from Modules import *
 from Class_Mod.DATA_HANDLING import *
 
 
-add_header()
 
+add_header()
 add_sidebar(pages_folder)
 
+
+st.markdown(
+    """
+    <style>
+    [data-testid="stAppViewContainer"]{
+    background-image: url("https://www.cefe.cnrs.fr/templates/rt_zephyr/images/backgrounds/img-sky.jpg");
+    background-size: cover;
+    }
+    .green { color: green; }
+    .centered-text {
+        text-align: center;
+        color: black;}
+    .header1 { color: black; font-size: 70px; font-family: monospace; }
+    .header2 { color: rgb(74,165,41); }
+    .header3 { color: green; }
+    .blackfont {color: black;}
+
+    button {
+    height: auto; border-color: black;
+    width: 40px;
+    padding-top: 10px !important;
+    padding-bottom: 10px !important;}
+    </style>
+    """,
+    unsafe_allow_html=True
+)
+
+from PIL import Image
+
+
 # Page header
 with st.container():
     st.subheader("Plateforme d'Analyses Chimiques pour l'Ecologie-PACE :goat:")
-    st.title("NIRS Utils")
-    st.markdown("#### This app allows users to perform visualization, pre-treatments, samples selection & predictions on their NIRS spectra.")
+    st.markdown("""
+        <h1 class="header1">Easily process <br> your spectral data<br> with <span class="green">NIRS Utils</span></h1>
+        """, unsafe_allow_html=True)
+
+# stack a few empty column rows for vertical spacing
+for i in range(5):
+    mm1s, mmd2 = st.columns([2, 2])
+image = Image.open("./images/general.jpg")  # relative path; was a machine-specific absolute path
+new_image = image.resize((700, 400))
+mmd2.image(new_image)
+
+mm1, mm, mm2 = st.columns([2, 1.5, 2])
+with mm1:    
+    # st.title("welcome to NIRS Utils")
+    st.markdown('<h2 class="centered-text">About</h2>', unsafe_allow_html=True)
+
+    st.markdown('<h3 class="centered-text"> NIRS Utils is a powerful tool that was developed to ease the spectral data processing process. It benifits from the synergy between web and data science frameworks to offer a user-friendly interface featured a variety of analytical capabilities. Further information can be found here.</h3>', unsafe_allow_html=True)
+    # st.markdown("We could add documentation here")
+    # st.write("Samples selection (PCA, [UMAP](https://umap-learn.readthedocs.io/en/latest/how_umap_works.html), ...), Predictive Modelling ([Pinard](https://github.com/GBeurier/pinard), [LWPLSR](https://doi.org/10.1002/cem.3209), ...), and Predictions using your data (CSV or DX files) and/or PACE NIRS Database.")
+
+with mm2:    
+    # st.title("welcome to NIRS Utils")
+    st.markdown('<h2 class="centered-text">Key Features</h2>', unsafe_allow_html=True)
+    st.markdown('<h3 class="centered-text"> Our app featured a variety of analytical capabilities that makes it the optimal solution for spectral data processing : <br> - Easy-to-use. <br> - Use advanced frameworks. <br> - Enhanced automation capabilities. <br> - Saves your time and efforts.</h3>', unsafe_allow_html=True)
+
+
+
+for i in range(3):  # empty column rows for vertical spacing
     header1, header2, header3,header4 = st.columns(4)
-    if header1.button("Inputs"):
-        st.switch_page(pages_folder / '4-inputs.py')
-    if header2.button("Samples Selection"):
-        st.switch_page(pages_folder / '1-samples_selection.py')
-    if header3.button("Models Creation"):
-        st.switch_page(pages_folder / '2-model_creation.py')
-    if header4.button("Predictions"):
-        st.switch_page(pages_folder / '3-prediction.py')
-    st.markdown("We could add documentation here")
-    st.write("Samples selection (PCA, [UMAP](https://umap-learn.readthedocs.io/en/latest/how_umap_works.html), ...), Predictive Modelling ([Pinard](https://github.com/GBeurier/pinard), [LWPLSR](https://doi.org/10.1002/cem.3209), ...), and Predictions using your data (CSV or DX files) and/or PACE NIRS Database.")
+
+header1, header2, header3,header4 = st.columns(4)
+if header1.button("Inputs"):
+    st.switch_page(pages_folder / '4-inputs.py')
+if header2.button("Samples Selection"):
+    st.switch_page(pages_folder / '1-samples_selection.py')
+if header3.button("Models Creation"):
+    st.switch_page(pages_folder / '2-model_creation.py')
+if header4.button("Predictions"):
+    st.switch_page(pages_folder / '3-prediction.py')
diff --git a/src/config/config.py b/src/config/config.py
index 4aaa13569b9389fee298accd511a8d07357d7e40..d143aa7c810a12adbc1a16dff1a66360e22fd16c 100644
--- a/src/config/config.py
+++ b/src/config/config.py
@@ -2,5 +2,5 @@
 from pathlib import Path
 
 # pdflatex_path = Path("C:/Users/maimouni/AppData/Local/Programs/MiKTeX/miktex/bin/x64/")
-pdflatex_path = Path("C:/Donnees/Logiciels/Papps/PortableApps/Notepad++Portable/LaTEX/texmfs/install/miktex/bin/")
-# pdflatex_path = Path("C:/Users/diane/AppData/Local/Programs/MiKTeX/miktex/bin/x64/")
+# pdflatex_path = Path("C:/Donnees/Logiciels/Papps/PortableApps/Notepad++Portable/LaTEX/texmfs/install/miktex/bin/")
+pdflatex_path = Path("C:/Users/diane/AppData/Local/Programs/MiKTeX/miktex/bin/x64/")
diff --git a/src/images/graphical_abstract.jpg b/src/images/graphical_abstract.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..c0b8d09f230d8f6ca36da81cbd59f633b2bb1aee
Binary files /dev/null and b/src/images/graphical_abstract.jpg differ
diff --git a/src/images/wp9684463-data-analytics-wallpapers.jpg b/src/images/wp9684463-data-analytics-wallpapers.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..6ca06796059f1eb207a3b214537fc1b27b0f03c3
Binary files /dev/null and b/src/images/wp9684463-data-analytics-wallpapers.jpg differ
diff --git a/src/pages/1-samples_selection.py b/src/pages/1-samples_selection.py
index 60e2de22c3a4ec06709230aed96b8e1fb564eee1..af79beaef0976b945fdb12b8f039842dfaf32390 100644
--- a/src/pages/1-samples_selection.py
+++ b/src/pages/1-samples_selection.py
@@ -21,7 +21,7 @@ add_sidebar(pages_folder)
 
 # algorithms available in our app
 dim_red_methods=['', 'PCA','UMAP', 'NMF']  # List of dimensionality reduction algos
-cluster_methods = ['', 'Kmeans','HDBSCAN', 'AP'] # List of clustering algos
+cluster_methods = ['', 'Kmeans','HDBSCAN', 'AP', 'KS', 'RDM'] # List of clustering algos
 selec_strategy = ['center','random']
 
 if st.session_state["interface"] == 'simple':
@@ -46,9 +46,9 @@ if st.session_state["interface"] == 'advanced':
     default_sample_selection_option = 0
 
 ################################### I - Data Loading and Visualization ########################################
-st.header("I - Spectral Data Visualization", divider='blue')
+st.title("Calibration Subset Selection")
 col2, col1 = st.columns([3, 1])
-
+col2.image("./images/graphical_abstract.jpg", use_column_width=True)  # relative path; was a machine-specific absolute path
 ## Preallocation of data structure
 spectra = pd.DataFrame()
 meta_data = pd.DataFrame()
@@ -66,8 +66,12 @@ selection = None
 selection_number = None
 
 # loader for datafile
-data_file = col1.file_uploader("Load NIRS Data", type=["csv","dx"], help=" :mushroom: select a csv matrix with samples as rows and lambdas as columns", key=5)
-if data_file:
+data_file = col1.file_uploader("Data file", type=["csv","dx"], help=" :mushroom: select a csv matrix with samples as rows and lambdas as columns", key=5)
+
+
+if not data_file:
+    col1.warning('⚠️ Please load a data file!')
+else:
     # Retrieve the extension of the file
     test = data_file.name[data_file.name.find('.'):]
     ## Load .csv file
@@ -101,12 +105,15 @@ if data_file:
 
     
 ## Visualize spectra
+st.header("I - Spectral Data Visualization", divider='blue')
 if not spectra.empty:
+    n_samples = spectra.shape[0]
+    nwl = spectra.shape[1]
     # retrieve columns name and rows name of spectra
     colnames = list(spectra.columns)
     rownames = [str(i) for i in list(spectra.index)]
     spectra.index = rownames
-
+    col2, col1 = st.columns([3, 1])
     with col2:
         fig, ax = plt.subplots(figsize = (30,7))
         if test =='.dx':
@@ -125,43 +132,49 @@ if not spectra.empty:
         plt.tight_layout()
         st.pyplot(fig)
         
+        # thin the lines for the exported report figure
+        for line in ax.get_lines():
+            line.set_linewidth(0.8)  # Set the desired line width here
+
         # Update the size of plot axis for exprotation to report
         l, w = fig.get_size_inches()
         fig.set_size_inches(8, 3)
         for label in (ax.get_xticklabels()+ax.get_yticklabels()):
-            ax.xaxis.label.set_size(10)
-            ax.yaxis.label.set_size(10)
+            ax.xaxis.label.set_size(9.5)
+            ax.yaxis.label.set_size(9.5)
         plt.tight_layout()
         fig.savefig("./Report/figures/spectra_plot.png", dpi=400) ## Export report
         fig.set_size_inches(l, w)# reset the plot size to its original size
         data_info = pd.DataFrame({'Name': [data_file.name],
-                                'Number of scanned samples': [spectra.shape[0]]},
+                                'Number of scanned samples': [n_samples]},
                                   index = ['Input file'])
+    with col1:
+        st.info('Information on the loaded data file')
         st.write(data_info) ## table showing the number of samples in the data file
 
 ############################## Exploratory data analysis ###############################
 st.header("II - Exploratory Data Analysis-Multivariable Data Analysis", divider='blue')
-scores, loadings, pc = st.columns([2, 3, 0.5])
-influence, hotelling, qexp = st.columns([2, 2, 1])
-st.header('III - Selected samples for chemical analysis', divider='blue')
 
 ###### 1- Dimensionality reduction ######
 t = pd.DataFrame # scores
 p = pd.DataFrame # loadings
 if not spectra.empty:
-    dim_red_method = pc.selectbox("Dimensionality reduction techniques: ", options = dim_red_methods, index = default_reduction_option, key = 37)
-    clus_method = pc.selectbox("Clustering techniques: ", options = cluster_methods, index = default_clustering_option, key = 38)
+    bb1, bb2, bb3, bb4, bb5, bb6, bb7 = st.columns([1,1,0.6,0.6,0.6,1.5,1.5])
+    dim_red_method = bb1.selectbox("Dimensionality reduction techniques: ", options = dim_red_methods, index = default_reduction_option, key = 37)
+    clus_method = bb2.selectbox("Clustering/sampling techniques: ", options = cluster_methods, index = default_clustering_option, key = 38)
     xc = standardize(spectra, center=True, scale=False)
 
 
-    if dim_red_method == dim_red_methods[1]:
+    if dim_red_method == dim_red_methods[0]:
+        bb1.warning('⚠️ Please choose an algorithm!')
+    elif dim_red_method == dim_red_methods[1]:
         dr_model = LinearPCA(xc, Ncomp=8)
 
     elif dim_red_method == dim_red_methods[2]:
         if not meta_data.empty:
             filter = md_df_st_.columns
             filter = filter.insert(0, 'Nothing')
-            col = pc.selectbox('Supervised UMAP by:', options= filter, key=108)
+            col = bb1.selectbox('Supervised UMAP by:', options= filter, key=108)
             if col == 'Nothing':
                 supervised = None
             else:
@@ -174,28 +187,37 @@ if not spectra.empty:
         dr_model = Nmf(spectra, Ncomp= 3)
 
     if dr_model:
-        axis1 = pc.selectbox("x-axis", options = dr_model.scores_.columns, index=0)
-        axis2 = pc.selectbox("y-axis", options = dr_model.scores_.columns, index=1)
-        axis3 = pc.selectbox("z-axis", options = dr_model.scores_.columns, index=2)
+        axis1 = bb3.selectbox("x-axis", options = dr_model.scores_.columns, index=0)
+        axis2 = bb4.selectbox("y-axis", options = dr_model.scores_.columns, index=1)
+        axis3 = bb5.selectbox("z-axis", options = dr_model.scores_.columns, index=2)
 
         t = pd.concat([dr_model.scores_.loc[:,axis1], dr_model.scores_.loc[:,axis2], dr_model.scores_.loc[:,axis3]], axis = 1)
 
 
 
 ###### II - clustering #######
+
 if not t.empty:
+    if dim_red_method == 'UMAP':
+        scores = st.container()
+    else:
+        scores, loadings= st.columns([3,3])
+
     tcr = standardize(t)
         # Clustering
     # 1- K-MEANS Clustering
+    if clus_method == cluster_methods[0]:
+        bb2.warning('⚠️ Please choose an algorithm!')
+
     if clus_method == cluster_methods[1]:
         cl_model = Sk_Kmeans(tcr, max_clusters = 25)
         ncluster = scores.number_input(min_value=2, max_value=25, value=cl_model.suggested_n_clusters_, label = 'Select the desired number of clusters')
-        fig2 = px.bar(cl_model.inertia_.T, y = 'inertia')
-        scores.write(f"Suggested n_clusters : {cl_model.suggested_n_clusters_}")
-        scores.plotly_chart(fig2,use_container_width=True)
-        img = pio.to_image(fig2, format="png")
-        with open("./Report/figures/Elbow.png", "wb") as f:
-                f.write(img)    
+        # fig2 = px.bar(cl_model.inertia_.T, y = 'inertia')
+        # scores.write(f"Suggested n_clusters : {cl_model.suggested_n_clusters_}")
+        # scores.plotly_chart(fig2,use_container_width=True)
+        # img = pio.to_image(fig2, format="png")
+        # with open("./Report/figures/Elbow.png", "wb") as f:
+        #         f.write(img)    
         data, labels, clu_centers = cl_model.fit_optimal(nclusters = ncluster)
 
     # 2- HDBSCAN clustering
@@ -211,14 +233,30 @@ if not t.empty:
         cl_model = AP(X = tcr)
         data, labels, clu_centers = cl_model.fit_optimal_
         ncluster = len(clu_centers)
+
+    elif clus_method == cluster_methods[4]:
+        rset = scores.number_input(min_value=0, max_value=100, value=20, label = 'The ratio of data to be sampled (%)')
+        cl_model = KS(x = tcr, rset = rset)
+        calset = cl_model.calset
+        labels = ["ind"]*n_samples
+        ncluster = "1"
+        selection_number = 'None'
+
+    elif clus_method == cluster_methods[5]:
+        rset = scores.number_input(min_value=0, max_value=100, value=20, label = 'The ratio of data to be sampled (%)')
+        cl_model = RDM(x = tcr, rset = rset)
+        calset = cl_model.calset
+        labels = ["ind"]*n_samples
+        ncluster = "1"
+        selection_number = 'None'
     
     if clus_method == cluster_methods[2]:
         #clustered = np.where(np.array(labels) != 'Non clustered')[0]
-        clustered = np.arange(tcr.shape[0])
+        clustered = np.arange(n_samples)
         non_clustered = np.where(np.array(labels) == 'Non clustered')[0]
 
     else:
-        clustered = np.arange(tcr.shape[0])
+        clustered = np.arange(n_samples)
         non_clustered = None
     
     new_tcr = tcr.iloc[clustered,:]    
@@ -229,72 +267,44 @@ samples_df_chem = pd.DataFrame
 selected_samples = []
 selected_samples_idx = []
 
-
-if labels:
+if not labels:
+    custom_color_palette = px.colors.qualitative.Plotly[:1]
+else:
     num_clusters = len(np.unique(labels))
     custom_color_palette = px.colors.qualitative.Plotly[:num_clusters]
     if clus_method:
-        selection = scores.radio('Select samples selection strategy:',
-                                    options = selec_strategy, index = default_sample_selection_option, key=102)
-    # Strategy 0
-    if selection == selec_strategy[0]:
-        # list samples at clusters centers - Use sklearn.metrics.pairwise_distances_argmin if you want more than 1 sample per cluster
-        closest, _ = pairwise_distances_argmin_min(clu_centers, new_tcr)
-        selected_samples_idx = np.array(new_tcr.index)[list(closest)]
-        selected_samples_idx = selected_samples_idx.tolist()
-        
-    #### Strategy 1
-    elif selection == selec_strategy[1]:
-        selection_number = scores.number_input('How many samples per cluster?',
-                                                min_value = 1, step=1, value = 3)
-        s = np.array(labels)[np.where(np.array(labels) !='Non clustered')[0]]
-        for i in np.unique(s):
-            C = np.where(np.array(labels) == i)[0]
-            if C.shape[0] >= selection_number:
-                # scores.write(list(tcr.index)[labels== i])
-                km2 = KMeans(n_clusters = selection_number)
-                km2.fit(tcr.iloc[C,:])
-                clos, _ = pairwise_distances_argmin_min(km2.cluster_centers_, tcr.iloc[C,:])
-                selected_samples_idx.extend(tcr.iloc[C,:].iloc[list(clos)].index)
-            else:
-                selected_samples_idx.extend(new_tcr.iloc[C,:].index.to_list())
-            # list indexes of selected samples for colored plot    
-
-    if selected_samples_idx:
-        if meta_data.empty:
-            sam1 = pd.DataFrame({'name': spectra.index[clustered][selected_samples_idx],
-                                'cluster':np.array(labels)[clustered][selected_samples_idx]},
-                                index = selected_samples_idx)
+        if clus_method in (cluster_methods[4], cluster_methods[5]):
+            selected_samples_idx = calset[1]
+            selection = 'None'
         else:
-            sam1 = meta_data.iloc[clustered,:].iloc[selected_samples_idx,:]
-            sam1.insert(loc=0, column='index', value=selected_samples_idx)
-            sam1.insert(loc=1, column='cluster', value=np.array(labels)[selected_samples_idx])
-        sam1.index = np.arange(len(selected_samples_idx))+1
-        st.write(f' - The total number of samples: {tcr.shape[0]}.\n- The number of selected samples for chemical analysis: {sam1.shape[0]} - {round(sam1.shape[0]/tcr.shape[0]*100, 1)}%.')
-        sam = sam1
-        if clus_method == cluster_methods[2]:
-            unclus = st.checkbox("Include non clustered samples (for HDBSCAN clustering)", value=True)
+            selection = scores.radio('Select samples selection strategy:',
+                                        options = selec_strategy, index = default_sample_selection_option, key=102)
+        # Strategy 0
+        if selection == selec_strategy[0]:
+            # list samples at clusters centers - Use sklearn.metrics.pairwise_distances_argmin if you want more than 1 sample per cluster
+            closest, _ = pairwise_distances_argmin_min(clu_centers, new_tcr)
+            selected_samples_idx = np.array(new_tcr.index)[list(closest)]
+            selected_samples_idx = selected_samples_idx.tolist()
+            
+        #### Strategy 1
+        elif selection == selec_strategy[1]:
+            selection_number = scores.number_input('How many samples per cluster?',
+                                                    min_value = 1, step=1, value = 3)
+            s = np.array(labels)[np.where(np.array(labels) !='Non clustered')[0]]
+            for i in np.unique(s):
+                C = np.where(np.array(labels) == i)[0]
+                if C.shape[0] >= selection_number:
+                    # scores.write(list(tcr.index)[labels== i])
+                    km2 = KMeans(n_clusters = selection_number)
+                    km2.fit(tcr.iloc[C,:])
+                    clos, _ = pairwise_distances_argmin_min(km2.cluster_centers_, tcr.iloc[C,:])
+                    selected_samples_idx.extend(tcr.iloc[C,:].iloc[list(clos)].index)
+                else:
+                    selected_samples_idx.extend(new_tcr.iloc[C,:].index.to_list())
+                # list indexes of selected samples for colored plot    
 
-        if clus_method == cluster_methods[2]:
-            if selected_samples_idx:
-                if unclus:
-                    if meta_data.empty:
-                        sam2 = pd.DataFrame({'name': spectra.index[non_clustered],
-                                            'cluster':['Non clustered']*len(spectra.index[non_clustered])},
-                                            index = spectra.index[non_clustered])
-                    else :
-                        sam2 = meta_data.iloc[non_clustered,:]
-                        sam2.insert(loc=0, column='index', value= spectra.index[non_clustered])
-                        sam2.insert(loc=1, column='cluster', value=['Non clustered']*len(spectra.index[non_clustered]))
-                    
-                    sam = pd.concat([sam1, sam2], axis = 0)
-                    sam.index = np.arange(sam.shape[0])+1
-                    st.write(f' The number of Non-clustered samples is {sam2.shape[0]} samples. Total selected samples: {sam1.shape[0] + sam2.shape[0]} - {round((sam1.shape[0] + sam2.shape[0]) / tcr.shape[0] * 100, 1)}%.')
-        else:
-            sam = sam1
-        st.write(sam)
 ################################      Plots visualization          ############################################
- 
+
 
     ## Scores
 if not t.empty:
@@ -412,12 +422,13 @@ if not spectra.empty:
                 f.write(img)
 #############################################################################################################
     if dim_red_method == dim_red_methods[1]:
+        influence, hotelling = st.columns([3, 3])
         with influence:
             st.write('Influence plot')
             # Laverage
             Hat =  t.to_numpy() @ np.linalg.inv(np.transpose(t.to_numpy()) @ t.to_numpy()) @ np.transpose(t.to_numpy())
             leverage = np.diag(Hat) / np.trace(Hat)
-            tresh3 = 2 * t.shape[1]/t.shape[0]
+            tresh3 = 2 * tcr.shape[1]/n_samples
             # Loadings
             p = pd.concat([dr_model.loadings_.loc[:,axis1], dr_model.loadings_.loc[:,axis2], dr_model.loadings_.loc[:,axis3]], axis = 1)
             # Matrix reconstruction
@@ -429,7 +440,7 @@ if not spectra.empty:
             # color with metadata
             if not meta_data.empty and clus_method:
                 if col == "None":
-                    l1 = ["Samples"]* t.shape[0]
+                    l1 = ["Samples"]* n_samples
 
                 elif col == clus_method:
                     l1 = labels
@@ -441,7 +452,7 @@ if not spectra.empty:
                 l1 = labels
 
             elif meta_data.empty and not clus_method:
-                l1 = ["Samples"]* t.shape[0]
+                l1 = ["Samples"]* n_samples
             
             elif not meta_data.empty and not clus_method:
                 l1 = list(map(str.lower,md_df_st_[col]))
@@ -455,7 +466,7 @@ if not spectra.empty:
             out3 = leverage > tresh3
             out4 = residuals > tresh4
 
-            for i in range(t.shape[0]):
+            for i in range(n_samples):
                 if out3[i]:
                     if not meta_data.empty:
                         ann =  meta_data.loc[:,'name'][i]
@@ -477,7 +488,8 @@ if not spectra.empty:
             fig.add_annotation(text= '(a)', align='center', showarrow= False, xref='paper', yref='paper', x=-0.125, y= 1,
                                              font= dict(color= "black", size= 35), bgcolor ='white', borderpad= 2, bordercolor= 'black', borderwidth= 3)
             fig.write_image('./Report/figures/influence_plot.png', engine = 'kaleido')
-            
+        
+        
         with hotelling:
             st.write('T²-Hotelling vs Q-residuals plot')
             # Hotelling
@@ -485,9 +497,8 @@ if not spectra.empty:
             # Q residuals: Q residuals represent the magnitude of the variation remaining in each sample after projection through the model
             residuals = np.diag(np.subtract(xc.to_numpy(), xp)@ np.subtract(xc.to_numpy(), xp).T)
 
-            I = t.shape[0]
-            fcri = sc.stats.f.isf(0.05, 3, I)
-            tresh0 = (3 * (I ** 2 - 1) * fcri) / (I * (I - 3))
+            fcri = sc.stats.f.isf(0.05, 3, n_samples)
+            tresh0 = (3 * (n_samples ** 2 - 1) * fcri) / (n_samples * (n_samples - 3))
             tresh1 = sc.stats.chi2.ppf(0.05, df = 3)
             
             fig = px.scatter(t, x = hotelling, y = residuals, color=labels if list(labels) else None,
@@ -500,7 +511,7 @@ if not spectra.empty:
             out1 = residuals > tresh1
 
             
-            for i in range(t.shape[0]):
+            for i in range(n_samples):
                 if out0[i]:
                     if not meta_data.empty:
                         ann =  meta_data.loc[:,'name'][i]
@@ -522,12 +533,49 @@ if not spectra.empty:
                                              font= dict(color= "black", size= 35), bgcolor ='white', borderpad= 2, bordercolor= 'black', borderwidth= 3)
             fig.write_image("./Report/figures/hotelling_plot.png", format="png")
 
+st.header('III - Selected Samples for Reference Analysis', divider='blue')
+if labels:
+    sel, info = st.columns([3, 1])
+    sel.write("Tabular identifiers of selected samples for reference analysis:")
+    if selected_samples_idx:
+        if meta_data.empty:
+            sam1 = pd.DataFrame({'name': spectra.index[clustered][selected_samples_idx],
+                                'cluster':np.array(labels)[clustered][selected_samples_idx]},
+                                index = selected_samples_idx)
+        else:
+            sam1 = meta_data.iloc[clustered,:].iloc[selected_samples_idx,:]
+            sam1.insert(loc=0, column='index', value=selected_samples_idx)
+            sam1.insert(loc=1, column='cluster', value=np.array(labels)[clustered][selected_samples_idx])
+        sam1.index = np.arange(len(selected_samples_idx))+1
+        info.info(f'Information!\n- The total number of samples: {n_samples}.\n- The number of samples selected for reference analysis: {sam1.shape[0]}.\n- The proportion of samples selected for reference analysis: {round(sam1.shape[0]/n_samples*100)}%.')
+        sam = sam1
+        if clus_method == cluster_methods[2]:
+            # HDBSCAN can leave samples unclustered; let the user decide whether to include them
+            unclus = sel.checkbox("Include non-clustered samples (for HDBSCAN clustering)", value=True)
+            if unclus:
+                if meta_data.empty:
+                    sam2 = pd.DataFrame({'name': spectra.index[non_clustered],
+                                        'cluster': ['Non clustered']*len(spectra.index[non_clustered])},
+                                        index = spectra.index[non_clustered])
+                else:
+                    sam2 = meta_data.iloc[non_clustered,:]
+                    sam2.insert(loc=0, column='index', value= spectra.index[non_clustered])
+                    sam2.insert(loc=1, column='cluster', value=['Non clustered']*len(spectra.index[non_clustered]))
+
+                sam = pd.concat([sam1, sam2], axis = 0)
+                sam.index = np.arange(sam.shape[0])+1
+                info.write(f'The number of non-clustered samples is {sam2.shape[0]}. Total selected samples: {sam1.shape[0] + sam2.shape[0]} ({round((sam1.shape[0] + sam2.shape[0]) / n_samples * 100, 1)}%).')
+        sel.write(sam)
 
 
-Nb_ech = str(tcr.shape[0])
-nb_clu = str(sam1.shape[0])
 # figs_list = os.listdir("./Report/figures")
 if data_file:
+    Nb_ech = str(n_samples)
+    nb_clu = str(sam1.shape[0])
     with st.container():
         if st.button("Download report"):
             latex_report = report.report('Representative subset selection', data_file.name, dim_red_method, clus_method, Nb_ech, ncluster, selection, selection_number, nb_clu,tcr, sam)
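The new section III above assembles the selection table by stacking the clustered selection with the optional non-clustered samples. A toy sketch of the same pandas pattern, with hypothetical sample names:

```python
import numpy as np
import pandas as pd

sam1 = pd.DataFrame({'name': ['s01', 's04', 's07'], 'cluster': ['1', '2', '2']})
sam2 = pd.DataFrame({'name': ['s02', 's09'], 'cluster': ['Non clustered'] * 2})

sam = pd.concat([sam1, sam2], axis=0)
sam.index = np.arange(sam.shape[0]) + 1   # 1-based index for display, as in the page
```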
diff --git a/src/pages/2-model_creation.py b/src/pages/2-model_creation.py
index 84c0439c6ff038f68020c0896baff4571d432707..01890edfc0c5fedd345e1ef0f809d700ff188f93 100644
--- a/src/pages/2-model_creation.py
+++ b/src/pages/2-model_creation.py
@@ -22,20 +22,14 @@ local_css(css_file / "style_model.css")
     ####################################### page Design #######################################
 st.title("Calibration Model Development")
 st.markdown("Create a predictive model, then use it for predicting your target variable (chemical data) from NIRS spectra")
-st.header("I - Data visualization", divider='blue')
 M0, M00 = st.columns([1, .4])
-st.header("II - Model creation", divider='blue')
-M1, M2 = st.columns([2 ,4])
-st.header("Cross-Validation results")
-cv1, cv2 = st.columns([2,2])
+M0.image("C:/Users/diane/Desktop/nirs_workflow/src/images/graphical_abstract.jpg", use_column_width=True)
+# st.header("II - Model creation", divider='blue')
+# st.header("Cross-Validation results")
+# cv1, cv2 = st.columns([2,2])
 cv3 = st.container()
 
-st.header("III - Model Diagnosis", divider='blue')
-M7, M8 = st.columns([2,2])
-M7.write('Predicted vs Measured values')
-M8.write('Residuals plot')
-M9 = st.container()
-M9.write("-- Save the model --")
+
     ##############################################################################################
 
 
@@ -84,8 +78,8 @@ if file == files_format[0]:
             
 
             spectra = pd.DataFrame(spectra).astype(float)
-            if not meta_data.empty :
-                st.write(meta_data)
+            # if not meta_data.empty :
+            #     st.write(meta_data)
 
             if spectra.shape[0] != y.shape[0]:
                 M00.warning('X and Y have different sample size')
@@ -117,7 +111,9 @@ elif file == files_format[1]:
         os.unlink(tmp_path)
 
 ### split the data
+st.header("I - Data visualization", divider='blue')
 if not spectra.empty and not y.empty:
+    M0, M000 = st.columns([1, .4])
     if np.array(spectra.columns).dtype.kind in ['i','f']:
         colnames = spectra.columns
     else:
@@ -153,21 +149,36 @@ if not spectra.empty and not y.empty:
     fig.savefig("./Report/figures/Histogram.png")
 
 
-    M0.write('Loaded data summary')
-    M0.write(pd.DataFrame([desc_stats(y_train),desc_stats(y_test),desc_stats(y)], index =['train', 'test', 'total'] ).round(2))
+    M000.write('Loaded data summary')
+    M000.write(pd.DataFrame([desc_stats(y_train),desc_stats(y_test),desc_stats(y)], index =['train', 'test', 'total'] ).round(2))
     stats=pd.DataFrame([desc_stats(y_train),desc_stats(y_test),desc_stats(y)], index =['train', 'test', 'total'] ).round(2)
     ####################################### Insight into the loaded data
 
 
     ####################################### Model creation ###################################################
-    reg_algo = ["","Full-PLSR", "Locally Weighted PLSR", "Interval-PLSR"]
-    regression_algo = M1.selectbox("Choose the algorithm for regression", options= reg_algo, key = 12, placeholder ="Choose an option")
+st.header("II - Model creation", divider='blue')
+if not spectra.empty and not y.empty:
+    M10, M20, M30, M40, M50 = st.columns([1,1,1,1,1])
+    modes = ['regression', 'classification']
+    mode = M10.radio("Supervised modelling mode", options=modes)
+    # the two modes currently share the same algorithm list; only the selectbox label differs
+    reg_algo = ["", "PLS", "LW-PLS", "TPE-iPLS"]
+    regression_algo = M20.selectbox(f"Choose the {mode} algorithm", options = reg_algo, key = 12, placeholder = "Choose an option")
+
     # split train data into nb_folds for cross_validation
     nb_folds = 3
     folds = KF_CV.CV(X_train, y_train, nb_folds)
 
     if not regression_algo:
-        M1.warning('Choose a modelling algorithm from the dropdown list !')
+        M20.warning('Choose a modelling algorithm from the dropdown list!')
+    else:
+        M1, M2 = st.columns([2, 4])
     if regression_algo == reg_algo[1]:
         # Train model with model function from application_functions.py
         Reg = Plsr(train = [X_train, y_train], test = [X_test, y_test], n_iter=1)
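`KF_CV.CV` is project-internal and its return type is not shown in this diff; as an assumed rough equivalent, a scikit-learn `KFold` split keyed by fold name could look like this:

```python
from sklearn.model_selection import KFold

def make_folds(X, y, nb_folds=3, seed=42):
    """Per-fold (train_idx, test_idx) pairs; y is kept only to mirror the call above."""
    kf = KFold(n_splits=nb_folds, shuffle=True, random_state=seed)
    return {f'Fold{i + 1}': split for i, split in enumerate(kf.split(X))}
```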
@@ -266,23 +277,24 @@ if not spectra.empty and not y.empty:
 
             
     elif regression_algo == reg_algo[3]:
-        s = M1.number_input(label='Enter the maximum number of intervals', min_value=1, max_value=6, value=3)
-        it = M1.number_input(label='Enter the number of iterations', min_value=2, max_value=10, value=3)
+        s = M20.number_input(label='Enter the maximum number of intervals', min_value=1, max_value=6, value=3)
+        it = M20.number_input(label='Enter the number of iterations', min_value=1, max_value=3, value=2)
         progress_text = "The model is being created. Please wait."
             
         Reg = TpeIpls(train = [X_train, y_train], test=[X_test, y_test], n_intervall = s, n_iter=it)
         pro = M1.progress(0, text="The model is being created. Please wait!")
         pro.empty()
-        M1.progress(100, text = "The model has successfully been  created!")            
+        M20.progress(100, text = "The model has been successfully created!")
         time.sleep(1)
         reg_model = Reg.model_
 
-        M2.write('-- Important Spectral regions used for model creation --')
+        
         intervalls = Reg.selected_features_.T
         intervalls_with_cols = Reg.selected_features_.T
         for i in range(intervalls.shape[0]):
             for j in range(intervalls.shape[1]):
                 intervalls_with_cols.iloc[i,j] = spectra.columns[intervalls.iloc[i,j]]
+        M2.write('-- Important spectral regions used for model creation --')
         M2.table(intervalls_with_cols)
         
     # elif regression_algo == reg_algo[4]:
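The nested loop above translates the column positions selected by TPE-iPLS into wavelength labels one cell at a time; a vectorised sketch of the same mapping, with a hypothetical spectral axis and interval positions:

```python
import numpy as np
import pandas as pd

wavelengths = np.arange(1100, 2500, 2)                           # assumed axis (nm)
intervalls = pd.DataFrame({'from': [10, 120], 'to': [45, 180]})  # hypothetical positions

# positional lookup over each column replaces the element-wise double loop
intervalls_with_cols = intervalls.apply(lambda col: wavelengths[col.to_numpy()])
```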
@@ -293,7 +305,9 @@ if not spectra.empty and not y.empty:
 
 
 #         ###############################################################################################################DDDVVVVVVVVVV
+        
 #        ################# Model analysis ############
+if not spectra.empty and not y.empty:
     if regression_algo in reg_algo[1:] and Reg is not None:
         #M2.write('-- Pretreated data (train) visualization and important spectral regions in the model --   ')
 
@@ -334,7 +348,8 @@ if not spectra.empty and not y.empty:
         # with M2:
         #     st.components.v1.html(htmlfig, height=600)
         
-        
+        st.header("Cross-Validation results")
+        cv1, cv2 = st.columns([2,2])
         ############
         cv2.write('-- Cross-Validation Summary--')
         cv2.write(Reg.CV_results_)
@@ -370,7 +385,7 @@ if not spectra.empty and not y.empty:
         
 # ##########
         M1.write("-- Model performance --")
-        if regression_algo != "Locally Weighted PLSR":
+        if regression_algo != reg_algo[2]:
             M1.dataframe(metrics(c = [y_train, yc], t = [y_test, yt], method='regression').scores_)
         else:
             M1.dataframe(metrics(c = [y_train, yc], t = [y_test, yt], method='regression').scores_)
@@ -381,15 +396,24 @@ if not spectra.empty and not y.empty:
         
         #my_circular_progress.st_circular_progress()
         #my_circular_progress.update_value(progress=20)
-        if regression_algo != "Locally Weighted PLSR":
+        if regression_algo != reg_algo[2]:
             a = reg_plot([y_train, y_test],[yc, yt], train_idx = train_index, test_idx = test_index)
         else:
             a = reg_plot([y_train, y_test],[yc, yt], train_idx = train_index, test_idx = test_index)
 
+st.header("III - Model Diagnosis", divider='blue')
+if not spectra.empty and not y.empty:
+    if regression_algo in reg_algo[1:] and Reg is not None:
+        
+        M7, M8 = st.columns([2,2])
+        M7.write('Predicted vs Measured values')
+        M8.write('Residuals plot')
+
+
         M7.pyplot(a)
         plt.savefig('./Report/figures/measured_vs_predicted.png')
         prep_para = Reg.best_hyperparams_
-        if regression_algo != "Locally Weighted PLSR":
+        if regression_algo != reg_algo[2]:
             prep_para.pop('n_components')
             for i in ['deriv','polyorder']:
                 if Reg.best_hyperparams_[i] == 0:
@@ -399,7 +423,7 @@ if not spectra.empty and not y.empty:
                 elif Reg.best_hyperparams_[i] > 1:
                     prep_para[i] = f"{Reg.best_hyperparams_[i]}nd"
         
-        if regression_algo != "Locally Weighted PLSR":
+        if regression_algo != reg_algo[2]:
             residual_plot = resid_plot([y_train, y_test], [yc, yt], train_idx=train_index, test_idx=test_index)
         else:
             residual_plot = resid_plot([y_train, y_test], [yc, yt], train_idx=train_index, test_idx=test_index)
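The surrounding block renders derivative and polynomial orders as ordinals ('1st', '2nd', ...). A small helper of the kind that could back this formatting, covering 'rd'/'th' and the 11-13 exceptions as well (a suggestion, not the page's actual code):

```python
def ordinal(n: int) -> str:
    """Format an integer as an English ordinal: 1 -> '1st', 2 -> '2nd', 11 -> '11th'."""
    if 10 <= n % 100 <= 20:
        suffix = 'th'
    else:
        suffix = {1: 'st', 2: 'nd', 3: 'rd'}.get(n % 10, 'th')
    return f"{n}{suffix}"
```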
@@ -407,10 +431,13 @@ if not spectra.empty and not y.empty:
         M8.pyplot(residual_plot)
         plt.savefig('./Report/figures/residuals_plot.png')
         
-        if regression_algo != "Locally Weighted PLSR":
+        if regression_algo != reg_algo[2]:
             rega = Reg.selected_features_  ##### ADD FEATURES IMPORTANCE PLOT
             
             #model_export = M1.selectbox("Choose way to export", options=["pickle", "joblib"], key=20)
+            
+        M9 = st.container()
+        M9.write("-- Save the model --")
         model_name = M9.text_input('Give it a name')
         date_time = datetime.datetime.strftime(datetime.date.today(), '_%Y_%m_%d_')
         if M9.button('Export Model'):
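A sketch of what the 'Export Model' button needs to do, assuming the fitted estimator is picklable; joblib (which the prediction page already uses for loading) is the usual choice for sklearn-style objects, and the folder below is an assumption:

```python
from pathlib import Path
import joblib

def export_model(model, name: str, folder: str = './data/models/') -> Path:
    """Serialise a fitted model to disk and return the file path."""
    path = Path(folder) / f"{name}.pkl"
    path.parent.mkdir(parents=True, exist_ok=True)
    joblib.dump(model, path)
    return path
```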
@@ -454,7 +481,7 @@ if not spectra.empty and not y.empty and regression_algo:
     if regression_algo in reg_algo[1:] and Reg is not None:
         fig, (ax1, ax2) = plt.subplots(2,1, figsize = (12, 4), sharex=True)
         ax1.plot(colnames, np.mean(X_train, axis = 0), color = 'black', label = 'Average spectrum (Raw)')
-        if regression_algo != "Locally Weighted PLSR_":
+        if regression_algo != reg_algo[2]:
             ax2.plot(colnames, np.mean(Reg.pretreated_spectra_ , axis = 0), color = 'black', label = 'Average spectrum (pretreated)')
         ax2.set_xlabel('Wavelenghts')
         plt.tight_layout()
@@ -471,8 +498,12 @@ if not spectra.empty and not y.empty and regression_algo:
                     else:
                         min, max = intervalls['from'][j], intervalls['to'][j]
                     
-                    eval(f'ax{i+1}').axvspan(min, max, color='#00ff00', alpha=0.5, lw=0)                
+                    eval(f'ax{i+1}').axvspan(min, max, color='#00ff00', alpha=0.5, lw=0)
+
+
         if regression_algo == reg_algo[1]:
+                # st.write(colnames[np.array(Reg.sel_ratio_.index)])
                 ax1.scatter(colnames[np.array(Reg.sel_ratio_.index)], np.mean(X_train, axis = 0)[np.array(Reg.sel_ratio_.index)],
                              color = 'red', label = 'Important variables')
                 ax2.scatter(colnames[Reg.sel_ratio_.index], np.mean(Reg.pretreated_spectra_, axis = 0)[np.array(Reg.sel_ratio_.index)],
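On shading the selected spectral regions: iterating over the axes objects gives the same result as the `eval(f'ax{i+1}')` pattern above without the `eval`. A self-contained sketch with made-up intervals:

```python
import matplotlib.pyplot as plt
import numpy as np

x = np.linspace(1100, 2500, 700)                 # assumed wavelength axis (nm)
fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True)
for ax in (ax1, ax2):
    ax.plot(x, np.sin(x / 100), color='black')   # stand-in spectrum
    for lo, hi in [(1200, 1300), (1800, 1950)]:  # hypothetical intervals
        ax.axvspan(lo, hi, color='#00ff00', alpha=0.5, lw=0)
```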
@@ -483,6 +514,9 @@ if not spectra.empty and not y.empty and regression_algo:
         M2.write('-- Visualization of the spectral regions used for model creation --')
         fig.savefig("./Report/figures/Variable_importance.png")
         M2.pyplot(fig)
+        # if regression_algo == reg_algo[3]:
+        #     M2.write('-- Important Spectral regions used for model creation --')
+        #     M2.table(intervalls_with_cols)
 
 ## Load .dx file
 if Reg is not None:
@@ -490,14 +524,21 @@ if Reg is not None:
         if st.button("Download the report"):
             if regression_algo == reg_algo[1]:
                     latex_report = report.report('Predictive model development', file_name, stats, list(Reg.best_hyperparams_.values()), regression_algo, model_per, cv_results)
-                    report.compile_latex()
+    
+            elif regression_algo in (reg_algo[2], reg_algo[3]):
+                    # LW-PLS and TPE-iPLS report the same preprocessing hyperparameters
+                    latex_report = report.report('Predictive model development', file_name, stats,
+                                                  list({key: Reg.best_hyperparams_[key] for key in ['deriv', 'normalization', 'polyorder', 'window_length'] if key in Reg.best_hyperparams_}.values()), regression_algo, model_per, cv_results)
+
             if regression_algo is None:
                 st.warning('Data processing has not been performed or finished yet!', icon = "⚠️")
             else:
                 pass
-
+            report.compile_latex()
         else:
-            pass
-
-
- 
\ No newline at end of file
+            pass
\ No newline at end of file
diff --git a/src/pages/3-prediction.py b/src/pages/3-prediction.py
index dcd59360d2870b62dfdd706e86b1d9df8c7fb782..7389167932eabcb6874540a0b303f913a53e204a 100644
--- a/src/pages/3-prediction.py
+++ b/src/pages/3-prediction.py
@@ -15,27 +15,29 @@ add_sidebar(pages_folder)
 
 local_css(css_file / "style_model.css")
 
+st.title("Prediction making using a previously developed model")
+M10, M20= st.columns([2, 1])
+M10.image("C:/Users/diane/Desktop/nirs_workflow/src/images/graphical_abstract.jpg", use_column_width=True)
 
-st.header("Data loading", divider='blue')
-M1, M2= st.columns([2, 1])
+# M1, M2= st.columns([2, 1])
 
-st.header('Data preprocessing', divider='blue')
-M3, M4= st.columns([2, 1])
 
-st.header("Prediction making", divider='blue')
-M5, M6 = st.columns([2, 0.01])
+
+# st.header("Prediction making", divider='blue')
+# M5, M6 = st.columns([2, 0.01])
 
 
 files_format = ['.csv', '.dx']
-file = M2.file_uploader("Select NIRS Data to predict", type = files_format, help=" :mushroom: select a csv matrix with samples as rows and lambdas as columns")
+file = M20.file_uploader("Select NIRS Data to predict", type = files_format, help=" :mushroom: select a csv matrix with samples as rows and lambdas as columns")
 export_folder = './data/predictions/'
 export_name = 'Predictions_of_'
 reg_algo = ["Interval-PLS"]
 pred_data = pd.DataFrame()
 loaded_model = None
 
-
-if file:
+if not file:
+    M20.warning('Upload your spectral data file here!')
+else:
     test = file.name[file.name.find('.'):]
     export_name += file.name[:file.name.find('.')]
 
@@ -67,7 +69,9 @@ if file:
 
 
 # Load parameters
+st.header("I - Spectral data visualization", divider='blue')
 if not pred_data.empty:# Load the model with joblib
+    M1, M2= st.columns([2, 1])
     M1.write('Raw spectra')
     fig = plot_spectra(pred_data, xunits = 'lab', yunits = "meta_data.loc[:,'yunits'][0]")
     M1.pyplot(fig)
@@ -95,7 +99,9 @@ if not pred_data.empty:
 
 ################################################################################################
 ## plot preprocessed spectra
+st.header('II - Spectral data preprocessing', divider='blue')
 if not preprocessed.empty:
+    M3, M4= st.columns([2, 1])
     M3.write('Preprocessed spectra')
     fig2 = plot_spectra(preprocessed, xunits = 'lab', yunits = "meta_data.loc[:,'yunits'][0]")
     M3.pyplot(fig2)
@@ -104,7 +110,9 @@ if not preprocessed.empty:
     M4.write('The spectra were preprocessed using:\n'+SG+"\n"+Norm)
 
 ################### Predictions making  ##########################
+st.header("III - Prediction making", divider='blue')
 if not pred_data.empty:# Load the model with joblib
+    M5, M6 = st.columns([2, 0.01])
     #dir = os.listdir('data/models/')[1:]
     dir = os.listdir('data/models/')
     dir.insert(0,'')
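Condensed, the flow this page implements: pick a file from `data/models/`, load it with joblib, and predict on the uploaded spectra. A sketch with hypothetical file names, assuming the saved object exposes a `predict` method:

```python
import joblib
import pandas as pd

loaded_model = joblib.load('data/models/my_plsr_model.pkl')      # hypothetical name
pred_data = pd.read_csv('spectra_to_predict.csv', index_col=0)   # samples x wavelengths
result = pd.DataFrame(loaded_model.predict(pred_data),
                      index=pred_data.index, columns=['prediction'])
```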
diff --git a/src/pages/4-inputs.py b/src/pages/4-inputs.py
index 671182332587000596b195cbeb46c1a573dd0895..5b3369fd37768eef214e320b5c13ca247159fa2d 100644
--- a/src/pages/4-inputs.py
+++ b/src/pages/4-inputs.py
@@ -25,7 +25,8 @@ with st.container():
     with st.form(key='my_form'):
 
 
-        st.header("Fill in your details:",divider="blue")
+        st.header("Complete and send the following form with the data context:",divider="blue")
+        st.warning('Make sure that the form is well completed, because the reliability of the results depends mainly on it !', icon="⚠️")
 
 
         col1, col3,col2 = st.columns((2,0.5,2))
diff --git a/src/style/header.py b/src/style/header.py
index 0c01027999a443d8899d52547030651cea7de5aa..30719da2c2df503a922ec3d7102c666c21922230 100644
--- a/src/style/header.py
+++ b/src/style/header.py
@@ -2,14 +2,15 @@ from Packages import *
 def add_header():
     st.markdown(
         """
-        <div style="width: 100%; background-color: #4682B4; padding: 10px; margin-bottom: 10px;">
-          <h1 style="text-align: center; color: white;">PACE - MEEB / CEFE</h1>
-          <h2 style="text-align: center; color: white;">NIRS Utils</h2>
+        <div style="width: 100%;height: 170px; background-color: rgb(122,176,199); padding: 10px; margin-bottom: 10px; ">
+          <h1 style="text-align: center; color: green;">PACE - MEEB / CEFE</h1>
+          <h2 style="text-align: center; color: green;">NIRS Utils</h2>
         </div>
         """,
         unsafe_allow_html=True,
     )
 
+
 def add_sidebar(pages_folder):
     if 'interface' not in st.session_state:
         st.session_state['interface'] = 'simple'
diff --git a/src/style/style_model.css b/src/style/style_model.css
index b346c7507c170f87f6893f5f2aa319b3ce0974ff..b6399243cd4f13a6bc74074bc8a91e4a25573551 100644
--- a/src/style/style_model.css
+++ b/src/style/style_model.css
@@ -1,14 +1,24 @@
 /* CSS Snippet from W3schools: https://www.w3schools.com/howto/howto_css_contact_form.asp */
-div[data-testid="column"]:nth-of-type(1) {
-    border:2px solid rgba(0,0,0, .4);border-radius: 20px;padding: 15px;
-}
 
-div[data-testid="column"]:nth-of-type(2) {
-    border:2px solid rgba(0,0,0, .4);border-radius: 20px;padding: 15px;
+/* div[data-testid="column"]:nth-of-type(2) {
+    border:2px solid rgba(0,0,0, 1);border-radius: 20px;padding: 15px;
     text-align: left;
+} */
+
+div[data-testid="column"]:nth-of-type(1) {
+    border: 2px solid rgba(0, 0, 0, 1);
+    border-radius: 20px;
+    padding: 15px;
+    text-align: left;
+    box-shadow: 0 4px 8px rgba(0, 0, 0, 0.3); /* soft drop shadow around the column card */
+    transition: box-shadow 0.3s ease-in-out; /* Smooth transition for the shadow */
 }
 
-div[data-testid="column"]:nth-of-type(3) {
-    border:2px solid rgba(0,0,0, .4);border-radius: 20px;padding: 15px;
+div[data-testid="column"]:nth-of-type(2) {
+    border: 2px solid rgba(0, 0, 0, 1);
+    border-radius: 20px;
+    padding: 15px;
     text-align: left;
+    box-shadow: 0 4px 8px rgba(0, 0, 0, 0.3); /* soft drop shadow around the column card */
+    transition: box-shadow 0.3s ease-in-out; /* Smooth transition for the shadow */
 }