diff --git a/src/pages/0-inputs.py b/src/pages/0-inputs.py
index e5c229ad496cd4c0394fe2baa48c0499d7677510..be662b3cc18caa4253096f26618322f562aa88b9 100644
--- a/src/pages/0-inputs.py
+++ b/src/pages/0-inputs.py
@@ -6,7 +6,7 @@ st.set_page_config(page_title="NIRS Utils", page_icon=":goat:", layout="wide")
 
 
 # layout
-UiComponents(pagespath = pages_folder, csspath= css_file,imgpath=image_path ,
+ui_components(pagespath = pages_folder, csspath= css_file,imgpath=image_path ,
              header=True, sidebar= True, bgimg=True, colborders=False)
 
 
diff --git a/src/pages/1-samples_selection.py b/src/pages/1-samples_selection.py
index 25b1e83feff5aa62a71b1a8edf3d722edf571df6..2ec62d926efd4723ce0eb5551a2512e18bece7bd 100644
--- a/src/pages/1-samples_selection.py
+++ b/src/pages/1-samples_selection.py
@@ -4,7 +4,7 @@ st.set_page_config(page_title="NIRS Utils", page_icon=":goat:", layout="wide")
 
 
 # layout
-UiComponents(pagespath=pages_folder, csspath=css_file, imgpath=image_path,
+ui_components(pagespath=pages_folder, csspath=css_file, imgpath=image_path,
              header=True, sidebar=True, bgimg=False, colborders=True)
 st.header("Calibration Subset Selection")  # page title
 st.markdown(
diff --git a/src/pages/2-model_creation.py b/src/pages/2-model_creation.py
index c0d731bfaa0e1d9b17bd75f89d3c8dadc9eb8c2f..99b3113bb794b8fa5fe905ef3613ff91cfc687ef 100644
--- a/src/pages/2-model_creation.py
+++ b/src/pages/2-model_creation.py
@@ -1,27 +1,37 @@
 from common import *
+
 st.set_page_config(page_title="NIRS Utils", page_icon=":goat:", layout="wide")
 
 
 # layout
-UiComponents(pagespath=pages_folder, csspath=css_file, imgpath=image_path,
-             header=True, sidebar=True, bgimg=False, colborders=True)
+ui_components(
+    pagespath=pages_folder,
+    csspath=css_file,
+    imgpath=image_path,
+    header=True,
+    sidebar=True,
+    bgimg=False,
+    colborders=True,
+)
 
 
-st_var(variable='counter', initialize=True, update=False, type='increment')
+st_var(variable="counter", initialize=True, update=False, type="increment")
 
 ################ clean the results dir #############
-HandleItems.delete_files(keep=['.py', '.pyc', '.bib'])
-Path('./report/results/model').mkdir(parents=True, exist_ok=True)
+HandleItems.delete_files(keep=[".py", ".pyc", ".bib"])
+Path("./report/results/model").mkdir(parents=True, exist_ok=True)
 # ####################################### page preamble #######################################
 st.header("Calibration Model Development")  # page title
-st.markdown("Create a predictive model, then use it for predicting your target variable (chemical data) from NIRS spectra")
-c0, c1 = st.columns([1, .4])
-c0.image("./images/model_creation.png",
-         use_column_width=True)  # graphical abstract
+st.markdown(
+    "Create a predictive model, then use it to predict your target variable (chemical data) from NIRS spectra"
+)
+c0, c1 = st.columns([1, 0.4])
+c0.image("./images/model_creation.png", use_column_width=True)  # graphical abstract
 
 ################################################################# Begin : I- Data loading and preparation ######################################
-filetype = c1.radio('Select files format:', options=[
-                    'csv', 'dx'], horizontal=True)  # Select a file format
+filetype = c1.radio(
+    "Select files format:", options=["csv", "dx"], horizontal=True
+)  # Select a file format
 
 x_block = DataFrame()  # preallocate the spectral data block
 y_block = DataFrame()  # preallocate the target(s) data block
@@ -31,99 +41,136 @@ predictors = []
 with c1:
     match filetype:
         # load csv file
-        case 'csv':
-            hash_ = ''
+        case "csv":
+            hash_ = ""
             meta_y, meta_y = DataFrame, DataFrame
             from utils.data_parsing import csv_parser
+
             # Load X-block data
-            xfile = st.file_uploader("Select NIRS Data", type="csv",
-                                     help=" :mushroom: select a csv matrix with samples as rows and lambdas as columns")
+            xfile = st.file_uploader(
+                "Select NIRS Data",
+                type="csv",
+                help=":mushroom: select a CSV matrix with samples as rows and wavelengths (lambdas) as columns",
+            )
             if xfile:
-                c1_1, c2_2 = st.columns([.5, .5])
+                c1_1, c2_2 = st.columns([0.5, 0.5])
                 with c1_1:
-                    decx = st.radio('decimal(x):', options=[
-                                    ".", ","], horizontal=True)
-                    sepx = st.radio("separator(x):", options=[
-                                    ";", ","], horizontal=True)
+                    decx = st.radio("decimal(x):", options=[".", ","], horizontal=True)
+                    sepx = st.radio(
+                        "separator(x):", options=[";", ","], horizontal=True
+                    )
                 with c2_2:
-                    phdrx = st.radio("header(x): ", options=[
-                                     "yes", "no"], horizontal=True)
-                    pnamesx = st.radio("samples name(x):", options=[
-                                       "yes", "no"], horizontal=True)
+                    phdrx = st.radio(
+                        "header(x): ", options=["yes", "no"], horizontal=True
+                    )
+                    pnamesx = st.radio(
+                        "samples name(x):", options=["yes", "no"], horizontal=True
+                    )
 
                 hdrx = 0 if phdrx == "yes" else None
                 namesx = 0 if pnamesx == "yes" else None
                 try:
-                    hash_ = ObjectHash(current=hash_, add=[
-                                       xfile.getvalue(), decx, sepx, phdrx, pnamesx])
+                    hash_ = ObjectHash(
+                        current=hash_,
+                        add=[xfile.getvalue(), decx, sepx, phdrx, pnamesx],
+                    )
                     x_block, meta_x = csv_parser(
-                        path=xfile, decimal=decx, separator=sepx, index_col=namesx, header=hdrx, change=None)
+                        path=xfile,
+                        decimal=decx,
+                        separator=sepx,
+                        index_col=namesx,
+                        header=hdrx,
+                        change=None,
+                    )
                     if x_block.shape[1] > 20:
                         st.success(
-                            "The data have been loaded successfully and spectral data was successfully detected, you might need to tune dialect.", icon="✅")
+                            "The data has been loaded and spectral data was detected successfully; if the result looks off, you may still need to tune the dialect settings.",
+                            icon="✅",
+                        )
                     else:
                         st.warning(
-                            "The data have been loaded successfully but spectral data was not detected.")
+                            "The data has been loaded successfully, but spectral data was not detected."
+                        )
                 except:
                     st.error(
-                        'Error: The xfile has not been loaded successfully, please consider tuning the dialect settings!')
+                        "Error: The xfile could not be loaded successfully; please consider tuning the dialect settings!"
+                    )
             else:
-                st.info('Info: Insert your spectral data file above!')
+                st.info("Info: Insert your spectral data file above!")
 
             # Load Y-block data
-            yfile = st.file_uploader("Select corresponding Chemical Data", type="csv",
-                                     help=" :mushroom: select a csv matrix with samples as rows and chemical values as a column")
+            yfile = st.file_uploader(
+                "Select corresponding Chemical Data",
+                type="csv",
+                help=":mushroom: select a CSV matrix with samples as rows and chemical values as a column",
+            )
             if yfile:
-                c1_1, c2_2 = st.columns([.5, .5])
+                c1_1, c2_2 = st.columns([0.5, 0.5])
                 with c1_1:
-                    decy = st.radio('decimal(y):', options=[
-                                    ".", ","], horizontal=True)
-                    sepy = st.radio("separator(y):", options=[
-                                    ";", ","], horizontal=True)
+                    decy = st.radio("decimal(y):", options=[".", ","], horizontal=True)
+                    sepy = st.radio(
+                        "separator(y):", options=[";", ","], horizontal=True
+                    )
                 with c2_2:
-                    phdry = st.radio("header(y): ", options=[
-                                     "yes", "no"], horizontal=True)
-                    pnamesy = st.radio("samples name(y):", options=[
-                                       "yes", "no"], horizontal=True)
+                    phdry = st.radio(
+                        "header(y): ", options=["yes", "no"], horizontal=True
+                    )
+                    pnamesy = st.radio(
+                        "samples name(y):", options=["yes", "no"], horizontal=True
+                    )
 
                 hdry = 0 if phdry == "yes" else None
                 namesy = 0 if pnamesy == "yes" else None
                 try:
-                    hash_ = ObjectHash(current=hash_, add=[
-                                       yfile.getvalue(), decy, sepy, phdry, pnamesy])
+                    hash_ = ObjectHash(
+                        current=hash_,
+                        add=[yfile.getvalue(), decy, sepy, phdry, pnamesy],
+                    )
                     y_block, meta_y = csv_parser(
-                        path=yfile, decimal=decy, separator=sepy, index_col=namesy, header=hdry, change=None)
+                        path=yfile,
+                        decimal=decy,
+                        separator=sepy,
+                        index_col=namesy,
+                        header=hdry,
+                        change=None,
+                    )
                     if y_block.shape[1] >= 1:
                         st.success(
-                            "The data have been loaded successfully and the target data was successfully detected.", icon="✅")
+                            "The data has been loaded and the target data was detected successfully.",
+                            icon="✅",
+                        )
                     else:
                         st.warning(
-                            "The data have been loaded successfully but no target data was not detected.")
+                            "The data has been loaded successfully but no target data was detected."
+                        )
                 except:
                     st.error(
-                        'Error: The yfile has not been loaded successfully, please consider tuning the dialect settings!')
+                        "Error: The yfile could not be loaded successfully; please consider tuning the dialect settings!"
+                    )
             else:
-                st.info('Info: Insert your target data file above!')
+                st.info("Info: Insert your target data file above!")
 
             # AFTER LOADING BOTH X AND Y FILES
             if xfile and yfile:
                 # create a str instance for storing the hash of both x and y data
-                xy_str = ''
+                xy_str = ""
                 from io import StringIO
+
                 for i in ["xfile", "yfile"]:
-                    stringio = StringIO(
-                        eval(str(i)+'.getvalue().decode("utf-8")'))
+                    stringio = StringIO(eval(str(i) + '.getvalue().decode("utf-8")'))
                     xy_str += str(stringio.read())
                 file_name = str(xfile.name) + str(yfile.name)
 
                 if None in [namesx, namesy]:
                     st.warning(
-                        'Warning: Ensure each row in one file matches the same sample in the other file to maintain correct x-y data alignment.')
+                        "Warning: Ensure each row in one file matches the same sample in the other file to maintain correct x-y data alignment."
+                    )
 
         # jcamp file
-        case 'dx':
+        case "dx":
             file = st.file_uploader(
-                "Select Data", type=".dx", help=" :mushroom: select a dx file")
+                "Select Data", type=".dx", help=" :mushroom: select a dx file"
+            )
             if file:
                 file_name = str(file.name)
                 try:
@@ -133,82 +180,136 @@ with c1:
                         tmp.write(file.read())
                         tmp_path = tmp.name
                     from utils.data_parsing import jcamp_parser
+
                     x_block, y_block, meta_data = jcamp_parser(
-                        path=tmp_path, include='all', change=hash_)
+                        path=tmp_path, include="all", change=hash_
+                    )
                     st.success(
-                        "Info: The data have been loaded successfully", icon="✅")
+                        "The data has been loaded successfully", icon="✅"
+                    )
                 except:
                     st.error(
-                        'Error: The input file has not been loaded successfully, please consider tuning the dialect settings!')
+                        "Error: The input file could not be loaded successfully; please consider tuning the dialect settings!"
+                    )
 
             else:
-                st.info('Info: Load your file here!')
+                st.info("Info: Load your file here!")
 
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~
 if x_block.shape[1] > 0 and y_block.shape[1] > 0:
     if len(x_block.index) > len(set(x_block.index)):
         c1.warning(
-            "X-block:Duplicate sample IDs found. Suffixes (#1, #2, ...) have been added to duplicate IDs.")
-        x_block.index = x_block.index.where(~x_block.index.duplicated(keep=False),
-                                            x_block.groupby(x_block.index).cumcount().add(1).astype(str).radd(x_block.index.astype(str) + '#'))
+            "X-block: Duplicate sample IDs found. Suffixes (#1, #2, ...) have been added to duplicate IDs."
+        )
+        x_block.index = x_block.index.where(
+            ~x_block.index.duplicated(keep=False),
+            x_block.groupby(x_block.index)
+            .cumcount()
+            .add(1)
+            .astype(str)
+            .radd(x_block.index.astype(str) + "#"),
+        )
     if len(y_block.index) > len(set(y_block.index)):
         c1.warning(
-            "Y-block:Duplicate sample IDs found. Suffixes (#1, #2, ...) have been added to duplicate IDs.")
-        y_block.index = y_block.index.where(~y_block.index.duplicated(keep=False),
-                                            y_block.groupby(y_block.index).cumcount().add(1).astype(str).radd(y_block.index.astype(str) + '#'))
+            "Y-block: Duplicate sample IDs found. Suffixes (#1, #2, ...) have been added to duplicate IDs."
+        )
+        y_block.index = y_block.index.where(
+            ~y_block.index.duplicated(keep=False),
+            y_block.groupby(y_block.index)
+            .cumcount()
+            .add(1)
+            .astype(str)
+            .radd(y_block.index.astype(str) + "#"),
+        )
     y = DataFrame()
     if y_block.shape[1] > 1:
-        options = [''] + y_block.columns.tolist()
+        options = [""] + y_block.columns.tolist()
     elif y_block.shape[1] == 1:
         options = y_block.columns.tolist()
 
     # drop down list to select the target variable
-    yname = c1.selectbox('Select a target:', options=options,
-                         disabled=True if len(options) <= 1 else False,
-                         format_func=fmt)
+    yname = c1.selectbox(
+        "Select a target:",
+        options=options,
+        disabled=True if len(options) <= 1 else False,
+        format_func=fmt,
+    )
     # define the target variable
     if not x_block.empty and yname:
-        if len(y_block.loc[:, yname].dropna().index.intersection(x_block.dropna().index)) > 0:
-            y = y_block.loc[y_block.loc[:, yname].dropna().index.intersection(
-                x_block.dropna().index), yname]  # 1d
-            x_block = x_block.loc[y_block.loc[:, yname].dropna(
-            ).index.intersection(x_block.dropna().index), :]
+        if (
+            len(
+                y_block.loc[:, yname]
+                .dropna()
+                .index.intersection(x_block.dropna().index)
+            )
+            > 0
+        ):
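+            # keep only the samples with non-missing values in both the X and Y blocks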
+            y = y_block.loc[
+                y_block.loc[:, yname]
+                .dropna()
+                .index.intersection(x_block.dropna().index),
+                yname,
+            ]  # 1d
+            x_block = x_block.loc[
+                y_block.loc[:, yname]
+                .dropna()
+                .index.intersection(x_block.dropna().index),
+                :,
+            ]
         else:
             c1.error(
-                'X-Y blocks matching issue: X_block and Y_block have no common samples name !')
+                "X-Y block matching issue: X_block and Y_block have no common sample names!"
+            )
 
     else:
-        c1.info('Info: Select the target analyte from the drop down list!')
+        c1.info("Info: Select the target analyte from the dropdown list!")
 
 if not y.empty:
     if len(y.index) > len(set(y.index)):
         st.warning(
-            "Duplicate sample IDs found. Suffixes (#1, #2, ...) have been added to duplicate IDs.")
-        meta_y['names'] = y.index
+            "Duplicate sample IDs found. Suffixes (#1, #2, ...) have been added to duplicate IDs."
+        )
+        meta_y["names"] = y.index
         # Keep all duplicates (True for replicated)
         mask = y.index.duplicated(keep=False)
         # For the duplicated sample_ids, apply suffix (_1, _2, etc.)
-        y.index = y.index.where(~mask,
-                                y.groupby(y.index).cumcount().add(1).astype(str).radd(y.index.astype(str) + '#'))
+        y.index = y.index.where(
+            ~mask,
+            y.groupby(y.index)
+            .cumcount()
+            .add(1)
+            .astype(str)
+            .radd(y.index.astype(str) + "#"),
+        )
 
     if filetype == "csv":
         # Find the intersection of index names
-        if not meta_y.empty and not meta_x.empty:  # both of xfile and yfile includes meta_data
-            common_samples = meta_y.loc[y.index].index.intersection(
-                meta_x.index)
+        if (
+            not meta_y.empty and not meta_x.empty
+        ):  # both xfile and yfile include meta_data
+            common_samples = meta_y.loc[y.index].index.intersection(meta_x.index)
             if len(common_samples) > 0:
                 lens = list(meta_y.columns) + list(meta_y.columns)
                 if len(lens) > len(set(lens)):
                     from collections import Counter
-                    duplicates = [item for item, count in Counter(
-                        lens).items() if count > 1]
+
+                    duplicates = [
+                        item for item, count in Counter(lens).items() if count > 1
+                    ]
                     from pandas.util import hash_pandas_object
-                    if hash_pandas_object(meta_y.loc[common_samples, duplicates]).sum() == hash_pandas_object(meta_x.loc[common_samples, duplicates]).sum():
-                        meta_data = concat(
-                            [meta_y.loc[common_samples, :]], axis=1)
+
+                    if (
+                        hash_pandas_object(meta_y.loc[common_samples, duplicates]).sum()
+                        == hash_pandas_object(
+                            meta_x.loc[common_samples, duplicates]
+                        ).sum()
+                    ):
+                        meta_data = concat([meta_y.loc[common_samples, :]], axis=1)
                 elif len(lens) == len(set(lens)):
                     meta_data = concat(
-                        [meta_y.loc[common_samples, :], meta_x.loc[common_samples, :]], axis=1)
+                        [meta_y.loc[common_samples, :], meta_x.loc[common_samples, :]],
+                        axis=1,
+                    )
             else:
                 meta_data = DataFrame()
 
@@ -216,8 +317,7 @@ if not y.empty:
             meta_data = meta_y.loc[y.index, :]
 
         elif meta_y.empty and not meta_x.empty:  # only xfile that includes meta_data
-            common_samples = meta_x.loc[x_block.index].index.intersection(
-                y_block.index)
+            common_samples = meta_x.loc[x_block.index].index.intersection(y_block.index)
             if len(common_samples) > 0:
                 meta_data = meta_x.loc[common_samples, :]
             else:
@@ -226,102 +326,149 @@ if not y.empty:
             meta_data = DataFrame()
     ################################################### END : I- Data loading and preparation ####################################################
 
-
 #                   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~    BEGIN : visualize and split the data     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-st.subheader("I - Data visualization", divider='blue')
+st.subheader("I - Data visualization", divider="blue")
 if not x_block.empty and not y.empty:
     hash_ = ObjectHash(current=hash_, add=[y])
     nwls = x_block.shape[1]
     x_block.columns = x_block.columns.astype(str)
+    wls = x_block.columns
     # insight on loaded data
     spectra_plot = plot_spectra(
-        x_block, mean=True, xunits='Wavelength/Wavenumber', yunits="Signal intensity")
+        x_block, mean=True, xunits="Wavelength/Wavenumber", yunits="Signal intensity"
+    )
     from utils.miscellaneous import desc_stats
 
-    c2, c3 = st.columns([1, .4])
+    c2, c3 = st.columns([1, 0.4])
     with c2:
         st.pyplot(spectra_plot)  # Loaded graph
-        if st.session_state.interface == 'advanced':
+        if st.session_state.interface == "advanced":
             with st.container():
-                values = st.slider('Select a range of values',
-                                   min_value=0, max_value=nwls, value=(0, nwls))
+                values = st.slider(
+                    "Select a range of values",
+                    min_value=0,
+                    max_value=nwls,
+                    value=(0, nwls),
+                )
 
             hash_ = ObjectHash(current=hash_, add=values)
-            x_block = x_block.iloc[:, values[0]:values[1]]
+            x_block = x_block.iloc[:, values[0] : values[1]]
             nwl = x_block.shape[1]
+            wls = wls[values[0] : values[1]]
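+            # wls keeps the clipped wavelength labels, reused for the VIP plots further down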
 
             clipped_spectra_plot = plot_spectra(
-                x_block.mean(), xunits='Wavelength/Wavenumber', yunits="Signal intensity")
+                x_block.mean(),
+                xunits="Wavelength/Wavenumber",
+                yunits="Signal intensity",
+            )
             st.pyplot(clipped_spectra_plot)
 
     from utils.miscellaneous import data_split
+
     X_train, X_test, y_train, y_test, train_index, test_index = data_split(
-        x=x_block, y=y)
+        x=x_block, y=y
+    )
     import functools
+
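+    # positional (row) indices of the calibration and validation samples within x_block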
     train_pos = functools.reduce(
-        lambda acc, x: acc + [list(x_block.index).index(x)], list(train_index), [])
+        lambda acc, x: acc + [list(x_block.index).index(x)], list(train_index), []
+    )
     test_pos = functools.reduce(
-        lambda acc, x: acc + [list(x_block.index).index(x)], list(test_index), [])
+        lambda acc, x: acc + [list(x_block.index).index(x)], list(test_index), []
+    )
 
     with c3:
-        st.write('Loaded data summary')
-        stats = DataFrame([desc_stats(y), desc_stats(y_train), desc_stats(y_test)], index=[str(yname)+' (Total)',
-                          str(yname)+' (Cal)', str(yname)+' (Val)']).round(2)
+        st.write("Loaded data summary")
+        stats = DataFrame(
+            [desc_stats(y), desc_stats(y_train), desc_stats(y_test)],
+            index=[
+                str(yname) + " (Total)",
+                str(yname) + " (Cal)",
+                str(yname) + " (Val)",
+            ],
+        ).round(2)
         st.write(stats)
         # histogramms
-        target_plot = hist(y=y, y_train=y_train,
-                           y_test=y_test, target_name=yname)
+        target_plot = hist(y=y, y_train=y_train, y_test=y_test, target_name=yname)
         st.pyplot(target_plot)
         st.info(
-            'Info: 70/30 split ratio was used to split the dataset into calibration and validation subsets')
+            "Info: 70/30 split ratio was used to split the dataset into calibration and validation subsets"
+        )
 
 
 ################################################### END : visualize and split the data #######################################################
 
-    ###################################################     BEGIN : Create Model     ####################################################
+###################################################     BEGIN : Create Model     ####################################################
 model_type = None  # initialize the selected regression algorithm
-model = None        # initialize the regression model object
+model = None  # initialize the regression model object
 intervalls_with_cols = DataFrame()
 
-st.subheader("II - Model creation", divider='blue')
+st.subheader("II - Model creation", divider="blue")
 if not x_block.empty and not y.empty:
     c4, c5, c6 = st.columns([1, 1, 3])
     with c4:
         # select type of supervised modelling problem
-        mode = c4.radio("The nature of the target variable :",
-                        options=['Continuous', 'Categorical'], disabled =True)
+        mode = c4.radio(
+            "The nature of the target variable:",
+            options=["Continuous", "Categorical"],
+            disabled=True,
+        )
         hash_ = ObjectHash(current=hash_, add=mode)
 
         match st.session_state["interface"]:
-            case 'advanced':
+            case "advanced":
                 with c5:
                     if mode == "Continuous":
-                        model_type = st.selectbox("Choose a modelling algorithm:", options=["", "PLS","TPE-iPLS","LW-PLS"],# "LW-PLS", ],
-                                                  key=12, format_func=lambda x: x+'R' if x else "<Select>", disabled=False)
-                    elif mode == 'Categorical':
-                        model_type = st.selectbox("Choose a modelling algorithm:", options=["", "PLS","TPE-iPLS","LW-PLS"],# "LW-PLS", ],
-                                                  key=12, format_func=lambda x: x+'DA' if x else "<Select>", disabled=False)
-
-            case 'simple':
+                        model_type = st.selectbox(
+                            "Choose a modelling algorithm:",
+                            options=["", "PLS", "TPE-iPLS", "LW-PLS"],
+                            key=12,
+                            format_func=lambda x: x + "R" if x else "<Select>",
+                            disabled=False,
+                        )
+                    elif mode == "Categorical":
+                        model_type = st.selectbox(
+                            "Choose a modelling algorithm:",
+                            options=["", "PLS", "TPE-iPLS", "LW-PLS"],
+                            key=12,
+                            format_func=lambda x: x + "DA" if x else "<Select>",
+                            disabled=False,
+                        )
+
+            case "simple":
                 if mode == "Continuous":
                     with c5:
-                        model_type = st.selectbox("Choose a modelling algorithm:", options=["PLS"],
-                                                  key=12, format_func=lambda x: x+'R' if x else "<Select>", disabled=True)
+                        model_type = st.selectbox(
+                            "Choose a modelling algorithm:",
+                            options=["LW-PLS"],
+                            key=12,
+                            format_func=lambda x: x + "R" if x else "<Select>",
+                            disabled=True,
+                        )
                     with c6:
                         st.markdown(
-                            'Example1: Quantifying the volume of nectar consumed by a pollinator during a foraging session.')
+                            "Example 1: Quantifying the volume of nectar consumed by a pollinator during a foraging session."
+                        )
                         st.markdown(
-                            "Example2: Measure the sugar content, amino acids, or other compounds in nectar from different flower species.")
+                            "Example 2: Measuring the sugar content, amino acids, or other compounds in nectar from different flower species."
+                        )
 
-                elif mode == 'Categorical':
+                elif mode == "Categorical":
                     with c5:
-                        model_type = st.selectbox("Choose a modelling algorithm:", options=["PLS"],
-                                                  key=12, format_func=lambda x: x+'DA' if x else "<Select>", disabled=True)
+                        model_type = st.selectbox(
+                            "Choose a modelling algorithm:",
+                            options=["PLS"],
+                            key=12,
+                            format_func=lambda x: x + "DA" if x else "<Select>",
+                            disabled=True,
+                        )
                     with c6:
                         st.markdown(
-                            "Example1: Classifying pollinators into categories such as bees, butterflies, moths, and beetles.")
+                            "Example 1: Classifying pollinators into categories such as bees, butterflies, moths, and beetles."
+                        )
                         st.markdown(
-                            "Example2: Classifying plants based on their health status, such as healthy, stressed, or diseased, using NIR spectral data.")
+                            "Example 2: Classifying plants based on their health status, such as healthy, stressed, or diseased, using NIR spectral data."
+                        )
         hash_ = ObjectHash(current=hash_, add=[mode, model_type])
 
     with c6:
@@ -329,259 +476,204 @@ if not x_block.empty and not y.empty:
         match model_type:
             case "PLS":
                 st.markdown(
-                    "#### For further details on the PLS (Partial Least Squares) algorithm, check the following reference:")
+                    "#### For further details on the PLS (Partial Least Squares) algorithm, check the following reference:"
+                )
                 st.markdown(
-                    '##### https://www.tandfonline.com/doi/abs/10.1080/03610921003778225')
+                    "##### https://www.tandfonline.com/doi/abs/10.1080/03610921003778225"
+                )
 
             case "LW-PLS":
                 st.markdown(
-                    "#### For further details on the LW-PLS (Locally Weighted - Partial Least Squares) algorithm, check the following reference:")
+                    "#### For further details on the LW-PLS (Locally Weighted - Partial Least Squares) algorithm, check the following reference:"
+                )
                 st.markdown(
-                    '##### https://analyticalsciencejournals.onlinelibrary.wiley.com/doi/full/10.1002/cem.3117')
+                    "##### https://analyticalsciencejournals.onlinelibrary.wiley.com/doi/full/10.1002/cem.3117"
+                )
 
             case "TPE-iPLS":
-                st.markdown("#### For further details on the TPE-iPLS (Tree-structured Parzen Estimator based interval-Partial Least Squares) algorithm, which is a wrapper method for interval selection, check the following references:")
                 st.markdown(
-                    "##### https://papers.nips.cc/paper_files/paper/2011/file/86e8f7ab32cfd12577bc2619bc635690-Paper.pdf")
+                    "#### For further details on the TPE-iPLS (Tree-structured Parzen Estimator based interval-Partial Least Squares) algorithm, which is a wrapper method for interval selection, check the following references:"
+                )
+                st.markdown(
+                    "##### https://papers.nips.cc/paper_files/paper/2011/file/86e8f7ab32cfd12577bc2619bc635690-Paper.pdf"
+                )
                 st.markdown(
-                    '##### https://www.tandfonline.com/doi/abs/10.1080/03610921003778225')
+                    "##### https://www.tandfonline.com/doi/abs/10.1080/03610921003778225"
+                )
                 st.markdown(
-                    '##### https://journals.sagepub.com/doi/abs/10.1366/0003702001949500')
+                    "##### https://journals.sagepub.com/doi/abs/10.1366/0003702001949500"
+                )
         st.markdown("-------------")
 
-    # Training set preparation for cross-validation(CV)
     with c5:  # Model columns
+        # Training set preparation for cross-validation(CV)
         nb_folds = 3
+
         @st.cache_data
-        def RequestingModelCreation(change):
-            from utils.regress import Plsr
-            pre = Plsr(train=[X_train, y_train], test=[
-                        X_test, y_test], n_iter=40, cv=nb_folds)
-            global Model
+        def cv_folds(change):
+            folds = KF_CV().CV(x=X_train, y=np.array(y_train), n_folds=nb_folds)
+            return folds
+
+        folds = cv_folds(change=hash_)
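+        # the same folds are reused for preprocessing optimization and for model fitting below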
+        # ----------------------------------
+
+        if model_type:
+
+            @st.cache_data
+            def optimize_signal(change):
+                # PLSr for optimizing signal processing
+                from utils.regress import Plsr
+
+                pre = Plsr(
+                    train=(X_train.to_numpy(), y_train),
+                    test=(X_test.to_numpy(), y_test),
+                    n_iter=50,
+                    cvfolds=folds,
+                )
+                pre.fit_()
+                return pre
+
+            # preprocess spectra with optimized signal preprocessing methods
+            pre = optimize_signal(change=hash_)
+            preprocessed = [
+                signal_preprocess(i, tune=pre.signal_tune_)
+                for i in [X_train.to_numpy(), X_test.to_numpy()]
+            ]
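+            # preprocessed[0] / preprocessed[1] hold the pretreated calibration / validation spectra, built from the settings in pre.signal_tune_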
+
+        @st.cache_data
+        def requesting_model_creation(change):
+            # Fit models using preprocessed spectra
             match model_type:
-                case 'PLS':
+                case "PLS":
+                    # the preprocessing search already fitted and evaluated a PLS model, so it is reused here without refitting
                     Model = pre
 
-                case 'TPE-iPLS':
+                case "TPE-iPLS":
                     from utils.regress import TpeIpls
-                    Model = TpeIpls(train=[X_train, y_train], test=[
-                        X_test, y_test], n_intervall=internum, n_iter=iternum, cv=nb_folds, bestglobalparams = pre.best_hyperparams_)
-                    Model.best_fit()
-                
-                case 'LW-PLS':
+
+                    Model = TpeIpls(
+                        train=(preprocessed[0], np.array(y_train)),
+                        test=(preprocessed[1], np.array(y_test)),
+                        n_iter=100,
+                        cvfolds=folds,
+                    )
+                    Model.fit_()
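+                    # Model.selected_wls_ (indices of the retained wavelengths) feeds the VIP plot further down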
+
+                case "LW-PLS":
                     from utils.regress import LWPLS
-                    Model = LWPLS(train = [X_train, y_train], test = [X_test, y_test], n_iter = 30, cv = nb_folds, bestglobalparams = pre.best_hyperparams_)
-                    Model.best_fit()
-                
-                # The snippet of code below was first used to communicate with Julia for developing lwplsr() LWPLS modelling, but just lately, lwplsr() xas written in Python and utilized instead.
-                # case 'LW-PLS':
-                #     # split train data into nb_folds for cross_validation
-                #     folds = KF_CV.CV(X_train, y_train, nb_folds)
-                #     # export data to csv for Julia train/test
-                #     global x_train_np, y_train_np, x_test_np, y_test_np
-                #     data_to_work_with = ['x_train_np',
-                #                          'y_train_np', 'x_test_np', 'y_test_np']
-                #     x_train_np, y_train_np, x_test_np, y_test_np = X_train.to_numpy(
-                #     ), y_train.to_numpy(), X_test.to_numpy(), y_test.to_numpy()
-                #     # Cross-Validation calculation
-                #     d = {}
-                #     for i in range(nb_folds):
-                #         d["xtr_fold{0}".format(i+1)], d["ytr_fold{0}".format(i+1)], d["xte_fold{0}".format(i+1)], d["yte_fold{0}".format(i+1)] = np.delete(x_train_np, folds[list(
-                #             folds)[i]], axis=0), np.delete(y_train_np, folds[list(folds)[i]], axis=0), x_train_np[folds[list(folds)[i]]], y_train_np[folds[list(folds)[i]]]
-                #         data_to_work_with.append("xtr_fold{0}".format(i+1))
-                #         data_to_work_with.append("ytr_fold{0}".format(i+1))
-                #         data_to_work_with.append("xte_fold{0}".format(i+1))
-                #         data_to_work_with.append("yte_fold{0}".format(i+1))
-                #     # check best pre-treatment with a global PLSR model
-                #     from utils.regress import Plsr
-                #     pre = Plsr(train=[X_train, y_train], test=[X_test, y_test], n_iter=5)
-                #     temp_path = Path('temp/')
-                #     with open(temp_path / "lwplsr_preTreatments.json", "w+") as outfile:
-                #         json.dump(pre.best_hyperparams_, outfile)
-                #     # export Xtrain, Xtest, Ytrain, Ytest and all CV folds to temp folder as csv files
-                #     for i in data_to_work_with:
-                #         if 'fold' in i:
-                #             j = d[i]
-                #         else:
-                #             j = globals()[i]
-                #         np.savetxt(temp_path / str(i + ".csv"),
-                #                    j, delimiter=",")
-                #     open(temp_path / 'model', 'w').close()
-                #     # run Julia Jchemo as subprocess
-                #     import subprocess
-                #     subprocess_path = Path("utils/")
-                #     subprocess.run(
-                #         [str(sys.executable), subprocess_path / "lwplsr_call.py"])
-                #     # retrieve json results from Julia JChemo
-                #     try:
-                #         with open(temp_path / "lwplsr_outputs.json", "r") as outfile:
-                #             Reg_json = json.load(outfile)
-                #             # delete csv files
-                #             for i in data_to_work_with:
-                #                 os.unlink(temp_path / str(i + ".csv"))
-                #         # delete json file after import
-                #         os.unlink(temp_path / "lwplsr_outputs.json")
-                #         os.unlink(temp_path / "lwplsr_preTreatments.json")
-                #         os.unlink(temp_path / 'model')
-                #         # format result data into Reg object
-                #         # keys of the dict
-                #         pred = ['pred_data_train', 'pred_data_test']
-                #         for i in range(nb_folds):
-                #             # add cv folds keys to pred
-                #             pred.append("CV" + str(i+1))
-
-                #         from utils.regress import LwplsObject
-                #         Model = LwplsObject(Reg_json=Reg_json, pred=pred)
-                #         Model.CV_results_ = DataFrame()
-                #         Model.cv_data_ = {'YpredCV': {}, 'idxCV': {}}
-                #         # set indexes to Model.pred_data (train, test, folds idx)
-                #         for i in range(len(pred)):
-                #             Model.pred_data_[i] = Model.pred_data_[
-                #                 i].T.reset_index().drop(columns=['index'])
-                #             if i == 0:  # data_train
-                #                 Model.pred_data_[i].index = list(y_train.index)
-                #                 Model.pred_data_[i] = Model.pred_data_[
-                #                     i].iloc[:, 0]
-                #             elif i == 1:  # data_test
-                #                 Model.pred_data_[i].index = list(y_test.index)
-                #                 Model.pred_data_[i] = Model.pred_data_[
-                #                     i].iloc[:, 0]
-                #             else:
-                #                 # CVi
-                #                 Model.pred_data_[i].index = folds[list(folds)[
-                #                     i-2]]
-                #                 Model.cv_data_[
-                #                     'YpredCV']['Fold' + str(i-1)] = np.array(Model.pred_data_[i]).reshape(-1)
-                #                 Model.cv_data_[
-                #                     'idxCV']['Fold' + str(i-1)] = np.array(folds[list(folds)[i-2]]).reshape(-1)
-
-                #         Model.CV_results_ = KF_CV.metrics_cv(y=y_train, ypcv=Model.cv_data_[
-                #             'YpredCV'], folds=folds)[1]
-                #         # cross validation results print
-                #         Model.best_hyperparams_print = Model.best_hyperparams_
-                #         # plots
-                #         Model.cv_data_ = KF_CV().meas_pred_eq(y=np.array(y_train),
-                #                                               ypcv=Model.cv_data_['YpredCV'], folds=folds)
-                #         Model.pretreated_spectra_ = pre.pretreated_spectra_
-
-                #         Model.best_hyperparams_print = {
-                #             **pre.best_hyperparams_, **Model.best_hyperparams_}
-                #         Model.best_hyperparams_ = {
-                #             **pre.best_hyperparams_, **Model.best_hyperparams_}
-
-                #         Model.__hash__ = ObjectHash(
-                #             current=hash_, add=Model.best_hyperparams_print)
-                #     except FileNotFoundError:
-                #         Model = None
-                #         for i in data_to_work_with:
-                #             os.unlink(temp_path / str(i + ".csv"))
 
+                    Model = LWPLS(
+                        train=(preprocessed[0], np.array(y_train)),
+                        test=(preprocessed[1], np.array(y_test)),
+                        globalplsVL=pre.model_.n_components,
+                        n_iter=25,
+                        cvfolds=folds,
+                    )
+                    Model.fit_()
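+                    # the number of latent variables is inherited from the tuned global PLS model via globalplsVL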
                 case "":
                     Model = None
                     st.info(
-                        'Info: Choose a modelling algorithm from the dropdown list!')
+                        "Info: Choose a modelling algorithm from the dropdown list!"
+                    )
             return Model
 
-        c7, c8 = st.columns([2, 2])
-        with c7:
-            internum = st.number_input(label='No.intervals', min_value=1,
-                                       max_value=6, value=2 if model_type == 'TPE-iPLS' else None,
-                                       disabled=False if model_type == 'TPE-iPLS' else True)
-        with c8:
-            iternum = st.number_input(label='No.iterations', min_value=2,
-                                      max_value=500, value=10 if model_type == 'TPE-iPLS' else None, disabled=False if model_type == 'TPE-iPLS' else True)
-        remodel_button = st.button('re-model the data', type="primary", use_container_width=True,
-                                   disabled=False if model_type else True,
-                                   on_click=lambda: st_var(variable='counter', initialize=False, update=True, type='increment'))
-
-        hash_ = ObjectHash(current=hash_, add=[
-            iternum, internum, st.session_state.counter, model_type])
-        modelling = RequestingModelCreation(change=hash_)
+        # Re-model the data in case the model performance is not satisfactory
+        remodel_button = st.button(
+            "re-model the data",
+            type="primary",
+            use_container_width=True,
+            disabled=False if model_type else True,
+            on_click=lambda: st_var(
+                variable="counter", initialize=False, update=True, type="increment"
+            ),
+        )
+
+        hash_ = ObjectHash(current=hash_, add=[st.session_state.counter, model_type])
+
+        # fit the selected model; clicking "re-model the data" increments the counter, which changes the hash and invalidates the cache
+        modelling = requesting_model_creation(change=hash_)
 
         if model_type:
             info = st.info(
-                'Info: The model is being created. This may take a few minutes.')
+                "Info: The model is being created. This may take a few minutes."
+            )
     with c5:
         if model_type and modelling:
-            # st.write(modelling.__dict__.keys())
             info.empty()
-            st.success(
-                'Success! Your model has been created and is ready to use.')
-            model = modelling.model_
+            st.success("Success! Your model has been created and is ready to use.")
         elif model_type and modelling:
             st.error("Error: Model creation failed. Please try again.")
 
-
 if model_type:
     if modelling:
         model = modelling.model_
         # fitted values and predicted  values
-        yc = modelling.pred_data_[0]
-        yt = modelling.pred_data_[1]
+        yc = modelling.pred_data_["train"]
+        yt = modelling.pred_data_["test"]
 
         c7, c8 = st.columns([2, 4])
         with c7:
             # Show and export the preprocessing methods
-            st.write('-- Spectral preprocessing info --')
-            st.write(modelling.best_hyperparams_print)
+            st.write("-- Spectral preprocessing info --")
+            from utils.data_handling import signal_preprocess_str
+
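+            # show the tuned preprocessing settings in a readable form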
+            st.write(signal_preprocess_str(pre.signal_tune_))
             # Show the model performance table
             st.write("-- Model performance --")
-            if model_type == 'LW-PLS':  # if the model is of LW-PLS type, display only test performance
+            if (
+                model_type == "LW-PLS"
+            ):  # if the model is of LW-PLS type, display only test performance
                 model_per = DataFrame(
-                    metrics(t=[y_test, yt], method='regression').scores_)
+                    metrics(t=[y_test, yt], method="regression").scores_
+                )
             else:
                 model_per = DataFrame(
-                    metrics(c=[y_train, yc], t=[y_test, yt], method='regression').scores_)
+                    metrics(
+                        c=[y_train, yc], t=[y_test, yt], method="regression"
+                    ).scores_
+                )
             st.dataframe(model_per)
 
     @st.cache_data(show_spinner=False)
     def prep_important(change, model_type, model_hash):
-        raw = DataFrame(X_train.mean(), columns=['Average spectrum (Raw)'])
-        prep = DataFrame(modelling.pretreated_spectra_.mean(),
-                         columns=['Average spectrum (Pretreated)'])
+        raw = DataFrame(X_train.mean(), columns=["Average spectrum (Raw)"])
+        prep = DataFrame(
+            np.mean(preprocessed[0], axis=0), columns=["Average spectrum (Pretreated)"]
+        )
         prep.index = X_train.columns
         match model_type:
-            case 'PLS':
-                fig, (ax1, ax2, ax3) = plt.subplots(
-                    3, 1, figsize=(12, 4), sharex=True)
+            case "PLS":
+                fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(12, 4), sharex=True)
                 from utils.varimportance import vip
-                vips = vip(modelling.pretreated_spectra_, y_train, model)
-                vips = DataFrame(vips, columns=['vips'])
+
+                vips = vip(preprocessed[0], y_train, model)
+                vips = DataFrame(vips, columns=["vips"])
                 vips.index = X_train.columns
                 vips.plot(ax=ax3, legend=False, color="#7ab0c7")
                 ax3.grid()
-                ax3.set_xlabel('Wavelenghts/Wavenumbers')
-                ax3.set_ylabel('Vip')
-
-            case 'TPE-iPLS':
-                fig, (ax1, ax2, ax3) = plt.subplots(
-                    3, 1, figsize=(12, 4), sharex=True)
+                ax3.set_xlabel("Wavelengths/Wavenumbers")
+                ax3.set_ylabel("Vip")
 
-                a = []
-                modelling.limits = modelling.limits.astype(int)
-                for i in range(int(len(modelling.limits)/2)):
-                    a.append(modelling.pretreated_spectra_.iloc[:, int(
-                        modelling.limits[2*i]):int(modelling.limits[2*i+1])+1])
-                    predictors.append(np.arange(int(
-                        modelling.limits[2*i]), int(modelling.limits[2*i+1])+1))
+            case "TPE-iPLS":
+                fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(12, 4), sharex=True)
 
                 from utils.varimportance import vip
-                vips = vip(concat(a, axis=1), y_train, model)
-                vips = DataFrame(
-                    vips, columns=['vips'], index=concat(a, axis=1).columns)
 
-                for i in range(len(a)):
-                    vips.loc[a[i].columns].plot(
-                        ax=ax3, legend=False, color="#7ab0c7")
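+                # wavelengths not selected by TPE-iPLS keep a VIP of zero so the full spectral axis can be plotted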
+                vips = np.zeros(len(wls))
+                vips[modelling.selected_wls_] = vip(
+                    preprocessed[0][:, modelling.selected_wls_], y_train, model=model
+                )
+                vips = DataFrame(vips, index=wls)
 
-                ax3.set_ylabel('Vip')
+                vips.plot(ax=ax3, legend=False, color="#7ab0c7")
+                ax3.set_ylabel("Vip")
                 ax3.grid()
-                ax3.set_xlabel('Wavelenghts/Wavenumbers')
+                ax3.set_xlabel("Wavelengths/Wavenumbers")
 
-            case  'LW-PLS':
-                fig, (ax1, ax2) = plt.subplots(
-                    2, 1, figsize=(12, 4), sharex=True)
-                ax2.set_xlabel('Wavelenghts/Wavenumbers')
+            case "LW-PLS":
+                fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 4), sharex=True)
+                ax2.set_xlabel("Wavelengths/Wavenumbers")
 
         raw.plot(ax=ax1, color="#7ab0c7")
         prep.plot(ax=ax2, color="#7ab0c7")
@@ -590,232 +682,421 @@ if model_type:
         ax2.grid()
         plt.tight_layout()
 
-        if model_type != 'PLSR':
+        if model_type != "PLSR":
             nplts = 2
         else:
             nplts = 3
         for i in range(nplts):
-            eval('ax'+str(i+1)).grid(color='grey', linestyle=':', linewidth=0.2)
-            eval('ax'+str(i+1)).margins(x=0)
-            eval('ax'+str(i+1)).legend(loc='upper right')
-            eval('ax'+str(i+1)).set_ylabel('Intensity')
-            if model_type == 'TPE-iPLS':
-                for j in range(int(len(modelling.limits)/2)):
-                    min, max = modelling.limits.astype(
-                        int)[2*j], modelling.limits[2*j+1]
-                    eval('ax'+str(i+1)).axvspan(min, max,
-                                                color='#00ff00', alpha=0.5, lw=0)
+            ax = fig.axes[i]
+            ax.grid(color="grey", linestyle=":", linewidth=0.2)
+            ax.margins(x=0)
+            ax.legend(loc="upper right")
+            ax.set_ylabel("Intensity")
 
         return fig
 
-    # Visualize raw, preprocessed spectra, and selected intervalls(in case of ipls)
-    with c7:
-        if model_type == 'TPE-iPLS':
-            @st.cache_data
-            def tpeipls(change):
-                st.write(
-                    '-- Important Spectral regions used for model creation --')
-
-                modelling.limits = modelling.limits.astype(int)
-                selected_wls = DataFrame(np.reshape(
-                    x_block.columns[modelling.limits], (-1, 2)), columns=['from', 'to'])
-                selected_wls.index = [
-                    'Region#'+str(i+1) for i in range(selected_wls.shape[0])]
-                st.table(selected_wls)
-
-                sel_features = []
-                for i in range(int(len(modelling.limits)/2)):
-                    sel_features.append(np.array(X_train.columns)[
-                                        modelling.limits[2*i]:modelling.limits[2*i+1]+1])
-                sel_features = np.hstack(sel_features)
-
-                return sel_features, selected_wls
-            sel_features, selected_wls = tpeipls(change=hash_)
-
     with c8:
         st.write(
-            '-- Visualization of average spectrum computed before and after spectral preprocessing --')
+            "-- Visualization of the average spectrum computed before and after spectral preprocessing --"
+        )
         imp_fig = prep_important(
-            change=st.session_state.counter, model_type=model_type, model_hash=hash_)
+            change=st.session_state.counter, model_type=model_type, model_hash=hash_
+        )
         st.pyplot(imp_fig)
 
         # Display CV results
-    numbers_dict = {1: "One", 2: "Two", 3: "Three", 4: "Four", 5: "Five",
-                    6: "Six", 7: "Seven", 8: "Eight", 9: "Nine", 10: "Ten"}
-    st.subheader(str(numbers_dict[nb_folds])+"-Fold Cross-Validation results")
+    numbers_dict = {3: "Three", 5: "Five"}
+    st.subheader(str(numbers_dict[nb_folds]) + "-Fold Cross-Validation results")
     cv1, cv2 = st.columns([2, 2])
 
     @st.cache_data(show_spinner=False)
     def cv_display(change):
-        fig1 = px.scatter(modelling.cv_data_[0], x='Measured', y='Predicted', trendline='ols', color='Folds', symbol='Folds',
-                          color_discrete_sequence=px.colors.qualitative.G10)
-        fig1.add_shape(type='line', x0=.95 * min(modelling.cv_data_[0].loc[:, 'Measured']), x1=1.05 * max(modelling.cv_data_[0].loc[:, 'Measured']),
-                       y0=.95 * min(modelling.cv_data_[0].loc[:, 'Measured']), y1=1.05 * max(modelling.cv_data_[0].loc[:, 'Measured']), line=dict(color='black', dash="dash"))
+        fig1 = px.scatter(
+            modelling.cv_data_[0],
+            x="Measured",
+            y="Predicted",
+            trendline="ols",
+            color="Folds",
+            symbol="Folds",
+            color_discrete_sequence=px.colors.qualitative.G10,
+        )
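+        # dashed 1:1 reference line spanning the measured value range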
+        fig1.add_shape(
+            type="line",
+            x0=0.95 * min(modelling.cv_data_[0].loc[:, "Measured"]),
+            x1=1.05 * max(modelling.cv_data_[0].loc[:, "Measured"]),
+            y0=0.95 * min(modelling.cv_data_[0].loc[:, "Measured"]),
+            y1=1.05 * max(modelling.cv_data_[0].loc[:, "Measured"]),
+            line=dict(color="black", dash="dash"),
+        )
         fig1.update_traces(marker_size=7, showlegend=False)
 
-        fig0 = px.scatter(modelling.cv_data_[0], x='Measured', y='Predicted', trendline='ols', color='Folds', symbol="Folds", facet_col='Folds', facet_col_wrap=1,
-                          color_discrete_sequence=px.colors.qualitative.G10, text='index', width=800, height=1000)
+        fig0 = px.scatter(
+            modelling.cv_data_[0],
+            x="Measured",
+            y="Predicted",
+            trendline="ols",
+            color="Folds",
+            symbol="Folds",
+            facet_col="Folds",
+            facet_col_wrap=1,
+            color_discrete_sequence=px.colors.qualitative.G10,
+            text="index",
+            width=800,
+            height=1000,
+        )
         fig0.update_traces(marker_size=8, showlegend=False)
         return fig0, fig1
+
     fig0, fig1 = cv_display(change=modelling.cv_data_)
 
-#
     with cv2:
-        cv_results = DataFrame(modelling.CV_results_).round(4)  # CV table
-        st.write('--Tabular Cross-Validation Summary--')
-        st.table(cv_results.astype(str).style.map(lambda _: "background-color: #cecece;",
-                 subset=(cv_results.index.drop(['sd', 'mean', 'cv']), slice(None))))
-        st.write('----')
-        st.write('-- Out-of-Fold Predictions Visualization (All in one) --')
+        cv_results = DataFrame(modelling.cv_results_).round(4)  # CV table
+        st.write("--Tabular Cross-Validation Summary--")
+        st.table(
+            cv_results.astype(str).style.map(
+                lambda _: "background-color: #cecece;",
+                subset=(cv_results.index.drop(["sd", "mean", "cv"]), slice(None)),
+            )
+        )
+        st.write("----")
+        st.write("-- Out-of-Fold Predictions Visualization (All in one) --")
         st.plotly_chart(fig1, use_container_width=True)
 
     with cv1:
-        st.write('-- Out-of-Fold Predictions Visualization (Separate plots) --')
+        st.write("-- Out-of-Fold Predictions Visualization (Separate plots) --")
         st.plotly_chart(fig0, use_container_width=True)
 
     ###################################################    BEGIN : Model Diagnosis    ####################################################
-st.subheader("III - Model Diagnosis", divider='blue')
+st.subheader("III - Model Diagnosis", divider="blue")
 if model_type:
     if modelling:
-        # signal preprocessing results preparation for latex report
-        prep_para = modelling.best_hyperparams_.copy()
-        if model_type != "LW-PLS":
-            prep_para.pop('n_components')
-            for i in ['deriv', 'polyorder']:
-                if modelling.best_hyperparams_[i] == 0:
-                    prep_para[i] = '0'
-                elif modelling.best_hyperparams_[i] == 1:
-                    prep_para[i] = '1st'
-                elif modelling.best_hyperparams_[i] > 1:
-                    prep_para[i] = str(modelling.best_hyperparams_[i])+'nd'
-
         # reg plot and residuals plot
         yc = y_train if model_type == "LW-PLS" else yc
-        measured_vs_predicted = reg_plot([y_train, y_test], [
-                                         yc, yt], train_idx=train_index, test_idx=test_index, trainplot=False if model_type == "LW-PLS" else True)
-        residuals_plot = resid_plot([y_train, y_test], [yc, yt], train_idx=train_index,
-                                    test_idx=test_index,  trainplot=False if model_type == "LW-PLS" else True)
+        measured_vs_predicted = reg_plot(
+            [y_train, y_test],
+            [yc, yt],
+            train_idx=train_index,
+            test_idx=test_index,
+            trainplot=False if model_type == "LW-PLS" else True,
+        )
+        residuals_plot = resid_plot(
+            [y_train, y_test],
+            [yc, yt],
+            train_idx=train_index,
+            test_idx=test_index,
+            trainplot=False if model_type == "LW-PLS" else True,
+        )
 
         M7, M8 = st.columns([2, 2])
         with M7:
-            st.write('Predicted vs Measured values')
+            st.write("Predicted vs Measured values")
             st.pyplot(measured_vs_predicted)
-    #         # regression_plot.savefig('./report/figures/measured_vs_predicted.png')
 
         with M8:
-            st.write('Residuals plot')
+            st.write("Residuals plot")
             st.pyplot(residuals_plot)
-    #         # residual_plot.savefig('./report/figures/residuals_plot.png')
-
-###################################################      END : Model Diagnosis   #######################################################
 
+# ###################################################      END : Model Diagnosis   #######################################################
 
-###################################################    BEGIN : Download results    #######################################################
-##########################################################################################################################################
-##########################################################################################################################################
+# ###################################################    BEGIN : Download results    #######################################################
+# ##########################################################################################################################################
+# ##########################################################################################################################################
 if model:
     zip_data = ""
-    st.header('Download the analysis results')
-    st.write("**Note:** Please check the box only after you have finished processing your data and are satisfied with the results. Checking the box prematurely may slow down the app and could lead to crashes.")
+    st.header("Download the analysis results")
+    st.write(
+        "**Note:** Please check the box only after you have finished processing your data and are satisfied with the results. Checking the box prematurely may slow down the app and could lead to crashes."
+    )
     decis = st.checkbox("Yes, I want to download the results")
     if decis:
+
         @st.cache_data(show_spinner=True)
         def export_results(change):
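+            # `change` is unused in the body; it only serves as a cache key so that
+            # st.cache_data re-runs the export whenever the workflow hash changes.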
-            res_path = Path('./report/results/')
+            res_path = Path("./report/results/")
             # Export data files
             match filetype:
-                case 'csv':
-                    with open(res_path/'dataset'/xfile.name, "wb") as f:
+                case "csv":
+                    with open(res_path / "dataset" / xfile.name, "wb") as f:
                         f.write(xfile.getvalue())
-                    with open(res_path/'dataset'/yfile.name, "wb") as f:
+                    with open(res_path / "dataset" / yfile.name, "wb") as f:
                         f.write(yfile.getvalue())
-                case 'dx':
-                    with open('report/results/dataset/'+file.name, 'w') as f:
+                case "dx":
+                    with open("report/results/dataset/" + file.name, "w") as f:
                         f.write(file.getvalue().decode("utf-8"))
 
-            # preprocessings
-            with open(res_path / 'Preprocessing.json', "w") as outfile:
-                json.dump(modelling.best_hyperparams_, outfile)
+            # Export signal preprocessing parameters
+            with open(res_path / "Preprocessing.json", "w") as outfile:
+                json.dump(pre.signal_tune_, outfile)
 
-            # model
-            with open('./report/results/model/' + model_type + '.pkl', 'wb') as f:  # export model
+            # export model
+            with open("./report/results/model/" + model_type + ".pkl", "wb") as f:
                 from joblib import dump
-                dump(model, f)
 
-            # intervalls in ipls
-            if model_type == 'TPE-iPLS':  # export selected wavelengths
-                nm = str(model_type)+'-selected_wavelengths.xlsx'
-                wlfilename = res_path / 'model' / nm
-                selected_wls.to_excel(wlfilename)
+                dump(model, f)
 
             # Export figs
             spectra_plot.savefig(res_path / "figures/raw_spectra.png")
             measured_vs_predicted.savefig(
-                res_path / 'figures/measured_vs_predicted.png')
-            residuals_plot.savefig(res_path / 'figures/residuals_plot.png')
+                res_path / "figures/measured_vs_predicted.png"
+            )
+            residuals_plot.savefig(res_path / "figures/residuals_plot.png")
             target_plot.savefig(res_path / "figures/kdeplot.png")
             fig1.write_image(res_path / "figures/meas_vs_pred_cv_all.png")
             fig0.write_image(res_path / "figures/meas_vs_pred_cv_onebyone.png")
-            if st.session_state.interface == 'advanced':
+            if st.session_state.interface == "advanced":
                 if nwls != nwl:
                     clipped_spectra_plot.savefig(
-                        res_path / "figures/clipped_spectra.png")
+                        res_path / "figures/clipped_spectra.png"
+                    )
             imp_fig.savefig(res_path / "figures/vipscores.png")
 
             # pickle the results
-            pklfile = {'data': {'raw-spectra': x_block, 'target': y, 'training_data_idx': train_pos, 'testing_data_idx': test_pos},
-                       'spec-preprocessing': {"normalization": modelling.best_hyperparams_['normalization'], 'SavGol(polyorder,window_length,deriv)': [modelling.best_hyperparams_["polyorder"],
-                                                                                                                                                       modelling.best_hyperparams_[
-                                                                                                                                                           'window_length'],
-                                                                                                                                                       modelling.best_hyperparams_['deriv']]},
-                       "model_type": model_type, 'model_': modelling.model_, 'performance': model_per, 'measvspred': measured_vs_predicted, 'predictors_': list(x_block.columns), 'selected-wls': sel_features if model_type == 'TPE-iPLS' else None}
-            if model_type == 'LW-PLS':  # export LWPLS best model parameters
-                pklfile['lwpls_params'] = modelling.best_hyperparams_
-
-            with open(res_path / 'file_system.pkl', "wb") as pkl:
+            pklfile = {
+                "data": {
+                    "wls": wls,
+                    "raw-spectra": x_block,
+                    "target": {"name": yname, "target": y},
+                    "idx": {"train": train_pos, "test": test_pos},
+                },
+                "spec-preprocessing": pre.signal_tune_,
+                "model": {
+                    "model_type": model_type,
+                    "model_": modelling.model_,
+                    "performance": model_per,
+                    "measvspred": measured_vs_predicted,
+                    "selected-wls": modelling.selected_wls_,
+                },
+            }
+
+            with open(res_path / "file_system.pkl", "wb") as pkl:
                 dump(pklfile, pkl)
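+            # Illustrative loading sketch (assumed downstream usage; keys follow the dict above):
+            #   from joblib import load
+            #   with open("./report/results/file_system.pkl", "rb") as f:
+            #       system_data = load(f)
+            #   system_data["model"]["model_type"], system_data["spec-preprocessing"]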
-
-            # export .texfile
-            match model_type:
-                case 'PLS':
-                    latex_report = report.report('Predictive model development', file_name, stats, list(
-                        prep_para.values()), model_type, model_per, cv_results)
-
-                case 'LW-PLS':
-                    latex_report = report.report('Predictive model development', file_name, stats,
-                                                 list({key: modelling.best_hyperparams_[key] for key in ['deriv', 'normalization', 'polyorder', 'window_length'] if key in modelling.best_hyperparams_}.values()), model_type, model_per, cv_results)
-
-                case 'TPE-iPLS':
-                    latex_report = report.report('Predictive model development', file_name, stats,
-                                                 list({key: modelling.best_hyperparams_[key] for key in ['deriv', 'normalization', 'polyorder', 'window_length'] if key in modelling.best_hyperparams_}.values()), model_type, model_per, cv_results)
-
-            if Path("./report/report.tex").exists():
-                report.generate_report(change=hash_)
-                if Path("./report/report.pdf").exists():
-                    for i in ['tex', 'pdf']:
-                        move('./report/report.'+str(i),
-                             './report/results/report.'+str(i))
+            latex_report = report.report(
+                "Predictive model development",
+                file_name,
+                stats,
+                [
+                    str(pre.signal_tune_["normalization"]),
+                    str(pre.signal_tune_["window_length"]),
+                    str(pre.signal_tune_["polyorder_deriv"]["deriv"]),
+                    str(pre.signal_tune_["polyorder_deriv"]["polyorder"]),
+                ],
+                model_type,
+                model_per,
+                cv_results,
+            )
+
+            if Path("./report/report.tex").exists():
+                report.generate_report(change=hash_)
+                if Path("./report/report.pdf").exists():
+                    for i in ["tex", "pdf"]:
+                        move(
+                            "./report/report." + str(i),
+                            "./report/results/report." + str(i),
+                        )
 
         export_results(change=hash_)
 
         try:
             from tempfile import TemporaryDirectory
+
             # create a temp directory
             with TemporaryDirectory(prefix="results", dir="./report") as temp_dir:
                 tempdirname = os.path.split(temp_dir)[1]
-                if len(os.listdir('./report/results/figures/')) > 2:
-                    make_archive(base_name="./report/Results", format="zip",
-                                 base_dir="results", root_dir="./report")  # create a zip file
+                if len(os.listdir("./report/results/figures/")) > 2:
+                    make_archive(
+                        base_name="./report/Results",
+                        format="zip",
+                        base_dir="results",
+                        root_dir="./report",
+                    )  # create a zip file
                     # put the inside the temp dir
-                    move("./report/Results.zip",
-                         './report/'+str(tempdirname)+'/Results.zip')
-                    with open('./report/'+str(tempdirname)+'/Results.zip', "rb") as f:
+                    move(
+                        "./report/Results.zip",
+                        "./report/" + str(tempdirname) + "/Results.zip",
+                    )
+                    with open(
+                        "./report/" + str(tempdirname) + "/Results.zip", "rb"
+                    ) as f:
                         zip_data = f.read()
         except:
             pass
-    date_time = datetime.now().strftime('%y%m%d%H%M')
-    disabled_down = True if zip_data == '' else False
-    st.download_button(label='Download', data=zip_data, file_name='Nirs_Workflow_' + str(date_time)+'_'+str(mode)+'_'+str(model_type)+'_'+str(file_name)+'.zip', mime="application/zip",
-                       args=None, kwargs=None, type="primary", use_container_width=True, disabled=disabled_down)
-
-    HandleItems.delete_files(keep=['.py', '.pyc', '.bib'])
+    date_time = datetime.now().strftime("%y%m%d%H%M")
+    disabled_down = zip_data == ""
+    st.download_button(
+        label="Download",
+        data=zip_data,
+        file_name="Nirs_Workflow_"
+        + str(date_time)
+        + "_"
+        + str(mode)
+        + "_"
+        + str(model_type)
+        + "_"
+        + str(file_name)
+        + ".zip",
+        mime="application/zip",
+        args=None,
+        kwargs=None,
+        type="primary",
+        use_container_width=True,
+        disabled=disabled_down,
+    )
+
+    HandleItems.delete_files(keep=[".py", ".pyc", ".bib"])
+
+
+####################################################################################
+if st.session_state.interface == "simple":
+    if model:
+        if st.checkbox("Yes, I want to generate predictions"):
+            p1, p2 = st.columns([0.5, 0.5])
+            with p2:
+                predfile = st.file_uploader(
+                    "Please provide the NIRS data from which predictions will be generated.",
+                    type=["csv", "dx"],
+                    help=":mushroom: Select a CSV matrix with samples as rows and wavelengths as columns",
+                )
+            if predfile:
+                with p2:
+                    c1_1, c2_2 = st.columns([0.5, 0.5])
+                match predfile.name.split(".")[-1]:
+                    case "csv":
+                        with c1_1:
+                            dec = st.radio(
+                                "decimal:", options=[".", ","], horizontal=True
+                            )
+                            sep = st.radio(
+                                "separator:", options=[";", ","], horizontal=True
+                            )
+                        with c2_2:
+                            phdr = st.radio(
+                                "header: ", options=["yes", "no"], horizontal=True
+                            )
+                            pnames = st.radio(
+                                "samples name:", options=["yes", "no"], horizontal=True
+                            )
+
+                        hdr = 0 if phdr == "yes" else None
+                        names = 0 if pnames == "yes" else None
+
+                        try:
+                            pred_data, _ = csv_parser(
+                                path=predfile,
+                                decimal=dec,
+                                separator=sep,
+                                index_col=names,
+                                header=hdr,
+                                change=None,
+                            )
+                            if pred_data.shape[1] > 20:
+                                p2.success(
+                                    "The data were loaded successfully and spectral data were detected. If the values look wrong, you may still need to tune the dialect settings.",
+                                    icon="✅",
+                                )
+                            else:
+                                p2.warning(
+                                    "The data were loaded successfully, but no spectral data were detected."
+                                )
+                        except:
+                            p2.error(
+                                "Error: The file could not be loaded; please consider tuning the dialect settings!"
+                            )
+
+                    case "dx":
+                        try:
+                            # creating the temp file
+                            with NamedTemporaryFile(delete=False, suffix=".dx") as tmp:
+                                tmp.write(predfile.read())
+                                tmp_path = tmp.name
+                            from utils.data_parsing import jcamp_parser
+
+                            pred_data, _, _ = jcamp_parser(
+                                path=tmp_path, include="x_block", change=85
+                            )
+                            st.success(
+                                "Info: The data have been loaded successfully",
+                                icon="✅",
+                            )
+                        except:
+                            st.error(
+                                "Error: The input file has not been loaded successfully!"
+                            )
+
+                if preprocessed[0].shape[1] == pred_data.shape[1]:
+                    # preprocess spectra
+                    pred_spectra = DataFrame(
+                        signal_preprocess(np.array(pred_data), tune=pre.signal_tune_)
+                    )
+
+                    # visualize raw and preprocessed spectra
+                    pred_plot_raw = plot_spectra(
+                        pred_data,
+                        mean=True,
+                        xunits="Wavelength/Wavenumber",
+                        yunits="Signal intensity",
+                    )
+                    pred_plot_preprocessed = plot_spectra(
+                        pred_spectra,
+                        mean=True,
+                        xunits="Wavelength/Wavenumber",
+                        yunits="Signal intensity",
+                    )
+                    with p1:
+                        st.pyplot(pred_plot_raw)
+                        st.pyplot(pred_plot_preprocessed)
+
+                    # make predictions
+                    if "makepred" not in st.session_state:
+                        st.session_state["makepred"] = False
+
+                    makepred = st.button(
+                        "Predict",
+                        type="primary",
+                        use_container_width=True,
+                        disabled=False if model_type else True,
+                    )
+                    if makepred:
+                        st.session_state["makepred"] = True
+
+                    if st.session_state["makepred"]:
+                        match model_type:
+                            case "PLS":
+                                predictions = modelling.model_.predict(pred_spectra)
+
+                            case "LW-PLS":
+                                from utils.lwplsr_julia_converted import lwpls
+
+                                predictions = lwpls(
+                                    Xtrain=preprocessed[0],
+                                    ytrain=np.array(y_train),
+                                    Xtest=np.array(pred_spectra),
+                                    **modelling.model_,
+                                ).ravel()
+
+                            case "TPE-iPLS":
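+                                # predict using only the wavelength indexes selected by the interval selection step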
+                                predictions = modelling.model_.predict(
+                                    pred_spectra.iloc[:, modelling.selected_wls_]
+                                )
+
+                        prediction = DataFrame(
+                            predictions, index=pred_data.index, columns=["Predictions"]
+                        )
+                        # print predictions
+                        st.write(prediction)
+
+                        # download the predictions as a csv file
+                        from io import StringIO
+
+                        csv_buffer = StringIO()
+                        prediction.to_csv(csv_buffer, index=True, sep=";")
+                        csv_data = csv_buffer.getvalue()
+                        st.download_button(
+                            label="Download Predictions",
+                            data=csv_data,
+                            file_name="predictions.csv",
+                            mime="text/csv",
+                        )
+                else:
+                    st.error(
+                        "Error: The shape of the prediction data differs from that of the data used for model creation."
+                    )
diff --git a/src/pages/3-prediction.py b/src/pages/3-prediction.py
index dadd3539640fb4d3dc44171fe131093ae665bf42..e385b23a7b0ffee1075dc07260e1036a6cbd25d2 100644
--- a/src/pages/3-prediction.py
+++ b/src/pages/3-prediction.py
@@ -3,7 +3,7 @@ st.set_page_config(page_title="NIRS Utils", page_icon=":goat:", layout="wide")
 
 
 # layout
-UiComponents(pagespath=pages_folder, csspath=css_file, imgpath=image_path,
+ui_components(pagespath=pages_folder, csspath=css_file, imgpath=image_path,
              header=True, sidebar=True, bgimg=False, colborders=True)
 
 # Add 'predict' variable to session state
@@ -196,8 +196,8 @@ if not preprocessed.empty:
         def prep_info(change):
             pr = system_data['spec-preprocessing']['SavGol(polyorder,window_length,deriv)']
             aa, bb, cc = pr[1], pr[0], pr[2]
-            SG = '- Savitzky-Golay derivative :  \n(Window_length:' + str(
-                aa)+';  polynomial order:' + str(bb)+';  Derivative order :'+str(cc)+')'
+            SG = '- Savitzky-Golay derivative:  \n(window_length: ' + str(
+                aa)+';  polynomial order: ' + str(bb)+';  derivative order: '+str(cc)+')'
             Norm = '- Spectral Normalization \n:' + \
                 system_data['spec-preprocessing']['normalization']
             return SG, Norm
diff --git a/src/utils/clustering.py b/src/utils/clustering.py
index 6bb5b7f2b8d5d02b9999a1b26a18528f2c06c76b..bb8e36d7c79d2ce8646440e5ffdc89708b70623f 100644
--- a/src/utils/clustering.py
+++ b/src/utils/clustering.py
@@ -3,16 +3,15 @@ from pandas import DataFrame
 from sklearn.cluster import KMeans
 from sklearn.metrics import silhouette_score
 from scipy.spatial.distance import cdist
-
-#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~  kmeans ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 
-
 from sklearn.cluster import KMeans, AffinityPropagation, HDBSCAN
-from sklearn.metrics import silhouette_score
 import pandas as pd
 
-def clustering(X, method='kmeans', **kwargs):
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~  kmeans ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+
+def clustering(X, method="kmeans", **kwargs):
     """
-    Perform clustering on the given dataset using the specified method. 
+    Perform clustering on the given dataset using the specified method.
     Available clustering methods are:
     - **'kmeans'**: K-Means clustering algorithm, which partitions the data into `k` clusters.
     - **'ap'**: Affinity Propagation clustering, a graph-based algorithm that identifies clusters without predefining the number of clusters.
@@ -21,7 +20,7 @@ def clustering(X, method='kmeans', **kwargs):
     Parameters
     ----------
     X : DataFrame or array-like, shape (n_samples, n_features)
-        The input data for clustering. It can be a pandas DataFrame or a numpy array where 
+        The input data for clustering. It can be a pandas DataFrame or a numpy array where
         each row represents a sample, and each column represents a feature.
 
     method : str, default='kmeans'
@@ -31,11 +30,11 @@ def clustering(X, method='kmeans', **kwargs):
         - 'hdbscan': HDBSCAN (Hierarchical Density-Based Spatial Clustering of Applications with Noise). A method that works well for data with varying densities.
 
     kwargs : dict, optional
-        Additional keyword arguments that can be passed to the specific clustering method. 
+        Additional keyword arguments that can be passed to the specific clustering method.
         The following parameters are accepted for each method:
-        - For 'kmeans': 
+        - For 'kmeans':
             - 'max_k': int, the maximum number of clusters to consider when finding the optimal number of clusters using the Silhouette Score (default is 10).
-        - For 'hdbscan': 
+        - For 'hdbscan':
             - 'min_samples': int, the number of samples in a neighborhood for a point to be considered a core point (default is 8).
             - 'min_cluster_size': int, the minimum size of clusters (default is 10).
             - 'metric': str, the distance metric to use (default is 'euclidean').
@@ -44,7 +43,7 @@ def clustering(X, method='kmeans', **kwargs):
     -------
     tuple
         A tuple containing:
-        - A pandas DataFrame with the cluster assignments for each sample. The index corresponds to the sample names (from X), 
+        - A pandas DataFrame with the cluster assignments for each sample. The index corresponds to the sample names (from X),
           and a column "names" lists the cluster labels.
         - An integer representing the number of clusters found.
 
@@ -70,9 +69,9 @@ def clustering(X, method='kmeans', **kwargs):
     # Example using HDBSCAN clustering:
     result, num_clusters = clustering(X, method='hdbscan', min_samples=10, min_cluster_size=15)
     """
-    
-    if method == 'KMEANS':
-        max_k = kwargs.get('max_k', 10)
+
+    if method == "KMEANS":
+        max_k = kwargs.get("max_k", 10)
 
         # Find the optimal number of clusters using Silhouette Score
         def find_optimal_k(X, max_k):
@@ -90,25 +89,32 @@ def clustering(X, method='kmeans', **kwargs):
         optimal_k = find_optimal_k(X, max_k)
         model = KMeans(n_clusters=optimal_k, random_state=42, n_init=10, max_iter=300)
         labels = model.fit_predict(X)
-        res = pd.DataFrame({'names': X.index}, index = ['cluster#'+str(i+1) for i in labels])
+        res = pd.DataFrame(
+            {"names": X.index}, index=["cluster#" + str(i + 1) for i in labels]
+        )
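+        # one row per sample: the "names" column holds sample ids, the index holds the assigned cluster label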
         return res, len(set(labels))
 
-    elif method == 'AP':
+    elif method == "AP":
         model = AffinityPropagation(preference=-50, random_state=42)
         model.fit(X)
         labels = model.predict(X)
-        res = pd.DataFrame({'names': X.index}, index = ['cluster#'+str(i+1) for i in labels])
+        res = pd.DataFrame(
+            {"names": X.index}, index=["cluster#" + str(i + 1) for i in labels]
+        )
         return res, len(set(labels))
 
-    elif method == 'HDBSCAN':
-        min_samples = kwargs.get('min_samples', 8)
-        min_cluster_size = kwargs.get('min_cluster_size', 10)
-        metric = kwargs.get('metric', 'euclidean')
+    elif method == "HDBSCAN":
+        min_samples = kwargs.get("min_samples", 8)
+        min_cluster_size = kwargs.get("min_cluster_size", 10)
+        metric = kwargs.get("metric", "euclidean")
 
         model = HDBSCAN(min_samples=2, min_cluster_size=5, metric="euclidean")
         labels = model.fit_predict(X)
-        res = pd.DataFrame({'names': X.index}, ['cluster#'+str(i+1) if i != -1 else 'Non clustered' for i in labels])
-        return res, len(set(labels))-1
+        res = pd.DataFrame(
+            {"names": X.index},
+            ["cluster#" + str(i + 1) if i != -1 else "Non clustered" for i in labels],
+        )
+        return res, len(set(labels)) - 1
 
     else:
-        raise ValueError("Unknown clustering method: "+str(method))
+        raise ValueError("Unknown clustering method: " + str(method))
diff --git a/src/utils/data_handling.py b/src/utils/data_handling.py
index 1c427b58543d856a4f93b067c5371b3b7c06fdaa..e40d145577e5db01ad191bafbeec5e21f073e7db 100644
--- a/src/utils/data_handling.py
+++ b/src/utils/data_handling.py
@@ -1,11 +1,12 @@
 from pathlib import Path
-from typing import List
+from typing import List, Literal
 from sklearn.linear_model import LinearRegression
 from typing import List, Dict, Tuple
 from sklearn.preprocessing import StandardScaler
 from utils.eval_metrics import metrics
 import numpy as np
 from pandas import DataFrame
+import streamlit as st
 
 # try to automatically detect the field separator within the CSV
 # def find_delimiter(filename):
@@ -211,15 +212,8 @@ def standardize(X: DataFrame, center: bool = True, scale: bool = False) -> DataF
     sc = DataFrame(sk.fit_transform(X), index=X.index, columns=X.columns)
     return sc
 
-# Spectral preprocessing
-
-
-def Detrend(X):
-    c = detrend(X, axis=-1, type='linear', bp=0, overwrite_data=False)
-    return c
-
-
-def Snv(X: DataFrame) -> DataFrame:
+#################################  Spectral preprocessing  ###################################
+def normalize(X, method: Literal["SNV", "NaN"]):
     """
     Performs Standard Normal Variate (SNV) transformation on the input DataFrame.
 
@@ -229,7 +223,7 @@ def Snv(X: DataFrame) -> DataFrame:
 
     Parameters
     ----------
-    X : DataFrame
+    X : array
         A pandas DataFrame containing the data to be transformed. Each column represents a feature.
 
     Returns
@@ -238,13 +232,146 @@ def Snv(X: DataFrame) -> DataFrame:
         A pandas DataFrame containing the standardized values, with the same indices and column names
         as the input DataFrame.
     """
-    xt = np.array(X).T
-    c = (xt - xt.mean()) / xt.std(axis=0)
-    return DataFrame(c.T, index=X.index, columns=X.columns)
 
+    match method:
+        case "SNV":
+            norm = (X - np.mean(X, axis=1, keepdims=True)) / np.std(
+                X, axis=1, keepdims=True
+            )
+        case "NaN":
+            norm = X
+        case _:
+            raise ValueError("Unknown normalization method: " + str(method))
+    return norm
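+# Worked SNV check (illustrative values, assumed for clarity): for the single row
+# [1.0, 2.0, 3.0] the row mean is 2.0 and the population std is about 0.8165, so
+# normalize(np.array([[1.0, 2.0, 3.0]]), method="SNV") returns roughly
+# [[-1.2247, 0.0, 1.2247]].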
+
+def signal_preprocess(X, tune: Dict):
+    """
+    Preprocesses the input signal data by applying normalization and Savitzky-Golay filter.
+
+    This function first normalizes the signal data using the specified normalization method, and then
+    applies the Savitzky-Golay filter to compute a derivative of the signal.
+
+    Args:
+        X (np.ndarray or pd.DataFrame): The input signal data to preprocess, given as a
+                                 2D array of shape (n_samples, n_features).
+        tune (dict): A dictionary containing tuning parameters for signal preprocessing. Expected keys:
+            - "normalization" (str): The normalization method to apply to the data. 
+              Possible values could be methods like "SNV" (Standard Normal Variate), etc.
+            - "polyorder_deriv" (dict): A dictionary with two keys:
+                - "polyorder" (int): The polynomial order to use for the Savitzky-Golay filter.
+                - "deriv" (int): The order of the derivative to compute (usually 1 for first derivative).
+            - "window_length" (int): The window length for the Savitzky-Golay filter.
+
+    Returns:
+        np.ndarray: The preprocessed signal data, which has been normalized and then filtered using the Savitzky-Golay filter.
+
+    Example:
+        tune = {
+            'normalization': 'SNV',
+            'polyorder_deriv': {'polyorder': 3, 'deriv': 1},
+            'window_length': 7
+        }
+        X = np.array([[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0],
+                      [9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0]])
+        processed_signal = signal_preprocess(X, tune)
+        print(processed_signal)
+        # Output will depend on the normalization and filtering applied.
+
+    Notes:
+        - The function assumes that the `normalize` function and `savgol_filter` are available in the environment.
+        - The input signal `X` should be a 2D array (or DataFrame) with samples as rows and spectral points as columns.
+        - If the "normalization" method is not supported, the `normalize` function should handle errors accordingly.
+    """
+
+    from scipy.signal import savgol_filter
+
+    # Normalize the input signal using the specified method
+    x1 = normalize(X=X, method=tune["normalization"])
+
+    # Apply the Savitzky-Golay filter to compute the derivative of the normalized signal
+    x2 = savgol_filter(
+        x1,
+        polyorder=tune["polyorder_deriv"]["polyorder"],
+        deriv=tune["polyorder_deriv"]["deriv"],
+        window_length=tune["window_length"],
+    )
+
+    return x2
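+# Note: scipy's savgol_filter requires polyorder < window_length, so the tuned
+# "window_length" must stay strictly larger than "polyorder_deriv"["polyorder"].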
+
+
+@st.cache_data(show_spinner=False)
+def signal_preprocess_str(signal_tune: Dict) -> str:
+    """
+    Generates a summary string describing the best signal preprocessing
+    settings based on the given tuning parameters.
+
+    The function summarizes the spectral normalization method and the 
+    parameters used for the Savitzky-Golay derivative.
+
+    Args:
+        signal_tune (Dict): A dictionary containing tuning parameters for
+                             signal preprocessing. The dictionary should include:
+                             - "normalization" (str): A string indicating the 
+                               type of normalization applied. Possible values 
+                               include "SNV" for Standard Normal Variate or 
+                               "NaN" if no transformation was performed.
+                             - "window_length" (int): The window length used 
+                               for the Savitzky-Golay filter.
+                             - "polyorder_deriv" (dict): A dictionary containing 
+                               the Savitzky-Golay derivative parameters, which
+                               should include:
+                               - "polyorder" (int): The polynomial order used 
+                                 in the Savitzky-Golay filter.
+                               - "deriv" (int): The order of the derivative to 
+                                 compute.
+
+    Returns:
+        str: A formatted string summarizing the signal preprocessing settings,
+             including details on the spectral normalization method and the
+             Savitzky-Golay derivative parameters. For example:
+             "- Savitzky-Golay derivative parameters:  
+                (window_length: 7; polynomial order: 2; derivative order: 1)
+              - Spectral Normalization: Standard Normal Variate (SNV)"
+    
+    Example:
+        signal_tune = {
+            "normalization": "SNV",
+            "window_length": 7,
+            "polyorder_deriv": {
+                "polyorder": 2,
+                "deriv": 1
+            }
+        }
+        result = signal_preprocess_str(signal_tune)
+        print(result)
+        # Output:
+        # - Savitzky-Golay derivative parameters:  
+        #   (window_length: 7; polynomial order: 2; derivative order: 1)
+        # - Spectral Normalization: Standard Normal Variate (SNV)
+    """
 
-def No_transformation(X):
-    return X
+    # Handle normalization type (default to "no transformation" if the key is absent)
+    a = "No transformation was performed"
+    if "normalization" in signal_tune:
+        match signal_tune["normalization"]:
+            case "SNV":
+                a = "Standard Normal Variate (SNV)"
+            case "NaN":
+                a = "No transformation was performed"
+
+    # Get Savitzky-Golay parameters
+    bb, cc, dd = (
+        str(signal_tune["window_length"]),
+        str(signal_tune["polyorder_deriv"]["polyorder"]),
+        str(signal_tune["polyorder_deriv"]["deriv"]),
+    )
+    
+    SG = (
+        "- Savitzky-Golay derivative parameters:  \n(window_length: "
+        + bb
+        + "; polynomial order: "
+        + cc
+        + "; derivative order: "
+        + dd
+        + ")"
+    )
+    
+    # Create normalization string
+    Norm = "- Spectral Normalization:  \n" + a
+    
+    # Return the combined summary string
+    return SG + "\n" + Norm
 
 
 ######################################## Cross val split ############################
diff --git a/src/utils/data_parsing.py b/src/utils/data_parsing.py
index 1ac2c0ed1752f5937045dc753a9c1bbe7e4eaf45..550586bda78a01e4ad52ee5249f9903710c07ddb 100644
--- a/src/utils/data_parsing.py
+++ b/src/utils/data_parsing.py
@@ -5,12 +5,12 @@ from tempfile import NamedTemporaryFile
 
 def jcamp_parser(path, include, change=None):
     """
-    Parses a JCAMP-DX file and extracts spectral data, target concentrations, 
+    Parses a JCAMP-DX file and extracts spectral data, target concentrations,
     and metadata as per the specified `include` parameter.
 
     Parameters:
         path (str): The file path to the JCAMP-DX file to be parsed.
-        include (list): Specifies which data blocks to include in the output. 
+        include (list): Specifies which data blocks to include in the output.
                         Options are:
                           - 'x_block': Extract spectra.
                           - 'y_block': Extract target concentrations.
@@ -30,67 +30,78 @@ def jcamp_parser(path, include, change=None):
 
     # Read the JCAMP-DX file
     dxfile = jc.jcamp_readfile(path)
-    nb = dxfile['blocks']
-    list_of_blocks = dxfile['children']
+    nb = dxfile["blocks"]
+    list_of_blocks = dxfile["children"]
 
     idx = []  # List to store sample names
     metdata = {}  # Dictionary to store metadata
 
     # Preallocate matrix for spectral data if 'x_block' or 'all' is included
-    if 'x_block' in include or 'all' in include:
+    if any(x in include for x in ("x_block", "all")):
         specs = np.zeros((nb, len(list_of_blocks[0]["y"])), dtype=float)
 
     # Initialize containers for target concentrations if 'y_block' or 'all' is included
-    if 'y_block' in include or 'all' in include:
+    if any(x in include for x in ("y_block", "all")):
         targets_tuple = {}
         pattern = r"\(([^,]+),(\d+(\.\d+)?),([^)]+)"
-        aa = list_of_blocks[0]['concentrations']
-        a = '\n'.join(line for line in aa.split('\n')
-                      if "NCU" not in line and "<<undef>>" not in line)
-        n_elements = a.count('(')
+        aa = list_of_blocks[0]["concentrations"]
+        a = "\n".join(
+            line
+            for line in aa.split("\n")
+            if "NCU" not in line and "<<undef>>" not in line
+        )
+        n_elements = a.count("(")
         # Extract chemical element names
         elements_name = [match[0] for match in re.findall(pattern, a)]
 
         # Helper function to extract concentration values
         def conc(sample=None, pattern=None):
-            prep = '\n'.join(line for line in sample.split(
-                '\n') if "NCU" not in line and "<<undef>>" not in line)
-            c = [np.NaN if match[1] == '0' else np.float64(
-                match[1]) for match in re.findall(pattern, prep)]
+            prep = "\n".join(
+                line
+                for line in sample.split("\n")
+                if "NCU" not in line and "<<undef>>" not in line
+            )
+            c = [
+                np.NaN if match[1] == "0" else np.float64(match[1])
+                for match in re.findall(pattern, prep)
+            ]
             return np.array(c)
 
     # Loop through all blocks in the file
     for i in range(nb):
-        idx.append(str(list_of_blocks[i]['title']))  # Store sample names
+        idx.append(str(list_of_blocks[i]["title"]))  # Store sample names
 
         # Extract spectra if 'x_block' or 'all' is included
-        if 'x_block' in include or 'all' in include:
-            specs[i] = list_of_blocks[i]['y']
+        if any(x in include for x in ("x_block", "all")):
+            specs[i] = list_of_blocks[i]["y"]
 
         # Extract metadata if 'meta' or 'all' is included
         block = list_of_blocks[i]
-        if 'meta' in include or 'all' in include:
+        if any(x in include for x in ("meta", "all")):
             metdata[i] = {
-                'name': block['title'],
-                'origin': block['origin'],
-                'date': block['date'],
-                'spectrometer': block['spectrometer/data system'].split('\n$$')[0],
-                'n_scans': block['spectrometer/data system'].split('\n$$')[6].split('=')[1],
-                'resolution': block['spectrometer/data system'].split('\n$$')[8].split('=')[1],
-                'xunits': block['xunits'],
-                'yunits': block['yunits'],
-                'firstx': block['firstx'],
-                'lastx': block['lastx'],
-                'npoints': block['npoints'],
+                "name": block["title"],
+                "origin": block["origin"],
+                "date": block["date"],
+                "spectrometer": block["spectrometer/data system"].split("\n$$")[0],
+                "n_scans": block["spectrometer/data system"]
+                .split("\n$$")[6]
+                .split("=")[1],
+                "resolution": block["spectrometer/data system"]
+                .split("\n$$")[8]
+                .split("=")[1],
+                "xunits": block["xunits"],
+                "yunits": block["yunits"],
+                "firstx": block["firstx"],
+                "lastx": block["lastx"],
+                "npoints": block["npoints"],
             }
 
         # Extract target concentrations if 'y_block' or 'all' is included
-        if 'y_block' in include or 'all' in include:
-            targets_tuple[i] = conc(
-                sample=block['concentrations'], pattern=pattern)
+        if any(x in include for x in ("y_block", "all")):
+            targets_tuple[i] = conc(sample=block["concentrations"], pattern=pattern)
 
     # Create DataFrame for target concentrations
-    if 'y_block' in include or 'all' in include:
+    if any(x in include for x in ("y_block", "all")):
         y_block = DataFrame(targets_tuple).T
         y_block.columns = elements_name
         y_block.index = idx
@@ -98,17 +109,17 @@ def jcamp_parser(path, include, change=None):
         y_block = DataFrame
 
     # Create DataFrame for spectral data
-    if 'x_block' in include or 'all' in include:
+    if any(x in include for x in ("x_block", "all")):
         wls = list_of_blocks[0]["x"]  # Wavelengths/frequencies/range
-        x_block = DataFrame(specs, columns=wls, index=idx).astype('float64')
+        x_block = DataFrame(specs, columns=wls, index=idx).astype("float64")
     else:
         x_block = DataFrame
 
     # Create DataFrame for metadata
-    if 'meta' in include or 'all' in include:
+    if any(x in include for x in ("meta", "all")):
         m = DataFrame(metdata).T
         m.index = idx
-        met = m.drop(m.columns[(m == '').all()], axis=1)
+        met = m.drop(m.columns[(m == "").all()], axis=1)
     else:
         met = DataFrame
 
@@ -150,21 +161,23 @@ def csv_parser(path, decimal, separator, index_col, header, change=None):
     - If `change` is provided, it will be applied to the non-floating point columns before returning them.
     """
     from pandas import read_csv
-    df = read_csv(path, decimal=decimal, sep=separator,
-                  index_col=index_col, header=header)
+
+    df = read_csv(
+        path, decimal=decimal, sep=separator, index_col=index_col, header=header
+    )
 
     # Select columns with float data type
-    float = df.select_dtypes(include='float')
+    float = df.select_dtypes(include="float")
 
     # Select columns without float data type and apply changes (like uppercasing strings)
-    non_float = df.select_dtypes(exclude='float')
+    non_float = df.select_dtypes(exclude="float")
 
     return float, non_float
 
 
 def meta_st(df):
     """
-    Preprocesses a DataFrame by retaining columns with between 2 and 59 unique values 
+    Preprocesses a DataFrame by retaining columns with between 2 and 59 unique values
     and converting string columns to uppercase.
 
     Parameters:
@@ -211,7 +224,7 @@ def meta_st(df):
         # Convert string columns to uppercase
         for i in df.columns:
             try:
-                df[[i]].astype('float')
+                df[[i]].astype("float")
             except:
                 df[[i]] = df[[i]].apply(lambda x: x.str.upper())
 
diff --git a/src/utils/eval_metrics.py b/src/utils/eval_metrics.py
index 44c36a149b297e96bd91f4b11d4ddda3e258e624..704dce2740c8846e561d54d513cac9d74fceaa8a 100644
--- a/src/utils/eval_metrics.py
+++ b/src/utils/eval_metrics.py
@@ -1,4 +1,3 @@
-
 from pandas import DataFrame
 import numpy as np
 
@@ -6,7 +5,7 @@ import numpy as np
 class metrics:
     """
     A class for calculating various performance metrics for regression and classification tasks.
-    
+
     This class can compute statistical metrics for regression and classification problems based on
     provided measured and predicted values. It can handle train, cross-validation, and test data separately,
     and return the metrics in a structured format.
@@ -20,7 +19,13 @@ class metrics:
     from typing import Optional, List
     from pandas import DataFrame
 
-    def __init__(self, c: Optional[float] = None, cv: Optional[List] = None, t: Optional[List] = None, method='regression') -> DataFrame:
+    def __init__(
+        self,
+        c: Optional[float] = None,
+        cv: Optional[List] = None,
+        t: Optional[List] = None,
+        method="regression",
+    ):
         """
         Initializes the metrics object and computes the performance metrics for the provided data.
 
@@ -40,23 +45,22 @@ class metrics:
         DataFrame
             A DataFrame containing the performance metrics for each dataset (train, cross-validation, test).
         """
-        
+
         phase = [c, cv, t]
         index = np.array(["train", "cv", "test"])
         notnone = [i for i in range(3) if phase[i] is not None]
         met_index = index[notnone]
-        methods = ['regression', 'classification']
+        methods = ["regression", "classification"]
         perf = {}
 
         for i in notnone:
-            if method == 'regression':
+            if method == "regression":
                 perf[index[i]] = metrics.reg_(phase[i][0], phase[i][1])
-            elif method == 'classification':
+            elif method == "classification":
                 perf[index[i]] = metrics.class_(phase[i][0], phase[i][1])
-        
 
         self.ret = DataFrame(perf).T
-             
+
     @staticmethod
     def reg_(meas, pred):
         """
@@ -77,33 +81,25 @@ class metrics:
             - 'r2': R-squared
             - 'rmse': Root Mean Square Error
             - 'mae': Mean Absolute Error
-            - 'rpd': Ratio of Performance to Deviation
-            - 'rpiq': Relative Predictive Interval Quality
+            - 'rpd': Residual Prediction Deviation
+            - 'rpiq': Ratio of Performance to InterQuartile distance
         """
-        
-        meas = np.array(meas)
-        pred = np.array(pred)
-        xbar = np.mean(meas)  # the average of measured values
-        e = np.subtract(meas, pred)
-        e2 = e ** 2  # the squared error
-
-        # Sum of squared:
-        # TOTAL
-        sst = np.sum((meas - xbar) ** 2)
-        # RESIDUAL
-        ssr = np.sum(e2)
-        # REGRESSION OR MODEL
-        ssm = np.sum(pred - xbar)
 
+        from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
+        from scipy.stats import pearsonr
+
+        meas = np.array(meas).flatten()
+        pred = np.array(pred).flatten()
         # Compute statistical metrics
-        metr = {}
-        metr['r'] = np.corrcoef(meas, pred)[0, 1]
-        metr['r2'] = 1 - ssr / sst
-        metr['rmse'] = np.sqrt(np.mean(e2))
-        metr['mae'] = np.mean(np.abs(e))
-        metr['rpd'] = np.std(meas) / np.sqrt(np.mean(e2))
-        metr['rpiq'] = (np.quantile(meas, 0.75) - np.quantile(meas, 0.25)) / np.sqrt(np.mean(e2))
-        
+        metr = {
+            "r": pearsonr(meas, pred)[0],
+            "r2": max(r2_score(meas, pred), 0.01),
+            "rmse": np.sqrt(mean_squared_error(meas, pred)),
+            "mae": mean_absolute_error(meas, pred),
+            "rpd": np.std(meas) / np.sqrt(mean_squared_error(meas, pred)),
+            "rpiq": (np.quantile(meas, 0.75) - np.quantile(meas, 0.25))
+            / np.sqrt(mean_squared_error(meas, pred)),
+        }
         return metr
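+    # Worked numbers for the formulas above (illustrative values, assumed for clarity):
+    # with meas = [1, 2, 3, 4] and pred = [1.1, 1.9, 3.2, 3.8], the errors are
+    # [-0.1, 0.1, -0.2, 0.2], so rmse = sqrt(0.025) ≈ 0.158, mae = 0.15,
+    # rpd = std(meas) / rmse ≈ 1.118 / 0.158 ≈ 7.07, and
+    # rpiq = (Q3 - Q1) / rmse = 1.5 / 0.158 ≈ 9.49.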
 
     @staticmethod
diff --git a/src/utils/miscellaneous.py b/src/utils/miscellaneous.py
index f6b780181f10fca49e0116398b8c15a28c0dda69..3ee4d593960380555090c8ca761c0ffa3d7d9b19 100644
--- a/src/utils/miscellaneous.py
+++ b/src/utils/miscellaneous.py
@@ -2,32 +2,54 @@ import streamlit as st
 from pandas import DataFrame, read_csv
 import numpy as np
 
-# predict module
-def prediction(NIRS_csv, qsep, qhdr, model):
-    # hdr var correspond to column header True or False in the CSV
-    if qhdr == 'yes':
-        col = 0
-    else:
-        col = False
-    X_test = read_csv(NIRS_csv, sep=qsep, index_col=col)
-    Y_preds = model.predict(X_test)
-    # Y_preds = X_test
-    return Y_preds
+# # predict module
+# def prediction(NIRS_csv, qsep, qhdr, model):
+#     # hdr var correspond to column header True or False in the CSV
+#     if qhdr == 'yes':
+#         col = 0
+#     else:
+#         col = False
+#     X_test = read_csv(NIRS_csv, sep=qsep, index_col=col)
+#     Y_preds = model.predict(X_test)
+#     # Y_preds = X_test
+#     return Y_preds
 
 
-# function that create a download button - needs the data to save and the file name to store to
-def download_results(data, export_name):
-    with open(data) as f:
-        st.download_button('Download', f, export_name, type='primary')
+# # function that create a download button - needs the data to save and the file name to store to
+# def download_results(data, export_name):
+#     with open(data) as f:
+#         st.download_button('Download', f, export_name, type='primary')
 
 @st.cache_data(show_spinner =True)
 def data_split(x, y):
+    """
+    Splits input data into training and test sets using the Kennard-Stone algorithm.
+    
+    Parameters:
+        x (pd.DataFrame or pd.Series): Feature matrix or series.
+        y (pd.DataFrame or pd.Series): Target variable.
+    
+    Returns:
+        tuple: A tuple containing:
+            - X_train (pd.DataFrame or pd.Series): Training feature set.
+            - X_test (pd.DataFrame or pd.Series): Test feature set.
+            - y_train (pd.DataFrame or pd.Series): Training target set.
+            - y_test (pd.DataFrame or pd.Series): Test target set.
+            - train_index (np.ndarray): Indices of training samples.
+            - test_index (np.ndarray): Indices of test samples.
+    
+    Notes:
+        - The split is based on the Kennard-Stone algorithm, ensuring a representative selection of data points.
+        - The test set comprises 30% of the total data.
+    
+    Example:
+        >>> X_train, X_test, y_train, y_test, train_index, test_index = data_split(X, y)
+    """
     from kennard_stone import train_test_split
-    # Split data into training and test sets using the kennard_stone method and correlation metric, 25% of data is used for testing
-    X_train, X_test, y_train, y_test  = train_test_split(x, y, test_size = 0.30)
+    X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.30)
     train_index, test_index = np.array(X_train.index), np.array(X_test.index)
+    
     return X_train, X_test, y_train, y_test, train_index, test_index
-
 ## descriptive stat
 @st.cache_data(show_spinner =True)
 def desc_stats(x):
@@ -46,60 +68,68 @@ def desc_stats(x):
 
 
 
-def ObjectHash(current = None, add = None):
+def ObjectHash(current=None, add=None):
+    """
+    Computes a hash value for various data types using xxHash (xxh32).
+    
+    Parameters:
+        current (any, optional): The first object to be hashed. Can be of type int, float, str, bool, bytes, list, tuple, dict, pandas.DataFrame, pandas.Series, or numpy.ndarray.
+        add (any, optional): The second object to be appended to `current` before hashing.
+    
+    Returns:
+        str: The computed xxh32 hash value as a hexadecimal string.
+    
+    Raises:
+        TypeError: If an unsupported data type is provided.
+    
+    Examples:
+        >>> h = ObjectHash("hello", "world")   # digest of both objects combined
+        >>> isinstance(h, str) and len(h) == 8
+        True
+        >>> ObjectHash(add="hello") == ObjectHash(add="hello")
+        True
+    """
+    from pandas import DataFrame, Series
+    import numpy as np
+    import xxhash
+    
     def DatatoStr(data):
-        from pandas import DataFrame, Series
-        import numpy as np
-        """Hash various data types using MD5."""
-        
-        # Convert to a string representation
-        if isinstance(data, DataFrame):
-            data_str = data.to_string()
-        elif isinstance(data, Series):
+        """Converts supported data types into a string representation for hashing."""
+        if isinstance(data, DataFrame) or isinstance(data, Series):
             data_str = data.to_string()
         elif isinstance(data, np.ndarray):
             data_str = np.array2string(data, separator=',')
-        elif isinstance(data, (list, tuple)):
-            data_str = str(data)
-        elif isinstance(data, dict):
-            # Ensure consistent order for dict items
-            data_str = str(sorted(data.items()))
+        elif isinstance(data, (list, tuple, dict)):
+            data_str = str(sorted(data.items())) if isinstance(data, dict) else str(data)
         elif isinstance(data, (int, float, str, bool)):
             data_str = str(data)
         elif isinstance(data, bytes):
-            data_str = data.decode('utf-8', 'ignore')  # Decode bytes to string
-        elif isinstance(data, str):  # Check if it's a string representing file content
-            data_str = data
+            data_str = data.decode('utf-8', 'ignore')
         else:
-            raise TypeError("Unsupported data type: "+str(type(data)))
+            raise TypeError("Unsupported data type: " + str(type(data)))
         
-        # Encode the string to bytes
-        data_bytes = data_str.encode()
-        return str(data_bytes)
+        return data_str.encode()
     
-
-    import xxhash
-    if current == None and add == None:
-        object = "None"
+    if current is None and add is None:
         print('Insert the object for which you want to compute the hash value.')
-    elif current != None and add != None:
-        object = DatatoStr(current)+ DatatoStr(add)
-    elif current == None and add != None:
-        object = DatatoStr(add)
-    elif current != None and add == None:
-        object = "None"
-
-         # Compute the MD5 hash
+        object_data = "None".encode()
+    elif current is not None and add is not None:
+        object_data = DatatoStr(current) + DatatoStr(add)
+    elif add is not None:
+        object_data = DatatoStr(add)
+    else:
+        object_data = "None".encode()
     
-    md5_hash = xxhash.xxh32(object).hexdigest()
-    return str(md5_hash)
+    return xxhash.xxh32(object_data).hexdigest()
 
 
 
-def JointoMain():
-    import os
-    for i in ['utils','style']:
-        import sys
-        sys.path.append(os.path.join(os.path.dirname(__file__), i))
+# def JointoMain():
+#     import os
+#     for i in ['utils','style']:
+#         import sys
+#         sys.path.append(os.path.join(os.path.dirname(__file__), i))
 
-#
\ No newline at end of file
+# #
\ No newline at end of file
diff --git a/src/utils/regress.py b/src/utils/regress.py
index 9ac3a696ed27ba918d30ac0ef6d99f3f7aa6f863..79b918bd21a85127b00ea80a48714582dc30b476 100644
--- a/src/utils/regress.py
+++ b/src/utils/regress.py
@@ -1,355 +1,553 @@
+import random
 from .lwplsr_julia_converted import lwpls
-import streamlit as st
 import numpy as np
-from pandas import DataFrame
+from pandas import DataFrame, Series
 from utils.eval_metrics import metrics
-from scipy.signal import savgol_filter
 from sklearn.cross_decomposition import PLSRegression
 from hyperopt import fmin, hp, tpe, Trials, space_eval, STATUS_OK, anneal
+from hyperopt.pyll.base import scope
+from utils.data_handling import signal_preprocess, KF_CV, sel_ratio
+from typing import List, Dict, Any, Tuple
 
-from utils.data_handling import Snv, No_transformation, KF_CV, sel_ratio
-
+import streamlit as st
 
-class Regmodel(object):
-    def __init__(self, train, test, n_iter, add_hyperparams=None, remove_hyperparams=None, nfolds=3, **kwargs):
 
-        self.SCORE = 100000000
-        self._xc, self._xt, self._ytrain, self._ytest = train[0], test[0], train[1], test[1]
-        self._nc, self._nt, self._p = train[0].shape[0], test[0].shape[0], train[0].shape[1]
-        self._model, self._best = None, None
-        self._yc, self._ycv, self._yt = None, None, None
+class BaseSys1:
+    """
+    Base class for building and tuning regression models with spectral data.
+    This class manages training and testing data, hyperparameter tuning,
+    feature selection, and model evaluation.
+
+    Attributes:
+        folds (Dict): K-fold cross-validation splits (the `cvfolds` argument), with keys naming the folds and values holding 1D arrays of indices used to access the held-out samples of each fold.
+        _xc (DataFrame): The training features matrix.
+        _xt (DataFrame): The testing features matrix.
+        _ytrain (Series): The training target values.
+        _ytest (Series): The testing target values.
+        _nc (int): Number of training samples.
+        _nt (int): Number of testing samples.
+        _p (int): Number of features (columns) in the training data.
+        _model (object or None): The trained regression model.
+        _yc (np.array or None): Predictions for the training set.
+        _ycv (Dict or None): Cross-validation results.
+        _yt (np.array or None): Predictions for the testing set.
+        _cv_df (DataFrame): Cross-validation performance metrics (r2, rmse, ...).
+        selected_wls_ (array-like or None): Indices of the selected features/wavelengths, populated by subclasses that perform variable selection.
+        tune_ (dict): Dictionary of the best hyperparameters from tuning.
+
+        Initializes the BaseSys1 object with training and testing data, performs hyperparameter tuning,
+        and sets up model attributes.
+
+        Args:
+            train (tuple): A tuple containing the training feature matrix (DataFrame)
+                           and the training target values (Series).
+            test (tuple): A tuple containing the testing feature matrix (DataFrame)
+                          and the testing target values (Series).
+            n_iter (int): The number of iterations for the hyperparameter optimization.
+            add (dict or None): Optional dictionary of additional parameters to add to the search space.
+            cvfolds (Dict): Dict containing K-fold cross-validation splits, with keys naming the folds and values holding 1D arrays of indices used to access the held-out samples of each fold.
+            **kwargs: Additional keyword arguments for tuning or customization.
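+
+        Note:
+            BaseSys1 is not meant to be instantiated directly. Subclasses such as
+            Plsr, TpeIpls and LWPLS implement `objective` and `fit_`; this
+            constructor only runs the hyperparameter search by passing `objective`
+            to hyperopt's `fmin`, then stores the best parameters in `self._tune`.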
+    """
+
+    def __init__(self, train: Tuple, test: Tuple, n_iter: int, cvfolds: Dict, **kwargs):
+        self.folds = cvfolds
+        self._xc, self._ytrain = train
+        self._xt, self._ytest = test
+        self._nc, self._p = train[0].shape
+        self._nt, _ = test[0].shape
+        (
+            self._yc,
+            self._ycv,
+            self._yt,
+            self._model,
+            self.__imp_ft,
+            self.selected_wls_,
+        ) = [None] * 6
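+        # These attributes are populated later by the subclass-specific fit_()
+        # method, once the hyperparameter search below has completed.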
         self._cv_df = DataFrame()
-        self._sel_ratio = DataFrame()
-        self._nfolds = nfolds
-        self._selected_bands = DataFrame(index=['from', 'to'])
-        self.important_features = None
 
-        if self._xc.shape[1] > 1000:
-            a = [15, 21, 27, 33]
+        # defining search space for optimization
+        if "new_space" in kwargs:
+            space = kwargs["new_space"]
         else:
-            a = [5, 7, 9]
-        self._hyper_params = {'polyorder': hp.choice('polyorder', [2]),
-                              'deriv': hp.choice('deriv', [0, 1]),
-                              # [15, 21, 27, 33]
-                              'window_length': hp.choice('window_length', [9]),
-                              'normalization': hp.choice('normalization', ['No_transformation'])}
-        if remove_hyperparams is not None:
-            for i in remove_hyperparams:
-                self._hyper_params.pop(i, None)
-
-        if add_hyperparams is not None:
-            self._hyper_params.update(add_hyperparams)
-            self._best = None
-
-        trials = Trials()
-        best_params = fmin(fn=self.objective,
-                           space=self._hyper_params,
-                           # Tree of Parzen Estimators’ (tpe) which is a Bayesian approach
-                           algo=tpe.suggest,
-                           max_evals=n_iter,
-                           trials=trials,
-                           verbose=1)
-
-    @property
-    def train_data_(self):
-        return [self._xc, self._ytrain]
-
-    @property
-    def test_data_(self):
-        return [self._xt, self._ytest]
-
-    @property
-    def pretreated_spectra_(self):
-        return self.pretreated
-
-    @property
-    def get_params_(self):  # This method return the search space where the optimization algorithm will search for optimal subset of hyperparameters
-        return self._hyper_params
+            space = {
+                "polyorder_deriv": hp.choice(
+                    "polyorder_deriv",
+                    [
+                        {"polyorder": 0, "deriv": 0},
+                        {"polyorder": 1, "deriv": 0},
+                        {"polyorder": 1, "deriv": 1},
+                        {"polyorder": 2, "deriv": 0},
+                        {"polyorder": 2, "deriv": 1},
+                        {"polyorder": 2, "deriv": 2},
+                    ],
+                ),
+                "window_length": hp.choice(
+                    "window_length",
+                    [15, 17, 21, 23] if self._xc.shape[1] > 1000 else [5, 7, 9, 11],
+                ),
+                "normalization": hp.choice("normalization", ["NaN", "SNV"]),
+            }
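+            # Note: the (polyorder, deriv) pairs above keep deriv <= polyorder, as
+            # expected for Savitzky-Golay smoothing/derivation, and wider windows
+            # are proposed when the spectra have more than 1000 wavelengths.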
+
+        # add a parameter to search space
+        if "add" in kwargs:
+            space.update(kwargs["add"])
+
+        # launch optimization
+        best = fmin(
+            fn=self.objective,
+            space=space,
+            algo=tpe.suggest,
+            max_evals=n_iter,
+            verbose=1,
+        )
+        self._tune = space_eval(space, best)
+
+    def fit_(self):
+        """
+        Refit the model with the best hyperparameters found during tuning and
+        compute the final predictions. Subclasses must implement this method.
+        """
+        pass
 
     def objective(self, params):
+        """
+        Objective function for hyperparameter optimization. This function
+        needs to be implemented in subclasses or as part of a custom model.
+
+        Args:
+            params (dict): The hyperparameters for the model.
+        """
         pass
 
     @property
-    # This method returns the subset of selected hyperparametes
-    def best_hyperparams_(self):
-        return self._best
+    def signal_tune_(self) -> Dict:
+        """
+        Returns the signal-preprocessing subset of the best hyperparameters
+        determined by the tuning process.
+
+        Only the keys related to signal preprocessing ('normalization',
+        'window_length', 'polyorder_deriv') are extracted from the full set of
+        tuned hyperparameters stored in `self._tune`.
+
+        Returns:
+            dict: A dictionary containing the best signal-preprocessing hyperparameters.
+        """
+        sig = {
+            key: self._tune[key]
+            for key in ["normalization", "window_length", "polyorder_deriv"]
+            if key in self._tune
+        }
+        return sig
 
     @property
-    # This method returns a sentence telling what signal preprocessing method was applied
-    def best_hyperparams_print(self):
-        if self._best['normalization'] == 'Snv':
-            a = 'Standard Normal Variate (SNV)'
-
-        elif self._best['normalization'] == 'No_transformation':
-            a = " No transformation was performed"
-
-        bb, cc, dd = str(self._best['window_length']), str(
-            self._best['polyorder']), str(self._best['deriv'])
-        SG = '- Savitzky-Golay derivative parameters:  \n(Window_length:' + \
-            bb+';polynomial order:' + cc+'; Derivative order : ' + dd
-        Norm = '- Spectral Normalization:  \n'+a
-        return SG+"\n"+Norm
+    def tune_(self) -> Dict:
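+        """
+        Returns the model-specific subset of the best hyperparameters found during
+        tuning, i.e. every tuned key except the signal-preprocessing ones
+        ('normalization', 'window_length', 'polyorder_deriv').
+
+        Returns:
+            dict: A dictionary containing the best model hyperparameters.
+        """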
+        tune = {
+            key: value
+            for key, value in self._tune.items()
+            if key not in ["normalization", "window_length", "polyorder_deriv"]
+        }
+        return tune
 
     @property
-    def model_(self):  # This method returns the developed model
+    def model_(self) -> object:
+        """
+        Returns the trained model.
+
+        Returns:
+            object: The developed model (e.g., trained regression model).
+        """
         return self._model
 
     @property
-    def pred_data_(self):  # this method returns the predicted data in training and testing steps
-        return self._yc, self._yt
+    def pred_data_(self) -> Dict:
+        """
+        Returns the predicted values for both the training and testing datasets.
+
+        The method returns a dictionary containing the predictions for both the training
+        and testing datasets, with the following keys:
+            - 'train': A 1D NumPy array containing predictions for the training set.
+            - 'test': A 1D NumPy array containing predictions for the testing set.
+
+        Returns:
+            dict: A dictionary with keys 'train' and 'test', mapping to 1D NumPy arrays
+                representing the predictions for the training and testing sets, respectively.
+        """
+        return {"train": self._yc, "test": self._yt}
 
     @property
-    def cv_data_(self):  # Cross validation data
+    def cv_data_(self) -> Dict:
+        """
+        Returns a dict of two pandas DataFrames. The first DataFrame contains the
+        cross-validation results (columns: reference values, predicted values,
+        equations from cross-validation), while the second DataFrame contains the
+        slopes and intercepts of the predicted vs reference values model for each CV-fold.
+        """
         return self._ycv
 
     @property
-    def CV_results_(self):
+    def cv_results_(self) -> DataFrame:
+        """
+        Returns the performance metrics of the model on each separate fold, as well as the
+        average, standard deviation, and coefficient of variation across all folds.
+
+        Returns:
+            DataFrame: a pandas dataframe of Cross-Validation performance metrics.
+        """
         return self._cv_df
 
-    @property
-    def important_features_(self):
-        return self.important_features
+    # -------------------------------------->  PLSR <-------------------------------------- #
 
-    @property
-    def selected_features_(self):
-        return self._selected_bands
 
-    @property
-    def sel_ratio_(self):
-        return self._sel_ratio
+class Plsr(BaseSys1):
+    """
+    Plsr (Partial Least Squares Regression) class for performing regression modeling
+    with signal preprocessing and cross-validation. It inherits from the `BaseSys1` class
+    and customizes its behavior for PLSR-specific preprocessing and scoring.
 
-########################################### PLSR   #########################################
+    Args:
+        train (tuple): A tuple containing the training feature matrix (DataFrame)
+                        and the training target values (Series).
+        test (tuple): A tuple containing the testing feature matrix (DataFrame)
+                        and the testing target values (Series).
+        n_iter (int): The number of iterations for the hyperparameter optimization.
+        add (dict or None): Optional dictionary of additional parameters to add to the search space.
+        cvfolds (Dict): Dict containing K-fold cross-validation splits, with keys naming the folds and values holding 1D arrays of indices used to access the held-out samples of each fold.
 
+    """
 
-class Plsr(Regmodel):
-    def __init__(self, train, test, n_iter=10, cv=3):
-        super().__init__(train, test, n_iter, nfolds=cv, add_hyperparams={
-            'n_components': hp.randint('n_components', 1, 20)})
-        # parameters in common
+    def __init__(self, train, test, cvfolds, n_iter):
+        add = {"n_components": scope.int(hp.randint("n_components", 1, 20))}
+        super().__init__(train, test, n_iter, add=add, cvfolds=cvfolds)
+        self.cvfolds = cvfolds
 
     def objective(self, params):
-        params['n_components'] = int(params['n_components'])
-        x0 = [self._xc, self._xt]
-
-        x1 = []
-        x1.append(eval(str(params['normalization'])+'(x0[0])'))
-        x1.append(eval(str(params['normalization'])+'(x0[1])'))
+        """
+        Defines the objective function for hyperparameter tuning and model scoring.
 
-        a, b, c = params['deriv'], params['polyorder'], params['window_length']
-        if a > b or b > c:
-            if self._best is not None:
-                a, b, c = self._best['deriv'], self._best['polyorder'], self._best['window_length']
+        Args:
+            params (dict): A dictionary containing model hyperparameters like 'n_components',
+                        'normalization', 'deriv', 'polyorder', and 'window_length'.
 
-            else:
-                a, b, c = 0, 0, 1
+        Returns:
+            float: The score to minimize, computed as the mean cross-validation
+                RMSE divided by max(0.01, mean cross-validation R2).
+        """
 
-        params['deriv'], params['polyorder'], params['window_length'] = a, b, c
-        x2 = [savgol_filter(x1[i], polyorder=params['polyorder'], deriv=params['deriv'],
-                            window_length=params['window_length']) for i in range(2)]
+        # ------------------> Signal Preprocessing <------------------#
+        x = [signal_preprocess(i, tune=params) for i in [self._xc, self._xt]]
 
-        model = PLSRegression(scale=False, n_components=params['n_components'])
-        folds = KF_CV().CV(x=x2[0], y=np.array(
-            self._ytrain), n_folds=self._nfolds)
+        # ------------------> Regression Modelling <-------------------#
+        model = PLSRegression(scale=False, n_components=params["n_components"])
         yp = KF_CV().cross_val_predictor(
-            model=model, folds=folds, x=x2[0], y=np.array(self._ytrain))
+            model=model, folds=self.folds, x=x[0], y=np.array(self._ytrain)
+        )
         self._cv_df = KF_CV().metrics_cv(
-            y=np.array(self._ytrain), ypcv=yp, folds=folds)[1]
-
-        score = self._cv_df.loc["mean", 'rmse'] / \
-            np.max([0.01, self._cv_df.loc["mean", 'r2']])
-
-        Model = PLSRegression(scale=False, n_components=params['n_components'])
-        Model.fit(x2[0], self._ytrain)
-
-        if self.SCORE > score:
-            self.SCORE = score
-            self._ycv = KF_CV().meas_pred_eq(y=np.array(self._ytrain), ypcv=yp, folds=folds)
-            self._yc = Model.predict(x2[0])
-            self._yt = Model.predict(x2[1])
-            self._model = Model
-            for key, value in params.items():
-                try:
-                    params[key] = int(value)
-                except (TypeError, ValueError):
-                    params[key] = value
-
-            self._best = params
-            self.pretreated = DataFrame(x2[0])
-            self._sel_ratio = sel_ratio(Model, x2[0])
+            y=np.array(self._ytrain), ypcv=yp, folds=self.folds
+        )[1]
+        # ------------------------> Scoring <--------------------------#
+        score = self._cv_df.loc["mean", "rmse"] / max(
+            0.01, self._cv_df.loc["mean", "r2"]
+        )
         return score
 
-    ############################################ iplsr #########################################
+    def fit_(self):
+        """
+        Refit the PLSR model on the full training set with the best hyperparameters
+        found during tuning, and compute the training, CV and test predictions.
+        """
+        x = [signal_preprocess(i, tune=self.signal_tune_) for i in [self._xc, self._xt]]
+
+        self._model = PLSRegression(
+            scale=False, n_components=self._tune["n_components"]
+        )
+        self._model.fit(x[0], np.array(self._ytrain))
 
+        yp = KF_CV().cross_val_predictor(
+            model=self._model, folds=self.folds, x=x[0], y=np.array(self._ytrain)
+        )
+        self._cv_df = KF_CV().metrics_cv(
+            y=np.array(self._ytrain), ypcv=yp, folds=self.folds
+        )[1]
 
-class TpeIpls(Regmodel):
-    def __init__(self, train, test, n_iter=10, n_intervall=5, cv=3, bestglobalparams=None):
-        self.glob = bestglobalparams
-        self._best = {}
-        self.folds = KF_CV().CV(x=np.array(
-            train[0]), y=np.array(train[1]), n_folds=3)
-        x1 = [eval(str(self.glob['normalization'])+'(train[0])'),
-              eval(str(self.glob['normalization'])+'(test[0])')]
-        self.x2 = [savgol_filter(x1[i], polyorder=self.glob['polyorder'], deriv=self.glob['deriv'],
-                                 window_length=self.glob['window_length']) for i in range(2)]
-        self.n_intervall = n_intervall
-        self.n_arrets = self.n_intervall*2
+        self._ycv = KF_CV().meas_pred_eq(
+            y=np.array(self._ytrain), ypcv=yp, folds=self.cvfolds
+        )
+        self._yc, self._yt = [self._model.predict(i) for i in x]
 
-        add = {'n_components': hp.randint('n_components', 1, 20)}
-        add.update({'v'+str(i): hp.randint('v'+str(i), 0,
-                   train[0].shape[1]) for i in range(1, self.n_arrets+1)})
-        super().__init__(train, test, n_iter, nfolds=cv, add_hyperparams=add)
+    ############################################ iplsr #########################################
 
-        # parameters in common
+
+class TpeIpls(BaseSys1):
+    """
+    TpeIpls (Tree-structured Parzen Estimator - Interval Partial Least Squares)
+    is a subclass of BaseSys1 designed for variable selection in regression models.
+    This class utilizes hyperparameter optimization for Partial Least Squares (PLS)
+    regression by selecting optimal wavelengths (features) and the number of PLS components.
+
+    Attributes:
+        cvfolds (Dict): Cross-validation folds for model evaluation.
+        selected_wls_ (array-like): Selected wavelengths (features) after optimization.
+        _model (PLSRegression): Trained PLS regression model.
+        _yc (array-like): Model predictions on training data.
+        _ycv (array-like): Cross-validated predictions.
+        _yt (array-like): Model predictions on test data.
+
+    Methods:
+        generate_population(population_size, candidate_length):
+            Generates a population of wavelength selection candidates for optimization.
+
+        objective(params):
+            Objective function for hyperparameter optimization, returning a score
+            based on RMSE and R-squared metrics.
+
+        fit_():
+            Trains the PLS regression model using the selected wavelengths and components.
+
+
+    Initializes the TpeIpls class, defining the search space for hyperparameter optimization.
+
+        Args:
+            train (Tuple): Training dataset containing (X_train, y_train).
+            test (Tuple): Test dataset containing (X_test, y_test).
+            n_iter (int): Number of iterations for hyperparameter tuning.
+            cvfolds (Dict): Cross-validation folds for evaluation.
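+
+        Example:
+            A minimal usage sketch (illustrative only; `folds` is assumed to come
+            from ``KF_CV().CV(...)``):
+
+            >>> vs = TpeIpls(train=(x_train, y_train), test=(x_test, y_test),
+            ...              n_iter=100, cvfolds=folds)
+            >>> vs.fit_()
+            >>> vs.selected_wls_  # indices of the retained wavelengths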
+    """
+
+    def __init__(self, train: Tuple, test: Tuple, n_iter: int, cvfolds: Dict):
+        print(
+            "---------------------------------    Start Variable Selection Process    ---------------------------------"
+        )
+        self.cvfolds = cvfolds
+
+        # Define search space for hyperparameter optimization
+        newspace = {
+            "n_components": scope.int(
+                hp.randint(
+                    "n_components",
+                    1,
+                    min(train[0].shape[1] // 20, train[0].shape[0] - 1),
+                )
+            ),
+            "candidate": hp.choice(
+                "candidate",
+                TpeIpls.generate_population(
+                    population_size=200, candidate_length=train[0].shape[1]
+                ),
+            ),
+        }
+
+        super().__init__(
+            train=train, test=test, n_iter=n_iter, cvfolds=cvfolds, new_space=newspace
+        )
+
+    @staticmethod
+    def generate_population(population_size: int, candidate_length: int):
+        """
+        Generates a population of wavelength selection candidates, where each candidate is a binary mask
+        indicating selected wavelengths.
+
+        Args:
+            population_size (int): Number of candidate selections to generate.
+            candidate_length (int): Total number of wavelengths (features).
+
+        Returns:
+            List[np.ndarray]: A list of wavelength index selections.
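+
+        Example:
+            Illustrative only: with ``candidate_length=10`` a candidate could be
+            ``array([0, 1, 2, 6, 7, 8, 9])``, i.e. the indices of the wavelengths
+            kept by one random run-length 0/1 mask.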
+        """
+        population = []
+        min_run, max_run = candidate_length // 8, candidate_length // 3
+
+        for _ in range(population_size):
+            sequence = []
+            current = random.choice([0, 1])  # Start with either 0 or 1
+
+            while len(sequence) < candidate_length:
+                run_length = random.randint(min_run, max_run)
+                sequence.extend(
+                    [current] * min(run_length, candidate_length - len(sequence))
+                )
+                current = 1 - current  # Flip between 0 and 1
+
+            population.append(np.nonzero(sequence)[0])
+
+        return population
 
     def objective(self, params):
-        # wevelengths index
-        self.idx = [params['v'+str(i)] for i in range(1, self.n_arrets+1)]
-        self.idx.sort()
-        arrays = [np.arange(self.idx[2*i], self.idx[2*i+1]+1)
-                  for i in range(self.n_intervall)]
-        id = np.unique(np.concatenate(arrays, axis=0), axis=0)
-        prepared_data = [self.x2[i][:, id] for i in range(2)]
-
-        # Modelling
-        folds = KF_CV().CV(x=prepared_data[0], y=np.array(
-            self._ytrain), n_folds=self._nfolds)
-        try:
-            model = PLSRegression(
-                scale=False, n_components=params['n_components'])
-            yp = KF_CV().cross_val_predictor(model=model, folds=folds,
-                                             x=prepared_data[0], y=np.array(self._ytrain))
-            self._cv_df = KF_CV().metrics_cv(
-                y=np.array(self._ytrain), ypcv=yp, folds=folds)[1]
-        except:
-            params["n_components"] = 1
-            model = PLSRegression(
-                scale=False, n_components=params["n_components"])
-            yp = KF_CV().cross_val_predictor(model=model, folds=folds,
-                                             x=prepared_data[0], y=np.array(self._ytrain))
-            self._cv_df = KF_CV().metrics_cv(
-                y=np.array(self._ytrain), ypcv=yp, folds=folds)[1]
-
-        score = self._cv_df.loc["mean", 'rmse'] / \
-            np.max([0.01, self._cv_df.loc["mean", 'r2']])
-
-        if self.SCORE > score:
-            self.SCORE = score
-            self._best = params
-            self.arrays = arrays
-            self.prepared_data = prepared_data
-            self.model = model
+        """
+        Objective function used for hyperparameter optimization.
+        Evaluates the performance of a PLS model on cross-validation data using RMSE and R-squared.
+
+        Args:
+            params (dict): Dictionary containing 'candidate' (selected wavelengths)
+                           and 'n_components' (number of PLS components).
+
+        Returns:
+            float: Score computed as RMSE divided by the maximum of (0.01, R-squared).
+        """
+        candidate = params["candidate"]  # Selected wavelengths index
+        model = PLSRegression(scale=False, n_components=params["n_components"])
+        yp = KF_CV().cross_val_predictor(
+            model=model,
+            folds=self.folds,
+            x=self._xc[:, candidate],
+            y=np.array(self._ytrain),
+        )
+
+        self._cv_df = KF_CV().metrics_cv(
+            y=np.array(self._ytrain), ypcv=yp, folds=self.folds
+        )[1]
+
+        score = self._cv_df.loc["mean", "rmse"] / max(
+            0.01, self._cv_df.loc["mean", "r2"]
+        )
         return score
 
-    def best_fit(self):
-        Model = PLSRegression(
-            scale=False, n_components=self.model.n_components)
-        Model.fit(self.prepared_data[0], self._ytrain)
-
-        self._yc = Model.predict(self.prepared_data[0])
-        yp = KF_CV().cross_val_predictor(model=Model, folds=self.folds,
-                                         x=self.prepared_data[0], y=np.array(self._ytrain))
-        self._ycv = KF_CV().meas_pred_eq(y=np.array(
-            self._ytrain), ypcv=yp, folds=self.folds)
-        self._yt = Model.predict(self.prepared_data[1])
-        self._model = Model
-
-        for key, value in self._best.items():
-            try:
-                self._best[key] = int(value)
-            except (TypeError, ValueError):
-                self._best[key] = value
-
-        self.pretreated = DataFrame(self.x2[0])
-        limits = np.ones(len(self.arrays)*2)
-        for i in range(len(self.arrays)):
-            limits[2*i], limits[2*i +
-                                1] = self.arrays[i][0], self.arrays[i][self.arrays[i].shape[0]-1]
-
-        self.limits = limits.astype(int)
-
-    ###########################################  LWPLSR  #########################################
-
-
-class LwplsObject:
-    def __init__(self, Reg_json=None, pred=None):
-        if Reg_json is not None and pred is not None:
-            from pandas import json_normalize
-            self.model_ = Reg_json['model']
-            self.best_hyperparams_ = Reg_json['best_lwplsr_params']
-            self.pred_data_ = [json_normalize(Reg_json[i]) for i in pred]
-
-    ############################################  Pcr  #########################################
-
-
-class LWPLS(Regmodel):
-    def __init__(self, train, test, n_iter=10, cv=3, bestglobalparams=None):
-        self.glob = bestglobalparams
-        self._best = {}
-        add = {
-            'localplsVL': hp.randint('localplsVL', 2, bestglobalparams['n_components']),
-            'dist': hp.choice('dist', ['euc', 'mah']),
-            'h': hp.randint('h', 1, 3)}
-        self.folds = KF_CV().CV(x=np.array(
-            train[0]), y=np.array(train[1]), n_folds=3)
-
-        x1 = [eval(str(self.glob['normalization'])+'(train[0])'),
-              eval(str(self.glob['normalization'])+'(test[0])')]
-        self.x2 = [savgol_filter(x1[i], polyorder=self.glob['polyorder'], deriv=self.glob['deriv'],
-                                 window_length=self.glob['window_length']) for i in range(2)]
-        super().__init__(train, test, n_iter, nfolds=cv,
-                         add_hyperparams=add, remove_hyperparams=None)
+    def fit_(self):
+        """
+        Fits the final PLS regression model using the selected wavelengths and components from optimization.
+        Computes predictions on training, cross-validation, and test datasets.
+        """
+        self.selected_wls_ = self._tune["candidate"]
+        self._model = PLSRegression(
+            scale=False, n_components=self._tune["n_components"]
+        )
+        self._model.fit(self._xc[:, self.selected_wls_], self._ytrain)
+
+        self._yc = self._model.predict(self._xc[:, self.selected_wls_])
+        yp = KF_CV().cross_val_predictor(
+            model=self._model,
+            folds=self.folds,
+            x=self._xc[:, self.selected_wls_],
+            y=np.array(self._ytrain),
+        )
+        self._ycv = KF_CV().meas_pred_eq(
+            y=np.array(self._ytrain), ypcv=yp, folds=self.folds
+        )
+        self._yt = self._model.predict(self._xt[:, self.selected_wls_])
+
+    #####################################################################################
+
+    ###################################  LWPLSR  ########################################
+
+
+class LWPLS(BaseSys1):
+    """
+    Locally Weighted Partial Least Squares (LWPLS) model.
+    
+    This class implements a variation of the PLS algorithm that incorporates local weighting 
+    to improve predictive performance. It inherits from `BaseSys1` and optimizes model 
+    hyperparameters using cross-validation.
+    
+    Attributes:
+    ----------
+    cvfolds : Dict
+        Dictionary of cross-validation folds.
+    __globalplsVL : int
+        The number of latent variables for the global PLS model.
+    _model : dict
+        Dictionary storing the best model parameters after tuning.
+    _cv_df : DataFrame
+        Cross-validation performance metrics.
+    _ycv : array
+        Predicted values from cross-validation.
+    _yt : array
+        Predicted values on test data.
+    
+    Parameters:
+    ----------
+    train : Tuple
+        Training data.
+    test : Tuple
+        Test data.
+    n_iter : int
+        Number of iterations for hyperparameter tuning.
+    cvfolds : Dict
+        Dictionary defining the cross-validation folds.
+    globalplsVL : int
+        Number of latent variables for global PLS.
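+
+    Example:
+    --------
+        A minimal usage sketch (illustrative only; `folds` is assumed to come from
+        ``KF_CV().CV(...)`` and `n_comp` from a previously tuned global PLSR model):
+
+        >>> lw = LWPLS(train=(x_train, y_train), test=(x_test, y_test),
+        ...            n_iter=30, cvfolds=folds, globalplsVL=n_comp)
+        >>> lw.fit_()
+        >>> lw.pred_data_["test"]  # test-set predictions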
+    """
+    
+    def __init__(self, train: Tuple, test: Tuple, n_iter: int, cvfolds: Dict, globalplsVL: int):
+        print(
+            "---------------------------------    Start LWPLS              ------------------------------------"
+        )
+        self.cvfolds = cvfolds
+        self.__globalplsVL = globalplsVL
+
+        newspace = {
+            "localplsVL": hp.randint("localplsVL", 2, self.__globalplsVL),
+            "dist": hp.choice("dist", ["euc", "mah"]),
+            "h": hp.randint("h", 1, 3),
+        }
+        
+        super().__init__(train, test, n_iter, cvfolds=cvfolds, new_space=newspace)
 
     def objective(self, params):
+        """
+        Objective function for hyperparameter tuning.
+        
+        This function evaluates the performance of LWPLS on cross-validation folds
+        using the given hyperparameters.
+        
+        Parameters:
+        ----------
+        params : dict
+            Dictionary containing hyperparameter values.
+        
+        Returns:
+        -------
+        float
+            The optimization score, calculated as the mean cross-validation RMSE
+            divided by max(0.01, mean R²).
+        """
         yp = {}
-        for i in self.folds.keys():
-            yp[i] = lwpls(Xtrain=np.delete(np.array(self.x2[0]), self.folds[i], axis=0),
-                          ytrain=np.delete(
-                              np.array(self._ytrain), self.folds[i], axis=0),
-                          Xtest=np.array(self.x2[0])[self.folds[i]],
-                          globalplsVL=self.glob['n_components'], metric=params['dist'], h=params['h'], k=200,
-                          localplsVL=params['localplsVL'], center=True, scale=False, sklearn=True).ravel()
-
-        self._cv_df = KF_CV().metrics_cv(y=np.array(
-            self._ytrain), ypcv=yp, folds=self.folds)[1]
-        score = self._cv_df.loc["mean", 'rmse'] / \
-            np.max([0.01, self._cv_df.loc["mean", 'r2']])
-
-        if self.SCORE > score:
-            self.SCORE = score
-            self._best = params
+        for fold, idx in self.folds.items():
+            yp[fold] = lwpls(
+                Xtrain=np.delete(np.array(self._xc), idx, axis=0),
+                ytrain=np.delete(np.array(self._ytrain), idx, axis=0),
+                Xtest=np.array(self._xc)[idx, :],
+                globalplsVL=self.__globalplsVL,
+                metric=params["dist"],
+                h=params["h"],
+                k=200,
+                localplsVL=params["localplsVL"],
+                center=True,
+                scale=False,
+                sklearn=True,
+            ).ravel()
+
+        self._cv_df = KF_CV().metrics_cv(
+            y=np.array(self._ytrain), ypcv=yp, folds=self.folds
+        )[1]
+        score = self._cv_df.loc["mean", "rmse"] / max(
+            0.01, self._cv_df.loc["mean", "r2"]
+        )
         return score
 
-    def best_fit(self):
-        from .lwplsr_julia_converted import lwpls
+    def fit_(self):
+        """
+        Fit the LWPLS model using the best hyperparameters found during tuning.
+        
+        This function trains the model on the entire training dataset and makes predictions 
+        for both cross-validation folds and test data.
+        """
         yp = {}
-        for i in self.folds.keys():
-            yp[i] = lwpls(Xtrain=np.delete(np.array(self.x2[0]), self.folds[i], axis=0),
-                          ytrain=np.delete(
-                              np.array(self._ytrain), self.folds[i], axis=0),
-                          Xtest=np.array(self.x2[0])[self.folds[i]],
-                          globalplsVL=self.glob['n_components'], metric=self._best['dist'], h=self._best['h'], k=200,
-                          localplsVL=self._best['localplsVL'], center=True, scale=False, sklearn=True).ravel()
-
-        self._ycv = KF_CV().meas_pred_eq(y=np.array(
-            self._ytrain), ypcv=yp, folds=self.folds)
-        self._yt = lwpls(Xtrain=np.array(self.x2[0]),
-                         ytrain=np.array(self._ytrain),
-                         Xtest=np.array(self.x2[1]),
-                         globalplsVL=self.glob['n_components'], metric=self._best['dist'], h=self._best['h'], k=200,
-                         localplsVL=self._best['localplsVL'], center=True, scale=False, sklearn=True).ravel()
-        self.pretreated = DataFrame(self.x2[0])
-        self._model = "LW-PLS"
-        for key, value in self._best.items():
-            self._best[key] = int(value) if isinstance(value, np.int64) else float(
-                value) if isinstance(value, np.float64) else value
-
-        self._model = {'globalplsVL': self.glob['n_components'],
-                       'localplsVL': self._best['localplsVL'],
-                       'dist': self._best['dist'],
-                       'k': 200,
-                       'h': self._best['h']}
-        self._best = {'normalization':self.glob['normalization'],
-                          'polyorder':self.glob['polyorder'],
-                          'window_length':self.glob['window_length'],
-                          'deriv':self.glob['deriv'],
-                          'globalplsVL': self.glob['n_components']}
+        from .lwplsr_julia_converted import lwpls
+
+        self._model = {
+            "globalplsVL": self.__globalplsVL,
+            "metric": self._tune["dist"],
+            "h": self._tune["h"],
+            "k": 200,
+            "localplsVL": self._tune["localplsVL"],
+            "center": True,
+            "scale": False,
+            "sklearn": True,
+        }
+
+        for fold, idx in self.folds.items():
+            yp[fold] = lwpls(
+                Xtrain=np.delete(np.array(self._xc), idx, axis=0),
+                ytrain=np.delete(np.array(self._ytrain), idx, axis=0),
+                Xtest=np.array(self._xc)[idx, :],
+                **self._model,
+            ).ravel()
+
+        self._ycv = KF_CV().meas_pred_eq(
+            y=np.array(self._ytrain), ypcv=yp, folds=self.folds
+        )
+        self._yt = lwpls(
+            Xtrain=np.array(self._xc),
+            ytrain=np.array(self._ytrain),
+            Xtest=np.array(self._xt),
+            **self._model,
+        ).ravel()
\ No newline at end of file
diff --git a/src/utils/samsel.py b/src/utils/samsel.py
index 909a6f6bf52754f7b7a50aaef59c4542c8f085be..781a463153042fe0fc05180159c217e27729acea 100644
--- a/src/utils/samsel.py
+++ b/src/utils/samsel.py
@@ -6,15 +6,12 @@ import numpy as np
 from scipy.spatial.distance import cdist
 
 
-
-
 class Samplers:
     def __init__(self) -> None:
         pass
-    
-    
+
     @staticmethod
-    def ksrdm(X, rset, method = 'rdm') -> Tuple[Union[ndarray, DataFrame], list]:
+    def ksrdm(X, rset, method="rdm") -> Tuple[Union[ndarray, DataFrame], list]:
         """
         Splits the dataset using the Kennard-Stone algorithm.
 
@@ -27,35 +24,35 @@ class Samplers:
             A tuple containing:
             - The original dataset (`self.x`).
             - A list of indices representing the training set selection.
-        
+
         Notes
         -----
         Requires `kennard_stone` library to be installed.
         """
-        if  method =='ks':
+        if method == "ks":
             from kennard_stone import train_test_split
-        elif 'rdm':
+        elif "rdm":
             from sklearn.model_selection import train_test_split
-        train, test = train_test_split(X, train_size= rset)
+        train, test = train_test_split(X, train_size=rset)
         # res = tuple(zip(_train.index, self.x.index))
-        
+
         import numpy as np
-        calset = DataFrame(index = X.index, columns = ['calset'])
-        calset['names'] = X.index
-        calset['calset'].loc[train.index] = 'Selected'
-        calset['calset'].loc[test.index] = 'Not-Selected'
-        calset.index = calset['calset'].to_numpy()
-        calset['cluster'] =["cluster1"] * X.shape[0]
-        return calset.drop(['calset'], axis = 1)
 
+        calset = DataFrame(index=X.index, columns=["calset"])
+        calset["names"] = X.index
+        calset["calset"].loc[train.index] = "Selected"
+        calset["calset"].loc[test.index] = "Not-Selected"
+        calset.index = calset["calset"].to_numpy()
+        calset["cluster"] = ["cluster1"] * X.shape[0]
+        return calset.drop(["calset"], axis=1)
 
     def medoid(X, t):
         """
         Computes the medoid of a DataFrame.
-        
+
         Parameters:
         df (pandas.DataFrame): DataFrame where rows represent samples and columns represent variables.
-        
+
         Returns:
         str: The name (index) of the medoid (most central sample).
         """
@@ -63,7 +60,9 @@ class Samplers:
         sname = []
         for i in set(t.index):
             # Compute pairwise distances between all samples
-            distances = cdist(X.loc[t.loc[i,:].values,:].values, X.values, metric='euclidean')
+            distances = cdist(
+                X.loc[t.loc[i, :].values, :].values, X.values, metric="euclidean"
+            )
             # Sum the distances for each sample (row)
             sum_distances = np.sum(distances, axis=1)
             # Find the index of the sample with the smallest sum of distances
@@ -74,68 +73,99 @@ class Samplers:
         # calset['names'] = X.index
         return sname
 
-
-
-
-
-
-
-
-
 def selection_method(X, method, **kwargs):
+    """
+    Selects samples from the dataset `X` based on the specified `method`.
+
+    Parameters:
+    -----------
+    X : pandas.DataFrame
+        The dataset from which samples are selected. Each row represents a sample.
+
+    method : str
+        The sample selection method to use. Options include:
+        - 'random': Randomly selects samples.
+        - 'kennard-stone': Selects samples using the Kennard-Stone algorithm.
+        - 'meta-ks': Applies KMeans clustering, then Kennard-Stone within each cluster.
+        - 'meta-medoids': Applies KMeans clustering, then selects cluster medoids.
+
+    **kwargs : dict
+        Additional keyword arguments based on the method:
+        - rset (float): Proportion of samples to select for 'random' and 'kennard-stone'.
+        - labels (array-like): Class labels for stratification during random sampling.
+        - rset_meta (float): Proportion of samples per cluster for 'meta-ks'.
+
+    Returns:
+    --------
+    list
+        A list of indices corresponding to the selected samples.
+
+    Example:
+    --------
+    >>> selection_method(X, method='random', rset=0.3)
+    [0, 5, 12, 18, 22]
+    """
     import streamlit as st
-    #['random', 'kennard-stone', 'medoids', 'meta-clusters']
-    if method =='random':
+    import numpy as np
+
+    if method == "random":
         from sklearn.model_selection import train_test_split
-        if 'labels' in kwargs:
-            selected, _ = train_test_split(X, train_size= kwargs['rset'], random_state= 42, stratify= kwargs['labels'])
+
+        if "labels" in kwargs:
+            selected, _ = train_test_split(
+                X, train_size=kwargs["rset"], random_state=42, stratify=kwargs["labels"]
+            )
         else:
-            pass
-            selected, _ = train_test_split(X, train_size= kwargs['rset'], random_state= 42)
+            selected, _ = train_test_split(
+                X, train_size=kwargs["rset"], random_state=42
+            )
         sname = list(selected.index)
 
-        
-    elif method == 'kennard-stone':
+    elif method == "kennard-stone":
         from kennard_stone import train_test_split
-        selected, _ = train_test_split(X, train_size= kwargs['rset'])
+
+        selected, _ = train_test_split(X, train_size=kwargs["rset"])
         sname = list(selected.index)
-        
 
-    if method in ['meta-ks','meta-medoids']:
+    elif method in ["meta-ks", "meta-medoids"]:
+        from sklearn.cluster import KMeans
+        from sklearn.metrics import silhouette_score
+
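+        # Pick the number of clusters k that maximizes the silhouette score,
+        # scanning k = 2 .. min(9, n_samples - 1).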
         best_k = 2
         best_score = -1
-        for k in range(2, min(10,X.shape[0])):
-            from sklearn.cluster import KMeans
-            from sklearn.metrics import silhouette_score
-            model = KMeans(n_clusters=best_k, random_state=42, init='random', n_init=1, max_iter=100)
+        for k in range(2, min(10, X.shape[0])):
+            model = KMeans(n_clusters=k, random_state=42, init="random", n_init=1, max_iter=100)
             labels = model.fit_predict(X)
             score = silhouette_score(X, labels)
             if score > best_score:
                 best_score = score
                 best_k = k
-        from sklearn.cluster import KMeans
-        model = KMeans(n_clusters=best_k, random_state=42, init='random', n_init=1, max_iter=100)
-        model.fit(X)
-        yp = model.predict(X)
+
+        model = KMeans(n_clusters=best_k, random_state=42, init="random", n_init=1, max_iter=100)
+        yp = model.fit_predict(X)
 
         sname = []
         for i in range(best_k):
-            t = X.loc[yp==i]
+            cluster_samples = X.loc[yp == i]
+
             if method == "meta-medoids":
                 from scipy.spatial.distance import cdist
-                distances = cdist(t.values, t.values, metric='euclidean')                    
+
+                distances = cdist(cluster_samples.values, cluster_samples.values, metric="euclidean")
                 sum_distances = np.sum(distances, axis=1)
                 medoid_index = np.argmin(sum_distances)
-                sname.append(X.index[medoid_index])
-                
-        
-            elif method == 'meta-ks':
+                sname.append(cluster_samples.index[medoid_index])
+
+            elif method == "meta-ks":
                 from kennard_stone import train_test_split
-                if t.shape[0]>5:
-                    selected, _ = train_test_split(t, train_size= kwargs['rset_meta'])
+
+                if cluster_samples.shape[0] > 5:
+                    selected, _ = train_test_split(cluster_samples, train_size=kwargs["rset_meta"])
                 else:
-                    selected = t
-                sname +=list(selected.index)
-                # import streamlit as st
-                # st.write(best_k)
-    return sname
\ No newline at end of file
+                    selected = cluster_samples
+                sname += list(selected.index)
+
+    else:
+        raise ValueError("Unsupported selection method. Choose from 'random', 'kennard-stone', 'meta-ks', or 'meta-medoids'.")
+
+    return sname
diff --git a/src/utils/visualize.py b/src/utils/visualize.py
index 141580d2d956049ac61191fd5c4628500546d1a7..27b015f26f86df67d06d362309432219aa058dae 100644
--- a/src/utils/visualize.py
+++ b/src/utils/visualize.py
@@ -1,37 +1,101 @@
-
 import streamlit as st
 import numpy as np
 import matplotlib.pyplot as plt
 import seaborn as sns
+
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ predictions histogram ~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 
 @st.cache_data
 def pred_hist(pred):
-    # Creating histogram
-    hist, axs = plt.subplots(1, 1, figsize=(15, 3),
-                             tight_layout=True)
-
-    # Add x, y gridlines
-    axs.grid(color='grey', linestyle='-.', linewidth=0.5, alpha=0.6)
-    # Remove axes splines
-    for s in ['top', 'bottom', 'left', 'right']:
+    """
+    Generate a histogram for the given prediction data.
+
+    This function creates a single-row histogram plot with custom styling,
+    including gridlines, removed splines, and adjusted tick parameters.
+
+    Args:
+        pred (array-like): A list or array of prediction values to visualize.
+
+    Returns:
+        matplotlib.figure.Figure: The generated histogram figure.
+
+    Example:
+        ```python
+        import numpy as np
+        pred = np.random.normal(0, 1, 1000)
+        hist = pred_hist(pred)
+        st.pyplot(hist)
+        ```
+    """
+    # Create a figure and axis for the histogram
+    hist, axs = plt.subplots(1, 1, figsize=(15, 3), tight_layout=True)
+
+    # Add gridlines to the plot
+    axs.grid(color="grey", linestyle="-.", linewidth=0.5, alpha=0.6)
+
+    # Remove axes splines (box lines around the plot)
+    for s in ["top", "bottom", "left", "right"]:
         axs.spines[s].set_visible(False)
-    # Remove x, y ticks
-    axs.xaxis.set_ticks_position('none')
-    axs.yaxis.set_ticks_position('none')
-    # Add padding between axes and labels
+
+    # Remove x and y axis ticks
+    axs.xaxis.set_ticks_position("none")
+    axs.yaxis.set_ticks_position("none")
+
+    # Add padding between axes and labels for clarity
     axs.xaxis.set_tick_params(pad=5)
     axs.yaxis.set_tick_params(pad=10)
-    # Creating histogram
+
+    # Create the histogram
     N, bins, patches = axs.hist(pred, bins=12)
+
     return hist
 
 
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ predictions histogram ~~~~~~~~~~~~~~~~~~~~~~~~~~
 @st.cache_data
-def plot_spectra(specdf=None, color=None, cmap=None, xunits=None, yunits=None, mean=False):
-    # pass
+def plot_spectra(
+    specdf=None, color=None, cmap=None, xunits=None, yunits=None, mean=False
+):
+    """
+    Plots spectral data from a DataFrame, with optional color coding based on categories
+    and an option to plot the mean spectrum.
+
+    Parameters:
+    -----------
+    specdf : pandas.DataFrame, optional
+        DataFrame containing spectral data, where rows represent samples and columns represent wavelengths.
+
+    color : pandas.Series, optional
+        Series containing categorical labels for each spectrum, used for color mapping.
+
+    cmap : dict, optional
+        Dictionary mapping category labels (from `color`) to corresponding plot colors.
+
+    xunits : str, optional
+        Label for the x-axis, representing the wavelength units.
+
+    yunits : str, optional
+        Label for the y-axis, representing the intensity units.
+
+    mean : bool, default=False
+        If True, plots the mean spectrum across all samples as a bold black line.
+
+    Returns:
+    --------
+    matplotlib.figure.Figure
+        A matplotlib figure object containing the spectral plot.
+
+    Example:
+    --------
+    >>> import pandas as pd
+    >>> import numpy as np
+    >>> specdf = pd.DataFrame(np.random.rand(10, 100))
+    >>> color = pd.Series(['A'] * 5 + ['B'] * 5)
+    >>> cmap = {'A': 'red', 'B': 'blue'}
+    >>> fig = plot_spectra(specdf, color=color, cmap=cmap, xunits='Wavelength (nm)', yunits='Absorbance', mean=True)
+    >>> fig.show()
+    """
     import matplotlib.pyplot as plt
     import numpy as np
 
@@ -39,7 +103,6 @@ def plot_spectra(specdf=None, color=None, cmap=None, xunits=None, yunits=None, m
 
     if color is None or cmap is None:
         specdf.T.plot(legend=False, ax=ax, color="blue")
-
     else:
         cats = color.unique()
         for key, value in cmap.items():
@@ -49,6 +112,7 @@ def plot_spectra(specdf=None, color=None, cmap=None, xunits=None, yunits=None, m
         for key, value in cmap.items():
             idx = color.index[color == key].tolist()
             specdf.loc[idx].T.plot(legend=False, ax=ax, color=value)
+
     if mean:
         specdf.mean().T.plot(legend=False, ax=ax, color="black", linewidth=5)
 
@@ -56,125 +120,343 @@ def plot_spectra(specdf=None, color=None, cmap=None, xunits=None, yunits=None, m
     ax.set_ylabel(yunits, fontsize=30)
     plt.margins(x=0)
     plt.tight_layout()
-    # plt.legend()
+
     return fig
 
 
 @st.cache_data
 def barhplot(metadf, cmap):
+    """
+    Creates a horizontal bar plot based on the counts of unique values
+    in the first column of the provided DataFrame.
+
+    Parameters:
+    -----------
+    metadf : pandas.DataFrame
+        DataFrame containing the categorical data to be visualized.
+        The plot is based on the counts of unique values in the first column.
+
+    cmap : dict
+        Dictionary mapping unique values in `metadf` to corresponding colors.
+        The keys should match the unique values in the first column of `metadf`.
+
+    Returns:
+    --------
+    matplotlib.figure.Figure
+        A matplotlib figure object containing the horizontal bar plot.
+
+    Example:
+    --------
+    >>> import pandas as pd
+    >>> metadf = pd.DataFrame({'Category': ['A', 'B', 'A', 'C', 'B', 'A']})
+    >>> cmap = {'A': 'red', 'B': 'blue', 'C': 'green'}
+    >>> fig = barhplot(metadf, cmap)
+    >>> fig.show()
+    """
+    import matplotlib.pyplot as plt
+
     counts = metadf.groupby(metadf.columns[0]).size()
     counts = counts.loc[cmap.keys()]
+
     fig, ax = plt.subplots(figsize=(10, 5))
     ax.barh(counts.index, counts.values, color=cmap.values())
     plt.gca().invert_yaxis()
-    plt.xlabel('Count')
+    plt.xlabel("Count")
     plt.ylabel(str(metadf.columns[0]).capitalize())
+
     return fig
 
 
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Cal/val hist ~~~~~~~~~~~~~~~~~~~~~~~~~~
 @st.cache_data
-def hist(y, y_train, y_test, target_name='y'):
+def hist(y, y_train, y_test, target_name="y"):
+    """
+    Plot histograms of the overall, calibration, and validation target distributions.
+
+    This function generates a histogram plot displaying the distribution of the target variable
+    for the entire dataset, along with separate distributions for the calibration (training) and
+    validation (testing) subsets. It uses Seaborn's histplot function with kernel density estimation
+    (KDE) to visualize the distribution curves.
+
+    Parameters:
+    -----------
+    y : array-like
+        Array of target values for the total dataset.
+    y_train : array-like
+        Array of target values for the calibration (training) dataset.
+    y_test : array-like
+        Array of target values for the validation (testing) dataset.
+    target_name : str, optional (default='y')
+        Name of the target variable. This is used for labeling purposes in the plot.
+
+    Returns:
+    --------
+    matplotlib.figure.Figure
+        The Matplotlib Figure object containing the histogram plot.
+
+    Example:
+    --------
+    >>> fig = hist(y_total, y_calibration, y_validation, target_name='Response')
+    >>> fig.show()
+    """
+    # Create a new figure and axis with specified dimensions.
     fig, ax = plt.subplots(figsize=(5, 2))
-    sns.histplot(y, color="#004e9e", kde=True, label=str(
-        target_name) + " (Total)", ax=ax, fill=True)
-    sns.histplot(y_train, color="#2C6B6F", kde=True,
-                 label=str(target_name)+" (Cal)", ax=ax, fill=True)
-    sns.histplot(y_test, color="#d0f7be", kde=True, label=str(
-        target_name)+" (Val)", ax=ax, fill=True)
+
+    # Plot histogram for the total dataset with KDE.
+    sns.histplot(
+        y,
+        color="#004e9e",
+        kde=True,
+        label=str(target_name) + " (Total)",
+        ax=ax,
+        fill=True,
+    )
+
+    # Plot histogram for the calibration (training) dataset with KDE.
+    sns.histplot(
+        y_train,
+        color="#2C6B6F",
+        kde=True,
+        label=str(target_name) + " (Cal)",
+        ax=ax,
+        fill=True,
+    )
+
+    # Plot histogram for the validation (testing) dataset with KDE.
+    sns.histplot(
+        y_test,
+        color="#d0f7be",
+        kde=True,
+        label=str(target_name) + " (Val)",
+        ax=ax,
+        fill=True,
+    )
+
+    # Set the x-axis label using the target name.
     ax.set_xlabel(str(target_name))
+
+    # Add a legend to the plot.
     plt.legend()
+
+    # Adjust the layout for a tight fit.
     plt.tight_layout()
+
     return fig
 
 
 @st.cache_data
 def reg_plot(meas, pred, train_idx, test_idx, trainplot=True):
+    """
+    Generate regression plots with linear fit lines for calibration and validation data.
+
+    This function generates regression plots by plotting measured versus predicted values for
+    both calibration (training) and validation (testing) datasets. It computes linear regression
+    models for the datasets using scikit-learn's LinearRegression, rounds the resulting slope and
+    intercept to two decimal places, and displays the regression equation in the legend. Additionally,
+    it annotates data points that are considered outliers based on a threshold of three standard deviations
+    from the mean residual.
+
+    Parameters:
+    -----------
+    meas : list of two array-likes
+        Measured values for calibration and validation data (a mutable sequence,
+        e.g. a list, since the entries are reshaped in place).
+        - meas[0]: measured values for calibration (training).
+        - meas[1]: measured values for validation (testing).
+    pred : list of two array-likes
+        Predicted values for calibration and validation data.
+        - pred[0]: predicted values for calibration (training).
+        - pred[1]: predicted values for validation (testing).
+    train_idx : array-like
+        Indices (or labels) corresponding to the calibration (training) data samples.
+    test_idx : array-like
+        Indices (or labels) corresponding to the validation (testing) data samples.
+    trainplot : bool, optional (default=True)
+        If True, both calibration and validation data are plotted.
+        If False, only validation data is plotted.
+
+    Returns:
+    --------
+    matplotlib.figure.Figure
+        A Matplotlib Figure object containing the regression plot.
+
+    Example:
+    --------
+    >>> fig = reg_plot((cal_measured, val_measured), (cal_predicted, val_predicted),
+                       cal_indices, val_indices, trainplot=True)
+    >>> fig.show()
+    """
+
+    # Initialize arrays to store the intercepts (a0) and slopes (a1) for both datasets.
     a0 = np.ones(2)
     a1 = np.ones(2)
 
+    # Determine how many regression models to compute: 2 if calibration data is plotted, else 1.
     n = 2 if trainplot else 1
     for i in range(n):
+        # Convert the measured and predicted arrays to column vectors.
         meas[i] = np.array(meas[i]).reshape(-1, 1)
         pred[i] = np.array(pred[i]).reshape(-1, 1)
 
+        # Import and instantiate the linear regression model.
         from sklearn.linear_model import LinearRegression
+
         M = LinearRegression()
         M.fit(meas[i], pred[i])
+        # Store the slope (coefficient) and intercept, rounded to 2 decimal places.
         a1[i] = np.round(M.coef_[0][0], 2)
         a0[i] = np.round(M.intercept_[0], 2)
 
+    # Compute residuals for calibration data if trainplot is True.
     if trainplot:
-        ec = np.subtract(np.array(meas[0]).reshape(-1),
-                         np.array(pred[0]).reshape(-1))
-    et = np.subtract(np.array(meas[1]).reshape(-1),
-                     np.array(pred[1]).reshape(-1))
+        ec = np.subtract(np.array(meas[0]).reshape(-1), np.array(pred[0]).reshape(-1))
+    # Compute residuals for validation data.
+    et = np.subtract(np.array(meas[1]).reshape(-1), np.array(pred[1]).reshape(-1))
 
+    # Create a new figure and axis object with specified dimensions.
     fig, ax = plt.subplots(figsize=(12, 4))
-    if trainplot:
-        sns.regplot(x=meas[0], y=pred[0], color="#2C6B6F", label='Cal (Predicted = '+
-                    str(a0[0]) +' + '+ str(a1[0]) +' x Measured)', scatter_kws={'edgecolor': 'black'})
-    sns.regplot(x=meas[1], y=pred[1], color='#d0f7be', label='Val (Predicted = '+
-                str(a0[1])+' + '+ str(a1[1])+' x Measured)', scatter_kws={'edgecolor': 'black'})
-    plt.plot([np.min(meas[0]) - 0.05, np.max([meas[0]]) + 0.05],
-             [np.min(meas[0]) - 0.05, np.max([meas[0]]) + 0.05], color='black')
 
+    # Plot the calibration data regression if enabled.
+    if trainplot:
+        sns.regplot(
+            x=meas[0],
+            y=pred[0],
+            color="#2C6B6F",
+            label="Cal (Predicted = "
+            + str(a0[0])
+            + " + "
+            + str(a1[0])
+            + " x Measured)",
+            scatter_kws={"edgecolor": "black"},
+        )
+
+    # Plot the validation data regression.
+    sns.regplot(
+        x=meas[1],
+        y=pred[1],
+        color="#d0f7be",
+        label="Val (Predicted = " + str(a0[1]) + " + " + str(a1[1]) + " x Measured)",
+        scatter_kws={"edgecolor": "black"},
+    )
+
+    # Plot an identity line (y = x) for reference.
+    plt.plot(
+        [np.min(meas[0]) - 0.05, np.max(meas[0]) + 0.05],
+        [np.min(meas[0]) - 0.05, np.max(meas[0]) + 0.05],
+        color="black",
+    )
+
+    # Annotate calibration data outliers if trainplot is enabled.
     if trainplot:
         for i, txt in enumerate(train_idx):
-            # plt.annotate(txt ,(np.array(meas[0]).reshape(-1)[i],ec[i]))
-            if np.abs(ec[i]) > np.mean(ec) + 3*np.std(ec):
+            # Identify outliers as samples whose absolute residual exceeds mean + 3*std.
+            if np.abs(ec[i]) > np.mean(ec) + 3 * np.std(ec):
                 plt.annotate(
-                    txt, (np.array(meas[0]).reshape(-1)[i], np.array(pred[0]).reshape(-1)[i]))
+                    txt,
+                    (
+                        np.array(meas[0]).reshape(-1)[i],
+                        np.array(pred[0]).reshape(-1)[i],
+                    ),
+                )
+
+    # Annotate validation data outliers.
     for i, txt in enumerate(test_idx):
-        if np.abs(et[i]) > np.mean(et) + 3*np.std(et):
+        if np.abs(et[i]) > np.mean(et) + 3 * np.std(et):
             plt.annotate(
-                txt, (np.array(meas[1]).reshape(-1)[i], np.array(pred[1]).reshape(-1)[i]))
+                txt,
+                (np.array(meas[1]).reshape(-1)[i], np.array(pred[1]).reshape(-1)[i]),
+            )
 
-    ax.set_ylabel('Predicted values')
-    ax.set_xlabel('Measured values')
+    # Set axis labels and include a legend.
+    ax.set_ylabel("Predicted values")
+    ax.set_xlabel("Measured values")
     plt.legend()
     plt.margins(0)
-    # fig.savefig('./report/figures/measured_vs_predicted.png')
-    return fig
 
-# Resid plot
+    return fig
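+
+
+# A minimal numeric sketch (illustration only, never called) of the annotation rule used in
+# reg_plot and resid_plot: a sample is flagged when its absolute residual exceeds the mean
+# residual plus three standard deviations. The residual values below are made up.
+def _three_sigma_annotation_sketch():
+    residuals = np.append(np.full(20, 0.1), 5.0)  # twenty small errors and one deviant sample
+    threshold = np.mean(residuals) + 3 * np.std(residuals)
+    return np.flatnonzero(np.abs(residuals) > threshold)  # -> array([20]): only the last sample is flagged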
 
 
+# Resid plot
 @st.cache_data
 def resid_plot(meas, pred, train_idx, test_idx, trainplot=True):
-
+    """
+    Generate a residual plot for calibration and validation datasets.
+
+    This function plots the residuals (measured minus predicted) against the predicted
+    values for the calibration (training) and validation (testing) datasets. Samples whose
+    absolute residual exceeds the mean residual by more than three standard deviations are
+    annotated with their sample indices. An illustrative usage sketch follows the function.
+
+    Parameters:
+    -----------
+    meas : tuple of arrays
+        A tuple containing measured values for the calibration and validation datasets.
+        meas[0] corresponds to calibration measurements, and meas[1] to validation measurements.
+    pred : tuple of arrays
+        A tuple containing predicted values for the calibration and validation datasets.
+        pred[0] corresponds to calibration predictions, and pred[1] to validation predictions.
+    train_idx : array-like
+        Indices corresponding to the calibration dataset samples.
+    test_idx : array-like
+        Indices corresponding to the validation dataset samples.
+    trainplot : bool, optional (default=True)
+        If True, includes the calibration residual plot alongside the validation plot.
+
+    Returns:
+    --------
+    matplotlib.figure.Figure
+        The generated residual plot as a Matplotlib figure object.
+
+    Example:
+    --------
+    >>> fig = resid_plot((y_train, y_test), (y_pred_train, y_pred_test), train_indices, test_indices)
+    >>> fig.show()
+
+    Notes:
+    ------
+    - Outliers are defined as absolute residuals greater than the mean residual plus three standard deviations.
+    - The plot includes a horizontal reference line at zero residual, and the y-axis limits are
+      symmetric, set to 1.1 times the largest absolute residual.
+    """
+
+    # Calculate residuals for the validation dataset
     et = np.subtract(meas[1], pred[1])
     ett = np.array(et).reshape(-1, 1)
 
+    # Create the plot
     fig, ax = plt.subplots(figsize=(12, 4))
-    plt.axhline(y=0, c='black', linestyle=':')
+    plt.axhline(y=0, c="black", linestyle=":")
+
+    # Plot training residuals if requested
     if trainplot:
         ec = np.subtract(meas[0], pred[0])
         ecc = np.array(ec).reshape(-1, 1)
-        sns.scatterplot(x=pred[0], y=ec, color="#2C6B6F",
-                        label='Cal', edgecolor="black")
+        sns.scatterplot(
+            x=pred[0], y=ec, color="#2C6B6F", label="Cal", edgecolor="black"
+        )
 
+        # Annotate training outliers
         for i, txt in enumerate(train_idx):
-            if np.abs(ecc[i]) > np.mean(ecc) + 3*np.std(ecc):
+            if np.abs(ecc[i]) > np.mean(ecc) + 3 * np.std(ecc):
                 plt.annotate(txt, (np.array(pred[0]).reshape(-1)[i], ecc[i]))
 
+    # Plot validation residuals
+    sns.scatterplot(x=pred[1], y=et, color="#d0f7be", label="Val", edgecolor="black")
 
-
-    sns.scatterplot(x=pred[1], y=et, color="#d0f7be",
-                    label='Val', edgecolor="black")
+    # Annotate validation outliers
     for i, txt in enumerate(test_idx):
         if np.abs(ett[i]) > np.mean(ett) + 3 * np.std(ett):
             plt.annotate(txt, (np.array(pred[1]).reshape(-1)[i], ett[i]))
 
+    # Set plot limits based on residuals
     if trainplot:
-        lim = np.max(abs(np.concatenate([ec, et], axis=0)))*1.1
+        lim = np.max(abs(np.concatenate([ec, et], axis=0))) * 1.1
     else:
-        lim = np.max(abs(et))*1.1
-    plt.ylim(- lim, lim)
+        lim = np.max(abs(et)) * 1.1
+    plt.ylim(-lim, lim)
 
-    ax.set_ylabel('Residuals')
-    ax.set_xlabel('Predicted values')
+    # Set plot labels and legend
+    ax.set_ylabel("Residuals")
+    ax.set_xlabel("Predicted values")
     plt.legend()
     plt.margins(0)
-    # fig.savefig('./report/figures/residuals_plot.png')
-    return fig
+
+    return fig
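+
+
+# Hedged usage sketch of the two helpers above (illustration only; the page calls them with
+# its real measured/predicted values elsewhere). The synthetic data, the RNG seed, and the
+# function name are assumptions. Lists are passed rather than tuples because reg_plot
+# reshapes its inputs in place.
+def _prediction_plots_sketch():
+    rng = np.random.default_rng(0)
+    meas_cal, meas_val = rng.normal(10, 2, 80), rng.normal(10, 2, 20)
+    pred_cal = meas_cal + rng.normal(0, 0.3, 80)
+    pred_val = meas_val + rng.normal(0, 0.4, 20)
+    st.pyplot(reg_plot([meas_cal, meas_val], [pred_cal, pred_val], np.arange(80), np.arange(20)))
+    st.pyplot(resid_plot([meas_cal, meas_val], [pred_cal, pred_val], np.arange(80), np.arange(20)))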