From 01cef6b45d149b3861bf8e477d0e71c777b198a7 Mon Sep 17 00:00:00 2001
From: DIANE <abderrahim.diane@cefe.cnrs.fr>
Date: Thu, 10 Oct 2024 13:30:31 +0200
Subject: [PATCH] update app files

---
 src/Modules.py                  |  13 --
 src/__init__.py                 |   9 -
 src/app.py                      |   2 +-
 src/common.py                   |  28 +++
 src/mod.py                      |  23 ---
 src/pages/0-inputs.py           |   2 +-
 src/pages/4-inputs.py           | 101 ----------
 src/style/header.py             |  48 -----
 src/utils/Ap.py                 |  16 --
 src/utils/DxReader.py           | 103 ----------
 src/utils/Evaluation_Metrics.py |  56 ------
 src/utils/HDBSCAN_Clustering.py | 335 --------------------------------
 src/utils/KMEANS_.py            |  52 -----
 src/utils/KennardStone.py       |  25 ---
 src/utils/Kmedoids.py           |   0
 src/utils/NMF_.py               |  28 ---
 src/utils/PCA_.py               |  53 -----
 src/utils/PLSR_.py              |  51 -----
 src/utils/PLSR_Preprocess.py    | 100 ----------
 src/utils/RegModels.py          | 229 ----------------------
 src/utils/SK_PLSR_.py           | 118 -----------
 src/utils/UMAP_.py              |  31 ---
 src/utils/VarSel.py             | 163 ----------------
 23 files changed, 30 insertions(+), 1556 deletions(-)
 delete mode 100644 src/Modules.py
 delete mode 100644 src/__init__.py
 create mode 100644 src/common.py
 delete mode 100644 src/mod.py
 delete mode 100644 src/pages/4-inputs.py
 delete mode 100644 src/style/header.py
 delete mode 100644 src/utils/Ap.py
 delete mode 100644 src/utils/DxReader.py
 delete mode 100644 src/utils/Evaluation_Metrics.py
 delete mode 100644 src/utils/HDBSCAN_Clustering.py
 delete mode 100644 src/utils/KMEANS_.py
 delete mode 100644 src/utils/KennardStone.py
 delete mode 100644 src/utils/Kmedoids.py
 delete mode 100644 src/utils/NMF_.py
 delete mode 100644 src/utils/PCA_.py
 delete mode 100644 src/utils/PLSR_.py
 delete mode 100644 src/utils/PLSR_Preprocess.py
 delete mode 100644 src/utils/RegModels.py
 delete mode 100644 src/utils/SK_PLSR_.py
 delete mode 100644 src/utils/UMAP_.py
 delete mode 100644 src/utils/VarSel.py

diff --git a/src/Modules.py b/src/Modules.py
deleted file mode 100644
index d4518bd..0000000
--- a/src/Modules.py
+++ /dev/null
@@ -1,13 +0,0 @@
-from Packages import *
-from utils import Plsr, LinearPCA, Umap, find_col_index, PinardPlsr, Nmf, AP
-from utils import LWPLSR, list_files, metrics, TpeIpls, reg_plot, resid_plot, Sk_Kmeans, DxRead, Hdbscan, read_dx, PlsProcess
-from utils.DATA_HANDLING import *
-from utils.Miscellaneous import prediction, download_results, plot_spectra, local_css, desc_stats, hash_data,data_split, pred_hist
-from utils.Hash import create_hash, check_hash
-from report import report
-css_file = Path("style/")
-pages_folder = Path("pages/")
-from style.header import add_header, add_sidebar
-from config.config import pdflatex_path
-local_css(css_file / "style.css")
-from utils import KS, RDM
diff --git a/src/__init__.py b/src/__init__.py
deleted file mode 100644
index bd8f7cb..0000000
--- a/src/__init__.py
+++ /dev/null
@@ -1,9 +0,0 @@
-"""This package provides a complete workflow to users how want to proced to NIRS analysis without particular knowledge.
-
-This is a webapp with Streamlit.
-The GUI provides everything needed to select samples based on NIRS spectra and then to build a model that predicts
-    chemical values for your samples.
-
-Examples:
-    streamlit run ./app.py
-"""
\ No newline at end of file
diff --git a/src/app.py b/src/app.py
index 01077f6..07edda0 100644
--- a/src/app.py
+++ b/src/app.py
@@ -1,5 +1,5 @@
 from common import *
-
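+# Configure the Streamlit page; set_page_config has to run before any other Streamlit command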
+st.set_page_config(page_title="NIRS Utils", page_icon=":goat:", layout="wide")
 
 
 
diff --git a/src/common.py b/src/common.py
new file mode 100644
index 0000000..c7bd9ca
--- /dev/null
+++ b/src/common.py
@@ -0,0 +1,28 @@
+# """This package provides a complete workflow to users how want to proced to NIRS analysis without particular knowledge.
+
+# This is a webapp with Streamlit.
+# The GUI provides everything needed to select samples based on NIRS spectra and then to build a model that predicts
+#     chemical values for your samples.
+
+# Examples:
+#     streamlit run ./app.py
+# """
+# ##
+
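+# Shared imports and constants for app.py and the scripts in pages/, which pull everything in via "from common import *"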
+import streamlit as st
+from pathlib import Path
+
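+# Locations of the CSS stylesheet and of the individual Streamlit page scripts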
+css_file = Path("style/")
+pages_folder = Path("pages/")
+
+
+
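+# Re-export the data parsing, layout, hashing, visualization and reporting helpers used across the pages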
+from utils.data_parsing import JcampParser, CsvParser
+from style.layout import BackgroundImg, add_header, add_sidebar, local_css
+from utils.data_handling import *
+from utils.data_parsing import *
+from utils.hash import *
+from utils.visualize import *
+from utils.miscellaneous import ObjectHash
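+# Calibration subset selection: RDM (random split) and KS (Kennard-Stone split)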
+from utils.samsel import RDM, KS
+from report import report
\ No newline at end of file
diff --git a/src/mod.py b/src/mod.py
deleted file mode 100644
index a62aef5..0000000
--- a/src/mod.py
+++ /dev/null
@@ -1,23 +0,0 @@
-"""This package provides a complete workflow to users how want to proced to NIRS analysis without particular knowledge.
-
-This is a webapp with Streamlit.
-The GUI provides everything needed to select samples based on NIRS spectra and then to build a model that predicts
-    chemical values for your samples.
-
-Examples:
-    streamlit run ./app.py
-"""
-##
-from Packages import *
-# from utils import read_dx, DxRead,  Plsr, LinearPCA, Umap, find_col_index, PinardPlsr, Nmf, AP
-# from utils import LWPLSR, list_files, metrics, TpeIpls, reg_plot, resid_plot, Sk_Kmeans, DxRead, Hdbscan, read_dx, PlsProcess, PinardPlsr, Plsr
-from utils.DATA_HANDLING import *
-from utils.Miscellaneous import prediction, download_results, plot_spectra, local_css, desc_stats, hash_data, hist,data_split, pred_hist,background_img
-from utils.Hash import create_hash, check_hash
-from report import report
-css_file = Path("style/")
-pages_folder = Path("pages/")
-from style import add_header, add_sidebar
-# from style.header import add_header, add_sidebar
-from config.config import pdflatex_path
-local_css(css_file / "style.css")
\ No newline at end of file
diff --git a/src/pages/0-inputs.py b/src/pages/0-inputs.py
index a64e37d..c168653 100644
--- a/src/pages/0-inputs.py
+++ b/src/pages/0-inputs.py
@@ -1,5 +1,5 @@
 from common import *
-
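+# Apply the same page configuration here, since each page script runs on its own when the page is selected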
+st.set_page_config(page_title="NIRS Utils", page_icon=":goat:", layout="wide")
 
 
 
diff --git a/src/pages/4-inputs.py b/src/pages/4-inputs.py
deleted file mode 100644
index 21f6686..0000000
--- a/src/pages/4-inputs.py
+++ /dev/null
@@ -1,101 +0,0 @@
-from Packages import *
-st.set_page_config(page_title="NIRS Utils", page_icon=":goat:", layout="wide",)
-st.session_state["interface"] = st.session_state.get('interface')
-#""" if st.session_state["interface"] == 'simple':
-#    hide_pages("Predictions") """
-# from Modules import *
-from mod import *
-from utils.DATA_HANDLING import *
-background_img(change=None)
-
-
-#Import Header
-add_header()
-
-pages_folder = Path("pages/")
-
-
-# Initialize session state
-if 'form_submitted' not in st.session_state:
-    st.session_state['form_submitted'] = False
-
-with st.container():
-    # Text input fields
-    st.subheader("Complete and save the following form with the data context:",divider="blue")
-    st.warning('Make sure the form is filled in completely, because the reliability of the results depends mainly on it!', icon="⚠️")
-
-    with st.form(key = 'my_form'):
-        _,col1, col3,col2 = st.columns((0.1, 1.4,0.5,2))
-        with col1:
-            ##############   Project information ###########
-            st.subheader("Project information", divider="blue")
-            meta_project = st.text_input('Project name :')
-            meta_machine_ID = st.text_input('NIRS ID :',)
-            meta_scan_place_options = ["Pace", "Other"]
-            meta_scan_place = st.radio("Analysis Laboratory :", meta_scan_place_options)
-            meta_sample_species = st.text_input('Sample species (if relevant, provide the sample species; otherwise enter No):')
-
-
-
-
-        with col2:
-            clo3,_, col4,_ = st.columns([1,0.2,1,0.3])
-            with clo3:
-                ##############   The Nature of the Samples ###########
-                if '' in [meta_project, meta_machine_ID,meta_sample_species]: disabled1 = True                 
-                else: disabled1 = False
-                st.subheader("The Nature of the Samples",divider="blue")
-                meta_sample_category_options = ["Soil", "Plant", "Animal", "Other"]
-                meta_sample_category = st.radio("Samples category :", [""] + meta_sample_category_options)
-                meta_sample_sub_category_options = ["Green leaves", "Leaf litter", "Litter", "Humus", "Soil", "Animal part", "Animal Powder", "Fungal sample", "Other"]
-                meta_sample_sub_category = st.radio("Sample category description :", [""] + meta_sample_sub_category_options)
-            
-            with col4:
-                st.subheader("The Physical State of the Samples",divider="blue")
-                meta_sample_humidity_options = ["Dry", "Fresh", "Wet"]
-                meta_sample_humidity = st.radio("Humidity state of the sample :", [""] + meta_sample_humidity_options)
-
-                meta_sample_pretreatment_options = ["Powder", "Pastile", "Liquid"]
-                meta_sample_pretreatment = st.radio("Type of sample pre-treatment :", [""] + meta_sample_pretreatment_options)
-            
-            # Build the dictionary from the form data
-            form_data = {
-                "meta_project": meta_project,
-                "meta_sample_species": meta_sample_species,
-                "meta_sample_category": meta_sample_category,
-                "meta_sample_pretreatment": meta_sample_pretreatment,
-                "meta_machine_ID": meta_machine_ID,
-                "meta_sample_sub_category": meta_sample_sub_category,
-                "meta_sample_humidity": meta_sample_humidity,
-                "meta_scan_place": meta_scan_place
-            }
-
-        submitted = st.form_submit_button(label='Save')
-    if submitted:
-        if '' not in form_data.values(): 
-            # Save the form data here
-            st.session_state['form_submitted'] = True
-            st.success('Form was saved successfully!', icon="✅")
-            # Save the form data to a JSON file
-            with open('form_data.json', 'w') as json_file:
-                json.dump(form_data, json_file)
-
-            if st.session_state['interface'] == 'simple':
-                header3, header4 = st.columns(2)
-                if header3.button("Samples Selection"):
-                    st.switch_page(pages_folder / '1-samples_selection.py')
-                if header4.button("Model Creation"):
-                    st.switch_page(pages_folder / '2-model_creation.py')
-            elif st.session_state['interface'] == 'advanced':
-                header3, header4, header5 = st.columns(3)
-                if header3.button("Samples Selection"):
-                    st.switch_page(pages_folder / '1-samples_selection.py')
-                if header4.button("Model Creation"):
-                    st.switch_page(pages_folder / '2-model_creation.py')
-                if header5.button("Prediction"):
-                    st.switch_page(pages_folder / '3-prediction.py')
-
-        else:
-            st.error('Error: The form was not saved, please ensure the required fields are filled!')
-
-
diff --git a/src/style/header.py b/src/style/header.py
deleted file mode 100644
index 799e2ee..0000000
--- a/src/style/header.py
+++ /dev/null
@@ -1,48 +0,0 @@
-from Packages import *
-def add_header():
-    st.markdown(
-        """
-        <div style="width: 100%;height: 130px;background-color: rgb(0,0,0,0);border: 4px solid rgb(122,176,199);padding: 1px;margin-bottom: 0px;border-radius: 20%; ">
-          <h2 style="font-family: 'Arial'; text-align: center; color: #39bf55;">PACE - MEEB / CEFE</h2>
-          <h3 style="font-family: 'Arial'; text-align: center; color: #2cb048;">NIRS Utils</h3>
-        </div>
-        <style>
-        .block-container {padding-top: 3rem;padding-bottom: 0rem;padding-left: 5rem;padding-right: 5rem;}
-        </style>
-        """, unsafe_allow_html=True)
-    
-
-def add_sidebar(pages_folder):
-    if 'interface' not in st.session_state:
-        st.session_state['interface'] = 'simple'
-    else:
-        st.session_state["interface"] = st.session_state.get('interface')
-
-    # # TOC menu on the left
-    show_pages(
-        [Page("app.py", "Home"),
-         Page(str(pages_folder / "4-inputs.py"), "Inputs"),
-         Page(str(pages_folder / "1-samples_selection.py"), "Samples Selection"),
-         Page(str(pages_folder / "2-model_creation.py"), "Models Creation & Predictions"),
-
-         ]
-    )
-
-    with st.sidebar:
-        interface = st.radio(label="Interface", options=['simple', 'advanced'], key='interface')
-        # st.page_link(str(pages_folder / '1-samples_selection.py'))
-        if st.session_state['interface'] == 'simple':
-            #     st.page_link(str(pages_folder / '2-model_creation.py'))
-            pass
-        # if advanced interface, split Models Creation and Predictions
-        elif st.session_state['interface'] == 'advanced':
-            show_pages(
-                [Page("app.py", "Home"),
-                 Page(str(pages_folder / "4-inputs.py"), "Inputs"),
-                 Page(str(pages_folder / "1-samples_selection.py"), "Samples Selection"),
-                 Page(str(pages_folder / "2-model_creation.py"), "Models Creation"),
-                 Page(str(pages_folder / "3-prediction.py"), "Predictions"),
-                 ]
-            )
-            # st.page_link(str(pages_folder / '2-model_creation.py'))
-            # st.page_link(str(pages_folder / '3-prediction.py'))
diff --git a/src/utils/Ap.py b/src/utils/Ap.py
deleted file mode 100644
index 2084d25..0000000
--- a/src/utils/Ap.py
+++ /dev/null
@@ -1,16 +0,0 @@
-from Packages import *
-
-class AP:
-    def __init__(self, X):
-        ## input matrix
-        self.__x = np.array(X)
-
-        # Fit the Affinity Propagation model
-        self.M = AffinityPropagation(damping=0.5, max_iter=200, convergence_iter=15, copy=True, preference=None,
-                                 affinity='euclidean', verbose=False, random_state=None)
-        self.M.fit(self.__x)
-        self.yp = self.M.predict(self.__x)+1
-    @property
-    def fit_optimal_(self):
-        clu = [f'cluster#{i}' for i in self.yp]
-        return self.__x, clu, self.M.cluster_centers_
\ No newline at end of file
diff --git a/src/utils/DxReader.py b/src/utils/DxReader.py
deleted file mode 100644
index 8158228..0000000
--- a/src/utils/DxReader.py
+++ /dev/null
@@ -1,103 +0,0 @@
-from Packages import *
-import jcamp as jc
-
-class DxRead:
-
-    '''This class helps retrieve spectral data as well as sample metadata from a JCAMP-DX file'''
-    def __init__(self, path):
-        #self.__path = path.replace('\\','/')
-        self.__path = path
-        self.__dxfile = jc.jcamp_readfile(self.__path)
-        
-        # Access samples data
-        self.__nb = self.__dxfile['blocks'] # Get the total number of blocks = The total number of scanned samples
-        self.__list_of_blocks = self.__dxfile['children']  # Store all blocks within a list
-        self.__wl = self.__list_of_blocks[0]["x"] # Wavelengths/frequencies/range 
-    
-        # Start retrieving the data
-        specs = np.zeros((self.__nb, len(self.__list_of_blocks[0]["y"])), dtype=float) # preallocate a numpy matrix for storing the spectra
-        self.idx = np.arange(self.__nb) # This array is designed to store the sample names
-        self.__met = {}
-        for i in range(self.__nb): # Loop over the blocks
-            specs[i] = self.__list_of_blocks[i]['y']
-            block = self.__list_of_blocks[i]
-            block_met = {   'name': block['title'],
-                            'origin': block['origin'],
-                            'date': block['date'],
-                            #'time': block['time'],
-                            'spectrometer': block['spectrometer/data system'].split('\n$$')[0],
-                            'n_scans':block['spectrometer/data system'].split('\n$$')[6].split('=')[1],
-                            'resolution': block['spectrometer/data system'].split('\n$$')[8].split('=')[1],
-                            #'instrumental parameters': block['instrumental parameters'],
-                            'xunits': block['xunits'],
-                            'yunits': block['yunits'],
-                            #'xfactor': block['xfactor'],
-                            #'yfactor': block['yfactor'],
-                            'firstx': block['firstx'],
-                            'lastx': block['lastx'],
-                            #'firsty':block['firsty'],
-                            #'miny': block['miny'],
-                            #'maxy': block['maxy'],
-                            'npoints': block['npoints'],
-                            'concentrations':block['concentrations'],
-                            #'deltax':block['deltax']
-                            }
-            
-            self.__met[f'{i}'] = block_met
-        self.metadata_ = DataFrame(self.__met).T
-        self.spectra = DataFrame(np.fliplr(specs), columns= self.__wl[::-1], index = self.metadata_['name']) # Storing spectra in a dataframe
-
-
-
-        #### Concentrations
-        self.pattern = r"\(([^,]+),(\d+(\.\d+)?),([^)]+)"
-        aa = self.__list_of_blocks[0]['concentrations']
-        a = '\n'.join(line for line in aa.split('\n') if "NCU" not in line and "<<undef>>" not in line)
-        n_elements = a.count('(')
-
-        ## Get the names of the analyzed chemical elements
-        elements_name = []
-        for match in re.findall(self.pattern, a):
-                elements_name.append(match[0])
-
-        ## Retrieve concentrations
-        df = self.metadata_['concentrations']
-        cc = {}
-        for i in range(self.metadata_.shape[0]):
-            cc[df.index[i]] = self.conc(df[str(i)])
-
-        ### DataFrame containing the chemical data
-        self.chem_data = DataFrame(cc, index=elements_name).T.astype(float)
-        self.chem_data.index = self.metadata_['name']
-
-    ### Method for retrieving the concentration of a single sample
-    def conc(self,sample):
-        prep = '\n'.join(line for line in sample.split('\n') if "NCU" not in line and "<<undef>>" not in line)
-        c = []
-        for match in re.findall(self.pattern, prep):
-                c.append(match[1])
-        concentration = np.array(c)
-        return concentration
-
-    @property
-    def specs_df_(self):
-        return self.spectra
-    @property
-    def md_df_(self):
-        me = self.metadata_.drop("concentrations", axis = 1)
-        me = me.drop(me.columns[(me == '').all()], axis = 1)
-        return me
-    @property
-    def md_df_st_(self):
-         rt = ['origin','date']
-         cl = self.metadata_.loc[:,rt]
-         return cl
-             
-    @property
-    def chem_data_(self):
-         return self.chem_data
-    
-@st.cache_data
-def read_dx(file):
-     M = DxRead(file)
-     return M.chem_data, M.specs_df_, M.md_df_, M.md_df_st_
\ No newline at end of file
diff --git a/src/utils/Evaluation_Metrics.py b/src/utils/Evaluation_Metrics.py
deleted file mode 100644
index ecbc6ab..0000000
--- a/src/utils/Evaluation_Metrics.py
+++ /dev/null
@@ -1,56 +0,0 @@
-from Packages  import *
-
-class metrics:
-    def __init__(self, c:Optional[float] = None, cv:Optional[List] = None, t:Optional[List] = None, method = 'regression')-> DataFrame:
-        phase = [c, cv, t]
-        index = np.array(["train", "cv", "test"])
-        notnone = [i for i in range(3) if phase[i] != None]
-        met_index = index[notnone]
-        methods = ['regression', 'classification']
-        perf = {}
-        for i in notnone:
-            if method == 'regression':
-                perf[index[i]] = metrics.reg_(phase[i][0], phase[i][1])
-
-            elif method == 'classification':
-                perf[index[i]] = metrics.class_(phase[i][0], phase[i][1])
-        
-            if notnone == 1:
-                self.ret = perf.T
-            else:
-                self.ret = DataFrame(perf).T
-             
-    @staticmethod
-    def reg_(meas, pred):
-           meas = np.array(meas)
-           pred = np.array(pred)
-           xbar = np.mean(meas) # the average of measured values
-           e = np.subtract(meas , pred)
-           e2 = e**2# the squared error
-
-          # Sum of squared:
-           # TOTAL
-           sst = np.sum((meas - xbar)**2)
-           # RESIDUAL
-           ssr = np.sum(e2)
-           # REGRESSION OR MODEL
-           ssm = np.sum(pred - xbar)
-
-
-          # Compute statistical metrics
-           metr = {}
-           metr['r'] = np.corrcoef(meas, pred)[0, 1]
-           metr['r2'] = 1-ssr/sst
-           metr['rmse'] = np.sqrt(np.mean(e2))
-           metr['mae'] = np.mean(np.abs(e2))
-           metr['rpd'] = np.std(meas)/np.sqrt(np.mean(e2))
-           metr['rpiq'] = (np.quantile(meas, .75) - np.quantile(meas, .25))/np.sqrt(np.mean(e2))
-           return metr
-
-    @staticmethod
-    def class_(meas, pred):
-        pass
-
-    @property
-    def scores_(self):
-        return self.ret   
\ No newline at end of file
diff --git a/src/utils/HDBSCAN_Clustering.py b/src/utils/HDBSCAN_Clustering.py
deleted file mode 100644
index b4dbfca..0000000
--- a/src/utils/HDBSCAN_Clustering.py
+++ /dev/null
@@ -1,335 +0,0 @@
-from Packages import *
-
-class Hdbscan:
-    """Runs an automatically optimized sklearn.HDBSCAN clustering on dimensionality reduced space.
-
-    The HDBSCAN_scores_ @Property returns the cluster number of each sample (_labels) and the DBCV best score.
-
-    Returns:
-        _labels (DataFrame): DataFrame with the cluster belonging number for each sample
-        _hdbscan_score (float): a float with the best DBCV score after optimization
-
-    Examples:
-        - clustering = Hdbscan(data)
-        - scores = clustering.HDBSCAN_scores_
-
-    """
-    def __init__(self, data):
-        """Initiate the HDBSCAN calculation
-
-        Args:
-            data (DataFrame): the Dimensionality reduced space, raw result of the UMAP.fit()
-            param_dist (dictionary): the HDBSCAN optimization parameters to test
-            _score (DataFrame): is a dataframe with the DBCV value for each combination of param_dist. We search for the higher value to then compute an HDBSCAN with the best parameters.
-        """
-        # Really fast
-        self._param_dist = {'min_samples': [8],
-                      'min_cluster_size':[10],
-                      'metric' : ['euclidean'],#,'manhattan'],
-                      }
-        # Medium
-        # self._param_dist = {'min_samples': [1,10],
-        #     'min_cluster_size':[5,50],
-        #     'metric' : ['euclidean','manhattan'],
-        #     }
-        # Complete
-        # self._param_dist = {'min_samples': [1,5,10,],
-        #       'min_cluster_size':[5,25,50,],
-        #       'metric' : ['euclidean','manhattan'],
-        #       }
-
-        self._clusterable_embedding = data
-
-        # RandomizedSearchCV not working...
-        # def scoring(model, clusterable_embedding):
-        #     label = HDBSCAN().fit_predict(clusterable_embedding)
-        #     hdbscan_score = DBCV(clusterable_embedding, label, dist_function=euclidean)
-        #     return hdbscan_score
-        # tunning = RandomizedSearchCV(estimator=HDBSCAN(), param_distributions=param_dist,  scoring=scoring)
-        # tunning.fit(clusterable_embedding)
-        # return tunning
-
-        # compute optimization. Test each combination of parameters and store DBCV score into _score.
-        # self._score = DataFrame()
-        # for i in self._param_dist.get('min_samples'):
-        #     for j in self._param_dist.get('min_cluster_size'):
-        #         self._ij_label = HDBSCAN(min_samples=i, min_cluster_size=j).fit_predict(self._clusterable_embedding)
-        #         self._ij_hdbscan_score = self.DBCV(self._clusterable_embedding, self._ij_label,)# dist_function=euclidean)
-        #         self._score.at[i,j] = self._ij_hdbscan_score
-        # get the best DBCV score
-        # self._hdbscan_bscore  = max(self._score.max())
-        # find the coordinates of the best clustering parameters and run HDBSCAN below
-        # self._bparams = np.where(self._score == self._hdbscan_bscore)
-        # run HDBSCAN with best params
-
-        # self.best_hdbscan = HDBSCAN(min_samples=self._param_dist['min_samples'][self._bparams[0][0]], min_cluster_size=self._param_dist['min_cluster_size'][self._bparams[1][0]], metric=self._param_dist['metric'][self._bparams[1][0]], store_centers="medoid", )
-        self.best_hdbscan = HDBSCAN(min_samples=self._param_dist['min_samples'][0], min_cluster_size=self._param_dist['min_cluster_size'][0], metric=self._param_dist['metric'][0], store_centers="medoid", )
-        self.best_hdbscan.fit_predict(self._clusterable_embedding)
-        self._labels = self.best_hdbscan.labels_
-        self._centers = self.best_hdbscan.medoids_
-
-
-    # def DBCV(self, X, labels, dist_function=euclidean):
-    #     """
-    #     Implimentation of Density-Based Clustering Validation "DBCV"
-    #
-    #     Citation: Moulavi, Davoud, et al. "Density-based clustering validation."
-    #     Proceedings of the 2014 SIAM International Conference on Data Mining.
-    #     Society for Industrial and Applied Mathematics, 2014.
-    #
-    #     Density Based clustering validation
-    #
-    #     Args:
-    #         X (np.ndarray): ndarray with dimensions [n_samples, n_features]
-    #             data to check validity of clustering
-    #         labels (np.array): clustering assignments for data X
-    #         dist_dunction (func): function to determine distance between objects
-    #             func args must be [np.array, np.array] where each array is a point
-    #
-    #     Returns:
-    #         cluster_validity (float): score in range[-1, 1] indicating validity of clustering assignments
-    #     """
-    #     graph = self._mutual_reach_dist_graph(X, labels, dist_function)
-    #     mst = self._mutual_reach_dist_MST(graph)
-    #     cluster_validity = self._clustering_validity_index(mst, labels)
-    #     return cluster_validity
-    #
-    #
-    # def _core_dist(self, point, neighbors, dist_function):
-    #     """
-    #     Computes the core distance of a point.
-    #     Core distance is the inverse density of an object.
-    #
-    #     Args:
-    #         point (np.array): array of dimensions (n_features,)
-    #             point to compute core distance of
-    #         neighbors (np.ndarray): array of dimensions (n_neighbors, n_features):
-    #             array of all other points in object class
-    #         dist_dunction (func): function to determine distance between objects
-    #             func args must be [np.array, np.array] where each array is a point
-    #
-    #     Returns: core_dist (float)
-    #         inverse density of point
-    #     """
-    #     n_features = np.shape(point)[0]
-    #     n_neighbors = np.shape(neighbors)[0]
-    #
-    #     distance_vector = cdist(point.reshape(1, -1), neighbors)
-    #     distance_vector = distance_vector[distance_vector != 0]
-    #     numerator = ((1/distance_vector)**n_features).sum()
-    #     core_dist = (numerator / (n_neighbors - 1)) ** (-1/n_features)
-    #     return core_dist
-    #
-    # def _mutual_reachability_dist(self, point_i, point_j, neighbors_i,
-    #                               neighbors_j, dist_function):
-    #     """.
-    #     Computes the mutual reachability distance between points
-    #
-    #     Args:
-    #         point_i (np.array): array of dimensions (n_features,)
-    #             point i to compare to point j
-    #         point_j (np.array): array of dimensions (n_features,)
-    #             point i to compare to point i
-    #         neighbors_i (np.ndarray): array of dims (n_neighbors, n_features):
-    #             array of all other points in object class of point i
-    #         neighbors_j (np.ndarray): array of dims (n_neighbors, n_features):
-    #             array of all other points in object class of point j
-    #         dist_function (func): function to determine distance between objects
-    #             func args must be [np.array, np.array] where each array is a point
-    #
-    #     Returns:
-    #         mutual_reachability (float)
-    #         mutual reachability between points i and j
-    #
-    #     """
-    #     core_dist_i = self._core_dist(point_i, neighbors_i, dist_function)
-    #     core_dist_j = self._core_dist(point_j, neighbors_j, dist_function)
-    #     dist = dist_function(point_i, point_j)
-    #     mutual_reachability = np.max([core_dist_i, core_dist_j, dist])
-    #     return mutual_reachability
-    #
-    #
-    # def _mutual_reach_dist_graph(self, X, labels, dist_function):
-    #     """
-    #     Computes the mutual reach distance complete graph.
-    #     Graph of all pair-wise mutual reachability distances between points
-    #
-    #     Args:
-    #         X (np.ndarray): ndarray with dimensions [n_samples, n_features]
-    #             data to check validity of clustering
-    #         labels (np.array): clustering assignments for data X
-    #         dist_dunction (func): function to determine distance between objects
-    #             func args must be [np.array, np.array] where each array is a point
-    #
-    #     Returns: graph (np.ndarray)
-    #         array of dimensions (n_samples, n_samples)
-    #         Graph of all pair-wise mutual reachability distances between points.
-    #
-    #     """
-    #     n_samples = np.shape(X)[0]
-    #     graph = []
-    #     counter = 0
-    #     for row in range(n_samples):
-    #         graph_row = []
-    #         for col in range(n_samples):
-    #             point_i = X[row]
-    #             point_j = X[col]
-    #             class_i = labels[row]
-    #             class_j = labels[col]
-    #             members_i = self._get_label_members(X, labels, class_i)
-    #             members_j = self._get_label_members(X, labels, class_j)
-    #             dist = self._mutual_reachability_dist(point_i, point_j,
-    #                                              members_i, members_j,
-    #                                              dist_function)
-    #             graph_row.append(dist)
-    #         counter += 1
-    #         graph.append(graph_row)
-    #     graph = np.array(graph)
-    #     return graph
-    #
-    #
-    # def _mutual_reach_dist_MST(self, dist_tree):
-    #     """
-    #     Computes minimum spanning tree of the mutual reach distance complete graph
-    #
-    #     Args:
-    #         dist_tree (np.ndarray): array of dimensions (n_samples, n_samples)
-    #             Graph of all pair-wise mutual reachability distances
-    #             between points.
-    #
-    #     Returns: minimum_spanning_tree (np.ndarray)
-    #         array of dimensions (n_samples, n_samples)
-    #         minimum spanning tree of all pair-wise mutual reachability
-    #             distances between points.
-    #     """
-    #     mst = minimum_spanning_tree(dist_tree).toarray()
-    #     return mst + np.transpose(mst)
-    #
-    #
-    # def _cluster_density_sparseness(self, MST, labels, cluster):
-    #     """
-    #     Computes the cluster density sparseness, the minimum density
-    #         within a cluster
-    #
-    #     Args:
-    #         MST (np.ndarray): minimum spanning tree of all pair-wise
-    #             mutual reachability distances between points.
-    #         labels (np.array): clustering assignments for data X
-    #         cluster (int): cluster of interest
-    #
-    #     Returns: cluster_density_sparseness (float)
-    #         value corresponding to the minimum density within a cluster
-    #     """
-    #     indices = np.where(labels == cluster)[0]
-    #     cluster_MST = MST[indices][:, indices]
-    #     cluster_density_sparseness = np.max(cluster_MST)
-    #     return cluster_density_sparseness
-    #
-    #
-    # def _cluster_density_separation(self, MST, labels, cluster_i, cluster_j):
-    #     """
-    #     Computes the density separation between two clusters, the maximum
-    #         density between clusters.
-    #
-    #     Args:
-    #         MST (np.ndarray): minimum spanning tree of all pair-wise
-    #             mutual reachability distances between points.
-    #         labels (np.array): clustering assignments for data X
-    #         cluster_i (int): cluster i of interest
-    #         cluster_j (int): cluster j of interest
-    #
-    #     Returns: density_separation (float):
-    #         value corresponding to the maximum density between clusters
-    #     """
-    #     indices_i = np.where(labels == cluster_i)[0]
-    #     indices_j = np.where(labels == cluster_j)[0]
-    #     shortest_paths = csgraph.dijkstra(MST, indices=indices_i)
-    #     relevant_paths = shortest_paths[:, indices_j]
-    #     density_separation = np.min(relevant_paths)
-    #     return density_separation
-    #
-    #
-    # def _cluster_validity_index(self, MST, labels, cluster):
-    #     """
-    #     Computes the validity of a cluster (validity of assignmnets)
-    #
-    #     Args:
-    #         MST (np.ndarray): minimum spanning tree of all pair-wise
-    #             mutual reachability distances between points.
-    #         labels (np.array): clustering assignments for data X
-    #         cluster (int): cluster of interest
-    #
-    #     Returns: cluster_validity (float)
-    #         value corresponding to the validity of cluster assignments
-    #     """
-    #     min_density_separation = np.inf
-    #     for cluster_j in np.unique(labels):
-    #         if cluster_j != cluster:
-    #             cluster_density_separation = self._cluster_density_separation(MST,
-    #                                                                      labels,
-    #                                                                      cluster,
-    #                                                                      cluster_j)
-    #             if cluster_density_separation < min_density_separation:
-    #                 min_density_separation = cluster_density_separation
-    #     cluster_density_sparseness = self._cluster_density_sparseness(MST,
-    #                                                              labels,
-    #                                                              cluster)
-    #     numerator = min_density_separation - cluster_density_sparseness
-    #     denominator = np.max([min_density_separation, cluster_density_sparseness])
-    #     cluster_validity = numerator / denominator
-    #     return cluster_validity
-    #
-    #
-    # def _clustering_validity_index(self, MST, labels):
-    #     """
-    #     Computes the validity of all clustering assignments for a
-    #     clustering algorithm
-    #
-    #     Args:
-    #         MST (np.ndarray): minimum spanning tree of all pair-wise
-    #             mutual reachability distances between points.
-    #         labels (np.array): clustering assignments for data X
-    #
-    #     Returns: validity_index (float):
-    #         score in range[-1, 1] indicating validity of clustering assignments
-    #     """
-    #     n_samples = len(labels)
-    #     validity_index = 0
-    #     for label in np.unique(labels):
-    #         fraction = np.sum(labels == label) / float(n_samples)
-    #         cluster_validity = self._cluster_validity_index(MST, labels, label)
-    #         validity_index += fraction * cluster_validity
-    #     return validity_index
-    #
-    #
-    # def _get_label_members(self, X, labels, cluster):
-    #     """
-    #     Helper function to get samples of a specified cluster.
-    #
-    #     Args:
-    #         X (np.ndarray): ndarray with dimensions [n_samples, n_features]
-    #             data to check validity of clustering
-    #         labels (np.array): clustering assignments for data X
-    #         cluster (int): cluster of interest
-    #
-    #     Returns: members (np.ndarray)
-    #         array of dimensions (n_samples, n_features) of samples of the
-    #         specified cluster.
-    #     """
-    #     indices = np.where(labels == cluster)[0]
-    #     members = X[indices]
-    #     return members
-
-    @property
-    def centers_(self):
-        # return self._labels, self._hdbscan_bscore, self._centers
-        return self._centers
-    @property
-    def labels_(self):
-        labels = [f'cluster#{i+1}' if i !=-1 else 'Non clustered' for i in self._labels]
-        return labels
-    @property
-    def non_clustered(self):
-        labels = [f'cluster#{i+1}' if i !=-1 else 'Non clustered' for i in self._labels]
-        non_clustered = np.where(np.array(labels) == 'Non clustered')[0]
-        return non_clustered
diff --git a/src/utils/KMEANS_.py b/src/utils/KMEANS_.py
deleted file mode 100644
index ea1f5ea..0000000
--- a/src/utils/KMEANS_.py
+++ /dev/null
@@ -1,52 +0,0 @@
-from Packages import *
-class Sk_Kmeans:
-    """K-Means clustering for Samples selection.
-
-    Returns:
-        inertia_ (DataFrame): DataFrame with ...
-        x (DataFrame): Initial data
-        clu (DataFrame): Cluster name for each sample
-        model.cluster_centers_ (DataFrame): Coordinates of the center of each cluster
-    """
-    def __init__(self, x, max_clusters):
-        """Initiate the KMeans class.
-
-        Args:
-            x (DataFrame): the original reduced data to cluster
-            max_clusters (int): the maximum number of desired clusters.
-        """
-        self.x = x
-        self.max_clusters = max_clusters
-
-        self.inertia = DataFrame()
-        for i in range(1, max_clusters+1):
-            model = KMeans(n_clusters = i, init = 'k-means++', random_state = 42)
-            model.fit(x)
-            self.inertia[f'{i}_clust']= [model.inertia_]
-        self.inertia.index = ['inertia']
-
-    @property
-    def inertia_(self):
-        return self.inertia
-    
-    @property
-    def suggested_n_clusters_(self):
-        idxidx = []
-        values = []
-
-        s = self.inertia.to_numpy().ravel()
-        for i in range(self.max_clusters-1):
-            idxidx.append(f'{i+1}_clust')
-            values.append((s[i] - s[i+1])*100 / s[i])
-
-        id = np.max(np.where(np.array(values) > 5))+2
-        return id
-    
-    @property
-    def fit_optimal_(self):
-        model = KMeans(n_clusters = self.suggested_n_clusters_, init = 'k-means++', random_state = 42)
-        model.fit(self.x)
-        yp = model.predict(self.x)+1
-        clu = [f'cluster#{i}' for i in yp]
-
-        return self.x, clu, model.cluster_centers_
\ No newline at end of file
diff --git a/src/utils/KennardStone.py b/src/utils/KennardStone.py
deleted file mode 100644
index 1fb8595..0000000
--- a/src/utils/KennardStone.py
+++ /dev/null
@@ -1,25 +0,0 @@
-from Packages import *
-from typing import Sequence, Dict, Optional, Union
-
-class KS:
-    def __init__(self, x:Optional[Union[np.ndarray|DataFrame]], rset:Optional[Union[float|int]]):
-        self.x = x
-        self.ratio = rset
-        self._train, self._test = ks_train_test_split(self.x, train_size = self.ratio)
-    
-    @property
-    def calset(self):
-        clu = self._train.index.tolist()
-        return self.x, clu
-    
-class RDM:
-    def __init__(self, x:Optional[Union[np.ndarray|DataFrame]], rset:Optional[Union[float|int]]):
-        self.x = x
-        self.ratio = rset
-        self._train, self._test = train_test_split(self.x, train_size = self.ratio)
-    
-    @property
-    def calset(self):
-        clu = self._train.index.tolist()
-        
-        return self.x, clu
\ No newline at end of file
diff --git a/src/utils/Kmedoids.py b/src/utils/Kmedoids.py
deleted file mode 100644
index e69de29..0000000
diff --git a/src/utils/NMF_.py b/src/utils/NMF_.py
deleted file mode 100644
index 8defac8..0000000
--- a/src/utils/NMF_.py
+++ /dev/null
@@ -1,28 +0,0 @@
-from Packages import * 
-
-
-class Nmf:
-    def __init__(self, X, Ncomp=3):
-        ## input matrix
-        if np.min(X)<0:
-            self.__x = np.array(X-np.min(X))
-        else:
-            self.__x = np.array(X)
-        ## set the number of components to compute and fit the model
-        self.__ncp = Ncomp
-
-        # Fit the NMF model
-        Mo = NMF(n_components=self.__ncp, init=None, solver='cd', beta_loss='frobenius',
-                    tol=0.0001, max_iter=300, random_state=None, alpha_W=0.0, alpha_H='same',
-                    l1_ratio=0.0, verbose=0, shuffle=False)
-        Mo.fit(self.__x)
-        # Results
-        self._p = Mo.components_.T
-        self._t = Mo.transform(self.__x)
-    @property
-    def scores_(self):
-        return DataFrame(self._t)
-    
-    @property
-    def loadings_(self):
-        return DataFrame(self._p)
\ No newline at end of file
diff --git a/src/utils/PCA_.py b/src/utils/PCA_.py
deleted file mode 100644
index c5023a0..0000000
--- a/src/utils/PCA_.py
+++ /dev/null
@@ -1,53 +0,0 @@
-from Packages import *
-
-class LinearPCA:
-    def __init__(self, X, Ncomp=10):
-        ## input matrix
-        self.__x = np.array(X)
-        ## set the number of components to compute and fit the model
-        self.__ncp = Ncomp
-
-        # Fit PCA model
-        M = PCA(n_components = self.__ncp)
-        M.fit(self.__x)
-
-        ######## results ########        
-        # Results
-        self.__pcnames = [f'PC{i+1}({100 *  M.explained_variance_ratio_[i].round(2)}%)' for i in range(self.__ncp)]
-        self._Qexp_ratio = DataFrame(100 *  M.explained_variance_ratio_, columns = ["Qexp"], index= [f'PC{i+1}' for i in range(self.__ncp)])
-
-        self._p = M.components_.T
-        self._t = M.transform(self.__x)
-        self.eigvals = M.singular_values_**2
-        self.Lambda = np.diag(self.eigvals)
-
-        # Matrix reconstruction or prediction making
-        self.T2 = {}
-        self._xp = {}
-        self._qres = {}
-        self.leverage = {}
-        
-        # 
-        for i in range(self.__ncp):
-            # Matrix reconstruction- prediction
-            self._xp[i] = np.dot(self._t[:,:i+1], self._p.T[:i+1,:])
-
-            
-            #self.T2[i] = np.diag(self._t[:,:i+1] @ np.transpose(self._t[:,:i+1]))
-
-            
-
-
-    @property
-    def scores_(self):
-        return DataFrame(self._t, columns= self.__pcnames)
-    
-    @property
-    def loadings_(self):
-        return DataFrame(self._p, columns=self.__pcnames)
-    
-    @property
-    def residuals_(self):
-        res = DataFrame(self._qres)
-        res.columns=self.__pcnames
-        return res
\ No newline at end of file
diff --git a/src/utils/PLSR_.py b/src/utils/PLSR_.py
deleted file mode 100644
index 6f90bf4..0000000
--- a/src/utils/PLSR_.py
+++ /dev/null
@@ -1,51 +0,0 @@
-from Packages import *
-from utils.Miscellaneous import *
-from utils.Evaluation_Metrics import metrics
-
-class PinardPlsr:
-    def __init__(self, x_train, y_train, x_test, y_test):
-        self.x_train = x_train
-        self.x_test = x_test 
-        self.y_train = y_train
-        self.y_test = y_test
-
-        # create model module with PINARD
-        # Declare preprocessing pipeline
-        svgolay = [ ('_sg1',pp.SavitzkyGolay()),
-                    ('_sg2',pp.SavitzkyGolay())  # nested pipeline to perform the Savitzky-Golay method twice for 2nd order preprocessing
-                    ]
-        preprocessing = [   ('id', pp.IdentityTransformer()), # Identity transformer, no change to the data
-                            ('savgol', pp.SavitzkyGolay()), # Savitzky-Golay smoothing filter
-                            ('derivate', pp.Derivate()), # Calculate the first derivative of the data
-                            ('SVG', FeatureUnion(svgolay))
-                            ]
-        # Declare complete pipeline
-        pipeline = Pipeline([
-            ('scaler', MinMaxScaler()), # scaling the data
-            ('preprocessing', FeatureUnion(preprocessing)), # preprocessing
-            ('PLS',  PLSRegression(n_components=14))])
-        # Estimator including y values scaling
-        estimator = TransformedTargetRegressor(regressor = pipeline, transformer = MinMaxScaler())
-        # Training
-        self.trained = estimator.fit(self.x_train, self.y_train)
-
-
-        # fit scores
-        # Predictions on the training, cross-validation and test sets
-        self.yc = DataFrame(self.trained.predict(self.x_train)) # predictions on the training set
-        self.ycv = DataFrame(cross_val_predict(self.trained, self.x_train, self.y_train, cv = 3)) # cross-validation predictions on the training set
-        self.yt = DataFrame(self.trained.predict(self.x_test)) # predictions on the test set
-
-        ################################################################################################################
-        
-
-        ################################################################################################################
-
-    @property
-    def model_(self):
-        return self.trained
-    
-    @property
-    def pred_data_(self):
-        
-        return self.yc, self.ycv, self.yt
\ No newline at end of file
diff --git a/src/utils/PLSR_Preprocess.py b/src/utils/PLSR_Preprocess.py
deleted file mode 100644
index f83260c..0000000
--- a/src/utils/PLSR_Preprocess.py
+++ /dev/null
@@ -1,100 +0,0 @@
-from Packages import *
-from utils import metrics
-from utils.DATA_HANDLING import *
-
-class PlsProcess:
-    SCORE = 100000000
-    index_export = DataFrame()
-    def __init__(self, x_train, x_test, y_train, y_test, scale, Kfold):
-
-        PlsProcess.SCORE = 10000
-        self.xtrain = x_train
-        self.xtest = x_test
-        self.y_train =  y_train
-        self.y_test = y_test
-        self.scale = scale
-        self.Kfold = Kfold
-        self.model = None
-        self.p = self.xtrain.shape[1]
-        self.PLS_params = {'polyorder': hp.choice('polyorder', [0, 1, 2]),
-                           'deriv': hp.choice('deriv', [0, 1, 2]),
-                           'window_length': hp.choice('window_length', [15, 19, 23, 27]),
-                           'scatter': hp.choice('scatter', ['Snv', 'Non'])}
-        self.PLS_params['n_components'] = hp.randint("n_components", 2, 20)
-
-    def objective(self, params):
-        # Train the model
-        self.xtrain = eval(f'{params['scatter']}(self.xtrain)')
-        self.xtest = eval( f'{params['scatter']}(self.xtest)')
-
-
-
-        if params['deriv'] > params['polyorder'] or params['polyorder'] > params['window_length']:
-            params['deriv'] = 0
-            params['polyorder'] = 0
-            params['window_length'] = 1
-            self.x_train = self.xtrain
-            self.x_test = self.xtest
-        else:
-            self.x_train = DataFrame(eval(f'savgol_filter(self.xtrain, polyorder={params['polyorder']}, deriv={params['deriv']}, window_length = {params['window_length']})'),
-                                            columns = self.xtrain.columns, index= self.xtrain.index)
-            self.x_test = DataFrame(eval(f'savgol_filter(self.xtest, polyorder={params['polyorder']}, deriv={params['deriv']}, window_length = {params['window_length']})'),                                columns = self.xtest.columns , index= self.xtest.index)
-        
-        
-        try:
-            Model = PLSRegression(scale = self.scale, n_components = params['n_components'])
-            Model.fit(self.x_train, self.y_train)
-
-        except ValueError as ve:
-            params["n_components"] = 1
-            Model = PLSRegression(scale = self.scale, n_components = params["n_components"])
-            Model.fit(self.x_train, self.y_train)
-
-        ## make prediction
-        yc = Model.predict(self.x_train).reshape(-1)
-        ycv = cross_val_predict(Model, self.x_train, self.y_train, cv=self.Kfold, n_jobs=-1).reshape(-1)
-        yt = Model.predict(self.x_test).reshape(-1)
-        ####################
-        rmsecv = np.sqrt(mean_squared_error(self.y_train, ycv))
-        rmsec = np.sqrt(mean_squared_error(self.y_train, yc))
-        rmset = np.sqrt(mean_squared_error(self.y_test, yt))
-        
-
-        score = rmsecv/rmsec*np.round(rmset/rmsecv)*rmsecv*100/self.y_train.mean()*rmset*1000/self.y_test.mean()
-        if score < PlsProcess.SCORE-0.5 :
-            PlsProcess.SCORE = score
-            self.nlv = params['n_components']
-            self.best = params
-            self.model = Model
-            self.yc = yc
-            self.ycv = ycv
-            self.yt = yt
-        return score
-
-
-    ##############################################
-
-    def tune(self, n_iter):
-        trials = Trials()
-        
-        best_params = fmin(fn=self.objective,
-                           space=self.PLS_params,
-                           algo=tpe.suggest,  # Tree of Parzen Estimators’ (tpe) which is a Bayesian approach
-                           max_evals=n_iter,
-                           trials=trials,
-                           verbose=0)
-    
-    @property
-    def best_hyperparams(self):
-        self.b = {'Scatter':self.best['scatter'], 'Savitzky-Golay derivative parameters':{'polyorder':self.best['polyorder'],
-                                                                                'deriv':self.best['deriv'],
-                                                                                'window_length':self.best['window_length']}}
-        return self.b
-    
-    @property
-    def model_(self):
-        return self.model
-    
-    @property
-    def pred_data_(self):
-        return self.yc, self.ycv, self.yt
\ No newline at end of file
diff --git a/src/utils/RegModels.py b/src/utils/RegModels.py
deleted file mode 100644
index 1b759f0..0000000
--- a/src/utils/RegModels.py
+++ /dev/null
@@ -1,229 +0,0 @@
-from Packages import *
-from utils import metrics, Snv, No_transformation, KF_CV, sel_ratio
-
-
-class Regmodel(object):
-    
-    def __init__(self, train, test, n_iter, add_hyperparams = None, nfolds = 3, **kwargs):
-        self.SCORE = 100000000
-        self._xc, self._xt, self._ytrain, self._ytest = train[0], test[0], train[1], test[1]
-        self._nc, self._nt, self._p = train[0].shape[0], test[0].shape[0], train[0].shape[1]
-        self._model, self._best = None, None
-        self._yc, self._ycv, self._yt = None, None, None
-        self._cv_df = DataFrame()
-        self._sel_ratio = DataFrame()
-        self._nfolds = nfolds
-        self._selected_bands = DataFrame(index = ['from', 'to'])
-        self.important_features = None
-        self._hyper_params = {'polyorder': hp.choice('polyorder', [0, 1, 2]),
-                            'deriv': hp.choice('deriv', [0, 1, 2]),
-                            'window_length': hp.choice('window_length', [15, 21, 27, 33]),
-                            'normalization': hp.choice('normalization', ['Snv', 'No_transformation'])}
-        if add_hyperparams is not None:
-            self._hyper_params.update(add_hyperparams)
-            self._best = None
-
-        trials = Trials()
-        best_params = fmin(fn=self.objective,
-                            space=self._hyper_params,
-                            algo=tpe.suggest,  # Tree of Parzen Estimators’ (tpe) which is a Bayesian approach
-                            max_evals=n_iter,
-                            trials=trials,
-                            verbose=1)
-    
-    @property
-    def train_data_(self):
-        return [self._xc, self._ytrain]
-    
-    @property
-    def test_data_(self):
-        return [self._xt, self._ytest]
-
-    @property
-    def pretreated_spectra_(self):
-        return self.pretreated
-
-    @property
-    def get_params_(self):### This method returns the search space where the optimization algorithm will search for the optimal subset of hyperparameters
-       return self._hyper_params
-    
-    def objective(self, params):
-       pass
-    
-    @property
-    def best_hyperparams_(self): ### This method returns the subset of selected hyperparameters
-        return self._best
-    @property
-    def best_hyperparams_print(self):### This method returns a sentence telling what signal preprocessing method was applied
-        if self._best['normalization'] == 'Snv':
-            a = 'Standard Normal Variate (SNV)'
-
-        elif self._best['normalization'] == 'No_transformation':
-            a = " No transformation was performed"
-
-        SG = f'- Savitzky-Golay derivative parameters \:(Window_length:{self._best['window_length']};  polynomial order: {self._best['polyorder']};  Derivative order : {self._best['deriv']})'
-        Norm = f'- Spectral Normalization \: {a}'
-        return SG+"\n"+Norm
-    
-    @property
-    def model_(self): # This method returns the developed model
-        return self._model
-    
-    @property
-    def pred_data_(self): ## this method returns the predicted data in training and testing steps
-        return self._yc, self._yt
-    
-    @property
-    def cv_data_(self): ## Cross validation data
-        return self._ycv
-    
-    @property
-    def CV_results_(self):
-        return self._cv_df
-    @property
-    def important_features_(self):
-        return self.important_features
-    @property
-    def selected_features_(self):
-        return self._selected_bands
-    
-    @property
-    def sel_ratio_(self):
-        return self._sel_ratio
-  
-########################################### PLSR   #########################################
-class Plsr(Regmodel):
-    def __init__(self, train, test, n_iter = 10, cv = 3):
-        super().__init__(train, test, n_iter, nfolds = cv, add_hyperparams = {'n_components': hp.randint('n_components', 1,20)})
-        ### parameters in common
-        
-    def objective(self, params):
-        params['n_components'] = int(params['n_components'])
-        x0 = [self._xc, self._xt]
-        
-        x1 = [eval(str(params['normalization'])+"(x0[i])") for i in range(2)]
-
-        a, b, c = params['deriv'], params['polyorder'], params['window_length']
-        if a > b or b > c:
-            if self._best is not None:
-                a, b, c = self._best['deriv'], self._best['polyorder'], self._best['window_length']
-
-            else:
-                a, b, c = 0, 0, 1
-
-        params['deriv'], params['polyorder'], params['window_length']  = a, b, c
-        x2 = [savgol_filter(x1[i], polyorder=params['polyorder'], deriv=params['deriv'], window_length = params['window_length']) for i in range(2)]
-
-        model = PLSRegression(scale = False, n_components = params['n_components'])
-        folds = KF_CV().CV(x = x2[0], y = np.array(self._ytrain), n_folds = self._nfolds)
-        yp = KF_CV().cross_val_predictor(model = model, folds = folds, x = x2[0], y = np.array(self._ytrain))
-        self._cv_df = KF_CV().metrics_cv(y = np.array(self._ytrain), ypcv = yp, folds =folds)[1]
-                
-        score = self._cv_df.loc["cv",'rmse']
-        
-        Model = PLSRegression(scale = False, n_components = params['n_components'])
-        Model.fit(x2[0], self._ytrain)
-
-        if self.SCORE > score:
-            self.SCORE = score
-            self._ycv = KF_CV().meas_pred_eq(y = np.array(self._ytrain), ypcv=yp, folds=folds)
-            self._yc = Model.predict(x2[0])
-            self._yt = Model.predict(x2[1])
-            self._model = Model
-            for key,value in params.items():
-                try: params[key] =  int(value)
-                except (TypeError, ValueError): params[key] =  value
-
-            self._best = params
-            self.pretreated = DataFrame(x2[0])
-            self._sel_ratio = sel_ratio(Model, x2[0])
-        return score
-
-
-    ############################################ iplsr #########################################
-class TpeIpls(Regmodel):
-    def __init__(self, train, test, n_iter = 10, n_intervall = 5, cv = 3):
-        self.n_intervall = n_intervall
-        self.n_arrets = self.n_intervall*2
-        
-        
-        r = {'n_components': hp.randint('n_components', 1,20)}
-        r.update({f'v{i}': hp.randint(f'v{i}', 0, train[0].shape[1]) for i in range(1,self.n_arrets+1)})
-
-        super().__init__(train, test, n_iter, add_hyperparams = r, nfolds = cv)
-        
-        ### parameters in common
-        
-    def objective(self, params):
-        ### wavelength indices
-        self.idx = [params[f'v{i}'] for i in range(1,self.n_arrets+1)]
-        self.idx.sort()
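-        # Pair the sorted boundary indices into n_intervall intervals and merge them
-        # into a single, duplicate-free column index.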
-        arrays = [np.arange(self.idx[2*i],self.idx[2*i+1]+1) for i in range(self.n_intervall)]
-        id = np.unique(np.concatenate(arrays, axis=0), axis=0)
-
-        ### Preprocessing
-        x0 = [self._xc, self._xt]
-        x1 = [eval(str(params['normalization'])+"(x0[i])") for i in range(2)]
-
-        a, b, c = params['deriv'], params['polyorder'], params['window_length']
-        if a > b or b > c:
-            if self._best is not None:
-                a, b, c = self._best['deriv'], self._best['polyorder'], self._best['window_length']
-
-            else:
-                a, b, c = 0, 0, 1
-
-        params['deriv'], params['polyorder'], params['window_length']  = a, b, c
-        x2 = [savgol_filter(x1[i], polyorder=params['polyorder'], deriv=params['deriv'], window_length = params['window_length']) for i in range(2)]
-        
-        
-        prepared_data = [x2[i][:,id] for i in range(2)]
-
-        
-        ### Modelling
-        folds = KF_CV().CV(x = prepared_data[0], y = np.array(self._ytrain), n_folds = self._nfolds)
-        try:
-            model = PLSRegression(scale = False, n_components = params['n_components'])
-            yp = KF_CV().cross_val_predictor(model = model, folds = folds, x = prepared_data[0], y = np.array(self._ytrain))
-            self._cv_df = KF_CV().metrics_cv(y = np.array(self._ytrain), ypcv = yp, folds =folds)[1]
-        except ValueError as ve:
-            params["n_components"] = 1
-            model = PLSRegression(scale = False, n_components = params["n_components"])
-            yp = KF_CV().cross_val_predictor(model = model, folds = folds, x = prepared_data[0], y = np.array(self._ytrain))
-            self._cv_df = KF_CV().metrics_cv(y = np.array(self._ytrain), ypcv = yp, folds =folds)[1]
-
-
-        score = self._cv_df.loc['cv','rmse']
-        
-        Model = PLSRegression(scale = False, n_components = model.n_components)
-        Model.fit(prepared_data[0], self._ytrain)
-
-        if self.SCORE > score:
-            self.SCORE = score
-            self._ycv = KF_CV().meas_pred_eq(y = np.array(self._ytrain), ypcv=yp, folds=folds)
-            
-            self._yc = Model.predict(prepared_data[0])
-            self._yt = Model.predict(prepared_data[1])
-            self._model = Model
-            for key,value in params.items():
-                try: params[key] =  int(value)
-                except (TypeError, ValueError): params[key] =  value
-            self._best = params
-
-            self.pretreated = DataFrame(x2[0])
-            self.segments = arrays
-            
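-            # Record each retained interval as a (from, to) pair of wavelength indices.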
-            for i in range(len(self.segments)):
-                self._selected_bands[f'band{i+1}'] = [self.segments[i][0], self.segments[i][self.segments[i].shape[0]-1]]
-            self._selected_bands.index = ['from','to']
-                
-        return score
-    
-
-    ###########################################  LWPLSR  #########################################
-    ############################################  Pcr  #########################################
-
-class Pcr(Regmodel):
-    def __init__(self, train, test, n_iter = 10, n_val = 5):
-        self.n_val = n_val
-        # n_val random column indices over the spectral variables, added to the shared hyperparameter space.
-        r = {f'pc{i}': hp.randint(f'pc{i}', 0, train[0].shape[1]) for i in range(1, self.n_val + 1)}
-        super().__init__(train, test, n_iter, add_hyperparams = r)
diff --git a/src/utils/SK_PLSR_.py b/src/utils/SK_PLSR_.py
deleted file mode 100644
index c614311..0000000
--- a/src/utils/SK_PLSR_.py
+++ /dev/null
@@ -1,118 +0,0 @@
-from Packages import *
-from utils.Miscellaneous import *
-from utils.Evaluation_Metrics import metrics
-from utils.DATA_HANDLING import Snv
-
-class PlsR:
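-    """
-    PLS regression with hyperopt-tuned preprocessing (optional SNV and Savitzky-Golay
-    filtering) and number of latent variables; the score balances calibration,
-    cross-validation and test errors.
-    """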
-    SCORE = 100000000
-    
-    def __init__(self, x_train, y_train, x_test, y_test):
-        self.PLS_params = {}
-        # The Savitzky-Golay derivative order may not exceed the polynomial order,
-        # so the derivative choices are limited by the smallest polynomial order offered.
-        a = [0, 1, 2]
-        b = list(range(min(a) + 1))
-        
-        self.PLS_params['Preprocess'] = {'Scatter':hp.choice('Scatter',['Snv', None]),
-                                         'window_length_sg':hp.choice('window_length_sg', [9, 13, 17, 21]),
-                                         'polyorder_sg':hp.choice('polyorder_sg',a),
-                                         'deriv_sg':hp.choice('deriv_sg', b)}
-        
-        self.PLS_params['n_components'] = hp.choice("n_components", list(np.arange(1,21)))
-    
-        self.x_train = x_train
-        self.x_test = x_test 
-        self.y_train = y_train
-        self.y_test = y_test
-        self.p = self.x_train.shape[1]
-
-        trials = Trials()
-        best_params = fmin(fn=self.objective,
-                           space=self.PLS_params,
-                           algo=tpe.suggest,  # Tree-structured Parzen Estimator (TPE), a Bayesian optimization approach
-                           max_evals=100,
-                           trials=trials,
-                           verbose=0)
-                #####################################################################################################
-        if self.best['Preprocess']['Scatter'] is None:
-            xtrain = self.x_train
-            xtest = self.x_test
-        elif self.best['Preprocess']['Scatter'] == 'Snv':
-            xtrain = Snv(self.x_train)
-            xtest = Snv(self.x_test)
-
-        x_train = savgol_filter(xtrain, window_length = self.best['Preprocess']['window_length_sg'],
-                                    polyorder = self.best['Preprocess']['polyorder_sg'],
-                                    deriv=self.best['Preprocess']['deriv_sg'])
-            
-        x_test = savgol_filter(xtest, window_length = self.best['Preprocess']['window_length_sg'],
-                                    polyorder = self.best['Preprocess']['polyorder_sg'],
-                                    deriv=self.best['Preprocess']['deriv_sg'])
-        
-
-
-        ######################################################################################################
-        self.trained = PLSRegression(n_components= self.best['n_components'], scale = False)
-        self.trained.fit(x_train, self.y_train)
-
-        self.yc = DataFrame(self.trained.predict(x_train)) # predictions on the calibration (training) data
-        self.ycv = DataFrame(cross_val_predict(self.trained, x_train, self.y_train, cv = 3)) # cross-validated predictions on the training data
-        self.yt = DataFrame(self.trained.predict(x_test)) # predictions on the test data
-        #######################################################################################################
-        
-    def objective(self, params):
-        ws = params['Preprocess']['window_length_sg']
-        po = params['Preprocess']['polyorder_sg']
-        dr = params['Preprocess']['deriv_sg']
-
-        if params['Preprocess']['Scatter'] is None:
-            xtrain = self.x_train
-            xtest = self.x_test
-        elif params['Preprocess']['Scatter'] == 'Snv':
-            xtrain = Snv(self.x_train)
-            xtest = Snv(self.x_test)
-
-        x_train = savgol_filter(xtrain, window_length = params['Preprocess']['window_length_sg'],
-                                    polyorder = params['Preprocess']['polyorder_sg'],
-                                    deriv=params['Preprocess']['deriv_sg'])
-            
-        x_test = savgol_filter(xtest, window_length = params['Preprocess']['window_length_sg'],
-                                    polyorder = params['Preprocess']['polyorder_sg'],
-                                    deriv=params['Preprocess']['deriv_sg'])
-        
-            
-        m = PLSRegression( n_components= params['n_components'], scale = False )
-        m.fit(x_train, self.y_train)
-
-        yc = m.predict(x_train)
-        ycv = cross_val_predict( m, x_train, self.y_train, cv = 5)
-        yt = m.predict(x_test)
-
-        # Note: mean_squared_error returns the mean squared error, not its square root.
-        msec = mean_squared_error(self.y_train, yc)
-        msecv = mean_squared_error(self.y_train, ycv)
-        mset = mean_squared_error(self.y_test, yt)
-
-        # Dimensionless score built from error ratios; it decreases when the cross-validation
-        # and test errors stay close to the calibration error, i.e. when overfitting is limited.
-        SCORE = (msecv/mset) + (msecv/msec) + (mset/msec)
-        if SCORE < PlsR.SCORE:
-            PlsR.SCORE = SCORE
-            self.best = params
-        return SCORE
-    
-
-    @property
-    def model_(self):
-        return self.trained
-
-    @property
-    def best_hyperparams(self):
-        self.b = {'Scatter':self.best['Preprocess']['Scatter'], 'Savitzky-Golay derivative parameters':{'polyorder':self.best['Preprocess']['polyorder_sg'],
-                                                                                'deriv':self.best['Preprocess']['deriv_sg'],
-                                                                                'window_length':self.best['Preprocess']['window_length_sg']}}
-        return self.b
-    
-    @property
-    def pred_data_(self):
-        return np.array(self.yc).reshape(-1), np.array(self.ycv).reshape(-1), np.array(self.yt).reshape(-1)
\ No newline at end of file
diff --git a/src/utils/UMAP_.py b/src/utils/UMAP_.py
deleted file mode 100644
index b4110d5..0000000
--- a/src/utils/UMAP_.py
+++ /dev/null
@@ -1,31 +0,0 @@
-# UMAP function for the Sample Selection module
-from Packages import *
-from utils.DATA_HANDLING import *
-
-class Umap:
-    """
-    The UMAP dimension reduction algorithm from scikit learn
-    """
-    def __init__(self, numerical_data, cat_data):
-        self.numerical_data = numerical_data
-        if cat_data is None:
-            self.categorical_data_encoded = cat_data
-        elif len(cat_data) > 0:
-            self.categorical_data = cat_data
-            self.le = LabelEncoder()
-            self.categorical_data_encoded = self.le.fit_transform(self.categorical_data)
-        else:
-            self.categorical_data_encoded = None
-
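-        # Three-component UMAP embedding; passing the encoded labels as y switches UMAP to supervised mode.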
-        self.model = UMAP(n_neighbors=20, n_components=3, min_dist=0.0, )#random_state=42,)
-        self.model.fit(self.numerical_data, y = self.categorical_data_encoded)
-        self.scores_raw = self.model.transform(self.numerical_data)
-        self.scores = DataFrame(self.scores_raw)
-        self.scores.columns = [f'axis_{i+1}' for i in range(self.scores_raw.shape[1])]
-
-    @property
-    def scores_(self):
-        return self.scores
-    @property
-    def scores_raw_(self):
-        return self.scores_raw
\ No newline at end of file
diff --git a/src/utils/VarSel.py b/src/utils/VarSel.py
deleted file mode 100644
index 001f56e..0000000
--- a/src/utils/VarSel.py
+++ /dev/null
@@ -1,163 +0,0 @@
-from Packages import *
-from utils import metrics
-from utils import *
-from scipy.signal import savgol_filter
-class TpeIpls:
-    '''
-    A wavelength-selection framework introduced as an improvement over forward and backward
-      interval selection. It combines partial least squares regression with the Tree-structured
-      Parzen Estimator (TPE), a Bayesian optimization algorithm first introduced in 2011,
-      yielding a wrapper method for interval PLS.
-    The spectral data are treated as contiguous intervals rather than through discrete,
-      point-by-point variable selection, which preserves the structure of the spectra.
-
-    Optimization is used to find the subset of variables that optimizes a given criterion
-      (e.g., maximizing predictive performance or minimizing overfitting).
-    '''
-    SCORE = 100000000
-    index_export = DataFrame()
-    def __init__(self, x_train, x_test, y_train, y_test,
-                  scale, Kfold, n_intervall):
-        TpeIpls.SCORE = 10000
-        self.xtrain = x_train
-        self.xtest = x_test
-        self.y_train=  y_train
-        self.y_test = y_test
-        self.scale = scale
-        self.Kfold = Kfold
-        self.p = self.xtrain.shape[1]
-        self.n_intervall = n_intervall
-        self.n_arrets = self.n_intervall*2
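-        # Search space: 2*n_intervall interval boundaries over the p wavelengths,
-        # the number of latent variables, and the preprocessing settings.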
-        self.PLS_params = {f'v{i}': hp.randint(f'v{i}', 0, self.p) for i in range(1,self.n_arrets+1)}
-        self.PLS_params['n_components'] = hp.randint("n_components", 1, 10)
-        self.PLS_params['Preprocess'] = {'Scatter':hp.choice('Scatter',['Snv', None]),
-                                         'window_length_sg':hp.choice('window_length_sg', [9, 13, 17, 21]),
-                                         'polyorder_sg':hp.choice('polyorder_sg',[2]),
-                                         'deriv_sg':hp.choice('deriv_sg', [1])}
-    def objective(self, params):
-        self.idx = [params[f'v{i}'] for i in range(1,self.n_arrets+1)]
-        self.idx.sort()
-        
-        arrays = [np.arange(self.idx[2*i],self.idx[2*i+1]+1) for i in range(self.n_intervall)]
-
-        id = np.unique(np.concatenate(arrays, axis=0), axis=0)
-
-        ## first preprocessing method
-        if params['Preprocess']['Scatter'] =='Snv':
-            xtrain1 = Snv(self.xtrain)
-            xtest1 = Snv(self.xtest)
-        else:
-            xtrain1 = self.xtrain
-            xtest1 = self.xtest
-        
-        ## Second preprocessing method: Savitzky-Golay filtering
-        if params['Preprocess']['deriv_sg'] > params['Preprocess']['polyorder_sg'] or params['Preprocess']['polyorder_sg'] > params['Preprocess']['window_length_sg']:
-            params['Preprocess']['deriv_sg'] = 0
-            params['Preprocess']['polyorder_sg'] = 0
-            params['Preprocess']['window_length_sg'] = 1            
-
-
-        pt = params['Preprocess']
-        self.x_train = DataFrame(savgol_filter(xtrain1, polyorder=pt['polyorder_sg'], deriv=pt['deriv_sg'], window_length=pt['window_length_sg'], delta=1.0, axis=-1, mode='interp', cval=0.0),
-                                    columns = self.xtrain.columns, index= self.xtrain.index)
-
-        self.x_test = DataFrame(savgol_filter(xtest1, polyorder=pt['polyorder_sg'], deriv=pt['deriv_sg'], window_length=pt['window_length_sg'], delta=1.0, axis=-1, mode='interp', cval=0.0),
-                                    columns = self.xtest.columns, index= self.xtest.index)
-
-
-        # Train the model
-        try:
-            Model = PLSRegression(scale = self.scale,n_components = params['n_components'])
-            Model.fit(self.x_train.iloc[:,id], self.y_train)
-        except ValueError as ve:            
-            Model = PLSRegression(scale = self.scale,n_components = 1)
-            Model.fit(self.x_train.iloc[:,id], self.y_train)
-            params['n_components'] = 1
-
-
-        ## make prediction
-        yc = Model.predict(self.x_train.iloc[:,id]).ravel()
-        ycv = cross_val_predict(Model, self.x_train.iloc[:,id], self.y_train, cv=self.Kfold, n_jobs=-1).ravel()
-        yt = Model.predict(self.x_test.iloc[:, id]).ravel()
-
-        ### compute r-squared
-        #r2c = r2_score(self.y_train, yc)
-        #r2cv = r2_score(self.y_train, ycv)
-        #r2t = r2_score(self.y_test, yt)
-        rmsecv = np.sqrt(mean_squared_error(self.y_train, ycv))
-        rmsec = np.sqrt(mean_squared_error(self.y_train, yc))
-
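-        # Score: overfitting ratio (RMSECV / RMSEC) plus the CV error expressed as a percentage of the mean response, rounded.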
-        score = np.round(rmsecv/rmsec +  rmsecv*100/self.y_train.mean())
-        if score < TpeIpls.SCORE-0.5:
-            TpeIpls.SCORE = score
-            self.nlv = params['n_components'] 
-
-
-            TpeIpls.index_export = DataFrame()
-            TpeIpls.index_export["Vars"] = self.x_test.columns[id]
-            TpeIpls.index_export.index = id
-            self.best = params
-
-       
-            self.segments = arrays
-        return score
-
-    
-
-
-    ##############################################
-
-    def BandSelect(self, n_iter):
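-        """Run the TPE search for n_iter evaluations, then refit a PLS model on the union of the selected bands."""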
-        trials = Trials()
-        
-        best_params = fmin(fn=self.objective,
-                           space=self.PLS_params,
-                           algo=tpe.suggest,  # Tree-structured Parzen Estimator (TPE), a Bayesian optimization approach
-                           max_evals=n_iter,
-                           trials=trials,
-                           verbose=0)
-
-        ban = {}
-        if self.segments:
-            for i in range(len(self.segments)):
-                ban[f'band{i+1}'] = [self.segments[i][0], self.segments[i][self.segments[i].shape[0]-1]]
-            
-        self.bands = DataFrame(ban).T
-        self.bands.columns = ['from', 'to']
-
-
-        f = []
-        for i in range(self.bands.shape[0]):
-            f.extend(np.arange(self.bands["from"][i], self.bands["to"][i]+1))
-        variables_idx = list(set(f))
-
-        
-        self.pls = PLSRegression(n_components=self.nlv, scale= self.scale)
-        self.pls.fit(self.x_train.iloc[:,variables_idx], self.y_train)
-
-        self.yc = self.pls.predict(self.x_train.iloc[:,variables_idx]).ravel()
-        self.ycv = cross_val_predict(self.pls, self.x_train.iloc[:,variables_idx], self.y_train, cv=self.Kfold, n_jobs=-1).ravel()
-        self.yt = self.pls.predict(self.x_test.iloc[:,variables_idx]).ravel()
-        
-        return self.bands, variables_idx
-    
-    @property
-    def best_hyperparams(self):
-        self.b = {'Scatter':self.best['Preprocess']['Scatter'], 'Savitzky-Golay derivative parameters':{'polyorder':self.best['Preprocess']['polyorder_sg'],
-                                                                                'deriv':self.best['Preprocess']['deriv_sg'],
-                                                                                'window_length':self.best['Preprocess']['window_length_sg']}}
-        return self.b
-
-    @property
-    def model_(self):
-        return self.pls
-    @property
-    def pred_data_(self):
-        return self.yc, self.ycv, self.yt
-    
\ No newline at end of file
-- 
GitLab