diff --git a/src/common.py b/src/common.py
index 92d27f25c22f702ce991850b649e690bcf29e552..24e7991b1f99f245677ca80750f9a9bb7337f4aa 100644
--- a/src/common.py
+++ b/src/common.py
@@ -24,7 +24,7 @@ import plotly.express as px
 from tempfile import NamedTemporaryFile
 import numpy as np
 from datetime import datetime
-
+import json
 
 from utils.data_parsing import JcampParser, CsvParser
 from style.layout import  UiComponents
diff --git a/src/pages/2-model_creation.py b/src/pages/2-model_creation.py
index 26aa959d7ae221979b5c0264b75d780c7d3b82a8..c6384e619b13190708f9e44540185ebac82fb78a 100644
--- a/src/pages/2-model_creation.py
+++ b/src/pages/2-model_creation.py
@@ -8,12 +8,8 @@ st.set_page_config(page_title = "NIRS Utils", page_icon = ":goat:", layout = "wi
 # layout
 UiComponents(pagespath = pages_folder, csspath= css_file,imgpath=image_path ,
              header=True, sidebar= True, bgimg=False, colborders=True)
-
 hash_ = ''
-def p_hash(add):
-    global hash_
-    hash_ = hash_data(hash_+str(add))
-    return hash_
+
 # Initialize the variable in session state if it doesn't exist for st.cache_data
 if 'counter' not in st.session_state:
     st.session_state.counter = 0
@@ -89,7 +85,7 @@ match file:
                 for i in ["xcal_csv", "ycal_csv"]:
                     stringio = StringIO(eval(f'{i}.getvalue().decode("utf-8")'))
                     xy_str += str(stringio.read())
-                p_hash([xy_str + str(xcal_csv.name) + str(ycal_csv.name), hdrx, sepx, hdry, sepy])
+                # p_hash([xy_str + str(xcal_csv.name) + str(ycal_csv.name), hdrx, sepx, hdry, sepy])
                 # p_hash(add = )
                 
                 @st.cache_data
@@ -152,15 +148,17 @@ match file:
                     tmp_path = tmp.name
                     with open(tmp.name, 'r') as dd:
                         dxdata = dd.read()
-                        p_hash(str(dxdata)+str(data_file.name))
+                        # p_hash(str(dxdata)+str(data_file.name))
 
                 ## load and parse the temp dx file
                 @st.cache_data
-                def dx_loader(change):
-                    chem_data, spectra, meta_data, meta_data_st = read_dx(file =  tmp_path)    
-                    os.unlink(tmp_path)
-                    return chem_data, spectra, meta_data, meta_data_st
-                chem_data, spectra, meta_data, meta_data_st = dx_loader(change = hash_)
+                def read_dx(tmp_path):
+                    # parse the temporary JCAMP-DX file written above
+                    M = JcampParser(path = tmp_path)
+                    M.parse()
+                    return M.chem_data, M.specs_df_, M.md_df_, M.md_df_st_
+                chem_data, spectra, meta_data, meta_data_st = read_dx(tmp_path = tmp_path)
+                os.unlink(tmp_path)  # remove the temp file once it has been parsed
                 
                 if not spectra.empty:
                     st.success("Info: The data have been loaded successfully", icon = "✅")
@@ -191,13 +189,14 @@ match file:
 ################################################### BEGIN : visualize and split the data ####################################################
 st.subheader("I - Data visualization", divider = 'blue')
 if not spectra.empty and not y.empty:
-    p_hash(y)
-    p_hash(np.mean(spectra))
+    # p_hash(y)
+    # p_hash(np.mean(spectra))
     if np.array(spectra.columns).dtype.kind in ['i', 'f']:
         colnames = spectra.columns
     else:
         colnames = np.arange(spectra.shape[1])
-    
+
+    from utils.miscellaneous import data_split
     X_train, X_test, y_train, y_test, train_index, test_index = data_split(x=spectra, y=y)
     
 
@@ -205,6 +204,7 @@ if not spectra.empty and not y.empty:
     #### insight on loaded data
     spectra_plot = plot_spectra(spectra, xunits = 'Wavelength/Wavenumber', yunits = "Signal intensity")
     target_plot = hist(y = y, y_train = y_train, y_test = y_test, target_name=yname)
+    from utils.miscellaneous import desc_stats
     stats = DataFrame([desc_stats(y_train), desc_stats(y_test), desc_stats(y)], index =['train', 'test', 'total'] ).round(2) 
 
     # fig1, ax1 = plt.subplots( figsize = (12, 3))
@@ -241,7 +241,7 @@ if not spectra.empty and not y.empty:
         # select type of supervised modelling problem
         var_nature = ['Continuous', 'Categorical']
         mode = c4.radio("The nature of the target variable :", options = var_nature)
-        p_hash(mode)
+        # p_hash(mode)
         match mode:
             case "Continuous":
                 reg_algo = ["", "PLS", "LW-PLS", "TPE-iPLS"]
@@ -276,7 +276,7 @@ if not spectra.empty and not y.empty:
     #     st.session_state.model_type = model_type
     #     increment()
     
-    p_hash(model_type)
+    # p_hash(model_type)
 
 
     # Training set preparation for cross-validation(CV)
@@ -293,6 +293,7 @@ if not spectra.empty and not y.empty:
 
             match model_type:
                 case 'PLS':
+                    from utils.regress import Plsr
                     Reg = Plsr(train = [X_train, y_train], test = [X_test, y_test], n_iter = 100, cv = nb_folds)
                     # reg_model = Reg.model_
                     rega = Reg.selected_features_
@@ -412,10 +413,10 @@ if not spectra.empty and not y.empty:
                 it = st.number_input(label = 'Enter the number of iterations', min_value = 2, max_value = 500, value = 250)
             else:
                 s, it = None, None
-            p_hash(str(s)+str(it))
+            # p_hash(str(s)+str(it))
                 
             remodel_button = st.button('re-model the data', key=4, help=None, type="primary", use_container_width=True, on_click=increment)
-            p_hash(st.session_state.counter)
+            # p_hash(st.session_state.counter)
             Reg = RequestingModelCreation(change = hash_)
             reg_model = Reg.model_
             hash_ = hash(Reg)
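
Note on this file: with every `p_hash()` call commented out, `hash_` stays `''`, so `RequestingModelCreation(change = hash_)` is keyed on a constant and its cache no longer invalidates when the inputs change; likewise `dxdata` is now read but unused. Below is a minimal sketch of the direction the refactor appears to take, letting `st.cache_data` derive the key from the actual inputs (here the uploaded file's bytes, since `tmp_path` changes on every rerun). `load_dx` is a hypothetical name; only `JcampParser` and its attributes are taken from the diff itself.

```python
import os
from tempfile import NamedTemporaryFile

import streamlit as st
from utils.data_parsing import JcampParser

@st.cache_data
def load_dx(dx_bytes: bytes):
    # Cache key is the file *content*, so re-uploading the same .dx file hits
    # the cache even though NamedTemporaryFile yields a new path each rerun.
    with NamedTemporaryFile(delete=False, suffix=".dx") as tmp:
        tmp.write(dx_bytes)
        tmp_path = tmp.name
    M = JcampParser(path=tmp_path)
    M.parse()
    os.unlink(tmp_path)  # temp file no longer needed once parsed
    return M.chem_data, M.specs_df_, M.md_df_, M.md_df_st_
```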
diff --git a/src/utils/__init__.py b/src/utils/__init__.py
index 40891d9ea67507a805eb89c2bac9ff8983d8cac3..bcb7a25cf83e706623706601c9f04fcd9276ca03 100644
--- a/src/utils/__init__.py
+++ b/src/utils/__init__.py
@@ -10,4 +10,3 @@ Here are all the classes to perform your analysis
 # from .clustering import *
 # from .samsel import *
 # from .regress import *
-# from .eval_metrics import *
\ No newline at end of file
diff --git a/src/utils/data_handling.py b/src/utils/data_handling.py
index 1c222742d66d71f3b931fe1279f4d6960eed0004..cfa413e3ebc8f999c907e261fb5eb6d7ebc4a08f 100644
--- a/src/utils/data_handling.py
+++ b/src/utils/data_handling.py
@@ -1,4 +1,6 @@
 from utils.eval_metrics import metrics
+import numpy as np
+from pandas import DataFrame
 
 ## try to automatically detect the field separator within the CSV
 def find_delimiter(filename):
@@ -86,6 +88,7 @@ class KF_CV:
     ### KFCV(dict) returns a testset indices/Fold 
     @staticmethod
     def CV(x, y, n_folds:int):
+        from kennard_stone import KFold as ks_KFold
         test_folds = {}
         folds_name = [f'Fold{i+1}' for i in range(n_folds)]
         kf = ks_KFold(n_splits=n_folds, device='cpu')
@@ -132,12 +135,14 @@ class KF_CV:
             r = DataFrame()
             r['Predicted'] = ypcv[Fname]
             r['Measured'] = y[folds[Fname]]
+            from sklearn.linear_model import LinearRegression
             ols = LinearRegression().fit(DataFrame(y[folds[Fname]]), ypcv[Fname].reshape(-1,1))
             r.index = folds[Fname]
-            r['Folds'] = [f'{Fname} (Predicted = {np.round(ols.intercept_[0], 2)} + {np.round(ols.coef_[0][0],2)} x Measured'] * r.shape[0]
+            r['Folds'] = [f'{Fname} (Predicted = {np.round(ols.intercept_[0], 2)} + {np.round(ols.coef_[0][0], 2)} x Measured)'] * r.shape[0]
             cvcv[i] = r
             coeff[Fname] = [ols.coef_[0][0], ols.intercept_[0]]
 
+        from pandas import concat
         data = concat(cvcv, axis = 0)
         data['index'] = [data.index[i][1] for i in range(data.shape[0])]
         data.index = data['index']
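
The deferred `kennard_stone` import above feeds `KF_CV.CV`, which builds a dict of test-set indices per fold. A usage sketch, under the assumption that the package's `KFold` mirrors scikit-learn's `split` API (the `device` argument is left at its default here; `X` is a placeholder):

```python
import numpy as np
from kennard_stone import KFold

X = np.random.rand(50, 10)   # placeholder spectra matrix
kf = KFold(n_splits=3)       # deterministic: folds depend only on X, no seed
test_folds = {f"Fold{i+1}": test_idx
              for i, (_, test_idx) in enumerate(kf.split(X))}
# same testset-indices-per-fold shape that KF_CV.CV returns
```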
diff --git a/src/utils/eval_metrics.py b/src/utils/eval_metrics.py
index 5cba400d1b1c107838e27cebf2a2ff866aec0f27..4202feafe399c91f63d54f77068b7cf11d8bb523 100644
--- a/src/utils/eval_metrics.py
+++ b/src/utils/eval_metrics.py
@@ -1,4 +1,8 @@
 
+from pandas import DataFrame
+import numpy as np
+
+
 class metrics:
     from typing import Optional, List
     from pandas import DataFrame
diff --git a/src/utils/miscellaneous.py b/src/utils/miscellaneous.py
index 5a2ba421a43e0d7b1f7b1eedf18984890162582e..fd98143a5397415dc091388eaa9cdf963a815070 100644
--- a/src/utils/miscellaneous.py
+++ b/src/utils/miscellaneous.py
@@ -1,5 +1,6 @@
 import streamlit as st
-
+from pandas import DataFrame
+import numpy as np
 
 # predict module
 def prediction(NIRS_csv, qsep, qhdr, model):
@@ -21,16 +22,16 @@ def download_results(data, export_name):
 
 @st.cache_data(show_spinner =True)
 def data_split(x, y):
+    from kennard_stone import train_test_split
-    # Split data into training and test sets using the kennard_stone method and correlation metric, 25% of data is used for testing
-    train_index, test_index = train_test_split_idx(x , y = y, method = "kennard_stone", metric = "correlation", test_size = 0.25, random_state = 42)
-    # Assign data to training and test sets
-    X_train, y_train = DataFrame(x.iloc[train_index,:]), y.iloc[train_index]
-    X_test, y_test = DataFrame(x.iloc[test_index,:]), y.iloc[test_index]
+    # Split data 75/25 with the deterministic Kennard-Stone algorithm; the split is reproducible by construction, so no random_state is needed
+    X_train, X_test, y_train, y_test = train_test_split(x, y, test_size = 0.25)
+    train_index, test_index = X_train.index, X_test.index
     return X_train, X_test, y_train, y_test, train_index, test_index
 
 ## descriptive stat
 @st.cache_data(show_spinner =True)
 def desc_stats(x):
+    from scipy.stats import skew, kurtosis
     a = {}
     a['N samples'] = x.shape[0]
     a['Min'] =  np.min(x)
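
`data_split` now relies on `kennard_stone.train_test_split` returning pandas objects with their original row labels intact, which is what makes `X_train.index` a valid substitute for the old positional `train_index`. A small self-contained sketch of that assumption (`random_state` is omitted on purpose, since Kennard-Stone is deterministic):

```python
import numpy as np
from pandas import DataFrame, Series
from kennard_stone import train_test_split

x = DataFrame(np.random.rand(20, 5))
y = Series(np.random.rand(20))
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.25)
train_index, test_index = X_train.index, X_test.index  # row labels, not positions
assert set(train_index).isdisjoint(test_index)
```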
diff --git a/src/utils/regress.py b/src/utils/regress.py
index 62c5f23322225da664721c8cab1b0928e262bde5..f4c3f9b6542369092e8cfd17c9a1ce7ea4cf8c2f 100644
--- a/src/utils/regress.py
+++ b/src/utils/regress.py
@@ -1,10 +1,15 @@
-from utils import metrics, Snv, No_transformation, KF_CV, sel_ratio
+import numpy as np
+from pandas import DataFrame
+from utils.eval_metrics import metrics
+from scipy.signal import savgol_filter
+from sklearn.cross_decomposition import PLSRegression
+from hyperopt import fmin, hp, tpe, Trials, space_eval, STATUS_OK, anneal
+
+from utils.data_handling import Snv, No_transformation, KF_CV, sel_ratio
 
 
 class Regmodel(object):
-    from hyperopt import fmin, hp, tpe, Trials, space_eval, STATUS_OK, anneal
     def __init__(self, train, test, n_iter, add_hyperparams = None, nfolds = 3, **kwargs):
-        from hyperopt import fmin, hp, tpe, Trials, space_eval, STATUS_OK, anneal
 
         self.SCORE = 100000000
         self._xc, self._xt, self._ytrain, self._ytest = train[0], test[0], train[1], test[1]
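
Hoisting the hyperopt imports to module level removes the duplicated class-level and `__init__`-level imports. For readers unfamiliar with the pattern, here is a simplified, illustrative stand-in for how a Regmodel-style tuner uses `fmin`/`tpe`/`Trials`; the objective and search space are placeholders, not the project's actual ones:

```python
import numpy as np
from hyperopt import STATUS_OK, Trials, fmin, hp, tpe
from sklearn.cross_decomposition import PLSRegression
from sklearn.model_selection import cross_val_score

def tune_pls(X, y, n_iter=20, cv=3):
    # Minimize cross-validated RMSE over the number of PLS components via TPE.
    def objective(params):
        model = PLSRegression(n_components=int(params["n_components"]))
        rmse = -cross_val_score(model, X, y, cv=cv,
                                scoring="neg_root_mean_squared_error").mean()
        return {"loss": rmse, "status": STATUS_OK}

    space = {"n_components": hp.quniform("n_components", 1, min(15, X.shape[1]), 1)}
    best = fmin(fn=objective, space=space, algo=tpe.suggest,
                max_evals=n_iter, trials=Trials())
    return int(best["n_components"])
```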
diff --git a/src/utils/visualize.py b/src/utils/visualize.py
index 5ac80c34c96ab0bd1df4308a4f9ee36581d50e37..a122dd867fa4c54970181b549ddd62fd2d84edab 100644
--- a/src/utils/visualize.py
+++ b/src/utils/visualize.py
@@ -1,5 +1,8 @@
 
 import streamlit as st
+import numpy as np
+import matplotlib.pyplot as plt
+import seaborn as sns
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ predictions histogram ~~~~~~~~~~~~~~~~~~~~~~~~~~
 @st.cache_data
 def pred_hist(pred):
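
The new numpy/matplotlib/seaborn imports serve the plotting helpers in this module. A hedged sketch of the kind of figure a cached `pred_hist` typically returns (the styling here is illustrative, not the project's exact plot):

```python
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

def pred_hist_sketch(pred):
    # Histogram + KDE of predicted values; the figure is returned for st.pyplot(fig).
    fig, ax = plt.subplots(figsize=(6, 3))
    sns.histplot(np.asarray(pred).ravel(), kde=True, ax=ax)
    ax.set_xlabel("Predicted value")
    return fig
```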