diff --git a/src/Class_Mod/Miscellaneous.py b/src/Class_Mod/Miscellaneous.py
index 3c2f560ff3e8b000b4a4d813f881d9768a2276c4..a4e934e317db106458c29e9fd3f4f56f0252c8f9 100644
--- a/src/Class_Mod/Miscellaneous.py
+++ b/src/Class_Mod/Miscellaneous.py
@@ -23,13 +23,11 @@ def prediction(NIRS_csv, qsep, qhdr, model):
 @st.cache_data
 def reg_plot( meas, pred, train_idx, test_idx):
     ec = np.subtract(np.array(meas[0]).reshape(-1), np.array(pred[0]).reshape(-1))
-    ecv = np.subtract(np.array(meas[1]).reshape(-1), np.array(pred[1]).reshape(-1))
-    et = np.subtract(np.array(meas[2]).reshape(-1), np.array(pred[2]).reshape(-1))
+    et = np.subtract(np.array(meas[1]).reshape(-1), np.array(pred[1]).reshape(-1))
 
     fig, ax = plt.subplots(figsize = (12,4))
     sns.regplot(x = meas[0] , y = pred[0], color='blue', label = 'Calib')
-    sns.regplot(x = meas[1], y = pred[1], color='red', label = 'CV')
-    sns.regplot(x = meas[2], y = pred[2], color='green', label = 'Test')
+    sns.regplot(x = meas[1], y = pred[1], color='green', label = 'Test')
     plt.plot([np.min(meas[0])-0.05, np.max([meas[0]])+0.05], [np.min(meas[0])-0.05, np.max([meas[0]])+0.05], color = 'black')
 
     for i, txt  in enumerate(train_idx):
@@ -37,14 +35,9 @@ def reg_plot( meas, pred, train_idx, test_idx):
         if np.abs(ec[i])> np.mean(ec)+ 3*np.std(ec):
             plt.annotate(txt ,(np.array(meas[0]).reshape(-1)[i], np.array(pred[0]).reshape(-1)[i]))
 
-
-    for i, txt  in enumerate(train_idx):
-        if np.abs(ecv[i])> np.mean(ecv)+ 3*np.std(ecv):
-            plt.annotate(txt ,(np.array(meas[1]).reshape(-1)[i], np.array(pred[1]).reshape(-1)[i]))
-
     for i, txt  in enumerate(test_idx):
         if np.abs(et[i])> np.mean(et)+ 3*np.std(et):
-            plt.annotate(txt ,(np.array(meas[2]).reshape(-1)[i], np.array(pred[2]).reshape(-1)[i]))
+            plt.annotate(txt ,(np.array(meas[1]).reshape(-1)[i], np.array(pred[1]).reshape(-1)[i]))
 
     ax.set_ylabel('Predicted values')
     ax.set_xlabel('Measured values')
@@ -55,33 +48,33 @@ def reg_plot( meas, pred, train_idx, test_idx):
 def resid_plot( meas, pred, train_idx, test_idx):
     
     ec = np.subtract(np.array(meas[0]).reshape(-1), np.array(pred[0]).reshape(-1))
-    ecv = np.subtract(np.array(meas[1]).reshape(-1), np.array(pred[1]).reshape(-1))
-    et = np.subtract(np.array(meas[2]).reshape(-1), np.array(pred[2]).reshape(-1))
+    et = np.subtract(np.array(meas[1]).reshape(-1), np.array(pred[1]).reshape(-1))
     
 
     fig, ax = plt.subplots(figsize = (12,4))
-    sns.residplot(x = meas[0], y = pred[0], color='blue', label = 'Calib')
-    sns.residplot(x = meas[1], y = pred[1], color='red', label = 'CV')
-    sns.residplot(x = meas[2], y = pred[2], color='green', label = 'Test')
+    # Residuals (measured - predicted) are drawn against the measured values
+    sns.scatterplot(x = meas[0], y = ec, color='blue', label = 'Calib')
+    sns.scatterplot(x = meas[1], y = et, color='green', label = 'Test')
+    plt.axhline(y= 0, c ='black', linestyle = ':')
+    # Symmetric y-limits with 10% headroom keep the zero line centred
+    lim = np.max(abs(np.concatenate([ec, et], axis = 0)))*1.1
+    plt.ylim(- lim, lim )
 
 
     for i, txt  in enumerate(train_idx):
         #plt.annotate(txt ,(np.array(meas[0]).reshape(-1)[i],ec[i]))
         if np.abs(ec[i])> np.mean(ec)+ 3*np.std(ec):
-            plt.annotate(txt ,(np.array(pred[0]).reshape(-1)[i],ec[i]))
-
-
-    for i, txt  in enumerate(train_idx):
-        if np.abs(ecv[i])> np.mean(ecv)+ 3*np.std(ecv):
-            plt.annotate(txt ,(np.array(pred[1]).reshape(-1)[i],ecv[i]))
+            plt.annotate(txt ,(np.array(meas[0]).reshape(-1)[i],ec[i]))
 
     for i, txt  in enumerate(test_idx):
         if np.abs(et[i])> np.mean(et)+ 3*np.std(et):
-            plt.annotate(txt ,(np.array(pred[2]).reshape(-1)[i],et[i]))
+            plt.annotate(txt ,(np.array(meas[1]).reshape(-1)[i],et[i]))
-    ax.set_xlabel(f'{ train_idx.shape}')
     ax.set_ylabel('Residuals')
     ax.set_xlabel('Measured values')
     plt.legend()
+    plt.margins(0)
+
+    return fig
 
 
 # function that create a download button - needs the data to save and the file name to store to
@@ -105,3 +98,31 @@ def plot_spectra(df, xunits, yunits):
     plt.margins(x = 0)
 
     return fig
+
+
+## descriptive statistics
+def desc_stats(x):
+    """Summarise a sample as a dict of descriptive statistics.
+
+    x: array-like exposing `.shape` (e.g. a pandas Series or NumPy array).
+    Returns a dict keyed by display label, in table-column order:
+    'N samples', 'Min', 'Max', 'Mean', 'Median', 'S' (np.std), 'RSD(%)',
+    'Skewness' and 'Kurtosis' (scipy.stats, bias=True).
+    """
+    # Function-scope import so the helper does not depend on skew/kurtosis
+    # having been brought into this module some other way.
+    from scipy.stats import kurtosis, skew
+
+    avg, sd = np.mean(x), np.std(x)  # computed once, reused for 'RSD(%)'
+    return {
+        'N samples': x.shape[0],
+        'Min': np.min(x),
+        'Max': np.max(x),
+        'Mean': avg,
+        'Median': np.median(x),
+        'S': sd,
+        # RSD is undefined for a zero mean: report NaN rather than dividing by 0
+        'RSD(%)': sd*100/avg if np.all(avg != 0) else np.nan,
+        'Skewness': skew(x, axis=0, bias=True),
+        'Kurtosis': kurtosis(x, axis=0, bias=True),
+    }
\ No newline at end of file
diff --git a/src/pages/2-model_creation.py b/src/pages/2-model_creation.py
index 502ad9527d2d8a0074a625c335a6015bfa9b4f68..44f97e701b8651b3e2018054b94ee8b23468e2cf 100644
--- a/src/pages/2-model_creation.py
+++ b/src/pages/2-model_creation.py
@@ -4,7 +4,7 @@ st.set_page_config(page_title="NIRS Utils", page_icon=":goat:", layout="wide")
 from Modules import *
 from Class_Mod.DATA_HANDLING import *
 from pandas.api.types import is_float_dtype
-
+from Class_Mod.Miscellaneous import desc_stats
 add_header()
 
 st.session_state["interface"] = st.session_state.get('interface')
@@ -13,11 +13,14 @@ if st.session_state["interface"] == 'simple':
 
 
     ####################################### page Design #######################################
-st.header("Calibration Model Development", divider='blue')
-st.write("Create a predictive model, then use it for predicting your target variable (chemical values) from NIRS spectra")
+st.title("Calibration Model Development")
+st.markdown("Create a predictive model, then use it for predicting your target variable (chemical data) from NIRS spectra")
+st.header("I - Data visualization", divider='blue')
+M0, M00 = st.columns([1, .4])
+st.header("II - Model creation", divider='blue')
+
 M1, M2, M3 = st.columns([2,3,2])
 M4, M5 = st.columns([6,2])
-st.write("---")
 st.header("Model Diagnosis", divider='blue')
 
 M7, M8 = st.columns([2,2])
@@ -27,12 +30,11 @@ M9, M10 = st.columns([2,2])
 M9.write("-- Save the model --")
             ######################################################################
 
-
 reg_algo = ["","Full-PLSR", "Locally Weighted PLSR", "Interval-PLSR", "Full-PLSR-sklearn", "PrePLStester"]
       #######################################        ###########################################
 
 files_format = ['.csv', '.dx']
-file = M3.radio('select files format:', options = files_format)
+file = M00.radio('select files format:', options = files_format)
 
 ### Data
 spectra = pd.DataFrame
@@ -40,19 +42,19 @@ y = pd.DataFrame
 
 # load .csv file
 if file == files_format[0]:
-    xcal_csv = M3.file_uploader("Select NIRS Data", type="csv", help=" :mushroom: select a csv matrix with samples as rows and lambdas as columns")
+    xcal_csv = M00.file_uploader("Select NIRS Data", type="csv", help=" :mushroom: select a csv matrix with samples as rows and lambdas as columns")
     if xcal_csv:
-        sepx = M3.radio("Select separator (X file) - _detected_: " + str(find_delimiter('data/'+xcal_csv.name)),
+        sepx = M00.radio("Select separator (X file) - _detected_: " + str(find_delimiter('data/'+xcal_csv.name)),
                                 options=[";", ","], index=[";", ","].index(str(find_delimiter('data/'+xcal_csv.name))), key=0)
-        hdrx = M3.radio("samples name (X file)? - _detected_: " + str(find_col_index('data/'+xcal_csv.name)),
+        hdrx = M00.radio("samples name (X file)? - _detected_: " + str(find_col_index('data/'+xcal_csv.name)),
                                 options=["no", "yes"], index=["no", "yes"].index(str(find_col_index('data/'+xcal_csv.name))), key=1)
         if hdrx == "yes": col = 0
         else: col = False
         
-    ycal_csv = M3.file_uploader("Select corresponding Chemical Data", type="csv", help=" :mushroom: select a csv matrix with samples as rows and chemical values as a column")
+    ycal_csv = M00.file_uploader("Select corresponding Chemical Data", type="csv", help=" :mushroom: select a csv matrix with samples as rows and chemical values as a column")
     if ycal_csv:
-        sepy = M3.radio("separator (Y file): ", options=[";", ","], key=2)
-        hdry = M3.radio("samples name (Y file)?: ", options=["no", "yes"], key=3)
+        sepy = M00.radio("separator (Y file): ", options=[";", ","], key=2)
+        hdry = M00.radio("samples name (Y file)?: ", options=["no", "yes"], key=3)
         if hdry == "yes": col = 0
         else: col = False
     
@@ -64,7 +66,7 @@ if file == files_format[0]:
             spectra, meta_data = col_cat(xfile)
             y, idx = col_cat(yfile)
             if y.shape[1]>1:
-                yname = M3.selectbox('Select target', options=y.columns)
+                yname = M00.selectbox('Select target', options=y.columns)
                 y = y.loc[:,yname]
             else:
                 y = y.iloc[:,0]
@@ -75,7 +77,7 @@ if file == files_format[0]:
                 st.write(meta_data)
 
             if spectra.shape[0] != y.shape[0]:
-                M3.warning('X and Y have different sample size')
+                M00.warning('X and Y have different sample size')
                 y = pd.DataFrame
                 spectra = pd.DataFrame
 
@@ -89,50 +91,59 @@ if file == files_format[0]:
 
 ## Load .dx file
 elif file == files_format[1]:
-    data_file = M3.file_uploader("Select Data", type=".dx", help=" :mushroom: select a dx file")
+    data_file = M00.file_uploader("Select Data", type=".dx", help=" :mushroom: select a dx file")
     if data_file:
         with NamedTemporaryFile(delete=False, suffix=".dx") as tmp:
             tmp.write(data_file.read())
             tmp_path = tmp.name
             chem_data, spectra, meta_data, meta_data_st = read_dx(file =  tmp_path)
-            M3.success("The data have been loaded successfully", icon="✅")
+            M00.success("The data have been loaded successfully", icon="✅")
             if chem_data.shape[1]>0:
-                yname = M3.selectbox('Select target', options=chem_data.columns)
+                yname = M00.selectbox('Select target', options=chem_data.columns)
                 measured = chem_data.loc[:,yname] > 0
                 y = chem_data.loc[:,yname].loc[measured]
                 spectra = spectra.loc[measured]
             else:
-                M3.warning('Warning: Chemical data are not included in your file !', icon="⚠️")
+                M00.warning('Warning: Chemical data are not included in your file !', icon="⚠️")
         os.unlink(tmp_path)
 
 ### split the data
 if not spectra.empty and not y.empty:
-    rd_seed = M1.slider("Customize Train-test split", min_value=1, max_value=100, value=42, format="%i")
-    # Split data into training and test sets using the kennard_stone method and correlation metric, 25% of data is used for testing
-    train_index, test_index = train_test_split_idx(spectra, y = y, method="kennard_stone", metric="correlation", test_size=0.25, random_state=rd_seed)
     
 
+    #rd_seed = M1.slider("Customize Train-test split", min_value=1, max_value=100, value=42, format="%i")
+    # Split data into training and test sets using the kennard_stone method and correlation metric, 25% of data is used for testing
+    train_index, test_index = train_test_split_idx(spectra, y = y, method="kennard_stone", metric="correlation", test_size=0.25, random_state=42)
+
     # Assign data to training and test sets
     X_train, y_train = pd.DataFrame(spectra.iloc[train_index,:]), y.iloc[train_index]
     X_test, y_test = pd.DataFrame(spectra.iloc[test_index,:]), y.iloc[test_index]
     
-    sk = lambda x: skew(x, axis=0, bias=True)
-    ku = lambda x:kurtosis(x, axis=0, bias=True)
-    cv = lambda x: x.std()*100/x.mean()
-
-    M2.write('Loaded data summary')
-    M2.write(f'The loaded spectra consist of {spectra.shape[1]} wavelengths')
-    datainf = pd.DataFrame()
-    datainf['N samples'] = [X_train.shape[0], X_test.shape[0], spectra.shape[0] ]
-    datainf['Mean'] = [y_train.mean(), y_test.mean(), y.mean()]
-    datainf['SD'] = [y_train.std(), y_test.std(), y.std()]
-    datainf['CV(%)'] = [cv(y_train), cv(y_test), cv(y)]
-    datainf['Skewness'] = [sk(y_train), sk(y_test), sk(y)]
-    datainf['Kurtosis'] = [ku(y_train), ku(y_test), ku(y)]
-    datainf.index = ['Train', 'Test', 'Total']
-    M2.write(datainf.round(3))
-
-#######################################
+
+    #### Insight into the loaded data: spectra plot, target histograms and summary table
+    fig, ax1 = plt.subplots( figsize = (12,3))
+    spectra.T.plot(legend=False, ax = ax1, linestyle = '--')
+    ax1.set_ylabel('Signal intensity')
+    ax1.margins(0)
+    plt.tight_layout()
+    M0.pyplot(fig)
+
+    fig, ax2 = plt.subplots(figsize = (12,3))
+    sns.histplot(y, color="deeppink", kde = True,label="y",ax = ax2, fill=True)
+    sns.histplot(y_train, color="blue", kde = True,label="y (train)",ax = ax2, fill=True)
+    sns.histplot(y_test, color="green", kde = True,label="y (test)",ax = ax2, fill=True)
+    ax2.set_xlabel('y')
+    plt.legend()
+    plt.tight_layout()
+
+    M0.pyplot(fig)
+
+
+    M0.write('Loaded data summary')
+    M0.write(pd.DataFrame([desc_stats(y_train),desc_stats(y_test),desc_stats(y)], index =['Train', 'Test', 'Total'] ).round(2))
+    ####################################### Insight into the loaded data
+
+    #######################################
     regression_algo = M1.selectbox("Choose the algorithm for regression", options=reg_algo, key = 12)
     if regression_algo == reg_algo[1]:
         # Train model with model function from application_functions.py
@@ -161,16 +172,6 @@ if not spectra.empty and not y.empty:
                 Reg.pred_data_[i].index = list(y_train.index)
             else:
                 Reg.pred_data_[i].index = list(y_test.index)
-        # Reg.pred_data_[0] = Reg.pred_data_[0].T.reset_index().drop(columns = ['index'])
-        # Reg.pred_data_[0].index = list(y_train.index)
-        # Reg.pred_data_[1] = Reg.pred_data_[1].T.reset_index().drop(columns = ['index'])
-        # Reg.pred_data_[1].index = list(y_train_cv1.index)
-        #  Reg.pred_data_[2] = Reg.pred_data_[2].T.reset_index().drop(columns = ['index'])
-        # Reg.pred_data_[2].index = list(y_train_cv2.index)
-        #  Reg.pred_data_[3] = Reg.pred_data_[3].T.reset_index().drop(columns = ['index'])
-        # Reg.pred_data_[3].index = list(y_train_cv3.index)
-        # Reg.pred_data_[4] = Reg.pred_data_[4].T.reset_index().drop(columns = ['index'])
-        # Reg.pred_data_[4].index = list(y_test.index)
 
     elif regression_algo == reg_algo[3]:
         s = M1.number_input(label='Enter the maximum number of intervals', min_value=1, max_value=6, value=3)
@@ -220,7 +221,6 @@ if not spectra.empty and not y.empty:
         ################# Model analysis ############
     if regression_algo in reg_algo[1:]:
         yc = Reg.pred_data_[0]
-        ycv = Reg.pred_data_[1]
         yt = Reg.pred_data_[2]
             
         #if
@@ -230,7 +230,7 @@ if not spectra.empty and not y.empty:
             json.dump(Reg.best_hyperparams, outfile)      
 
         M2.write("-- Performance metrics --")
-        M2.dataframe(metrics(c = [y_train, yc], cv = [y_train, ycv], t = [y_test, yt], method='regression').scores_)
+        M2.dataframe(metrics(c = [y_train, yc], t = [y_test, yt], method='regression').scores_)
         #from st_circular_progress import CircularProgress
         #my_circular_progress = CircularProgress(label = 'Performance',value = 50, key = 'my performance',
         #                                         size = "medium", track_color = "black", color = "blue")
@@ -238,8 +238,8 @@ if not spectra.empty and not y.empty:
         #my_circular_progress.st_circular_progress()
         #my_circular_progress.update_value(progress=20)
 
-        M7.pyplot(reg_plot([y_train, y_train, y_test],[yc, ycv, yt], train_idx = train_index, test_idx = test_index))
-        M8.pyplot(resid_plot([y_train, y_train, y_test],[yc, ycv, yt], train_idx = train_index, test_idx = test_index))
+        M7.pyplot(reg_plot([y_train, y_test],[yc, yt], train_idx = train_index, test_idx = test_index))
+        M8.pyplot(resid_plot([y_train, y_test],[yc, yt], train_idx = train_index, test_idx = test_index))
             
             
             #model_export = M1.selectbox("Choose way to export", options=["pickle", "joblib"], key=20)