diff --git a/Class_Mod/DATA_HANDLING.py b/Class_Mod/DATA_HANDLING.py
index 02e5694ac1468525247520df88f0b8c96361ee89..10fb7ab898055afa3fd3f44365bdaf46ce151ce8 100644
--- a/Class_Mod/DATA_HANDLING.py
+++ b/Class_Mod/DATA_HANDLING.py
@@ -31,17 +31,13 @@ def col_cat(data_import):
     if len(categorical_columns_list) > 0:
         categorical_data = pd.concat(categorical_columns_list, axis=1)
     if len(categorical_columns_list) == 0:
-        empty = ["" for x in range(len(data_import))]
-        categorical_columns_list.append(empty)
-        categorical_data = pd.DataFrame(categorical_columns_list).T
-        categorical_data.columns = ['no categories']
+        categorical_data = pd.DataFrame()
     # Create numerical data matrix from the numerical columns list and fill na with the mean of the column
     numerical_data = pd.concat(numerical_columns_list, axis=1)
     numerical_data = numerical_data.apply(lambda x: x.fillna(x.mean())) #np.mean(x)))
-    # Scale the numerical data
-    scaler = StandardScaler()
-    scaled_values = scaler.fit_transform(numerical_data)
-    return numerical_data, categorical_data, scaled_values
+
+    return numerical_data, categorical_data
+
 
 
 def list_files(mypath, import_type):
diff --git a/Class_Mod/DxReader.py b/Class_Mod/DxReader.py
index d877ff2f2d43a995a51f660a6de5076343315352..f0248949d0697aca948f5053ab56c48bf04e1b6a 100644
--- a/Class_Mod/DxReader.py
+++ b/Class_Mod/DxReader.py
@@ -29,21 +29,21 @@ class DxRead:
             block_met = {   'name': block['title'],
                             'origin': block['origin'],
                             'date': block['date'],
-                            'time': block['time'],
-                            'spectrometer/data system': block['spectrometer/data system'],
-                            'instrumental parameters': block['instrumental parameters'],
-                            'xunits': block['xunits'],
-                            'yunits': block['yunits'],
-                            'xfactor': block['xfactor'],
-                            'yfactor': block['yfactor'],
-                            'firstx': block['firstx'],
-                            'lastx': block['lastx'],
-                            'firsty':block['firsty'],
-                            'miny': block['miny'],
-                            'maxy': block['maxy'],
-                            'npoints': block['npoints'],
+            #                'time': block['time'],
+            #                'spectrometer/data system': block['spectrometer/data system'],
+            #                'instrumental parameters': block['instrumental parameters'],
+            #                'xunits': block['xunits'],
+            #                'yunits': block['yunits'],
+            #                'xfactor': block['xfactor'],
+            #                'yfactor': block['yfactor'],
+            #                'firstx': block['firstx'],
+            #                'lastx': block['lastx'],
+            #                'firsty':block['firsty'],
+            #                'miny': block['miny'],
+            #                'maxy': block['maxy'],
+            #                'npoints': block['npoints'],
                             'concentrations':block['concentrations'],
-                            'deltax':block['deltax']
+            #                'deltax':block['deltax']
                             }
             self.__met[f'{i}'] = block_met
         self.metadata_ = pd.DataFrame(self.__met).T
@@ -87,8 +87,13 @@ class DxRead:
         return self.spectra
     @property
     def md_df_(self):
-        return self.metadata_
+        return self.metadata_.drop("concentrations", axis = 1)
     
     @property
     def chem_data_(self):
-         return self.chem_data
\ No newline at end of file
+         return self.chem_data
+    
+@st.cache_data
+def read_dx(file):
+     M = DxRead(file)
+     return M.chem_data, M.specs_df_, M.md_df_
\ No newline at end of file
diff --git a/Class_Mod/KMEANS_.py b/Class_Mod/KMEANS_.py
index ab9e22bcfe471916405de63e722e8853ceb2504e..526a43597155183de2241e0fd0b850f8b4af13ad 100644
--- a/Class_Mod/KMEANS_.py
+++ b/Class_Mod/KMEANS_.py
@@ -19,13 +19,6 @@ class Sk_Kmeans:
     def fit_optimal(self, nclusters):
         model = KMeans(n_clusters = nclusters, init = 'k-means++', random_state = 42)
         model.fit(self.x)
-        yp = model.predict(self.x)
-        num_colors = nclusters
-        colors = ['#' + ''.join([random.choice('0123456789ABCDEF') for _ in range(6)]) for _ in range(num_colors)]
-        col = np.array(['#' + ''.join([random.choice('0123456789ABCDEF') for _ in range(6)]) for _ in range(self.x.shape[0])])
-        for i in range(nclusters):
-            ss = np.where(yp==i)
-            col[ss] = colors[i]
-
-
-        return self.x, col
\ No newline at end of file
+        yp = model.predict(self.x)+1
+        clu = [f'cluster#{i}' for i in yp]
+        return self.x, clu
\ No newline at end of file
diff --git a/Class_Mod/Miscellaneous.py b/Class_Mod/Miscellaneous.py
index 1627b39960d520bd909555380c2fb86bf2badf08..79d1708cba65860d3e2cdf0d1ac50fd148a24937 100644
--- a/Class_Mod/Miscellaneous.py
+++ b/Class_Mod/Miscellaneous.py
@@ -47,3 +47,18 @@ def resid_plot( meas, pred):
 def download_results(data, export_name):
     with open(data) as f:
         st.download_button('Download Results', f, export_name)
+
+@st.cache_resource
+def plot_spectra(df):
+    """Draw every row of `df` as one blue line and return the annotated matplotlib figure."""
+    # Anchor the annotation at x = 0 for string column labels, else at the smallest column label.
+    m = 0 if isinstance(df.columns[0], str) else np.min(df.columns)
+
+    fig, ax = plt.subplots(figsize = (30,7))
+    df.T.plot(legend=False, ax = ax, color = 'blue')
+    ax.set_xlabel('Wavelength/Wavenumber', fontsize=18)
+    ax.set_ylabel('Signal intensity', fontsize=18)
+    ax.margins(x = 0)  # act on this figure's axes rather than on pyplot's implicit current axes
+    # df.max().max() is always a scalar; np.max(df) reduces per column on pandas < 2.0 and breaks `xy`.
+    ax.annotate(text = f'The total number of spectra is {df.shape[0]}', xy =(m, df.max().max()), size=20, color = 'black', backgroundcolor='red')
+    return fig
diff --git a/Class_Mod/__init__.py b/Class_Mod/__init__.py
index c684862836ba8af35807b889e3b822f091dad3d6..b5e1c5b63d602dd1291703ea4250e4ddf319254b 100644
--- a/Class_Mod/__init__.py
+++ b/Class_Mod/__init__.py
@@ -7,6 +7,6 @@ from .LWPLSR_ import model_LWPLSR
 from .Regression_metrics import metrics
 from .VarSel import TpeIpls
 from .Miscellaneous import resid_plot, reg_plot
-from .DxReader import DxRead
+from .DxReader import DxRead, read_dx
 from .HDBSCAN_Clustering import Hdbscan
 
diff --git a/Modules.py b/Modules.py
index 0076fb22adc7da0d1aec6530ee3f6ab0a754d370..09d297f18c22505322b5557d93afd8c60bd76db8 100644
--- a/Modules.py
+++ b/Modules.py
@@ -1,4 +1,4 @@
-from Class_Mod import LinearPCA, Umap, find_col_index, PinardPlsr, model_LWPLSR, list_files, metrics, TpeIpls, reg_plot, resid_plot, Sk_Kmeans, DxRead, Hdbscan
+from Class_Mod import LinearPCA, Umap, find_col_index, PinardPlsr, model_LWPLSR, list_files, metrics, TpeIpls, reg_plot, resid_plot, Sk_Kmeans, DxRead, Hdbscan, read_dx
 # find_col_index
 
-from Class_Mod.Miscellaneous import prediction, download_results
+from Class_Mod.Miscellaneous import prediction, download_results, plot_spectra
diff --git a/Packages.py b/Packages.py
index b0d939baa8021ba8dfa14088d1b33d972500954d..ec7d83f23abc877b99e5eb07c3abc95a2280edba 100644
--- a/Packages.py
+++ b/Packages.py
@@ -41,6 +41,7 @@ from PIL import Image
 import plotly.express as px
 import matplotlib.pyplot as plt
 import seaborn as sns
+import matplotlib
 
 ### Important Metrics
 from sklearn.metrics import pairwise_distances_argmin_min, adjusted_rand_score, adjusted_mutual_info_score
diff --git a/pages/1-samples_selection.py b/pages/1-samples_selection.py
index ffb4d81631eab0beda7d3fd473b21e004a6704f4..08d8cb6e364ee4b7a55b00dbab272583e1ab2c4e 100644
--- a/pages/1-samples_selection.py
+++ b/pages/1-samples_selection.py
@@ -8,195 +8,175 @@ if st.session_state["interface"] == 'simple':
     hide_pages("Predictions")
 
 ################################### Data Loading and Visualization ########################################
-container1 = st.container(border=True)
 col2, col1 = st.columns([3, 1])
 col1.header("Data Loading", divider='blue')
 col2.header("Spectral Data Visualization", divider='blue')
 
 
-container2 = st.container(border=True)
-container2.header("Exploratory Data Analysis-Multivariable Data Analysis", divider='blue')
-scores, loadings, pc = st.columns([2, 3, 0.5])
-influence, hotelling, qexp = st.columns([2, 2, 1])
+## Preallocation of data structure
+spectra = pd.DataFrame()
+meta_data = pd.DataFrame()
+selected_samples = pd.DataFrame()
 
 
-with container1:
-    # loader for csv file containing NIRS spectra
-    sselectx_csv = col1.file_uploader("Load NIRS Data", type=["csv","dx"], help=" :mushroom: select a csv matrix with samples as rows and lambdas as columns", key=5)
-    if sselectx_csv is not None:
-        test = sselectx_csv.name[sselectx_csv.name.find('.'):]
-        if test== '.csv':
-            with col1:
-                # Select list for CSV delimiter
-                psep = st.selectbox("Select csv separator - _detected_: " + str(find_delimiter('data/'+sselectx_csv.name)), options=[";", ","], index=[";", ","].index(str(find_delimiter('data/'+sselectx_csv.name))), key=9)
+# loader for datafile
+data_file = col1.file_uploader("Load NIRS Data", type=["csv","dx"], help=" :mushroom: select a csv matrix with samples as rows and lambdas as columns", key=5)
+
+
+if data_file:
+    # Retrieve the extension of the file
+    test = data_file.name[data_file.name.find('.'):]
+
+    ## Load .csv file
+    if test== '.csv':
+        with col1:
+            # Select list for CSV delimiter
+            psep = st.selectbox("Select csv separator - _detected_: " + str(find_delimiter('data/'+data_file.name)), options=[";", ","], index=[";", ","].index(str(find_delimiter('data/'+data_file.name))), key=9)
                 # Select list for CSV header True / False
-                phdr = st.selectbox("indexes column in csv? - _detected_: " + str(find_col_index('data/'+sselectx_csv.name)), options=["no", "yes"], index=["no", "yes"].index(str(find_col_index('data/'+sselectx_csv.name))), key=31)
-                if phdr == 'yes':
-                    col = 0
-                else:
-                    col = False
-                data_import = pd.read_csv(sselectx_csv, sep=psep, index_col=col)
-                data_import, categorical_data, scaled_values = col_cat(data_import)
+            phdr = st.selectbox("indexes column in csv? - _detected_: " + str(find_col_index('data/'+data_file.name)), options=["no", "yes"], index=["no", "yes"].index(str(find_col_index('data/'+data_file.name))), key=31)
+            if phdr == 'yes':
+                col = 0
+            else:
+                col = False
+            imp = pd.read_csv(data_file, sep=psep, index_col=col)
+            # col_cat returns (numerical_data, categorical_data): unpack one call instead of running it twice
+            spectra, meta_data = col_cat(imp)
+            st.success("The data have been loaded successfully", icon="✅")
+
+    ## Load .dx file
+    elif test == '.dx':
+        # Create a temporary file to save the uploaded file
+        with NamedTemporaryFile(delete=False, suffix=".dx", buffering=0) as tmp:  # unbuffered: the bytes are on disk before read_dx reopens the path
+            tmp.write(data_file.read())
+            tmp_path = tmp.name
+            with col1:
+                _, spectra, meta_data = read_dx(file =  tmp_path)
                 st.success("The data have been loaded successfully", icon="✅")
-                ## Visualize spectra
-
-            with col2:
-                fig, ax = plt.subplots(figsize = (30,7))
-                data_import.T.plot(legend=False, ax = ax, color = 'blue')
-                ax.set_xlabel('Wavelength/Wavenumber', fontsize=18)
-                ax.set_ylabel('Signal', fontsize=18)
-                plt.margins(x = 0)
-                st.pyplot(fig)
-
-                st.write("Summary")
-                info = pd.DataFrame({'N':[data_import.shape[0]],
-                                    'Min': [np.min(data_import)],
-                                    'Max':[np.max(data_import)],}, index = ['Values']).T
-                info.rename_axis('information')
-                st.table(data=info)
-
-        elif test == '.dx':
-            # Create a temporary file to save the uploaded file
-            with NamedTemporaryFile(delete=False, suffix=".dx") as tmp:
-                tmp.write(sselectx_csv.read())
-                tmp_path = tmp.name
-                with col1:
-                        data = DxRead(path = tmp_path)
-                        data_import = data.specs_df_
-                        st.success("The data have been loaded successfully", icon="✅")
-
-                    ## Visualize spectra
-
-                with col2:
-                    fig, ax = plt.subplots(figsize = (30,7))
-                    data_import.T.plot(legend=False, ax = ax, color = 'blue')
-                    ax.set_xlabel('Wavelength/Wavenumber', fontsize=18)
-                    ax.set_ylabel('Signal', fontsize=18)
-                    plt.margins(x = 0)
-                    st.pyplot(fig)
-
-                    st.write("Summary")
-                    info = pd.DataFrame({'N':[data_import.shape[0]],
-                                        'Min': [np.min(data_import)],
-                                        'Max':[np.max(data_import)],}, index = ['Values']).T
-                    info.rename_axis('information')
-                    st.table(data=info)
-            os.unlink(tmp_path)
-
-
-    
-
-        
-######################################################################################
+        os.unlink(tmp_path)
+
+
+## Visualize spectra
+if not spectra.empty:
+    with col2:
+        fig = plot_spectra(spectra)
+        st.pyplot(fig)
+
 
 ############################## Exploratory data analysis ###############################
-plot_type=['', 'PCA','UMAP', 'NMF']
-cluster_methods = ['', 'Kmeans','HDBSCAN', 'AP']
-with container2:
-    if sselectx_csv is not None:
-        plot_type=['', 'PCA','UMAP', 'NMF']
-        cluster_methods = ['', 'Kmeans','HDBSCAN', 'AP']
-
-        with pc:
-            type_plot = st.selectbox("Dimensionality reduction techniques: ", options=plot_type, key=37)
-            type_cluster = st.selectbox("Clustering techniques: ", options=cluster_methods, key=38)
-            # compute UMAP - umap_maker in application_functions.py
-            if type_plot == 'PCA':
-                model = LinearPCA(data_import, Ncomp=5)
-            elif type_plot =='UMAP':
-                model = Umap(data_import = data_import, numerical_data = scaled_values, cat_data = categorical_data)
-
-
-        if type_plot in ['PCA', 'UMAP']:
-            if type_plot in ['PCA']:
-                # add 2 select lists to choose which component to plot
-                axis1 = pc.selectbox("x-axis", options = model.scores_.columns, index=0)
-                axis2 = pc.selectbox("y-axis", options = model.scores_.columns, index=1)
-                axis3 = pc.selectbox("z-axis", options = model.scores_.columns, index=2)
-            elif type_plot in ['UMAP']:
-                axis1 = 0
-                axis2 = 1
-                axis3 = 2
-
-            if type_cluster == 'Kmeans':
-                scsc = pd.concat([model.scores_.loc[:,axis1], model.scores_.loc[:,axis2], model.scores_.loc[:,axis3]], axis = 1)
-                cl = Sk_Kmeans(scsc, max_clusters = 30)
-
-            elif type_cluster == 'HDBSCAN':
-                optimized_hdbscan = Hdbscan(model.scores_raw_)
-                labels, hdbscan_score = optimized_hdbscan.HDBSCAN_scores_
-            with scores:
-                t = model.scores_
-                if type_cluster in ['AP', 'Kmeans']:
-                    st.write('Scree plot')
-                    fig2 = px.scatter(cl.inertia_.T, y = 'inertia')
-                    st.plotly_chart(fig2)
-
-                    ncluster = st.number_input(min_value=2, max_value=30, value=3, label = 'Select the desired number of clusters')
-                    data, colors = cl.fit_optimal(nclusters=ncluster)
-                    #fig = px.scatter(data, x=axis1, y=axis2, color= colors)
-                    st.write('Scores plot')
-                    fig = px.scatter_3d(data, x=axis1, y=axis2, z = axis3, color=colors)
-                    fig.update_traces(marker=dict(size=4))
-
-
-                elif type_cluster in ['HDBSCAN']:
-                    st.write('plot HDBSCAN clustering')
-                    fig = px.scatter_3d(t, x=axis1, y=axis2, z = axis3, color=labels)
-                    fig.update_traces(marker=dict(size=4))
-                    # st.plotly_chart(fig_hdbscan)
-                    st.write('Optimal number of clusters = ' + str(len(set(labels))))
-                    st.write('DBCV score (-1 to 1 - higher is better) = ' + str(round(hdbscan_score,3)))
-                    st.write('Unclassified samples: ' + str(len(t[labels==-1])) + ' on ' + str(len(t)) + ' samples (' + str(round(len(t[labels==-1])/len(t)*100, 1)) + '%).')
-
-                else:
-                    if test == '.dx':
-                        filter = ['origin', 'date', 'time', 'spectrometer/data system']
-                        col = st.selectbox('filter', options= filter)
-
-                        fig = px.scatter_3d(t, x=axis1, y=axis2, z = axis3, color = data.md_df_[col])
-                        fig.update_traces(marker=dict(size=4))
-                    else:
-                        fig = px.scatter_3d(t, x=axis1, y=axis2, z = axis3 )
-                        fig.update_traces(marker=dict(size=4))
-
-                st.plotly_chart(fig)
-
-
-            if type_plot =='PCA':
-                with loadings:
-                    st.write('Loadings plot')
-                    p = model.loadings_
-                    pp = pd.concat([p, pd.DataFrame(np.arange(p.shape[0]), index=p.index, columns=['wl'])], axis =1)
-                    df1 = pp.melt(id_vars="wl")
-
-                    fig = px.line(df1, x = 'wl', y = 'value', color='variable')
-                    fig.update_layout(
-                        legend=dict(x=1, y=0,
-                                    font=dict(
-                                        family="Courier", size=12, color="black"),
-                                    bordercolor="Black", borderwidth=2)
-                    )
-                    st.plotly_chart(fig, use_container_width = True)
-
-                
-                with influence:
-                    st.write('Influence plot')
-                    ax1 = st.selectbox("Component", options=model.scores_.columns, index=3)
-                    leverage = model.leverage_
-                    residuals = model.residuals_
-                    fig = px.scatter(x=leverage[ax1], y=residuals[ax1], color = leverage[ax1]*residuals[ax1]).update_layout(xaxis_title="Leverage",yaxis_title="Residuals")
-                    st.plotly_chart(fig)
-
-                with hotelling:
-                    st.write('T²-Hotelling vs Q residuals plot')
-                    hotelling = model.hotelling_
-                    ax2 = st.selectbox("Component", options=model.scores_.columns, index=4)
-
-                    hotelling = model.hotelling_
-                    fig = px.scatter(t, x=hotelling[ax2], y=residuals[ax2]).update_layout(xaxis_title="T²",yaxis_title="Residuals")
-                    st.plotly_chart(fig)
+container2 = st.container(border=True)
+container2.header("Exploratory Data Analysis-Multivariable Data Analysis", divider='blue')
+scores, loadings, pc = st.columns([2, 3, 0.5])
+influence, hotelling, qexp = st.columns([2, 2, 1])
 
-        else:
-            st.markdown('Select a dimensionality reduction technique from the dropdown list')
+dim_red_methods=['', 'PCA','UMAP', 'NMF']  # List of dimensionality reduction algos
+cluster_methods = ['', 'Kmeans','HDBSCAN', 'AP'] # List of clustering algos
+
+dr_model = None # dimensionality reduction model
+cl_model = None # clustering model
+
+# Dimensionality reduction
+t = pd.DataFrame() # scores
+p = pd.DataFrame() # loadings
+labels = []
+if not spectra.empty:
+    dim_red_method = pc.selectbox("Dimensionality reduction techniques: ", options = dim_red_methods, key = 37)
+    clus_method = pc.selectbox("Clustering techniques: ", options = cluster_methods, key = 38)
+    xc = standardize(spectra)
+
+    if dim_red_method == dim_red_methods[1]:
+        dr_model = LinearPCA(xc, Ncomp=5)
+    elif dim_red_method == dim_red_methods[2]:
+        dr_model = Umap(data_import = spectra, numerical_data = xc, cat_data = meta_data)  # NOTE(review): replaces names this page no longer defines; confirm Umap accepts the standardized frame
+
+    if dr_model:
+        axis1 = pc.selectbox("x-axis", options = dr_model.scores_.columns, index=0)
+        axis2 = pc.selectbox("y-axis", options = dr_model.scores_.columns, index=1)
+        axis3 = pc.selectbox("z-axis", options = dr_model.scores_.columns, index=2)
+        t = pd.concat([dr_model.scores_.loc[:,axis1], dr_model.scores_.loc[:,axis2], dr_model.scores_.loc[:,axis3]], axis = 1)
+
+
+# clustering
+if not t.empty:
+    tcr = standardize(t)
+        # Clustering
+    if clus_method == cluster_methods[1]:
+        ncluster = scores.number_input(min_value=2, max_value=30, value=3, label = 'Select the desired number of clusters')
+        cl_model = Sk_Kmeans(tcr, max_clusters = 30)
+        fig2 = px.scatter(cl_model.inertia_.T, y = 'inertia')
+        scores.plotly_chart(fig2)
+        data, labels = cl_model.fit_optimal(nclusters = ncluster)
+
+    elif clus_method == cluster_methods[2]:
+        optimized_hdbscan = Hdbscan(dr_model.scores_raw_)
+        labels, hdbscan_score = optimized_hdbscan.HDBSCAN_scores_
+
+##### Plots
+
+## Scores
+if not t.empty:
+    with scores:
+        st.write('Scores plot')
+        # scores plot with clustering
+        if list(labels) and meta_data.empty:
+            fig = px.scatter_3d(tcr, x=axis1, y=axis2, z = axis3, color = labels)
+
+        # scores plot with metadata
+        elif len(list(labels)) == 0 and not meta_data.empty:
+            filter = meta_data.columns[1:]
+            col = st.selectbox('Group by:', options= filter)
+            if col == 0:
+                fig = px.scatter_3d(tcr, x=axis1, y=axis2, z = axis3)
+            else:
+                fig = px.scatter_3d(tcr, x=axis1, y=axis2, z = axis3, color = list(map(str.lower,meta_data[col])) )
+
+        # color with scores and metadata
+        elif len(list(labels)) > 0  and not meta_data.empty:
+            if clus_method in cluster_methods[1:]:
+                filter = ['None', clus_method]
+                filter.extend(meta_data.columns[1:])
+            else:
+                filter = meta_data.columns[1:].insert(0,'None')
+
+            col = st.selectbox('Group by:', options= filter)
+            if col == "None":
+                fig = px.scatter_3d(tcr, x=axis1, y=axis2, z = axis3)
+            elif col == clus_method:
+                fig = px.scatter_3d(tcr, x=axis1, y=axis2, z = axis3, color = labels)
+            else:
+                fig = px.scatter_3d(tcr, x=axis1, y=axis2, z = axis3, color = list(map(str.lower,meta_data[col])))
 
+        else:
+            fig = px.scatter_3d(tcr, x=axis1, y=axis2, z = axis3)
+        fig.update_traces(marker=dict(size=4))
+        st.plotly_chart(fig)
+
+
+
+if not spectra.empty:
+    if dim_red_method == dim_red_methods[1]:
+        with loadings:
+            st.write('Loadings plot')
+            p = dr_model.loadings_
+            pp = pd.concat([p, pd.DataFrame(np.arange(p.shape[0]), index=p.index, columns=['wl'])], axis =1)
+            df1 = pp.melt(id_vars="wl")
+            fig = px.line(df1, x = 'wl', y = 'value', color='variable')
+            fig.update_layout(legend=dict(x=1, y=0,font=dict(family="Courier", size=12, color="black"),
+                                        bordercolor="Black", borderwidth=2))
+            st.plotly_chart(fig, use_container_width = True)
+
+        with influence:
+            st.write('Influence plot')
+            ax1 = st.selectbox("Component", options=dr_model.scores_.columns, index=3)
+            leverage = dr_model.leverage_
+            residuals = dr_model.residuals_
+            fig = px.scatter(x=leverage[ax1], y=residuals[ax1], color = leverage[ax1]*residuals[ax1]).update_layout(xaxis_title="Leverage",yaxis_title="Residuals")
+            st.plotly_chart(fig)
+
+        with hotelling:
+                st.write('T²-Hotelling vs Q residuals plot')
+                hotelling = dr_model.hotelling_
+                ax2 = st.selectbox("Component", options=dr_model.scores_.columns, index=4)
+
+                hotelling = dr_model.hotelling_
+                fig = px.scatter(t, x=hotelling[ax2], y=residuals[ax2]).update_layout(xaxis_title="T²",yaxis_title="Residuals")
+                st.plotly_chart(fig)
\ No newline at end of file
diff --git a/pages/2-model_creation.py b/pages/2-model_creation.py
index 3fadcb45393c71242b4876ccf32912c616a59ec4..3f506ea752eda5ebf0460f6b53ca8a24015225ec 100644
--- a/pages/2-model_creation.py
+++ b/pages/2-model_creation.py
@@ -3,9 +3,12 @@ st.set_page_config(page_title="NIRS Utils", page_icon=":goat:", layout="wide")
 from Modules import *
 from Class_Mod.DATA_HANDLING import *
 
+
 st.session_state["interface"] = st.session_state.get('interface')
 if st.session_state["interface"] == 'simple':
     hide_pages("Predictions")
+
+
 def nn(x):
     return x is not None
 ########################################################################################
@@ -26,91 +29,135 @@ M9, M10 = st.columns([2,2])
 M9.write("-- Save the model --")
 
 
+files_format = ['.csv', '.dx']
+file = M3.radio('select data file format:', options = files_format)
 
 
-# CSV files loader
-xcal_csv = M3.file_uploader("Select NIRS Data", type="csv", help=" :mushroom: select a csv matrix with samples as rows and lambdas as columns")
-ycal_csv = M3.file_uploader("Select corresponding Chemical Data", type="csv", help=" :mushroom: select a csv matrix with samples as rows and chemical values as a column")
+### Data
+spectra = pd.DataFrame()
+y = pd.DataFrame()
 
-
-if xcal_csv is not None and ycal_csv is not None:
+# load .csv file
+if file == files_format[0]:
+    xcal_csv = M3.file_uploader("Select NIRS Data", type="csv", help=" :mushroom: select a csv matrix with samples as rows and lambdas as columns")
+    ycal_csv = M3.file_uploader("Select corresponding Chemical Data", type="csv", help=" :mushroom: select a csv matrix with samples as rows and chemical values as a column")
+    
+    if xcal_csv and ycal_csv:
+    
         # Select list for CSV delimiter
-        sep = M3.selectbox("Select csv separator - _detected_: " + str(find_delimiter('data/'+xcal_csv.name)), options=[";", ","], index=[";", ","].index(str(find_delimiter('data/'+xcal_csv.name))), key=0)
+        sep = M3.radio("Select csv separator - _detected_: " + str(find_delimiter('data/'+xcal_csv.name)),
+                            options=[";", ","], index=[";", ","].index(str(find_delimiter('data/'+xcal_csv.name))), key=0)
         # Select list for CSV header True / False
-        hdr = M3.selectbox("indexes column in csv? - _detected_: " + str(find_col_index('data/'+xcal_csv.name)), options=["no", "yes"], index=["no", "yes"].index(str(find_col_index('data/'+xcal_csv.name))), key=1)
+        hdr = M3.radio("indexes column in csv? - _detected_: " + str(find_col_index('data/'+xcal_csv.name)),
+                            options=["no", "yes"], index=["no", "yes"].index(str(find_col_index('data/'+xcal_csv.name))), key=1)
+        ###############
         if hdr == 'yes':
             col = 0
         else:
             col = False
-        rd_seed = M1.slider("Customize Train-test split", min_value=1, max_value=100, value=42, format="%i")
-        x, y = utils.load_csv(xcal_csv, ycal_csv, autoremove_na=True, sep=sep, x_hdr=0, y_hdr=0, x_index_col=col, y_index_col=col)
-        # Split data into training and test sets using the kennard_stone method and correlation metric, 25% of data is used for testing
-        train_index, test_index = train_test_split_idx(x, y=y, method="kennard_stone", metric="correlation", test_size=0.25, random_state=rd_seed)
-        # Assign data to training and test sets
-        X_train, y_train, X_test, y_test = pd.DataFrame(x[train_index]), pd.DataFrame(y[train_index]), pd.DataFrame(x[test_index]), pd.DataFrame(y[test_index])
-        y_train = y_train.iloc[:,0]
-        y_test = y_test.iloc[:,0]
-
-
-
-        ############################# Regression modelling ##########################################
-        regression_algo = M1.selectbox("Choose the algorithm for regression", options=reg_algo, key = 12)
-        if regression_algo == reg_algo[1]:
-            # Train model with model function from application_functions.py
-            Reg = PinardPlsr(x_train = X_train, x_test = X_test,y_train = y_train, y_test = y_test)
-            reg_model = Reg.model_
-            #M2.dataframe(Pin.pred_data_)
-
-        elif regression_algo == reg_algo[2]:
-            reg_model = model_LWPLSR(xcal_csv, ycal_csv, sep, hdr)
-
-        elif regression_algo == reg_algo[3]:
-            s = M2.number_input(label='Enter the maximum number of intervalls', min_value=1, max_value=6, value=3)
-            it = M2.number_input(label='Enter the number of iterations', min_value=50, max_value=1000, value=100)
-            progress_text = "The model is being created. Please wait."
-            
-            Reg = TpeIpls(x_train = X_train, x_test=X_test, y_train = y_train, y_test = y_test, scale = False, Kfold = 3, n_intervall = s)
-            pro = M1.progress(0, text="The model is being created. Please wait!")
-            rega = Reg.BandSelect(n_iter=it)
-            pro.empty()
-            M1.progress(100, text = "The model has successfully been  created!")
+        ###############
+        spectra, y = utils.load_csv(xcal_csv, ycal_csv, autoremove_na=True, sep=sep, x_hdr=0, y_hdr=0, x_index_col=col, y_index_col=col)
+        spectra = pd.DataFrame(spectra)
+        y  = pd.DataFrame(y)
+    
+
+
+## Load .dx file
+elif file == files_format[1]:
+    data_file = M3.file_uploader("Select Data", type=".dx", help=" :mushroom: select a dx file")
+    if data_file:
+        with NamedTemporaryFile(delete=False, suffix=".dx") as tmp:
+            tmp.write(data_file.read())
+            tmp_path = tmp.name
+        try:  # parse only after the with-block has closed the file, so every written byte is on disk
+            chem_data, spectra, meta_data = read_dx(file =  tmp_path)
+        finally:
+            os.unlink(tmp_path)  # remove the temporary copy even when parsing fails
+        M3.success("The data have been loaded successfully", icon="✅")
+        yname = M3.selectbox('Select target', options=chem_data.columns)
+        y = chem_data.loc[:,yname]
+
+### split the data
+if not spectra.empty and not y.empty:
+    rd_seed = M1.slider("Customize Train-test split", min_value=1, max_value=100, value=42, format="%i")
+    # Split data into training and test sets using the kennard_stone method and correlation metric, 25% of data is used for testing
+    train_index, test_index = train_test_split_idx(spectra, y=y, method="kennard_stone", metric="correlation", test_size=0.25, random_state=rd_seed)
+    # Assign data to training and test sets
+    X_train, y_train, X_test, y_test = pd.DataFrame(spectra.iloc[train_index,:]), pd.DataFrame(y.iloc[train_index]), pd.DataFrame(spectra.iloc[test_index,:]), pd.DataFrame(y.iloc[test_index])
+    y_train = y_train.iloc[:,0]
+    y_test = y_test.iloc[:,0]
+    
+
+#######################################
+    regression_algo = M1.selectbox("Choose the algorithm for regression", options=reg_algo, key = 12)
+    if regression_algo == reg_algo[1]:
+        # Train model with model function from application_functions.py
+        Reg = PinardPlsr(x_train = X_train, x_test = X_test,y_train = y_train, y_test = y_test)
+        reg_model = Reg.model_
+        #M2.dataframe(Pin.pred_data_)
+    elif regression_algo == reg_algo[2]:
+        reg_model = model_LWPLSR(xcal_csv, ycal_csv, sep, hdr)
+
+    elif regression_algo == reg_algo[3]:
+        s = M1.number_input(label='Enter the maximum number of intervalls', min_value=1, max_value=6, value=3)
+        it = M1.number_input(label='Enter the number of iterations', min_value=50, max_value=1000, value=100)
+        progress_text = "The model is being created. Please wait."
             
-            time.sleep(1)
-            reg_model = Reg.model_
-            M2.table(rega[0])
+        Reg = TpeIpls(x_train = X_train, x_test=X_test, y_train = y_train, y_test = y_test, scale = False, Kfold = 3, n_intervall = s)
+        pro = M1.progress(0, text="The model is being created. Please wait!")
+        rega = Reg.BandSelect(n_iter=it)
+        pro.empty()
+        M1.progress(100, text = "The model has successfully been  created!")            
+        time.sleep(1)
+        reg_model = Reg.model_
+        M2.write('-- Table of selected wavelengths --')
+        M2.table(rega[0])
         
         ################# Model analysis ############
-
-        if regression_algo in reg_algo[1:]:
-            yc = Reg.pred_data_[0]
-            ycv = Reg.pred_data_[1]
-            yt = Reg.pred_data_[2]
+    if regression_algo in reg_algo[1:]:
+        yc = Reg.pred_data_[0]
+        ycv = Reg.pred_data_[1]
+        yt = Reg.pred_data_[2]
             
             
-            M1.write("-- Performance metrics --")
-            M1.dataframe(Reg.metrics_)
+        M2.write("-- Performance metrics --")
+        M2.dataframe(Reg.metrics_)
 
-            M7.pyplot(reg_plot([y_train, y_train, y_test],[yc, ycv, yt]))
-            M8.pyplot(resid_plot([y_train, y_train, y_test],[yc, ycv, yt]))
+        M7.pyplot(reg_plot([y_train, y_train, y_test],[yc, ycv, yt]))
+        M8.pyplot(resid_plot([y_train, y_train, y_test],[yc, ycv, yt]))
             
             
             #model_export = M1.selectbox("Choose way to export", options=["pickle", "joblib"], key=20)
-            model_name = M9.text_input('Give it a name')
-            if M9.button('Export Model'):
+        model_name = M9.text_input('Give it a name')
+        if M9.button('Export Model'):
+            path = 'data/models/model_'
+            if file == files_format[0]:
                 #export_package = __import__(model_export)
-                with open('data/models/model_' + model_name + '_on_' + xcal_csv.name + '_and_' + ycal_csv.name + '_data_' + '.pkl','wb') as f:
+                with open(path + model_name + '_on_' + xcal_csv.name + '_and_' + ycal_csv.name + '_data_' + '.pkl','wb') as f:
                     joblib.dump(reg_model, f)
-                
-                if regression_algo == reg_algo[3]:
-                    rega[1].sort()
-                    pd.DataFrame(rega[1]).to_csv('data/models/model_' + model_name + '_on_' + xcal_csv.name + '_and_' + ycal_csv.name + '_data_''Wavelengths_index.csv', sep = ';')
+                    if regression_algo == reg_algo[3]:
+                        rega[1].sort()
+                        pd.DataFrame(rega[1]).to_csv(path + model_name + '_on_' + xcal_csv.name + '_and_' + ycal_csv.name + '_data_'+'Wavelengths_index.csv', sep = ';')
+
+            elif file == files_format[1]:
+                # Same export as the csv branch, but the file name carries no input-file names
+                with open(path + model_name + '_on_'  + '_data_' + '.pkl','wb') as f:
+                    joblib.dump(reg_model, f)
+                    if regression_algo == reg_algo[3]:
+                        rega[1].sort()
+                        pd.DataFrame(rega[1]).to_csv(path + model_name + '_on_' + '_data_'+'Wavelengths_index.csv', sep = ';')
+
+            # Confirm once, for every algorithm, whenever one of the branches above wrote a model
+            if file in (files_format[0], files_format[1]):
                 st.write('Model Exported')
-                
+                        
 
                 # create a report with information on the model
                 ## see https://stackoverflow.com/a/59578663
-        #M4.pyplot(reg_plot(meas==(ycal_csv,ycal_csv,ycal_csv], pred=[ycal_csv,ycal_csv,ycal_csv]))
 
 
-                if st.session_state['interface'] == 'simple':
-                    st.page_link('pages\\3-prediction.py', label = 'Keep on keepin\' on to predict your values !')
+        if st.session_state.get('interface') == 'simple':  # .get: avoids a KeyError if 'interface' was never set
+            st.page_link('pages/3-prediction.py', label = 'Keep on keepin\' on to predict your values !')  # forward slash resolves on every OS
+
+
+## Load .dx file
diff --git a/pages/3-prediction.py b/pages/3-prediction.py
index 4ac4e5832e6d4bfce0d6c96ac0ffe748ffec7a97..65130fd1dfcdde9f491dc7f8eaee4e19817ddc55 100644
--- a/pages/3-prediction.py
+++ b/pages/3-prediction.py
@@ -47,7 +47,6 @@ if NIRS_csv:
 
 if st.button("Predict"):
         if s:
-             
              result = model_loaded.predict(pred_data.iloc[:,idx])
         else:
         # use prediction function from application_functions.py to predict chemical values
diff --git a/predictions/.gitkeep b/predictions/.gitkeep
deleted file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000