+from Packages import * 
+## try to automatically detect the field separator within the CSV
+def find_delimiter(filename):
+    """Guess the field separator used in a CSV file.
+
+    The first 5000 characters of ``filename`` are given to ``csv.Sniffer``
+    and the delimiter of the detected dialect is returned.
+    """
+    with open(filename) as handle:
+        sample = handle.read(5000)
+    dialect = csv.Sniffer().sniff(sample)
+    return dialect.delimiter
+def find_col_index(filename):
+    """Tell whether the first column of a CSV file looks like a row index.
+
+    The separator is detected with ``find_delimiter``; the first three lines
+    are skipped and at most three rows are parsed. Returns ``'yes'`` when the
+    first parsed column is not of dtype ``float64``, ``'no'`` otherwise.
+    """
+    separator = str(find_delimiter(filename))
+    with open(filename) as handle:
+        preview = pd.read_csv(handle, skiprows=3, nrows=3, index_col=False, sep=separator)
+    if preview.iloc[:, 0].dtypes != np.float64:
+        return 'yes'
+    return 'no'
+# detection of columns categories and scaling
+def col_cat(data_import):
+    """Split a dataframe into numerical and categorical parts and scale the former.
+
+    Columns whose dtype is exactly ``float64`` or ``int64`` are treated as
+    numerical; every other column is treated as categorical.
+
+    Args:
+        data_import: pandas DataFrame to analyse.
+
+    Returns:
+        A tuple ``(numerical_data, categorical_data, scaled_values)``:
+        the numerical columns with NaN replaced by the column mean, the
+        categorical columns, and the ``StandardScaler`` output computed on
+        the numerical columns. When one kind of column is absent, a single
+        placeholder column is returned instead (zeros for numerical data,
+        empty strings named ``'no categories'`` for categorical data), built
+        on the index of ``data_import``.
+    """
+    # detect numerical and categorical columns in the csv
+    numerical_columns_list = []
+    categorical_columns_list = []
+    for name in data_import.columns:
+        column = data_import[name]
+        if column.dtype == np.dtype("float64") or column.dtype == np.dtype("int64"):
+            numerical_columns_list.append(column)
+        else:
+            categorical_columns_list.append(column)
+    if numerical_columns_list:
+        numerical_data = pd.concat(numerical_columns_list, axis=1)
+    else:
+        # pd.concat only accepts pandas objects, so the placeholder must be a
+        # Series/DataFrame rather than a plain list (which raised TypeError)
+        numerical_data = pd.Series(0, index=data_import.index).to_frame()
+    if categorical_columns_list:
+        categorical_data = pd.concat(categorical_columns_list, axis=1)
+    else:
+        categorical_data = pd.DataFrame({'no categories': ""}, index=data_import.index)
+    # fill missing numerical values with the mean of their column
+    numerical_data = numerical_data.apply(lambda x: x.fillna(x.mean()))
+    # Scale the numerical data
+    scaler = StandardScaler()
+    scaled_values = scaler.fit_transform(numerical_data)
+    return numerical_data, categorical_data, scaled_values
+def list_files(mypath, import_type):
+    """List the saved model files of a given type found in a folder.
+
+    Returns the names of the regular files directly inside ``mypath`` whose
+    name ends with ``import_type + '.pkl'``. When there is none, a
+    one-element list holding a message for the user is returned instead.
+    """
+    suffix = import_type + '.pkl'
+    found = []
+    for entry in listdir(mypath):
+        if entry.endswith(suffix) and isfile(join(mypath, entry)):
+            found.append(entry)
+    if not found:
+        return ['Please, create a model before - no model available yet']
+    return found
\ No newline at end of file
+from Packages import * 
+from Class_Mod.Miscellaneous import * 
+def model_LWPLSR(xcal_csv, ycal_csv, sep, hdr):
+    """Load calibration data, split it and call Jchemo's ``lwplsr`` on the training part.
+
+    Args:
+        xcal_csv: CSV with the predictors, passed to ``utils.load_csv``.
+        ycal_csv: CSV with the response values, passed to ``utils.load_csv``.
+        sep: field separator forwarded to ``utils.load_csv``.
+        hdr: ``'yes'`` to use the first CSV column as index column, anything
+            else to use none.
+
+    NOTE(review): nothing is returned; the result of ``Jchemo.lwplsr`` is
+    not stored, so callers currently receive ``None``.
+    """
+    # hdr == 'yes' -> first CSV column is the index column (index_col=0), otherwise no index column
+    if hdr == 'yes':
+        col = 0
+    else:
+        col = False
+    # loading the csv
+    x, y = utils.load_csv(xcal_csv, ycal_csv, autoremove_na=True, sep=sep, x_hdr=0, y_hdr=0, x_index_col=col, y_index_col=col)
+    # Split data into training and test sets using the kennard_stone method and correlation metric, 25% of data is used for testing
+    train_index, test_index = train_test_split_idx(x, y=y, method="kennard_stone", metric="correlation", test_size=0.25, random_state=42)
+    # Assign data to training and test sets
+    X_train, y_train, X_test, y_test = x[train_index], y[train_index], x[test_index], y[test_index]
+    st.write("Size of train and test sets: train " + str(X_train.shape) + ' ' + str(y_train.shape) + ' / test ' + str(X_test.shape) + ' ' + str(y_test.shape))
+    # NOTE(review): `eucl` is not defined in this function; unless a star import
+    # provides it this line raises NameError - confirm how Jchemo expects the metric.
+    Jchemo.lwplsr(X_train, y_train, nlvdis=4, metric = eucl, k = 10)
\ No newline at end of file
+from Packages import *
+# local CSS
+## load the custom CSS in the style folder
+def local_css(file_name):
+    """Read the stylesheet stored in ``file_name`` and inject it into the page.
+
+    The file content is wrapped in a ``<style>`` tag and sent through
+    ``st.markdown`` with HTML rendering enabled.
+    """
+    with open(file_name) as css_file:
+        stylesheet = css_file.read()
+    st.markdown(f"<style>{stylesheet}</style>", unsafe_allow_html=True)
+# Cross-Validation of the model
+def CV_model(estimator, x, y, cv):
+    """Cross-validate an estimator and write the results to the Streamlit page.
+
+    Args:
+        estimator: scikit-learn compatible estimator to evaluate.
+        x: predictors.
+        y: response values.
+        cv: cross-validation setting, forwarded unchanged to
+            ``cross_val_score``, ``cross_val_predict`` and ``cross_validate``.
+
+    Nothing is returned; every score is displayed with ``st.write``.
+    """
+    st.write('Cross-Validation of this model')
+    st.write("CV_scores", cross_val_score(estimator, x, y, cv=cv))
+    st.write("-- CV predict --")
+    # use the caller's cv here too (it was hard-coded to 3), so the three
+    # reports are computed on the same splitting scheme
+    Y_preds = cross_val_predict(estimator, x, y, cv=cv)
+    st.write("MAE", mean_absolute_error(y, Y_preds))
+    st.write("MSE", mean_squared_error(y, Y_preds))
+    st.write("MAPE", mean_absolute_percentage_error(y, Y_preds))
+    st.write("R²", r2_score(y, Y_preds))
+    st.write("-- Cross Validate --")
+    cv_results = cross_validate(estimator, x, y, cv=cv, return_train_score=True, n_jobs=3)
+    for key, values in cv_results.items():
+        st.write(key, values)
+# predict module
+def prediction(NIRS_csv, qsep, qhdr, model):
+    """Read spectra from a CSV and return the model's predictions for them.
+
+    Args:
+        NIRS_csv: path or buffer handed to ``pd.read_csv``.
+        qsep: field separator of the CSV.
+        qhdr: ``'yes'`` to use the first column as index, anything else to
+            use no index column.
+        model: object exposing ``predict``.
+
+    Returns:
+        Whatever ``model.predict`` returns for the loaded dataframe.
+    """
+    index_column = 0 if qhdr == 'yes' else False
+    spectra = pd.read_csv(NIRS_csv, sep=qsep, index_col=index_column)
+    return model.predict(spectra)
+from Packages import *
+from Class_Mod.DATA_HANDLING import *
+def pca_maker(data_import):
+    """Append principal-component scores to a dataframe.
+
+    The numerical columns are extracted and scaled by ``col_cat``, a PCA with
+    up to 6 components is fitted on the scaled values and the scores are
+    concatenated to ``data_import``.
+
+    Args:
+        data_import: pandas DataFrame holding the samples.
+
+    Returns:
+        A tuple ``(output, categorical_columns, pca_columns)``: the input
+        dataframe with the score columns appended, the names of the
+        categorical columns, and the names of the score columns. Each score
+        column is named ``"PCA_<n> - <explained variance>%"``.
+    """
+    numerical_data, categorical_data, scaled_values = col_cat(data_import)
+    # PCA cannot extract more components than min(n_samples, n_features),
+    # so the 6 requested components are capped for small inputs
+    n_components = min(6, *scaled_values.shape)
+    pca = PCA(n_components=n_components)
+    pca_fit = pca.fit(scaled_values)
+    pca_data = pd.DataFrame(pca_fit.transform(scaled_values), index=numerical_data.index)
+    # Convert to percent before rounding: rounding first and multiplying by
+    # 100 afterwards leaks float artefacts such as 7.000000000000001 into the label
+    new_column_names = [
+        "PCA_" + str(rank) + ' - ' + str(round(float(ratio) * 100, 1)) + '%'
+        for rank, ratio in enumerate(pca_fit.explained_variance_ratio_, start=1)
+    ]
+    pca_data.columns = new_column_names
+    output = pd.concat([data_import, pca_data], axis=1)
+    return output, list(categorical_data.columns), new_column_names
+from Packages import *
+from Class_Mod.Miscellaneous import * 
+# create model module with PINARD
+def model_PLSR(xcal_csv, ycal_csv, sep, hdr, rd_seed):
+    """Train a PLS regression pipeline on calibration data and report its scores.
+
+    The data are loaded with ``utils.load_csv``, split into train and test
+    sets (kennard_stone method, correlation metric, 25% test), and a pipeline
+    made of min-max scaling, a union of preprocessings and ``PLSRegression``
+    is fitted with min-max scaled targets. Test-set scores and a
+    cross-validation report are written to the Streamlit page.
+
+    Args:
+        xcal_csv: CSV with the predictors.
+        ycal_csv: CSV with the response values.
+        sep: field separator of both CSV files.
+        hdr: ``'yes'`` to use the first CSV column as index column, anything
+            else to use none.
+        rd_seed: seed given to ``np.random.seed`` and to the train/test split.
+
+    Returns:
+        The fitted ``TransformedTargetRegressor``.
+    """
+    np.random.seed(rd_seed)
+    # hdr == 'yes' -> first CSV column is the index column, otherwise no index column
+    col = 0 if hdr == 'yes' else False
+    # loading the csv
+    x, y = utils.load_csv(xcal_csv, ycal_csv, autoremove_na=True, sep=sep, x_hdr=0, y_hdr=0, x_index_col=col, y_index_col=col)
+    # Split data into training and test sets using the kennard_stone method and correlation metric, 25% of data is used for testing
+    train_index, test_index = train_test_split_idx(x, y=y, method="kennard_stone", metric="correlation", test_size=0.25, random_state=rd_seed)
+    # Assign data to training and test sets
+    X_train, y_train, X_test, y_test = x[train_index], y[train_index], x[test_index], y[test_index]
+    st.write("Size of train and test sets: train " + str(X_train.shape) + ' ' + str(y_train.shape) + ' / test ' + str(X_test.shape) + ' ' + str(y_test.shape))
+    # Declare preprocessing pipeline
+    # NOTE(review): these two steps are wrapped in a FeatureUnion below, which
+    # runs them side by side; if the intent is to apply Savitzky-Golay twice
+    # in sequence a Pipeline is needed instead - confirm before changing the model.
+    svgolay = [   ('_sg1',pp.SavitzkyGolay()),
+                  ('_sg2',pp.SavitzkyGolay())
+                  ]
+    preprocessing = [   ('id', pp.IdentityTransformer()), # Identity transformer, no change to the data
+                        ('savgol', pp.SavitzkyGolay()), # Savitzky-Golay smoothing filter
+                        ('derivate', pp.Derivate()), # Calculate the first derivative of the data
+                        ('SVG', FeatureUnion(svgolay))
+                        ]
+    # Declare complete pipeline
+    pipeline = Pipeline([
+        ('scaler', MinMaxScaler()), # scaling the data
+        ('preprocessing', FeatureUnion(preprocessing)), # preprocessing
+        ('PLS',  PLSRegression()) # regressor
+    ])
+    # Estimator including y values scaling
+    estimator = TransformedTargetRegressor(regressor = pipeline, transformer = MinMaxScaler())
+    # Training
+    trained = estimator.fit(X_train, y_train)
+    # fit scores
+    st.write("fit scores / R²: " + str(estimator.score(X_test,y_test)))
+    # Predictions on test set
+    Y_preds = estimator.predict(X_test)
+    # Compute each metric once and show the real values in the table
+    # (it used to display placeholder constants under a duplicated "MSE: " key)
+    mae = mean_absolute_error(y_test, Y_preds)
+    mse = mean_squared_error(y_test, Y_preds)
+    mape = mean_absolute_percentage_error(y_test, Y_preds)
+    met = pd.DataFrame({"MAE: ": [mae],
+                        "MSE: ": [mse],
+                        "MAPE: ": [mape]}).T
+    st.table(met)
+    st.write("MAE: " + str(mae))
+    st.write("MSE: " + str(mse))
+    st.write("MAPE: " + str(mape))
+    # Cross-Validate the model
+    CV_model(estimator, X_train, y_train, 3)
+    return (trained)
\ No newline at end of file
+# UMAP function for the Sample Selection module
+from Packages import * 
+from Class_Mod.DATA_HANDLING import * 
+def umap_maker(data_import):
+    """Append a 4-component UMAP embedding to a dataframe.
+
+    The numerical columns are extracted and scaled by ``col_cat``; UMAP is
+    fitted on the scaled values and the embedding is concatenated to
+    ``data_import``.
+
+    Returns:
+        A tuple ``(output, categorical_columns, umap_columns)``: the input
+        dataframe with the ``UMAP_<n>`` columns appended, the names of the
+        categorical columns, and the names of the embedding columns.
+    """
+    numerical_data, categorical_data, scaled_values = col_cat(data_import)
+    reducer = UMAP(random_state=42, n_neighbors=20, n_components=4, min_dist=0.0)
+    fitted = reducer.fit(scaled_values)
+    embedding = pd.DataFrame(fitted.transform(scaled_values), index=numerical_data.index)
+    # name the components UMAP_1 ... UMAP_n
+    new_column_names = ["UMAP_" + str(position + 1) for position in range(embedding.shape[1])]
+    embedding = embedding.rename(columns=dict(zip(embedding.columns, new_column_names)))
+    output = pd.concat([data_import, embedding], axis=1)
+    return output, list(categorical_data.columns), new_column_names
\ No newline at end of file
+from .PCA_ import *
+from .KMEANS_ import * 
+from .UMAP_ import * 
+from .DATA_HANDLING import * 
+from .PLSR_ import model_PLSR
+from .LWPLSR_ import model_LWPLSR
+from Class_Mod import pca_maker, umap_maker, find_col_index, model_PLSR, model_LWPLSR, list_files
+# find_col_index
+from Class_Mod.Miscellaneous import  prediction, CV_model
+from urllib.request import urlretrieve
\ No newline at end of file
+## Data loading, handling, and preprocessing
+import sys
+import csv
+import numpy as np
+import pandas as pd
+from os import listdir
+from os.path import isfile, join
+from sklearn.preprocessing import StandardScaler
+### Exploratory data analysis-Dimensionality reduction
+from umap.umap_ import UMAP
+from sklearn.decomposition import PCA
+# Clustering
+from sklearn.cluster import KMeans as km
+#import hdbscan
+# Modelling
+import julia
+from julia import Jchemo
+from pinard import utils
+from pinard import preprocessing as pp
+from pinard.model_selection import train_test_split_idx
+from sklearn.model_selection import train_test_split, cross_val_score, cross_val_predict, cross_validate
+from sklearn.preprocessing import MinMaxScaler
+from sklearn.pipeline import Pipeline, FeatureUnion
+from sklearn.compose import TransformedTargetRegressor
+from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error, r2_score
+from sklearn.cross_decomposition import PLSRegression
+## Images and plots 
+from PIL import Image
+import plotly.express as px
+### Important Metrics
+from sklearn.metrics import pairwise_distances_argmin_min
+## Web app construction
+import streamlit as st
+# help on streamlit input https://docs.streamlit.io/library/api-reference/widgets
\ No newline at end of file
 # NIRS_Workflow
 ## Getting started
-This package aims to provide a workflow for users who want to perform chemical analyses and predict characteristics using the NIRS technique.  
+This package aims to provide a workflow for users who want to perform chemical analyses and predict characteristics using the NIRS technique.
 The process includes:
 - sample selection - you can upload all your NIRS spectra and it'll help to select the samples to analyse chemically.
-- model creation - the PINARD (https://github.com/GBeurier/pinard) package creates a prediction model with spectra and related chemical analysis.- 
+- model creation - the PINARD (https://github.com/GBeurier/pinard) package creates a prediction model with spectra and related chemical analysis.
 - predictions - the PINARD package uses the model to predict chemical values for unknown samples.
@@ -17,13 +17,13 @@ This package is written in python. You can clone the repository: git clone https
 Then install the requirements: pip install -r requirements.txt
 To use Locally weighted PLS Regression for creation model, you will need to install Jchemo.jl (https://github.com/mlesnoff/Jchemo.jl), a Julia package.
-From the CLI: python  
-> '>>> import julia  
-'>>> julia.install()  
-'>>> from julia import Pkg  
-'>>> Pkg.add("Jchemo")  
+From the CLI: python
+> '>>> import julia
+'>>> julia.install()
+'>>> from julia import Pkg
+'>>> Pkg.add("Jchemo")
-To check if Jchemo is installed without errors:  
+To check if Jchemo is installed without errors:
 > '>>> Pkg.status()
 You can then run: streamlit run ./app.py from the CLI.
@@ -39,7 +39,4 @@ Contributors:
 ## License
\ No newline at end of file
\ No newline at end of file
-import streamlit as st
-# help on streamlit input https://docs.streamlit.io/library/api-reference/widgets
-from PIL import Image
-# emojis code here : https://www.webfx.com/tools/emoji-cheat-sheet/
+#from Modules_manager.PCA_ import pca_maker
+from Packages import *
 st.set_page_config(page_title="NIRS Utils", page_icon=":goat:", layout="wide")
-import numpy as np
-import pandas as pd
-import plotly.express as px
-from sklearn.cluster import KMeans as km
-from sklearn.metrics import pairwise_distances_argmin_min
-from application_functions import pca_maker, model_PLSR, model_LWPLSR, prediction, find_delimiter, umap_maker, find_col_index, list_files, CV_model
+from Modules import *
+from Class_Mod.DATA_HANDLING import * 
 # load images for web interface
 img_sselect = Image.open("images\sselect.JPG")
@@ -18,21 +13,25 @@ img_predict = Image.open("images\predict.JPG")
 # TOC menu on the left
 with st.sidebar:
     st.markdown("[Sample Selection](#sample-selection)")
-    st.markdown("[Model Creation](#create-a-model)")
-    st.markdown("[Prediction](#predict)")
+    st.markdown("[Model Development](#create-a-model)")
+    st.markdown("[Predictions Making](#predict)")
 # Page header
 with st.container():
-    st.subheader("Plateforme d'Analyses Chimiques pour l'Ecologie :goat:")
+    st.subheader("Plateforme d'Analyses Chimiques pour l'Ecologie-PACE :goat:")
     st.title("NIRS Utils")
-    st.write("Sample selections, Modelisations & Predictions using [Pinard](https://github.com/GBeurier/pinard) and PACE NIRS Database.")
+    st.write("Sample selection, Predictive Modelling & Predictions making using [Pinard](https://github.com/GBeurier/pinard) and PACE NIRS Database.")
 # graphical delimiter
 # Sample Selection module
 with st.container():
     st.header("Sample Selection")
-    st.write("Sample selection using PCA and K-Means algorythms")
+    st.write("Sample selection using PCA and K-Means algorithms")
     # split 2 columns 4:1 ratio
     scatter_column, settings_column = st.columns((4, 1))
     scatter_column.write("**Multi-Dimensional Analysis**")
@@ -131,7 +130,7 @@ with st.container():
                 scatter_column.write("_Please Choose a file_")
             # clustering via UMAP / HDBSCAN -- TO BE DONE !!!
-            if type_cluster == 'umap':
+            if type_cluster == 'hdbscan':
                 import hdbscan
                 # plot de pc with colored clusters and selected samples
                 # graph_selected = px.scatter(data_frame=pc_data, x=pc_1, y=pc_2, template="simple_white", height=800, color=kmeans_samples.labels_, hover_name=pc_data.index, title="PC projection with K-Means Clusters and selected samples")
@@ -143,6 +142,9 @@ with st.container():
                 plot = scatter_column.plotly_chart(graph_selected)
 # graphical delimiter
 # Model creation module
 with st.container():
     st.header("Create a model")
@@ -179,6 +181,10 @@ with st.container():
 # graphical delimiter
 # Prediction module - TO BE DONE !!!!!
 with st.container():
@@ -211,7 +217,6 @@ with st.container():
         pd.DataFrame(result).to_csv(export_name + '.csv')
         st.write('Predictions exported to ' + export_name + '.csv')
         # export to local drive
-        from urllib.request import urlretrieve
         url = ('http://localhost:8501' + export_name[1:] + '.csv')
         filename = export_name + '.csv'
         urlretrieve(url, filename)
