diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml
index 037c284fd8cd4f40fce1f55db2b119b0449037a6..737a221909359bfa9fe392fdb17d858ce828d4d4 100644
--- a/.idea/inspectionProfiles/Project_Default.xml
+++ b/.idea/inspectionProfiles/Project_Default.xml
@@ -4,12 +4,13 @@
     <inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
       <option name="ignoredPackages">
         <value>
-          <list size="5">
+          <list size="6">
             <item index="0" class="java.lang.String" itemvalue="protobuf" />
             <item index="1" class="java.lang.String" itemvalue="watchdog" />
             <item index="2" class="java.lang.String" itemvalue="streamlit" />
             <item index="3" class="java.lang.String" itemvalue="requests" />
             <item index="4" class="java.lang.String" itemvalue="Pillow" />
+            <item index="5" class="java.lang.String" itemvalue="pinard" />
           </list>
         </value>
       </option>
diff --git a/README.md b/README.md
index fc4b31d5574d61cd0572d0a05735a7f3bb82c554..5af2bded0d123aee95fdcfedcb1a701bf86de3a6 100644
--- a/README.md
+++ b/README.md
@@ -1,44 +1,33 @@
 # NIRS_Workflow
-
-
 ## Getting started
+This package aims to provide a workflow for users who want to perform chemical analyses and predict characteristics using the NIRS technique.

-This package aims to provide a workflow for users who want to carry out chemical analyses and predict characteristics using the NIRS technique.
+The process includes:
+- sample selection - upload all your NIRS spectra and the app will help you select the samples to analyse chemically.

-## Add your files
-- [ ] [Create](https://docs.gitlab.com/ee/user/project/repository/web_editor.html#create-a-file) or [upload](https://docs.gitlab.com/ee/user/project/repository/web_editor.html#upload-a-file) files
-- [ ] [Add files using the command line](https://docs.gitlab.com/ee/gitlab-basics/add-file.html#add-a-file-using-the-command-line) or push an existing Git repository with the following command:
+- model creation - the PINARD (https://github.com/GBeurier/pinard) package builds a prediction model from the spectra and the related chemical analyses.

-```
-cd existing_repo
-git remote add origin https://src.koda.cnrs.fr/nicolas.barthes.5/nirs_workflow.git
-git branch -M main
-git push -uf origin main
-```
+- predictions - the PINARD package uses the model to predict chemical values for unknown samples.

 ## Installation
-Within a particular ecosystem, there may be a common way of installing things, such as using Yarn, NuGet, or Homebrew. However, consider the possibility that whoever is reading your README is a novice and would like more guidance. Listing specific steps helps remove ambiguity and gets people to using your project as quickly as possible. If it only runs in a specific context like a particular programming language version or operating system or has dependencies that have to be installed manually, also add a Requirements subsection.
-
-## Usage
-Use examples liberally, and show the expected output if you can. It's helpful to have inline the smallest example of usage that you can demonstrate, while providing links to more sophisticated examples if they are too long to reasonably include in the README.
-
-## Support
-Tell people where they can go to for help. It can be any combination of an issue tracker, a chat room, an email address, etc.
+This package is written in Python. Clone the repository: `git clone https://src.koda.cnrs.fr/nicolas.barthes.5/nirs_workflow.git`

-## Roadmap
-If you have ideas for releases in the future, it is a good idea to list them in the README.
+Then install the requirements: `pip install -r requirements.txt`

-## Contributing
-State if you are open to contributions and what your requirements are for accepting them.
+Then run the app from the CLI: `streamlit run ./app.py`

-For people who want to make changes to your project, it's helpful to have some documentation on how to get started. Perhaps there is a script that they should run or some environment variables that they need to set. Make these steps explicit. These instructions could also be useful to your future self.
+The app will then open in your default browser.

-You can also document commands to lint the code or run tests. These steps help to ensure high code quality and reduce the likelihood that the changes inadvertently break something. Having instructions for running tests is especially helpful if it requires external setup, such as starting a Selenium server for testing in a browser.
+## Usage
+The web app guides you through sample selection, model creation and prediction.

 ## Authors and acknowledgment
-Show your appreciation to those who have contributed to the project.
+Contributors:
+- Nicolas Barthes (CNRS)

 ## License
 CC BY
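The app below expects spectra as "a csv matrix with samples as rows and lambdas as columns" (see the uploader help text in app.py). A minimal sketch of producing such a file with pandas; the file name, wavelength grid and sample count are illustrative assumptions, not part of the repository:

```python
# Illustrative sketch only: build a CSV in the layout the uploader expects
# (one sample per row, one wavelength/lambda per column).
import numpy as np
import pandas as pd

wavelengths = np.arange(1100, 2500, 2)            # hypothetical NIR grid, in nm
spectra = np.random.rand(10, wavelengths.size)    # 10 dummy sample spectra
df = pd.DataFrame(spectra, columns=wavelengths,
                  index=[f"sample_{i}" for i in range(10)])
df.to_csv("data/my_spectra.csv", sep=";")         # ';' is one of the separators the app offers
```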
diff --git a/app.py b/app.py
index 314fce9ddafdb57831b48c0c59cef93e75df0dfb..6cc896d1fb8ea68b59d881c7aba8ad3cac75aafe 100644
--- a/app.py
+++ b/app.py
@@ -1,12 +1,7 @@
-# pour lancer l'appli
-# streamlit run .\app.py
-
 import streamlit as st
-import time
-from PIL import Image
 # help on streamlit input https://docs.streamlit.io/library/api-reference/widgets
-# Page Title
-## emojis code here : https://www.webfx.com/tools/emoji-cheat-sheet/
+from PIL import Image
+# emojis code here : https://www.webfx.com/tools/emoji-cheat-sheet/
 st.set_page_config(page_title="NIRS Utils", page_icon=":goat:", layout="wide")
 import numpy as np
 import pandas as pd
@@ -14,38 +9,40 @@
 import plotly.express as px
 from sklearn.cluster import KMeans as km
 from sklearn.metrics import pairwise_distances_argmin_min
 from application_functions import pca_maker, model, predict, find_delimiter
-# from scipy.spatial.distance import pdist, squareform
-
-
-# open images
+# load images for web interface
 img_sselect = Image.open("images\sselect.JPG")
 img_general = Image.open("images\general.JPG")
 img_predict = Image.open("images\predict.JPG")
+# TOC menu on the left
 with st.sidebar:
     st.markdown("[Sample Selection](#sample-selection)")
     st.markdown("[Model Creation](#create-a-model)")
     st.markdown("[Prediction](#predict)")
-
+# Page header
 with st.container():
     st.subheader("Plateforme d'Analyses Chimiques pour l'Ecologie :goat:")
     st.title("NIRS Utils")
     st.write("Sample selections, Modelisations & Predictions using [Pinard](https://github.com/GBeurier/pinard) and PACE NIRS Database.")
     st.image(img_general)
-
+# graphical delimiter
 st.write("---")
-
+# Sample Selection module
 with st.container():
     st.header("Sample Selection")
     st.image(img_sselect)
     st.write("Sample selection using PCA and K-Means algorythms")
+    # split 2 columns 4:1 ratio
     scatter_column, settings_column = st.columns((4, 1))
     scatter_column.write("**Multi-Dimensional Analysis**")
     settings_column.write("**Settings**")
+    # loader for csv file containing NIRS spectra
     sselectx_csv = settings_column.file_uploader("Select NIRS Data", type="csv", help=" :mushroom: select a csv matrix with samples as rows and lambdas as columns", key=5)
     if sselectx_csv is not None:
+        # Select list for CSV delimiter
         psep = settings_column.selectbox("Select csv separator - _detected_: " + str(find_delimiter('data/'+sselectx_csv.name)), options=[";", ","], index=[";", ","].index(str(find_delimiter('data/'+sselectx_csv.name))), key=9)
+        # Select list for CSV header True / False
         phdr = settings_column.selectbox("indexes column in csv?", options=["no", "yes"], key=31)
         if phdr == 'yes':
             col = 0
columns", key=5) if sselectx_csv is not None: + # Select list for CSV delimiter psep = settings_column.selectbox("Select csv separator - _detected_: " + str(find_delimiter('data/'+sselectx_csv.name)), options=[";", ","], index=[";", ","].index(str(find_delimiter('data/'+sselectx_csv.name))), key=9) + # Select list for CSV header True / False phdr = settings_column.selectbox("indexes column in csv?", options=["no", "yes"], key=31) if phdr == 'yes': col = 0 @@ -54,22 +51,18 @@ with st.container(): import_button = settings_column.button('Import') if import_button: data_import = pd.read_csv(sselectx_csv, sep=psep, index_col=col) - # pour les tests, ajout d'une colonne Categorielle - # from itertools import islice, cycle - # data_import['Xcat1'] = list(islice(cycle(np.array(["aek", "muop", "mok"])), len(data_import))) - # data_import['Xcat2'] = list(islice(cycle(np.array(["aezfek", "mufzefopfz", "mzefezfok", "fzeo"])), len(data_import))) - # data_import['Xcat3'] = list(islice(cycle(np.array(["fezaezfek", "zefzemufzefopfz", "mkyukukzefezfok"])), len(data_import))) + # compute PCA - pca_maker function in application_functions.py pca_data, cat_cols, pca_cols = pca_maker(data_import) - + # add 2 select lists to choose which component to plot pca_1 = settings_column.selectbox("First Principle Component", options=pca_cols, index=0) pca_2 = settings_column.selectbox("Second Principle Component", options=pca_cols, index=1) - categorical_variable = settings_column.selectbox("Variable Select", options = cat_cols) - categorical_variable_2 = settings_column.selectbox("Second Variable Select (hover data)", options = cat_cols) + # if categorical variables exist, add 2 select lists to choose the categorical variables to color the PCA if cat_cols[0] == "no categories": scatter_column.plotly_chart(px.scatter(data_frame=pca_data, x=pca_1, y=pca_2, template="simple_white", height=800, hover_name=pca_data.index, title="PCA plot of sample spectra")) else: + categorical_variable = settings_column.selectbox("Variable Select", options = cat_cols) + categorical_variable_2 = settings_column.selectbox("Second Variable Select (hover data)", options = cat_cols) scatter_column.plotly_chart(px.scatter(data_frame=pca_data, x=pca_1, y=pca_2, template="simple_white", height=800, color=categorical_variable, hover_data = [categorical_variable_2], hover_name=pca_data.index, title="PCA plot of sample spectra")) - #K-Means ## K-Means choose number of clusters wcss_samples = [] @@ -116,28 +109,33 @@ with st.container(): export.append(pca_data.loc[pca_data.index[kmeans_samples.labels_==i]].index) scatter_column.write(pd.DataFrame(export).T) if scatter_column.button('Export'): - pd.DataFrame(export).T.to_csv('data/Samples_for_Chemical_Analysis.csv') + pd.DataFrame(export).T.to_csv('./data/Samples_for_Chemical_Analysis.csv') else: scatter_column.write("_Please Choose a file_") +# graphical delimiter st.write("---") - +# Model creation module with st.container(): st.header("Create a model") st.image(img_predict) st.write("Create a model to then predict chemical values from NIRS spectra") + # CSV files loader xcal_csv = st.file_uploader("Select NIRS Data", type="csv", help=" :mushroom: select a csv matrix with samples as rows and lambdas as columns") ycal_csv = st.file_uploader("Select corresponding Chemical Data", type="csv", help=" :mushroom: select a csv matrix with samples as rows and chemical values as a column") - # st.button("Create model", on_click=model) if xcal_csv is not None and ycal_csv is not None: + # Select list for CSV 
diff --git a/application_functions.py b/application_functions.py
index 22bbb47c8d6baa396c0d4c0ff720357947c3a5a4..f3111923874694c7c6f38e5c98bacf3daba8348f 100644
--- a/application_functions.py
+++ b/application_functions.py
@@ -6,24 +6,59 @@
 from sklearn.preprocessing import StandardScaler
 import csv

 # local CSS
+## load the custom CSS in the style folder
 def local_css(file_name):
     with open(file_name) as f:
         st.markdown(f"<style>{f.read()}</style>", unsafe_allow_html=True)
-
 local_css("style/style.css")

+## try to automatically detect the field separator within the CSV
 def find_delimiter(filename):
     sniffer = csv.Sniffer()
     with open(filename) as fp:
         delimiter = sniffer.sniff(fp.read(5000)).delimiter
     return delimiter

-# predict function
-def predict():
-    display = "Prediction with: " + str(NIRS_csv), str(psep), str(phdr)
-    st.success(display)
+# PCA function for the Sample Selection module
+def pca_maker(data_import):
+    # detect numerical and categorical columns in the csv
+    numerical_columns_list = []
+    categorical_columns_list = []
+    for i in data_import.columns:
+        if data_import[i].dtype == np.dtype("float64") or data_import[i].dtype == np.dtype("int64"):
+            numerical_columns_list.append(data_import[i])
+        else:
+            categorical_columns_list.append(data_import[i])
+    if len(numerical_columns_list) == 0:
+        empty = [0 for x in range(len(data_import))]
+        numerical_columns_list.append(empty)
+    if len(categorical_columns_list) > 0:
+        categorical_data = pd.concat(categorical_columns_list, axis=1)
+    if len(categorical_columns_list) == 0:
+        empty = ["" for x in range(len(data_import))]
+        categorical_columns_list.append(empty)
+        categorical_data = pd.DataFrame(categorical_columns_list).T
+        categorical_data.columns = ['no categories']
+    # Create numerical data matrix from the numerical columns list and fill na with the mean of the column
+    numerical_data = pd.concat(numerical_columns_list, axis=1)
+    numerical_data = numerical_data.apply(lambda x: x.fillna(x.mean()))
+    # Scale the numerical data
+    scaler = StandardScaler()
+    scaled_values = scaler.fit_transform(numerical_data)
+    # Compute a 6 components PCA on scaled values
+    pca = PCA(n_components=6)
+    pca_fit = pca.fit(scaled_values)
+    pca_data = pca_fit.transform(scaled_values)
+    pca_data = pd.DataFrame(pca_data, index=numerical_data.index)
+    # Set PCA column names with component number and explained variance %
+    new_column_names = ["PCA_" + str(i) + ' - ' + str(round(pca_fit.explained_variance_ratio_[i-1] * 100, 2)) + '%' for i in range(1, len(pca_data.columns) + 1)]
+    # Format the output
+    column_mapper = dict(zip(list(pca_data.columns), new_column_names))
+    pca_data = pca_data.rename(columns=column_mapper)
+    output = pd.concat([data_import, pca_data], axis=1)
+    return output, list(categorical_data.columns), new_column_names

-# create model function
+# create model module with PINARD
 def model(xcal_csv, ycal_csv, sep, hdr, rd_seed):
     from pinard import utils
     from pinard import preprocessing as pp
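`pca_maker` labels each principal component with its explained-variance percentage. A self-contained sketch of that naming scheme on dummy data (sklearn only); rounding after multiplying by 100 keeps the labels free of float artifacts such as `12.299999999999999%`:

```python
# Sketch of the PCA column-naming scheme used by pca_maker, on dummy data.
import numpy as np
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

X = np.random.rand(30, 8)                       # 30 samples, 8 numeric columns
pca = PCA(n_components=6).fit(StandardScaler().fit_transform(X))
names = ["PCA_" + str(i + 1) + ' - ' + str(round(r * 100, 2)) + '%'
         for i, r in enumerate(pca.explained_variance_ratio_)]
print(names)                                    # e.g. ['PCA_1 - 21.3%', 'PCA_2 - 17.9%', ...]
```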
@@ -35,10 +70,12 @@
     from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error, r2_score
     from sklearn.cross_decomposition import PLSRegression
     np.random.seed(rd_seed)
+    # hdr corresponds to the CSV column header setting (yes/no)
     if hdr == 'yes':
         col = 0
     else:
         col = False
+    # loading the csv
     x, y = utils.load_csv(xcal_csv, ycal_csv, autoremove_na=True, sep=sep, x_hdr=0, y_hdr=0, x_index_col=col, y_index_col=col)
     # Split data into training and test sets using the kennard_stone method and correlation metric, 25% of data is used for testing
     train_index, test_index = train_test_split_idx(x, y=y, method="kennard_stone", metric="correlation", test_size=0.25, random_state=rd_seed)
@@ -55,6 +92,7 @@
     ('SVG', FeatureUnion(svgolay))   # Pipeline([('_sg1',pp.SavitzkyGolay()),('_sg2',pp.SavitzkyGolay())])  # nested pipeline to perform the Savitzky-Golay method twice for 2nd order preprocessing
     ]
+    # Declare complete pipeline
     pipeline = Pipeline([
         ('scaler', MinMaxScaler()), # scaling the data
         ('preprocessing', FeatureUnion(preprocessing)), # preprocessing
@@ -74,43 +112,8 @@
     return (trained)

-def pca_maker(data_import):
-    # Declare complete pipeline
-    numerical_columns_list = []
-    categorical_columns_list = []
-
-    for i in data_import.columns:
-        if data_import[i].dtype == np.dtype("float64") or data_import[i].dtype == np.dtype("int64"):
-            numerical_columns_list.append(data_import[i])
-        else:
-            categorical_columns_list.append(data_import[i])
-    if len(numerical_columns_list) == 0:
-        empty = [0 for x in range(len(data_import))]
-        numerical_columns_list.append(empty)
-    if len(categorical_columns_list) > 0:
-        categorical_data = pd.concat(categorical_columns_list, axis=1)
-    if len(categorical_columns_list) == 0:
-        empty = ["" for x in range(len(data_import))]
-        categorical_columns_list.append(empty)
-        # else:
-        categorical_data = pd.DataFrame(categorical_columns_list).T
-        categorical_data.columns = ['no categories']
-    numerical_data = pd.concat(numerical_columns_list, axis=1)
-    numerical_data = numerical_data.apply(lambda x: x.fillna(x.mean()))
-
-    scaler = StandardScaler()
-    scaled_values = scaler.fit_transform(numerical_data)
-
-    pca = PCA(n_components=6)
-    pca_fit = pca.fit(scaled_values)
-    pca_data = pca_fit.transform(scaled_values)
-    pca_data = pd.DataFrame(pca_data, index=numerical_data.index)
-    new_column_names = ["PCA_" + str(i) + ' - ' + str(round(pca_fit.explained_variance_ratio_[i-1], 3) *100) + '%' for i in range(1, len(pca_data.columns) + 1)]
-
-    column_mapper = dict(zip(list(pca_data.columns), new_column_names))
-
-    pca_data = pca_data.rename(columns=column_mapper)
-
-    output = pd.concat([data_import, pca_data], axis=1)
-
-    return output, list(categorical_data.columns), new_column_names
+# predict module (placeholder until prediction is implemented)
+def predict(NIRS_csv=None, psep=None, phdr=None):
+    display = f"Prediction with: {NIRS_csv}, {psep}, {phdr}"
+    st.success(display)
\ No newline at end of file
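`model()` scales the spectra, applies several spectral preprocessings in parallel through a `FeatureUnion` (Savitzky-Golay among them, via pinard's transformers), and fits a PLS regression. A hedged, dependency-light sketch of the same pipeline shape, using scipy's `savgol_filter` as a stand-in for pinard's `SavitzkyGolay`; the data and all hyper-parameters below are dummies:

```python
# Hedged sketch of the pipeline shape in model(): scale, preprocess in
# parallel, then PLS-regress. Not the repository's exact implementation.
import numpy as np
from scipy.signal import savgol_filter
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.preprocessing import MinMaxScaler, FunctionTransformer
from sklearn.cross_decomposition import PLSRegression

x = np.random.rand(40, 200)                     # 40 spectra, 200 wavelengths
y = np.random.rand(40)                          # chemical reference values

preprocessing = FeatureUnion([
    ("identity", FunctionTransformer(lambda s: s)),
    ("savgol", FunctionTransformer(lambda s: savgol_filter(s, 11, polyorder=2, deriv=1))),
])
pipeline = Pipeline([
    ("scaler", MinMaxScaler()),                 # scaling the data
    ("preprocessing", preprocessing),           # parallel spectral preprocessings
    ("PLS", PLSRegression(n_components=10)),    # regression
])
pipeline.fit(x, y)
print("training R2:", pipeline.score(x, y))
```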
diff --git a/requirements.txt b/requirements.txt
index c14c275d8f7ff905e5a1b1c45ebd863d6dda60f5..851ef9d728d5da15852ca49238f414b9cd1bd23e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,3 @@
-#streamlit_lottie>=0.0.2
 streamlit>=1.3.0
 requests>=2.24.0
 Pillow>=8.4.0
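Note that requirements.txt only pins streamlit, requests and Pillow, while the code above also imports numpy, pandas, plotly, scikit-learn, PIL and pinard (installed directly or pulled in as dependencies). A quick, illustrative check that everything the app imports actually resolves in the target environment:

```python
# Illustrative environment check: confirm the modules imported across
# app.py and application_functions.py are importable.
import importlib

for mod in ["streamlit", "PIL", "numpy", "pandas", "plotly", "sklearn", "pinard"]:
    try:
        importlib.import_module(mod)
        print(f"{mod}: ok")
    except ImportError as err:
        print(f"{mod}: MISSING ({err})")
```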