Merge branch 'master' of https://src.koda.cnrs.fr/cefe/pace/nirs_workflow

965f7e42 · DIANE · 023dfa7d · 4b97a50f · 965f7e42 · 965f7e42
Commit 965f7e42 authored 1 year ago by DIANE
--- a/.streamlit/config.toml
+++ b/.streamlit/config.toml
 [theme]
-#primaryColor="#E0A505"
+#base="dark"
-#backgroundColor="#b7e4f3"
-#secondaryBackgroundColor="#b5f5cb"
-#textColor="#0a23bf"
 font="sans serif"
-#primaryColor="#E91E63"
-#backgroundColor="#FFE082"
-#secondaryBackgroundColor="#E6EE9C"
-#textColor="#0a23bf"
 primaryColor="#EF8354"
 backgroundColor="#FFFFFF"
 secondaryBackgroundColor="#7E8987"

--- a/hdbscan.py
+++ b/hdbscan.py
@@ -256,14 +256,18 @@ def _get_label_members(X, labels, cluster):
    members = X[indices]
    return members
-def HDBSCAN_function(data, min_cluster_size):
+def HDBSCAN_function(data):
    # param_dist = {'min_samples': [1,5,10,30],
    #               'min_cluster_size':[5,10,20,30,50,75,100],
    #               # 'cluster_selection_method' : ['eom','leaf'],
    #               # 'metric' : ['euclidean','manhattan']
    #               }
-    param_dist = {'min_samples': [1,5],
+    # param_dist = {'min_samples': [1,5,10,50],
-                  'min_cluster_size':[5,10],
+    #               'min_cluster_size':[5,10,30,50,100,300,500],
+    #               }
+    param_dist = {'min_samples': [1,5, 10,],
+                  'min_cluster_size':[5,10,30,50,100],
+                  'metric' : ['euclidean','manhattan'],
                  }
    clusterable_embedding = UMAP(
@@ -272,6 +276,15 @@ def HDBSCAN_function(data, min_cluster_size):
        n_components=5,
        random_state=42,
    ).fit_transform(data)
+    # RandomizedSearchCV not working...
+    # def scoring(model, clusterable_embedding):
+    #     label = HDBSCAN().fit_predict(clusterable_embedding)
+    #     hdbscan_score = DBCV(clusterable_embedding, label, dist_function=euclidean)
+    #     return hdbscan_score
+    # tunning = RandomizedSearchCV(estimator=HDBSCAN(), param_distributions=param_dist,  scoring=scoring)
+    # tunning.fit(clusterable_embedding)
+    # return tunning
    min_score = pd.DataFrame()
    for i in param_dist.get('min_samples'):
        for j in param_dist.get('min_cluster_size'):
@@ -279,7 +292,8 @@ def HDBSCAN_function(data, min_cluster_size):
            ij_hdbscan_score = DBCV(clusterable_embedding, ij_label, dist_function=euclidean)
            min_score.at[i,j] = ij_hdbscan_score
    hdbscan_score  = max(min_score.max())
-    # get the coordinates of the best clustering paramters and run HDBSCAN below
+    # get the coordinates of the best clustering parameters and run HDBSCAN below
+    bparams = np.where(min_score == hdbscan_score)
-    labels = HDBSCAN(min_samples=1, min_cluster_size=min_cluster_size).fit_predict(clusterable_embedding)
+    # run HDBSCAN with best params
+    labels = HDBSCAN(min_samples=param_dist['min_samples'][bparams[0][0]], min_cluster_size=param_dist['min_cluster_size'][bparams[1][0]], metric=param_dist['metric'][bparams[1][0]]).fit_predict(clusterable_embedding)
    return labels, hdbscan_score
--- a/Packages.py
+++ b/Packages.py
@@ -27,7 +27,7 @@ from pinard import utils
 from pinard import preprocessing as pp
 from pinard.model_selection import train_test_split_idx
-from sklearn.model_selection import train_test_split, cross_val_score, cross_val_predict, cross_validate
+from sklearn.model_selection import train_test_split, cross_val_score, cross_val_predict, cross_validate, RandomizedSearchCV
 from sklearn.pipeline import Pipeline, FeatureUnion
 from sklearn.compose import TransformedTargetRegressor
 from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error, r2_score
@@ -45,6 +45,7 @@ from sklearn.metrics import pairwise_distances_argmin_min, adjusted_rand_score,
 ## Web app construction
 import streamlit as st
+from st_pages import Page, Section, show_pages, add_page_title, hide_pages
 from tempfile import NamedTemporaryFile
 # help on streamlit input https://docs.streamlit.io/library/api-reference/widgets

--- a/README.md
+++ b/README.md
@@ -15,10 +15,10 @@ The process includes:
 If one wants to use data stored in a SQL database, the config file is in the config/ folder.
 ## Installation
-This package is written in python. You can clone the repository: git clone https://src.koda.cnrs.fr/nicolas.barthes.5/nirs_workflow.git
+This package is written in Python. You can clone the repository: git clone https://src.koda.cnrs.fr/CEFE/PACE/nirs_workflow.git
 Then install the requirements: pip install -r requirements.txt
-To use Locally weighted PLS Regression for creation model, you will need to install Jchemo.jl (https://github.com/mlesnoff/Jchemo.jl), a Julia package.
+(OPTIONAL) To use Locally Weighted PLS Regression for model creation, you will need to install Jchemo.jl (https://github.com/mlesnoff/Jchemo.jl), a Julia package.
 From the CLI: python
 > '>>> import julia
 '>>> julia.install()
@@ -28,9 +28,9 @@ From the CLI: python
 To check if Jchemo is installed without errors:
 > '>>> Pkg.status()
-You can then run: streamlit run ./app.py from the CLI.
+You can then run the app from the CLI, from within the project folder: streamlit run ./app.py
-The app will then open in your default browser.
+The app will open in your default browser.
 ## Usage
 The web app allows you to process sample selection, model creation and predictions.

--- a/app.py
+++ b/app.py
-#from Modules_manager.PCA_ import pca_maker
 from Packages import *
-st.set_page_config(page_title="NIRS Utils", page_icon=":goat:", layout="wide")
+st.set_page_config(page_title="NIRS Utils", page_icon=":goat:", layout="wide",)
+if 'interface' not in st.session_state:
+    st.session_state['interface'] = 'simple'
 from Modules import *
 from Class_Mod.DATA_HANDLING import *
-# graphical delimiter
-# st.write("---")
-# load images for web interface
-# img_sselect = Image.open("images\sselect.JPG")
-# img_general = Image.open("images\general.JPG")
-# img_predict = Image.open("images\predict.JPG")
-# TOC menu on the left
+# # TOC menu on the left
-# with st.sidebar:
+show_pages(
-#     st.markdown("[Sample Selection](#sample-selection)")
+    [Page("app.py", "Home"),
-#     st.markdown("[Model Development](#create-a-model)")
+     Page("pages\\1-samples_selection.py", "Samples Selection"),
-#     st.markdown("[Predictions Making](#predict)")
+     Page("pages\\2-model_creation.py", "Models Creation"),
+     Page("pages\\3-prediction.py", "Predictions"),
+     ]
+)
+hide_pages("Samples Selection")
+hide_pages("Models Creation")
+hide_pages("Predictions")
+with st.sidebar:
+    interface = st.selectbox(label="Interface", options=['simple', 'advanced'], key='interface')
+    st.page_link('pages\\1-samples_selection.py')
+    if st.session_state['interface'] == 'simple':
+        st.page_link('pages\\2-model_creation.py')
+    # if advanced interface, split Models Creation and Predictions
+    elif st.session_state['interface'] == 'advanced':
+        show_pages(
+            [Page("app.py", "Home"),
+             Page("pages\\1-samples_selection.py", "Samples Selection"),
+             Page("pages\\2-model_creation.py", "Models Creation"),
+             Page("pages\\3-prediction.py", "Predictions"),
+             ]
+        )
+        st.page_link('pages\\2-model_creation.py')
+        st.page_link('pages\\3-prediction.py')
 # Page header
 with st.container():

--- a/pages/1-samples_selection.py
+++ b/pages/1-samples_selection.py
@@ -7,6 +7,10 @@ from Class_Mod.DATA_HANDLING import *
+st.session_state["interface"] = st.session_state.get('interface')
+if st.session_state["interface"] == 'simple':
+    hide_pages("Predictions")
 ################################### Data Loading and Visualization ########################################
 # container1 = st.header("Data loading",border=True)
 col2, col1 = st.columns([3, 1])

--- a/pages/2-model_creation.py
+++ b/pages/2-model_creation.py
@@ -3,7 +3,9 @@ st.set_page_config(page_title="NIRS Utils", page_icon=":goat:", layout="wide")
 from Modules import *
 from Class_Mod.DATA_HANDLING import *
+st.session_state["interface"] = st.session_state.get('interface')
+if st.session_state["interface"] == 'simple':
+    hide_pages("Predictions")
 def nn(x):
    return x is not None
 ########################################################################################
@@ -107,4 +109,8 @@ if xcal_csv is not None and ycal_csv is not None:
                # create a report with information on the model
                ## see https://stackoverflow.com/a/59578663
        #M4.pyplot(reg_plot(meas==(ycal_csv,ycal_csv,ycal_csv], pred=[ycal_csv,ycal_csv,ycal_csv]))
\ No newline at end of file
+                if st.session_state['interface'] == 'simple':
+                    st.page_link('pages\\3-prediction.py', label = 'Keep on keepin\' on to predict your values !')
--- a/pages/3-prediction.py
+++ b/pages/3-prediction.py
@@ -3,6 +3,7 @@ st.set_page_config(page_title="NIRS Utils", page_icon=":goat:", layout="wide")
 from Modules import *
 from Class_Mod.DATA_HANDLING import *
+st.session_state["interface"] = st.session_state.get('interface')
 st.header("Predictions making", divider='blue')

--- a/requirements.txt
+++ b/requirements.txt
 streamlit>=1.3.0
+st_pages>=0.4.5
 requests>=2.24.0
 Pillow>=8.4.0
 protobuf>=3.19.0