diff --git a/.streamlit/config.toml b/.streamlit/config.toml
index 54bad9eb6d873cc0f57103c4112a03be75af0a58..9b3d160d8bd8c8b1eb5980e18e5e3170f3ccfd85 100644
--- a/.streamlit/config.toml
+++ b/.streamlit/config.toml
@@ -1,15 +1,6 @@
 [theme]
-#primaryColor="#E0A505"
-#backgroundColor="#b7e4f3"
-#secondaryBackgroundColor="#b5f5cb"
-#textColor="#0a23bf"
+#base="dark"
 font="sans serif"
-
-#primaryColor="#E91E63"
-#backgroundColor="#FFE082"
-#secondaryBackgroundColor="#E6EE9C"
-#textColor="#0a23bf"
-
 primaryColor="#EF8354"
 backgroundColor="#FFFFFF"
 secondaryBackgroundColor="#7E8987"
diff --git a/hdbscan.py b/Class_Mod/HDBSCAN_Clustering.py
similarity index 90%
rename from hdbscan.py
rename to Class_Mod/HDBSCAN_Clustering.py
index 0d9268bfed8e6c97db22f7165424052a13110f60..f01928254a72c2516d8f2093011b06130aaaea27 100644
--- a/hdbscan.py
+++ b/Class_Mod/HDBSCAN_Clustering.py
@@ -256,14 +256,18 @@ def _get_label_members(X, labels, cluster):
     members = X[indices]
     return members
 
-def HDBSCAN_function(data, min_cluster_size):
+def HDBSCAN_function(data):
     # param_dist = {'min_samples': [1,5,10,30],
     #               'min_cluster_size':[5,10,20,30,50,75,100],
     # #             'cluster_selection_method' : ['eom','leaf'],
     # #             'metric' : ['euclidean','manhattan']
     #               }
-    param_dist = {'min_samples': [1,5],
-                  'min_cluster_size':[5,10],
+    # param_dist = {'min_samples': [1,5,10,50],
+    #               'min_cluster_size':[5,10,30,50,100,300,500],
+    #               }
+    param_dist = {'min_samples': [1,5,10],
+                  'min_cluster_size':[5,10,30,50,100],
+                  'metric' : ['euclidean','manhattan'],
                   }
 
     clusterable_embedding = UMAP(
@@ -272,6 +276,15 @@ def HDBSCAN_function(data, min_cluster_size):
         n_components=5,
         random_state=42,
     ).fit_transform(data)
+
+    # RandomizedSearchCV not working...
+    # def scoring(model, clusterable_embedding):
+    #     label = HDBSCAN().fit_predict(clusterable_embedding)
+    #     hdbscan_score = DBCV(clusterable_embedding, label, dist_function=euclidean)
+    #     return hdbscan_score
+    # tuning = RandomizedSearchCV(estimator=HDBSCAN(), param_distributions=param_dist, scoring=scoring)
+    # tuning.fit(clusterable_embedding)
+    # return tuning
     min_score = pd.DataFrame()
     for i in param_dist.get('min_samples'):
         for j in param_dist.get('min_cluster_size'):
@@ -279,7 +292,8 @@ def HDBSCAN_function(data, min_cluster_size):
             ij_hdbscan_score = DBCV(clusterable_embedding, ij_label, dist_function=euclidean)
             min_score.at[i,j] = ij_hdbscan_score
     hdbscan_score = max(min_score.max())
-    # get the coordinates of the best clustering paramters and run HDBSCAN below
-
-    labels = HDBSCAN(min_samples=1, min_cluster_size=min_cluster_size).fit_predict(clusterable_embedding)
+    # get the coordinates of the best clustering parameters and run HDBSCAN below
+    bparams = np.where(min_score == hdbscan_score)
+    # run HDBSCAN with the best params; metric stays at the euclidean default since the search loop does not tune it
+    labels = HDBSCAN(min_samples=param_dist['min_samples'][bparams[0][0]], min_cluster_size=param_dist['min_cluster_size'][bparams[1][0]]).fit_predict(clusterable_embedding)
     return labels, hdbscan_score
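Review note on Class_Mod/HDBSCAN_Clustering.py: RandomizedSearchCV expects a supervised-style scorer, which is why the commented-out attempt above stalls, so the manual double loop scored by DBCV is the pragmatic route. Below is a minimal self-contained sketch of that same pattern; `dbcv` is passed in as a callable because its import path is not shown in this diff, and the rename away from hdbscan.py also stops the module from shadowing the hdbscan package it relies on.

```python
# Minimal sketch of the grid-search-by-DBCV pattern used in HDBSCAN_function.
# Assumptions: `embedding` is the UMAP output and `dbcv` is the DBCV scoring
# callable imported elsewhere in the module (higher scores are better).
import pandas as pd
from hdbscan import HDBSCAN
from scipy.spatial.distance import euclidean

def grid_search_hdbscan(embedding, param_dist, dbcv):
    scores = pd.DataFrame()
    for ms in param_dist['min_samples']:
        for mcs in param_dist['min_cluster_size']:
            labels = HDBSCAN(min_samples=ms, min_cluster_size=mcs).fit_predict(embedding)
            scores.at[ms, mcs] = dbcv(embedding, labels, dist_function=euclidean)
    best_ms, best_mcs = scores.stack().idxmax()   # parameter values, not positions
    best_labels = HDBSCAN(min_samples=best_ms, min_cluster_size=best_mcs).fit_predict(embedding)
    return best_labels, scores.loc[best_ms, best_mcs]
```

Selecting the winner with label-based idxmax sidesteps the off-by-position risk of mapping np.where indices back onto the parameter lists.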
diff --git a/Packages.py b/Packages.py
index 924b7888f5c5829ec4a62bc3308b8e9d97ef3620..12032738ef858e544f1c6483e9ed09a1edd140d7 100644
--- a/Packages.py
+++ b/Packages.py
@@ -27,7 +27,7 @@ from pinard import utils
 from pinard import preprocessing as pp
 from pinard.model_selection import train_test_split_idx
 
-from sklearn.model_selection import train_test_split, cross_val_score, cross_val_predict, cross_validate
+from sklearn.model_selection import train_test_split, cross_val_score, cross_val_predict, cross_validate, RandomizedSearchCV
 from sklearn.pipeline import Pipeline, FeatureUnion
 from sklearn.compose import TransformedTargetRegressor
 from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error, r2_score
@@ -45,6 +45,7 @@ from sklearn.metrics import pairwise_distances_argmin_min, adjusted_rand_score,
 
 ## Web app construction
 import streamlit as st
+from st_pages import Page, Section, show_pages, add_page_title, hide_pages
 from tempfile import NamedTemporaryFile
 
 # help on streamlit input https://docs.streamlit.io/library/api-reference/widgets
diff --git a/README.md b/README.md
index 1edf0f54ef393356556e0a87f77cc94840f214a9..d4047956224c137fc011b54219fea0b4d1ed7c19 100644
--- a/README.md
+++ b/README.md
@@ -15,10 +15,10 @@ The process includes:
 If one wants to use data stored in a SQL database, the config file is in the config/ folder.
 
 ## Installation
-This package is written in python. You can clone the repository: git clone https://src.koda.cnrs.fr/nicolas.barthes.5/nirs_workflow.git
+This package is written in Python. You can clone the repository: git clone https://src.koda.cnrs.fr/CEFE/PACE/nirs_workflow.git
 
 Then install the requirements: pip install -r requirements.txt
-To use Locally weighted PLS Regression for creation model, you will need to install Jchemo.jl (https://github.com/mlesnoff/Jchemo.jl), a Julia package.
+(OPTIONAL) To use Locally weighted PLS Regression for model creation, you will need to install Jchemo.jl (https://github.com/mlesnoff/Jchemo.jl), a Julia package.
 From the CLI: python
 > '>>> import julia
 '>>> julia.install()
@@ -28,9 +28,9 @@ From the CLI: python
 To check if Jchemo is installed without errors:
 > '>>> Pkg.status()
 
-You can then run: streamlit run ./app.py from the CLI.
+You can then run the app from the CLI, inside the repository folder: streamlit run ./app.py
 
-The app will then open in your default browser.
+The app will open in your default browser.
 
 ## Usage
 The web app allows you to process sample selection, model creation and predictions.
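A note on the README's optional Julia step: the three REPL commands can be run as one Python snippet to verify the bridge end-to-end. This is only a sketch of the documented steps, assuming the pyjulia bridge (the `julia` distribution on PyPI) and a Julia binary already on the PATH; `Pkg.add("Jchemo")` is an assumption about how the package is fetched.

```python
# Sketch of the optional Julia/Jchemo setup described in the README.
# Assumes pyjulia ("julia" on PyPI) is installed and Julia is on the PATH.
import julia
julia.install()        # one-time: wires PyCall to the local Julia install

from julia import Pkg  # Julia's package manager, exposed through pyjulia
Pkg.add("Jchemo")      # assumption: fetch Jchemo.jl from the General registry
Pkg.status()           # should list Jchemo without errors, as the README checks
```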
diff --git a/app.py b/app.py
index 8baa0d0724465c6e6101c90bafa42dba54c44a00..027393b8079257a75cae2893f29cd52e15db1c8a 100644
--- a/app.py
+++ b/app.py
@@ -1,24 +1,39 @@
-#from Modules_manager.PCA_ import pca_maker
-
 from Packages import *
-st.set_page_config(page_title="NIRS Utils", page_icon=":goat:", layout="wide")
+st.set_page_config(page_title="NIRS Utils", page_icon=":goat:", layout="wide",)
+if 'interface' not in st.session_state:
+    st.session_state['interface'] = 'simple'
 from Modules import *
 from Class_Mod.DATA_HANDLING import *
 
-# graphical delimiter
-# st.write("---")
-
-# load images for web interface
-# img_sselect = Image.open("images\sselect.JPG")
-# img_general = Image.open("images\general.JPG")
-# img_predict = Image.open("images\predict.JPG")
-# TOC menu on the left
-# with st.sidebar:
-#     st.markdown("[Sample Selection](#sample-selection)")
-#     st.markdown("[Model Development](#create-a-model)")
-#     st.markdown("[Predictions Making](#predict)")
+# # TOC menu on the left
+show_pages(
+    [Page("app.py", "Home"),
+     Page("pages\\1-samples_selection.py", "Samples Selection"),
+     Page("pages\\2-model_creation.py", "Models Creation"),
+     Page("pages\\3-prediction.py", "Predictions"),
+     ]
+)
+hide_pages("Samples Selection")
+hide_pages("Models Creation")
+hide_pages("Predictions")
+with st.sidebar:
+    interface = st.selectbox(label="Interface", options=['simple', 'advanced'], key='interface')
+    st.page_link('pages\\1-samples_selection.py')
+    if st.session_state['interface'] == 'simple':
+        st.page_link('pages\\2-model_creation.py')
+    # if advanced interface, split Models Creation and Predictions
+    elif st.session_state['interface'] == 'advanced':
+        show_pages(
+            [Page("app.py", "Home"),
+             Page("pages\\1-samples_selection.py", "Samples Selection"),
+             Page("pages\\2-model_creation.py", "Models Creation"),
+             Page("pages\\3-prediction.py", "Predictions"),
+             ]
+        )
+        st.page_link('pages\\2-model_creation.py')
+        st.page_link('pages\\3-prediction.py')
 
 # Page header
 with st.container():
diff --git a/pages/1-samples_selection.py b/pages/1-samples_selection.py
index 07139bf0e0d09f615c10a9bfd4d4f3c31fd665ef..76c2454f85edeb074e1f9006febae570d1def43b 100644
--- a/pages/1-samples_selection.py
+++ b/pages/1-samples_selection.py
@@ -7,6 +7,10 @@
 from Class_Mod.DATA_HANDLING import *
 
+st.session_state["interface"] = st.session_state.get('interface')
+if st.session_state["interface"] == 'simple':
+    hide_pages("Predictions")
+
 ################################### Data Loading and Visualization ########################################
 # container1 = st.header("Data loading",border=True)
 col2, col1 = st.columns([3, 1])
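Review note on the interface gating above: app.py seeds st.session_state['interface'] and every sub-page re-derives it, then hides the Predictions entry in simple mode. A shared helper would keep the repeated copies from drifting; a sketch, assuming st_pages' hide_pages accepts a list of page names and that a fresh session should default to 'simple':

```python
# Hedged sketch: one guard shared by app.py and the pages/ scripts, instead
# of repeating the session_state/hide_pages pair at the top of each page.
# Assumption: hide_pages takes a list of page names (st_pages >= 0.4.5).
import streamlit as st
from st_pages import hide_pages

def apply_interface_mode() -> str:
    mode = st.session_state.setdefault('interface', 'simple')
    if mode == 'simple':
        hide_pages(["Predictions"])
    return mode
```

Forward slashes in the page paths ('pages/1-samples_selection.py') would also keep st.page_link portable beyond Windows.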
diff --git a/pages/2-model_creation.py b/pages/2-model_creation.py
index b66ac470edbcd7355c71d0d3fdf9e03f8be4a24f..3fadcb45393c71242b4876ccf32912c616a59ec4 100644
--- a/pages/2-model_creation.py
+++ b/pages/2-model_creation.py
@@ -3,7 +3,9 @@ st.set_page_config(page_title="NIRS Utils", page_icon=":goat:", layout="wide")
 from Modules import *
 from Class_Mod.DATA_HANDLING import *
 
-
+st.session_state["interface"] = st.session_state.get('interface')
+if st.session_state["interface"] == 'simple':
+    hide_pages("Predictions")
 def nn(x):
     return x is not None
 ########################################################################################
@@ -107,4 +109,8 @@ if xcal_csv is not None and ycal_csv is not None:
 
     # create a report with information on the model
     ## see https://stackoverflow.com/a/59578663
-    #M4.pyplot(reg_plot(meas==(ycal_csv,ycal_csv,ycal_csv], pred=[ycal_csv,ycal_csv,ycal_csv]))
\ No newline at end of file
+    #M4.pyplot(reg_plot(meas==(ycal_csv,ycal_csv,ycal_csv], pred=[ycal_csv,ycal_csv,ycal_csv]))
+
+
+    if st.session_state['interface'] == 'simple':
+        st.page_link('pages\\3-prediction.py', label='Keep on keepin\' on to predict your values!')
diff --git a/pages/3-prediction.py b/pages/3-prediction.py
index d215aa7e9c1bca0269efacccd63bb258afdda062..65130fd1dfcdde9f491dc7f8eaee4e19817ddc55 100644
--- a/pages/3-prediction.py
+++ b/pages/3-prediction.py
@@ -3,6 +3,7 @@ st.set_page_config(page_title="NIRS Utils", page_icon=":goat:", layout="wide")
 from Modules import *
 from Class_Mod.DATA_HANDLING import *
+st.session_state["interface"] = st.session_state.get('interface')
 
 st.header("Predictions making", divider='blue')
 
diff --git a/requirements.txt b/requirements.txt
index de7027c587917f3f6fcd947ce5ab4b58af12de24..60c8fc3ba7f50429005b8d19db952fc27902ca47 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,5 @@
 streamlit>=1.3.0
+st_pages>=0.4.5
 requests>=2.24.0
 Pillow>=8.4.0
 protobuf>=3.19.0
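Last note, on requirements.txt: since the app relies on st_pages' show_pages/hide_pages API, it can fail fast with a clear message when the installed version is below the floor pinned above. A sketch using importlib.metadata; the `packaging` distribution is assumed to be available (it usually ships alongside pip):

```python
# Hedged sketch: guard against an st_pages older than the requirements floor,
# so show_pages/hide_pages fail with a clear message instead of an ImportError.
from importlib.metadata import version
from packaging.version import Version  # assumed available alongside pip

if Version(version("st_pages")) < Version("0.4.5"):
    raise RuntimeError("NIRS Utils needs st_pages >= 0.4.5 for show_pages/hide_pages")
```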