Skip to content
Snippets Groups Projects
Commit 965f7e42 authored by DIANE's avatar DIANE
Browse files
parents 023dfa7d 4b97a50f
No related branches found
No related tags found
No related merge requests found
[theme] [theme]
#primaryColor="#E0A505" #base="dark"
#backgroundColor="#b7e4f3"
#secondaryBackgroundColor="#b5f5cb"
#textColor="#0a23bf"
font="sans serif" font="sans serif"
#primaryColor="#E91E63"
#backgroundColor="#FFE082"
#secondaryBackgroundColor="#E6EE9C"
#textColor="#0a23bf"
primaryColor="#EF8354" primaryColor="#EF8354"
backgroundColor="#FFFFFF" backgroundColor="#FFFFFF"
secondaryBackgroundColor="#7E8987" secondaryBackgroundColor="#7E8987"
......
...@@ -256,14 +256,18 @@ def _get_label_members(X, labels, cluster): ...@@ -256,14 +256,18 @@ def _get_label_members(X, labels, cluster):
members = X[indices] members = X[indices]
return members return members
def HDBSCAN_function(data, min_cluster_size): def HDBSCAN_function(data):
# param_dist = {'min_samples': [1,5,10,30], # param_dist = {'min_samples': [1,5,10,30],
# 'min_cluster_size':[5,10,20,30,50,75,100], # 'min_cluster_size':[5,10,20,30,50,75,100],
# # 'cluster_selection_method' : ['eom','leaf'], # # 'cluster_selection_method' : ['eom','leaf'],
# # 'metric' : ['euclidean','manhattan'] # # 'metric' : ['euclidean','manhattan']
# } # }
param_dist = {'min_samples': [1,5], # param_dist = {'min_samples': [1,5,10,50],
'min_cluster_size':[5,10], # 'min_cluster_size':[5,10,30,50,100,300,500],
# }
param_dist = {'min_samples': [1,5, 10,],
'min_cluster_size':[5,10,30,50,100],
'metric' : ['euclidean','manhattan'],
} }
clusterable_embedding = UMAP( clusterable_embedding = UMAP(
...@@ -272,6 +276,15 @@ def HDBSCAN_function(data, min_cluster_size): ...@@ -272,6 +276,15 @@ def HDBSCAN_function(data, min_cluster_size):
n_components=5, n_components=5,
random_state=42, random_state=42,
).fit_transform(data) ).fit_transform(data)
# RandomizedSearchCV not working...
# def scoring(model, clusterable_embedding):
# label = HDBSCAN().fit_predict(clusterable_embedding)
# hdbscan_score = DBCV(clusterable_embedding, label, dist_function=euclidean)
# return hdbscan_score
# tunning = RandomizedSearchCV(estimator=HDBSCAN(), param_distributions=param_dist, scoring=scoring)
# tunning.fit(clusterable_embedding)
# return tunning
min_score = pd.DataFrame() min_score = pd.DataFrame()
for i in param_dist.get('min_samples'): for i in param_dist.get('min_samples'):
for j in param_dist.get('min_cluster_size'): for j in param_dist.get('min_cluster_size'):
...@@ -279,7 +292,8 @@ def HDBSCAN_function(data, min_cluster_size): ...@@ -279,7 +292,8 @@ def HDBSCAN_function(data, min_cluster_size):
ij_hdbscan_score = DBCV(clusterable_embedding, ij_label, dist_function=euclidean) ij_hdbscan_score = DBCV(clusterable_embedding, ij_label, dist_function=euclidean)
min_score.at[i,j] = ij_hdbscan_score min_score.at[i,j] = ij_hdbscan_score
hdbscan_score = max(min_score.max()) hdbscan_score = max(min_score.max())
# get the coordinates of the best clustering paramters and run HDBSCAN below # get the coordinates of the best clustering parameters and run HDBSCAN below
bparams = np.where(min_score == hdbscan_score)
labels = HDBSCAN(min_samples=1, min_cluster_size=min_cluster_size).fit_predict(clusterable_embedding) # run HDBSCAN with best params
labels = HDBSCAN(min_samples=param_dist['min_samples'][bparams[0][0]], min_cluster_size=param_dist['min_cluster_size'][bparams[1][0]], metric=param_dist['metric'][bparams[1][0]]).fit_predict(clusterable_embedding)
return labels, hdbscan_score return labels, hdbscan_score
...@@ -27,7 +27,7 @@ from pinard import utils ...@@ -27,7 +27,7 @@ from pinard import utils
from pinard import preprocessing as pp from pinard import preprocessing as pp
from pinard.model_selection import train_test_split_idx from pinard.model_selection import train_test_split_idx
from sklearn.model_selection import train_test_split, cross_val_score, cross_val_predict, cross_validate from sklearn.model_selection import train_test_split, cross_val_score, cross_val_predict, cross_validate, RandomizedSearchCV
from sklearn.pipeline import Pipeline, FeatureUnion from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.compose import TransformedTargetRegressor from sklearn.compose import TransformedTargetRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error, r2_score from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error, r2_score
...@@ -45,6 +45,7 @@ from sklearn.metrics import pairwise_distances_argmin_min, adjusted_rand_score, ...@@ -45,6 +45,7 @@ from sklearn.metrics import pairwise_distances_argmin_min, adjusted_rand_score,
## Web app construction ## Web app construction
import streamlit as st import streamlit as st
from st_pages import Page, Section, show_pages, add_page_title, hide_pages
from tempfile import NamedTemporaryFile from tempfile import NamedTemporaryFile
# help on streamlit input https://docs.streamlit.io/library/api-reference/widgets # help on streamlit input https://docs.streamlit.io/library/api-reference/widgets
......
...@@ -15,10 +15,10 @@ The process includes: ...@@ -15,10 +15,10 @@ The process includes:
If one wants to use data stored in a SQL database, the config file is in the config/ folder. If one wants to use data stored in a SQL database, the config file is in the config/ folder.
## Installation ## Installation
This package is written in python. You can clone the repository: git clone https://src.koda.cnrs.fr/nicolas.barthes.5/nirs_workflow.git This package is written in python. You can clone the repository: git clone https://src.koda.cnrs.fr/CEFE/PACE/nirs_workflow.git
Then install the requirements: pip install -r requirements.txt Then install the requirements: pip install -r requirements.txt
To use Locally weighted PLS Regression for creation model, you will need to install Jchemo.jl (https://github.com/mlesnoff/Jchemo.jl), a Julia package. (OPTIONAL) To use Locally weighted PLS Regression for model creation, you will need to install Jchemo.jl (https://github.com/mlesnoff/Jchemo.jl), a Julia package.
From the CLI: python From the CLI: python
> '>>> import julia > '>>> import julia
'>>> julia.install() '>>> julia.install()
...@@ -28,9 +28,9 @@ From the CLI: python ...@@ -28,9 +28,9 @@ From the CLI: python
To check if Jchemo is installed without errors: To check if Jchemo is installed without errors:
> '>>> Pkg.status() > '>>> Pkg.status()
You can then run: streamlit run ./app.py from the CLI. You can then run (CLI): streamlit run ./app.py from within your folder.
The app will then open in your default browser. The app will open in your default browser.
## Usage ## Usage
The web app allows you to process sample selection, model creation and predictions. The web app allows you to process sample selection, model creation and predictions.
......
#from Modules_manager.PCA_ import pca_maker
from Packages import * from Packages import *
st.set_page_config(page_title="NIRS Utils", page_icon=":goat:", layout="wide") st.set_page_config(page_title="NIRS Utils", page_icon=":goat:", layout="wide",)
if 'interface' not in st.session_state:
st.session_state['interface'] = 'simple'
from Modules import * from Modules import *
from Class_Mod.DATA_HANDLING import * from Class_Mod.DATA_HANDLING import *
# graphical delimiter
# st.write("---")
# load images for web interface
# img_sselect = Image.open("images\sselect.JPG")
# img_general = Image.open("images\general.JPG")
# img_predict = Image.open("images\predict.JPG")
# TOC menu on the left # # TOC menu on the left
# with st.sidebar: show_pages(
# st.markdown("[Sample Selection](#sample-selection)") [Page("app.py", "Home"),
# st.markdown("[Model Development](#create-a-model)") Page("pages\\1-samples_selection.py", "Samples Selection"),
# st.markdown("[Predictions Making](#predict)") Page("pages\\2-model_creation.py", "Models Creation"),
Page("pages\\3-prediction.py", "Predictions"),
]
)
hide_pages("Samples Selection")
hide_pages("Models Creation")
hide_pages("Predictions")
with st.sidebar:
interface = st.selectbox(label="Interface", options=['simple', 'advanced'], key='interface')
st.page_link('pages\\1-samples_selection.py')
if st.session_state['interface'] == 'simple':
st.page_link('pages\\2-model_creation.py')
# if advanced interface, split Models Creation and Predictions
elif st.session_state['interface'] == 'advanced':
show_pages(
[Page("app.py", "Home"),
Page("pages\\1-samples_selection.py", "Samples Selection"),
Page("pages\\2-model_creation.py", "Models Creation"),
Page("pages\\3-prediction.py", "Predictions"),
]
)
st.page_link('pages\\2-model_creation.py')
st.page_link('pages\\3-prediction.py')
# Page header # Page header
with st.container(): with st.container():
......
...@@ -7,6 +7,10 @@ from Class_Mod.DATA_HANDLING import * ...@@ -7,6 +7,10 @@ from Class_Mod.DATA_HANDLING import *
st.session_state["interface"] = st.session_state.get('interface')
if st.session_state["interface"] == 'simple':
hide_pages("Predictions")
################################### Data Loading and Visualization ######################################## ################################### Data Loading and Visualization ########################################
# container1 = st.header("Data loading",border=True) # container1 = st.header("Data loading",border=True)
col2, col1 = st.columns([3, 1]) col2, col1 = st.columns([3, 1])
......
...@@ -3,7 +3,9 @@ st.set_page_config(page_title="NIRS Utils", page_icon=":goat:", layout="wide") ...@@ -3,7 +3,9 @@ st.set_page_config(page_title="NIRS Utils", page_icon=":goat:", layout="wide")
from Modules import * from Modules import *
from Class_Mod.DATA_HANDLING import * from Class_Mod.DATA_HANDLING import *
st.session_state["interface"] = st.session_state.get('interface')
if st.session_state["interface"] == 'simple':
hide_pages("Predictions")
def nn(x): def nn(x):
return x is not None return x is not None
######################################################################################## ########################################################################################
...@@ -107,4 +109,8 @@ if xcal_csv is not None and ycal_csv is not None: ...@@ -107,4 +109,8 @@ if xcal_csv is not None and ycal_csv is not None:
# create a report with information on the model # create a report with information on the model
## see https://stackoverflow.com/a/59578663 ## see https://stackoverflow.com/a/59578663
#M4.pyplot(reg_plot(meas==(ycal_csv,ycal_csv,ycal_csv], pred=[ycal_csv,ycal_csv,ycal_csv])) #M4.pyplot(reg_plot(meas==(ycal_csv,ycal_csv,ycal_csv], pred=[ycal_csv,ycal_csv,ycal_csv]))
\ No newline at end of file
if st.session_state['interface'] == 'simple':
st.page_link('pages\\3-prediction.py', label = 'Keep on keepin\' on to predict your values !')
...@@ -3,6 +3,7 @@ st.set_page_config(page_title="NIRS Utils", page_icon=":goat:", layout="wide") ...@@ -3,6 +3,7 @@ st.set_page_config(page_title="NIRS Utils", page_icon=":goat:", layout="wide")
from Modules import * from Modules import *
from Class_Mod.DATA_HANDLING import * from Class_Mod.DATA_HANDLING import *
st.session_state["interface"] = st.session_state.get('interface')
st.header("Predictions making", divider='blue') st.header("Predictions making", divider='blue')
......
streamlit>=1.3.0 streamlit>=1.3.0
st_pages>=0.4.5
requests>=2.24.0 requests>=2.24.0
Pillow>=8.4.0 Pillow>=8.4.0
protobuf>=3.19.0 protobuf>=3.19.0
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment