Skip to content
Snippets Groups Projects
Commit 6402ff4f authored by DIANE's avatar DIANE
Browse files

sklearn Pls

Table that was written in a message was incorporated
parent d18871dd
No related branches found
No related tags found
No related merge requests found
from Packages import *
from Packages import *
from Class_Mod.Miscellaneous import *
from Class_Mod.Regression_metrics import metrics
class PlsR:
def __init__(self, x_train, y_train, x_test, y_test):
self.x_train = x_train
......@@ -6,12 +12,53 @@ class PlsR:
self.y_train = y_train
self.y_test = y_test
def fit_(self):
    """Fit the PLS model and store calibration, CV and test predictions.

    The number of components is taken from ``self._optimize()``. The model
    is built with ``scale=False`` and fitted on the training data. Three
    DataFrames are then stored on the instance:

    * ``self.yc``  -- predictions of the fitted model on ``self.x_train``
    * ``self.ycv`` -- 3-fold cross-validated predictions on the training data
    * ``self.yt``  -- predictions of the fitted model on ``self.x_test``
    """
    # The former warm-up loop (20 throw-away PLSRegression objects and the
    # unused nlv/rmse locals) had no effect and has been dropped.
    self.trained = PLSRegression(n_components=self._optimize(), scale=False)
    self.trained.fit(self.x_train, self.y_train)
    # Calibration: predictions on the data the model was fitted on.
    self.yc = pd.DataFrame(self.trained.predict(self.x_train))
    # Cross-validation: out-of-fold predictions on the training data (cv=3).
    self.ycv = pd.DataFrame(cross_val_predict(self.trained, self.x_train, self.y_train, cv=3))
    # Test: predictions on the held-out test spectra.
    self.yt = pd.DataFrame(self.trained.predict(self.x_test))
def _optimize(self):
    """Choose the number of PLS components by 5-fold cross-validation.

    For every candidate count from 1 to 20, a ``PLSRegression`` with
    ``scale=False`` is cross-validated on the training data and the value
    returned by ``mean_squared_error`` is recorded. For each count, the
    percentage drop in error relative to the previous count is computed.
    The count with the largest drop is returned.

    Returns:
        The selected number of components (1-based), i.e.
        ``np.argmax(ratio) + 1``.
    """
    nlv = 21
    # Slot 0 is a fixed reference error (0.002) that the one-component model
    # is compared against; slots 1..20 are filled in by the loop below.
    # The values are whatever mean_squared_error returns; no square root is
    # taken here.
    mse = np.ones(nlv)
    mse[0] = 0.002
    ratio = []
    for i in range(1, nlv):
        m = PLSRegression(n_components=i, scale=False)
        ycv = cross_val_predict(m, self.x_train, self.y_train, cv=5)
        # Store by index: a NumPy array has no append() method, so the
        # earlier rmse.append(...) call raised AttributeError.
        mse[i] = mean_squared_error(self.y_train, ycv)
        # Relative improvement (in percent) over the previous count.
        ratio.append(((mse[i - 1] - mse[i]) / mse[i - 1]) * 100)
    return np.argmax(ratio) + 1
################################################################################################################
################################################################################################################
@property
def model_(self):
    """Return the estimator stored in ``self.trained``."""
    fitted = self.trained
    return fitted
@property
def metrics_(self):
    """Return one table of metrics for calibration, CV and test predictions.

    ``metrics(reference, predicted).evaluate_`` is read for three pairs, in
    this order: (``y_train``, ``yc``), (``y_train``, ``ycv``) and
    (``y_test``, ``yt``). The three results are concatenated along axis 0
    and the rows are labelled ``'calib'``, ``'cv'`` and ``'test'``.
    """
    pairs = (
        (self.y_train, self.yc),
        (self.y_train, self.ycv),
        (self.y_test, self.yt),
    )
    per_set = [metrics(reference, predicted).evaluate_ for reference, predicted in pairs]
    summary = pd.concat(per_set, axis=0)
    summary.index = ['calib', 'cv', 'test']
    return summary
@property
def pred_data_(self):
    """Return the stored predictions as a ``(yc, ycv, yt)`` tuple."""
    calib, crossval, test = self.yc, self.ycv, self.yt
    return calib, crossval, test
\ No newline at end of file
......@@ -11,4 +11,4 @@ from .VarSel import TpeIpls
from .Miscellaneous import resid_plot, reg_plot
from .DxReader import DxRead, read_dx
from .HDBSCAN_Clustering import Hdbscan
from .SK_PLSR_ import PlsR
\ No newline at end of file
from Class_Mod import LinearPCA, Umap, find_col_index, PinardPlsr, LWPLSR, list_files, metrics, TpeIpls, reg_plot, resid_plot, Sk_Kmeans, DxRead, Hdbscan, read_dx
from Class_Mod import PlsR, LinearPCA, Umap, find_col_index, PinardPlsr
from Class_Mod import LWPLSR, list_files, metrics, TpeIpls, reg_plot, resid_plot, Sk_Kmeans, DxRead, Hdbscan, read_dx
# find_col_index
from Class_Mod.Miscellaneous import prediction, download_results, plot_spectra
......@@ -11,6 +11,7 @@ from os import listdir
from os.path import isfile, join
from sklearn.preprocessing import StandardScaler, MinMaxScaler, LabelEncoder
import time
from scipy.stats import skew, kurtosis
### Exploratory data analysis-Dimensionality reduction
from umap.umap_ import UMAP
......@@ -24,7 +25,7 @@ from scipy.sparse import csgraph
# Modelling
# import julia
from julia import Main, Jchemo, DataFrames, Base, Pandas
#from julia import Main, Jchemo, DataFrames, Base, Pandas
from pinard import utils
from pinard import preprocessing as pp
......
......@@ -2,6 +2,8 @@ from Packages import *
st.set_page_config(page_title="NIRS Utils", page_icon=":goat:", layout="wide")
from Modules import *
from Class_Mod.DATA_HANDLING import *
# HTML for the "CEFE - CNRS" banner
bandeau_html = """
<div style="width: 100%; background-color: #4682B4; padding: 10px; margin-bottom: 10px;">
......@@ -19,7 +21,7 @@ if st.session_state["interface"] == 'simple':
def nn(x):
    """Return True when ``x`` is anything other than ``None``."""
    return not (x is None)
########################################################################################
reg_algo = ["","Full-PLSR", "Locally Weighted PLSR", "Interval-PLSR"]
reg_algo = ["","Full-PLSR", "Locally Weighted PLSR", "Interval-PLSR", "Full-PLSR-sklearn"]
# page Design
st.header("Calibration Model Development", divider='blue')
......@@ -94,7 +96,22 @@ if not spectra.empty and not y.empty:
# Assign data to training and test sets
X_train, y_train = pd.DataFrame(spectra.iloc[train_index,:]), y.iloc[train_index]
X_test, y_test = pd.DataFrame(spectra.iloc[test_index,:]), y.iloc[test_index]
M2.write("ADD HERE A TABLE WITH SHAPE OF THE DATA: SAMPLES NUMBER AND WAVELENGTHS OR CHEMICAL VALUES NUMBER FOR TRAIN AND TEST")
# Summary statistics of the reference values for the train set, the test set
# and the full data set, written out through M2.
# Small helpers: skewness and kurtosis are computed with axis=0 and bias=True.
sk = lambda x: skew(x, axis=0, bias=True)
ku = lambda x:kurtosis(x, axis=0, bias=True)
# Coefficient of variation in percent: 100 * standard deviation / mean.
cv = lambda x: x.std()*100/x.mean()
M2.write('Loaded data summary')
M2.write(f'The loaded spectra consist of {spectra.shape[1]} wavelengths')
# One column per statistic; every list is ordered train, test, total to match
# the index assigned below.
datainf = pd.DataFrame()
datainf['N samples'] = [X_train.shape[0], X_test.shape[0], spectra.shape[0] ]
datainf['Mean'] = [y_train.mean(), y_test.mean(), y.mean()]
datainf['SD'] = [y_train.std(), y_test.std(), y.std()]
datainf['CV(%)'] = [cv(y_train), cv(y_test), cv(y)]
datainf['Skewness'] = [sk(y_train), sk(y_test), sk(y)]
datainf['Kurtosis'] = [ku(y_train), ku(y_test), ku(y)]
datainf.index = ['Train', 'Test', 'Total']
# Values are rounded to three decimals for display only; datainf itself is
# not modified by round().
M2.write(datainf.round(3))
#######################################
regression_algo = M1.selectbox("Choose the algorithm for regression", options=reg_algo, key = 12)
......@@ -121,6 +138,10 @@ if not spectra.empty and not y.empty:
M2.write('-- Table of selected wavelengths --')
M2.table(rega[0])
elif regression_algo == reg_algo[4]:
Reg = PlsR(x_train = X_train, x_test = X_test, y_train = y_train, y_test = y_test)
reg_model = Reg.model_
################# Model analysis ############
if regression_algo in reg_algo[1:]:
yc = Reg.pred_data_[0]
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment