Skip to content
Snippets Groups Projects
Commit 48636f7f authored by DIANE's avatar DIANE
Browse files

packages import

parent b7cf71c8
No related branches found
No related tags found
No related merge requests found
......@@ -24,7 +24,7 @@ import plotly.express as px
from tempfile import NamedTemporaryFile
import numpy as np
from datetime import datetime
import json
from utils.data_parsing import JcampParser, CsvParser
from style.layout import UiComponents
......
......@@ -8,12 +8,8 @@ st.set_page_config(page_title = "NIRS Utils", page_icon = ":goat:", layout = "wi
# layout
UiComponents(pagespath = pages_folder, csspath= css_file,imgpath=image_path ,
header=True, sidebar= True, bgimg=False, colborders=True)
hash_ = ''
def p_hash(add):
    """Fold *add* into the module-level running hash and return the new value.

    NOTE(review): mutates the module-global ``hash_`` via ``hash_data``
    (imported elsewhere in this file) — presumably used as a cache-busting
    key for ``st.cache_data``; confirm against the callers.
    """
    global hash_
    combined = hash_ + str(add)
    hash_ = hash_data(combined)
    return hash_
# Initialize the variable in session state if it doesn't exist for st.cache_data
if 'counter' not in st.session_state:
st.session_state.counter = 0
......@@ -89,7 +85,7 @@ match file:
for i in ["xcal_csv", "ycal_csv"]:
stringio = StringIO(eval(f'{i}.getvalue().decode("utf-8")'))
xy_str += str(stringio.read())
p_hash([xy_str + str(xcal_csv.name) + str(ycal_csv.name), hdrx, sepx, hdry, sepy])
# p_hash([xy_str + str(xcal_csv.name) + str(ycal_csv.name), hdrx, sepx, hdry, sepy])
# p_hash(add = )
@st.cache_data
......@@ -152,15 +148,17 @@ match file:
tmp_path = tmp.name
with open(tmp.name, 'r') as dd:
dxdata = dd.read()
p_hash(str(dxdata)+str(data_file.name))
# p_hash(str(dxdata)+str(data_file.name))
## load and parse the temp dx file
@st.cache_data
def dx_loader(change):
    # Cached wrapper: parse the temporary .dx file once per `change` value,
    # then delete the temp file so it is not re-read on cache hits.
    # NOTE(review): `change` is unused in the body — it only serves as the
    # st.cache_data cache key; confirm this is intentional.
    # NOTE(review): this calls read_dx(file=...), but the read_dx defined
    # just below takes a `tmp_path` parameter and does not unlink the file —
    # this looks like a leftover from a refactor; confirm which version
    # of the loader is actually intended to run.
    chem_data, spectra, meta_data, meta_data_st = read_dx(file = tmp_path)
    os.unlink(tmp_path)
    return chem_data, spectra, meta_data, meta_data_st
chem_data, spectra, meta_data, meta_data_st = dx_loader(change = hash_)
def read_dx(tmp_path):
    """Parse the JCAMP-DX file at *tmp_path*.

    Returns, in order: the parser's chemical data, the spectra DataFrame,
    the metadata DataFrame and the standardised metadata DataFrame.
    """
    parser = JcampParser(path = tmp_path)
    parser.parse()
    return parser.chem_data, parser.specs_df_, parser.md_df_, parser.md_df_st_
chem_data, spectra, meta_data, meta_data_st = read_dx(tmp_path = tmp_path)
if not spectra.empty:
st.success("Info: The data have been loaded successfully", icon = "")
......@@ -191,13 +189,14 @@ match file:
################################################### BEGIN : visualize and split the data ####################################################
st.subheader("I - Data visualization", divider = 'blue')
if not spectra.empty and not y.empty:
p_hash(y)
p_hash(np.mean(spectra))
# p_hash(y)
# p_hash(np.mean(spectra))
if np.array(spectra.columns).dtype.kind in ['i', 'f']:
colnames = spectra.columns
else:
colnames = np.arange(spectra.shape[1])
from utils.miscellaneous import data_split
X_train, X_test, y_train, y_test, train_index, test_index = data_split(x=spectra, y=y)
......@@ -205,6 +204,7 @@ if not spectra.empty and not y.empty:
#### insight on loaded data
spectra_plot = plot_spectra(spectra, xunits = 'Wavelength/Wavenumber', yunits = "Signal intensity")
target_plot = hist(y = y, y_train = y_train, y_test = y_test, target_name=yname)
from utils.miscellaneous import desc_stats
stats = DataFrame([desc_stats(y_train), desc_stats(y_test), desc_stats(y)], index =['train', 'test', 'total'] ).round(2)
# fig1, ax1 = plt.subplots( figsize = (12, 3))
......@@ -241,7 +241,7 @@ if not spectra.empty and not y.empty:
# select type of supervised modelling problem
var_nature = ['Continuous', 'Categorical']
mode = c4.radio("The nature of the target variable :", options = var_nature)
p_hash(mode)
# p_hash(mode)
match mode:
case "Continuous":
reg_algo = ["", "PLS", "LW-PLS", "TPE-iPLS"]
......@@ -276,7 +276,7 @@ if not spectra.empty and not y.empty:
# st.session_state.model_type = model_type
# increment()
p_hash(model_type)
# p_hash(model_type)
# Training set preparation for cross-validation(CV)
......@@ -293,6 +293,7 @@ if not spectra.empty and not y.empty:
match model_type:
case 'PLS':
from utils.regress import Plsr
Reg = Plsr(train = [X_train, y_train], test = [X_test, y_test], n_iter = 100, cv = nb_folds)
# reg_model = Reg.model_
rega = Reg.selected_features_
......@@ -412,10 +413,10 @@ if not spectra.empty and not y.empty:
it = st.number_input(label = 'Enter the number of iterations', min_value = 2, max_value = 500, value = 250)
else:
s, it = None, None
p_hash(str(s)+str(it))
# p_hash(str(s)+str(it))
remodel_button = st.button('re-model the data', key=4, help=None, type="primary", use_container_width=True, on_click=increment)
p_hash(st.session_state.counter)
# p_hash(st.session_state.counter)
Reg = RequestingModelCreation(change = hash_)
reg_model = Reg.model_
hash_ = hash(Reg)
......
......@@ -10,4 +10,3 @@ Here are all the classes to perform your analysis
# from .clustering import *
# from .samsel import *
# from .regress import *
# from .eval_metrics import *
\ No newline at end of file
from utils.eval_metrics import metrics
import numpy as np
from pandas import DataFrame
## try to automatically detect the field separator within the CSV
def find_delimiter(filename):
......@@ -86,6 +88,7 @@ class KF_CV:
### KFCV(dict) returns a testset indices/Fold
@staticmethod
def CV(x, y, n_folds:int):
from kennard_stone import KFold as ks_KFold
test_folds = {}
folds_name = [f'Fold{i+1}' for i in range(n_folds)]
kf = ks_KFold(n_splits=n_folds, device='cpu')
......@@ -132,12 +135,14 @@ class KF_CV:
r = DataFrame()
r['Predicted'] = ypcv[Fname]
r['Measured'] = y[folds[Fname]]
from sklearn.linear_model import LinearRegression
ols = LinearRegression().fit(DataFrame(y[folds[Fname]]), ypcv[Fname].reshape(-1,1))
r.index = folds[Fname]
r['Folds'] = [f'{Fname} (Predicted = {np.round(ols.intercept_[0], 2)} + {np.round(ols.coef_[0][0],2)} x Measured'] * r.shape[0]
cvcv[i] = r
coeff[Fname] = [ols.coef_[0][0], ols.intercept_[0]]
from pandas import concat
data = concat(cvcv, axis = 0)
data['index'] = [data.index[i][1] for i in range(data.shape[0])]
data.index = data['index']
......
from pandas import DataFrame
import numpy as np
class metrics:
from typing import Optional, List
from pandas import DataFrame
......
import streamlit as st
from pandas import DataFrame
import numpy as np
# predict module
def prediction(NIRS_csv, qsep, qhdr, model):
......@@ -21,16 +22,16 @@ def download_results(data, export_name):
@st.cache_data(show_spinner =True)
def data_split(x, y):
    """Split spectra `x` and target `y` into train/test sets (25 % test).

    Uses the Kennard-Stone algorithm so the training set spans the spectral
    space representatively rather than being drawn at random.

    Returns
    -------
    X_train, X_test, y_train, y_test : DataFrames/Series of the split data.
    train_index, test_index : the row indices assigned to each set.
    """
    from kennard_stone import train_test_split
    # Fix: removed leftover pre-refactor lines that called the undefined
    # `train_test_split_idx` (NameError at runtime) and whose results were
    # immediately overwritten by the call below.
    X_train, X_test, y_train, y_test = train_test_split(x, y, test_size = 0.25, random_state = 42)
    train_index, test_index = X_train.index, X_test.index
    return X_train, X_test, y_train, y_test, train_index, test_index
## descriptive stat
@st.cache_data(show_spinner =True)
def desc_stats(x):
from scipy.stats import skew, kurtosis
a = {}
a['N samples'] = x.shape[0]
a['Min'] = np.min(x)
......
from utils import metrics, Snv, No_transformation, KF_CV, sel_ratio
import numpy as np
from pandas import DataFrame
from utils.eval_metrics import metrics
from scipy.signal import savgol_filter
from sklearn.cross_decomposition import PLSRegression
from hyperopt import fmin, hp, tpe, Trials, space_eval, STATUS_OK, anneal
from utils.data_handling import Snv, No_transformation, KF_CV, sel_ratio
class Regmodel(object):
from hyperopt import fmin, hp, tpe, Trials, space_eval, STATUS_OK, anneal
def __init__(self, train, test, n_iter, add_hyperparams = None, nfolds = 3, **kwargs):
from hyperopt import fmin, hp, tpe, Trials, space_eval, STATUS_OK, anneal
self.SCORE = 100000000
self._xc, self._xt, self._ytrain, self._ytest = train[0], test[0], train[1], test[1]
......
import streamlit as st
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ predictions histogram ~~~~~~~~~~~~~~~~~~~~~~~~~~
@st.cache_data
def pred_hist(pred):
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment