Skip to content
Snippets Groups Projects
Commit 0bfc99f9 authored by DIANE's avatar DIANE
Browse files

dx file load

parent 5a56913d
No related branches found
No related tags found
No related merge requests found
from Packages import *
import jcamp as jc
class DxReader:
class DxRead:
'''This class is designed to help retrieve spectral data as well as metadata of samples from a JCAMP file'''
def __init__(self, path):
self.__path = path.replace('\\','/')
#self.__path = path.replace('\\','/')
self.__path = path
self.__dxfile = jc.jcamp_readfile(self.__path)
# Access samples data
......@@ -23,25 +25,25 @@ class DxReader:
for i in range(self.__nb): # Loop over the blocks
specs[i] = self.__list_of_blocks[i]['y']
block_met = { 'name':self.__list_of_blocks[i]['title'],
'origin':self.__list_of_blocks[i]['origin'],
'date':self.__list_of_blocks[i]['date'],
'time':self.__list_of_blocks[i]['time'],
'spectrometer/data system':self.__list_of_blocks[i]['spectrometer/data system'],
'instrumental parameters':self.__list_of_blocks[i]['instrumental parameters'],
'xunits':self.__list_of_blocks[i]['xunits'],
'yunits':self.__list_of_blocks[i]['yunits'],
'xfactor':self.__list_of_blocks[i]['xfactor'],
'yfactor':self.__list_of_blocks[i]['yfactor'],
'firstx':self.__list_of_blocks[i]['firstx'],
'lastx':self.__list_of_blocks[i]['lastx'],
'firsty':self.__list_of_blocks[i]['firsty'],
'miny': self.__list_of_blocks[i]['miny'],
'maxy': self.__list_of_blocks[i]['maxy'],
'npoints':self.__list_of_blocks[i]['npoints'],
'concentrations':self.__list_of_blocks[i]['concentrations'],
'deltax':self.__list_of_blocks[i]['deltax'],
block = self.__list_of_blocks[i]
block_met = { 'name': block['title'],
'origin': block['origin'],
'date': block['date'],
'time': block['time'],
'spectrometer/data system': block['spectrometer/data system'],
'instrumental parameters': block['instrumental parameters'],
'xunits': block['xunits'],
'yunits': block['yunits'],
'xfactor': block['xfactor'],
'yfactor': block['yfactor'],
'firstx': block['firstx'],
'lastx': block['lastx'],
'firsty':block['firsty'],
'miny': block['miny'],
'maxy': block['maxy'],
'npoints': block['npoints'],
'concentrations':block['concentrations'],
'deltax':block['deltax']
}
self.__met[f'{i}'] = block_met
self.metadata_ = pd.DataFrame(self.__met).T
......@@ -69,7 +71,7 @@ class DxReader:
cc[df.index[i]] = self.conc(df[str(i)])
### dataframe containing chemical data
self.chem_data = pd.DataFrame(cc, index=elements_name).T
self.chem_data = pd.DataFrame(cc, index=elements_name).T.astype(float)
### Method for retrieving the concentration of a single sample
def conc(self,sample):
......
......@@ -2,86 +2,74 @@ from Packages import *
class LinearPCA:
# Fits a PCA model on X and keeps the scores, loadings, per-sample leverage,
# Q residuals and Hotelling T2 values, exposed through the properties at the end.
# `PCA`, `pd` and `np` are not imported in the visible lines; presumably they come
# from `from Packages import *` (PCA is presumably scikit-learn's) - TODO confirm.
# NOTE(review): this span is a rendered diff hunk. Indentation is lost and removed
# and added lines of the same class sit side by side, so several statements below
# exist in two variants. Confirm against the repository which variant is current
# before reusing any of it.
def __init__(self, X, Ncomp=10):
# X: data matrix; its index/columns are read as row/variable names when it is a DataFrame.
# Ncomp: number of principal components passed to PCA(n_components=...).
## define color palette to use for plotting
#self.__palette = 'YlGn'
#numerical_data, categorical_data, scaled_values = col_cat(X)
#self.catdata = list(categorical_data.columns)
## input matrix
# Variant reading .columns and .index unconditionally (X must then be a DataFrame).
self.__x = X
self._varnames = X.columns
self._rownames = X.index
# Variant converting a DataFrame to a NumPy array and accepting other inputs as-is.
# NOTE(review): in this variant the else branch never sets _rownames, although
# self._r below uses it as index - confirm non-DataFrame input is really supported.
if isinstance(X, pd.DataFrame):
self.__x = X.to_numpy()
self._rownames = X.index
else:
self.__x = X
## set the number of components to compute and fit the model
self.__ncp = Ncomp
# Fit PCA model
M = PCA(n_components = self.__ncp)
M.fit(self.__x)
######## results ########
# Explained variability
######## results ########
# Results
# Column labels of the form 'PC<k>(<pct>%)'; the ratio is rounded to 2 decimals
# before being multiplied by 100.
self.__pcnames = [f'PC{i+1}({100 * M.explained_variance_ratio_[i].round(2)}%)' for i in range(self.__ncp)]
# Explained variance ratio per component, expressed in percent.
self._Qexp_ratio = pd.DataFrame(100 * M.explained_variance_ratio_, columns = ["Qexp"], index= [f'PC{i+1}' for i in range(self.__ncp)])
# Loadings and scores
#scores
s = M.transform(self.__x)
# NOTE(review): the scores are stored under two names (__t and _t) - one per variant.
self.__t = s
self._t = s
# Scores rescaled column-wise to the [-1, 1] interval (min-max scaling).
self._r = pd.DataFrame(2*(s-s.min(axis=0))/(s.max(axis=0)-s.min(axis=0)) -1, index= self._rownames)
self._r.columns = self.__pcnames
# Normalize each loading vector to have unit length
# NOTE(review): two assignments to _p follow; the second (plain transpose of
# M.components_) overwrites the normalised one when both lines are executed.
self._p = (M.components_ / np.linalg.norm(M.components_, axis=0)).T
self._p = M.components_.T
self._t = M.transform(self.__x)
# Squared singular values of the fitted model, also kept as a diagonal matrix.
self.eigvals = M.singular_values_**2
self.Lambda = np.diag(self.eigvals)
# Matrix reconstruction or prediction making
#
# Variant 1: one residual column per component, stored in a DataFrame.
self.res = pd.DataFrame()
for i in range(self.__ncp):
# Rank-1 reconstruction of the input from component i alone.
self._xp = np.dot(self.__t[:,i].reshape((-1,1)), self._p[:,i].reshape((1,-1)))
# residuals
self._e = self.__x - self._xp
# Per-sample sum of squared residuals (diagonal of E @ E.T).
self.res[self.__pcnames[i]] = np.diag(self._e@self._e.T)
#self._res = pd.DataFrame( self._e, columns = self._varnames, index = self._rownames )
# Variant 2: results collected in dicts keyed by the component index i.
self.T2 = {}
self._xp = {}
self._qres = {}
self.leverage = {}
# NOTE(review): _xp was just initialised as a dict and is indexed by component in
# the loop below; this array assignment looks like a leftover of the other variant.
self._xp = self.__t @ self._p.T
#
for i in range(self.__ncp):
# Matrix reconstruction- prediction
# Reconstruction using the first i+1 components.
self._xp[i] = np.dot(self._t[:,:i+1], self._p.T[:i+1,:])
# NOTE(review): the next comment mentions cosine similarity, but no such
# computation appears in the visible lines.
# Compute the cosine similarity between the normalized loading vectors
self.lev = {}
## Leverage: leverage values range between 0 and 1
for i in range(self._t.shape[1]):
ti = self._t[:,i].reshape((-1,1))
# NOTE(review): Hat is computed here but not used in this loop; the raw score
# column is what gets stored under the component label.
Hat = ti @ np.linalg.pinv(np.transpose(ti) @ ti) @ np.transpose(ti)
self.lev[self._r.columns[i]] = ti.ravel()
self.leverage = pd.DataFrame(self.lev)
# Q residuals: Q residuals represent the magnitude of the variation remaining in each sample after projection through the model
# Per-sample sum of squared differences between the input and its reconstruction.
self._qres[i] = np.diag(np.subtract(self.__x, self._xp[i])@ np.subtract(self.__x, self._xp[i]).T)
# Per-sample sum of squared scores over the first i+1 components (not divided by
# the eigenvalues in this variant).
self.T2[i] = np.diag(self._t[:,:i+1] @ np.transpose(self._t[:,:i+1]))
# Leverage
# Diagonal of the hat matrix built from the first i+1 score columns, divided by its trace.
Hat = self._t[:,:i+1] @ np.linalg.inv(np.transpose(self._t[:,:i+1]) @ self._t[:,:i+1]) @ np.transpose(self._t[:,:i+1])
self.leverage[i] = np.diag(Hat) / np.trace(Hat)
## Hotelling t2
self.eigvals = M.singular_values_**2
self.Lambda = np.diag(self.eigvals)
self.T2 = pd.DataFrame()
# NOTE(review): tt is not used afterwards in the visible lines.
tt = self._r.to_numpy()
for i in range(self._t.shape[1]):
# Per-sample squared score of component i divided by Lambda[i, i].
self.T2[self.__pcnames[i]] = np.diag(self.__t[:,i].reshape((-1,1)) @ np.linalg.inv(np.array(self.Lambda[i,i]).reshape((1,1))) @ np.transpose(self.__t[:,i].reshape((-1,1))))
# NOTE(review): each accessor below shows two return paths (one per variant); with
# both present only the first `return` would ever execute.
@property
def scores_(self):
# Scores as a DataFrame: rescaled scores (_r) in one variant, raw scores (_t) in the other.
return pd.DataFrame(self._r)
return pd.DataFrame(self._t, columns= self.__pcnames)
@property
def loadings_(self):
# Loadings as a DataFrame with one column per component label.
return pd.DataFrame(self._p, columns=self.__pcnames, index=self._varnames)
return pd.DataFrame(self._p, columns=self.__pcnames)
@property
def leverage_(self):
# Leverage values, relabelled with the component labels in the second variant.
return self.leverage
lev = pd.DataFrame(self.leverage)
lev.columns =self.__pcnames
return lev
@property
# NOTE(review): two def lines follow (`residuals` and `residuals_`) - one per variant.
def residuals(self):
return self.res
def residuals_(self):
# Q residuals as a DataFrame with one column per component label.
res = pd.DataFrame(self._qres)
res.columns=self.__pcnames
return res
@property
def hotelling_(self):
# Hotelling T2 values, relabelled with the component labels in the second variant.
#return pd.DataFrame(self.T2)
return self.T2
\ No newline at end of file
hot = pd.DataFrame(self.T2)
hot.columns=self.__pcnames
return hot
\ No newline at end of file
from Packages import *
class PlsR:
    """Partial least squares regression helper bound to a train/test split.

    The four arrays are stored untouched on the instance; ``fit_`` only reads
    the training pair.
    """

    def __init__(self, x_train, y_train, x_test, y_test):
        """Keep references to the calibration and validation data.

        Args:
            x_train: predictors used for calibration.
            y_train: response values matching ``x_train``.
            x_test: predictors held out for validation.
            y_test: response values matching ``x_test``.
        """
        self.x_train = x_train
        self.x_test = x_test
        self.y_train = y_train
        self.y_test = y_test

    def fit_(self):
        """Cross-validate PLS models with 1 to 20 latent variables.

        For every candidate number of components a 5-fold cross-validated
        prediction of ``y_train`` is computed and scored with the root mean
        squared error. The scores are printed, as before, and also returned.

        Returns:
            list of float: RMSE of cross-validation, one entry per number of
            latent variables (index 0 is one component).
        """
        nlv = 20
        rmse = []
        for i in range(nlv):
            # The loop index drives the model size. Previously every iteration
            # fitted 20 components, producing 20 identical scores.
            m = PLSRegression(n_components=i + 1)
            ycv = cross_val_predict(m, self.x_train, self.y_train, cv=5)
            # mean_squared_error yields the MSE; take the square root so the
            # stored value matches the name of the list.
            rmse.append(mean_squared_error(self.y_train, ycv) ** 0.5)
        print(rmse)
        return rmse
......@@ -7,3 +7,4 @@ from .LWPLSR_ import model_LWPLSR
from .Regression_metrics import metrics
from .VarSel import TpeIpls
from .Miscellaneous import resid_plot, reg_plot
from .DxReader import DxRead
from Class_Mod import LinearPCA, Umap, find_col_index, PinardPlsr, model_LWPLSR, list_files, metrics, TpeIpls, reg_plot, resid_plot, Sk_Kmeans
from Class_Mod import LinearPCA, Umap, find_col_index, PinardPlsr, model_LWPLSR, list_files, metrics, TpeIpls, reg_plot, resid_plot, Sk_Kmeans, DxRead
# find_col_index
from Class_Mod.Miscellaneous import prediction, download_results
......@@ -44,7 +44,7 @@ from sklearn.metrics import pairwise_distances_argmin_min
## Web app construction
import streamlit as st
from tempfile import NamedTemporaryFile
# help on streamlit input https://docs.streamlit.io/library/api-reference/widgets
#Library for connecting to SQL DB
......
......@@ -15,40 +15,73 @@ influence, hotelling, qexp = st.columns([2, 2, 1])
with container1:
col1.header("NIRS Data Loading", divider='blue')
col1.header("Data Loading", divider='blue')
col2.header("Spectral Data Visualization", divider='blue')
with col1:
# loader for csv file containing NIRS spectra
sselectx_csv = st.file_uploader("Load NIRS Data", type="csv", help=" :mushroom: select a csv matrix with samples as rows and lambdas as columns", key=5)
if sselectx_csv is not None:
# Select list for CSV delimiter
psep = st.selectbox("Select csv separator - _detected_: " + str(find_delimiter('data/'+sselectx_csv.name)), options=[";", ","], index=[";", ","].index(str(find_delimiter('data/'+sselectx_csv.name))), key=9)
# Select list for CSV header True / False
phdr = st.selectbox("indexes column in csv? - _detected_: " + str(find_col_index('data/'+sselectx_csv.name)), options=["no", "yes"], index=["no", "yes"].index(str(find_col_index('data/'+sselectx_csv.name))), key=31)
if phdr == 'yes':
col = 0
else:
col = False
data_import = pd.read_csv(sselectx_csv, sep=psep, index_col=col)
st.success("The data have been loaded successfully", icon="")
## Visualize spectra
# loader for csv file containing NIRS spectra
sselectx_csv = col1.file_uploader("Load NIRS Data", type=["csv","dx"], help=" :mushroom: select a csv matrix with samples as rows and lambdas as columns", key=5)
if sselectx_csv is not None:
with col2:
fig, ax = plt.subplots(figsize = (30,7))
data_import.T.plot(legend=False, ax = ax, color = 'blue')
ax.set_xlabel('Wavelength/Wavenumber', fontsize=18)
ax.set_ylabel('Signal', fontsize=18)
plt.margins(x = 0)
st.pyplot(fig)
st.write("Summary")
info = pd.DataFrame({'N':[data_import.shape[0]],
'Min': [np.min(data_import)],
'Max':[np.max(data_import)],}, index = ['Values']).T
info.rename_axis('information')
st.table(data=info)
# Load the uploaded spectra (CSV matrix or JCAMP-DX file) into `data_import`, then
# plot them and show a short summary table.
# The loader is chosen from the file extension. splitext() only looks at the last
# dot, so names such as "batch.2024.csv" are recognised, and lower() accepts
# upper-case extensions too.
test = os.path.splitext(sselectx_csv.name)[1].lower()
tmp_path = None  # only set for .dx uploads, which are spilled to a temporary file
try:
    if test == '.csv':
        with col1:
            # Run each detection helper once and reuse the result for both the label
            # and the default selection (assumes the helpers are plain detection
            # functions without side effects - TODO confirm).
            csv_path = 'data/' + sselectx_csv.name
            detected_sep = str(find_delimiter(csv_path))
            detected_idx = str(find_col_index(csv_path))
            # Select list for CSV delimiter
            psep = st.selectbox("Select csv separator - _detected_: " + detected_sep, options=[";", ","], index=[";", ","].index(detected_sep), key=9)
            # Select list for CSV header True / False
            phdr = st.selectbox("indexes column in csv? - _detected_: " + detected_idx, options=["no", "yes"], index=["no", "yes"].index(detected_idx), key=31)
            col = 0 if phdr == 'yes' else False
            data_import = pd.read_csv(sselectx_csv, sep=psep, index_col=col)
            st.success("The data have been loaded successfully", icon="")
    elif test == '.dx':
        # DxRead is given a path, so the upload is first written to a temporary file.
        with NamedTemporaryFile(delete=False, suffix=".dx") as tmp:
            # Record the path before writing so the finally clause can remove the
            # file even if the write itself fails.
            tmp_path = tmp.name
            tmp.write(sselectx_csv.read())
        with col1:
            data = DxRead(path = tmp_path)
            data_import = data.specs_df_
            st.success("The data have been loaded successfully", icon="")

    if test in ('.csv', '.dx'):
        ## Visualize spectra (shared by both file types)
        with col2:
            fig, ax = plt.subplots(figsize = (30,7))
            data_import.T.plot(legend=False, ax = ax, color = 'blue')
            ax.set_xlabel('Wavelength/Wavenumber', fontsize=18)
            ax.set_ylabel('Signal', fontsize=18)
            plt.margins(x = 0)
            st.pyplot(fig)
            st.write("Summary")
            info = pd.DataFrame({'N':[data_import.shape[0]],
                                 'Min': [np.min(data_import)],
                                 'Max':[np.max(data_import)],}, index = ['Values']).T
            # rename_axis returns a new frame; keep it so the axis label is shown.
            info = info.rename_axis('information')
            st.table(data=info)
finally:
    # Remove the temporary .dx file even when loading or plotting raised.
    if tmp_path is not None:
        os.unlink(tmp_path)
######################################################################################
############################## Exploratory data analysis ###############################
......@@ -116,30 +149,24 @@ with container2:
)
st.plotly_chart(fig, use_container_width = True)
# Influence plot: leverage against Q residuals for the selected component.
with influence:
    st.write('Influence plot')
    ax1 = st.selectbox("Component", options=model.scores_.columns, index=3)
    leverage = model.leverage_
    residuals = model.residuals_
    fig = px.scatter(x=leverage[ax1], y=residuals[ax1], color = leverage[ax1]*residuals[ax1]).update_layout(xaxis_title="Leverage",yaxis_title="Residuals")
    st.plotly_chart(fig)
# Hotelling T2 against Q residuals for the selected component.
with hotelling:
    st.write('T²-Hotelling vs Q residuals plot')
    ax2 = st.selectbox("Component", options=model.scores_.columns, index=4)
    # NOTE(review): this rebinds `hotelling`, the column container entered just above,
    # to the T2 table for the rest of the run; the name is kept as in the original.
    hotelling = model.hotelling_
    # x and y are passed as columns directly, so no data_frame argument is needed.
    # The previous call passed `t`, which is no longer assigned in this section.
    fig = px.scatter(x=hotelling[ax2], y=residuals[ax2]).update_layout(xaxis_title="Hotelling T²",yaxis_title="Residuals")
    st.plotly_chart(fig)
with qexp:
    pass
else:
st.markdown('Select a dimensionality reduction technique from the dropdown list')
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment