diff --git a/src/Class_Mod/Miscellaneous.py b/src/Class_Mod/Miscellaneous.py index cdf6b6296d833c4c861fa6073481e603d8e6f977..4280bb31df9e9516443cd3f5000d67523ee2865c 100644 --- a/src/Class_Mod/Miscellaneous.py +++ b/src/Class_Mod/Miscellaneous.py @@ -113,10 +113,10 @@ def plot_spectra(df, xunits, yunits): min = np.max(df.columns) df.T.plot(legend=False, ax = ax, color = 'blue').invert_xaxis() - plt.annotate(text = f'The total number of spectra is {df.shape[0]}', xy =(min, np.max(df)), size=20, color = 'black', backgroundcolor='red') ax.set_xlabel(xunits, fontsize=18) ax.set_ylabel(yunits, fontsize=18) plt.margins(x = 0) + plt.tight_layout() return fig diff --git a/src/Packages.py b/src/Packages.py index 7b80d48264ca5daab1f27235270c2d5766466382..f0da4f3775aa4cb01e83451457a0cdd3612f528f 100644 --- a/src/Packages.py +++ b/src/Packages.py @@ -10,6 +10,7 @@ import random import datetime import numpy as np import pandas as pd +from itertools import combinations import zipfile from matplotlib import colors from matplotlib.colors import Normalize diff --git a/src/Report/report.py b/src/Report/report.py index 3d1e2d2362cbcdf268003c243d2939167d9debe1..7e79ae71cc3d3bbf2d861ce94e3b91696f980082 100644 --- a/src/Report/report.py +++ b/src/Report/report.py @@ -2,8 +2,20 @@ import subprocess from pathlib import Path import os import pandas as pd +import os.path +def intersect(l1, l2): + return l1.intersection(set(l2)) +def check(file): + return os.path.isfile(file) def report(*args): + dim_red_methods= {'PCA':'Principal Components Analysis (PCA)', + 'UMAP':'Uniform Manifold Approximation and Projection (UMAP)', + 'NMF':'Non-negative Matrix Factorization (NMF)'} # List of dimensionality reduction algos + cluster_methods = {'Kmeans':'Kmeans', + 'HDBSCAN':'Hierarchical Density-Based Spatial Clustering of Applications with Noise (HDBSCAN)', + 'AP':'Affinity Propagation (AP)'} # List of clustering algos + selec_strategy = {'center':'PCA','random':'PCA'} to_report=[] j=0 for arg in args: @@ -15,18 +27,18 @@ def report(*args): df_name = 'df' + str(j) j+=1 globals()[df_name] = arg - #print(to_report) + latex_report = "" latex_report += r"""\documentclass[a4paper,10pt]{article} \usepackage{fancyhdr} \usepackage{graphicx} \usepackage{geometry} \geometry{a4paper, left=2cm, right=2cm, top=1.5cm, bottom=3cm } - \usepackage{caption} - \usepackage{subcaption} + \usepackage{caption, subcaption} \usepackage{hyperref} \usepackage{hyphenat} \usepackage{booktabs} + \usepackage{times} \usepackage{etoolbox,fancyhdr,xcolor} \newcommand{\headrulecolor}[1]{\patchcmd{\headrule}{\hrule}{\color{#1}\hrule}{}{}} \newcommand{\footrulecolor}[1]{\patchcmd{\footrule}{\hrule}{\color{#1}\hrule}{}{}} @@ -35,376 +47,113 @@ def report(*args): \renewcommand{\footrulewidth}{1pt} \footrulecolor{red!100}% \graphicspath{{images/}, {Figures/}} - \fancyhf{} \fancyhead[R]{\includegraphics[width=0.1\textwidth]{logo_cefe.png}} \fancyhead[L]{PACE - NIRS Analysis Report} - + \fancyfoot[L]{Project Name to fill} \fancyfoot[C]{Plateforme d'Analyses Chimiques en Ecologie} \fancyfoot[R]{\thepage} - - \setlength{\headheight}{15mm} + + \setlength{\headheight}{52pt} + \addtolength{\topmargin}{-9.2942pt} \pagestyle{fancy} - - \usepackage{times} - \begin{document} \noindent \begin{center} \textbf{{\Large NIRS WORKFLOW REPORT}} \\ \end{center}""" - - if 'sample' in to_report: + if 'sample_selection' in to_report: latex_report += r"""\noindent - \textbf{QUERY: } You asked for Sample selection. - Specify bibliographic refs \cite{Lesnoff2020} to describe the algorithms used.\\ + \textbf{QUERY MADE: } Sample selection performing.\\ \noindent - \textbf{INPUTS:} Specify here input file names \& parameters.\\ - - """ - - if 'model' in to_report: - latex_report += r"""\subsection*{Data Visualization} - - Here we have a sub-heading. There is no blank line after the sub-heading. You can have one level of subheadings but not a third i.e. you cannot have Section 1.1.1 as a subheading. - \begin{center} - Spectral Data Visualization - \end{center} + \textbf{ENTERED INPUTS: }{"""+to_report[1] + r"""}\\ + \textbf{PRINCIPLE OF RESPONSE TO THE QUERY:} Representative subset selection has + been performed using the "sample selection" workflow that consists of applying + a sequence of data processing techniques, specifically, dimensionality reduction, + clustering, and samples selection techniques.""" - \begin{figure}[ht] - \centering - \includegraphics[width=1\linewidth]{""" + to_report[1] + r"""} - \label{fig:votre_graphique} - \end{figure} - \textbf{Exploratory Data Analysis-Multivariable Data Analysis} - \begin{figure}[ht] - \centering - \includegraphics[width=1\linewidth]{""" + to_report[2] + r"""} - \label{fig:votre_graphique} - \end{figure} - - """ - - # Ajout de la conversion DataFrame en LaTeX - latex_report += r""" - \begin{center} - """ + df0.to_latex(escape=True) + r""" - \end{center} - """ - - latex_report += r""" - - - After a list, you must leave a single blank line and remember to add the indent if you are starting a new paragraph. - \begin{center} - II - Model creation - \end{center} - """ - if 'full_plsr' in to_report: - latex_report += r""" - Model Choisi : Full Plsr - -- Spectral preprocessing info -- - """ - latex_report += r""" - \begin{center} - """ + df1.to_latex(escape=True) + r""" - \end{center} - """ - latex_report += r""" - Model performance - """ - latex_report += r""" - \begin{center} - """ + df2.to_latex(escape=True) + r""" - \end{center} - """ - - latex_report += r""" - \newpage - Cross Validation - \begin{figure}[ht] - \centering - \includegraphics[width=0.9\linewidth]{""" + to_report[3] + r"""} - \label{fig:votre_graphique} - \end{figure} - After a list, you must leave a single blank line and remember to add the indent if you are starting a new paragraph. - After a list, you must leave a single blank line and remember to add the indent if you are starting a new paragraph. - -- Cross-Validation Summary-- - """ - latex_report += r""" - \begin{center} - """ + df3.to_latex(escape=True) + r""" - \end{center} - -- Out-of-Fold Predictions Visualization (All in one) -- - \begin{figure}[ht] - \centering - \includegraphics[width=0.9\linewidth]{""" + to_report[4] + r"""} - \label{fig:votre_graphique} - \end{figure} - - III - Model Diagnosis - \begin{figure}[h] + latex_report += r"""\section*{RESULTS}""" + latex_report += r"""\subsection*{Spectral data visualization}""" + latex_report += r"""Acquired spectra were visualized in fig1 by plotting the intensity + of absorption, reflectance, transmission, etc, against the wavelengths or wavenumbers. + This helps observe general patterns and trends in the spectra, and understand the + variability within the data. + \begin{figure}[h] \centering - \begin{minipage}[b]{0.5\linewidth} - \centering - \includegraphics[width=1.2\linewidth]{""" + to_report[5] + r"""} - \end{minipage}% - \begin{minipage}[b]{0.5\linewidth} - \centering - \includegraphics[width=1.2\linewidth]{""" + to_report[6] + r"""} - \end{minipage}% - \caption{\label{fig:frog}PCA} - \end{figure} - - """ - - - - - - - if 'predict' in to_report: - latex_report += r"""\noindent - \textbf{QUERY: } You asked for Predictions. - Specify bibliographic refs \cite{scikit-learn} to describe the algorithms used.\\ - \noindent - \textbf{INPUTS:} Specify here input file names \& parameters. - """ - - latex_report += r"""\section*{RESULTS} - Please follow this template exactly and DO NOT CHANGE THE FONT SIZE, MARGINS, HEADER, FOOTER, ETC OF THE TEMPLATE. - Start typing your paper in this font in this way. Section headings are to be in 10pt bold and full caps. Number headings consecutively. Leave a single blank line before each Section Heading and one blank line between the heading and the first line of text. When you want to start a new paragraph do not drop a line do it simply like this. Use MS Word/Latex formatting exactly as given in this paragraph. Do NOT change the spacing 'before' and 'after' to try and squeeze more in. - """ - latex_report += r""" - This is now a new paragraph which starts with a tab space and when you are starting new sentences you must put a space after the full stop. Just like this. Text should be single spaced, left and right justified, providing 25.4mm left margin and 25.4mm right margin. Leave a 25.4mm margin at top and a 25.4mm margin at bottom and the page size should be set as A4. You will also notice that the font to use is Times New Roman and the font size is 10pt. - """ - if 'sample' and 'csv' in to_report: - latex_report += r"""\subsection*{Data Visualization} - - Here we have a sub-heading. There is no blank line after the sub-heading. You can have one level of subheadings but not a third i.e. you cannot have Section 1.1.1 as a subheading. - \begin{center} - Spectral Data Visualization - \end{center} - - \begin{figure}[ht] - \centering - \includegraphics[width=1\linewidth]{""" + to_report[3] + r"""} - \label{fig:votre_graphique} - \end{figure} - - \textbf{Exploratory Data Analysis-Multivariable Data Analysis} - """ - if 'hdb' in to_report: - latex_report += r""" - \begin{figure}[ht] - \centering - \includegraphics[width=0.5\linewidth]{""" + to_report[5] + r"""} - \label{fig:votre_graphique} - \end{figure} - """ - if 'kmeans' in to_report or 'AP' in to_report: - latex_report += r""" - \begin{figure}[h] - \centering - \begin{minipage}[b]{0.33\linewidth} - \centering - \includegraphics[width=\linewidth]{""" + to_report[4] + r"""} - \end{minipage}% - \begin{minipage}[b]{0.33\linewidth} - \centering - \includegraphics[width=\linewidth]{""" + to_report[5] + r"""} - \end{minipage}% - \caption{\label{fig:frog}PCA} - \end{figure} - """ - - latex_report += r""" - This is now a new paragraph which starts with a tab space and when you are starting new sentences you must put a space after the full stop. Just like this. Text should be single spaced, left and right justified, providing 25.4mm left margin and 25.4mm right margin. Leave a 25.4mm margin at top and a 25.4mm margin at bottom and the page size should be set as A4. You will also notice that the font to use is Times New Roman and the font size is 10pt. - - """ - latex_report += r""" - This is now a new paragraph which starts with a tab space and when you are starting new sentences you must put a space after the full stop. Just like this. Text should be single spaced, left and right justified, providing 25.4mm left margin and 25.4mm right margin. Leave a 25.4mm margin at top and a 25.4mm margin at bottom and the page size should be set as A4. You will also notice that the font to use is Times New Roman and the font size is 10pt. - """ - - latex_report += r""" - - \begin{center} - Graphique des PCA - \end{center} + \includegraphics[width=1\linewidth]{spectra_plot.png} + \caption{Acquired spectra} + \label{fig:raw_spectra} + \end{figure}""" + + latex_report += r"""\subsection*{Multivariable Data Analysis}""" + latex_report += r""" For optimal selection of subset of the samples to analyze through the + reference method, a workflow consisting of consecutively applying features extraction/dimensionality + reduction and clustering analysis was developed. Features extraction was performed by means of {"""+dim_red_methods[to_report[2]] + r"""} + technique which helps represent the high dimensional spectra in a reduced perceptible 3D + subspace spanned by a few number of features (three features in our case), on of the spectra. + While clustering analysis was performed using the {"""+cluster_methods[to_report[3]] + r"""} technique which + helps group the data into groups of spectra that share the same carachteristics. + This workflow is widely used in the world of spectral data analysis for detecting outliers, + analysis the homogenity of the data, reducing the computational costs prior to supervised predictive modelling, etc.\\*""" + if "PCA" in to_report: + latex_report += r"""\indent To detect the presence of any spectral outliers, the influence and residuals plots were constructed, + with outlyingness limits established at the 95\% confidence level. Together, these plots helps distinguish Regular Observations (ROs), + which form a homogeneous group near the subspace generated by the PCs; Good Leverage Points (GLPs), + which are at the same plane as the subspace but distant from the ROs; Orthogonal Observations (OOs), which have a + large residual distance to the subspace, but whose projection is on the subspace; and, finally, Bad Leverage + Points (BLPs), which have a large residual distance such that the projection on the subspace is away from ROs.\\*""" + + latex_report += """\indent Results of applying this workflow are displayed in fig. 1. Based of the features extracted using + {"""+to_report[2]+ r"""}, {"""+to_report[3]+ r"""} revealed the existance of {"""+to_report[5] + r"""} + data clusters that are visualized with different colors. \begin{figure}[h] \centering - \begin{minipage}[b]{0.33\linewidth} + \begin{minipage}[b]{0.33\textwidth} \centering - \includegraphics[width=\linewidth]{""" + to_report[6] + r"""} + \includegraphics[width=\linewidth]{scores_pc1_pc2.png} \end{minipage}% - \begin{minipage}[b]{0.33\linewidth} + \begin{minipage}[b]{0.33\textwidth} \centering - \includegraphics[width=\linewidth]{""" + to_report[7] + r"""} + \includegraphics[width=\linewidth]{scores_pc1_pc3.png} \end{minipage}% - \begin{minipage}[b]{0.33\linewidth} + \begin{minipage}[b]{0.33\textwidth} \centering - \includegraphics[width=\linewidth]{""" + to_report[8] + r"""} + \includegraphics[width=\linewidth]{scores_pc2_pc3.png} \end{minipage} - \caption{\label{fig:frog}PCA} - \end{figure} - """ - latex_report += r""" - This is now a new paragraph which starts with a tab space and when you are starting new sentences you must put a space after the full stop. Just like this. Text should be single spaced, left and right justified, providing 25.4mm left margin and 25.4mm right margin. Leave a 25.4mm margin at top and a 25.4mm margin at bottom and the page size should be set as A4. You will also notice that the font to use is Times New Roman and the font size is 10pt. - - - \begin{center} - \end{center} - \begin{figure}[h] + \caption{The pairwise projection of spectra on the reduced 3D subspace.} + \label{pcaplots} + \end{figure}""" + + if 'PCA' in to_report: + latex_report += r""" + \begin{figure} \centering - \begin{minipage}[b]{0.45\linewidth} + \begin{minipage}[b]{0.33\textwidth} \centering - \includegraphics[width=\linewidth]{""" + to_report[9] + r"""} + \includegraphics[width=\linewidth]{influence_plot.png} \end{minipage}% - \hspace{0.05\linewidth} % Adds space between the two images - \begin{minipage}[b]{0.45\linewidth} + \begin{minipage}[b]{0.33\textwidth} \centering - \includegraphics[width=\linewidth]{""" + to_report[10] + r"""} + \includegraphics[width=\linewidth]{hotelling_plot.png} \end{minipage} - \caption{\label{fig:frog}PCA} + \caption{The pairwise projection of spectra on the reduced 3D subspace.} + \label{hotelling_and_influence} \end{figure} """ - latex_report+= r""" - This is now a new paragraph which starts with a tab space and when you are starting new sentences you must put a space after the full stop. Just like this. Text should be single spaced, left and right justified, providing 25.4mm left margin and 25.4mm right margin. Leave a 25.4mm margin at top and a 25.4mm margin at bottom and the page size should be set as A4. You will also notice that the font to use is Times New Roman and the font size is 10pt. - - \textbf{Selected samples for chemical analysis} - \newline - \newline - \newline - """ - num_samples = to_report[0] - nb_clu=to_report[1] - - if 'kmeans' in to_report or 'AP' in to_report: - latex_report += df0.to_latex(escape=True) - latex_report += r""" - - \begin{itemize} - \item The total number of samples: """ + str(num_samples) + r""" - \item The number of selected samples for chemical analysis: """ + str(nb_clu) + r""" - \end{itemize} + + latex_report += r"""Following the exploratory data analysis, a subset sampling method, consisting of""" + if 'random' in to_report: + latex_report += r""" selecting the sample with the least euclidian distance to the center of each data cluster identified by {"""+to_report[3]+ r"""},""" + if 'center' in to_report: + latex_report += r""" fitting a second clustering model, specifically kmeans, to each data cluster and selecting + 3 samples or less from each subcluster (if a subcluster contains less than 3 samples, then all samples included + in this subcluster are selected)," - After a list, you must leave a single blank line and remember to add the indent if you are starting a new paragraph. + the center was applied to select representative samples to be used for robust NIR calibration developement + , i.e, to be analyzed by adequate reference analytical procedures (generally requiring destructive sample preparation)""" - """ - if 'sample' and 'dx' in to_report: - latex_report += r"""\subsection*{Data Visualization} - \begin{center} - Graphique des PCA - \end{center} - \begin{figure}[h] - \centering - \begin{minipage}[b]{0.33\linewidth} - \centering - \includegraphics[width=\linewidth]{plot_axe1_axe2.png} - \end{minipage}% - \begin{minipage}[b]{0.33\linewidth} - \centering - \includegraphics[width=\linewidth]{plot_axe1_axe3.png} - \end{minipage}% - \begin{minipage}[b]{0.33\linewidth} - \centering - \includegraphics[width=\linewidth]{plot_axe2_axe3.png} - \end{minipage} - \caption{\label{fig:frog}PCA} - \end{figure} - """ latex_report += r""" - This is now a new paragraph which starts with a tab space and when you are starting new sentences you must put a space after the full stop. Just like this. Text should be single spaced, left and right justified, providing 25.4mm left margin and 25.4mm right margin. Leave a 25.4mm margin at top and a 25.4mm margin at bottom and the page size should be set as A4. You will also notice that the font to use is Times New Roman and the font size is 10pt. - """ - if 'model' in to_report: - latex_report += r""" - \subsection*{Sample Selection} - - You can have as many sub-headings in a section as you want to. Note that sub-headings have a 6pt spacing after them rather than a blank line but they are preceded by a blank line. The number of sections and sub-sections is up to you, as are the titles of each of them and this will be driven by the content of your report. - - \subsection*{Model Creation} - - You can have as many sub-headings in a section as you want to. Note that sub-headings have a 6pt spacing after them rather than a blank line but they are preceded by a blank line. The number of sections and sub-sections is up to you, as are the titles of each of them and this will be driven by the content of your report. - """ - if 'predict' in to_report: - latex_report += r""" - \subsection*{Predictions} - - You can have as many sub-headings in a section as you want to. Note that sub-headings have a 6pt spacing after them rather than a blank line but they are preceded by a blank line. The number of sections and sub-sections is up to you, as are the titles of each of them and this will be driven by the content of your report. - """ - - if 'help' in to_report: - latex_report += r"""\subsection*{How to include equations, figures and tables?} - - Some of us like to include a formula or two and these should be referred to in the test in the form equation 1. You must type in equations using the equation editor and all symbols should be explained within the text of your manuscript. However you may prefer to include a separate section detailing all nomenclature. Never paste equations in from a paper or other source. Leave a blank line before and after the equation: - - \begin{equation} - x = \frac{-b \pm \sqrt{b^2 - 4ac} }{2a} - \end{equation} - - where $x$ is a number; $a$, $b$, and $c$ are other numbers. - - \begin{equation} - BCP = 176R+28G+46B - \end{equation} - - where, $BCP$, $R$, $G$ and $B$ are also useful numbers. - - Figure \ref{fig_regression} shows a graph. Figures / diagrams / photos are to be centred, with the reference and caption printed below the figure. The lettering used in the illustrations should be easily legible. Illustrations are to be referred to as figures, and must be quoted in the text. One blank line should be left between the figure caption and the next paragraph. Please ensure that all figures are of the highest quality. Keep figures as simple as possible. Avoid excessive notes. Photographs must have a resolution of at least 300 dpi. The use of colour is allowed on figures. Note that the test does not wrap around the figure but if you do not like the wasted space then it is acceptable to put two figures side by side but they must be e.g. a 'Figure \ref{fig_bridge1}' and 'Figure \ref{fig_bridge2}' and not 'Figure 1' and 'Figure 2'. See the example given as Figure \ref{fig_twobridges}. However all figures must be as close as possible to the location where they are first referred to in the text. The figure is also not surrounded by an unnecessary box/border. Remove this when inserting figures from MS Excel. - - - \begin{figure}[ht] - \centering - \includegraphics[height=6.6cm]{figures/fig_regression} - \caption{An interesting plot (note that figure captions go BELOW THE FIGURE).} - \label{fig_regression} - \end{figure} - - - \begin{figure}[ht] - \centering - \begin{subfigure}[b]{0.45\textwidth} - \centering - \includegraphics[width=\textwidth]{figures/fig_bridge1} - \caption{Bridge 1} - \label{fig_bridge1} - \end{subfigure} - \hfill - \begin{subfigure}[b]{0.45\textwidth} - \centering - \includegraphics[width=\textwidth]{figures/fig_bridge2} - \caption{Bridge 2} - \label{fig_bridge2} - \end{subfigure} - \caption{Walton-on-Thames Bridge (a) wide shot and (b) the underside of the deck - (photos taken by P. J. Vardanega, used with permision)} - \label{fig_twobridges} - \end{figure} - - - \begin{table}[h] - \begin{center} - \caption{Summary of the database (note that table captions go ABOVE THE TABLE)} - \begin{tabular}{ |l|c|l| } - \hline - \textbf{Column heading} & \textbf{Column heading} & \textbf{Column heading} \\ \hline - Table Text & 10 & Falling head permeameter \\ \hline - Concrete & 2 & Strong \\ \hline - Steel & 1 & Stronger \\ \hline - Timber & 3 & Weak \\ - \hline - \end{tabular} - \label{tab_materials} - \end{center} - \end{table} - """ - latex_report += r"""\newpage - \section*{ACKNOWLEDGEMENTS} - This tool is provided by the Chemical Analysis Platform for Ecology - Montpellier, France.\\ - Thanks to Abderrahim DIANE, Mouhcine MAIMOUNI, Alexandre Granier, Remy Beugnon, Vincent Negre et Nicolas Barthes.\\ - Source code available at \href{https://src.koda.cnrs.fr/cefe/pace/nirs_workflow}{CNRS forge}. - \fontsize{8}{9}\selectfont \bibliographystyle{apalike} % \bibliographystyle{abbrv} @@ -443,4 +192,4 @@ def compile_latex(): # open the report proc = subprocess.Popen([str(filename[:-4]) + '.pdf'], cwd = filename_path, shell=True) proc.communicate() -# compile_latex() +# compile_latex() \ No newline at end of file diff --git a/src/pages/1-samples_selection.py b/src/pages/1-samples_selection.py index cc196e8096bc9ae1dbe250b8d94c96fa6055c361..824dda053cb8ca7503f3bc2d1c936060bf66af8b 100644 --- a/src/pages/1-samples_selection.py +++ b/src/pages/1-samples_selection.py @@ -16,10 +16,8 @@ add_header() #load specific model page css local_css(css_file / "style_model.css") -#define some variables -tcr=pd.DataFrame() -sam=pd.DataFrame() -sam1=pd.DataFrame() + +# algorithms available in our app dim_red_methods=['', 'PCA','UMAP', 'NMF'] # List of dimensionality reduction algos cluster_methods = ['', 'Kmeans','HDBSCAN', 'AP'] # List of clustering algos selec_strategy = ['center','random'] @@ -51,22 +49,25 @@ if st.session_state["interface"] == 'advanced': st.header("I - Spectral Data Visualization", divider='blue') col2, col1 = st.columns([3, 1]) - ## Preallocation of data structure -spectra = pd.DataFrame -meta_data = pd.DataFrame -selected_samples = pd.DataFrame +spectra = pd.DataFrame() +meta_data = pd.DataFrame() +tcr=pd.DataFrame() +sam=pd.DataFrame() +sam1=pd.DataFrame() +selected_samples = pd.DataFrame() non_clustered = None -colnames = [] -rownames = [] l1 = [] +labels = [] +color_palette = None +dr_model = None # dimensionality reduction model +cl_model = None # clustering model + # loader for datafile data_file = col1.file_uploader("Load NIRS Data", type=["csv","dx"], help=" :mushroom: select a csv matrix with samples as rows and lambdas as columns", key=5) - if data_file: # Retrieve the extension of the file test = data_file.name[data_file.name.find('.'):] - ## Load .csv file if test== '.csv': with col1: @@ -84,7 +85,6 @@ if data_file: spectra, md_df_st_ = col_cat(imp) meta_data = md_df_st_ st.success("The data have been loaded successfully", icon="✅") - ## Load .dx file elif test == '.dx': # Create a temporary file to save the uploaded file @@ -106,18 +106,36 @@ if not spectra.empty: spectra.index = rownames with col2: + fig, ax = plt.subplots(figsize = (30,7)) if test =='.dx': - if meta_data.loc[:,'xunits'][0] == '1/cm': - lab = 'Wavenumber (1/cm)' - else: - lab = 'Wavelength (nm)' - fig = plot_spectra(spectra, xunits = lab, yunits = meta_data.loc[:,'yunits'][0]) + lab = ['Wavenumber (1/cm)' if meta_data.loc[:,'xunits'][0] == '1/cm' else 'Wavelength (nm)'] + if lab[0] =='Wavenumber (1/cm)': + spectra.T.plot(legend=False, ax = ax).invert_xaxis() + else : + spectra.T.plot(legend=False, ax = ax) + ax.set_xlabel(lab[0], fontsize=18) else: - fig = plot_spectra(spectra, xunits = 'Wavelength/Wavenumber', yunits = 'Signal intensity') - + spectra.T.plot(legend=False, ax = ax) + ax.set_xlabel('Wavelength/Wavenumber', fontsize=18) + + ax.set_ylabel('Signal intensity', fontsize=18) + plt.margins(x = 0) + plt.tight_layout() st.pyplot(fig) - fig.savefig("./Report/figures/Spectra_Plot.png") - + + # Update the size of plot axis for exprotation to report + l, w = fig.get_size_inches() + fig.set_size_inches(8, 3) + for label in (ax.get_xticklabels()+ax.get_yticklabels()): + ax.xaxis.label.set_size(10) + ax.yaxis.label.set_size(10) + plt.tight_layout() + fig.savefig("./Report/figures/spectra_plot.png", dpi=400) ## Export report + fig.set_size_inches(l, w)# reset the plot size to its original size + data_info = pd.DataFrame({'Name': [data_file.name], + 'Number of scanned samples': [spectra.shape[0]]}, + index = ['Input file']) + st.write(data_info) ## table showing the number of samples in the data file ############################## Exploratory data analysis ############################### st.header("II - Exploratory Data Analysis-Multivariable Data Analysis", divider='blue') @@ -125,13 +143,9 @@ scores, loadings, pc = st.columns([2, 3, 0.5]) influence, hotelling, qexp = st.columns([2, 2, 1]) st.header('III - Selected samples for chemical analysis', divider='blue') -dr_model = None # dimensionality reduction model -cl_model = None # clustering model - ###### 1- Dimensionality reduction ###### t = pd.DataFrame # scores p = pd.DataFrame # loadings -labels = [] if not spectra.empty: dim_red_method = pc.selectbox("Dimensionality reduction techniques: ", options = dim_red_methods, index = default_reduction_option, key = 37) clus_method = pc.selectbox("Clustering techniques: ", options = cluster_methods, index = default_clustering_option, key = 38) @@ -174,7 +188,7 @@ if not t.empty: if clus_method == cluster_methods[1]: cl_model = Sk_Kmeans(tcr, max_clusters = 25) ncluster = scores.number_input(min_value=2, max_value=25, value=cl_model.suggested_n_clusters_, label = 'Select the desired number of clusters') - fig2 = px.scatter(cl_model.inertia_.T, y = 'inertia') + fig2 = px.bar(cl_model.inertia_.T, y = 'inertia') scores.write(f"Suggested n_clusters : {cl_model.suggested_n_clusters_}") scores.plotly_chart(fig2,use_container_width=True) img = pio.to_image(fig2, format="png") @@ -335,37 +349,28 @@ if not t.empty: tt = tcr.iloc[selected_samples_idx,:] fig.add_scatter3d(x = tt.loc[:,axis1], y = tt.loc[:,axis2], z = tt.loc[:,axis3], mode ='markers', marker = dict(size = 5, color = 'black'), - name = 'selected samples') - - plt.savefig("./Report/Figures/test.png") + name = 'selected samples') st.plotly_chart(fig, use_container_width=True) if labels: num_clusters = len(np.unique(labels)) custom_color_palette = px.colors.qualitative.Plotly[:num_clusters] - color_discrete_sequence=custom_color_palette # Créer et exporter le graphique Axe1-Axe2 en PNG - fig_axe1_axe2 = px.scatter(tcr, x=axis1, y=axis2, color=labels if list(labels) else None, color_discrete_sequence=custom_color_palette) - fig_axe1_axe2.update_layout(title='Axe1-Axe2') - fig_axe1_axe2.update_traces(marker=dict(size=4)) - fig_axe1_axe2.write_image("./Report/Figures/plot_axe1_axe2.png") - - - # Créer et exporter le graphique Axe1-Axe3 en PNG - fig_axe1_axe3 = px.scatter(tcr, x=axis1, y=axis3, color=labels if list(labels) else None, color_discrete_sequence=custom_color_palette) - fig_axe1_axe3.update_layout(title='Axe1-Axe3') - fig_axe1_axe3.update_traces(marker=dict(size=4)) - fig_axe1_axe3.write_image("./Report/Figures/plot_axe1_axe3.png") + comb = [i for i in combinations([1,2,3], 2)] + subcap = ['a','b','c'] + for i in range(len(comb)): + fig_axe1_axe2 = px.scatter(tcr, x=eval(f'axis{str(comb[i][0])}'), y=eval(f'axis{str(comb[i][1])}'), + color=labels if list(labels) else None, + color_discrete_sequence= custom_color_palette) + fig_axe1_axe2.update_layout(font=dict(size=23)) + fig_axe1_axe2.add_annotation(text= f'({subcap[i]})', align='center', showarrow= False, xref='paper', yref='paper', x=-0.13, y= 1, + font= dict(color= "black", size= 35), bgcolor ='white', borderpad= 2, bordercolor= 'black', borderwidth= 3) + fig_axe1_axe2.update_traces(marker=dict(size= 10), showlegend= False) + fig_axe1_axe2.write_image(f'./Report/Figures/scores_pc{str(comb[i][0])}_pc{str(comb[i][1])}.png') - # Créer et exporter le graphique Axe2-Axe3 en PNG - fig_axe2_axe3 = px.scatter(tcr, x=axis2, y=axis3, color=labels if list(labels) else None, color_discrete_sequence=custom_color_palette) - fig_axe2_axe3.update_layout(title='Axe2-Axe3') - fig_axe2_axe3.update_traces(marker=dict(size=4)) - fig_axe2_axe3.write_image("./Report/Figures/plot_axe2_axe3.png") - if not spectra.empty: if dim_red_method == dim_red_methods[1] or dim_red_method == dim_red_methods[3]: @@ -405,13 +410,12 @@ if not spectra.empty: # Export du graphique img = pio.to_image(fig, format="png") - with open("./Report/figures/graphe_loadings.png", "wb") as f: + with open("./Report/figures/loadings_plot.png", "wb") as f: f.write(img) ############################################################################################################# if dim_red_method == dim_red_methods[1]: with influence: st.write('Influence plot') - # Laverage Hat = t.to_numpy() @ np.linalg.inv(np.transpose(t.to_numpy()) @ t.to_numpy()) @ np.transpose(t.to_numpy()) leverage = np.diag(Hat) / np.trace(Hat) @@ -444,12 +448,11 @@ if not spectra.empty: elif not meta_data.empty and not clus_method: l1 = list(map(str.lower,md_df_st_[col])) - - - fig = px.scatter(x = leverage, y = residuals, color = l1) + fig = px.scatter(x = leverage, y = residuals, color=labels if list(labels) else None, + color_discrete_sequence= custom_color_palette) fig.add_vline(x = tresh3, line_width = 1, line_dash = 'solid', line_color = 'red') fig.add_hline(y=tresh4, line_width=1, line_dash='solid', line_color='red') - fig.update_layout(xaxis_title="Leverage", yaxis_title = "Residuals") + fig.update_layout(xaxis_title="Leverage", yaxis_title = "Q-residuals", font=dict(size=20), width=800, height=600) out3 = leverage > tresh3 out4 = residuals > tresh4 @@ -460,17 +463,23 @@ if not spectra.empty: ann = meta_data.loc[:,'name'][i] else: ann = t.index[i] - fig.add_annotation(dict(x = leverage[i], y = residuals[i], showarrow=True, text = ann, + fig.add_annotation(dict(x = leverage[i], y = residuals[i], showarrow=True, text = ann,font= dict(color= "black", size= 15), xanchor = 'auto', yanchor = 'auto')) - - st.plotly_chart(fig, use_container_width = True) - img = pio.to_image(fig, format="png") - with open("./Report/figures/graphe_influence.png", "wb") as f: - f.write(img) + + fig.update_traces(marker=dict(size= 6), showlegend= True) + fig.update_layout(font=dict(size=23), width=800, height=500) + st.plotly_chart(fig, use_container_width=True) + + for annotation in fig.layout.annotations: + annotation.font.size = 35 + fig.update_layout(font=dict(size=23), width=800, height=600) + fig.update_traces(marker=dict(size= 10), showlegend= False) + fig.write_image('./Report/figures/influence_plot.png', engine = 'kaleido') + with hotelling: - st.write('T²-Hotelling vs Q residuals plot') + st.write('T²-Hotelling vs Q-residuals plot') # Hotelling hotelling = t.var(axis = 1) # Q residuals: Q residuals represent the magnitude of the variation remaining in each sample after projection through the model @@ -481,8 +490,9 @@ if not spectra.empty: tresh0 = (3 * (I ** 2 - 1) * fcri) / (I * (I - 3)) tresh1 = sc.stats.chi2.ppf(0.05, df = 3) - fig = px.scatter(t, x = hotelling, y = residuals, color = l1) - fig.update_layout(xaxis_title="T²",yaxis_title="Q-Residuals") + fig = px.scatter(t, x = hotelling, y = residuals, color=labels if list(labels) else None, + color_discrete_sequence= custom_color_palette) + fig.update_layout(xaxis_title="Hotelling-T² distance",yaxis_title="Q-residuals") fig.add_vline(x=tresh0, line_width=1, line_dash='solid', line_color='red') fig.add_hline(y=tresh1, line_width=1, line_dash='solid', line_color='red') @@ -496,32 +506,27 @@ if not spectra.empty: ann = meta_data.loc[:,'name'][i] else: ann = t.index[i] - fig.add_annotation(dict(x = hotelling[i], y = residuals[i], showarrow=True, text = ann, + fig.add_annotation(dict(x = hotelling[i], y = residuals[i], showarrow=True, text = ann, font= dict(color= "black", size= 15), xanchor = 'auto', yanchor = 'auto')) - + + fig.update_traces(marker=dict(size= 6), showlegend= True) + fig.update_layout(font=dict(size=23), width=800, height=500) st.plotly_chart(fig, use_container_width=True) - fig.write_image("./Report/figures/graphe_hotelling.png", format="png") - #st.write() - #st.write() + + + for annotation in fig.layout.annotations: + annotation.font.size = 35 + fig.update_layout(font=dict(size=23), width=800, height=600) + fig.update_traces(marker=dict(size= 10), showlegend= False) + fig.write_image("./Report/figures/hotelling_plot.png", format="png") + + + Nb_ech = str(tcr.shape[0]) nb_clu = str(sam1.shape[0]) -Ac_Km = ['Spectra_Plot.png', 'Elbow.png', 'graphe_loadings.png', 'plot_axe1_axe2.png', 'plot_axe1_axe3.png', 'plot_axe2_axe3.png', 'graphe_hotelling.png', 'graphe_influence.png'] - -# Streamlit container -with st.container(): - if st.button("Download report"): - if test == '.csv': - if dim_red_method == dim_red_methods[1] and clus_method == cluster_methods[1]: - latex_report = report.report(sam, tcr, Nb_ech, nb_clu, 'sample', Ac_Km, 'csv', 'kmeans') - report.compile_latex() - elif dim_red_method == dim_red_methods[1] and clus_method == cluster_methods[2]: - latex_report = report.report(sam, tcr, Nb_ech, nb_clu, 'sample', Ac_Km, 'csv', 'hdb') - report.compile_latex() - elif dim_red_method == dim_red_methods[1] and clus_method == cluster_methods[3]: - latex_report = report.report(sam, tcr, Nb_ech, nb_clu, 'sample', Ac_Km, 'csv', 'AP') - report.compile_latex() - else: - latex_report = report.report(sam, 'sample', 'dx') +# figs_list = os.listdir("./Report/figures") +if data_file: + with st.container(): + if st.button("Download report"): + latex_report = report.report('sample_selection', data_file.name, dim_red_method, clus_method, Nb_ech, ncluster, selection, nb_clu,tcr, sam) report.compile_latex() - else: - pass \ No newline at end of file