From c0f3f8edc9dbc5cb81d0d307bfbe158e2dc5a788 Mon Sep 17 00:00:00 2001
From: DIANE <abderrahim.diane@cefe.cnrs.fr>
Date: Thu, 11 Jul 2024 16:04:22 +0200
Subject: [PATCH] report

---
 src/Report/report.py | 103 ++++++++++++++++++++++++++++++-------------
 1 file changed, 72 insertions(+), 31 deletions(-)

diff --git a/src/Report/report.py b/src/Report/report.py
index 78029e5..dfbdab5 100644
--- a/src/Report/report.py
+++ b/src/Report/report.py
@@ -40,7 +40,8 @@ def report(*args):
     \usepackage{graphicx}
     \usepackage{geometry}
     \usepackage{changepage}
-    \geometry{a4paper, left=2cm, right=2cm, top=1.5cm, bottom=3cm }
+    \geometry{a4paper, left=2cm, right=2cm, top=1.5cm, bottom=3cm,
+      headheight=0.05cm, footskip=1cm}
     \usepackage{caption, subcaption}
     \usepackage{hyperref}
     \usepackage{hyphenat}
@@ -56,14 +57,30 @@ def report(*args):
     \graphicspath{{images/}, {Figures/}}    
     \fancyhead[R]{\includegraphics[width=0.1\textwidth]{logo_cefe.png}}
     \fancyhead[L]{PACE - NIRS Analysis Report}
-
     \fancyfoot[L]{Project Name to fill}
     \fancyfoot[C]{Plateforme d'Analyses Chimiques en Ecologie}
     \fancyfoot[R]{\thepage}
-
     \setlength{\headheight}{52pt}
     \addtolength{\topmargin}{-9.2942pt}
     \pagestyle{fancy}
+
+    \DeclareCaptionLabelFormat{myfigureformat}{\textbf{Fig. #2.}}
+    \captionsetup[figure]{
+        labelformat=myfigureformat, % Apply the custom format
+        justification=centering, % Center the caption text
+        singlelinecheck=false, % Allow the caption to occupy multiple lines
+        labelsep=space, % Add a space after the label
+    }
+    \DeclareCaptionLabelFormat{mytableformat}{\textbf{Table #2}}
+    \captionsetup[table]{
+        labelformat=mytableformat, % Apply the custom format
+        justification=justified, % Justify the caption text
+        singlelinecheck=false, % Allow the caption to occupy multiple lines
+        skip=0pt, % Vertical space between caption and table
+        position=top % Position the caption at the top of the table
+    }
+
+    
     \begin{document}
     
     \noindent
@@ -76,26 +93,33 @@ def report(*args):
     \textbf{ENTERED INPUTS: }{"""+ re.sub(r'([_%])', r"\\\1", to_report[1])+ r"""}.\\"""
     latex_report += r"""\section*{Results}"""
     latex_report += r"""\subsection*{Spectral data visualization}"""
-    latex_report += r"""Acquired spectra were visualized in fig1 by plotting the intensity
-    of absorption, reflectance, transmission, etc, against the wavelengths or wavenumbers.
-    This helps observe general patterns and trends in the spectra, and understand the 
-    variability within the data.
+    latex_report += r"""Acquired spectra were visualized in fig.\ref{raw_spectra} by plotting the signal of the samples captured in the specific spectral range.
+    This helps observe general patterns and trends in the spectra, and understand the variability within the data.
     \begin{figure}[h]
     \centering
     \includegraphics[width=1\linewidth]{spectra_plot.png}
     \caption{Acquired spectra}
-    \label{fig:raw_spectra}
+    \label{raw_spectra}
     \end{figure}"""
 
     if 'Representative subset selection' in to_report:
         latex_report += r"""\subsection*{Multivariable Data Analysis}"""
-        latex_report += r""" For optimal selection of subset of the samples to analyze through the
-          reference method, a pipeline consisting of consecutively applying features extraction/dimensionality
+        latex_report += r"""\indent For optimal selection of subset of the samples to analyze through the \cite{Lesnoff2020}
+          reference method, a pipeline consisting of consecutively applying features extraction/dimensionality\cite{BellonMaurel2010,scikit-learn}
             reduction and clustering analysis was developed. Features extraction was performed by means of {"""+dim_red_methods[to_report[2]] + r"""} 
             technique which helps represent the high dimensional spectra in a reduced perceptible 3D
             subspace spanned by a few number of features (three features in our case), while clustering analysis was performed
               using the {"""+cluster_methods[to_report[3]] + r"""} technique which
-              helps group the data into groups of spectra that share the same carachteristics.\\*"""
+              helps group the data into groups of spectra that share the same characteristics. """
+        
+        latex_report += r"""After applying the pipeline, a subset sampling method, consisting of"""
+        if 'center' in to_report:
+            latex_report += r""" selecting {"""+to_report[7]+ r"""} samples, each from a distinct cluster, with the least Euclidean distance to the center of the cluster identified by {"""+to_report[3]+ r"""} and to which the sample belongs, was applied."""
+        if 'random' in to_report:
+              latex_report += r""" fitting a second clustering model, specifically kmeans, to each individual data cluster and selecting {"""+to_report[7]+ r"""}
+                 samples or less from each subcluster (if a subcluster contains less than {"""+to_report[7]+ r"""} samples, then all samples included
+                  in this subcluster are selected), was applied.\\"""
+                  
         if "PCA" in to_report:
             latex_report += r"""\indent To detect the presence of any spectral outliers, the influence and residuals plots were constructed,
               with outlyingness limits established at the 95\% confidence level. Together, these plots helps distinguish regular observations,
@@ -104,10 +128,10 @@ def report(*args):
                     large residual distance to the subspace, but whose projection is on the subspace; and, finally, bad leverage
                       points, which have a large residual distance such that the projection on the subspace is away from regular observations.\\*"""
               
-        latex_report += """\indent Results of applying this workflow are displayed in fig. 1. Based of the features extracted using
-          {"""+to_report[2]+ r"""}, {"""+to_report[3]+ r"""} revealed the  existance of {"""+to_report[5] + r"""}
+        latex_report += r"""\indent Results of applying this workflow are displayed in fig. 2. Based on the features extracted using
+         {"""+to_report[2]+ r"""}, {"""+to_report[3]+ r"""} revealed the existence of {"""+to_report[5] + r"""}
             data clusters that are visualized with different colors.
-        \begin{figure}[h]
+        \begin{figure}[h!]
         \captionsetup{justification=centering}
             \centering
             \begin{minipage}[b]{0.33\textwidth}
@@ -123,13 +147,12 @@ def report(*args):
             \caption{Illustration of the pairwise projection of spectra onto the reduced 3 dimensional subspace, clustering, and sample selection
             results: data points with the same color belong to the same cluster and data points colored in black correspond to the samples to be
             analyzed by a standard reference analytical procedure}
-        \label{pcaplots}
-        \end{figure}"""
-        latex_report +=r""" """
+            \label{pcaplots}
+        \end{figure} """
     
         if 'PCA' in to_report:
             latex_report += r"""
-            \begin{figure}[ht]
+            \begin{figure}[h!]
             \centering
             \begin{minipage}[b]{0.33\textwidth}
                 \centering
@@ -143,17 +166,9 @@ def report(*args):
         \label{hotelling_and_influence}
         \end{figure}
         """
-        
-        latex_report += r"""Following the exploratory data analysis, a subset sampling method, consisting of"""
-        if 'center' in to_report:
-            latex_report += r""" selecting {"""+to_report[7]+ r"""} samples, each from a distict cluster, with the least euclidian distance to the center of the cluster identified by {"""+to_report[3]+ r"""} and to which it the sample belongs."""
-        if 'random' in to_report:
-              latex_report += r""" fitting a second clustering model, specifically kmeans, to each individual data cluster and selecting {"""+to_report[7]+ r"""}
-                 samples or less from each subcluster (if a subcluster contains less than 3 samples, then all samples included
-                  in this subcluster are selected), was applied."""
-                  
-        latex_report += r"""The subset of selected samples are identified to be representative and are suggested to be used for robust NIR calibration developement
-         , i.e, to be analyzed by adequate reference analytical procedures (generally requiring destructive sample preparation)."""
+        latex_report += r"""A subset of {"""+to_report[8]+ r"""} samples were identified and selected to be representative and were suggested to be used for robust NIR calibration development,
+         i.e., to be analyzed by adequate reference analytical procedures (generally requiring destructive sample preparation).
+         """
 
     elif 'Predictive model development' in to_report:
         latex_report += r"""\paragraph{}To develop a robust NIR calibration that formally correlates the spectral signature of the samples in the NIR region
@@ -186,6 +201,21 @@ def report(*args):
 
         
         latex_report += r"""Predictive modelling development was performed using the {"""+reg_algo[to_report[6]]+ r"""} regression method."""
+        latex_report += r"""
+        For fig.\ref{CV}
+            \begin{figure}[h]
+            \captionsetup{justification=centering}
+            \centering
+            \begin{minipage}[c]{0.5\textwidth}
+                \includegraphics[width=\linewidth]{meas_vs_pred_cv_onebyone.png}
+            \end{minipage}%
+            \begin{minipage}[c]{0.5\textwidth}
+                \includegraphics[width=\linewidth]{meas_vs_pred_cv_all.png}
+            \end{minipage}%
+            \caption{ Visualization of measured vs predicted values scatter plot for cross-validation }
+        \label{CV}
+        \end{figure}"""
+
         if "Full-PLSR" in to_report:
             latex_report += r"""the most important and influential spectral regions in the model, were visualized in fig.5"""
         elif "Locally Weighted PLSR" in to_report:
@@ -207,15 +237,26 @@ def report(*args):
             \begin{figure}[h]
             \centering
             \includegraphics[width=1\linewidth]{Variable_importance.png}
-            \caption{Visualizing important spectral regions identifiedin the PLS model on the raw and preprocessed average spectrum}
+            \caption{Visualizing important spectral regions identified in the PLS model on the raw and preprocessed average spectrum}
             \label{fig:Histogram}
             \end{figure}
             """
         
         latex_report += r"""After numerically analyzing the performance of the model, a visual investigation (figs 7 and 8) of goodness of model fit was performed to identify potential
           issues such as a pattern, that has not been captured by the model, or outliers.\par.
-          """
 
+        \begin{figure}[h]
+        \captionsetup{justification=centering}
+            \centering
+            \begin{minipage}[b]{0.5\textwidth}
+                \includegraphics[width=\linewidth]{measured_vs_predicted.png}
+            \end{minipage}%
+            \begin{minipage}[b]{0.5\textwidth}
+                \includegraphics[width=\linewidth]{residuals_plot.png}
+            \end{minipage}%
+            \caption{Post-hoc analysis of the developed predictive model; measured vs predicted values (a) and measured vs residuals (b) plots }
+        \label{pcaplots}
+        \end{figure}"""            
     latex_report += r"""
     \fontsize{8}{9}\selectfont
     \bibliographystyle{apalike}
-- 
GitLab