From 65155b6f9f0e4d43bdc55fec6358f4aef10f7afe Mon Sep 17 00:00:00 2001
From: barthes <nicolas.barthes@cefe.cnrs.fr>
Date: Tue, 30 Apr 2024 13:04:24 +0200
Subject: [PATCH] now working Julia LWPLSR model with temp files

---
 .gitignore                    |   3 +-
 requirements.txt              |   2 +-
 src/Class_Mod/LWPLSR_.py      | 150 +++++++++++++++++++++-------------
 src/Class_Mod/LWPLSR_Call.py  |  27 ++++++
 src/Class_Mod/__init__.py     |   2 +-
 src/pages/2-model_creation.py |  36 ++++----
 6 files changed, 145 insertions(+), 75 deletions(-)
 create mode 100644 src/Class_Mod/LWPLSR_Call.py

diff --git a/.gitignore b/.gitignore
index c8c9b25..a025df3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,4 +6,5 @@ src/data/predictions/*
 src/data/sample_selections/*
 src/Report/*.pdf
 src/Report/*.tex
-src/Report/figures/
\ No newline at end of file
+src/Report/figures/
+src/temp/*
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 042ceb5..414efc3 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -15,4 +15,4 @@ umap-learn>=0.5.5
 jcamp>=1.2.2
 mkdocs>=1.5.3
 mkdocs-material>=9.5.18
-mkdocstrings[python]>=0.24.3
\ No newline at end of file
+mkdocstrings[python]>=0.24.3
diff --git a/src/Class_Mod/LWPLSR_.py b/src/Class_Mod/LWPLSR_.py
index 5e86c90..877ca50 100644
--- a/src/Class_Mod/LWPLSR_.py
+++ b/src/Class_Mod/LWPLSR_.py
@@ -1,5 +1,6 @@
-from Packages import *
-from Class_Mod.Miscellaneous import *
+from juliacall import Main as jl
+import numpy as np
+import pandas as pd
 
 class LWPLSR:
     """
@@ -11,36 +12,40 @@ class LWPLSR:
         # prepare to send dataframes to julia and Jchemo
         jl.x_train, jl.y_train, jl.x_test, jl.y_test = self.x_train, self.y_train, self.x_test, self.y_test
         # Pre-treatment of x_train and x_test
-        jl.seval("""
-        # using DataFrames
-        # using Pandas
-        using Jchemo
-        mod1 = Jchemo.model(snv; centr = true, scal = true)
-        mod2 = Jchemo.model(savgol; npoint = 15, deriv = 1, degree = 2)
-        mod = Jchemo.pip(mod1, mod2)
-        Jchemo.fit!(mod, x_train)
-        x_train = Jchemo.transf(mod1, x_train)
-        Jchemo.fit!(mod, x_test)
-        x_test = Jchemo.transf(mod1, x_test)
-        """)
-        jl.seval("""
-        ntrain = nro(x_train)
-        segm = segmkf(ntrain, 4; rep = 5)
-        nlvdis = [5; 10; 15] ; metric = [:mah]
-        h = [1; 2; 6; Inf] ; k = [10; 30; 100]
-        nlv = 0:15
-        pars = mpar(nlvdis = nlvdis, metric = metric, h = h, k = k)
-        println(pars)
-        """)
+        # jl.seval("""
+        # # using DataFrames
+        # # using Pandas
+        # using Jchemo
+        # mod1 = Jchemo.model(snv; centr = true, scal = true)
+        # mod2 = Jchemo.model(savgol; npoint = 15, deriv = 1, degree = 2)
+        # mod = Jchemo.pip(mod1, mod2)
+        # Jchemo.fit!(mod, x_train)
+        # x_train = Jchemo.transf(mod1, x_train)
+        # Jchemo.fit!(mod, x_test)
+        # x_test = Jchemo.transf(mod1, x_test)
+        # """)
+        # jl.seval("""
+        # ntrain = nro(x_train)
+        # segm = segmkf(ntrain, 4; rep = 5)
+        # nlvdis = [5; 10; 15] ; metric = [:mah]
+        # h = [1; 2; 6; Inf] ; k = [10; 30; 100]
+        # nlv = 0:15
+        # pars = mpar(nlvdis = nlvdis, metric = metric, h = h, k = k)
+        # println(pars)
+        # """)
 
         # initialize vars from the class
         y_shape = y_test.shape
         self.scores = pd.DataFrame
         self.predicted_results_on_test = pd.DataFrame
-        self.pred = np.zeros(shape=(y_shape[0], 1))
+        self.predicted_results_on_train = pd.DataFrame
+        self.predicted_results_on_cv = pd.DataFrame
+        self.pred_test = np.zeros(shape=(y_shape[0], 1))
+        self.pred_train = np.zeros(shape=(y_shape[0], 1))
+        self.pred_cv = np.zeros(shape=(y_shape[0], 1))
         self.mod = ""
 
-    def Jchemo_lwplsr(self):
+    def Jchemo_lwplsr_fit(self):
         """Send data to Julia to compute lwplsr.
 
         Args:
@@ -64,35 +69,44 @@ class LWPLSR:
         y_test |> Pandas.DataFrame |> DataFrames.DataFrame
         """)
         # Create LWPLSR model and fit
-        # jl.seval("""
-        # nlvdis = 5 ; metric = :mah
-        # h = 1 ; k = 200 ; nlv = 15 #; scal = true
-        # mod = Jchemo.model(Jchemo.lwplsr; nlvdis, metric, h, k, nlv)
-        # # Fit model
-        # Jchemo.fit!(mod, x_train, y_train)
-        # """)
-
         jl.seval("""
-        mod = Jchemo.model(Jchemo.lwplsr)
-        res = Jchemo.gridcv(mod, x_train, y_train; segm, score = Jchemo.rmsep, pars, nlv, verbose = true).res
-        # u = findall(res.y1 .== minimum(res.y1))[1]
-        # mod = Jchemo.model(lwplsr; nlvdis = res.nlvdis[u], metric = res.metric[u], h = res.h[u], k = res.k[u], nlv = res.nlv[u]) ;
-        # Jchemo.fit!(mod, x_train, y_train)
+        nlvdis = 5 ; metric = :mah
+        h = 1 ; k = 200 ; nlv = 15 #; scal = true
+        mod = Jchemo.model(Jchemo.lwplsr; nlvdis, metric, h, k, nlv)
+        # Fit model
+        Jchemo.fit!(mod, x_train, y_train)
         """)
+
+        # jl.seval("""
+        # mod = Jchemo.model(Jchemo.lwplsr)
+        # res = Jchemo.gridcv(mod, x_train, y_train; segm, score = Jchemo.rmsep, pars, nlv, verbose = true).res
+        # # u = findall(res.y1 .== minimum(res.y1))[1]
+        # # mod = Jchemo.model(lwplsr; nlvdis = res.nlvdis[u], metric = res.metric[u], h = res.h[u], k = res.k[u], nlv = res.nlv[u]) ;
+        # # Jchemo.fit!(mod, x_train, y_train)
+        # """)
         self.mod = jl.mod
 
     def Jchemo_lwplsr_predict(self):
         # Predictions on x_test and store in self.pred
-        self.pred = jl.seval("""
+        self.pred_test = jl.seval("""
         res = Jchemo.predict(mod, x_test)
         res.pred
         """)
-        # convert predicted data from x_test to Pandas DataFrame
-        self.predicted_results_on_test = pd.DataFrame(self.pred)
+        self.pred_train = jl.seval("""
+        res = Jchemo.predict(mod, x_train)
+        res.pred
+        """)
+        self.pred_cv = self.pred_train
+
 
     @property
     def pred_data_(self):
-        return self.predicted_results_on_test, self.predicted_results_on_test, self.predicted_results_on_test
+        # convert the raw train/test predictions to pandas DataFrames (cv reuses the train predictions)
+        self.predicted_results_on_test = pd.DataFrame(self.pred_test)
+        self.predicted_results_on_train = pd.DataFrame(self.pred_train)
+        # self.predicted_results_on_cv = pd.DataFrame(self.pred_cv)
+        self.predicted_results_on_cv = pd.DataFrame(self.pred_train)
+        return self.predicted_results_on_train, self.predicted_results_on_cv, self.predicted_results_on_test
 
     @property
     def model_(self):
@@ -100,24 +114,50 @@ class LWPLSR:
 
     @property
     def metrics_(self):
-        jl.pred = self.pred
-        st.dataframe(self.pred)
-        st.dataframe(self.predicted_results_on_test)
-        st.write('starting metrics')
+        jl.pred_test = self.pred_test
         jl.seval("""
         using Jchemo
         """)
-        scorermsep = jl.seval("""
-            first(Jchemo.rmsep(pred, y_test))
+        scorermsep_test = jl.seval("""
+            first(Jchemo.rmsep(pred_test, y_test))
+            """)
+        scoremr2_test = jl.seval("""
+            first(Jchemo.r2(pred_test, y_test))
+            """)
+        scorerpd_test = jl.seval("""
+            first(Jchemo.rpd(pred_test, y_test))
+            """)
+        scoremsep_test = jl.seval("""
+            first(Jchemo.sep(pred_test, y_test))
             """)
-        scoremr2 = jl.seval("""
-            first(Jchemo.r2(pred, y_test))
+        jl.pred_train = self.pred_train
+        scorermsep_train = jl.seval("""
+            first(Jchemo.rmsep(pred_train, y_train))
             """)
-        scorerpd = jl.seval("""
-            first(Jchemo.rpd(pred, y_test))
+        scoremr2_train = jl.seval("""
+            first(Jchemo.r2(pred_train, y_train))
             """)
-        scoremsep = jl.seval("""
-            first(Jchemo.sep(pred, y_test))
+        scorerpd_train = jl.seval("""
+            first(Jchemo.rpd(pred_train, y_train))
             """)
-        self.scores = pd.DataFrame([[scoremr2, scorermsep, scoremsep, scorerpd]], columns=['r2', 'rmsep', 'msep', 'rpd'], index=['scores'])
+        scoremsep_train = jl.seval("""
+            first(Jchemo.sep(pred_train, y_train))
+            """)
+        jl.pred_cv = self.pred_cv
+        scorermsep_cv = jl.seval("""
+            first(Jchemo.rmsep(pred_cv, y_train))
+            """)
+        scoremr2_cv = jl.seval("""
+            first(Jchemo.r2(pred_cv, y_train))
+            """)
+        scorerpd_cv = jl.seval("""
+            first(Jchemo.rpd(pred_cv, y_train))
+            """)
+        scoremsep_cv = jl.seval("""
+            first(Jchemo.sep(pred_cv, y_train))
+            """)
+
+
+        self.scores = pd.DataFrame([[scoremr2_test, scorermsep_test, scoremsep_test, scorerpd_test]], columns=['r2', 'rmsep', 'msep', 'rpd'], index=['test'])
+        self.scores = pd.concat([self.scores, pd.DataFrame([[scoremr2_train, scorermsep_train, scoremsep_train, scorerpd_train]], columns=['r2', 'rmsep', 'msep', 'rpd'], index = ["train"]), pd.DataFrame([[scoremr2_cv, scorermsep_cv, scoremsep_cv, scorerpd_cv]], columns=['r2', 'rmsep', 'msep', 'rpd'], index = ["cv"])])#
         return self.scores
diff --git a/src/Class_Mod/LWPLSR_Call.py b/src/Class_Mod/LWPLSR_Call.py
new file mode 100644
index 0000000..47bf8c8
--- /dev/null
+++ b/src/Class_Mod/LWPLSR_Call.py
@@ -0,0 +1,27 @@
+import json
+from pathlib import Path
+
+import numpy as np
+from LWPLSR_ import LWPLSR
+
+# Load the four input arrays from temp/<name>.csv (comma-separated, one file per array).
+temp_path = Path("temp/")
+input_names = ['x_train_np', 'y_train_np', 'x_test_np', 'y_test_np']
+arrays = [np.genfromtxt(temp_path / f"{name}.csv", delimiter=',') for name in input_names]
+print('CSV imported')
+print('start model creation')
+Reg = LWPLSR(*arrays)
+print('model created. \n now fit')
+Reg.Jchemo_lwplsr_fit()
+print('now predict')
+Reg.Jchemo_lwplsr_predict()
+
+# Read each property once: both rebuild their DataFrames (metrics_ re-runs Julia) on every access.
+pred_train, pred_cv, pred_test = Reg.pred_data_
+metrics = Reg.metrics_
+json_export = {'pred_data_train': pred_train.to_dict(), 'pred_data_cv': pred_cv.to_dict(),
+               'pred_data_test': pred_test.to_dict(), 'metrics': metrics.to_dict(),
+               'model': str(Reg.model_)}
+with open(temp_path / "lwplsr_outputs.json", "w+") as outfile:
+    json.dump(json_export, outfile)
+print(metrics)
\ No newline at end of file
diff --git a/src/Class_Mod/__init__.py b/src/Class_Mod/__init__.py
index 540a093..155f625 100644
--- a/src/Class_Mod/__init__.py
+++ b/src/Class_Mod/__init__.py
@@ -14,4 +14,4 @@ from .HDBSCAN_Clustering import Hdbscan
 from .SK_PLSR_ import PlsR
 from .PLSR_Preprocess import PlsProcess
 from .NMF_ import Nmf
-from .Ap import AP
\ No newline at end of file
+from .Ap import AP
diff --git a/src/pages/2-model_creation.py b/src/pages/2-model_creation.py
index 986d249..efc2022 100644
--- a/src/pages/2-model_creation.py
+++ b/src/pages/2-model_creation.py
@@ -1,20 +1,11 @@
+import streamlit
 from Packages import *
 st.set_page_config(page_title="NIRS Utils", page_icon=":goat:", layout="wide")
 from Modules import *
 from Class_Mod.DATA_HANDLING import *
 
-
-# HTML pour le bandeau "CEFE - CNRS"
-# bandeau_html = """
-# <div style="width: 100%; background-color: #4682B4; padding: 10px; margin-bottom: 10px;">
-#   <h1 style="text-align: center; color: white;">CEFE - CNRS / UM</h1>
-# </div>
-# """
-# # Injecter le code HTML du bandeau
-# st.markdown(bandeau_html, unsafe_allow_html=True)
 add_header()
 
-
 st.session_state["interface"] = st.session_state.get('interface')
 if st.session_state["interface"] == 'simple':
     hide_pages("Predictions")
@@ -134,13 +125,24 @@ if not spectra.empty and not y.empty:
         reg_model = Reg.model_
         #M2.dataframe(Pin.pred_data_)
     elif regression_algo == reg_algo[2]:
-        x_train, y_train, x_test, y_test = X_train.to_numpy(), y_train.to_numpy(), X_test.to_numpy(), y_test.to_numpy()
-        Reg = LWPLSR(x_train, y_train, x_test, y_test)
-        LWPLSR.Jchemo_lwplsr(Reg)
-        reg_model = Reg.model_
-        # LWPLSR.Jchemo_lwplsr_predict(Reg)
-        # st.dataframe(Reg.pred_data_)
-        # st.dataframe(Reg.metrics_)
+        data_to_work_with = ['x_train_np', 'y_train_np', 'x_test_np', 'y_test_np']
+        x_train_np, y_train_np, x_test_np, y_test_np = X_train.to_numpy(), y_train.to_numpy(), X_test.to_numpy(), y_test.to_numpy()
+        temp_path = Path('temp/')
+        for i in data_to_work_with: np.savetxt(temp_path / str(i + ".csv"), vars()[i], delimiter=",")
+        import subprocess
+        subprocess_path = Path("Class_Mod/")
+        subprocess.run([f"{sys.executable}", subprocess_path / "LWPLSR_Call.py"])
+        with open(temp_path / "lwplsr_outputs.json", "r") as outfile:
+                Reg_json = json.load(outfile)
+                for i in data_to_work_with: os.unlink(temp_path / str(i + ".csv"))
+        os.unlink(temp_path / "lwplsr_outputs.json")
+        Reg = type('obj', (object,), {'metrics_' : pd.json_normalize(Reg_json['metrics']), 'pred_data_' : [pd.json_normalize(Reg_json['pred_data_train']), pd.json_normalize(Reg_json['pred_data_cv']),pd.json_normalize(Reg_json['pred_data_test'])]})
+        Reg.pred_data_[0] = Reg.pred_data_[0].T.reset_index().drop(columns = ['index'])
+        Reg.pred_data_[0].index = list(y_train.index)
+        Reg.pred_data_[1] = Reg.pred_data_[1].T.reset_index().drop(columns = ['index'])
+        Reg.pred_data_[1].index = list(y_train.index)
+        Reg.pred_data_[2] = Reg.pred_data_[2].T.reset_index().drop(columns = ['index'])
+        Reg.pred_data_[2].index = list(y_test.index)
 
     elif regression_algo == reg_algo[3]:
         s = M1.number_input(label='Enter the maximum number of intervals', min_value=1, max_value=6, value=3)
-- 
GitLab