Updated LWPLSR

47a2f4ef · Nicolas Barthes · 7ef7771c · 47a2f4ef · 47a2f4ef
Commit 47a2f4ef authored 1 year ago by Nicolas Barthes
--- a/src/Class_Mod/LWPLSR_.py
+++ b/src/Class_Mod/LWPLSR_.py
-import pandas as pd
-import streamlit
 from Packages import *
 from Class_Mod.Miscellaneous import *

 class LWPLSR:
    """
-    The UMAP dimension reduction algorithm from scikit learn
+    The lwpls regression model from Jchemo (M. Lesnoff)
    """
-    def __init__(self, x_train, x_test, y_train, y_test):
+    def __init__(self, x_train, y_train, x_test, y_test):
        """Initiate the LWPLSR and prepare data for Julia computing."""
-        self.x_train, self.y_train, self.x_test, self.y_test = x_train, x_test, y_train, y_test
+        self.x_train, self.y_train, self.x_test, self.y_test = x_train, y_train, x_test, y_test
        # prepare to send dataframes to julia and Jchemo
-        jl.x_train,jl.y_train,jl.x_test,jl.y_test = self.x_train, self.y_train, self.x_test, self.y_test
+        jl.x_train, jl.y_train, jl.x_test, jl.y_test = self.x_train, self.y_train, self.x_test, self.y_test
+        # Pre-treatment of x_train and x_test
+        jl.seval("""
+        # Spectral pre-treatment: SNV (mod1) followed by a Savitzky-Golay first derivative (mod2),
+        # chained with pip() so that one transf() call on the pipeline applies both steps.
+        using Jchemo
+        mod1 = Jchemo.model(snv; centr = true, scal = true)
+        mod2 = Jchemo.model(savgol; npoint = 15, deriv = 1, degree = 2)
+        mod = Jchemo.pip(mod1, mod2)
+        Jchemo.fit!(mod, x_train)
+        x_train = Jchemo.transf(mod, x_train)
+        Jchemo.fit!(mod, x_test)
+        x_test = Jchemo.transf(mod, x_test)
+        """)
+        jl.seval("""
+        ntrain = nro(x_train)
+        segm = segmkf(ntrain, 4; rep = 5)
+        nlvdis = [5; 10; 15] ; metric = [:mah]
+        h = [1; 2; 6; Inf] ; k = [10; 30; 100]
+        nlv = 0:15
+        pars = mpar(nlvdis = nlvdis, metric = metric, h = h, k = k)
+        println(pars)
+        """)
+
+        # initialize vars from the class
+        y_shape = y_test.shape
        self.scores = pd.DataFrame
        self.predicted_results_on_test = pd.DataFrame
+        self.pred = np.zeros(shape=(y_shape[0], 1))
+        self.mod = ""

    def Jchemo_lwplsr(self):
        """Send data to Julia to compute lwplsr.
@@ -29,38 +54,70 @@ class LWPLSR:
            self.predicted_results_on_test (DataFrame):
        """
        # launch Julia Jchemo lwplsr
-        jl.eval("""
-        using Pandas
+        jl.seval("""
        using DataFrames
+        using Pandas
        using Jchemo
-        nlvdis = 5 ; metric = :mah
-        h = 1 ; k = 200 ; nlv = 15 #; scal = true
-        mod = Jchemo.model(Jchemo.lwplsr; nlvdis, metric, h, k, nlv)
-        Jchemo.fit!(mod, X_train, y_train)
-        # Jchemo.pnames(mod)
-        # Jchemo.pnames(mod.fm)
-        # predictions on test data calculation
-        res = Jchemo.predict(mod, X_test)
-        # Jchemo.pnames(res)
+        x_train |> Pandas.DataFrame |> DataFrames.DataFrame
+        y_train |> Pandas.DataFrame |> DataFrames.DataFrame
+        x_test |> Pandas.DataFrame |> DataFrames.DataFrame
+        y_test |> Pandas.DataFrame |> DataFrames.DataFrame
        """)
-        resjp = jl.seval("""
-        Pandas.DataFrame(res.pred)
+        # Create LWPLSR model and fit
+        # jl.seval("""
+        # nlvdis = 5 ; metric = :mah
+        # h = 1 ; k = 200 ; nlv = 15 #; scal = true
+        # mod = Jchemo.model(Jchemo.lwplsr; nlvdis, metric, h, k, nlv)
+        # # Fit model
+        # Jchemo.fit!(mod, x_train, y_train)
+        # """)
+
+        jl.seval("""
+        mod = Jchemo.model(Jchemo.lwplsr)
+        res = Jchemo.gridcv(mod, x_train, y_train; segm, score = Jchemo.rmsep, pars, nlv, verbose = true).res
+        u = findall(res.y1 .== minimum(res.y1))[1]  # first grid row with the lowest cross-validated score
+        mod = Jchemo.model(Jchemo.lwplsr; nlvdis = res.nlvdis[u], metric = res.metric[u], h = res.h[u], k = res.k[u], nlv = res.nlv[u]) ;  # rebuild with the selected parameters
+        Jchemo.fit!(mod, x_train, y_train)  # fit so that `mod` (read back into self.mod) is a trained model
        """)
-        scoresjp = jl.seval("""
-        Jchemo.mse(res.pred, y_test)
+        self.mod = jl.mod
+
+    def Jchemo_lwplsr_predict(self):
+        """Run Jchemo.predict on x_test with the Julia-side `mod`; store res.pred in self.pred and a DataFrame of it in self.predicted_results_on_test."""
+        self.pred = jl.seval("""
+        res = Jchemo.predict(mod, x_test)
+        res.pred
        """)
-        self.scores = pd.Dataframe(scoresjp)
-        self.predicted_results_on_test = pd.Dataframe(resjp)
+        # wrap the predictions returned by Julia in a pandas DataFrame
+        self.predicted_results_on_test = pd.DataFrame(self.pred)

    @property
    def pred_data_(self):
        return self.predicted_results_on_test, self.predicted_results_on_test, self.predicted_results_on_test

-    # @property
-    # def model_(self):
-    #     return self.trained
+    @property
+    def model_(self):
+        return self.mod

    @property
    def metrics_(self):
-        # self.scores = pd.DataFrame(self.scores, index=['test'])
-        return self.scores
\ No newline at end of file
+        jl.pred = self.pred
+        st.dataframe(self.pred)
+        st.dataframe(self.predicted_results_on_test)
+        st.write('starting metrics')
+        jl.seval("""
+        using Jchemo
+        """)
+        scorermsep = jl.seval("""
+            first(Jchemo.rmsep(pred, y_test))
+            """)
+        scoremr2 = jl.seval("""
+            first(Jchemo.r2(pred, y_test))
+            """)
+        scorerpd = jl.seval("""
+            first(Jchemo.rpd(pred, y_test))
+            """)
+        scoremsep = jl.seval("""
+            first(Jchemo.msep(pred, y_test))  # msep, to match the 'msep' column this value is reported under
+            """)
+        self.scores = pd.DataFrame([[scoremr2, scorermsep, scoremsep, scorerpd]], columns=['r2', 'rmsep', 'msep', 'rpd'], index=['scores'])
+        return self.scores
--- a/src/pages/2-model_creation.py
+++ b/src/pages/2-model_creation.py
@@ -134,9 +134,13 @@ if not spectra.empty and not y.empty:
        reg_model = Reg.model_
        #M2.dataframe(Pin.pred_data_)
    elif regression_algo == reg_algo[2]:
-        # Reg = LWPLSR(x_train = X_train, x_test = X_test, y_train = y_train, y_test = y_test)
-        # LWPLSR.Jchemo_lwplsr(Reg)
-        pass
+        x_train, y_train, x_test, y_test = X_train.to_numpy(), y_train.to_numpy(), X_test.to_numpy(), y_test.to_numpy()
+        Reg = LWPLSR(x_train, y_train, x_test, y_test)
+        LWPLSR.Jchemo_lwplsr(Reg)
+        reg_model = Reg.model_
+        # LWPLSR.Jchemo_lwplsr_predict(Reg)
+        # st.dataframe(Reg.pred_data_)
+        # st.dataframe(Reg.metrics_)

    elif regression_algo == reg_algo[3]:
        s = M1.number_input(label='Enter the maximum number of intervals', min_value=1, max_value=6, value=3)