    # LWPLSR_.py
    from juliacall import Main as jl
    import numpy as np
    import pandas as pd
    
    class LWPLSR:
        """The lwpls regression model from Jchemo (M. Lesnoff), driven through juliacall.

        The data is copied into the Julia ``Main`` module (``jl``), the model is
        tuned and fitted there with Jchemo, and the predictions are read back.

        Attributes:
            x_train, y_train, x_test, y_test: the first four items of ``dataset``.
            nb_fold (int): number of cross-validation folds found in ``dataset``.
            xtr_fold<i>, ytr_fold<i>, xte_fold<i>: per-fold data (``i`` starts at 1).
            pred_test, pred_train: raw predictions returned by Julia (zero-filled
                placeholders until ``Jchemo_lwplsr_predict`` has run).
            predicted_results_on_train (DataFrame): predictions on ``x_train``.
            predicted_results_on_test (DataFrame): predictions on ``x_test``.
            predicted_results (dict): DataFrames of predictions, keyed by
                ``"CV<i>"``, ``"pred_data_train"`` and ``"pred_data_test"``.
            mod (Julia model): the prepared model (``""`` until fitted).
            best_lwplsr_params: dict of the tuned parameters once
                ``Jchemo_lwplsr_fit`` has run (a zero array placeholder before).
        """

        @staticmethod
        def _fold_offsets(n_items):
            """Return ``(nb_fold, xte_start, xtr_start, yte_start, ytr_start)``.

            ``n_items`` is ``len(dataset)``. The four leading items are
            x_train, y_train, x_test and y_test; what follows is read as four
            groups of ``nb_fold`` items each, in the order xte, xtr, yte, ytr.
            For 3 folds this yields the start indices 4, 7, 10 and 13.

            NOTE(review): the group layout for fold counts other than 3 is
            assumed to follow the same pattern -- confirm against the code that
            builds ``dataset``.
            """
            nb_fold = max((n_items - 4) // 4, 0)
            xte_start = 4
            xtr_start = xte_start + nb_fold
            yte_start = xtr_start + nb_fold
            ytr_start = yte_start + nb_fold
            return nb_fold, xte_start, xtr_start, yte_start, ytr_start

        def __init__(self, dataset):
            """Initiate the LWPLSR and prepare data for Julia computing.

            Args:
                dataset: indexable collection holding x_train, y_train, x_test,
                    y_test at indices 0-3, followed by the cross-validation
                    fold data (see ``_fold_offsets``).
            """
            self.x_train, self.y_train, self.x_test, self.y_test = [dataset[i] for i in range(4)]

            # Fold positions are derived from the fold count instead of being
            # hard-coded for 3 folds. The yte folds are not used.
            self.nb_fold, xte_start, xtr_start, _, ytr_start = self._fold_offsets(len(dataset))
            for i in range(self.nb_fold):
                fold_data = {
                    f"xtr_fold{i + 1}": dataset[xtr_start + i],
                    f"ytr_fold{i + 1}": dataset[ytr_start + i],
                    f"xte_fold{i + 1}": dataset[xte_start + i],
                }
                # Keep a copy on the instance and expose the same name in Julia.
                for name, value in fold_data.items():
                    setattr(self, name, value)
                    setattr(jl, name, value)

            # prepare to send dataframes to julia and Jchemo
            jl.x_train, jl.y_train, jl.x_test, jl.y_test = self.x_train, self.y_train, self.x_test, self.y_test

            # initialize vars from the class
            # Empty DataFrame instances (not the DataFrame class itself).
            self.predicted_results_on_test = pd.DataFrame()
            self.predicted_results_on_train = pd.DataFrame()
            # Placeholders sized on their own target set.
            self.pred_test = np.zeros(shape=(self.y_test.shape[0], 1))
            self.pred_train = np.zeros(shape=(self.y_train.shape[0], 1))
            self.mod = ""
            self.best_lwplsr_params = np.zeros(shape=(5, 1))
            self.predicted_results = {}

        def Jchemo_lwplsr_fit(self):
            """Send data to Julia to tune and fit lwplsr.

            The training data is split at random into calibration/validation
            parts (30 % validation), every combination of the parameter grid is
            scored with RMSEP on the validation part, and a model using the
            best-scoring combination is fitted on the full training set.

            Args:
                self.x_train (DataFrame):
                self.y_train (DataFrame):
                self.x_test (DataFrame):
                self.y_test (DataFrame):

            Returns:
                self.mod (Julia model): the prepared model
                self.best_lwplsr_params (dict): nlvdis, metric, h, k, nlv retained
            """
            # launch Julia Jchemo lwplsr
            # NOTE(review): the piped conversions below are not assigned to
            # anything, so x_train/y_train/x_test/y_test are not rebound in
            # Julia -- confirm this is intended before changing it (the later
            # `y_train[s.train]` indexing relies on the current objects).
            jl.seval("""
            using DataFrames
            using Pandas
            using Jchemo
            x_train |> Pandas.DataFrame |> DataFrames.DataFrame
            y_train |> Pandas.DataFrame |> DataFrames.DataFrame
            x_test |> Pandas.DataFrame |> DataFrames.DataFrame
            y_test |> Pandas.DataFrame |> DataFrames.DataFrame
            """)

            print('LWPLSR - tuning')
            # set tuning parameters
            jl.seval("""
            nlvdis = [5; 10; 15] ; metric = [:eucl; :mah]
            h = [1; 2; 6; Inf] ; k = [30; 80; 200]
            nlv = 5:15
            pars = Jchemo.mpar(nlvdis = nlvdis, metric = metric, h = h, k = k)
            """)

            # split Train data into Cal/Val for tuning
            jl.seval("""
            pct = .3
            ntrain = Jchemo.nro(x_train)
            nval = Int(round(pct * ntrain))
            s = Jchemo.samprand(ntrain, nval)
            Xcal = x_train[s.train, :]
            ycal = y_train[s.train]
            Xval = x_train[s.test, :]
            yval = y_train[s.test]
            ncal = ntrain - nval
            """)

            # Create LWPLSR model and tune
            jl.seval("""
            mod = Jchemo.model(Jchemo.lwplsr)
            res = gridscore(mod, Xcal, ycal, Xval, yval; score = Jchemo.rmsep, pars, nlv, verbose = false)
            u = findall(res.y1 .== minimum(res.y1))[1] #best parameters combination
            """)
            # Row `u` of the grid results is the first one with the lowest score.
            best = jl.u
            self.best_lwplsr_params = {
                'nlvdis': jl.res.nlvdis[best],
                'metric': str(jl.res.metric[best]),  # stored as text, turned back into a Symbol for CV
                'h': jl.res.h[best],
                'k': jl.res.k[best],
                'nlv': jl.res.nlv[best],
            }
            print('best lwplsr params ' + str(self.best_lwplsr_params))
            print('LWPLSR - best params ok')

            # calculate LWPLSR model with best parameters
            jl.seval("""
            mod = Jchemo.model(Jchemo.lwplsr; nlvdis = res.nlvdis[u], metric = res.metric[u], h = res.h[u], k = res.k[u], nlv = res.nlv[u])
            # Fit model
            Jchemo.fit!(mod, x_train, y_train)
            """)
            self.mod = jl.mod

        def Jchemo_lwplsr_predict(self):
            """Send data to Julia to predict with lwplsr.

            Uses the Julia-side ``mod``, ``x_test`` and ``x_train`` variables, so
            ``Jchemo_lwplsr_fit`` must have been run first.

            Args:
                self.mod (Julia model): the prepared model
                self.x_train (DataFrame):
                self.y_train (DataFrame):
                self.x_test (DataFrame):
                self.y_test (DataFrame):

            Returns:
                self.pred_test (Julia DataFrame): predicted values on x_test
                self.pred_train (Julia DataFrame): predicted values on x_train
            """
            # Predictions on x_test and store in self.pred_test
            self.pred_test = jl.seval("""
            println("LWPLSR - start test predict")
            res = Jchemo.predict(mod, x_test)
            res.pred
            """)

            # Predictions on x_train and store in self.pred_train
            self.pred_train = jl.seval("""
            println("LWPLSR - start train predict")
            res = Jchemo.predict(mod, x_train)
            res.pred
            """)

        def Jchemo_lwplsr_cv(self):
            """Send fold data to Julia to cross-validate lwplsr.

            For each fold, a model using the parameters in
            ``self.best_lwplsr_params`` is fitted on the fold's training data
            and used to predict the fold's test data.

            Args:
                self.best_lwplsr_params (dict): parameters from ``Jchemo_lwplsr_fit``
                self.xtr_fold<i> (DataFrame):
                self.ytr_fold<i> (DataFrame):
                self.xte_fold<i> (DataFrame):

            Returns:
                self.predicted_results["CV<i>"] (DataFrame): predicted values on
                    the test part of fold ``i``
            """
            for i in range(self.nb_fold):
                fold = str(i + 1)
                jl.Xtr = getattr(self, "xtr_fold" + fold)
                jl.Ytr = getattr(self, "ytr_fold" + fold)
                jl.Xte = getattr(self, "xte_fold" + fold)
                # NOTE(review): as in the fit step, these piped conversions are
                # not assigned back to Xtr/Ytr/Xte -- confirm intent.
                jl.seval("""
                using DataFrames
                using Pandas
                using Jchemo
                Xtr |> Pandas.DataFrame |> DataFrames.DataFrame
                Ytr |> Pandas.DataFrame |> DataFrames.DataFrame
                Xte |> Pandas.DataFrame |> DataFrames.DataFrame
                """)
                # Hand the tuned parameters over to Julia; the metric travels
                # as text and is converted with Symbol(...) on the Julia side.
                jl.nlvdis = int(self.best_lwplsr_params['nlvdis'])
                jl.metric = self.best_lwplsr_params['metric']
                jl.h = self.best_lwplsr_params['h']
                jl.k = int(self.best_lwplsr_params['k'])
                jl.nlv = int(self.best_lwplsr_params['nlv'])
                jl.seval("""
                println("LWPLSR - start CV mod")
                mod_cv = Jchemo.model(Jchemo.lwplsr; nlvdis = nlvdis, metric = Symbol(metric), h = h, k = k, nlv = nlv)
                # Fit model
                Jchemo.fit!(mod_cv, Xtr, Ytr)
                """)
                pred_cv = jl.seval("""
                println("LWPLSR - start CV predict")
                res = Jchemo.predict(mod_cv, Xte)
                res.pred
                """)
                self.predicted_results["CV" + fold] = pd.DataFrame(pred_cv)

        @property
        def pred_data_(self):
            """Return the dict of predictions, refreshed with train/test results.

            Reading this property converts ``self.pred_train`` and
            ``self.pred_test`` to pandas DataFrames, stores them on the instance
            and under the ``"pred_data_train"`` / ``"pred_data_test"`` keys of
            ``self.predicted_results``, then returns that dict.
            """
            # convert predicted data to Pandas DataFrame
            self.predicted_results_on_test = pd.DataFrame(self.pred_test)
            self.predicted_results_on_train = pd.DataFrame(self.pred_train)
            self.predicted_results["pred_data_train"] = self.predicted_results_on_train
            self.predicted_results["pred_data_test"] = self.predicted_results_on_test
            return self.predicted_results

        @property
        def model_(self):
            """Return ``self.mod`` (the Julia model once fitted)."""
            return self.mod

        @property
        def best_lwplsr_params_(self):
            """Return ``self.best_lwplsr_params`` (the tuned parameters once fitted)."""
            return self.best_lwplsr_params