Skip to content
Snippets Groups Projects
lwplsr_.py 12.3 KiB
Newer Older
DIANE's avatar
DIANE committed
from juliacall import Main as jl
import numpy as np
import pandas as pd
DIANE's avatar
DIANE committed
from pandas import DataFrame
DIANE's avatar
DIANE committed

class LWPLSR:
    """The lwpls regression model from Jchemo (M. Lesnoff)

    Returns:
        self.scores (DataFrame): various metrics and scores
        self.predicted_results (Dictionary): Dict containing all predicted results (train, test, cross-validation)
        self.mod (Julia model): the prepared model
    """
    def __init__(self, dataset, preT, action):
        """Initiate the LWPLSR and prepare data for Julia computing.

        Args:
            dataset (list): ordered data; layout depends on `action`:
                - 'Model_Creation': [x_train, y_train, x_test, y_test, ...CV folds]
                - 'Prediction': [x_spectra, y, x_pred]
            preT (dict): pre-treatment parameters (acquired from a global PLSR);
                for 'Prediction' it also carries the best lwplsr hyper-parameters.
            action (str): 'Model_Creation' or 'Prediction'. Any other value
                leaves the instance uninitialized (silently ignored).
        """
        if action == 'Model_Creation':
            # get train / test data from dataset
            self.x_train, self.y_train, self.x_test, self.y_test = [dataset[i] for i in range(4)]
            # calculate number of KFolds and get CV data from dataset
            self.nb_fold = int((len(dataset)-4)/4)
            # NOTE(review): the fixed offsets below (i+4 / i+7 / i+13) only line
            # up with a 3-fold layout (xte at 4..6, xtr at 7..9, ytr at 13..15);
            # they index the wrong elements if nb_fold != 3 -- TODO confirm
            # against the caller that builds `dataset`.
            for i in range(self.nb_fold):
                # mirror each fold on self (Python side) and on jl (Julia side)
                setattr(self, "xtr_fold"+str(i+1), dataset[i+7])
                setattr(self, "ytr_fold"+str(i+1), dataset[i+13])
                setattr(self, "xte_fold"+str(i+1), dataset[i+4])
                setattr(jl, "xtr_fold"+str(i+1), dataset[i+7])
                setattr(jl, "ytr_fold"+str(i+1), dataset[i+13])
                setattr(jl, "xte_fold"+str(i+1), dataset[i+4])

            # prepare to send dataframes to julia and Jchemo (with the jl. prefix)
            jl.x_train, jl.y_train, jl.x_test, jl.y_test = self.x_train, self.y_train, self.x_test, self.y_test
            # Get parameters for preTreatment of the spectra (acquired from a global PLSR)
            self.preT = preT

            # initialize vars from the class (placeholders, overwritten later)
            y_shape = self.y_test.shape
            self.pred_test = np.zeros(shape=(y_shape[0], 1))
            # NOTE(review): pred_train is sized from y_test's shape, not
            # y_train's -- looks suspicious, but it is only a placeholder that
            # Jchemo_lwplsr_predict overwrites, so behavior is kept as-is.
            self.pred_train = np.zeros(shape=(y_shape[0], 1))
            self.mod = ""
            self.best_lwplsr_params = np.zeros(shape=(5, 1))
            self.predicted_results = {}
        elif action == 'Prediction':
            self.x_spectra, self.y, self.x_pred = [dataset[i] for i in range(3)]
            # prepare to send dataframes to julia and Jchemo (with the jl. prefix)
            jl.x_spectra, jl.y, jl.x_pred = self.x_spectra, self.y, self.x_pred
            self.predicted_results = {}

DIANE's avatar
DIANE committed

    def Jchemo_lwplsr_fit(self):
        """Send data to Julia to fit lwplsr.

        Converts the train/test frames already bound on `jl` (by __init__) to
        Julia DataFrames, applies the pre-treatments described in self.preT,
        grid-tunes the lwplsr hyper-parameters on a 70/30 Cal/Val split of the
        train set, then fits the final model with the best combination found.

        Args:
            self.x_train (DataFrame):
            self.y_train (DataFrame):
            self.x_test (DataFrame):
            self.y_test (DataFrame):

        Returns:
            self.mod (Julia model): the prepared model
            self.best_lwplsr_params (dict): best tuning combination found
        """
        # launch Julia Jchemo lwplsr and convert DataFrames from Python Pandas DataFrame to Julia DataFrame
        jl.seval("""
        using DataFrames
        using Pandas
        using Jchemo
        x_train |> Pandas.DataFrame |> DataFrames.DataFrame
        y_train |> Pandas.DataFrame |> DataFrames.DataFrame
        x_test |> Pandas.DataFrame |> DataFrames.DataFrame
        y_test |> Pandas.DataFrame |> DataFrames.DataFrame
        """)
        # apply pre-treatments on X data
        print('LWPLSR - preTreatment')
        # bind Savitzky-Golay parameters into the Julia session
        jl.npoint = self.preT['window_length']
        jl.deriv = self.preT['deriv']
        jl.degree = self.preT['polyorder']
        # NOTE(review): pre-treatment only runs when polyorder > 0; an 'Snv'
        # normalization with polyorder == 0 is silently skipped -- TODO confirm
        # this is intended.
        if self.preT['polyorder'] > 0:
            jl.seval("""
            mod1 = model(snv; centr = true, scal = true)
            mod2 = model(savgol; npoint = npoint, deriv = deriv, degree = degree)
            """)
            if self.preT['normalization'] == "No_transformation":
                # Savitzky-Golay only
                jl.seval("""
                    preMod = mod2
                """)
            elif self.preT['normalization'] == 'Snv':
                # SNV followed by Savitzky-Golay, chained as a Jchemo pipeline
                jl.seval("""
                    preMod = pip(mod1, mod2)
                """)
            # fit the pre-treatment on x_train and transform both X sets
            jl.seval("""
            fit!(preMod, x_train)
            x_train = transf(preMod, x_train)
            x_test = transf(preMod, x_test)
            """)
        # LWPLSR tuning
        print('LWPLSR - tuning')
        # set tuning parameters to test (grid over nlvdis/metric/h/k, nlv range)
        jl.seval("""
        nlvdis = [5; 10; 15] ; metric = [:eucl; :mah] 
        h = [1; 2; 6; Inf] ; k = [30; 80; 200]  
        nlv = 5:15
        pars = Jchemo.mpar(nlvdis = nlvdis, metric = metric, h = h, k = k)
        """)
        # split Train data into Cal/Val for tuning (30% random validation)
        jl.seval("""
        pct = .3
        ntrain = Jchemo.nro(x_train)
        nval = Int(round(pct * ntrain))
        s = Jchemo.samprand(ntrain, nval)
        Xcal = x_train[s.train, :]
        ycal = y_train[s.train]
        Xval = x_train[s.test, :]
        yval = y_train[s.test]
        ncal = ntrain - nval 
        """)

        # Create LWPLSR model and tune with GridScore (RMSEP on the Val split)
        jl.seval("""
        mod = Jchemo.model(Jchemo.lwplsr)
        res = gridscore(mod, Xcal, ycal, Xval, yval; score = Jchemo.rmsep, pars, nlv, verbose = false)
        u = findall(res.y1 .== minimum(res.y1))[1] #best parameters combination
        """)
        # save best lwplsr parameters (row u of the gridscore results)
        self.best_lwplsr_params = {'nlvdis' : jl.res.nlvdis[jl.u], 'metric' : str(jl.res.metric[jl.u]), 'h' : jl.res.h[jl.u], 'k' : jl.res.k[jl.u], 'nlv' : jl.res.nlv[jl.u]}
        print('best lwplsr params ' + str(self.best_lwplsr_params))
        # run LWPLSR model with best parameters, fitted on the full train set
        jl.seval("""
        mod = Jchemo.model(Jchemo.lwplsr; nlvdis = res.nlvdis[u], metric = res.metric[u], h = res.h[u], k = res.k[u], nlv = res.nlv[u])
        # Fit model
        Jchemo.fit!(mod, x_train, y_train)
        """)
        # save Julia Jchemo model
        self.mod = jl.mod

    def Jchemo_lwplsr_predict_fit(self):
        """Send data to Julia to fit lwplsr for a standalone prediction.

        Unlike Jchemo_lwplsr_fit, no grid tuning is performed here: the best
        hyper-parameters found during model creation are read from self.preT
        and applied directly.

        Args:
            self.x_spectra (DataFrame): training spectra.
            self.y (DataFrame): training response.
            self.x_pred (DataFrame): spectra to predict on later.

        Returns:
            self.mod (Julia model): the prepared model
        """
        # launch Julia Jchemo lwplsr and convert DataFrames from Python Pandas DataFrame to Julia DataFrame
        jl.seval("""
            using DataFrames
            using Pandas
            using Jchemo
            x_spectra |> Pandas.DataFrame |> DataFrames.DataFrame
            y |> Pandas.DataFrame |> DataFrames.DataFrame
            x_pred |> Pandas.DataFrame |> DataFrames.DataFrame
            """)
        print('LWPLSR - no tuning, using best parameters from model creation')
        # import best params from model creation into the Julia session
        jl.nlvdis = self.preT['nlvdis']
        jl.metric = self.preT['metric']
        jl.h = self.preT['h']
        jl.k = self.preT['k']
        jl.nlv = self.preT['nlv']
        # run LWPLSR model with best parameters; metric arrives as a Python
        # string, hence the Symbol(...) conversion on the Julia side
        jl.seval("""
            mod = Jchemo.model(Jchemo.lwplsr; nlvdis = nlvdis, metric = Symbol(metric), h = h, k = k, nlv = nlv)
            # Fit model
            Jchemo.fit!(mod, x_spectra, y)
            """)
        # save Julia Jchemo model
        self.mod = jl.mod



DIANE's avatar
DIANE committed
    def Jchemo_lwplsr_predict(self):
        """Send data to Julia to predict with lwplsr.

        `mod`, `x_train` and `x_test` live in the Julia session (bound by
        __init__ and Jchemo_lwplsr_fit); only the returned prediction arrays
        cross back into Python.

        Args:
            self.mod (Julia model): the prepared model
            self.x_train (DataFrame):
            self.y_train (DataFrame):
            self.x_test (DataFrame):
            self.y_test (DataFrame):

        Returns:
            self.pred_test (Julia DataFrame): predicted values on x_test
            self.pred_train (Julia DataFrame): predicted values on x_train
        """
        # Predictions on x_test, stored in self.pred_test
        self.pred_test = jl.seval("""
        println("LWPLSR - start test predict")
        res = Jchemo.predict(mod, x_test)
        res.pred
        """)
        # Predictions on x_train, stored in self.pred_train
        self.pred_train = jl.seval("""
        println("LWPLSR - start train predict")
        res = Jchemo.predict(mod, x_train)
        res.pred
        """)
        print('LWPLSR - end')

    def Jchemo_lwplsr_predict_predict(self):
        """Send data to Julia to predict with lwplsr.

        `mod` and `x_pred` live in the Julia session (bound by
        Jchemo_lwplsr_predict_fit); only the prediction array crosses back.

        Args:
            self.mod (Julia model): the prepared model
            self.x_spectra (DataFrame):
            self.y (DataFrame):
            self.x_pred (DataFrame):

        Returns:
            self.y_pred (Julia DataFrame): predicted values on x_pred
        """
        # Predictions on x_pred, stored in self.y_pred
        self.y_pred = jl.seval("""
            println("LWPLSR - start y predict")
            res = Jchemo.predict(mod, x_pred)
            res.pred
            """)
        print('LWPLSR - end')

DIANE's avatar
DIANE committed
    def Jchemo_lwplsr_cv(self):
        """Send Cross-Validation data to Julia to fit & predict with lwplsr.

        For each KFold, fits a fresh lwplsr model with the best parameters
        found during tuning, predicts the fold's held-out spectra, and stores
        the predictions in self.predicted_results under key "CV<fold number>".

        Args:
            self.best_lwplsr_params (dict): the best parameters to use (from tuning) for CV
            self.xtr_foldN (DataFrame): train spectra of fold N
            self.ytr_foldN (DataFrame): train response of fold N
            self.xte_foldN (DataFrame): held-out spectra of fold N

        Returns:
            self.predicted_results (dict): one DataFrame of predictions per fold
        """
        for i in range(self.nb_fold):
            # push the current fold's frames into the Julia session
            jl.Xtr = getattr(self, "xtr_fold"+str(i+1))
            jl.Ytr = getattr(self, "ytr_fold"+str(i+1))
            jl.Xte = getattr(self, "xte_fold"+str(i+1))
            # convert Python Pandas DataFrame to Julia DataFrame
            jl.seval("""
            using DataFrames
            using Pandas
            using Jchemo
            Xtr |> Pandas.DataFrame |> DataFrames.DataFrame
            Ytr |> Pandas.DataFrame |> DataFrames.DataFrame
            Xte |> Pandas.DataFrame |> DataFrames.DataFrame
            """)
            # set lwplsr parameters as the best ones from tuning (nlvdis/k/nlv
            # coerced to int -- gridscore may have returned them as floats)
            jl.nlvdis = int(self.best_lwplsr_params['nlvdis'])
            jl.metric = self.best_lwplsr_params['metric']
            jl.h = self.best_lwplsr_params['h']
            jl.k = int(self.best_lwplsr_params['k'])
            jl.nlv = int(self.best_lwplsr_params['nlv'])
            jl.seval("""
            println("LWPLSR - start CV mod")
            mod_cv = Jchemo.model(Jchemo.lwplsr; nlvdis = nlvdis, metric = Symbol(metric), h = h, k = k, nlv = nlv)
            # Fit model
            Jchemo.fit!(mod_cv, Xtr, Ytr)
            """)
            pred_cv = jl.seval("""
            println("LWPLSR - start CV predict")
            res = Jchemo.predict(mod_cv, Xte)
            res.pred
            """)
            # save predicted values for each KFold in the predicted_results dictionary
            self.predicted_results["CV" + str(i+1)] = DataFrame(pred_cv)
DIANE's avatar
DIANE committed

    @property
    def pred_data_(self):
        """Return predicted_results with train/test predictions as Pandas DataFrames.

        Converts self.pred_train and self.pred_test (filled by
        Jchemo_lwplsr_predict) to Pandas DataFrames, stores them under
        "pred_data_train" / "pred_data_test", and returns the whole dict.
        """
        self.predicted_results["pred_data_train"] = DataFrame(self.pred_train)
        self.predicted_results["pred_data_test"] = DataFrame(self.pred_test)
        return self.predicted_results

    @property
    def predict_pred_data_(self):
        """Return predicted_results with the x_pred predictions as a Pandas DataFrame.

        Converts self.y_pred (filled by Jchemo_lwplsr_predict_predict), stores
        it under "y_pred", and returns the whole dictionary.
        """
        predictions = DataFrame(self.y_pred)
        self.predicted_results["y_pred"] = predictions
        return self.predicted_results

DIANE's avatar
DIANE committed
    @property
    def model_(self):
        """Expose the fitted Julia Jchemo model."""
        fitted_model = self.mod
        return fitted_model

    @property
    def best_lwplsr_params_(self):
        """Expose the best lwplsr hyper-parameters found during tuning."""
        params = self.best_lwplsr_params
        return params