# eval_metrics.py
# Performance metrics (train / cross-validation / test) for model evaluation.

    from typing import List, Optional

    import numpy as np
    from pandas import DataFrame
    
    
    
    DIANE's avatar
    DIANE committed
    class metrics:
        """
        Compute performance metrics from measured and predicted values.

        Up to three datasets (train, cross-validation, test) can be supplied; each one
        that is provided yields one row in the resulting table of scores.

        Attributes:
        -----------
        ret : DataFrame
            Table of scores, one row per supplied dataset ("train", "cv", "test") and
            one column per metric.
        scores_ : DataFrame
            Read-only alias of ``ret``.
        """

        def __init__(self, c: Optional[List] = None, cv: Optional[List] = None,
                     t: Optional[List] = None, method: str = 'regression') -> None:
            """
            Compute the metrics for every dataset that is provided.

            Parameters:
            -----------
            c : Optional[List], optional
                ``[measured, predicted]`` values for the training set. The default is None.
            cv : Optional[List], optional
                ``[measured, predicted]`` values for the cross-validation set. The default is None.
            t : Optional[List], optional
                ``[measured, predicted]`` values for the test set. The default is None.
            method : str, optional
                Either 'regression' or 'classification'. The default is 'regression'.

            Raises:
            -------
            ValueError
                If ``method`` is not one of the supported names.
            NotImplementedError
                If the scorer selected by ``method`` returns no metrics (currently the
                case for 'classification').
            """
            scorers = {'regression': self.reg_, 'classification': self.class_}
            if method not in scorers:
                raise ValueError(
                    f"method must be one of {sorted(scorers)}, got {method!r}"
                )
            scorer = scorers[method]

            perf = {}
            for name, data in zip(("train", "cv", "test"), (c, cv, t)):
                if data is None:
                    continue
                scores = scorer(data[0], data[1])
                if scores is None:
                    # A placeholder scorer would otherwise make the DataFrame
                    # construction below fail with an unrelated error.
                    raise NotImplementedError(
                        f"{method!r} metrics are not implemented yet"
                    )
                perf[name] = scores

            # A dict of dicts builds one column per dataset; transpose so that each
            # dataset becomes a row. This works for one dataset as well as several.
            self.ret = DataFrame(perf).T

        @staticmethod
        def reg_(meas, pred):
            """
            Calculate regression performance metrics.

            Both inputs are converted to flat float arrays, so column vectors of
            shape (n, 1) and plain sequences of length n can be mixed safely.

            Parameters:
            -----------
            meas : list or array
                The measured (true) values.
            pred : list or array
                The predicted values.

            Returns:
            --------
            dict
                A dictionary containing the following regression metrics:
                - 'r': Correlation coefficient
                - 'r2': R-squared (1 - residual sum of squares / total sum of squares)
                - 'rmse': Root Mean Square Error
                - 'mae': Mean Absolute Error
                - 'rpd': Ratio of Performance to Deviation (std of measured / rmse)
                - 'rpiq': Ratio of Performance to InterQuartile range (IQR of measured / rmse)

            Raises:
            -------
            ValueError
                If ``meas`` and ``pred`` do not contain the same number of values.
            """
            meas = np.asarray(meas, dtype=float).ravel()
            pred = np.asarray(pred, dtype=float).ravel()
            if meas.shape != pred.shape:
                raise ValueError(
                    f"meas and pred must have the same number of values, "
                    f"got {meas.size} and {pred.size}"
                )

            err = meas - pred
            sq_err = err ** 2

            # Total and residual sums of squares.
            sst = np.sum((meas - np.mean(meas)) ** 2)
            ssr = np.sum(sq_err)

            rmse = np.sqrt(np.mean(sq_err))
            iqr = np.quantile(meas, 0.75) - np.quantile(meas, 0.25)

            # NOTE: a perfect fit (rmse == 0) or constant measured values (sst == 0)
            # give inf/nan through the usual NumPy division rules.
            return {
                'r': np.corrcoef(meas, pred)[0, 1],
                'r2': 1 - ssr / sst,
                'rmse': rmse,
                'mae': np.mean(np.abs(err)),
                'rpd': np.std(meas) / rmse,
                'rpiq': iqr / rmse,
            }

        @staticmethod
        def class_(meas, pred):
            """
            Placeholder method for classification metrics (not implemented yet).

            Parameters:
            -----------
            meas : list or array
                The measured (true) values.
            pred : list or array
                The predicted values.

            Returns:
            --------
            None
                This method currently does not perform any operations and returns None.
            """
            return None

        @property
        def scores_(self):
            """
            Return the calculated performance metrics.

            Returns:
            --------
            DataFrame
                One row per supplied dataset (train, cross-validation, test), one
                column per metric.
            """
            return self.ret