# eval_metrics.py
from typing import List, Optional

import numpy as np
from pandas import DataFrame


DIANE's avatar
DIANE committed
class metrics:
    """
    Compute performance metrics for model predictions.

    Each dataset (train, cross-validation, test) is supplied as a pair
    ``[measured, predicted]``. For every dataset that is provided, the metrics
    of the selected method are computed and collected into one table with one
    row per dataset and one column per metric.

    Attributes:
    -----------
    ret : DataFrame
        The table of computed metrics (rows: 'train', 'cv', 'test' for the
        datasets that were provided; columns: metric names).
    scores_ : DataFrame
        Read-only property returning ``ret``.
    """

    def __init__(self, c: Optional[List] = None, cv: Optional[List] = None, t: Optional[List] = None, method: str = 'regression') -> None:
        """
        Compute the performance metrics for the provided datasets.

        Parameters:
        -----------
        c : Optional[List], optional
            ``[measured, predicted]`` values for the training set. The default is None.
        cv : Optional[List], optional
            ``[measured, predicted]`` values for the cross-validation set. The default is None.
        t : Optional[List], optional
            ``[measured, predicted]`` values for the test set. The default is None.
        method : str, optional
            Either 'regression' or 'classification'. The default is 'regression'.
            NOTE: ``class_`` is still a placeholder that returns None, so
            'classification' does not yield usable scores yet.

        Raises:
        -------
        ValueError
            If ``method`` is not one of the supported methods.
        """
        # Dispatch table: one evaluator per supported method.
        evaluators = {
            'regression': metrics.reg_,
            'classification': metrics.class_,
        }
        if method not in evaluators:
            raise ValueError(
                f"Unknown method {method!r}; expected one of {sorted(evaluators)}"
            )
        evaluate = evaluators[method]

        # Only datasets that were actually supplied get a row in the result.
        perf = {}
        for name, data in zip(("train", "cv", "test"), (c, cv, t)):
            if data is not None:
                perf[name] = evaluate(data[0], data[1])

        # Outer keys become columns in DataFrame(perf); transpose so that each
        # dataset is a row and each metric a column.
        self.ret = DataFrame(perf).T

    @staticmethod
    def reg_(meas, pred):
        """
        Calculate regression performance metrics.

        Parameters:
        -----------
        meas : list or array
            The measured (true) values.
        pred : list or array
            The predicted values. Must contain as many values as ``meas``.

        Returns:
        --------
        dict
            A dictionary containing the following regression metrics:
            - 'r': Pearson correlation coefficient between meas and pred
            - 'r2': 1 - SS_residual / SS_total
            - 'rmse': Root Mean Square Error
            - 'mae': Mean Absolute Error
            - 'rpd': standard deviation of meas (np.std) divided by RMSE
            - 'rpiq': interquartile range of meas divided by RMSE

        Raises:
        -------
        ValueError
            If ``meas`` and ``pred`` do not contain the same number of values.
        """
        # Flatten so that column vectors of shape (n, 1) behave like 1-D input;
        # np.corrcoef would otherwise treat every row as a separate variable.
        meas = np.asarray(meas).ravel()
        pred = np.asarray(pred).ravel()
        if meas.size != pred.size:
            raise ValueError(
                f"meas and pred must have the same number of values "
                f"(got {meas.size} and {pred.size})"
            )

        residuals = meas - pred
        squared_residuals = residuals ** 2

        # Sums of squares: total (around the mean of meas) and residual.
        sst = np.sum((meas - np.mean(meas)) ** 2)
        ssr = np.sum(squared_residuals)

        # RMSE is shared by three metrics, so compute it once.
        rmse = np.sqrt(np.mean(squared_residuals))
        iqr = np.quantile(meas, 0.75) - np.quantile(meas, 0.25)

        # Divisions by zero (constant meas, perfect fit) are left to NumPy.
        return {
            'r': np.corrcoef(meas, pred)[0, 1],
            'r2': 1 - ssr / sst,
            'rmse': rmse,
            'mae': np.mean(np.abs(residuals)),
            'rpd': np.std(meas) / rmse,
            'rpiq': iqr / rmse,
        }

    @staticmethod
    def class_(meas, pred):
        """
        Placeholder for classification metrics (not implemented yet).

        Parameters:
        -----------
        meas : list or array
            The measured (true) values.
        pred : list or array
            The predicted values.

        Returns:
        --------
        None
            This method currently performs no computation and returns None.
        """
        return None

    @property
    def scores_(self):
        """
        Return the calculated performance metrics.

        Returns:
        --------
        DataFrame
            The table stored in ``ret``: one row per provided dataset
            (train, cross-validation, test), one column per metric.
        """
        return self.ret