# eval_metrics.py


from typing import List, Optional

import numpy as np
from pandas import DataFrame


class metrics:
    """
    Compute performance metrics from measured and predicted values.

    Up to three datasets (train, cross-validation, test) can be supplied; each one that is
    not None is scored and becomes one row of the resulting table. Regression metrics are
    implemented; classification is only a placeholder for now.

    Attributes:
    -----------
    ret : DataFrame
        The metrics table, one row per supplied dataset ("train", "cv", "test") and one
        column per metric.
    scores_ : DataFrame
        Read-only property returning ``ret``.
    """

    def __init__(self, c: Optional[List] = None, cv: Optional[List] = None, t: Optional[List] = None, method: str = 'regression') -> None:
        """
        Score every supplied dataset and store the result in ``self.ret``.

        Parameters:
        -----------
        c : Optional[List], optional
            Training data as ``[measured, predicted]``. The default is None.
        cv : Optional[List], optional
            Cross-validation data as ``[measured, predicted]``. The default is None.
        t : Optional[List], optional
            Test data as ``[measured, predicted]``. The default is None.
        method : str, optional
            Either 'regression' or 'classification'. The default is 'regression'.

        Raises:
        -------
        ValueError
            If ``method`` is not one of the supported names.
        NotImplementedError
            If the selected scorer returns no metrics for a supplied dataset
            (currently the case for 'classification').
        """
        methods = ('regression', 'classification')
        if method not in methods:
            raise ValueError(f"method must be one of {methods}, got {method!r}")
        scorer = metrics.reg_ if method == 'regression' else metrics.class_

        perf = {}
        for name, data in zip(("train", "cv", "test"), (c, cv, t)):
            if data is None:
                continue
            result = scorer(data[0], data[1])
            if result is None:
                # Fail with a clear message instead of an unrelated pandas
                # error when the table is built from None entries.
                raise NotImplementedError(f"{method} metrics are not implemented yet")
            perf[name] = result

        # A dict of dicts yields one column per dataset; transpose so that each
        # dataset is a row. This works for any number of datasets, including
        # exactly one.
        self.ret = DataFrame(perf).T

    @staticmethod
    def reg_(meas, pred):
        """
        Calculate regression performance metrics for measured and predicted values.

        Both inputs are flattened to 1-D first, so column vectors of shape (n, 1) are
        scored the same way as flat sequences of length n.

        Parameters:
        -----------
        meas : list or array
            The measured (true) values.
        pred : list or array
            The predicted values.

        Returns:
        --------
        dict
            A dictionary containing the following regression metrics:
            - 'r': Pearson correlation coefficient between measured and predicted values
            - 'r2': Coefficient of determination, 1 - SS_residual / SS_total
            - 'rmse': Root Mean Square Error
            - 'mae': Mean Absolute Error
            - 'rpd': Ratio of Performance to Deviation (population std of measured / rmse)
            - 'rpiq': Ratio of Performance to InterQuartile distance (IQR of measured / rmse)

        Notes:
        ------
        No guard is applied against a zero denominator: a constant ``meas`` (for 'r2')
        or a perfect prediction (for 'rpd' and 'rpiq') gives inf/nan under NumPy's
        floating-point rules.
        """
        # Flatten so np.corrcoef sees two variables with n observations each,
        # rather than treating every row of an (n, 1) input as its own variable.
        meas = np.asarray(meas).ravel()
        pred = np.asarray(pred).ravel()

        err = meas - pred
        sq_err = err ** 2

        # Total and residual sums of squares.
        sst = np.sum((meas - np.mean(meas)) ** 2)
        ssr = np.sum(sq_err)

        rmse = np.sqrt(np.mean(sq_err))
        iqr = np.quantile(meas, 0.75) - np.quantile(meas, 0.25)

        return {
            'r': np.corrcoef(meas, pred)[0, 1],
            'r2': 1 - ssr / sst,
            'rmse': rmse,
            'mae': np.mean(np.abs(err)),
            'rpd': np.std(meas) / rmse,
            'rpiq': iqr / rmse,
        }

    @staticmethod
    def class_(meas, pred):
        """
        Placeholder for classification metrics (not implemented yet).

        Parameters:
        -----------
        meas : list or array
            The measured (true) values.
        pred : list or array
            The predicted values.

        Returns:
        --------
        None
            This method currently performs no computation and returns None.
        """
        return None

    @property
    def scores_(self):
        """
        Return the calculated performance metrics.

        Returns:
        --------
        DataFrame
            The metrics table built in ``__init__``: one row per supplied dataset
            (train, cross-validation, test) and one column per metric.
        """
        return self.ret