from typing import List, Optional

import numpy as np
from pandas import DataFrame


class metrics:
    """
    Compute performance metrics for regression (and, eventually, classification) tasks.

    Each dataset (train, cross-validation, test) is supplied as a pair whose
    first element holds the measured values and whose second element holds the
    predicted values. Metrics are computed for every dataset that is provided
    and gathered in a single DataFrame with one row per dataset.

    Attributes:
    -----------
    ret : DataFrame
        The computed metrics, indexed by dataset name ("train", "cv", "test").
    scores_ : DataFrame
        Read-only property exposing ``ret``.
    """

    def __init__(self, c: Optional[List] = None, cv: Optional[List] = None,
                 t: Optional[List] = None, method: str = 'regression') -> None:
        """
        Compute the performance metrics for every dataset that is provided.

        Parameters:
        -----------
        c : Optional[List], optional
            Pair ``[measured, predicted]`` for the training set. The default is None.
        cv : Optional[List], optional
            Pair ``[measured, predicted]`` for the cross-validation set. The default is None.
        t : Optional[List], optional
            Pair ``[measured, predicted]`` for the test set. The default is None.
        method : str, optional
            Either 'regression' or 'classification'. The default is 'regression'.

        Raises:
        -------
        ValueError
            If ``method`` is neither 'regression' nor 'classification'.
        NotImplementedError
            If the selected method returns no metrics (currently the case for
            'classification', which is still a placeholder).
        """
        if method not in ('regression', 'classification'):
            raise ValueError(
                f"method must be 'regression' or 'classification', got {method!r}"
            )
        scorer = metrics.reg_ if method == 'regression' else metrics.class_

        perf = {}
        for name, data in (("train", c), ("cv", cv), ("test", t)):
            if data is None:
                continue
            scores = scorer(data[0], data[1])
            if scores is None:
                # A None result cannot be turned into a DataFrame row; fail
                # with a clear message instead of an obscure pandas error.
                raise NotImplementedError(
                    f"{method} metrics are not implemented yet"
                )
            perf[name] = scores

        # Datasets are the dict keys (columns); transpose so each dataset is a row.
        self.ret = DataFrame(perf).T

    @staticmethod
    def reg_(meas, pred):
        """
        Calculate regression performance metrics for measured vs. predicted values.

        Both inputs are converted to flat float arrays, so column vectors such
        as ``(n, 1)`` predictions are handled the same way as 1-D sequences.

        Parameters:
        -----------
        meas : list or array
            The measured (true) values.
        pred : list or array
            The predicted values.

        Returns:
        --------
        dict
            A dictionary containing the following regression metrics:
            - 'r': Correlation coefficient
            - 'r2': R-squared (1 - residual sum of squares / total sum of squares)
            - 'rmse': Root Mean Square Error
            - 'mae': Mean Absolute Error
            - 'rpd': Ratio of Performance to Deviation (std of measured / RMSE)
            - 'rpiq': Interquartile range of measured values / RMSE

        Raises:
        -------
        ValueError
            If ``meas`` and ``pred`` do not contain the same number of values.
        """
        # Flatten first: a (n,) array minus a (n, 1) array would otherwise
        # broadcast to an (n, n) matrix and silently corrupt every metric.
        meas = np.asarray(meas, dtype=float).ravel()
        pred = np.asarray(pred, dtype=float).ravel()
        if meas.size != pred.size:
            raise ValueError(
                f"meas and pred must have the same number of values, "
                f"got {meas.size} and {pred.size}"
            )

        residuals = meas - pred
        squared = residuals ** 2

        sst = np.sum((meas - np.mean(meas)) ** 2)  # total sum of squares
        ssr = np.sum(squared)                      # residual sum of squares
        rmse = np.sqrt(np.mean(squared))
        q1, q3 = np.quantile(meas, [0.25, 0.75])

        return {
            'r': np.corrcoef(meas, pred)[0, 1],
            'r2': 1 - ssr / sst,
            'rmse': rmse,
            'mae': np.mean(np.abs(residuals)),
            'rpd': np.std(meas) / rmse,  # np.std default (ddof=0)
            'rpiq': (q3 - q1) / rmse,
        }

    @staticmethod
    def class_(meas, pred):
        """
        Placeholder for classification metrics (not implemented yet).

        Parameters:
        -----------
        meas : list or array
            The measured (true) values.
        pred : list or array
            The predicted values.

        Returns:
        --------
        None
            This method currently performs no computation and returns None.
        """
        return None

    @property
    def scores_(self):
        """
        Return the calculated performance metrics.

        Returns:
        --------
        DataFrame
            One row per provided dataset (train, cv, test), one column per metric.
        """
        return self.ret