Newer
Older
"""
A class for calculating performance metrics for regression and classification tasks.
Metrics are computed from pairs of measured and predicted values. Train, cross-validation,
and test data are handled separately, and the metrics are returned in a structured format.
Note: only the regression metrics are implemented; the classification metrics are currently a placeholder.
Attributes:
-----------
scores_ : DataFrame
A DataFrame containing the calculated performance metrics for each dataset (train, cross-validation, test).
"""
from typing import Optional, List
from pandas import DataFrame
def __init__(self, c: Optional[List] = None, cv: Optional[List] = None, t: Optional[List] = None, method='regression') -> None:
    """
    Compute the performance metrics for every data split that was supplied.

    Parameters:
    -----------
    c : Optional[List], optional
        Measured and predicted values for the training set, read as
        ``c[0]`` (measured) and ``c[1]`` (predicted). The default is None.
    cv : Optional[List], optional
        Measured and predicted values for the cross-validation set, same
        layout as ``c``. The default is None.
    t : Optional[List], optional
        Measured and predicted values for the test set, same layout as
        ``c``. The default is None.
    method : str, optional
        The method for performance evaluation, either 'regression' or
        'classification'. Any other value computes no metrics.
        The default is 'regression'.

    Notes:
    ------
    Splits passed as None are skipped. The metrics of the remaining splits
    are collected in a dict keyed by "train", "cv" and "test".
    """
    phase = [c, cv, t]
    index = np.array(["train", "cv", "test"])

    # Positions of the splits that were actually provided. This name was used
    # below without ever being defined, which raised NameError on every call.
    notnone = [i for i, values in enumerate(phase) if values is not None]

    # NOTE(review): `met_index` and `methods` are not used in the visible part
    # of this method; kept in case code outside this view relies on them.
    met_index = index[notnone]
    methods = ['regression', 'classification']

    perf = {}
    for i in notnone:
        if method == 'regression':
            perf[index[i]] = metrics.reg_(phase[i][0], phase[i][1])
        elif method == 'classification':
            perf[index[i]] = metrics.class_(phase[i][0], phase[i][1])

    # NOTE(review): `perf` is neither stored nor returned in the visible code,
    # while the class documentation mentions a `scores_` DataFrame - confirm
    # where that attribute is built from `perf`.
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
"""
Calculates regression performance metrics for the given measured and predicted values.
Parameters:
-----------
meas : list or array
The measured (true) values.
pred : list or array
The predicted values.
Returns:
--------
dict
A dictionary containing the following regression metrics:
- 'r': Correlation coefficient
- 'r2': R-squared
- 'rmse': Root Mean Square Error
- 'mae': Mean Absolute Error
- 'rpd': Ratio of Performance to Deviation
- 'rpiq': Relative Predictive Interval Quality
"""
meas = np.array(meas)
pred = np.array(pred)
xbar = np.mean(meas) # the average of measured values
e = np.subtract(meas, pred)
e2 = e ** 2 # the squared error
# Sum of squared:
# TOTAL
sst = np.sum((meas - xbar) ** 2)
# RESIDUAL
ssr = np.sum(e2)
# REGRESSION OR MODEL
ssm = np.sum(pred - xbar)
# Compute statistical metrics
metr = {}
metr['r'] = np.corrcoef(meas, pred)[0, 1]
metr['r2'] = 1 - ssr / sst
metr['rmse'] = np.sqrt(np.mean(e2))
metr['mae'] = np.mean(np.abs(e))
metr['rpd'] = np.std(meas) / np.sqrt(np.mean(e2))
metr['rpiq'] = (np.quantile(meas, 0.75) - np.quantile(meas, 0.25)) / np.sqrt(np.mean(e2))
return metr
"""
Placeholder method for classification metrics (not implemented yet).
Parameters:
-----------
meas : list or array
The measured (true) values.
pred : list or array
The predicted values.
Returns:
--------
None
This method currently does not perform any operations and returns None.
"""