Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
from Packages import *
from Class_Mod.Miscellaneous import *
# create model module with PINARD
def model_PLSR(xcal_csv, ycal_csv, sep, hdr, rd_seed):
    """Train and evaluate a PLS regression model on paired spectral CSV data.

    Loads X/y from the two CSV files, splits them with the Kennard-Stone
    method (75/25), fits a scaled preprocessing + PLS pipeline with target
    scaling, reports fit/test metrics via Streamlit, cross-validates, and
    returns the fitted estimator.

    Parameters
    ----------
    xcal_csv : str
        Path to the CSV file holding the predictor (spectra) matrix.
    ycal_csv : str
        Path to the CSV file holding the response values.
    sep : str
        Column separator used in both CSV files.
    hdr : str
        'yes' if the CSVs have a header/index column, anything else otherwise.
    rd_seed : int
        Random seed for numpy and the train/test split.

    Returns
    -------
    The fitted TransformedTargetRegressor.
    """
    np.random.seed(rd_seed)
    # hdr var correspond to column header True or False in the CSV
    if hdr == 'yes':
        col = 0
    else:
        col = False
    # loading the csv
    x, y = utils.load_csv(xcal_csv, ycal_csv, autoremove_na=True, sep=sep, x_hdr=0, y_hdr=0, x_index_col=col, y_index_col=col)
    # Split data into training and test sets using the kennard_stone method and correlation metric, 25% of data is used for testing
    train_index, test_index = train_test_split_idx(x, y=y, method="kennard_stone", metric="correlation", test_size=0.25, random_state=rd_seed)
    # Assign data to training and test sets
    X_train, y_train, X_test, y_test = x[train_index], y[train_index], x[test_index], y[test_index]
    st.write("Size of train and test sets: train " + str(X_train.shape) + ' ' + str(y_train.shape) + ' / test ' + str(X_test.shape) + ' ' + str(y_test.shape))
    # Declare preprocessing pipeline
    svgolay = [ ('_sg1',pp.SavitzkyGolay()),
                ('_sg2',pp.SavitzkyGolay())  # nested pipeline to perform the Savitzky-Golay method twice for 2nd order preprocessing
                ]
    preprocessing = [ ('id', pp.IdentityTransformer()),  # Identity transformer, no change to the data
                      ('savgol', pp.SavitzkyGolay()),    # Savitzky-Golay smoothing filter
                      ('derivate', pp.Derivate()),       # Calculate the first derivative of the data
                      ('SVG', FeatureUnion(svgolay))     # Savitzky-Golay applied twice (2nd order preprocessing)
                      ]
    # Declare complete pipeline
    pipeline = Pipeline([
        ('scaler', MinMaxScaler()),                     # scaling the data
        ('preprocessing', FeatureUnion(preprocessing)), # preprocessing
        ('PLS', PLSRegression())                        # regressor
    ])
    # Estimator including y values scaling
    estimator = TransformedTargetRegressor(regressor = pipeline, transformer = MinMaxScaler())
    # Training
    trained = estimator.fit(X_train, y_train)
    # fit scores
    st.write("fit scores / R²: " + str(estimator.score(X_test,y_test)))
    # Predictions on test set
    Y_preds = estimator.predict(X_test)  # make predictions on test data and assign to Y_preds variable
    # Compute each metric once; reused for both the table and the text output.
    # (The previous placeholder table had a duplicate "MSE: " key and
    # hard-coded dummy values, so it displayed fabricated numbers.)
    mae = mean_absolute_error(y_test, Y_preds)
    mse = mean_squared_error(y_test, Y_preds)
    mape = mean_absolute_percentage_error(y_test, Y_preds)
    met = pd.DataFrame({"MAE: ": [mae],
                        "MSE: ": [mse],
                        "MAPE: ": [mape]}).T
    st.table(met)
    st.write("MAE: " + str(mae))
    st.write("MSE: " + str(mse))
    st.write("MAPE: " + str(mape))
    # Cross-Validate the model
    CV_model(estimator, X_train, y_train, 3)
    return (trained)