# Sanity check: show the shapes of the train/test features and targets.
print(*(split.shape for split in (X_train, y_train, X_test, y_test)))


# ## Learning

# In[111]:





# In[112]:


# Notebook display cell: a bare name echoes its value in Jupyter.
# When this file is run as a plain script the line has no visible effect.
preprocessing


# In[113]:


# Complete modelling pipeline, applied in order:
#   1. 'scaler'        - MinMaxScaler on the input features
#   2. 'preprocessing' - FeatureUnion over the transformers in `preprocessing`
#   3. 'PLS'           - PLSRegression as the final regressor
# NOTE: a commented-out alternative for step 2 chained two pp.SavitzkyGolay()
# steps (inline, or as a nested Pipeline) to apply the Savitzky-Golay method
# twice for 2nd order preprocessing.
pipeline = Pipeline(
    steps=[
        ("scaler", MinMaxScaler()),
        ("preprocessing", FeatureUnion(preprocessing)),
        ("PLS", PLSRegression()),
    ]
)


# In[114]:


# Notebook display cell: echoes the pipeline's representation in Jupyter.
# When this file is run as a plain script the line has no visible effect.
pipeline


# In[115]:


# Wrap the pipeline so that the target values are min-max scaled as well:
# the transformer is applied to y before fitting the regressor.
estimator = TransformedTargetRegressor(
    regressor=pipeline,
    transformer=MinMaxScaler(),
)


# In[116]:


# Notebook display cell: echoes the estimator's representation in Jupyter.
# When this file is run as a plain script the line has no visible effect.
estimator


# In[117]:


# Fit the target-scaling estimator (and its wrapped pipeline) on the training split.
estimator.fit(X=X_train, y=y_train)


# In[110]:


# Score the fitted estimator on the held-out test split (shown as cell output
# in a notebook; the value is discarded when run as a script).
estimator.score(X=X_test, y=y_test)


# In[ ]:


# Predict on the test data, keep the result in Y_preds, and report R².
Y_preds = estimator.predict(X=X_test)
print("R²", r2_score(y_true=y_test, y_pred=Y_preds))


# ## Prediction results

# In[ ]:


# Error metrics of the test-set predictions against the true targets.
print("MAE", mean_absolute_error(y_true=y_test, y_pred=Y_preds))
print("MSE", mean_squared_error(y_true=y_test, y_pred=Y_preds))
print("MAPE", mean_absolute_percentage_error(y_true=y_test, y_pred=Y_preds))
print("R²", r2_score(y_true=y_test, y_pred=Y_preds))
# Debug aid, left disabled:
# print(estimator.get_params())


# ## Cross Validation

# In[ ]:


# Per-fold scores from 3-fold cross-validation on the full x / y data.
print("CV_scores", cross_val_score(estimator, X=x, y=y, cv=3))

# Out-of-fold predictions for every sample, scored against the true targets.
# NOTE: this rebinds Y_preds, replacing the test-set predictions made earlier.
print("-- CV predict --")
Y_preds = cross_val_predict(estimator, X=x, y=y, cv=3)
print("MAE", mean_absolute_error(y_true=y, y_pred=Y_preds))
print("MSE", mean_squared_error(y_true=y, y_pred=Y_preds))
print("MAPE", mean_absolute_percentage_error(y_true=y, y_pred=Y_preds))
print("R²", r2_score(y_true=y, y_pred=Y_preds))

# Full cross-validation report (including train scores), run on 3 parallel jobs.
print("-- Cross Validate --")
cv_results = cross_validate(
    estimator,
    X=x,
    y=y,
    cv=3,
    return_train_score=True,
    n_jobs=3,
)
for key in cv_results:
    print(key, cv_results[key])