Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
NIRS_Workflow
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
CEFE
PACE
NIRS_Workflow
Commits
fa5863ce
Commit
fa5863ce
authored
11 months ago
by
DIANE
Browse files
Options
Downloads
Patches
Plain Diff
varsel update
parent
2d045dd9
No related branches found
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
Class_Mod/VarSel.py
+136
-0
136 additions, 0 deletions
Class_Mod/VarSel.py
with
136 additions
and
0 deletions
Class_Mod/VarSel.py
+
136
−
0
View file @
fa5863ce
from
Packages
import
*
from
Class_Mod
import
metrics
class TpeIpls:
    """Interval-PLS wavelength selection driven by a Tree-structured Parzen Estimator.

    This framework belongs to the family of wavelength selection algorithms.
    It was introduced as an improvement over the forward and backward interval
    selection algorithms: it combines partial least squares regression with
    the tree-structured Parzen estimator (a Bayesian optimisation algorithm
    first introduced in 2011), which yields a wrapper method for interval-PLS.

    The spectral data keep their integrity because variables are selected as
    contiguous intervals (sequential data) rather than through discrete,
    point-by-point optimisation.

    More generally, optimisation algorithms can be used to find the subset of
    variables that optimises a given criterion (e.g. maximise predictive
    performance, minimise overfitting).

    Typical use: build the object, call ``tune(n_iter)``, then read
    ``segments_``, ``tpe_pls_performance`` and ``meas_vs_pred``.
    """

    # Lowest objective value seen so far. This is class-level state, hence
    # shared by every instance; __init__ resets it at the start of a new search.
    SCORE = 10000
    # Table of the variables retained by the best model found so far
    # (column "Vars" = column labels, index = column positions). Class-level too.
    index_export = pd.DataFrame()

    def __init__(self, x_train, x_test, y_train, y_test, scale, Kfold, n_intervall):
        """Store the data and build the hyperopt search space.

        Parameters
        ----------
        x_train, x_test
            Predictor tables. They are indexed with ``.iloc`` and ``x_test``
            is read through ``.columns``, so pandas DataFrames are expected.
        y_train, y_test
            Reference values for the training and test sets.
        scale
            Forwarded as ``scale=`` to ``PLSRegression``.
        Kfold
            Forwarded as ``cv=`` to ``cross_val_predict``.
        n_intervall
            Number of intervals to select; must be at least 1.

        Raises
        ------
        ValueError
            If ``n_intervall`` is smaller than 1 (the search would otherwise
            fail later, when no interval can be concatenated).
        """
        if n_intervall < 1:
            raise ValueError("n_intervall must be at least 1")

        TpeIpls.SCORE = 10000
        self.x_train = x_train
        self.x_test = x_test
        self.y_train = y_train
        self.y_test = y_test
        self.scale = scale
        self.Kfold = Kfold
        self.p = self.x_train.shape[1]
        self.n_intervall = n_intervall
        # Every interval needs two boundaries (a start and an end).
        self.__n_arrets = self.n_intervall * 2

        # State filled in by tune(); initialised here so that reading a result
        # before tuning fails with a clear message instead of an opaque one.
        self.segments = []
        self.nlv = None
        self.best_params_ = None

        # One integer boundary per label v1..v{2*n_intervall}, drawn from the
        # column positions (hp.randint's upper bound is exclusive per the
        # hyperopt documentation), plus the number of latent variables.
        self.PLS_params = {
            f'v{i}': hp.randint(f'v{i}', 0, self.p)
            for i in range(1, self.__n_arrets + 1)
        }
        self.PLS_params['n_components'] = hp.randint("n_components", 1, 6)

    def _require_tuned(self):
        """Raise if no model has been selected by ``tune`` yet."""
        if not getattr(self, "segments", None) or getattr(self, "nlv", None) is None:
            # AttributeError is kept on purpose: it is the exception an untuned
            # object raised before these attributes were initialised.
            raise AttributeError(
                "No interval selection is available yet; call tune() first."
            )

    def _objective(self, params):
        """Score one candidate drawn by hyperopt (lower is better).

        ``params`` maps ``v1..v{2*n_intervall}`` to column positions and
        ``n_components`` to the number of latent variables. The sorted
        positions are paired into intervals, a PLS model is fitted on the
        union of those intervals, and the returned score is
        ``rmsecv / rmsec + 100 * rmsecv / mean(y_train)`` rounded with
        ``np.round``. When the score improves on the best one so far by more
        than 0.5, the candidate is recorded as the new best model.
        """
        # Sorting turns the unordered draws into consecutive (start, end) pairs.
        self.idx = sorted(params[f'v{i}'] for i in range(1, self.__n_arrets + 1))
        arrays = [
            np.arange(self.idx[2 * i], self.idx[2 * i + 1] + 1)
            for i in range(self.n_intervall)
        ]
        # Intervals cannot overlap after sorting, but two of them may share a
        # boundary value, hence the de-duplication.
        selected = np.unique(np.concatenate(arrays, axis=0))
        x_selected = self.x_train.iloc[:, selected]

        # Train the model
        try:
            model = PLSRegression(scale=self.scale, n_components=params['n_components'])
            model.fit(x_selected, self.y_train)
        except ValueError:
            # Presumably raised when n_components is too large for the selected
            # columns; fall back to a single latent variable and record it.
            params["n_components"] = 1
            model = PLSRegression(scale=self.scale, n_components=params['n_components'])
            model.fit(x_selected, self.y_train)

        # Predictions: calibration, cross-validation and test.
        yc = model.predict(x_selected).ravel()
        ycv = cross_val_predict(
            model, x_selected, self.y_train, cv=self.Kfold, n_jobs=-1
        ).ravel()
        yt = model.predict(self.x_test.iloc[:, selected]).ravel()

        # R² values are only used in the progress report below.
        r2c = r2_score(self.y_train, yc)
        r2cv = r2_score(self.y_train, ycv)
        r2t = r2_score(self.y_test, yt)
        rmsecv = np.sqrt(mean_squared_error(self.y_train, ycv))
        rmsec = np.sqrt(mean_squared_error(self.y_train, yc))

        # First term penalises a gap between CV and calibration error, second
        # term is the CV error as a percentage of the mean response.
        # NOTE(review): assumes y_train.mean() yields a scalar - confirm.
        score = np.round(rmsecv / rmsec + rmsecv * 100 / self.y_train.mean())

        if score < TpeIpls.SCORE - 0.5:
            TpeIpls.SCORE = score
            self.nlv = params['n_components']

            print('--**-------------##---------#~###~#---------##---------------**--')
            print(f'***** R²train : [{round(r2c * 100)}]**** R²cv : [{round(r2cv * 100)}]**** R²test : [{round(r2t * 100)}]*****')
            print(f'***** N Predictiors : [{len(selected)}] ******** NLV : [{params["n_components"]}]*****')

            TpeIpls.index_export = pd.DataFrame()
            TpeIpls.index_export["Vars"] = self.x_test.columns[selected]
            TpeIpls.index_export.index = selected

            # Save the performance table of the new best model.
            # NOTE(review): `path` is not defined in this class; presumably it
            # is provided by `from Packages import *` - confirm.
            metrics(
                train=(self.y_train, yc),
                cv=(self.y_train, ycv),
                test=(self.y_test, yt),
            ).round(2).to_excel(path + "performance.xlsx")

            self.segments = arrays
            print("''---------------------------- evolution noticed, hence a new model was saved-------------------------------''")

        return score

    def tune(self, n_iter):
        """Run ``n_iter`` TPE evaluations of ``_objective``.

        The best interval set is kept on the instance (``segments``, ``nlv``)
        by ``_objective`` itself; the raw result of ``fmin`` is stored in
        ``best_params_``. Nothing is returned.
        """
        print('------------------------------------------------ Optimization of the process has started ---------------------------------------------')
        trials = Trials()
        self.best_params_ = fmin(
            fn=self._objective,
            space=self.PLS_params,
            algo=tpe.suggest,  # Tree of Parzen Estimators (tpe), a Bayesian approach
            max_evals=n_iter,
            trials=trials,
            verbose=2,
        )

    @property
    def segments_(self):
        """Return the selected intervals as a table with columns ``from``/``to``.

        Rows are labelled ``band1``, ``band2``, ... and hold the first and last
        column position of each interval. The same information is stored as a
        dict in ``self.bands``. An empty table is returned when no interval
        has been selected yet.
        """
        self.bands = {
            f'band{rank}': [segment[0], segment[-1]]
            for rank, segment in enumerate(self.segments, start=1)
        }
        # Passing index and columns explicitly keeps the frame well-formed
        # (two named columns) even when there is no band at all.
        return pd.DataFrame(
            list(self.bands.values()),
            index=list(self.bands),
            columns=['from', 'to'],
        )

    @property
    def tpe_pls_performance(self):
        """Refit PLS on the selected intervals and return its performance table.

        Side effects: stores the calibration, cross-validation and test
        predictions in ``self.yc``, ``self.ycv`` and ``self.yt``.

        Raises
        ------
        AttributeError
            If ``tune`` has not selected a model yet.
        """
        self._require_tuned()

        # Build the band table once; the property recreates it on every access.
        bands = self.segments_
        covered = set()
        for start, stop in zip(bands["from"], bands["to"]):
            covered.update(range(start, stop + 1))
        # Sorted so the columns come in the same order as in _objective.
        variables_idx = sorted(covered)

        x_selected = self.x_train.iloc[:, variables_idx]
        pls = PLSRegression(n_components=self.nlv, scale=self.scale)
        pls.fit(x_selected, self.y_train)

        self.yc = pls.predict(x_selected).ravel()
        self.ycv = cross_val_predict(
            pls, x_selected, self.y_train, cv=self.Kfold, n_jobs=-1
        ).ravel()
        self.yt = pls.predict(self.x_test.iloc[:, variables_idx]).ravel()

        perf = metrics(
            train=(self.y_train, self.yc),
            cv=(self.y_train, self.ycv),
            test=(self.y_test, self.yt),
        ).round(2)
        return perf

    @property
    def meas_vs_pred(self):
        """Show measured-vs-predicted regression plots (train, CV and test).

        The predictions are produced by ``tpe_pls_performance``; they are
        computed here first if that property has not been read yet.
        """
        if not hasattr(self, "yc"):
            _ = self.tpe_pls_performance

        _, ax = plt.subplots()
        sns.regplot(x=self.y_train, y=self.yc, ax=ax)
        sns.regplot(x=self.y_train, y=self.ycv, ax=ax)
        sns.regplot(x=self.y_test, y=self.yt, ax=ax)
        plt.show()
\ No newline at end of file
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment