Newer
Older
from Packages import *
# local CSS
## load the custom CSS in the style folder
@st.cache_data
def local_css(file_name):
    """Inject the stylesheet stored in ``file_name`` into the Streamlit page.

    The file content is wrapped in a ``<style>`` element and rendered with
    ``st.markdown`` using ``unsafe_allow_html=True``.

    Args:
        file_name: Path of the CSS file to read.
    """
    with open(file_name) as css_file:
        css_rules = css_file.read()
        st.markdown(f"<style>{css_rules}</style>", unsafe_allow_html=True)
# predict module
def prediction(NIRS_csv, qsep, qhdr, model):
    """Load spectra from a CSV file and return the model's predictions.

    Args:
        NIRS_csv: Path or file-like object accepted by ``pandas.read_csv``.
        qsep: Field separator used in the CSV.
        qhdr: ``'yes'`` to use the first CSV column as the row index; any
            other value keeps every column as data.
        model: Object exposing ``predict(X)``.

    Returns:
        Whatever ``model.predict`` returns for the loaded table.
    """
    # Local import keeps this block self-contained regardless of what the
    # wildcard import at the top of the file provides.
    import pandas as pd

    # 0 -> first column becomes the index; False -> no index column.
    index_col = 0 if qhdr == 'yes' else False

    # NOTE(review): the previous body called model.predict(X_test) without ever
    # defining X_test (NameError) and never used the computed column flag.
    # Loading the CSV with that flag as `index_col` is the evident intent -
    # confirm against the caller.
    X_test = pd.read_csv(NIRS_csv, sep=qsep, index_col=index_col)
    return model.predict(X_test)
@st.cache_data
def reg_plot(meas, pred, train_idx, test_idx):
    """Plot predicted against measured values for the Cal and Val sets.

    Args:
        meas: Sequence whose item 0 holds the calibration measurements and
            item 1 the validation measurements.
        pred: Sequence with the matching predictions, same layout as ``meas``.
        train_idx: Labels of the calibration samples, in the order of
            ``meas[0]``; used to annotate large errors.
        test_idx: Labels of the validation samples, in the order of ``meas[1]``.

    Returns:
        The matplotlib figure holding the plot.
    """
    # Work on local column-vector copies: the previous version overwrote the
    # items of the caller's `meas` / `pred` lists in place.
    meas_cols = [np.array(m).reshape(-1, 1) for m in meas]
    pred_cols = [np.array(p).reshape(-1, 1) for p in pred]

    # Intercept (a0) and slope (a1) of "Predicted = a0 + a1 x Measured" per set.
    a0 = np.ones(2)
    a1 = np.ones(2)
    for i, (m, p) in enumerate(zip(meas_cols, pred_cols)):
        M = LinearRegression()
        M.fit(m, p)
        a1[i] = np.round(M.coef_[0][0], 2)
        a0[i] = np.round(M.intercept_[0], 2)

    meas_cal, meas_val = meas_cols[0].reshape(-1), meas_cols[1].reshape(-1)
    pred_cal, pred_val = pred_cols[0].reshape(-1), pred_cols[1].reshape(-1)
    ec = np.subtract(meas_cal, pred_cal)  # calibration errors
    et = np.subtract(meas_val, pred_val)  # validation errors

    fig, ax = plt.subplots(figsize=(12, 4))
    sns.regplot(x=meas_cols[0], y=pred_cols[0], color="#2C6B6F",
                label=f'Cal (Predicted = {a0[0]} + {a1[0]} x Measured)',
                scatter_kws={'edgecolor': 'black'})
    sns.regplot(x=meas_cols[1], y=pred_cols[1], color='#d0f7be',
                label=f'Val (Predicted = {a0[1]} + {a1[1]} x Measured)',
                scatter_kws={'edgecolor': 'black'})

    # Identity line spanning the calibration range, padded by 0.05 on each side.
    low = np.min(meas_cal) - 0.05
    high = np.max(meas_cal) + 0.05
    plt.plot([low, high], [low, high], color='black')

    # Label every sample whose absolute error exceeds mean(error) + 3*std(error).
    # NOTE(review): the rule is not centred on the mean error; kept as it was.
    groups = (
        (train_idx, meas_cal, pred_cal, ec),
        (test_idx, meas_val, pred_val, et),
    )
    for labels, x_vals, y_vals, err in groups:
        # The threshold does not depend on the sample, so compute it once.
        threshold = np.mean(err) + 3 * np.std(err)
        for i, txt in enumerate(labels):
            if np.abs(err[i]) > threshold:
                plt.annotate(txt, (x_vals[i], y_vals[i]))

    ax.set_ylabel('Predicted values')
    ax.set_xlabel('Measured values')
    plt.legend()
    plt.margins(0)
    return fig
@st.cache_data
def resid_plot(meas, pred, train_idx, test_idx):
    """Plot residuals (measured - predicted) against predicted values.

    Args:
        meas: Sequence whose item 0 holds the calibration measurements and
            item 1 the validation measurements.
        pred: Sequence with the matching predictions, same layout as ``meas``.
        train_idx: Labels of the calibration samples, in the order of
            ``meas[0]``; used to annotate large residuals.
        test_idx: Labels of the validation samples, in the order of ``meas[1]``.

    Returns:
        The matplotlib figure holding the plot.
    """
    e = [np.subtract(meas[0], pred[0]), np.subtract(meas[1], pred[1])]

    # The regression of residuals on measured values that used to be fitted
    # here only fed commented-out legend labels, so it has been removed.

    fig, ax = plt.subplots(figsize=(12, 4))
    sns.scatterplot(x=pred[0], y=e[0], color="#2C6B6F", label='Cal', edgecolor="black")
    sns.scatterplot(x=pred[1], y=e[1], color="#d0f7be", label='Val', edgecolor="black")

    plt.axhline(y=0, c='black', linestyle=':')
    # Symmetric y-range with 10 % headroom around the largest absolute residual.
    lim = np.max(abs(np.concatenate([e[0], e[1]], axis=0))) * 1.1
    plt.ylim(-lim, lim)

    # Label every sample whose absolute residual exceeds mean + 3*std of its set.
    # NOTE(review): the rule is not centred on the mean residual; kept as it was.
    for labels, predicted, resid in ((train_idx, pred[0], e[0]), (test_idx, pred[1], e[1])):
        resid = np.array(resid).reshape(-1)
        predicted = np.array(predicted).reshape(-1)
        # The threshold does not depend on the sample, so compute it once.
        threshold = np.mean(resid) + 3 * np.std(resid)
        for i, txt in enumerate(labels):
            if np.abs(resid[i]) > threshold:
                plt.annotate(txt, (predicted[i], resid[i]))

    ax.set_ylabel('Residuals')
    ax.set_xlabel('Predicted values')
    plt.legend()
    plt.margins(0)
    return fig
# Create a download button: `data` is the path of the file to offer, `export_name` the file name passed to the button
def download_results(data, export_name):
    """Render a primary 'Download' button that serves the file at ``data``.

    Args:
        data: Path of the file to open and hand to the button.
        export_name: File name passed to ``st.download_button``.
    """
    with open(data) as source_file:
        st.download_button(
            label='Download',
            data=source_file,
            file_name=export_name,
            type='primary',
        )
# NOTE(review): orphaned fragment - the enclosing `def` is not visible here, and
# `specdf`, `ax`, `xunits` and `yunits` are defined outside these lines.
# Presumably an `else:` separated the two plotting calls below - TODO confirm.
if isinstance(specdf.columns[0], str):
specdf.T.plot(legend=False, ax = ax, color = '#2474b4')
# NOTE(review): `min` shadows the builtin, stores the *maximum* column label and
# is not read anywhere in the visible lines.
min = np.max(specdf.columns)
# Same plot, with the x axis inverted.
specdf.T.plot(legend=False, ax = ax, color = '#2474b4').invert_xaxis()
ax.set_xlabel(xunits, fontsize=30)
ax.set_ylabel(yunits, fontsize=30)
@st.cache_data
def hist(y, y_train, y_test, target_name = 'y'):
    """Overlay histograms (with KDE) of the full, Cal and Val target values.

    Args:
        y: Values of the whole data set.
        y_train: Values of the calibration subset.
        y_test: Values of the validation subset.
        target_name: Name used for the x-axis label and the legend entries.

    Returns:
        The matplotlib figure holding the plot. The previous version built
        the figure but returned nothing, unlike the other plot helpers here.
    """
    fig, ax = plt.subplots(figsize=(12, 3))
    name = str(target_name)
    series = (
        (y, "#004e9e", name),
        (y_train, "#2C6B6F", name + " (Cal)"),
        (y_test, "#d0f7be", name + " (Val)"),
    )
    for values, color, label in series:
        sns.histplot(values, color=color, kde=True, label=label, ax=ax, fill=True)
    ax.set_xlabel(name)
    plt.legend()
    plt.tight_layout()
    return fig
@st.cache_data
def pred_hist(pred):
    """Draw a 12-bin histogram of ``pred`` on a frameless, gridded axis.

    Args:
        pred: Values passed to ``Axes.hist``.

    Returns:
        The matplotlib figure holding the histogram.
    """
    figure, axis = plt.subplots(1, 1, figsize=(15, 3), tight_layout=True)

    # Light dash-dot grid behind the bars.
    axis.grid(color='grey', linestyle='-.', linewidth=0.5, alpha=0.6)

    # Hide the four borders of the plotting area.
    for side in ('top', 'bottom', 'left', 'right'):
        axis.spines[side].set_visible(False)

    # No tick marks; keep some padding between the axes and their labels.
    for tick_axis, padding in ((axis.xaxis, 5), (axis.yaxis, 10)):
        tick_axis.set_ticks_position('none')
        tick_axis.set_tick_params(pad=padding)

    axis.hist(pred, bins=12)
    return figure
@st.cache_data
def fig_export():
    """Placeholder with no behaviour yet; returns ``None``."""
    return None
@st.cache_data(show_spinner =True)
def data_split(x, y):
    """Split ``x`` and ``y`` into training and test subsets.

    The row positions come from ``train_test_split_idx`` called with the
    ``kennard_stone`` method, the ``correlation`` metric, ``test_size=0.25``
    and ``random_state=42``.

    Args:
        x: Table of predictors, indexed positionally through ``.iloc``.
        y: Target values, indexed positionally through ``.iloc``.

    Returns:
        ``(X_train, X_test, y_train, y_test, train_index, test_index)``.
    """
    split_options = {
        "method": "kennard_stone",
        "metric": "correlation",
        "test_size": 0.25,
        "random_state": 42,
    }
    train_index, test_index = train_test_split_idx(x, y=y, **split_options)

    # Select the rows of each subset by position.
    X_train = DataFrame(x.iloc[train_index, :])
    X_test = DataFrame(x.iloc[test_index, :])
    y_train = y.iloc[train_index]
    y_test = y.iloc[test_index]
    return X_train, X_test, y_train, y_test, train_index, test_index
def desc_stats(x):
    """Return descriptive statistics of ``x`` as a dictionary.

    Args:
        x: Array-like exposing ``.shape``.

    Returns:
        A dict with the keys ``'N samples'``, ``'Min'``, ``'Max'``, ``'Mean'``,
        ``'Median'``, ``'S'`` (``np.std``), ``'RSD'`` (``S * 100 / Mean``),
        ``'Skew'`` and ``'Kurt'`` (both computed along axis 0 with
        ``bias=True``).
    """
    n_samples = x.shape[0]
    lowest, highest = np.min(x), np.max(x)
    mean_value = np.mean(x)
    median_value = np.median(x)
    std_value = np.std(x)
    return {
        'N samples': n_samples,
        'Min': lowest,
        'Max': highest,
        'Mean': mean_value,
        'Median': median_value,
        'S': std_value,
        'RSD': std_value * 100 / mean_value,
        'Skew': skew(x, axis=0, bias=True),
        'Kurt': kurtosis(x, axis=0, bias=True),
    }
def hash_data(data):
    """Return the hexadecimal xxHash32 digest of a text rendering of ``data``.

    Supported inputs: pandas DataFrame/Series, NumPy arrays, lists, tuples,
    dicts, ints, floats, bools, strings and bytes.

    Args:
        data: Object to hash.

    Returns:
        The digest as a hexadecimal string.

    Raises:
        TypeError: If ``data`` is of an unsupported type.
    """
    import pandas as pd

    # Convert to a string representation
    # NOTE(review): the guard of this first branch was missing (the chain
    # started with `elif`); `.to_string()` points at pandas objects - confirm
    # whether Series were meant to be accepted too.
    if isinstance(data, (pd.DataFrame, pd.Series)):
        data_str = data.to_string()
    elif isinstance(data, np.ndarray):
        # threshold=data.size stops NumPy from abbreviating large arrays with
        # "...", which made different arrays hash to the same value. Arrays
        # small enough not to be abbreviated render exactly as before.
        data_str = np.array2string(data, separator=',', threshold=data.size)
    elif isinstance(data, (list, tuple)):
        data_str = str(data)
    elif isinstance(data, dict):
        # Ensure consistent order for dict items
        data_str = str(sorted(data.items()))
    elif isinstance(data, (int, float, str, bool)):
        data_str = str(data)
    elif isinstance(data, bytes):
        # Undecodable bytes are dropped, so this rendering is lossy.
        data_str = data.decode('utf-8', 'ignore')
    else:
        raise TypeError(f"Unsupported data type: {type(data)}")

    # Imported after validation so an unsupported input fails with TypeError
    # before the hashing backend is needed.
    import xxhash

    return xxhash.xxh32(data_str.encode()).hexdigest()
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ style test
@st.cache_data
def background_img(change):
    """Set ``./images/img-sky.jpg`` as the background of the Streamlit app.

    The image is read, base64-encoded and embedded as a data URL in a
    ``<style>`` rule targeting ``.stApp``, which is then injected with
    ``st.markdown``.

    Args:
        change: Not read by the body. NOTE(review): presumably only serves as
            a cache key for ``st.cache_data`` - confirm against the caller.
    """
    import base64

    image_path = './images/img-sky.jpg'
    with open(image_path, "rb") as image_file:
        raw_bytes = image_file.read()

    # Base64 text of the image, ready to embed in a data URL.
    encoded_bytes = base64.b64encode(raw_bytes)
    base64_image = encoded_bytes.decode('utf-8')

    # CSS rule that paints the image behind the whole app.
    background_image_style = f"""
<style>
.stApp {{
background-image: url("data:image/jpeg;base64,{base64_image}");
background-size: cover;
background-repeat: no-repeat;
background-attachment: fixed;
}}
</style>
"""
    # Inject the CSS style
    st.markdown(background_image_style, unsafe_allow_html=True)