from Packages import *


# local CSS
## load the custom CSS in the style folder
@st.cache_data
def local_css(file_name):
    with open(file_name) as f:
        st.markdown(f"<style>{f.read()}</style>", unsafe_allow_html=True)

# predict module
def prediction(NIRS_csv, qsep, qhdr, model):
    # qhdr indicates whether the CSV has a column header ('yes' or 'no')
    if qhdr == 'yes':
        col = 0
    else:
        col = False
    X_test = pd.read_csv(NIRS_csv, sep=qsep, index_col=col)
    Y_preds = model.predict(X_test)
    return Y_preds
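
# Illustrative call (file name, separator and model are placeholders; any fitted
# estimator exposing .predict() would do):
#   y_pred = prediction("new_spectra.csv", qsep=";", qhdr="yes", model=fitted_model)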

@st.cache_data
def reg_plot(meas, pred, train_idx, test_idx):
    a0 = np.ones(2)
    a1 = np.ones(2)
    # fit a simple regression of predicted on measured for the calibration and test sets
    for i in range(len(meas)):
        meas[i] = np.array(meas[i]).reshape(-1, 1)
        pred[i] = np.array(pred[i]).reshape(-1, 1)
        M = LinearRegression()
        M.fit(meas[i], pred[i])
        a1[i] = np.round(M.coef_[0][0], 2)
        a0[i] = np.round(M.intercept_[0], 2)

    ec = np.subtract(np.array(meas[0]).reshape(-1), np.array(pred[0]).reshape(-1))
    et = np.subtract(np.array(meas[1]).reshape(-1), np.array(pred[1]).reshape(-1))

    fig, ax = plt.subplots(figsize=(12, 4))
    sns.regplot(x=meas[0], y=pred[0], color='blue', label=f'Calib (Predicted = {a0[0]} + {a1[0]} x Measured)')
    sns.regplot(x=meas[1], y=pred[1], color='green', label=f'Test (Predicted = {a0[1]} + {a1[1]} x Measured)')
    plt.plot([np.min(meas[0]) - 0.05, np.max(meas[0]) + 0.05],
             [np.min(meas[0]) - 0.05, np.max(meas[0]) + 0.05], color='black')

    # annotate the samples whose error exceeds mean + 3 * std
    for i, txt in enumerate(train_idx):
        if np.abs(ec[i]) > np.mean(ec) + 3 * np.std(ec):
            plt.annotate(txt, (np.array(meas[0]).reshape(-1)[i], np.array(pred[0]).reshape(-1)[i]))
    for i, txt in enumerate(test_idx):
        if np.abs(et[i]) > np.mean(et) + 3 * np.std(et):
            plt.annotate(txt, (np.array(meas[1]).reshape(-1)[i], np.array(pred[1]).reshape(-1)[i]))

    ax.set_ylabel('Predicted values')
    ax.set_xlabel('Measured values')
    plt.legend()
    plt.margins(0)
    # fig.savefig('./report/figures/measured_vs_predicted.png')
    return fig

@st.cache_data
def resid_plot(meas, pred, train_idx, test_idx):
    a0 = np.ones(2)
    a1 = np.ones(2)
    # residuals (measured - predicted) for the calibration and test sets
    e = [np.subtract(meas[0], pred[0]), np.subtract(meas[1], pred[1])]
    # fit a simple regression of the residuals on the measured values
    for i in range(len(meas)):
        M = LinearRegression()
        M.fit(np.array(meas[i]).reshape(-1, 1), np.array(e[i]).reshape(-1, 1))
        a1[i] = np.round(M.coef_[0][0], 2)
        a0[i] = np.round(M.intercept_[0], 2)

    fig, ax = plt.subplots(figsize=(12, 4))
    sns.scatterplot(x=pred[0], y=e[0], color='blue', label=f'Calib (Residual = {a0[0]} + {a1[0]} * Predicted)')
    sns.scatterplot(x=pred[1], y=e[1], color='green', label=f'Test (Residual = {a0[1]} + {a1[1]} * Predicted)')
    plt.axhline(y=0, c='black', linestyle=':')
    lim = np.max(abs(np.concatenate([e[0], e[1]], axis=0))) * 1.1
    plt.ylim(-lim, lim)

    for i in range(2):
        e[i] = np.array(e[i]).reshape(-1, 1)
    # annotate the samples whose residual exceeds mean + 3 * std
    for i, txt in enumerate(train_idx):
        if np.abs(e[0][i]) > np.mean(e[0]) + 3 * np.std(e[0]):
            plt.annotate(txt, (np.array(pred[0]).reshape(-1)[i], e[0][i]))
    for i, txt in enumerate(test_idx):
        if np.abs(e[1][i]) > np.mean(e[1]) + 3 * np.std(e[1]):
            plt.annotate(txt, (np.array(pred[1]).reshape(-1)[i], e[1][i]))

    ax.set_ylabel('Residuals')
    ax.set_xlabel('Predicted values')
    plt.legend()
    plt.margins(0)
    # fig.savefig('./report/figures/residuals_plot.png')
    return fig

# create a download button; takes the path of the file to serve and the export file name
def download_results(data, export_name):
    with open(data) as f:
        st.download_button('Download', f, export_name, type='primary')
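
# Illustrative call (the path and export name are placeholders; the first argument
# must point to an existing file, since it is opened before the button is rendered):
#   download_results("report/results.csv", "results.csv")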

@st.cache_resource
def plot_spectra(df, xunits, yunits):
    fig, ax = plt.subplots(figsize=(30, 7))
    if isinstance(df.columns[0], str):
        # columns are text labels: plot as-is
        df.T.plot(legend=False, ax=ax, color='blue')
    else:
        # columns are numeric (e.g. wavelengths): plot with the x-axis inverted
        df.T.plot(legend=False, ax=ax, color='blue').invert_xaxis()
    ax.set_xlabel(xunits, fontsize=18)
    ax.set_ylabel(yunits, fontsize=18)
    plt.margins(x=0)
    plt.tight_layout()
    return fig

## descriptive stats
def desc_stats(x):
    a = {}
    a['N samples'] = x.shape[0]
    a['Min'] = np.min(x)
    a['Max'] = np.max(x)
    a['Mean'] = np.mean(x)
    a['Median'] = np.median(x)
    a['S'] = np.std(x)
    a['RSD'] = np.std(x) * 100 / np.mean(x)
    a['Skew'] = skew(x, axis=0, bias=True)
    a['Kurt'] = kurtosis(x, axis=0, bias=True)
    return a
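
# Illustrative call (assumes `y` is a 1-D array or pandas Series of reference values;
# wrapping the returned dict in a one-row DataFrame gives a readable summary table):
#   summary = pd.DataFrame([desc_stats(y)])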

def hash_data(data):
    """Hash various data types using xxHash (xxh32)."""
    import xxhash
    # Convert the input to a string representation
    if isinstance(data, pd.DataFrame):
        data_str = data.to_string()
    elif isinstance(data, pd.Series):
        data_str = data.to_string()
    elif isinstance(data, np.ndarray):
        data_str = np.array2string(data, separator=',')
    elif isinstance(data, (list, tuple)):
        data_str = str(data)
    elif isinstance(data, dict):
        # Ensure a consistent order for dict items
        data_str = str(sorted(data.items()))
    elif isinstance(data, (int, float, str, bool)):
        data_str = str(data)
    elif isinstance(data, bytes):
        data_str = data.decode('utf-8', 'ignore')  # Decode bytes to string
    else:
        raise TypeError(f"Unsupported data type: {type(data)}")
    # Encode the string to bytes and compute the xxh32 digest
    data_bytes = data_str.encode()
    return xxhash.xxh32(data_bytes).hexdigest()
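
# Illustrative call (`df` stands for any DataFrame already in scope; the digest can
# serve as a cache key, e.g. for Streamlit caching):
#   cache_key = hash_data(df)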