Newer
Older
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ predictions histogram ~~~~~~~~~~~~~~~~~~~~~~~~~~
# Style the axes of a predictions histogram and return `hist`.
# NOTE(review): the visible body never assigns `axs` or `hist` and never reads
# `pred`; the statements that create the figure and draw the histogram appear
# to be missing from this view -- confirm against the full file.
@st.cache_data
def pred_hist(pred):
# Creating histogram
# Add x, y gridlines
axs.grid(color='grey', linestyle='-.', linewidth=0.5, alpha=0.6)
# Remove axes spines
# NOTE(review): no visible statement uses the loop variable `s`; the line that
# acts on each side is presumably missing here -- confirm.
for s in ['top', 'bottom', 'left', 'right']:
# Do not draw tick marks on either axis
axs.xaxis.set_ticks_position('none')
axs.yaxis.set_ticks_position('none')
# Add padding between the ticks and their labels
axs.xaxis.set_tick_params(pad=5)
axs.yaxis.set_tick_params(pad=10)
return hist
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ spectra plot ~~~~~~~~~~~~~~~~~~~~~~~~~~
# Label the axes of the spectra figure with `xunits` / `yunits` and return `fig`.
# NOTE(review): `fig` and `ax` are not assigned in the visible body, and
# `specdf`, `mean`, `value` and `idx` are never read -- the plotting statements
# appear to be missing from this view; confirm against the full file.
@st.cache_data
def plot_spectra(specdf=None, color=None, cmap=None, xunits=None, yunits=None, mean=False):
# For every (key, value) entry of `cmap`, collect the index labels of `color`
# whose entry equals that key.
# NOTE(review): presumably `color` is a pandas Series and `cmap` a dict -- confirm.
for key, value in cmap.items():
idx = color.index[color == key].tolist()
# Axis titles are the caller-supplied unit strings, drawn at font size 30.
ax.set_xlabel(xunits, fontsize=30)
ax.set_ylabel(yunits, fontsize=30)
# plt.legend()
return fig
# Horizontal bar chart of the number of rows of `metadf` per distinct value of
# its first column; returns `fig`.
# NOTE(review): `fig` and `ax` are not assigned in the visible body -- the
# statement creating them appears to be missing from this view; confirm.
# NOTE(review): presumably `metadf` is a pandas DataFrame and `cmap` a dict
# mapping each group value to a colour -- confirm against the caller.
@st.cache_data
def barhplot(metadf, cmap):
# Row count for each distinct value of the first column.
counts = metadf.groupby(metadf.columns[0]).size()
# Select the counts in the key order of `cmap`, so each bar lines up with the
# colour taken from `cmap.values()` below.
counts = counts.loc[cmap.keys()]
ax.barh(counts.index, counts.values, color=cmap.values())
# Invert the y axis of the current axes so the first entry is drawn at the top.
plt.gca().invert_yaxis()
plt.xlabel('Count')
# The y label is the name of the grouping column, capitalised.
plt.ylabel(str(metadf.columns[0]).capitalize())
return fig
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Cal/val hist ~~~~~~~~~~~~~~~~~~~~~~~~~~
@st.cache_data
def hist(y, y_train, y_test, target_name='y'):
    """Overlay histograms of the total, calibration and validation targets.

    Three seaborn histograms, each with a KDE curve, are drawn on one shared
    axes so the distribution of the two subsets can be compared with the
    distribution of the whole set.

    Args:
        y: Values plotted with the "(Total)" legend entry.
        y_train: Values plotted with the "(Cal)" legend entry.
        y_test: Values plotted with the "(Val)" legend entry.
        target_name: Name used for the x-axis label and as the prefix of the
            legend entries.

    Returns:
        The matplotlib figure holding the overlaid histograms.
    """
    fig, ax = plt.subplots(figsize=(5, 2))

    # (data, colour, legend tag) for each layer, drawn in this order.
    layers = (
        (y, "#004e9e", "Total"),
        (y_train, "#2C6B6F", "Cal"),
        (y_test, "#d0f7be", "Val"),
    )
    for values, colour, tag in layers:
        sns.histplot(values, color=colour, kde=True,
                     label=f"{target_name} ({tag})", ax=ax, fill=True)

    ax.set_xlabel(str(target_name))
    # Address the figure and axes objects directly rather than going through
    # pyplot's implicit "current figure": that state is process-global, so a
    # concurrent session creating a figure could otherwise receive the legend
    # or the layout adjustment instead of this one.
    ax.legend()
    fig.tight_layout()
    return fig
@st.cache_data
n = 2 if trainplot else 1
for i in range(n):
meas[i] = np.array(meas[i]).reshape(-1, 1)
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
a1[i] = np.round(M.coef_[0][0], 2)
a0[i] = np.round(M.intercept_[0], 2)
if trainplot:
ec = np.subtract(np.array(meas[0]).reshape(-1),
np.array(pred[0]).reshape(-1))
et = np.subtract(np.array(meas[1]).reshape(-1),
np.array(pred[1]).reshape(-1))
fig, ax = plt.subplots(figsize=(12, 4))
if trainplot:
sns.regplot(x=meas[0], y=pred[0], color="#2C6B6F", label=f'Cal (Predicted = {
a0[0]} + {a1[0]} x Measured)', scatter_kws={'edgecolor': 'black'})
sns.regplot(x=meas[1], y=pred[1], color='#d0f7be', label=f'Val (Predicted = {
a0[1]} + {a1[1]} x Measured)', scatter_kws={'edgecolor': 'black'})
plt.plot([np.min(meas[0]) - 0.05, np.max([meas[0]]) + 0.05],
[np.min(meas[0]) - 0.05, np.max([meas[0]]) + 0.05], color='black')
for i, txt in enumerate(train_idx):
# plt.annotate(txt ,(np.array(meas[0]).reshape(-1)[i],ec[i]))
if np.abs(ec[i]) > np.mean(ec) + 3*np.std(ec):
plt.annotate(
txt, (np.array(meas[0]).reshape(-1)[i], np.array(pred[0]).reshape(-1)[i]))
if trainplot:
for i, txt in enumerate(test_idx):
if np.abs(et[i]) > np.mean(et) + 3*np.std(et):
plt.annotate(
txt, (np.array(meas[1]).reshape(-1)[i], np.array(pred[1]).reshape(-1)[i]))
ax.set_ylabel('Predicted values')
ax.set_xlabel('Measured values')
plt.legend()
plt.margins(0)
# fig.savefig('./report/figures/measured_vs_predicted.png')
return fig
# Resid plot
@st.cache_data
def resid_plot(meas, pred, train_idx, test_idx, trainplot = True):
e = [np.subtract(meas[0], pred[0]), np.subtract(meas[1], pred[1])]
fig, ax = plt.subplots(figsize=(12, 4))
if trainplot:
sns.scatterplot(x=pred[0], y=e[0], color="#2C6B6F",
label=f'Cal', edgecolor="black")
sns.scatterplot(x=pred[1], y=e[1], color="#d0f7be",
label=f'Val', edgecolor="black")
# sns.scatterplot(x = pred[0], y = e[0], color='blue', label = f'Cal (Residual = {a0[0]} + {a1[0]} * Predicted)')
# sns.scatterplot(x = pred[1], y = e[1], color='green', label = f'Val (Residual = {a0[1]} + {a1[1]} * Predicted)')
plt.axhline(y=0, c='black', linestyle=':')
lim = np.max(abs(np.concatenate([e[0], e[1]], axis=0)))*1.1
plt.ylim(- lim, lim)
n = 2 if trainplot else 1
for i in range(n):
e[i] = np.array(e[i]).reshape(-1, 1)
if trainplot:
for i, txt in enumerate(train_idx):
# plt.annotate(txt ,(np.array(meas[0]).reshape(-1)[i],ec[i]))
if np.abs(e[0][i]) > np.mean(e[0]) + 3*np.std(e[0]):
plt.annotate(txt, (np.array(pred[0]).reshape(-1)[i], e[0][i]))
for i, txt in enumerate(test_idx):
if np.abs(e[1][i]) > np.mean(e[1]) + 3*np.std(e[1]):
plt.annotate(txt, (np.array(pred[1]).reshape(-1)[i], e[1][i]))
ax.set_xlabel(f'{train_idx.shape}')
ax.set_ylabel('Residuals')
ax.set_xlabel('Predicted values')
plt.legend()
plt.margins(0)
# fig.savefig('./report/figures/residuals_plot.png')