# visualize.py
from packages import *
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ predictions histogram ~~~~~~~~~~~~~~~~~~~~~~~~~~
@st.cache_data
def pred_hist(pred):
    """Draw a 12-bin histogram of ``pred`` on a wide, frameless axes.

    Args:
        pred: Values handed directly to ``Axes.hist``.

    Returns:
        The matplotlib figure that holds the histogram.
    """
    figure, axis = plt.subplots(1, 1, figsize=(15, 3), tight_layout=True)

    # Faint dash-dot grid in both directions.
    axis.grid(color='grey', linestyle='-.', linewidth=0.5, alpha=0.6)

    # Hide the four border lines of the plotting area.
    for side in ('top', 'bottom', 'left', 'right'):
        axis.spines[side].set_visible(False)

    # Drop the tick marks and push the tick labels away from the axes.
    for one_axis, padding in ((axis.xaxis, 5), (axis.yaxis, 10)):
        one_axis.set_ticks_position('none')
        one_axis.set_tick_params(pad=padding)

    axis.hist(pred, bins=12)
    return figure
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ spectra plot ~~~~~~~~~~~~~~~~~~~~~~~~~~
@st.cache_data
def plot_spectra(specdf, xunits, yunits):
    """Plot every row of ``specdf`` as one line on a single wide axes.

    Args:
        specdf: DataFrame-like object exposing ``.columns`` and ``.T.plot``.
            It is transposed before plotting, so each row becomes one line
            and the column labels run along the x axis.
        xunits: Text used as the x-axis label.
        yunits: Text used as the y-axis label.

    Returns:
        The matplotlib figure containing the plot.
    """
    fig, ax = plt.subplots(figsize=(30, 7))
    specdf.T.plot(legend=False, ax=ax, color='#2474b4')

    # When the first column label is not a string the x axis is reversed
    # (presumably numeric wavenumbers shown high-to-low -- TODO confirm).
    if not isinstance(specdf.columns[0], str):
        ax.invert_xaxis()

    ax.set_xlabel(xunits, fontsize=30)
    ax.set_ylabel(yunits, fontsize=30)
    # Act on this figure/axes explicitly instead of pyplot's "current" ones.
    ax.margins(x=0)
    fig.tight_layout()
    return fig
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Cal/val hist ~~~~~~~~~~~~~~~~~~~~~~~~~~
@st.cache_data
def hist(y, y_train, y_test, target_name = 'y'):
    """Overlay KDE histograms of the full, Cal and Val target values.

    Args:
        y: Values drawn first, labelled with ``target_name``.
        y_train: Values drawn second, labelled ``"<target_name> (Cal)"``.
        y_test: Values drawn third, labelled ``"<target_name> (Val)"``.
        target_name: Name used for the x-axis label and the legend entries.

    Returns:
        The matplotlib figure with the three overlaid histograms.
    """
    base_label = str(target_name)
    # (data, colour, legend label) for each layer, in drawing order.
    layers = (
        (y, "#004e9e", base_label),
        (y_train, "#2C6B6F", base_label + " (Cal)"),
        (y_test, "#d0f7be", base_label + " (Val)"),
    )

    fig, ax = plt.subplots(figsize=(12, 3))
    for values, shade, caption in layers:
        sns.histplot(values, color=shade, kde=True, label=caption, ax=ax, fill=True)

    ax.set_xlabel(base_label)
    plt.legend()
    plt.tight_layout()
    return fig
@st.cache_data
def reg_plot(meas, pred, train_idx, test_idx):
    """Plot predicted against measured values for the Cal and Val sets.

    For each set a straight line ``Predicted = a0 + a1 x Measured`` is fitted
    and its rounded coefficients are shown in the legend. A black identity
    line is drawn over the measured range of the Cal set, and samples whose
    error (measured - predicted) lies more than three standard deviations
    from the mean error of their set are annotated with their label.

    Args:
        meas: Sequence ``[cal, val]`` of measured values; each entry is
            flattened to 1-D. The caller's objects are not modified.
        pred: Sequence ``[cal, val]`` of predicted values, same layout.
        train_idx: Labels of the Cal samples, in the same order as the data.
        test_idx: Labels of the Val samples, in the same order as the data.

    Returns:
        The matplotlib figure.
    """
    # Flattened local views: the original code wrote the reshaped arrays back
    # into the caller's lists, which is a hidden side effect.
    measured = [np.asarray(m).reshape(-1) for m in meas]
    predicted = [np.asarray(p).reshape(-1) for p in pred]

    # Per-set regression coefficients, rounded for display in the legend.
    intercepts, slopes = [], []
    for x, y in zip(measured, predicted):
        model = LinearRegression()
        model.fit(x.reshape(-1, 1), y.reshape(-1, 1))
        slopes.append(np.round(model.coef_[0][0], 2))
        intercepts.append(np.round(model.intercept_[0], 2))

    fig, ax = plt.subplots(figsize=(12, 4))
    sns.regplot(x=measured[0], y=predicted[0], color="#2C6B6F",
                label=f'Cal (Predicted = {intercepts[0]} + {slopes[0]} x Measured)',
                scatter_kws={'edgecolor': 'black'}, ax=ax)
    sns.regplot(x=measured[1], y=predicted[1], color='#d0f7be',
                label=f'Val (Predicted = {intercepts[1]} + {slopes[1]} x Measured)',
                scatter_kws={'edgecolor': 'black'}, ax=ax)

    # Identity line (perfect prediction) across the Cal measured range.
    low = np.min(measured[0]) - 0.05
    high = np.max(measured[0]) + 0.05
    ax.plot([low, high], [low, high], color='black')

    # Label the 3-sigma outliers of each set. The deviation is measured from
    # the mean error; comparing |e| with mean + 3*std would flag every point
    # whenever the mean error is sufficiently negative.
    for labels, x, y in ((train_idx, measured[0], predicted[0]),
                         (test_idx, measured[1], predicted[1])):
        err = x - y
        is_outlier = np.abs(err - np.mean(err)) > 3 * np.std(err)
        for i, txt in enumerate(labels):
            if is_outlier[i]:
                ax.annotate(txt, (x[i], y[i]))

    ax.set_ylabel('Predicted values')
    ax.set_xlabel('Measured values')
    ax.legend()
    ax.margins(0)
    return fig
# Resid plot
@st.cache_data
def resid_plot(meas, pred, train_idx, test_idx):
    """Plot residuals (measured - predicted) against predicted values.

    Cal and Val residuals are drawn as two scatter series around a dotted
    zero line, with a y range symmetric about zero. Samples whose residual
    lies more than three standard deviations from the mean residual of their
    set are annotated with their label.

    Args:
        meas: Sequence ``[cal, val]`` of measured values; each entry is
            flattened to 1-D before use.
        pred: Sequence ``[cal, val]`` of predicted values, same layout.
        train_idx: Labels of the Cal samples, in the same order as the data.
        test_idx: Labels of the Val samples, in the same order as the data.

    Returns:
        The matplotlib figure.
    """
    # Flatten first so (n,) and (n, 1) inputs subtract element-wise instead
    # of broadcasting to an (n, n) matrix.
    predicted = [np.asarray(p).reshape(-1) for p in pred]
    resid = [np.asarray(m).reshape(-1) - p for m, p in zip(meas, predicted)]

    fig, ax = plt.subplots(figsize=(12, 4))
    sns.scatterplot(x=predicted[0], y=resid[0], color="#2C6B6F", label='Cal',
                    edgecolor="black", ax=ax)
    sns.scatterplot(x=predicted[1], y=resid[1], color="#d0f7be", label='Val',
                    edgecolor="black", ax=ax)
    ax.axhline(y=0, c='black', linestyle=':')

    # Symmetric y range with 10 % headroom over the largest absolute residual.
    lim = np.max(np.abs(np.concatenate([resid[0], resid[1]], axis=0))) * 1.1
    ax.set_ylim(-lim, lim)

    # Label the 3-sigma outliers of each set; the statistics are computed once
    # per set and the deviation is taken from the mean residual.
    for labels, x, err in ((train_idx, predicted[0], resid[0]),
                           (test_idx, predicted[1], resid[1])):
        is_outlier = np.abs(err - np.mean(err)) > 3 * np.std(err)
        for i, txt in enumerate(labels):
            if is_outlier[i]:
                ax.annotate(txt, (x[i], err[i]))

    ax.set_ylabel('Residuals')
    ax.set_xlabel('Predicted values')
    ax.legend()
    ax.margins(0)
    return fig