import streamlit as st import numpy as np import matplotlib.pyplot as plt import seaborn as sns # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ predictions histogram ~~~~~~~~~~~~~~~~~~~~~~~~~~ @st.cache_data def pred_hist(pred): # Creating histogram hist, axs = plt.subplots(1, 1, figsize=(15, 3), tight_layout=True) # Add x, y gridlines axs.grid(color='grey', linestyle='-.', linewidth=0.5, alpha=0.6) # Remove axes splines for s in ['top', 'bottom', 'left', 'right']: axs.spines[s].set_visible(False) # Remove x, y ticks axs.xaxis.set_ticks_position('none') axs.yaxis.set_ticks_position('none') # Add padding between axes and labels axs.xaxis.set_tick_params(pad=5) axs.yaxis.set_tick_params(pad=10) # Creating histogram N, bins, patches = axs.hist(pred, bins=12) return hist # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ predictions histogram ~~~~~~~~~~~~~~~~~~~~~~~~~~ @st.cache_data def plot_spectra(specdf=None, color=None, cmap=None, xunits=None, yunits=None, mean=False): # pass import matplotlib.pyplot as plt import numpy as np fig, ax = plt.subplots(figsize=(30, 7)) if color is None or cmap is None: specdf.T.plot(legend=False, ax=ax, color="blue") else: cats = color.unique() for key, value in cmap.items(): ax.plot([], [], color=value, label=str(key)) plt.legend() for key, value in cmap.items(): idx = color.index[color == key].tolist() specdf.loc[idx].T.plot(legend=False, ax=ax, color=value) if mean: specdf.mean().T.plot(legend=False, ax=ax, color="black", linewidth=5) ax.set_xlabel(xunits, fontsize=30) ax.set_ylabel(yunits, fontsize=30) plt.margins(x=0) plt.tight_layout() # plt.legend() return fig @st.cache_data def barhplot(metadf, cmap): counts = metadf.groupby(metadf.columns[0]).size() counts = counts.loc[cmap.keys()] fig, ax = plt.subplots(figsize=(10, 5)) ax.barh(counts.index, counts.values, color=cmap.values()) plt.gca().invert_yaxis() plt.xlabel('Count') plt.ylabel(str(metadf.columns[0]).capitalize()) return fig # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Cal/val hist ~~~~~~~~~~~~~~~~~~~~~~~~~~ @st.cache_data def hist(y, y_train, y_test, target_name='y'): fig, ax = plt.subplots(figsize=(5, 2)) sns.histplot(y, color="#004e9e", kde=True, label=str( target_name) + " (Total)", ax=ax, fill=True) sns.histplot(y_train, color="#2C6B6F", kde=True, label=str(target_name)+" (Cal)", ax=ax, fill=True) sns.histplot(y_test, color="#d0f7be", kde=True, label=str( target_name)+" (Val)", ax=ax, fill=True) ax.set_xlabel(str(target_name)) plt.legend() plt.tight_layout() return fig @st.cache_data def reg_plot(meas, pred, train_idx, test_idx, trainplot=True): a0 = np.ones(2) a1 = np.ones(2) n = 2 if trainplot else 1 for i in range(n): meas[i] = np.array(meas[i]).reshape(-1, 1) pred[i] = np.array(pred[i]).reshape(-1, 1) from sklearn.linear_model import LinearRegression M = LinearRegression() M.fit(meas[i], pred[i]) a1[i] = np.round(M.coef_[0][0], 2) a0[i] = np.round(M.intercept_[0], 2) if trainplot: ec = np.subtract(np.array(meas[0]).reshape(-1), np.array(pred[0]).reshape(-1)) et = np.subtract(np.array(meas[1]).reshape(-1), np.array(pred[1]).reshape(-1)) fig, ax = plt.subplots(figsize=(12, 4)) if trainplot: sns.regplot(x=meas[0], y=pred[0], color="#2C6B6F", label=f'Cal (Predicted = { a0[0]} + {a1[0]} x Measured)', scatter_kws={'edgecolor': 'black'}) sns.regplot(x=meas[1], y=pred[1], color='#d0f7be', label=f'Val (Predicted = { a0[1]} + {a1[1]} x Measured)', scatter_kws={'edgecolor': 'black'}) plt.plot([np.min(meas[0]) - 0.05, np.max([meas[0]]) + 0.05], [np.min(meas[0]) - 0.05, np.max([meas[0]]) + 0.05], color='black') if trainplot: for i, txt in enumerate(train_idx): # plt.annotate(txt ,(np.array(meas[0]).reshape(-1)[i],ec[i])) if np.abs(ec[i]) > np.mean(ec) + 3*np.std(ec): plt.annotate( txt, (np.array(meas[0]).reshape(-1)[i], np.array(pred[0]).reshape(-1)[i])) for i, txt in enumerate(test_idx): if np.abs(et[i]) > np.mean(et) + 3*np.std(et): plt.annotate( txt, (np.array(meas[1]).reshape(-1)[i], np.array(pred[1]).reshape(-1)[i])) ax.set_ylabel('Predicted values') ax.set_xlabel('Measured values') plt.legend() plt.margins(0) # fig.savefig('./report/figures/measured_vs_predicted.png') return fig # Resid plot @st.cache_data def resid_plot(meas, pred, train_idx, test_idx, trainplot=True): et = np.subtract(meas[1], pred[1]) ett = np.array(et).reshape(-1, 1) fig, ax = plt.subplots(figsize=(12, 4)) plt.axhline(y=0, c='black', linestyle=':') if trainplot: ec = np.subtract(meas[0], pred[0]) ecc = np.array(ec).reshape(-1, 1) sns.scatterplot(x=pred[0], y=ec, color="#2C6B6F", label=f'Cal', edgecolor="black") for i, txt in enumerate(train_idx): if np.abs(ecc[i]) > np.mean(ecc) + 3*np.std(ecc): plt.annotate(txt, (np.array(pred[0]).reshape(-1)[i], ecc[i])) sns.scatterplot(x=pred[1], y=et, color="#d0f7be", label=f'Val', edgecolor="black") for i, txt in enumerate(test_idx): if np.abs(ett[i]) > np.mean(ett) + 3 * np.std(ett): plt.annotate(txt, (np.array(pred[1]).reshape(-1)[i], ett[i])) if trainplot: lim = np.max(abs(np.concatenate([ec, et], axis=0)))*1.1 else: lim = np.max(abs(et))*1.1 plt.ylim(- lim, lim) ax.set_ylabel('Residuals') ax.set_xlabel('Predicted values') plt.legend() plt.margins(0) # fig.savefig('./report/figures/residuals_plot.png') return fig