from Packages import *


# local CSS
## load the custom CSS in the style folder
@st.cache_data
def local_css(file_name):
    """Read the stylesheet at *file_name* and inject it into the page.

    The file content is wrapped in a ``<style>`` tag and rendered through
    ``st.markdown`` with ``unsafe_allow_html=True``.
    """
    with open(file_name) as f:
        st.markdown(f"<style>{f.read()}</style>", unsafe_allow_html=True)


# predict module
def prediction(NIRS_csv, qsep, qhdr, model):
    """Load a CSV file and return ``model.predict`` applied to its content.

    Args:
        NIRS_csv: path or buffer handed to ``read_csv``.
        qsep: field separator handed to ``read_csv``.
        qhdr: ``'yes'`` to use the first CSV column as the row index; any
            other value passes ``index_col=False``.
        model: object exposing a ``predict`` method.

    Returns:
        Whatever ``model.predict`` returns for the loaded table.
    """
    col = 0 if qhdr == 'yes' else False
    X_test = read_csv(NIRS_csv, sep=qsep, index_col=col)
    return model.predict(X_test)


def _annotate_outliers(ax, labels, x, y, errors):
    """Annotate on *ax* the points whose |error| exceeds mean + 3 * std.

    ``labels[i]`` is written at ``(x[i], y[i])`` when ``errors[i]`` passes the
    threshold. The threshold is computed once instead of once per point.
    """
    if len(errors) == 0:
        return
    threshold = np.mean(errors) + 3 * np.std(errors)
    for i, txt in enumerate(labels):
        if np.abs(errors[i]) > threshold:
            ax.annotate(txt, (x[i], y[i]))


@st.cache_data
def reg_plot(meas, pred, train_idx, test_idx):
    """Plot predicted against measured values for calibration and validation.

    Args:
        meas: two-item sequence ``[calibration, validation]`` of measured
            values.
        pred: two-item sequence ``[calibration, validation]`` of predicted
            values, aligned with *meas*.
        train_idx: labels of the calibration samples, in the order of
            ``meas[0]``; used to annotate outliers.
        test_idx: labels of the validation samples, in the order of
            ``meas[1]``; used to annotate outliers.

    Returns:
        The matplotlib figure.
    """
    # Work on flattened local copies: the caller's lists are left untouched,
    # so a cache hit and a cache miss leave the inputs in the same state.
    measured = [np.array(m).reshape(-1) for m in meas]
    predicted = [np.array(p).reshape(-1) for p in pred]

    # Slope / intercept of "predicted ~ measured", shown in the legend.
    a0, a1 = [], []
    for m, p in zip(measured, predicted):
        M = LinearRegression()
        M.fit(m.reshape(-1, 1), p)
        a1.append(round(float(M.coef_[0]), 2))
        a0.append(round(float(M.intercept_), 2))

    ec = measured[0] - predicted[0]
    et = measured[1] - predicted[1]

    fig, ax = plt.subplots(figsize=(12, 4))
    sns.regplot(x=measured[0], y=predicted[0], color="#2C6B6F",
                label=f'Cal (Predicted = {a0[0]} + {a1[0]} x Measured)',
                scatter_kws={'edgecolor': 'black'}, ax=ax)
    sns.regplot(x=measured[1], y=predicted[1], color='#d0f7be',
                label=f'Val (Predicted = {a0[1]} + {a1[1]} x Measured)',
                scatter_kws={'edgecolor': 'black'}, ax=ax)

    # Identity line spanning the range of the calibration measurements.
    lower = np.min(measured[0]) - 0.05
    upper = np.max(measured[0]) + 0.05
    ax.plot([lower, upper], [lower, upper], color='black')

    _annotate_outliers(ax, train_idx, measured[0], predicted[0], ec)
    _annotate_outliers(ax, test_idx, measured[1], predicted[1], et)

    ax.set_ylabel('Predicted values')
    ax.set_xlabel('Measured values')
    ax.legend()
    ax.margins(0)
    return fig


@st.cache_data
def resid_plot(meas, pred, train_idx, test_idx):
    """Plot residuals (measured - predicted) against predicted values.

    Args:
        meas: two-item sequence ``[calibration, validation]`` of measured
            values.
        pred: two-item sequence ``[calibration, validation]`` of predicted
            values, aligned with *meas*.
        train_idx: labels of the calibration samples, in the order of
            ``meas[0]``; used to annotate outliers.
        test_idx: labels of the validation samples, in the order of
            ``meas[1]``; used to annotate outliers.

    Returns:
        The matplotlib figure.
    """
    # Flatten locally so the subtraction is element-wise whatever mix of
    # 1-D / column-shaped inputs is received.
    measured = [np.array(m).reshape(-1) for m in meas]
    predicted = [np.array(p).reshape(-1) for p in pred]
    e = [m - p for m, p in zip(measured, predicted)]

    fig, ax = plt.subplots(figsize=(12, 4))
    sns.scatterplot(x=predicted[0], y=e[0], color="#2C6B6F", label='Cal',
                    edgecolor="black", ax=ax)
    sns.scatterplot(x=predicted[1], y=e[1], color="#d0f7be", label='Val',
                    edgecolor="black", ax=ax)
    ax.axhline(y=0, c='black', linestyle=':')

    # Symmetric y-range, 10% larger than the biggest absolute residual.
    lim = np.max(np.abs(np.concatenate([e[0], e[1]], axis=0))) * 1.1
    ax.set_ylim(-lim, lim)

    _annotate_outliers(ax, train_idx, predicted[0], e[0], e[0])
    _annotate_outliers(ax, test_idx, predicted[1], e[1], e[1])

    ax.set_ylabel('Residuals')
    ax.set_xlabel('Predicted values')
    ax.legend()
    ax.margins(0)
    return fig


# function that create a download button - needs the data to save and the file name to store to
def download_results(data, export_name):
    """Render a primary 'Download' button serving the file at path *data*.

    Args:
        data: path of the file to open and offer for download.
        export_name: file name passed to ``st.download_button``.
    """
    with open(data) as f:
        st.download_button('Download', f, export_name, type='primary')


@st.cache_data
def plot_spectra(specdf, xunits, yunits):
    """Plot every row of *specdf* as one line and return the figure.

    The x axis is inverted when the first column label is not a string.

    Args:
        specdf: table whose transpose is plotted (one line per row).
        xunits: x-axis label.
        yunits: y-axis label.
    """
    fig, ax = plt.subplots(figsize=(30, 7))
    has_text_labels = isinstance(specdf.columns[0], str)
    specdf.T.plot(legend=False, ax=ax, color='#2474b4')
    if not has_text_labels:
        ax.invert_xaxis()

    ax.set_xlabel(xunits, fontsize=30)
    ax.set_ylabel(yunits, fontsize=30)
    ax.margins(x=0)
    fig.tight_layout()
    return fig


@st.cache_data
def hist(y, y_train, y_test, target_name='y'):
    """Overlay histograms (with KDE) of the full, Cal and Val target values.

    Args:
        y: all target values.
        y_train: calibration subset.
        y_test: validation subset.
        target_name: name used for the legend entries and the x-axis label.

    Returns:
        The matplotlib figure.
    """
    fig, ax = plt.subplots(figsize=(12, 3))
    sns.histplot(y, color="#004e9e", kde=True, label=str(target_name),
                 ax=ax, fill=True)
    sns.histplot(y_train, color="#2C6B6F", kde=True,
                 label=str(target_name) + " (Cal)", ax=ax, fill=True)
    sns.histplot(y_test, color="#d0f7be", kde=True,
                 label=str(target_name) + " (Val)", ax=ax, fill=True)
    ax.set_xlabel(str(target_name))
    ax.legend()
    fig.tight_layout()
    return fig


@st.cache_data
def pred_hist(pred):
    """Return a 12-bin histogram figure of *pred* with a light grid."""
    fig, axs = plt.subplots(1, 1, figsize=(15, 3), tight_layout=True)

    # Add x, y gridlines
    axs.grid(color='grey', linestyle='-.', linewidth=0.5, alpha=0.6)

    # Remove axes splines
    for s in ['top', 'bottom', 'left', 'right']:
        axs.spines[s].set_visible(False)

    # Remove x, y ticks
    axs.xaxis.set_ticks_position('none')
    axs.yaxis.set_ticks_position('none')

    # Add padding between axes and labels
    axs.xaxis.set_tick_params(pad=5)
    axs.yaxis.set_tick_params(pad=10)

    axs.hist(pred, bins=12)
    return fig


@st.cache_data
def fig_export():
    """Placeholder: currently does nothing."""
    pass


@st.cache_data(show_spinner =True)
def data_split(x, y):
    """Split *x* and *y* into training and test sets.

    Indices come from ``train_test_split_idx`` using the kennard_stone method
    with the correlation metric; 25% of the data is used for testing.

    Args:
        x: table of predictors, indexed positionally with ``.iloc``.
        y: target values, indexed positionally with ``.iloc``.

    Returns:
        ``(X_train, X_test, y_train, y_test, train_index, test_index)``.
    """
    train_index, test_index = train_test_split_idx(
        x, y=y, method="kennard_stone", metric="correlation",
        test_size=0.25, random_state=42)

    # Assign data to training and test sets
    X_train, y_train = DataFrame(x.iloc[train_index, :]), y.iloc[train_index]
    X_test, y_test = DataFrame(x.iloc[test_index, :]), y.iloc[test_index]
    return X_train, X_test, y_train, y_test, train_index, test_index


## descriptive stat
@st.cache_data(show_spinner =True)
def desc_stats(x):
    """Return a dict of descriptive statistics for *x*.

    Keys: 'N samples', 'Min', 'Max', 'Mean', 'Median', 'S' (standard
    deviation), 'RSD' (standard deviation as a percentage of the mean),
    'Skew' and 'Kurt' (both computed along axis 0 with ``bias=True``).
    """
    mean = np.mean(x)
    std = np.std(x)
    a = {}
    a['N samples'] = x.shape[0]
    a['Min'] = np.min(x)
    a['Max'] = np.max(x)
    a['Mean'] = mean
    a['Median'] = np.median(x)
    a['S'] = std
    a['RSD'] = std * 100 / mean
    a['Skew'] = skew(x, axis=0, bias=True)
    a['Kurt'] = kurtosis(x, axis=0, bias=True)
    return a


def hash_data(data):
    """Return the xxHash32 hex digest of a string representation of *data*.

    Supported inputs: DataFrame, Series, ndarray, list, tuple, dict, int,
    float, str, bool and bytes.

    Raises:
        TypeError: if *data* is of any other type.
    """
    import xxhash

    # Convert to a string representation
    if isinstance(data, (DataFrame, Series)):
        data_str = data.to_string()
    elif isinstance(data, np.ndarray):
        # Raise the summarisation threshold above the array size so large
        # arrays are rendered in full; otherwise the middle is replaced by
        # '...' and arrays differing only there would share a hash.
        data_str = np.array2string(data, separator=',',
                                   threshold=data.size + 1)
    elif isinstance(data, (list, tuple)):
        data_str = str(data)
    elif isinstance(data, dict):
        # Ensure consistent order for dict items
        data_str = str(sorted(data.items()))
    elif isinstance(data, (int, float, str, bool)):
        data_str = str(data)
    elif isinstance(data, bytes):
        # NOTE(review): undecodable bytes are dropped, so binary payloads
        # differing only in such bytes hash identically.
        data_str = data.decode('utf-8', 'ignore')
    else:
        raise TypeError(f"Unsupported data type: {type(data)}")

    # Encode the string to bytes and hash it
    digest = xxhash.xxh32(data_str.encode()).hexdigest()
    return str(digest)


#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ style test
@st.cache_data
def background_img(change):
    """Set './images/img-sky.jpg' as the fixed background of the app.

    The image is read, base64-encoded and embedded in a CSS rule for
    ``.stApp`` that is injected through ``st.markdown``.

    Args:
        change: not read by the body; it only takes part in the
            ``st.cache_data`` cache key.
    """
    import base64

    image_path = './images/img-sky.jpg'
    with open(image_path, "rb") as image_file:
        base64_image = base64.b64encode(image_file.read()).decode('utf-8')

    # CSS code to set the background image
    background_image_style = f"""
    <style>
    .stApp {{
        background-image: url("data:image/jpeg;base64,{base64_image}");
        background-size: cover;
        background-repeat: no-repeat;
        background-attachment: fixed;
    }}
    </style>
    """

    # Inject the CSS style
    st.markdown(background_image_style, unsafe_allow_html=True)