"""Miscellaneous utilities.

Contains helpers for CSV-based prediction, a Streamlit download button,
train/test splitting, descriptive statistics, object hashing and ``sys.path``
setup.
"""
# NOTE(review): this text was recovered from a deletion diff of
# src/utils/Miscellaneous.py -- confirm the module is still wanted before
# reinstating it.
from packages import *


def prediction(NIRS_csv, qsep, qhdr, model):
    """Load a CSV file and return the model's predictions for it.

    Args:
        NIRS_csv: CSV source handed to ``read_csv``.
        qsep: Field separator, passed as ``sep``.
        qhdr: ``'yes'`` makes the first column the index (``index_col=0``);
            any other value passes ``index_col=False``.
        model: Object exposing ``predict(X)``.

    Returns:
        Whatever ``model.predict`` returns for the loaded table.
    """
    index_col = 0 if qhdr == 'yes' else False
    X_test = read_csv(NIRS_csv, sep=qsep, index_col=index_col)
    return model.predict(X_test)


def download_results(data, export_name):
    """Render a primary 'Download' button serving the file at path ``data``.

    Args:
        data: Path of the file to offer; it is opened with ``open(data)``.
        export_name: File name passed to ``st.download_button``.
    """
    with open(data) as f:
        st.download_button('Download', f, export_name, type='primary')


@st.cache_data(show_spinner=True)
def data_split(x, y):
    """Split ``x`` / ``y`` into training and test subsets.

    Indices come from ``train_test_split_idx`` called with the
    ``"kennard_stone"`` method, the ``"correlation"`` metric, a 25 % test
    share and ``random_state=42``.

    Args:
        x: Predictors; must support ``.iloc`` row selection.
        y: Targets; must support ``.iloc`` selection.

    Returns:
        ``(X_train, X_test, y_train, y_test, train_index, test_index)``.
    """
    train_index, test_index = train_test_split_idx(
        x,
        y=y,
        method="kennard_stone",
        metric="correlation",
        test_size=0.25,
        random_state=42,
    )
    X_train = DataFrame(x.iloc[train_index, :])
    X_test = DataFrame(x.iloc[test_index, :])
    y_train = y.iloc[train_index]
    y_test = y.iloc[test_index]
    return X_train, X_test, y_train, y_test, train_index, test_index


@st.cache_data(show_spinner=True)
def desc_stats(x):
    """Return a dict of descriptive statistics for ``x``.

    Keys, in order: ``'N samples'``, ``'Min'``, ``'Max'``, ``'Mean'``,
    ``'Median'``, ``'S'`` (``np.std``), ``'RSD'`` (``std * 100 / mean``),
    ``'Skew'`` and ``'Kurt'`` (both with ``axis=0, bias=True``).
    """
    # Computed once; both values are reused for the RSD entry.
    mean = np.mean(x)
    std = np.std(x)
    return {
        'N samples': x.shape[0],
        'Min': np.min(x),
        'Max': np.max(x),
        'Mean': mean,
        'Median': np.median(x),
        'S': std,
        'RSD': std * 100 / mean,
        'Skew': skew(x, axis=0, bias=True),
        'Kurt': kurtosis(x, axis=0, bias=True),
    }


def ObjectHash(current=None, add=None):
    """Return the xxHash32 hex digest of one or two objects.

    Each non-``None`` argument is converted to text and the texts are
    concatenated (``current`` first) before hashing.

    Args:
        current: First object to hash, or ``None``.
        add: Second object to hash, or ``None``.

    Returns:
        The hex digest as a string. When both arguments are ``None`` a hint
        is printed and the digest of the literal ``"None"`` is returned.

    Raises:
        TypeError: If an argument has a type ``DatatoStr`` does not support.
    """
    import sys

    import numpy as np
    import xxhash
    from pandas import DataFrame, Series

    def DatatoStr(data):
        """Convert a supported object to the text that gets hashed."""
        if isinstance(data, (DataFrame, Series)):
            data_str = data.to_string()
        elif isinstance(data, np.ndarray):
            # Disable summarisation: with the default threshold, large arrays
            # are printed with "..." and distinct arrays could hash the same.
            data_str = np.array2string(
                data, separator=',', threshold=sys.maxsize
            )
        elif isinstance(data, (list, tuple)):
            data_str = str(data)
        elif isinstance(data, dict):
            # Sort the items so insertion order does not affect the hash.
            data_str = str(sorted(data.items()))
        elif isinstance(data, (int, float, str, bool)):
            data_str = str(data)
        elif isinstance(data, bytes):
            data_str = data.decode('utf-8', 'ignore')
        else:
            raise TypeError(f"Unsupported data type: {type(data)}")

        # The textual repr of the encoded bytes ("b'...'") is what was hashed
        # before; keep it so existing hash values stay the same.
        return str(data_str.encode())

    # Identity checks are required here: ``== None`` on a DataFrame, Series or
    # ndarray is element-wise and cannot be used in a boolean context.
    if current is None and add is None:
        payload = "None"
        print('Insert the object for which you want to compute the hash value.')
    elif current is None:
        payload = DatatoStr(add)
    elif add is None:
        payload = DatatoStr(current)
    else:
        payload = DatatoStr(current) + DatatoStr(add)

    return str(xxhash.xxh32(payload).hexdigest())


def JointoMain():
    """Append the sibling ``utils`` and ``style`` directories to ``sys.path``.

    The directories are resolved relative to this file. A path already in
    ``sys.path`` is not appended again, so repeated calls add no duplicates.
    """
    import os
    import sys

    base_dir = os.path.dirname(__file__)
    for sub in ('utils', 'style'):
        path = os.path.join(base_dir, sub)
        if path not in sys.path:
            sys.path.append(path)