"""Miscellaneous helpers: prediction, download button, train/test split,
descriptive statistics, object hashing and import-path setup.

Provenance: this is the content of ``src/utils/miscellaneous.py`` as created
by commit 4786ec3661e9f1a1cbc2289f7865b051e17b0def (subject "miscellaneous.py",
dated 2024-10-10).
"""
# NOTE(review): `st`, `np`, `read_csv`, `DataFrame`, `train_test_split_idx`,
# `skew` and `kurtosis` are not imported explicitly in this module; they are
# presumably re-exported by `packages` -- confirm against that module.
from packages import *


# predict module
def prediction(NIRS_csv, qsep, qhdr, model):
    """Read a CSV file and return ``model.predict`` applied to its content.

    Args:
        NIRS_csv: Path or buffer handed to ``read_csv``.
        qsep: Column separator handed to ``read_csv`` as ``sep``.
        qhdr: ``'yes'`` to use the first CSV column as the row index; any
            other value passes ``index_col=False``.
        model: Object exposing a ``predict`` method.

    Returns:
        Whatever ``model.predict`` returns for the loaded table.
    """
    col = 0 if qhdr == 'yes' else False
    X_test = read_csv(NIRS_csv, sep=qsep, index_col=col)
    return model.predict(X_test)


def download_results(data, export_name):
    """Render a primary 'Download' button serving the content of a file.

    Args:
        data: Path of the file to offer for download (it is passed to
            ``open``, so this is a path, not the data itself).
        export_name: File name passed to ``st.download_button``.
    """
    with open(data) as f:
        st.download_button('Download', f, export_name, type='primary')


@st.cache_data(show_spinner=True)
def data_split(x, y):
    """Split ``x`` and ``y`` into training and test sets.

    The indices come from ``train_test_split_idx`` using the Kennard-Stone
    method with the correlation metric; 25% of the data is used for testing
    and ``random_state`` is fixed to 42.

    Args:
        x: Object supporting ``.iloc`` row selection (assumed to be a pandas
            DataFrame -- TODO confirm against callers).
        y: Object supporting ``.iloc`` selection, aligned with ``x``.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test, train_index, test_index)``.
    """
    train_index, test_index = train_test_split_idx(
        x,
        y=y,
        method="kennard_stone",
        metric="correlation",
        test_size=0.25,
        random_state=42,
    )
    # Assign data to training and test sets
    X_train, y_train = DataFrame(x.iloc[train_index, :]), y.iloc[train_index]
    X_test, y_test = DataFrame(x.iloc[test_index, :]), y.iloc[test_index]
    return X_train, X_test, y_train, y_test, train_index, test_index


## descriptive stat
@st.cache_data(show_spinner=True)
def desc_stats(x):
    """Return a dict of descriptive statistics for ``x``.

    Args:
        x: Array-like exposing ``.shape`` and accepted by the numpy
            reductions used below.

    Returns:
        Dict with keys, in order: ``'N samples'``, ``'Min'``, ``'Max'``,
        ``'Mean'``, ``'Median'``, ``'S'`` (``np.std``), ``'RSD'``
        (``std * 100 / mean``), ``'Skew'`` and ``'Kurt'`` (both computed
        with ``axis=0, bias=True``).
    """
    # Computed once and reused for 'Mean', 'S' and 'RSD'.
    mean = np.mean(x)
    std = np.std(x)
    return {
        'N samples': x.shape[0],
        'Min': np.min(x),
        'Max': np.max(x),
        'Mean': mean,
        'Median': np.median(x),
        'S': std,
        'RSD': std * 100 / mean,
        'Skew': skew(x, axis=0, bias=True),
        'Kurt': kurtosis(x, axis=0, bias=True),
    }


def ObjectHash(current=None, add=None):
    """Return the xxHash32 hex digest of one or two objects.

    Each non-``None`` argument is converted to a string representation;
    when both are given the two representations are concatenated
    (``current`` first) before hashing. When both are ``None`` a hint is
    printed and the digest of the literal string ``"None"`` is returned.

    Args:
        current: First object to hash, or ``None``.
        add: Second object to hash, or ``None``.

    Returns:
        The ``xxhash.xxh32`` hexadecimal digest as a ``str``.

    Raises:
        TypeError: If an argument is not a DataFrame, Series, ndarray, list,
            tuple, dict, int, float, str, bool or bytes.
    """
    from pandas import DataFrame, Series
    import numpy as np
    import xxhash

    def DatatoStr(data):
        """Convert ``data`` to the string that is fed to the hash."""
        if isinstance(data, (DataFrame, Series)):
            data_str = data.to_string()
        elif isinstance(data, np.ndarray):
            data_str = np.array2string(data, separator=',')
        elif isinstance(data, dict):
            # Ensure consistent order for dict items
            data_str = str(sorted(data.items()))
        elif isinstance(data, (list, tuple, int, float, str, bool)):
            data_str = str(data)
        elif isinstance(data, bytes):
            data_str = data.decode('utf-8', 'ignore')  # Decode bytes to string
        else:
            raise TypeError(f"Unsupported data type: {type(data)}")

        # The text form of the encoded bytes (e.g. "b'abc'") is what has
        # always been hashed; it is kept unchanged so digests stay stable.
        return str(data_str.encode())

    # Identity checks are required here: `== None` / `!= None` is evaluated
    # element-wise for DataFrame, Series and ndarray, and using that result
    # in a boolean context raises "truth value ... is ambiguous".
    if current is None and add is None:
        payload = "None"
        print('Insert the object for which you want to compute the hash value.')
    else:
        payload = "".join(
            DatatoStr(part) for part in (current, add) if part is not None
        )

    return str(xxhash.xxh32(payload).hexdigest())


def JointoMain():
    """Append this module's ``utils`` and ``style`` subdirectories to ``sys.path``.

    Entries already present are skipped, so repeated calls do not grow
    ``sys.path`` with duplicates.
    """
    import os
    import sys

    base_dir = os.path.dirname(__file__)
    for sub in ('utils', 'style'):
        path = os.path.join(base_dir, sub)
        if path not in sys.path:
            sys.path.append(path)