From 4786ec3661e9f1a1cbc2289f7865b051e17b0def Mon Sep 17 00:00:00 2001
From: DIANE <abderrahim.diane@cefe.cnrs.fr>
Date: Thu, 10 Oct 2024 15:57:39 +0200
Subject: [PATCH] miscellaneous.py

---
 src/utils/miscellaneous.py | 104 +++++++++++++++++++++++++++++++++++++
 1 file changed, 104 insertions(+)
 create mode 100644 src/utils/miscellaneous.py

diff --git a/src/utils/miscellaneous.py b/src/utils/miscellaneous.py
new file mode 100644
index 0000000..d8ac4b4
--- /dev/null
+++ b/src/utils/miscellaneous.py
@@ -0,0 +1,104 @@
+from packages import *
+
+
+# predict module
+def prediction(NIRS_csv, qsep, qhdr, model):
+    """Load the CSV at `NIRS_csv` and return `model.predict` on its content.
+
+    `qsep` is the field separator. When `qhdr` equals 'yes' the first CSV
+    column becomes the index; otherwise `index_col=False` is passed so that
+    no column is used as the index.
+    """
+    index_col = 0 if qhdr == 'yes' else False
+    samples = read_csv(NIRS_csv, sep=qsep, index_col=index_col)
+    return model.predict(samples)
+
+
+def download_results(data, export_name):
+    """Create a primary 'Download' button serving the file at path `data` under the name `export_name`."""
+    with open(data) as handle:
+        st.download_button('Download', handle, export_name, type='primary')
+
+@st.cache_data(show_spinner=True)
+def data_split(x, y):
+    """Kennard-Stone split (correlation metric, 25% test); returns the four subsets plus both index sets."""
+    train_index, test_index = train_test_split_idx(
+        x, y=y, method="kennard_stone", metric="correlation", test_size=0.25, random_state=42)
+    X_train, X_test = (DataFrame(x.iloc[rows, :]) for rows in (train_index, test_index))
+    y_train, y_test = y.iloc[train_index], y.iloc[test_index]
+    return X_train, X_test, y_train, y_test, train_index, test_index
+
+## descriptive stat
+@st.cache_data(show_spinner=True)
+def desc_stats(x):
+    """Return a dict of descriptive statistics computed on `x`."""
+    return {
+        'N samples': x.shape[0],
+        'Min': np.min(x),
+        'Max': np.max(x),
+        'Mean': np.mean(x), 'Median': np.median(x),
+        'S': np.std(x),
+        'RSD': np.std(x) * 100 / np.mean(x),  # relative standard deviation, in percent
+        'Skew': skew(x, axis=0, bias=True),
+        'Kurt': kurtosis(x, axis=0, bias=True),
+    }
+
+
+
+def ObjectHash(current = None, add = None):
+    """Return the xxHash32 hex digest of `current` and/or `add`.
+
+    Each non-None argument is serialised to text by `DatatoStr`; when both are
+    given their serialisations are concatenated (`current` first). With no
+    argument a hint is printed and the digest of the text "None" is returned.
+
+    Supported types: DataFrame, Series, ndarray, list, tuple, dict, int,
+    float, str, bool and bytes; anything else raises TypeError.
+    """
+    import xxhash
+
+    def DatatoStr(data):
+        """Serialise `data` to the repr of its encoded text form."""
+        from pandas import DataFrame, Series
+        import numpy as np
+
+        if isinstance(data, (DataFrame, Series)):
+            data_str = data.to_string()
+        elif isinstance(data, np.ndarray):
+            # NOTE(review): array2string abbreviates arrays above numpy's print
+            # threshold with "...", so large arrays that differ only in the
+            # elided part serialise identically - confirm this is acceptable.
+            data_str = np.array2string(data, separator=',')
+        elif isinstance(data, dict):
+            # Sort the items so insertion order does not affect the result
+            data_str = str(sorted(data.items()))
+        elif isinstance(data, (list, tuple, int, float, str, bool)):
+            data_str = str(data)
+        elif isinstance(data, bytes):
+            # Undecodable byte sequences are dropped rather than raising
+            data_str = data.decode('utf-8', 'ignore')
+        else:
+            raise TypeError(f"Unsupported data type: {type(data)}")
+        # The result is the repr of the encoded bytes, e.g. "b'abc'"
+        return str(data_str.encode())
+
+    # Identity tests: `==`/`!=` against None is element-wise for DataFrame,
+    # Series and ndarray, so using its truth value can raise ValueError.
+    parts = [DatatoStr(item) for item in (current, add) if item is not None]
+    if parts:
+        payload = "".join(parts)
+    else:
+        payload = "None"
+        print('Insert the object for which you want to compute the hash value.')
+    digest = xxhash.xxh32(payload).hexdigest()
+    return str(digest)
+
+
+
+def JointoMain():
+    """Append the 'utils' and 'style' paths under this file's directory to sys.path."""
+    import os
+    import sys
+    sys.path.extend(os.path.join(os.path.dirname(__file__), sub) for sub in ('utils', 'style'))
+
+#
\ No newline at end of file
-- 
GitLab