Newer
Older
# predict module
def prediction(NIRS_csv, qsep, qhdr, model):
    """Load a CSV table and return the model's predictions for it.

    Args:
        NIRS_csv: Path or file-like object handed to ``pandas.read_csv``.
        qsep: Field separator of the CSV file.
        qhdr: ``'yes'`` selects ``index_col=0`` (first column used as the
            index); any other value selects ``index_col=False``.
        model: Object exposing a ``predict`` method.

    Returns:
        Whatever ``model.predict`` returns for the loaded table.
    """
    # Local import keeps this block self-contained; pandas is already a
    # dependency of this module (``DataFrame`` is used elsewhere).
    from pandas import read_csv

    col = 0 if qhdr == 'yes' else False

    # NOTE(review): the original body used ``X_test`` without ever defining
    # it (NameError) and left ``NIRS_csv``, ``qsep`` and ``col`` unused.
    # Loading the CSV this way is a reconstruction -- confirm that ``col`` is
    # meant to be ``index_col`` and not a header option.
    X_test = read_csv(NIRS_csv, sep=qsep, index_col=col)

    return model.predict(X_test)
def download_results(data, export_name):
    """Render a Streamlit download button for an existing file.

    Args:
        data: Passed directly to ``open()``, so it must be the path of the
            file whose content is offered for download.
        export_name: File name passed to the download button.
    """
    with open(data) as source:
        st.download_button(
            label='Download',
            data=source,
            file_name=export_name,
            type='primary',
        )
@st.cache_data(show_spinner=True)
def data_split(x, y):
    """Split ``x`` and ``y`` into training and test subsets.

    The row positions come from ``train_test_split_idx`` called with the
    ``kennard_stone`` method, the ``correlation`` metric, a test size of
    0.25 and ``random_state=42``.

    Args:
        x: Table indexed positionally through ``.iloc`` (rows are selected).
        y: Target values indexed positionally through ``.iloc``.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test, train_index, test_index)``
        where the ``X_*`` items are wrapped in ``DataFrame``.
    """
    train_index, test_index = train_test_split_idx(
        x,
        y=y,
        method="kennard_stone",
        metric="correlation",
        test_size=0.25,
        random_state=42,
    )

    def _subset(rows):
        # Select the same row positions from the features and the target.
        return DataFrame(x.iloc[rows, :]), y.iloc[rows]

    X_train, y_train = _subset(train_index)
    X_test, y_test = _subset(test_index)
    return X_train, X_test, y_train, y_test, train_index, test_index
def desc_stats(x):
    """Return a dictionary of descriptive statistics for ``x``.

    Keys, in insertion order: ``'N samples'`` (``x.shape[0]``), ``'Min'``,
    ``'Max'``, ``'Mean'``, ``'Median'``, ``'S'`` (``np.std``), ``'RSD'``
    (standard deviation times 100 divided by the mean), ``'Skew'`` and
    ``'Kurt'`` (both computed along axis 0 with ``bias=True``).
    """
    average = np.mean(x)
    spread = np.std(x)
    return {
        'N samples': x.shape[0],
        'Min': np.min(x),
        'Max': np.max(x),
        'Mean': average,
        'Median': np.median(x),
        'S': spread,
        # Relative standard deviation, expressed as a percentage of the mean.
        'RSD': spread * 100 / average,
        'Skew': skew(x, axis=0, bias=True),
        'Kurt': kurtosis(x, axis=0, bias=True),
    }
def ObjectHash(current = None, add = None):
def DatatoStr(data):
    """Serialise ``data`` to a deterministic string for later hashing.

    The value is first rendered as text, the text is encoded to bytes, and
    the ``str()`` of those bytes (i.e. a ``"b'...'"`` literal) is returned.
    No hashing happens here.

    Args:
        data: A pandas ``DataFrame``/``Series``, a numpy array, a list,
            tuple, dict, int, float, str, bool or bytes object.

    Returns:
        The string form of the encoded text representation of ``data``.

    Raises:
        TypeError: If ``data`` is of any other type.
    """
    import sys

    import numpy as np
    from pandas import DataFrame, Series

    if isinstance(data, (DataFrame, Series)):
        data_str = data.to_string()
    elif isinstance(data, np.ndarray):
        # Without an explicit threshold numpy abbreviates arrays of more than
        # 1000 elements with "...", so arrays differing only in the elided
        # part would serialise (and therefore hash) identically.
        data_str = np.array2string(data, separator=',', threshold=sys.maxsize)
    elif isinstance(data, dict):
        # Sort the items so the result does not depend on insertion order.
        data_str = str(sorted(data.items()))
    elif isinstance(data, (list, tuple, int, float, str, bool)):
        data_str = str(data)
    elif isinstance(data, bytes):
        # Undecodable bytes are dropped rather than raising.
        data_str = data.decode('utf-8', 'ignore')
    else:
        raise TypeError(f"Unsupported data type: {type(data)}")

    return str(data_str.encode())
import xxhash
if current == None and add == None:
object = "None"
print('Insert the object for which you want to compute the hash value.')
elif current != None and add != None:
object = DatatoStr(current)+ DatatoStr(add)
elif current == None and add != None:
object = DatatoStr(add)
elif current != None and add == None:
object = DatatoStr(current)
# Compute the MD5 hash