Newer
Older
# predict module
def prediction(NIRS_csv, qsep, qhdr, model):
    """Read a CSV of spectra and return the model's predictions for it.

    Args:
        NIRS_csv: Path or file-like object handed to ``read_csv``.
        qsep: Field separator of the CSV.
        qhdr: ``'yes'`` to use the first column as the row index; any
            other value reads the file with ``index_col=False``.
        model: Object exposing ``predict``; it is called with the loaded
            table.

    Returns:
        Whatever ``model.predict`` returns for the loaded table.
    """
    # Only the literal string 'yes' selects the first column as the index.
    index_column = 0 if qhdr == 'yes' else False
    spectra = read_csv(NIRS_csv, sep=qsep, index_col=index_column)
    return model.predict(spectra)
# Creates a download button: `data` is the path of the file to offer, `export_name` is the name given to the downloaded file
def download_results(data, export_name):
    """Render a primary Streamlit download button for the file at ``data``.

    Args:
        data: Path of the file to offer; it is opened in text mode.
        export_name: File name passed to the download button.
    """
    with open(data) as source_file:
        # The handle has to stay open while the button is being built.
        st.download_button(
            label='Download',
            data=source_file,
            file_name=export_name,
            type='primary',
        )
@st.cache_data(show_spinner=True)
def data_split(x, y):
    """Split ``x`` and ``y`` into training and test subsets (25% test).

    NOTE(review): the original comment attributes the split to the
    Kennard-Stone method with a correlation metric; whether that holds
    depends on which ``train_test_split`` is imported at the top of the
    file -- confirm.

    Args:
        x: Predictor table; the resulting subsets must expose ``.index``.
        y: Target values aligned with ``x``.

    Returns:
        ``(X_train, X_test, y_train, y_test, train_index, test_index)``,
        where the two index entries are NumPy arrays built from the
        ``.index`` of the corresponding ``x`` subset.
    """
    subsets = train_test_split(x, y, test_size=0.25, random_state=42)
    X_train, X_test, y_train, y_test = subsets

    # Keep the row labels of each subset so callers can trace samples back.
    train_index = np.array(X_train.index)
    test_index = np.array(X_test.index)
    return X_train, X_test, y_train, y_test, train_index, test_index
## descriptive stat
@st.cache_data(show_spinner=True)
def desc_stats(x):
    """Collect descriptive statistics of ``x`` in a dictionary.

    Args:
        x: Data exposing ``.shape`` and accepted by the NumPy reductions
            and by ``skew`` / ``kurtosis`` (presumably the scipy.stats
            functions -- confirm against the file's imports).

    Returns:
        dict with the keys ``'N samples'``, ``'Min'``, ``'Max'``,
        ``'Mean'``, ``'Median'``, ``'S'`` (``np.std``), ``'RSD'``
        (standard deviation as a percentage of the mean), ``'Skew'`` and
        ``'Kurt'``.
    """
    return {
        'N samples': x.shape[0],
        'Min': np.min(x),
        'Max': np.max(x),
        'Mean': np.mean(x),
        'Median': np.median(x),
        'S': np.std(x),
        # Relative standard deviation, expressed in percent of the mean.
        'RSD': np.std(x) * 100 / np.mean(x),
        'Skew': skew(x, axis=0, bias=True),
        'Kurt': kurtosis(x, axis=0, bias=True),
    }
def ObjectHash(current = None, add = None):
    """Return the xxHash-32 hex digest of ``current``, ``add``, or both.

    Every argument that is not ``None`` is rendered to text by the nested
    ``DatatoStr`` helper. When both are given, the two renderings are
    concatenated (``current`` first) before hashing. When neither is
    given, a hint is printed and the literal string ``"None"`` is hashed.

    Args:
        current: First object to hash, or ``None``.
        add: Second object to hash, or ``None``.

    Returns:
        str: Hexadecimal digest produced by ``xxhash.xxh32``.

    Raises:
        TypeError: If an argument has a type ``DatatoStr`` does not support.
    """
    def DatatoStr(data):
        """Render ``data`` as the text form that is fed to the hash."""
        from pandas import DataFrame, Series
        import numpy as np

        if isinstance(data, (DataFrame, Series)):
            data_str = data.to_string()
        elif isinstance(data, np.ndarray):
            # NOTE(review): array2string summarises large arrays with "...",
            # so big arrays differing only in the elided part would render
            # identically -- confirm whether that matters for callers.
            data_str = np.array2string(data, separator=',')
        elif isinstance(data, (list, tuple)):
            data_str = str(data)
        elif isinstance(data, dict):
            # Sort the items so the rendering does not depend on insertion order.
            data_str = str(sorted(data.items()))
        elif isinstance(data, (int, float, str, bool)):
            data_str = str(data)
        elif isinstance(data, bytes):
            data_str = data.decode('utf-8', 'ignore')  # Decode bytes to string
        else:
            raise TypeError(f"Unsupported data type: {type(data)}")
        # The repr of the encoded bytes (e.g. "b'...'") is what gets hashed.
        return str(data_str.encode())

    # Identity checks are required here: ``== None`` on a DataFrame or an
    # ndarray compares element-wise and cannot be used as a condition.
    if current is None and add is None:
        payload = "None"
        print('Insert the object for which you want to compute the hash value.')
    elif current is None:
        payload = DatatoStr(add)
    elif add is None:
        payload = DatatoStr(current)
    else:
        payload = DatatoStr(current) + DatatoStr(add)

    # Imported only once there is something to hash.
    import xxhash
    return str(xxhash.xxh32(payload).hexdigest())
def JointoMain():
    """Make the sibling ``utils`` and ``style`` folders importable.

    Appends ``<directory of this file>/utils`` and ``.../style`` to
    ``sys.path``. A path that is already present is not added again, so
    calling this function repeatedly does not grow ``sys.path``.
    """
    import os
    import sys

    base_dir = os.path.dirname(__file__)
    for folder in ('utils', 'style'):
        candidate = os.path.join(base_dir, folder)
        # Skip paths already registered by an earlier call.
        if candidate not in sys.path:
            sys.path.append(candidate)
#