Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
from packages import *
# predict module
def prediction(NIRS_csv, qsep, qhdr, model):
    """Load a CSV table and return the model's predictions for it.

    ``NIRS_csv`` is handed to ``read_csv`` with ``qsep`` as the separator.
    When ``qhdr`` equals 'yes' the first column is used as the index;
    for any other value ``index_col=False`` is passed instead. The
    resulting table is given to ``model.predict`` and its result returned.
    """
    index_column = 0 if qhdr == 'yes' else False
    spectra = read_csv(NIRS_csv, sep=qsep, index_col=index_column)
    return model.predict(spectra)
# Create a download button: `data` is the path of the file to offer, `export_name` is the file name proposed to the user
def download_results(data, export_name):
    """Render a primary 'Download' button serving the file at path ``data``.

    The file is opened in text mode and the open handle is passed to
    ``st.download_button``; ``export_name`` is the file name given to the
    button.
    """
    with open(data) as handle:
        st.download_button(
            'Download',
            handle,
            export_name,
            type='primary',
        )
@st.cache_data(show_spinner=True)
def data_split(x, y):
    """Split ``x`` and ``y`` into training and test subsets.

    Row positions are chosen by ``train_test_split_idx`` using the
    "kennard_stone" method with the "correlation" metric; 25% of the
    samples go to the test set. Rows are then selected positionally with
    ``.iloc``.

    Returns the tuple
    ``(X_train, X_test, y_train, y_test, train_index, test_index)``.
    """
    train_index, test_index = train_test_split_idx(
        x,
        y=y,
        method="kennard_stone",
        metric="correlation",
        test_size=0.25,
        random_state=42,
    )

    # Training subset.
    X_train = DataFrame(x.iloc[train_index, :])
    y_train = y.iloc[train_index]

    # Test subset.
    X_test = DataFrame(x.iloc[test_index, :])
    y_test = y.iloc[test_index]

    return X_train, X_test, y_train, y_test, train_index, test_index
## descriptive stat
@st.cache_data(show_spinner=True)
def desc_stats(x):
    """Return a dict of descriptive statistics for ``x``.

    Keys: 'N samples' (``x.shape[0]``), 'Min', 'Max', 'Mean', 'Median',
    'S' (standard deviation from ``np.std``), 'RSD' (standard deviation
    as a percentage of the mean), 'Skew' and 'Kurt' (``skew`` and
    ``kurtosis`` called with ``axis=0, bias=True``; presumably the
    scipy.stats functions -- confirm against the package imports).
    """
    return {
        'N samples': x.shape[0],
        'Min': np.min(x),
        'Max': np.max(x),
        'Mean': np.mean(x),
        'Median': np.median(x),
        'S': np.std(x),
        'RSD': np.std(x) * 100 / np.mean(x),
        'Skew': skew(x, axis=0, bias=True),
        'Kurt': kurtosis(x, axis=0, bias=True),
    }
def ObjectHash(current = None, add = None):
    """Return a hexadecimal xxHash32 digest of ``current`` and/or ``add``.

    Every argument that is not None is converted to text; the texts are
    concatenated (``current`` first, then ``add``) and hashed with
    ``xxhash.xxh32``. The digest is xxHash32, not MD5.

    Parameters
    ----------
    current, add :
        A pandas DataFrame or Series, a numpy ndarray, a list, tuple,
        dict, int, float, str, bool or bytes. None means "not supplied".

    Returns
    -------
    str
        The hex digest. When both arguments are None a hint is printed
        and the digest of the literal text "None" is returned.

    Raises
    ------
    TypeError
        If a supplied argument has an unsupported type.
    """
    import sys

    import numpy as np
    import xxhash
    from pandas import DataFrame, Series

    def DatatoStr(data):
        """Convert one supported object to the text that gets hashed."""
        if isinstance(data, (DataFrame, Series)):
            text = data.to_string()
        elif isinstance(data, np.ndarray):
            # Without an explicit threshold numpy abbreviates arrays of more
            # than 1000 elements with "...", so arrays differing only in the
            # elided part would produce the same digest.
            text = np.array2string(data, separator=',', threshold=sys.maxsize)
        elif isinstance(data, dict):
            # Sort the items so insertion order does not affect the digest.
            text = str(sorted(data.items()))
        elif isinstance(data, (list, tuple, int, float, str, bool)):
            text = str(data)
        elif isinstance(data, bytes):
            text = data.decode('utf-8', 'ignore')
        else:
            raise TypeError(f"Unsupported data type: {type(data)}")
        # The repr of the encoded bytes ("b'...'") is what has always been
        # hashed; it is kept so digests of unchanged inputs stay the same.
        return str(text.encode())

    # Identity checks are required here: ``== None`` on a DataFrame, Series
    # or ndarray is evaluated elementwise and cannot be used as a condition.
    parts = [DatatoStr(item) for item in (current, add) if item is not None]

    if parts:
        payload = "".join(parts)
    else:
        payload = "None"
        print('Insert the object for which you want to compute the hash value.')

    return str(xxhash.xxh32(payload).hexdigest())
def JointoMain():
    """Make the sibling 'utils' and 'style' directories importable.

    The two directories located next to this file are appended to
    ``sys.path``. A directory that is already listed is skipped, so
    calling the function repeatedly does not keep growing ``sys.path``
    with duplicate entries.
    """
    import os
    import sys

    here = os.path.dirname(__file__)
    for folder in ('utils', 'style'):
        candidate = os.path.join(here, folder)
        if candidate not in sys.path:
            sys.path.append(candidate)
#