Skip to content
Snippets Groups Projects
miscellaneous.py 3.44 KiB
Newer Older
DIANE Abderrahim's avatar
DIANE Abderrahim committed
from packages import *


# predict module
def prediction(NIRS_csv, qsep, qhdr, model):
    """Load a CSV file and return the model's predictions for its rows.

    Parameters
    ----------
    NIRS_csv : path or file-like object handed to ``read_csv``.
    qsep : field separator forwarded as ``sep``.
    qhdr : when equal to ``'yes'`` the first column is used as the row
        index (``index_col=0``); any other value passes ``index_col=False``.
    model : object exposing a ``predict`` method.

    Returns
    -------
    Whatever ``model.predict`` returns for the loaded table.
    """
    # NOTE(review): read_csv is presumably pandas.read_csv (provided by the
    # star import at the top of the file) - confirm.
    index_column = 0 if qhdr == 'yes' else False
    return model.predict(read_csv(NIRS_csv, sep=qsep, index_col=index_column))


# Function that creates a download button - needs the path of the file to offer and the file name to store it under
def download_results(data, export_name):
    """Show a primary-styled 'Download' button serving the file at ``data``.

    ``data`` is passed to ``open()`` (default text mode) and the resulting
    file handle is given to ``st.download_button`` together with
    ``export_name``. The file is closed again when the ``with`` block exits.
    """
    with open(data) as source_file:
        st.download_button('Download', source_file, export_name, type='primary')

@st.cache_data(show_spinner=True)
def data_split(x, y):
    """Split ``x`` and ``y`` into training and test subsets.

    The row positions come from ``train_test_split_idx`` called with the
    "kennard_stone" method, the "correlation" metric, a test share of 0.25
    and ``random_state=42``. Both ``x`` and ``y`` are sliced with ``.iloc``.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test, train_index, test_index)``;
        the two X parts are wrapped in ``DataFrame``.
    """
    train_index, test_index = train_test_split_idx(
        x,
        y=y,
        method="kennard_stone",
        metric="correlation",
        test_size=0.25,
        random_state=42,
    )

    # Training partition
    X_train = DataFrame(x.iloc[train_index, :])
    y_train = y.iloc[train_index]

    # Test partition
    X_test = DataFrame(x.iloc[test_index, :])
    y_test = y.iloc[test_index]

    return X_train, X_test, y_train, y_test, train_index, test_index

## descriptive stat
@st.cache_data(show_spinner=True)
def desc_stats(x):
    """Return a dict of descriptive statistics computed on ``x``.

    Keys, in insertion order: 'N samples' (``x.shape[0]``), 'Min', 'Max',
    'Mean', 'Median', 'S' (``np.std``), 'RSD' (``np.std * 100 / np.mean``),
    'Skew' and 'Kurt' (both called with ``axis=0, bias=True``).
    """
    # NOTE(review): skew / kurtosis are presumably scipy.stats functions
    # supplied by the star import - confirm.
    return {
        'N samples': x.shape[0],
        'Min': np.min(x),
        'Max': np.max(x),
        'Mean': np.mean(x),
        'Median': np.median(x),
        'S': np.std(x),
        'RSD': np.std(x) * 100 / np.mean(x),
        'Skew': skew(x, axis=0, bias=True),
        'Kurt': kurtosis(x, axis=0, bias=True),
    }



def ObjectHash(current = None, add = None):
    """Return the xxHash32 hex digest of ``current`` and/or ``add``.

    Each argument that is not ``None`` is converted to text by ``DatatoStr``;
    when both are given the two texts are concatenated (``current`` first)
    before hashing. When both are ``None`` a hint is printed and the literal
    string ``"None"`` is hashed.

    Parameters
    ----------
    current, add : DataFrame, Series, ndarray, list, tuple, dict, int, float,
        str, bool, bytes or None.

    Returns
    -------
    str
        Hexadecimal xxh32 digest.

    Raises
    ------
    TypeError
        If an argument has a type ``DatatoStr`` does not support.
    """
    def DatatoStr(data):
        """Convert ``data`` to the text representation that gets hashed."""
        from pandas import DataFrame, Series
        import numpy as np

        if isinstance(data, (DataFrame, Series)):
            data_str = data.to_string()
        elif isinstance(data, np.ndarray):
            # NOTE(review): with NumPy's default print options array2string
            # summarises large arrays with '...', so big arrays differing only
            # in the elided part would get the same text. Left unchanged to
            # keep existing hash values stable - confirm whether it matters.
            data_str = np.array2string(data, separator=',')
        elif isinstance(data, dict):
            # Sort the items so the text does not depend on insertion order
            data_str = str(sorted(data.items()))
        elif isinstance(data, (list, tuple, int, float, str, bool)):
            data_str = str(data)
        elif isinstance(data, bytes):
            data_str = data.decode('utf-8', 'ignore')  # Decode bytes to string
        else:
            raise TypeError(f"Unsupported data type: {type(data)}")

        # The hashed text is the repr of the encoded bytes ("b'...'"); kept
        # as-is so digests match those produced before this revision.
        return str(data_str.encode())

    import xxhash

    # Identity checks are required here: `== None` on a DataFrame, Series or
    # ndarray is evaluated element-wise and its truth value is ambiguous.
    if current is None and add is None:
        payload = "None"
        print('Insert the object for which you want to compute the hash value.')
    else:
        payload = "".join(
            DatatoStr(item) for item in (current, add) if item is not None
        )

    # Compute the xxHash32 digest
    return str(xxhash.xxh32(payload).hexdigest())



def JointoMain():
    """Add the ``utils`` and ``style`` directories next to this file to ``sys.path``.

    Each directory is appended only if it is not already listed, so calling
    the function repeatedly does not keep growing ``sys.path``.
    """
    import os
    import sys

    base_dir = os.path.dirname(__file__)
    for subdir in ('utils', 'style'):
        candidate = os.path.join(base_dir, subdir)
        # Skip entries that are already registered to avoid duplicates
        if candidate not in sys.path:
            sys.path.append(candidate)

#