# miscellaneous.py
import streamlit as st
DIANE's avatar
DIANE committed
from pandas import DataFrame
import numpy as np
DIANE's avatar
DIANE committed

# predict module
def prediction(NIRS_csv, qsep, qhdr, model):
    """Read a CSV of samples and return the model's predictions for it.

    Args:
        NIRS_csv: Path or file-like object accepted by ``pandas.read_csv``.
        qsep: Field separator used in the CSV.
        qhdr: ``'yes'`` to use the first column as the row index; any other
            value passes ``index_col=False`` so no column is used as index.
        model: Object exposing ``predict(X)``.

    Returns:
        Whatever ``model.predict`` returns for the loaded table.
    """
    # Local import: the module header only imports DataFrame from pandas, so
    # read_csv was previously an undefined name here.
    from pandas import read_csv

    # NOTE(review): the original comment described qhdr as a "column header"
    # flag, but the value only drives ``index_col`` -- confirm intent.
    col = 0 if qhdr == 'yes' else False
    X_test = read_csv(NIRS_csv, sep=qsep, index_col=col)
    return model.predict(X_test)


def download_results(data, export_name):
    """Render a primary 'Download' button serving the file located at ``data``.

    Args:
        data: Path of the file to open and offer for download.
        export_name: File name proposed to the user for the download.
    """
    with open(data) as handle:
        st.download_button(
            label='Download',
            data=handle,
            file_name=export_name,
            type='primary',
        )

@st.cache_data(show_spinner =True)
def data_split(x, y):
    """Split ``x`` and ``y`` into train and test subsets with kennard_stone.

    25% of the samples go to the test set. The result is cached by
    ``st.cache_data``.

    Args:
        x: Feature table; must expose ``.index`` after splitting
            (presumably a pandas DataFrame -- confirm against callers).
        y: Target values aligned with ``x``.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test, train_index, test_index)``
        where the two index entries are numpy arrays built from the row
        labels of ``X_train`` and ``X_test``.
    """
    from kennard_stone import train_test_split

    # No ``metric`` argument is passed, so the library default applies.
    # NOTE(review): the previous comment mentioned a correlation metric --
    # confirm which metric is intended.
    X_train, X_test, y_train, y_test = train_test_split(
        x, y, test_size = 0.25, random_state = 42
    )
    train_index = np.array(X_train.index)
    test_index = np.array(X_test.index)
    return X_train, X_test, y_train, y_test, train_index, test_index

## descriptive stat
@st.cache_data(show_spinner =True)
def desc_stats(x):
    """Compute descriptive statistics for ``x``.

    The result is cached by ``st.cache_data``.

    Args:
        x: Array-like exposing ``.shape`` and accepted by the numpy and
            scipy reductions used below.

    Returns:
        dict with the keys ``'N samples'``, ``'Min'``, ``'Max'``, ``'Mean'``,
        ``'Median'``, ``'S'`` (``np.std``), ``'RSD'`` (``S * 100 / Mean``),
        ``'Skew'`` and ``'Kurt'`` (both computed along axis 0 with
        ``bias=True``).
    """
    from scipy.stats import skew, kurtosis

    # Computed once and reused for both 'S'/'Mean' and 'RSD'.
    mean = np.mean(x)
    std = np.std(x)

    return {
        'N samples': x.shape[0],
        'Min': np.min(x),
        'Max': np.max(x),
        'Mean': mean,
        'Median': np.median(x),
        'S': std,
        'RSD': std*100/mean,
        'Skew': skew(x, axis=0, bias=True),
        'Kurt': kurtosis(x, axis=0, bias=True),
    }



def ObjectHash(current = None, add = None):
    """Return the xxh32 hex digest of a string built from ``current``/``add``.

    The hashed payload depends on which arguments are supplied:

    * both ``None``: the literal ``"None"`` (a prompt is printed);
    * both given: string form of ``current`` followed by that of ``add``;
    * only ``add`` given: string form of ``add``;
    * only ``current`` given: the literal ``"None"``.

    Args:
        current: Optional object (DataFrame, Series, ndarray, list, tuple,
            dict, int, float, str, bool or bytes).
        add: Optional object of the same supported types.

    Returns:
        str: hexadecimal xxh32 digest of the payload.

    Raises:
        TypeError: if a supplied object is of an unsupported type.
    """
    import xxhash
    import numpy as np
    from pandas import DataFrame, Series

    def _to_text(data):
        """Return a deterministic string form of ``data`` for hashing."""
        if isinstance(data, (DataFrame, Series)):
            text = data.to_string()
        elif isinstance(data, np.ndarray):
            # NOTE(review): array2string abbreviates arrays above numpy's
            # print threshold, so large arrays differing only in the elided
            # part yield the same text -- confirm this is acceptable.
            text = np.array2string(data, separator=',')
        elif isinstance(data, dict):
            # Sort the items so the result does not depend on insertion order.
            text = str(sorted(data.items()))
        elif isinstance(data, (list, tuple, int, float, str, bool)):
            text = str(data)
        elif isinstance(data, bytes):
            text = data.decode('utf-8', 'ignore')
        else:
            raise TypeError(f"Unsupported data type: {type(data)}")

        # Kept as the repr of the encoded bytes ("b'...'") so digests match
        # those produced before this rewrite.
        return str(text.encode())

    # Identity checks are required here: ``== None`` on a DataFrame, Series
    # or ndarray is element-wise and cannot be used in a boolean context.
    if current is None and add is None:
        payload = "None"
        print('Insert the object for which you want to compute the hash value.')
    elif current is not None and add is not None:
        payload = _to_text(current) + _to_text(add)
    elif current is None:
        payload = _to_text(add)
    else:
        # NOTE(review): ``current`` is ignored when ``add`` is None; this is
        # preserved from the original -- confirm it is intended.
        payload = "None"

    return str(xxhash.xxh32(payload).hexdigest())



def JointoMain():
    """Add the sibling ``utils`` and ``style`` directories to ``sys.path``.

    The directories are resolved relative to this file. A path that is
    already present is not appended again, so repeated calls do not grow
    ``sys.path``.
    """
    import os
    import sys

    base_dir = os.path.dirname(__file__)
    for sub_dir in ['utils', 'style']:
        path = os.path.join(base_dir, sub_dir)
        if path not in sys.path:
            sys.path.append(path)

#