Skip to content
Snippets Groups Projects
miscellaneous.py 3.38 KiB
Newer Older
  • Learn to ignore specific revisions
  • DIANE's avatar
    DIANE committed
    import streamlit as st
    
    DIANE's avatar
    DIANE committed
    from pandas import DataFrame
    import numpy as np
    
    DIANE's avatar
    DIANE committed
    
    # predict module
    def prediction(NIRS_csv, qsep, qhdr, model):
        """Load a CSV table and return the model's predictions for it.

        Parameters
        ----------
        NIRS_csv : path or file-like object
            CSV source handed to ``pandas.read_csv``.
        qsep : str
            Field separator used in the CSV.
        qhdr : str
            ``'yes'`` makes the first column the row index (``index_col=0``);
            any other value passes ``index_col=False``.
        model : object
            Any object exposing ``predict(X)``.

        Returns
        -------
        Whatever ``model.predict`` returns for the loaded table.
        """
        # read_csv is not among the file-level imports (only DataFrame is),
        # so it is imported here to keep the function self-contained.
        from pandas import read_csv

        index_col = 0 if qhdr == 'yes' else False
        X_test = read_csv(NIRS_csv, sep=qsep, index_col=index_col)
        return model.predict(X_test)
    
    
    # Creates a download button: takes the path of the file to offer and the file name to store it under
    def download_results(data, export_name):
        """Show a primary 'Download' button serving the file located at ``data``.

        ``data`` is passed to ``open`` and the resulting handle is given to
        Streamlit; ``export_name`` is the file name proposed for the download.
        """
        with open(data) as source:
            st.download_button(
                label='Download',
                data=source,
                file_name=export_name,
                type='primary',
            )
    
    @st.cache_data(show_spinner=True)
    def data_split(x, y):
        """Split ``x`` and ``y`` into training and test parts with Kennard-Stone.

        Parameters
        ----------
        x, y
            Arrays forwarded to ``kennard_stone.train_test_split``. The parts
            returned for ``x`` must expose ``.index`` (e.g. pandas objects).

        Returns
        -------
        tuple
            ``(X_train, X_test, y_train, y_test, train_index, test_index)``,
            where the two index objects are ``X_train.index`` and
            ``X_test.index``.
        """
        from kennard_stone import train_test_split

        # 25% of the samples are held out for testing. No metric argument is
        # passed, so the library's default metric applies.
        # NOTE(review): Kennard-Stone selection is deterministic, so
        # random_state is presumably ignored by the library - confirm.
        X_train, X_test, y_train, y_test = train_test_split(
            x, y, test_size=0.25, random_state=42
        )
        return X_train, X_test, y_train, y_test, X_train.index, X_test.index
    
    ## descriptive stat
    @st.cache_data(show_spinner=True)
    def desc_stats(x):
        """Return a dict of descriptive statistics for ``x``.

        Parameters
        ----------
        x
            Array-like object exposing ``.shape``; it is passed unchanged to
            the NumPy and SciPy reducers below.

        Returns
        -------
        dict
            Keys, in order: ``'N samples'`` (``x.shape[0]``), ``'Min'``,
            ``'Max'``, ``'Mean'``, ``'Median'``, ``'S'`` (``np.std``),
            ``'RSD'`` (``100 * S / Mean``), ``'Skew'`` and ``'Kurt'``
            (SciPy, ``axis=0``, ``bias=True``).
        """
        from scipy.stats import skew, kurtosis

        # Computed once and reused for both 'S'/'Mean' and 'RSD'.
        mean = np.mean(x)
        std = np.std(x)

        return {
            'N samples': x.shape[0],
            'Min': np.min(x),
            'Max': np.max(x),
            'Mean': mean,
            'Median': np.median(x),
            'S': std,
            'RSD': std * 100 / mean,
            'Skew': skew(x, axis=0, bias=True),
            'Kurt': kurtosis(x, axis=0, bias=True),
        }
    
    
    
    def ObjectHash(current = None, add = None):
        """Return the xxHash32 hex digest of a text rendering of the inputs.

        Parameters
        ----------
        current, add
            Each may be ``None`` or a DataFrame, Series, ndarray, list, tuple,
            dict, int, float, str, bool or bytes.

        Returns
        -------
        str
            Hex digest of ``xxhash.xxh32`` over:
            * both renderings concatenated when both arguments are given;
            * the rendering of ``add`` when only ``add`` is given;
            * the literal text ``"None"`` otherwise (a hint is also printed
              when both arguments are ``None``).

        Raises
        ------
        TypeError
            If an argument that gets rendered has an unsupported type.
        """
        from pandas import DataFrame, Series
        import numpy as np

        def DatatoStr(data):
            """Render ``data`` as text and return ``str()`` of its encoded bytes."""
            if isinstance(data, (DataFrame, Series)):
                data_str = data.to_string()
            elif isinstance(data, np.ndarray):
                # NOTE(review): array2string summarises arrays larger than
                # NumPy's print threshold, so very large arrays that differ
                # only in the elided part would render identically - confirm
                # whether that is acceptable.
                data_str = np.array2string(data, separator=',')
            elif isinstance(data, dict):
                # Sort the items so the rendering does not depend on insertion order
                data_str = str(sorted(data.items()))
            elif isinstance(data, (list, tuple, int, float, str, bool)):
                data_str = str(data)
            elif isinstance(data, bytes):
                data_str = data.decode('utf-8', 'ignore')  # Decode bytes to string
            else:
                raise TypeError(f"Unsupported data type: {type(data)}")

            # The "b'...'" repr of the encoded bytes is what gets hashed; kept
            # unchanged so existing digests stay the same.
            return str(data_str.encode())

        # Identity checks are required here: `== None` / `!= None` on an
        # ndarray, DataFrame or Series compares element-wise, and using that
        # result in a boolean context raises ValueError.
        if current is None and add is None:
            payload = "None"
            print('Insert the object for which you want to compute the hash value.')
        elif current is not None and add is not None:
            payload = DatatoStr(current) + DatatoStr(add)
        elif add is not None:
            payload = DatatoStr(add)
        else:
            # Only `current` was supplied.
            # NOTE(review): this hashes the constant "None" and ignores
            # `current`; preserved as found - confirm whether
            # DatatoStr(current) was intended.
            payload = "None"

        # Imported only once there is something to hash.
        import xxhash
        return str(xxhash.xxh32(payload).hexdigest())
    
    
    
    def JointoMain():
        """Add the ``utils`` and ``style`` folders next to this file to ``sys.path``.

        Each folder path is built from the directory of ``__file__`` and is
        appended only if it is not already present, so repeated calls do not
        keep growing ``sys.path`` with duplicate entries.
        """
        import os
        import sys

        here = os.path.dirname(__file__)
        for folder in ('utils', 'style'):
            target = os.path.join(here, folder)
            if target not in sys.path:
                sys.path.append(target)
    
    #