"""Reader for multi-block JCAMP-DX files (spectra, metadata, concentrations)."""
# Explicit imports replace the former `from Packages import *` so that the
# names this module needs are visible here. The third-party `jcamp` package
# (added to Packages.py together with `re`) is imported lazily in
# `DxReader.__init__`.
import re

import numpy as np
import pandas as pd


class DxReader:
    """Retrieve spectral data and sample metadata from a JCAMP-DX file.

    After construction the instance exposes:

    * ``spectra`` / ``specs_df_``: one row per block; the columns are the
      x values of the first block in reversed order.
    * ``metadata_`` / ``md_df_``: one row per block, indexed by the block
      position as a string (``'0'``, ``'1'``, ...).
    * ``chem_data`` / ``chem_data_``: one row per block and one column per
      element named in the first block's ``concentrations`` field.
    * ``idx``: ``np.arange`` over the number of blocks.
    * ``pattern``: the regular expression used to parse concentration entries.
    """

    # Header fields copied verbatim from each block into the metadata table.
    # The block's 'title' is additionally stored under the 'name' column.
    _METADATA_FIELDS = (
        'origin',
        'date',
        'time',
        'spectrometer/data system',
        'instrumental parameters',
        'xunits',
        'yunits',
        'xfactor',
        'yfactor',
        'firstx',
        'lastx',
        'firsty',
        'miny',
        'maxy',
        'npoints',
        'concentrations',
        'deltax',
    )

    def __init__(self, path: str):
        """Read the JCAMP-DX file at *path* and build the data tables.

        Args:
            path: Location of the file; backslashes are converted to forward
                slashes before the file is opened.

        Raises:
            KeyError: if the parsed file or one of its blocks lacks an
                expected field.
            IndexError: if the parsed file contains no blocks.
        """
        # Local import: the module stays importable (and `conc` usable)
        # without depending on an alias exported by another module.
        import jcamp

        self.__path = path.replace('\\', '/')
        self.__dxfile = jcamp.jcamp_readfile(self.__path)
        self._parse(self.__dxfile)

    def _parse(self, dxfile) -> None:
        """Populate every public attribute from the parsed-file mapping."""
        # Access samples data
        self.__nb = dxfile['blocks']  # total number of blocks = scanned samples
        self.__list_of_blocks = dxfile['children']  # all blocks, in file order
        blocks = self.__list_of_blocks
        self.__wl = blocks[0]["x"]  # wavelengths/frequencies/range

        # Preallocate the spectra matrix; every block is expected to carry as
        # many y values as the first one.
        specs = np.zeros((self.__nb, len(blocks[0]["y"])), dtype=float)
        self.idx = np.arange(self.__nb)
        self.__met = {}

        for i in range(self.__nb):
            block = blocks[i]
            specs[i] = block['y']
            block_met = {'name': block['title']}
            block_met.update(
                (field, block[field]) for field in self._METADATA_FIELDS
            )
            self.__met[str(i)] = block_met

        # Built once after the loop instead of once per block.
        self.metadata_ = pd.DataFrame(self.__met).T

        # Spectra are stored with the x axis in reversed order.
        self.spectra = pd.DataFrame(np.fliplr(specs), columns=self.__wl[::-1])

        # Concentration entries look like "(<element>,<number>,<rest>)".
        self.pattern = r"\(([^,]+),(\d+(\.\d+)?),([^)]+)"

        # Element names are taken from the first block only.
        first_block = self._strip_unusable_lines(blocks[0]['concentrations'])
        elements_name = [
            match[0] for match in re.findall(self.pattern, first_block)
        ]

        # One array of concentrations per sample, keyed by metadata index.
        raw_by_sample = self.metadata_['concentrations']
        cc = {
            sample_id: self.conc(raw)
            for sample_id, raw in raw_by_sample.items()
        }

        # Dataframe containing the chemical data (samples x elements).
        self.chem_data = pd.DataFrame(cc, index=elements_name).T

    @staticmethod
    def _strip_unusable_lines(text: str) -> str:
        """Drop the lines of *text* that contain "NCU" or "<<undef>>"."""
        return '\n'.join(
            line
            for line in text.split('\n')
            if "NCU" not in line and "<<undef>>" not in line
        )

    def conc(self, sample: str) -> np.ndarray:
        """Return the concentrations of a single sample as a float array.

        Args:
            sample: Raw text of one block's ``concentrations`` field.

        Returns:
            The numeric part of every entry matched by ``self.pattern``, in
            order of appearance; lines containing "NCU" or "<<undef>>" are
            ignored. An empty array is returned when nothing matches.
        """
        prep = self._strip_unusable_lines(sample)
        values = [match[1] for match in re.findall(self.pattern, prep)]
        # Explicit float dtype: the regex captures text, not numbers.
        return np.array(values, dtype=float)

    @property
    def specs_df_(self) -> pd.DataFrame:
        """Spectra table (same object as ``spectra``)."""
        return self.spectra

    @property
    def md_df_(self) -> pd.DataFrame:
        """Metadata table (same object as ``metadata_``)."""
        return self.metadata_

    @property
    def chem_data_(self) -> pd.DataFrame:
        """Concentration table (same object as ``chem_data``)."""
        return self.chem_data