from packages import *
import jcamp as jc

# NOTE(review): `np`, `DataFrame` and `re` are not imported explicitly in this
# file; they are presumably re-exported by the wildcard import from `packages`.


class JcampParser:
    """Retrieve spectral data and sample metadata from a JCAMP file.

    The file is read with ``jcamp.jcamp_readfile``; each entry of its
    ``'children'`` list is treated as one scanned sample.

    Attributes:
        idx: ``np.arange`` over the number of blocks (positional sample ids).
        metadata_: DataFrame with one row per block (row labels are the block
            positions as strings: ``'0'``, ``'1'``, ...) and one column per
            extracted header field, including the raw ``'concentrations'`` text.
        spectra: DataFrame of the ``'y'`` values, one row per sample (indexed
            by sample name), columns being the reversed ``'x'`` axis of the
            first block.
        pattern: Regular expression used to parse concentration entries.
        chem_data: DataFrame of concentrations as floats, one row per sample
            (indexed by sample name), one column per element name.
    """

    def __init__(self, path):
        """Read the JCAMP file at *path* and build the data frames.

        Args:
            path: Location of the JCAMP file, passed unchanged to
                ``jcamp.jcamp_readfile``.
        """
        self.__path = path
        self.__dxfile = jc.jcamp_readfile(self.__path)

        # Number of blocks, i.e. the total number of scanned samples.
        self.__nb = self.__dxfile['blocks']
        # All blocks (one per sample) as a list.
        self.__list_of_blocks = self.__dxfile['children']
        # Wavelengths/frequencies/range, taken from the first block only.
        # NOTE(review): assumes every block shares the same x axis -- confirm.
        self.__wl = self.__list_of_blocks[0]["x"]

        # Preallocate the spectra matrix: one row per block, one column per
        # point of the first block.
        specs = np.zeros((self.__nb, len(self.__list_of_blocks[0]["y"])), dtype=float)
        self.idx = np.arange(self.__nb)
        self.__met = {}
        for i in range(self.__nb):
            block = self.__list_of_blocks[i]
            specs[i] = block['y']

            # The 'spectrometer/data system' header packs several fields
            # separated by '\n$$'; split it once and pick fields by position.
            # Fields 6 and 8 are 'key=value' pairs of which the value is kept.
            system_fields = block['spectrometer/data system'].split('\n$$')

            # Header keys the original author left disabled: time,
            # instrumental parameters, xfactor, yfactor, firsty, miny, maxy,
            # deltax.
            self.__met[f'{i}'] = {
                'name': block['title'],
                'origin': block['origin'],
                'date': block['date'],
                'spectrometer': system_fields[0],
                'n_scans': system_fields[6].split('=')[1],
                'resolution': system_fields[8].split('=')[1],
                'xunits': block['xunits'],
                'yunits': block['yunits'],
                'firstx': block['firstx'],
                'lastx': block['lastx'],
                'npoints': block['npoints'],
                'concentrations': block['concentrations'],
            }

        # The dict is keyed by block position, so transpose to get one row
        # per sample.
        self.metadata_ = DataFrame(self.__met).T

        # Spectra are stored with both the values and the x axis reversed.
        self.spectra = DataFrame(
            np.fliplr(specs),
            columns=self.__wl[::-1],
            index=self.metadata_['name'],
        )

        # --- Concentrations ---
        # Matches "(<name>,<number>,<rest>": group 1 is the element name,
        # group 2 the (non-negative decimal) concentration.
        self.pattern = r"\(([^,]+),(\d+(\.\d+)?),([^)]+)"

        # Element names are read from the first block only.
        # NOTE(review): assumes all blocks list the same elements in the same
        # order -- confirm.
        first_block = self._defined_lines(self.__list_of_blocks[0]['concentrations'])
        elements_name = [match[0] for match in re.findall(self.pattern, first_block)]

        # One array of concentration values per sample, keyed by row label.
        raw_concentrations = self.metadata_['concentrations']
        cc = {label: self.conc(text) for label, text in raw_concentrations.items()}

        # Data frame containing the chemical data, re-indexed by sample name.
        self.chem_data = DataFrame(cc, index=elements_name).T.astype(float)
        self.chem_data.index = self.metadata_['name']

    @staticmethod
    def _defined_lines(text):
        """Return *text* without the lines containing 'NCU' or '<<undef>>'."""
        return '\n'.join(
            line
            for line in text.split('\n')
            if "NCU" not in line and "<<undef>>" not in line
        )

    def conc(self, sample):
        """Return the concentrations of a single sample.

        Args:
            sample: Raw 'concentrations' text of one block.

        Returns:
            A NumPy array holding, for each entry matched by ``self.pattern``
            on the lines that contain neither 'NCU' nor '<<undef>>', the
            matched number as a string.
        """
        prep = self._defined_lines(sample)
        return np.array([match[1] for match in re.findall(self.pattern, prep)])

    @property
    def specs_df_(self):
        """Spectra data frame (same object as ``self.spectra``)."""
        return self.spectra

    @property
    def md_df_(self):
        """Metadata without the 'concentrations' column and without columns
        whose values are all empty strings."""
        me = self.metadata_.drop("concentrations", axis=1)
        me = me.drop(me.columns[(me == '').all()], axis=1)
        return me

    @property
    def md_df_st_(self):
        """Subset of the metadata limited to the 'origin' and 'date' columns."""
        rt = ['origin', 'date']
        return self.metadata_.loc[:, rt]

    @property
    def chem_data_(self):
        """Chemical data frame (same object as ``self.chem_data``)."""
        return self.chem_data


class CsvParser:
    """Placeholder for a CSV parser; it currently holds no state or behavior."""

    def __init__(self) -> None:
        pass