Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
from packages import *
import jcamp as jc
class JcampParser:
    """Retrieve spectral data and sample metadata from a multi-block JCAMP file.

    The file is read with ``jcamp.jcamp_readfile``; every entry of its
    ``'children'`` list is handled as one scanned sample.

    Attributes set by ``__init__``:
        idx: ``np.arange`` over the number of blocks (positional sample ids).
        metadata_: DataFrame with one row per block (row labels ``'0'``,
            ``'1'``, ...) and the header fields picked by ``_block_metadata``.
        spectra: DataFrame of the ``'y'`` values of every block, columns
            flipped left-to-right and labelled with the reversed ``'x'`` axis
            of the first block, indexed by the ``name`` metadata column.
        pattern: regular expression applied to the ``'concentrations'`` text.
        chem_data: float DataFrame of the values captured by ``conc`` for
            every block, indexed by the ``name`` metadata column.
    """

    def __init__(self, path):
        """Load the file at ``path`` and build the spectra/metadata/chemistry tables.

        Args:
            path: file location, handed unchanged to ``jcamp.jcamp_readfile``.

        Raises:
            IndexError: if ``'children'`` holds fewer blocks than ``'blocks'``
                announces (including no block at all).
            KeyError: if a block lacks one of the header fields read here.
        """
        self.__path = path
        self.__dxfile = jc.jcamp_readfile(self.__path)

        # 'blocks' is used as the sample count, 'children' as the list of
        # per-sample dictionaries.
        self.__nb = self.__dxfile['blocks']
        self.__list_of_blocks = self.__dxfile['children']
        # The x axis of the first block labels the columns for all samples.
        self.__wl = self.__list_of_blocks[0]["x"]

        # Preallocate the spectra matrix: one row per block, width taken from
        # the first block's 'y' values.
        specs = np.zeros((self.__nb, len(self.__list_of_blocks[0]["y"])), dtype=float)
        self.idx = np.arange(self.__nb)
        self.__met = {}
        for i in range(self.__nb):
            block = self.__list_of_blocks[i]
            specs[i] = block['y']
            self.__met[f'{i}'] = self._block_metadata(block)

        # Blocks were stored as columns; transpose so each block is a row.
        self.metadata_ = DataFrame(self.__met).T
        # Both the values and the axis are reversed, so every column keeps
        # its own x label.
        self.spectra = DataFrame(np.fliplr(specs), columns=self.__wl[::-1],
                                 index=self.metadata_['name'])

        # Concentrations: group 1 is the element name, group 2 the value.
        self.pattern = r"\(([^,]+),(\d+(\.\d+)?),([^)]+)"
        # Element names are taken from the first block only and reused as
        # labels for every sample.
        first_block_text = self.__list_of_blocks[0]['concentrations']
        elements_name = [entry[0] for entry in self._concentration_entries(first_block_text)]

        raw_concentrations = self.metadata_['concentrations']
        cc = {key: self.conc(text) for key, text in raw_concentrations.items()}
        self.chem_data = DataFrame(cc, index=elements_name).T.astype(float)
        self.chem_data.index = self.metadata_['name']

    @staticmethod
    def _block_metadata(block):
        """Pick the header fields kept for one block and return them as a dict.

        Fields such as time, instrumental parameters, xfactor/yfactor, firsty,
        miny/maxy and deltax are not collected.
        """
        # The value is split on '\n$$'. Entry 0 is stored as the spectrometer;
        # entries 6 and 8 are read as 'key=value' pairs for the scan count and
        # the resolution (positions are fixed, so the layout must match).
        system = block['spectrometer/data system'].split('\n$$')
        return {
            'name': block['title'],
            'origin': block['origin'],
            'date': block['date'],
            'spectrometer': system[0],
            'n_scans': system[6].split('=')[1],
            'resolution': system[8].split('=')[1],
            'xunits': block['xunits'],
            'yunits': block['yunits'],
            'firstx': block['firstx'],
            'lastx': block['lastx'],
            'npoints': block['npoints'],
            'concentrations': block['concentrations'],
        }

    def _concentration_entries(self, sample):
        """Return the ``self.pattern`` matches found in ``sample``.

        Lines containing ``NCU`` or ``<<undef>>`` are removed before matching.
        Each match is a tuple of the pattern's groups.
        """
        kept = '\n'.join(
            line for line in sample.split('\n')
            if "NCU" not in line and "<<undef>>" not in line
        )
        return re.findall(self.pattern, kept)

    def conc(self, sample):
        """Return the concentration values of a single sample.

        Args:
            sample: the ``'concentrations'`` text of one block.

        Returns:
            A NumPy array holding the matched values as strings, in the order
            they appear in ``sample``.
        """
        return np.array([entry[1] for entry in self._concentration_entries(sample)])

    @property
    def specs_df_(self):
        """The spectra DataFrame."""
        return self.spectra

    @property
    def md_df_(self):
        """Metadata without the ``concentrations`` column and without columns
        whose every value is the empty string."""
        me = self.metadata_.drop("concentrations", axis=1)
        me = me.drop(me.columns[(me == '').all()], axis=1)
        return me

    @property
    def md_df_st_(self):
        """The ``origin`` and ``date`` columns of the metadata."""
        rt = ['origin', 'date']
        return self.metadata_.loc[:, rt]

    @property
    def chem_data_(self):
        """The chemical data DataFrame."""
        return self.chem_data
class CsvParser:
    """Placeholder CSV parser; it currently defines no state and no behavior."""

    def __init__(self) -> None:
        """Create an instance without performing any setup."""