1
2
3
4
5
6
7
8
9
10
11
12
13
14
15 """Load list of datafiles for sequential refinement and associated environment
16 variables such as temperature or pressure.
17 """
18
19 import os
20 import re
21 from diffpy.srrietveld.exceptions import SrrFileError
22
23
24 __id__ = "$Id: listfilereader.py 6515 2011-04-13 14:52:02Z juhas $"
25
26
# Alternative spellings of column labels.  Keys are lower-case aliases,
# values are the standard labels that resolveColumnLabel substitutes for them.
_LABELALIASES = {
    'run' : 'datafile',
}
30
def listFileReader(filename, fmt=None, basepath=None):
    '''Load and return a ListFileReader instance from the specified file.

    filename -- path to the list file to be loaded.  Uses CSV reader
                for ".csv" extensions or plain text reader otherwise.
    fmt      -- optional filename format.  Use "csv" for CSV reader
                or "txt" for plain text reader.  When None, determine
                format from the filename extension.
    basepath -- when specified, make loaded datapaths relative to the
                basepath directory.  Use '.' for the current directory.
                Do not modify datapaths when None.

    Return a ListFileReader instance.
    Raise ValueError for invalid fmt value.
    Raise SrrFileError for non-existent or corrupted file.
    '''
    # NOTE(review): the "def" line is absent from the reviewed excerpt; the
    # function name and the None defaults are reconstructed from the docstring
    # and the module name -- confirm against the repository.
    if fmt is None:
        # the extension check is case-insensitive, so "X.CSV" counts as CSV
        iscsv = os.path.splitext(filename)[1].lower() == '.csv'
        readerclass = CSVListFileReader if iscsv else ListFileReader
    elif fmt == 'csv':
        readerclass = CSVListFileReader
    elif fmt == 'txt':
        readerclass = ListFileReader
    else:
        emsg = 'Invalid fmt value, supported formats are "csv" and "txt".'
        raise ValueError(emsg)
    rv = readerclass(filename=filename)
    if basepath is not None:
        # data paths in the list file are taken relative to its directory
        fdir = os.path.dirname(filename)
        for lb in rv.banklabels:
            # build a real list so the column stays indexable on Python 3
            rv.column[lb] = [
                os.path.relpath(os.path.join(fdir, f), basepath)
                for f in rv.column[lb]]
    return rv
65
66
def resolveColumnLabel(label):
    '''Return the standard name of a column label.

    Surrounding whitespace is removed first.  A label that matches an
    alias in _LABELALIASES (in any letter case) is replaced by the
    label it stands for, "datafile" is lower-cased and bank labels
    such as bank002 or BANK002 become bank2.  Any other label is
    returned as is.

    label    -- string label to be converted to standard name.

    Return string.
    '''
    stripped = label.strip()
    lowered = stripped.lower()
    if lowered in _LABELALIASES:
        # resolve the alias target again so it gets the same normalization
        return resolveColumnLabel(_LABELALIASES[lowered])
    name = lowered if lowered == 'datafile' else stripped

    def _plainbank(mx):
        # int() drops any leading zeros from the bank index
        return 'bank%i' % int(mx.group(2))

    return re.sub(r'(?i)^(bank)(\d+)$', _plainbank, name)
86
87
88
class ListFileReader(object):
    '''Object for loading datafiles and temperatures from a list file.
    Bank columns are loaded as list of strings, all other columns are
    converted to floats.  Column labels are standardized with
    resolveColumnLabel when the file is read.

    Instance variables:

    columnlabels -- list of column labels in the loaded list file
    banklabels   -- list of bank-column labels sorted by bank index
    column       -- dictionary that maps column label to associated
                    list of values.
    '''

    def __init__(self, filename=None, fp=None):
        '''Initialize ListFileReader.

        filename -- optional path to a list file to be loaded
        fp       -- file type object to be read

        Only one of filename, fp arguments can be specified.

        Raise ValueError when both filename and fp are given.
        Raise SrrFileError when the file cannot be opened or loaded.
        '''
        if filename is not None and fp is not None:
            emsg = "Specify either filename or fp, not both."
            raise ValueError(emsg)
        self.columnlabels = []
        self.banklabels = []
        self.column = {}
        if filename is not None:
            # Binary mode is kept from the original code.
            # NOTE(review): on Python 3 a binary file yields bytes lines,
            # which the str-based parsing in readFile does not handle --
            # revisit the open mode when porting.
            try:
                with open(filename, 'rb') as fp1:
                    self.readFile(fp1)
            except IOError as e:
                raise SrrFileError(e)
        if fp is not None:
            self.readFile(fp)
        return


    def readFile(self, fp):
        '''Read listfile from a file-type object fp.

        The first line that is neither blank nor a "#" comment is the
        header with whitespace-separated column labels.  Every later
        non-blank, non-comment line is a data row and must contain one
        value per column label.

        fp   -- file-type object or any other iterable of text lines.

        No return value.
        Raise SrrFileError if file loading failed.
        '''
        columnlabels = []
        column = {}
        # One shared iterator: the second loop resumes right after the
        # header line consumed by the first loop.  Lines count from 1.
        lineiter = enumerate(fp, 1)
        for nr, line in lineiter:
            w = line.split()
            if not w or w[0].startswith('#'):
                continue
            columnlabels = [resolveColumnLabel(wi) for wi in w]
            column = dict((lb, []) for lb in columnlabels)
            # check the header right away so that bad labels are
            # reported before any problem in the data rows.
            self._validate(columnlabels, column)
            break
        ncols = len(columnlabels)
        for nr, line in lineiter:
            w = line.split()
            if not w or w[0].startswith('#'):
                continue
            if len(w) != ncols:
                emsg = "%i: expected %i columns, found %i." % (
                        nr, ncols, len(w))
                raise SrrFileError(emsg)
            for lb, wi in zip(columnlabels, w):
                column[lb].append(wi)
        self._validate(columnlabels, column)
        return


    def _validate(self, columnlabels, column):
        '''Check consistency of the loaded data, convert non-bank
        columns to floats and if everything is OK, update the instance
        attributes.

        columnlabels -- list of standardized column labels in file order
        column       -- dictionary that maps column label to its list
                        of string values.  Lists of non-bank columns
                        are converted to floats in place.

        No return value.
        Raise SrrFileError when something is not OK.
        '''
        lbidx = {}
        for idx, lb in enumerate(columnlabels):
            if lb in lbidx:
                emsg = "Duplicate labels in columns %i, %i" % (
                        lbidx[lb] + 1, idx + 1)
                raise SrrFileError(emsg)
            lbidx[lb] = idx

        isbank = re.compile(r'(?:bank\d+|datafile)$').match
        banklabels = [lb for lb in columnlabels if isbank(lb)]
        # order by the number embedded in the label; "datafile" has no
        # digits and therefore sorts as index 0.
        banklabels.sort(
                key=lambda s: int(''.join(c for c in s if c.isdigit()) or 0))

        if columnlabels and not banklabels:
            emsg = "Missing column label 'datafile' or 'bankN'."
            raise SrrFileError(emsg)

        for lb, col in column.items():
            # bank columns hold data file names and stay as strings
            if lb in banklabels:
                continue
            try:
                col[:] = [float(x) for x in col]
            except ValueError as e:
                emsg = "Error converting %i-th column to floats: %s" % (
                        lbidx[lb] + 1, e)
                raise SrrFileError(emsg)

        # everything checked out, commit the data to the instance
        self.columnlabels = columnlabels
        self.banklabels = banklabels
        self.column = column
        return
206
207
208
209
210
class CSVListFileReader(ListFileReader):
    '''ListFileReader specialized for comma-separated-values (CSV) format.
    '''

    def readFile(self, fp):
        '''Read CSV-formatted listfile from a file-type object fp.

        The first non-empty row holds the column labels, every later
        non-empty row is a data row and must contain one value per
        column label.  Empty rows are skipped.

        fp   -- file-type object, it is passed to csv.reader.

        No return value.
        Raise SrrFileError if file loading failed.
        '''
        import csv
        reader = csv.reader(fp)
        try:
            # keep the line number where each record ended for use in
            # error messages.
            rows = [(reader.line_num, row) for row in reader]
        except csv.Error as e:
            emsg = "%i: %s" % (reader.line_num, e)
            raise SrrFileError(emsg)
        # csv.reader returns [] for a blank line; such a row would make
        # a column-wise transpose drop all data, so discard it here.
        rows = [(nr, row) for nr, row in rows if row]
        headrow = rows.pop(0)[1] if rows else []
        columnlabels = [resolveColumnLabel(lb) for lb in headrow]
        ncols = len(columnlabels)
        # one list per label, so a header-only file still gets every key
        columns = [[] for lb in columnlabels]
        for nr, row in rows:
            if len(row) != ncols:
                emsg = "%i: expected %i columns, found %i." % (
                        nr, ncols, len(row))
                raise SrrFileError(emsg)
            for col, value in zip(columns, row):
                col.append(value)
        column = dict(zip(columnlabels, columns))
        self._validate(columnlabels, column)
        return
233
234
235