'''
testcode2.util
--------------

Utility functions.

:copyright: (c) 2012 James Spencer.
:license: modified BSD; see LICENSE for more details.
'''

import os.path
import re
import sys

import testcode2.compatibility as compat
import testcode2.exceptions as exceptions


def testcode_filename(stem, file_id, inp, args):
    '''Construct filename in testcode format.

The name has the form stem.file_id[.inp=inp][.args=args]; the optional parts
are only included if inp and/or args evaluate to true.  Spaces and forward
slashes in the result are replaced by underscores.
'''
    filename = '%s.%s' % (stem, file_id)
    if inp:
        filename = '%s.inp=%s' % (filename, inp)
    if args:
        filename = '%s.args=%s' % (filename, args)
    filename = filename.replace(' ', '_')
    filename = filename.replace('/', '_')
    return filename


def testcode_file_id(filename, stem):
    '''Extract the file_id from a filename in the testcode format.

Only the basename of filename is considered.  Every occurrence of 'stem.' is
removed from it, followed by any '.inp=...' and '.args=...' suffixes.
'''
    filename = os.path.basename(filename)
    file_id = filename.replace('%s.' % (stem), '')
    file_id = re.sub(r'\.inp=.*', '', file_id)
    file_id = re.sub(r'\.args=.*', '', file_id)
    return file_id


def try_floatify(val):
    '''Convert val to a float if possible; otherwise return val unchanged.'''
    try:
        return float(val)
    except (ValueError, TypeError):
        # ValueError: a string which is not a number.
        # TypeError: an object float() cannot handle at all (e.g. None).
        return val


def extract_tagged_data(data_tag, filename):
    '''Extract data from lines marked by the data_tag in filename.

Returns a dictionary mapping the name of each data item to a tuple of the
values found for it, in the order in which they appear in the file.

Raises exceptions.AnalysisError if filename does not exist.
'''
    if not os.path.exists(filename):
        err = 'Cannot extract data: file %s does not exist.' % (filename)
        raise exceptions.AnalysisError(err)
    # Data tag is the first non-space character in the line.
    # e.g. extract data from lines:
    # data_tag      Energy:    1.256743 a.u.
    data_tag_regex = re.compile('^ *%s' % (re.escape(data_tag)))
    data = {}
    data_file = open(filename)
    # try/finally rather than a with statement: guarantees the file is closed
    # without requiring a newer python than the rest of this module.
    try:
        for line in data_file:
            if not data_tag_regex.match(line):
                continue
            # This is a line containing info to be tested.
            words = line.split()
            if len(words) < 2:
                # Nothing follows the tag, so there is no value to record.
                continue
            key = []
            # name of data is string after the data_tag and preceding the
            # (numerical) data.  only use the first number in the line, with
            # the key taken from all preceding information.
            # If the line contains no number, the final word is used as the
            # value (and also forms part of the key).
            for word in words[1:]:
                val = try_floatify(word)
                if val != word:
                    break
                key.append(word)
            # Drop a separator between the name and the value.  key is empty
            # if the number immediately follows the tag.
            if key and key[-1] in ('=', ':'):
                key.pop()
            key = '_'.join(key)
            if key and key[-1] in ('=', ':'):
                key = key[:-1]
            if not key:
                key = 'data'
            if key in data:
                data[key].append(val)
            else:
                data[key] = [val]
    finally:
        data_file.close()
    # We shouldn't change the data from this point: convert entries to tuples.
    for (key, val) in data.items():
        data[key] = tuple(val)
    return data


def dict_table_string(table_string):
    '''Read a data table from a string into a dictionary.

The first row and any subsequent rows containing no numbers are assumed to form
headers of a subtable, and so form the keys for the subsequent subtable.

Values, where possible, are converted to floats.  Rows are processed in order,
so a heading which is repeated within a row collects values row by row.

e.g. a  b  c  a  -> {'a':(1.0,7.0,4.0,8.0), 'b':(2.0,5.0), 'c':(3.0,6.0)}
     1  2  3  7
     4  5  6  8
and
     a  b  c   -> {'a':(1.0,4.0,7.0), 'b':(2.0,5.0,8.0), 'c':(3.0,6.0),
     1  2  3       'd':(9.0,), 'e':(6.0,)}
     4  5  6
     a  b  d  e
     7  8  9  6

Raises exceptions.AnalysisError if a data row has more entries than the
current header row.
'''
    data = [i.split() for i in table_string.splitlines()]
    # Convert to numbers where appropriate
    data = [[try_floatify(val) for val in dline] for dline in data]
    data_dict = {}
    head = []
    for dline in data:
        # Test if all items are strings; if so start a new subtable.
        # We actually test if all items are not floats, as python 3 can return
        # a bytes variable from subprocess whereas (e.g.) python 2.4 returns a
        # str.  Testing for this is problematic as the bytes type does not
        # exist in python 2.4.  Fortunately we have converted all items to
        # floats if possible, so can just test for the inverse condition...
        if compat.compat_all(type(val) is not float for val in dline):
            # header of new subtable
            head = dline
            for val in head:
                if val not in data_dict:
                    data_dict[val] = []
        else:
            if len(dline) > len(head):
                err = 'Table missing column heading(s):\n%s' % (table_string)
                raise exceptions.AnalysisError(err)
            for (ind, val) in enumerate(dline):
                # Add data to appropriate key.
                # Note that this handles the case where the same column heading
                # occurs multiple times in the same subtable and does not
                # overwrite the previous column with the same heading.
                data_dict[head[ind]].append(val)
    # We shouldn't change the data from this point: convert entries to tuples.
    for (key, val) in data_dict.items():
        data_dict[key] = tuple(val)
    return data_dict


def wrap_list_strings(word_list, width):
    '''Create a list of strings of a given width from a list of words.

This is, to some extent, a version of textwrap.wrap but without the 'feature'
of removing additional whitespace.

Words are joined by single spaces.  A word longer than width is never split:
it is placed on a line of its own.
'''
    wrapped_strings = []
    clen = 0
    cstring = []
    for string in word_list:
        # len(cstring) is the number of separating spaces needed if string is
        # added to the current line.
        if clen + len(string) + len(cstring) <= width:
            cstring.append(string)
            clen += len(string)
        else:
            # cstring is empty only if the very first word is wider than
            # width; don't emit an empty line in that case.
            if cstring:
                wrapped_strings.append(' '.join(cstring))
            cstring = [string]
            clen = len(string)
    if cstring:
        wrapped_strings.append(' '.join(cstring))
    return wrapped_strings


def pretty_print_table(labels, dicts):
    '''Print data in dictionaries of identical size in a tabular format.

labels: sequence of row labels, one for each dictionary in dicts.
dicts: list of dictionaries, each holding either single values or tuples/lists
of values.  The dictionaries are modified in place: entries missing from a
dictionary are filled in with 'n/a'.

Returns the table as a string.  The table is split into several subtables if
a row is too wide.  If there is no data, a message saying so is returned
instead.
'''
    # Fill in the dicts with missing data.
    # This can be hit if the missing data fields are ignored...
    for dict1 in dicts:
        for key in dict1.keys():
            if isinstance(dict1[key], (tuple, list)):
                nitems = len(dict1[key])
                val = ('n/a',)*nitems
                iterable = True
            else:
                val = 'n/a'
                iterable = False
            for dict2 in dicts:
                if key not in dict2:
                    dict2[key] = val
                elif iterable and nitems != len(dict2[key]):
                    # Pad shorter entries.  Longer entries are left alone:
                    # the entry in dict1 is padded when dict2 takes its turn
                    # in the outer loop.
                    missing = nitems - len(dict2[key])
                    if missing > 0:
                        padding = ('n/a',)*missing
                        if isinstance(dict2[key], list):
                            padding = list(padding)
                        dict2[key] = dict2[key] + padding
    # Loop through all elements in order to calculate the field width.
    # Create header line as we go.
    fmt = dict(_tc_label='%%-%is' % (max(len(str(label)) for label in labels)))
    header = []
    for key in sorted(dicts[0].keys()):
        width = len(str(key))
        nitems = 1
        if isinstance(dicts[0][key], (tuple, list)):
            nitems = len(dicts[0][key])
            for dval in dicts:
                for item in dval[key]:
                    width = max(width, len(str(item)))
        else:
            for dval in dicts:
                width = max(width, len(str(dval[key])))
        # Finished processing all data items with this key.
        # Convert from field width into a format statement.
        fmt[key] = '%%-%is' % (width)
        # The heading is repeated above each column of a multi-valued entry.
        header.extend([fmt[key] % (key,)]*nitems)
    # Wrap header line and insert key/label at the start of each line.
    key = fmt['_tc_label'] % ('')
    header = wrap_list_strings(header, 70)
    header = ['%s %s' % (key, line_part) for line_part in header]
    # Printing without a new line is different in python 2 and python 3, so for
    # ease we construct the formatting for the line and then print it.
    lines = [header]
    for (ind, label) in enumerate(labels):
        line = []
        for key in sorted(dicts[ind].keys()):
            if isinstance(dicts[ind][key], (tuple, list)):
                for item in dicts[ind][key]:
                    line.append(fmt[key] % (item,))
            else:
                line.append(fmt[key] % (dicts[ind][key],))
        # Wrap line and insert key/label at the start of each line.
        key = fmt['_tc_label'] % (label,)
        line = wrap_list_strings(line, 70)
        line = ['%s %s' % (key, line_part) for line_part in line]
        lines.append(line)
    # Now actually form table.  Due to line wrapping we might actually form
    # several subtables.  As each line has the same number of items (or
    # should!), this is quite simple.
    table = []
    for ind in range(len(lines[0])):
        table.append('\n'.join([line[ind] for line in lines]))
    table = '\n'.join(table)
    return (table or
            'No data for %s.' % ('; '.join(label.strip() for label in labels)))


def info_line(path, input_file, args, rundir):
    '''Produce a (terse) string describing a test.

The string consists of path (made relative to rundir, if rundir is given),
followed by the input file and arguments where present, and ends with ': '.
'''
    if rundir:
        path = compat.relpath(path, rundir)
    info = path
    if input_file:
        info += ' - %s' % (input_file)
    if args:
        info += ' (arg(s): %s)' % (args)
    info += ': '
    return info