1'''
2testcode2.util
3--------------
4
5Utility functions.
6
7:copyright: (c) 2012 James Spencer.
8:license: modified BSD; see LICENSE for more details.
9'''
10
11import os.path
12import re
13import sys
14
15import testcode2.compatibility as compat
16import testcode2.exceptions as exceptions
17
18def testcode_filename(stem, file_id, inp, args):
19    '''Construct filename in testcode format.'''
20    filename = '%s.%s' % (stem, file_id)
21    if inp:
22        filename = '%s.inp=%s' % (filename, inp)
23    if args:
24        filename = '%s.args=%s' % (filename, args)
25    filename = filename.replace(' ','_')
26    filename = filename.replace('/', '_')
27    return filename
28
29def testcode_file_id(filename, stem):
30    '''Extract the file_id from a filename in the testcode format.'''
31    filename = os.path.basename(filename)
32    file_id = filename.replace('%s.' % (stem), '')
33    file_id = re.sub(r'\.inp=.*', '', file_id)
34    file_id = re.sub(r'\.args=.*', '', file_id)
35    return file_id
36
37
def try_floatify(val):
    '''Convert val to a float if possible.

    Returns float(val) if the conversion succeeds; otherwise val is returned
    unchanged.
    '''
    try:
        return float(val)
    except (TypeError, ValueError, OverflowError):
        # ValueError: a string which does not represent a number.
        # TypeError: an object float() cannot handle at all (e.g. None).
        # OverflowError: an integer too large to be represented as a float.
        return val
44
def extract_tagged_data(data_tag, filename):
    '''Extract data from lines marked by the data_tag in filename.

    A line is used if data_tag is the first non-space text on the line, e.g.

        data_tag      Energy:    1.256743 a.u.

    The first whitespace-separated word (containing the tag) is skipped.  The
    words which follow, up to the first word that can be converted to a float,
    are joined with underscores to form the key (with a trailing '=' or ':'
    removed) and that first number is the value.  If no words precede the
    number then the key is 'data'.  If the line contains no number then the
    last word is stored as the value.  Lines containing nothing after the
    tag are ignored.

    Returns a dictionary mapping each key to a tuple of the values found, in
    the order in which they appear in the file.

    Raises exceptions.AnalysisError if filename does not exist.
    '''
    if not os.path.exists(filename):
        err = 'Cannot extract data: file %s does not exist.' % (filename)
        raise exceptions.AnalysisError(err)
    # Read the contents and make sure the file handle is released even if
    # reading fails.
    data_file = open(filename)
    try:
        lines = data_file.readlines()
    finally:
        data_file.close()
    # Data tag is the first non-space character in the line.
    data_tag_regex = re.compile('^ *%s' % (re.escape(data_tag)))
    data = {}
    for line in lines:
        if not data_tag_regex.match(line):
            continue
        # This is a line containing info to be tested.
        words = line.split()[1:]
        if not words:
            # Nothing follows the tag, so there is no value to record.
            continue
        key = []
        # The name of the data is the text after the data_tag and preceding
        # the (numerical) data.  Only the first number in the line is used,
        # with the key taken from all the words before it.
        for word in words:
            val = try_floatify(word)
            if val != word:
                break
            key.append(word)
        # The key may be empty (the number immediately follows the tag), so
        # guard each access to its last element.
        if key and key[-1] in ('=', ':'):
            key.pop()
        key = '_'.join(key)
        if key and key[-1] in ('=', ':'):
            key = key[:-1]
        if not key:
            key = 'data'
        data.setdefault(key, []).append(val)
    # We shouldn't change the data from this point: convert entries to tuples.
    for (key, val) in data.items():
        data[key] = tuple(val)
    return data
85
def dict_table_string(table_string):
    '''Read a whitespace-separated data table from a string into a dictionary.

    Entries are converted to floats where possible.  Any row which contains no
    floats is treated as the header of a (new) subtable: its entries become
    the keys under which the entries of the following rows are stored, column
    by column.  Values are accumulated row by row, so a heading which appears
    more than once (in the same header or in a later one) collects the values
    from all of its columns.

    e.g. a  b  c  a  ->   {'a': (1.0, 7.0, 4.0, 8.0), 'b': (2.0, 5.0),
         1  2  3  7        'c': (3.0, 6.0)}
         4  5  6  8
    and
         a  b  c     ->   {'a': (1.0, 4.0, 7.0), 'b': (2.0, 5.0, 8.0),
         1  2  3           'c': (3.0, 6.0), 'd': (9.0,), 'e': (6.0,)}
         4  5  6
         a  b  d  e
         7  8  9  6

    Raises exceptions.AnalysisError if a data row has more entries than the
    current header.
    '''
    table = {}
    headings = []
    for row in table_string.splitlines():
        fields = [try_floatify(field) for field in row.split()]
        # A header row is one without any floats.  We test for 'not a float'
        # rather than 'is a string' because the string type handed to us can
        # differ between python versions (python 3 can return bytes from
        # subprocess, and bytes does not exist in python 2.4).
        is_header = compat.compat_all(type(field) is not float
                                      for field in fields)
        if is_header:
            # Start of a new subtable.
            headings = fields
            for heading in headings:
                if heading not in table:
                    table[heading] = []
            continue
        if len(fields) > len(headings):
            err = 'Table missing column heading(s):\n%s' % (table_string)
            raise exceptions.AnalysisError(err)
        # Appending (rather than assigning) means that repeated headings in
        # the same subtable do not overwrite each other's data.
        for (heading, field) in zip(headings, fields):
            table[heading].append(field)
    # The data should not be altered from now on: freeze each column.
    for heading in table:
        table[heading] = tuple(table[heading])
    return table
136
def wrap_list_strings(word_list, width):
    '''Create a list of strings of a given width from a list of words.

    Consecutive words are joined by single spaces for as long as the joined
    string is no longer than width; a word which does not fit starts a new
    string.  A word which is itself longer than width is placed on a line of
    its own rather than being split.

    This is, to some extent, a version of textwrap.wrap but without the
    'feature' of removing additional whitespace.

    Returns the list of wrapped strings (empty if word_list is empty).
    '''
    wrapped_strings = []
    clen = 0
    cstring = []
    for string in word_list:
        # len(cstring) is the number of separating spaces required once
        # string has been added to the current line.
        if clen + len(string) + len(cstring) <= width:
            cstring.append(string)
            clen += len(string)
        else:
            # Only flush a non-empty line: if the very first word is wider
            # than width there is nothing to flush yet and we must not emit
            # a spurious empty string.
            if cstring:
                wrapped_strings.append(' '.join(cstring))
            cstring = [string]
            clen = len(string)
    if cstring:
        wrapped_strings.append(' '.join(cstring))
    return wrapped_strings
156
157
def pretty_print_table(labels, dicts):
    '''Format data in dictionaries of identical size in a tabular format.

    labels: sequence of row labels, one for each dictionary in dicts.
    dicts: sequence of dictionaries mapping a column heading to either
        a single value or a tuple/list of values.

    Note the dictionaries are modified in place: a key missing from one
    dictionary, or a tuple/list shorter than the corresponding entry in
    another dictionary, is filled in with 'n/a'.

    Each row consists of the label followed by the values ordered by sorted
    key.  Rows are wrapped at 70 characters (excluding the label), so the
    table may be split into several consecutive subtables.

    Returns the table as a string (nothing is actually printed), or a
    'No data for ...' message if there are no columns.
    '''
    # Fill in the dicts with missing data.
    # This can be hit if the missing data fields are ignored...
    for dict1 in dicts:
        for key in list(dict1.keys()):
            iterable = isinstance(dict1[key], (tuple, list))
            if iterable:
                nitems = len(dict1[key])
                val = ('n/a',)*nitems
            else:
                val = 'n/a'
            for dict2 in dicts:
                if key not in dict2:
                    dict2[key] = val
                elif iterable and isinstance(dict2[key], (tuple, list)):
                    # Pad shorter entries so every row has the same number of
                    # items for this key.
                    nmissing = nitems - len(dict2[key])
                    if nmissing > 0:
                        dict2[key] += ('n/a',)*nmissing
    # Loop through all elements in order to calculate the field width.
    # Create header line as we go.
    # The label format is kept separate from the column formats so that it
    # cannot clash with a data key.
    label_fmt = '%%-%is' % (max(len(str(label)) for label in labels))
    fmt = {}
    header = []
    for key in sorted(dicts[0].keys()):
        field_width = len(str(key))
        nitems = 1
        if isinstance(dicts[0][key], (tuple, list)):
            nitems = len(dicts[0][key])
            for dval in dicts:
                for item in dval[key]:
                    field_width = max(field_width, len(str(item)))
        else:
            for dval in dicts:
                field_width = max(field_width, len(str(dval[key])))
        # Finished processing all data items with this key.
        # Convert from field width into a format statement.
        fmt[key] = '%%-%is' % (field_width)
        # The heading is repeated above each item belonging to this key.
        header.extend([fmt[key] % (key,)]*nitems)
    # Wrap header line and insert (blank) label at the start of each line.
    max_width = 70
    prefix = label_fmt % ('',)
    header = wrap_list_strings(header, max_width)
    # Printing without a new line is different in python 2 and python 3, so for
    # ease we construct the formatting for the line and then print it.
    lines = [['%s %s' % (prefix, line_part) for line_part in header]]
    for (ind, label) in enumerate(labels):
        line = []
        for key in sorted(dicts[ind].keys()):
            value = dicts[ind][key]
            # Values are wrapped in a 1-tuple so that a value which is itself
            # a tuple is not unpacked by the % operator.
            if isinstance(value, (tuple, list)):
                for item in value:
                    line.append(fmt[key] % (item,))
            else:
                line.append(fmt[key] % (value,))
        # Wrap line and insert label at the start of each line.
        prefix = label_fmt % (label,)
        line = wrap_list_strings(line, max_width)
        lines.append(['%s %s' % (prefix, line_part) for line_part in line])
    # Now actually form table.  Due to line wrapping we might actually form
    # several subtables.  As each line has the same number of items (or
    # should!), this is quite simple.
    table = []
    for ind in range(len(lines[0])):
        table.append('\n'.join([line[ind] for line in lines]))
    table = '\n'.join(table)
    return (table or
            'No data for %s.' % ('; '.join(str(label).strip()
                                           for label in labels)))
226
def info_line(path, input_file, args, rundir):
    '''Produce a (terse) string describing a test.

    The string consists of the path (converted by compat.relpath using rundir,
    if rundir is given), followed by the input file and the argument(s) where
    these are set, and is terminated by ': '.
    '''
    if rundir:
        path = compat.relpath(path, rundir)
    pieces = [path]
    if input_file:
        pieces.append(' - %s' % (input_file))
    if args:
        pieces.append(' (arg(s): %s)' % (args))
    pieces.append(': ')
    return ''.join(pieces)
238