1# -*- coding: utf-8 -*-
2"""GRASS Python testing framework checkers
3
4Copyright (C) 2014 by the GRASS Development Team
5This program is free software under the GNU General Public
6License (>=v2). Read the file COPYING that comes with GRASS GIS
7for details.
8
9:authors: Vaclav Petras, Soeren Gebbert
10"""
11
12import os
13import sys
14import re
15import doctest
16
17from grass.script.utils import decode, encode, _get_encoding
18
19try:
20    from grass.script.core import KeyValue
21except (ImportError, AttributeError):
    # TODO: we are silent about the error and use an object with a different
    # interface; this should be replaced by a central keyvalue module.
    # This can happen when translations are not available.
    # TODO: GRASS should survive or give a better error when translations
    # are not available; even lazy loading after the first _ call would be
    # interesting.
    # File "...grass/script/core.py", line 40, in <module>
    # AttributeError: 'NoneType' object has no attribute 'endswith'
29    KeyValue = dict
30
31# alternative term to check(er(s)) would be compare
32
33
def unify_projection(dic):
    """Unifies names of projections.

    Some projections are referred using different names like
    'Universal Transverse Mercator' and 'Universe Transverse Mercator'.
    This function replaces synonyms by a unified name.

    Example of common typo in UTM replaced by correct spelling::

        >>> unify_projection({'name': ['Universe Transverse Mercator']})
        {'name': ['Universal Transverse Mercator']}

    The name can be also a plain string instead of a list of strings::

        >>> unify_projection({'name': 'Universe Transverse Mercator'})
        {'name': 'Universal Transverse Mercator'}

    Neither the input dictionary nor the list stored under ``name`` is
    modified. A dictionary without the ``name`` key is returned as
    an unchanged copy.

    :param dic: The dictionary containing information about projection

    :return: The dictionary with the new values if needed or a copy of old one
    """
    # the lookup variable is a list of lists, each list contains all the
    # possible names for a projection system, the first one is the unified one
    lookup = [['Universal Transverse Mercator',
               'Universe Transverse Mercator']]

    def unify(name):
        """Return the unified name for `name` or `name` itself if unknown."""
        for synonyms in lookup:
            if name in synonyms:
                return synonyms[0]
        return name

    # shallow copy, so the key can be rebound without touching the input
    dic = dict(dic)
    if 'name' in dic:
        names = dic['name']
        if isinstance(names, str):
            # single value, e.g. parsed from a line without value separator
            dic['name'] = unify(names)
        else:
            # create a new list rather than assigning into the caller's list
            dic['name'] = [unify(name) for name in names]
    return dic
60
61
def unify_units(dic):
    """Unifies names of units.

    Some units have different spelling although they are the same units.
    This functions replaces different spelling options by unified one.

    Example of British English spelling replaced by US English spelling::

        >>> unify_units({'units': ['metres'], 'unit': ['metre']})  # doctest: +SKIP
        {'units': ['meters'], 'unit': ['meter']}

    Values of the ``unit`` and ``units`` keys can be strings or lists of
    strings. Neither the input dictionary nor the lists stored in it are
    modified. A key which is not present is left out also in the result.

    :param dic: The dictionary containing information about units

    :return: The dictionary with the new values if needed or a copy of old one
    """
    # the lookup variable is a list of lists, each list contains all the
    # possible names for a unit, the first one is the unified one
    lookup = [['meter', 'metre'], ['meters', 'metres'],
              ['Meter', 'Metre'], ['Meters', 'Metres'],
              ['kilometer', 'kilometre'], ['kilometers', 'kilometres'],
              ['Kilometer', 'Kilometre'], ['Kilometers', 'Kilometres'],
              ]

    def unify(name):
        """Return the unified spelling of `name` or `name` if unknown."""
        for spellings in lookup:
            if name in spellings:
                return spellings[0]
        return name

    # shallow copy, so the keys can be rebound without touching the input
    dic = dict(dic)
    # the same spelling table is applied to both keys
    for key in ('unit', 'units'):
        if key not in dic:
            continue
        value = dic[key]
        if isinstance(value, str):
            dic[key] = unify(value)
        else:
            # create a new list rather than assigning into the caller's list
            dic[key] = [unify(item) for item in value]
    return dic
102
103
def value_from_string(value):
    """Convert a string to a value of the best fitting type.

    The conversions are tried in the order ``int``, ``float`` and when
    both fail, the string itself is returned with the surrounding
    whitespace removed.

    >>> value_from_string('1')
    1
    >>> value_from_string('5.6')
    5.6
    >>> value_from_string('  5.6\t  ')
    5.6
    >>> value_from_string('hello')
    'hello'
    """
    # int must be tried before float because int('1.0') is ValueError
    # (although int(1.0) is not) while float('1') is not
    for convert in (int, float):
        try:
            return convert(value)
        except ValueError:
            continue
    # not a number, only strip whitespace (expecting spaces and tabs)
    return value.strip()
138
139
140# TODO: what is the default separator?
def text_to_keyvalue(text, sep=":", val_sep=",", functions=None,
                     skip_invalid=False, skip_empty=False,
                     from_string=value_from_string):
    """Convert text to key-value pairs (dictionary-like KeyValue object).

    The text is expected to have one entry per line where the key and
    the value are separated by `sep`. Only the first occurrence of `sep`
    on a line is considered, the rest of the line is the value.

    The value part is parsed as well. When it contains `val_sep`,
    it is split and the result is a list of values, otherwise it is
    a single value. Each item is converted by `from_string` which is
    `value_from_string()` by default, so the best fitting type
    (int, float or string) is used.

    :param text: string to convert
    :param sep: character that separates the keys and values
    :param val_sep: character that separates the values of a single key
    :param functions: list of functions to apply on the resulting dictionary,
        each function gets the dictionary and returns the dictionary
        to be used further
    :param skip_invalid: skip all non-empty lines which do not contain
        the separator instead of raising an error
    :param skip_empty: skip empty lines instead of raising an error
    :param from_string: a function used to convert strings to values,
        use ``lambda x: x`` for no conversion

    :return: a dictionary representation of text
    :return type: grass.script.core.KeyValue or dict

    :raises ValueError: for an empty line when `skip_empty` is `False` and
        for a line without separator when `skip_invalid` is `False`

    An example of converting text with text, floats, integers and list
    to a dictionary::

        >>> sorted(text_to_keyvalue('''a: Hello
        ... b: 1.0
        ... c: 1,2,3,4,5
        ... d : hello,8,0.1''').items())  # sorted items from the dictionary
        [('a', 'Hello'), ('b', 1.0), ('c', [1, 2, 3, 4, 5]), ('d', ['hello', 8, 0.1])]

    .. warning::
        An empty string is a valid input because an empty dictionary is
        a valid dictionary. You need to test this separately according
        to the circumstances.
    """
    result = KeyValue()
    post_processors = functions if functions is not None else []
    # key of the last successfully parsed line, used in the error message
    key = None

    # splitlines() implements the universal newlines approach
    # TODO: add also general split with vsep
    for line in text.splitlines():
        if sep not in line:
            # lines with no separator (empty or invalid)
            if line:
                # line contains something but not separator
                if not skip_invalid:
                    # TODO: here should go _ for translation
                    raise ValueError(("Line <{l}> does not contain"
                                      " separator <{s}>.").format(l=line, s=sep))
            elif not skip_empty:
                # TODO: here should go _ for translation
                # TODO: the error message is not really informative
                # in case of skipping lines we may get here with no key
                if result:
                    # key is the one from previous line
                    raise ValueError(("Empty line in the parsed text."
                                      " Previous line's key is <%s>") % key)
                raise ValueError("Empty line in the parsed text.")
            # the line is silently ignored because it is empty or invalid
            # and skipping was requested
            continue

        raw_key, raw_value = line.split(sep, 1)
        key = raw_key.strip()
        # this strip may not be necessary, each item is converted separately
        raw_value = raw_value.strip()
        if val_sep in raw_value:
            # list of values
            result[key] = [from_string(item)
                           for item in raw_value.split(val_sep)]
        else:
            # single value
            result[key] = from_string(raw_value)

    for process in post_processors:
        result = process(result)
    return result
232
233
234# TODO: decide if there should be some default for precision
235# TODO: define standard precisions for DCELL, FCELL, CELL, mm, ft, cm, ...
236# TODO: decide if None is valid, and use some default or no compare
237# TODO: is None a valid value for precision?
def values_equal(value_a, value_b, precision=0.000001):
    """Test if two values are equal, numbers are compared using precision.

    Two numbers are considered equal when the absolute value of their
    difference is not greater than `precision`. Lists are equal when
    they have the same length and all their items are equal according
    to this function. Anything else is compared using ``!=``.

    >>> values_equal(1.022, 1.02, precision=0.01)
    True
    >>> values_equal([1.2, 5.3, 6.8], [1.1, 5.2, 6.9], precision=0.2)
    True
    >>> values_equal(7, 5, precision=2)
    True
    >>> values_equal(1, 5.9, precision=10)
    True
    >>> values_equal('Hello', 'hello')
    False
    """
    a_float, b_float = isinstance(value_a, float), isinstance(value_b, float)
    a_int, b_int = isinstance(value_a, int), isinstance(value_b, int)

    if (a_float and (b_float or b_int)) or (b_float and a_int):
        # at least one value is float and the other is float or int,
        # precision is applied to int-float comparison rather than
        # converting both to integer
        # (as in the original function from grass.script.core)
        # float() does not accept None as precision
        tolerance = float(precision)
        return not abs(value_a - value_b) > tolerance

    if a_int and b_int and precision and int(precision) > 0:
        # both int but precision applies for them
        return not abs(value_a - value_b) > precision

    if isinstance(value_a, list) and isinstance(value_b, list):
        if len(value_a) != len(value_b):
            return False
        # apply this function for comparison of items in the lists
        return all(values_equal(item_a, item_b, precision)
                   for item_a, item_b in zip(value_a, value_b))

    # everything else including integers without applicable precision
    return not value_a != value_b
290
291
def keyvalue_equals(dict_a, dict_b, precision,
                    def_equal=values_equal, key_equal=None,
                    a_is_subset=False):
    """Test if two dictionaries are equal.

    .. note::
        Always use keyword arguments for all parameters with defaults.
        It is a good idea to use keyword arguments also for the first
        two parameters.

    An example of key-value texts comparison::

        >>> keyvalue_equals(text_to_keyvalue('''a: Hello
        ... b: 1.0
        ... c: 1,2,3,4,5
        ... d: hello,8,0.1'''),
        ... text_to_keyvalue('''a: Hello
        ... b: 1.1
        ... c: 1,22,3,4,5
        ... d: hello,8,0.1'''), precision=0.1)
        False

    :param dict_a: first dictionary
    :param dict_b: second dictionary
    :param precision: precision with which the floating point values
        are compared (passed to equality functions)
    :param callable def_equal: function used for comparison by default,
        it is called with two values and the precision
    :param dict key_equal: dictionary of functions used for comparison
        of specific keys, `def_equal` is used for the rest,
        keys in dictionary are keys in `dict_a` and `dict_b` dictionaries,
        values are the functions used to compare the given key
    :param a_is_subset: `True` if `dict_a` is a subset of `dict_b`,
        `False` otherwise

    :return: `True` if identical, `False` if different

    Use `diff_keyvalue()` to get information about differences.
    You can use this function to find out if there is a difference and then
    use `diff_keyvalue()` to determine all the differences between
    dictionaries.
    """
    comparators = key_equal if key_equal is not None else {}

    if not a_is_subset:
        # without the subset mode both dictionaries need the same keys
        if sorted(dict_a.keys()) != sorted(dict_b.keys()):
            return False

    for key in dict_a:
        # in the subset mode a key missing in the superset is a difference
        if a_is_subset and key not in dict_b:
            return False
        compare = comparators.get(key, def_equal)
        if not compare(dict_a[key], dict_b[key], precision):
            return False
    return True
349
350
351# TODO: should the return depend on the a_is_subset parameter?
352# this function must have the same interface and behavior as keyvalue_equals
def diff_keyvalue(dict_a, dict_b, precision,
                  def_equal=values_equal, key_equal=None,
                  a_is_subset=False):
    """Determine the difference of two dictionaries.

    The function returns missing keys and different values for common keys::

        >>> a = {'c': 2, 'b': 3, 'a': 4}
        >>> b = {'c': 1, 'b': 3, 'd': 5}
        >>> diff_keyvalue(a, b, precision=0)
        (['d'], ['a'], [('c', 2, 1)])

    You can provide only a subset of values in dict_a, in this case
    first item in tuple is an empty list::

        >>> diff_keyvalue(a, b, a_is_subset=True, precision=0)
        ([], ['a'], [('c', 2, 1)])

    This function takes the same parameters as `keyvalue_equals()` and
    compares the values in the same way.

    :returns: A tuple of sorted lists, first is the list of keys missing
        in dict_a, second is the list of keys missing in dict_b and third
        is a list of mismatched values as tuples
        (key, value_from_a, value_from_b)
    :rtype: (list, list, list)

    Compared to the Python ``difflib`` package, this function does not create
    any difference output. It just returns the lists described above.
    Compared to the Python ``unittest`` ``assertDictEqual()``,
    this function does not issue an error or exception, it just determines
    what is the difference.
    """
    comparators = key_equal if key_equal is not None else {}

    # keys present only in dict_b are not reported when dict_a is a subset
    if a_is_subset:
        only_in_b = []
    else:
        only_in_b = [key for key in dict_b if key not in dict_a]

    only_in_a = []
    differing = []
    for key in dict_a:
        if key not in dict_b:
            only_in_a.append(key)
            continue
        # the key is in both dictionaries, so the values can be compared
        compare = comparators.get(key, def_equal)
        item_a, item_b = dict_a[key], dict_b[key]
        if not compare(item_a, item_b, precision):
            differing.append((key, item_a, item_b))

    return sorted(only_in_b), sorted(only_in_a), sorted(differing)
409
410
def proj_info_equals(text_a, text_b):
    """Test if two PROJ_INFO texts are equal.

    Both texts are parsed by `text_to_keyvalue()` using ``:`` as the
    key-value separator and ``,`` as the separator of values. The projection
    names are unified by `unify_projection()` before the comparison.
    The values of the ``+towgs84`` key are compared using their sums,
    all other values are compared by `values_equal()`.

    :param text_a: first PROJ_INFO text
    :param text_b: second PROJ_INFO text

    :return: `True` if the texts are considered equal, `False` otherwise
    """
    def compare_sums(list_a, list_b, precision):
        """Compare difference of sums of two list using precision"""
        # derived from the code in grass.script.core
        # the result must be an explicit boolean, returning nothing (None)
        # for matching sums would be evaluated as a difference by the caller
        return abs(sum(list_a) - sum(list_b)) <= precision
    sep = ':'
    val_sep = ','
    key_equal = {'+towgs84': compare_sums}
    dict_a = text_to_keyvalue(text_a, sep=sep, val_sep=val_sep,
                              functions=[unify_projection])
    dict_b = text_to_keyvalue(text_b, sep=sep, val_sep=val_sep,
                              functions=[unify_projection])
    return keyvalue_equals(dict_a, dict_b,
                            precision=0.000001,
                            def_equal=values_equal,
                            key_equal=key_equal)
429
430
def proj_units_equals(text_a, text_b):
    """Test if two PROJ_UNITS texts are equal.

    Both texts are parsed by `text_to_keyvalue()` using ``:`` as the
    key-value separator and ``,`` as the separator of values. The spelling
    of units is unified by `unify_units()` before the comparison.
    The values of the ``unit`` and ``units`` keys are compared ignoring
    their case, all other values are compared by `values_equal()`.
    """
    def lowercase_equals(string_a, string_b, precision=None):
        """Test equality of two strings ignoring their case using ``lower()``.

        Precision is accepted as required by `keyvalue_equals()` but ignored.
        """
        # we don't need a warning for unused precision
        # pylint: disable=W0613
        return string_a.lower() == string_b.lower()

    parsed = [text_to_keyvalue(text, sep=':', val_sep=',',
                               functions=[unify_units])
              for text in (text_a, text_b)]
    case_insensitive_keys = {'unit': lowercase_equals,
                             'units': lowercase_equals}
    return keyvalue_equals(parsed[0], parsed[1],
                           precision=0.000001,
                           def_equal=values_equal,
                           key_equal=case_insensitive_keys)
452
453
454# TODO: support also float (with E, e, inf, nan, ...?) and int (###, ##.)
455# http://hg.python.org/cpython/file/943d3e289ab4/Lib/decimal.py#l6098
456# perhaps a separate function?
457# alternative names: looks like, correspond with/to
458# TODO: change checking over lines?
459# TODO: change parameter order?
460# TODO: the behavior with last \n is strange but now using DOTALL and $
def check_text_ellipsis(reference, actual):
    r"""Test if the actual text matches the reference with ellipses.

    Each ellipsis (three dots) in the reference stands for one or more
    arbitrary characters in the actual text. The rest of the reference
    has to match literally.

    >>> check_text_ellipsis("Vector map <...> contains ... points.",
    ...                     "Vector map <bridges> contains 5268 points.")
    True
    >>> check_text_ellipsis("user: ...\\nname: elevation",
    ...                     "user: some_user\\nname: elevation")
    True
    >>> check_text_ellipsis("user: ...\\nname: elevation",
    ...                     "user: \\nname: elevation")
    False

    The ellipsis is always considered even if it is followed by other
    dots. Consequently, a dot at the end of the sentence with preceding
    ellipsis will work as well as a line filled with undefined number of dots.

    >>> check_text_ellipsis("The result is ....",
    ...                     "The result is 25.")
    True
    >>> check_text_ellipsis("max ..... ...",
    ...                     "max ....... 6")
    True

    However, there is no way to express that the dot should be in the
    beginning and the ellipsis is at the end of the group of dots.

    >>> check_text_ellipsis("The result is ....",
    ...                     "The result is .25")
    False

    The matching goes over lines (TODO: should this be changed?):
    >>> check_text_ellipsis("a=11\nb=...", "a=11\nb=22\n")
    True

    This function is based on a regular expression containing .+ but
    no other regular expression matching is done because the reference
    is escaped.

    >>> check_text_ellipsis("Result: [569] (...)",
    ...                     "Result: 9 (too high)")
    False
    """
    # after escaping, each ellipsis is present as three escaped dots
    escaped_ellipsis = r'\.\.\.'
    pattern = re.escape(reference).replace(escaped_ellipsis, '.+') + "$"
    return re.match(pattern, actual, re.DOTALL) is not None
509
510
def check_text_ellipsis_doctest(reference, actual):
    """Test if the actual text matches the reference using doctest rules.

    >>> check_text_ellipsis_doctest("user: ...\\nname: elevation",
    ...                     "user: some_user\\nname: elevation")
    True
    >>> check_text_ellipsis_doctest("user: ...\\nname: elevation",
    ...                     "user: \\nname: elevation")
    True

    The comparison is done by the output checker from doctest with
    the ELLIPSIS option, so the examples below show how the underlying
    function behaves.

    >>> checker = doctest.OutputChecker()
    >>> checker.check_output("user: some_user\\nname: elevation",
    ...                      "user: some_user\\nname: elevation",
    ...                      optionflags=None)
    True
    >>> checker.check_output("user: user1\\nname: elevation",
    ...                      "user: some_user\\nname: elevation",
    ...                      optionflags=doctest.ELLIPSIS)
    False
    >>> checker.check_output("user: ...\\nname: elevation",
    ...                      "user: some_user\\nname: elevation",
    ...                      optionflags=doctest.ELLIPSIS)
    True

    The ellipsis matches also an empty string, so the following matches:

    >>> checker.check_output("user: ...\\nname: elevation",
    ...                      "user: \\nname: elevation",
    ...                      optionflags=doctest.ELLIPSIS)
    True

    It is robust concerning misspelled matching string but does not allow
    ellipsis followed by a dot, e.g. at the end of the sentence:

    >>> checker.check_output("user: ....\\nname: elevation",
    ...                      "user: some_user\\nname: elevation",
    ...                      optionflags=doctest.ELLIPSIS)
    False
    """
    # a new checker is created for each call, it could be also a global one
    return doctest.OutputChecker().check_output(
        reference, actual, optionflags=doctest.ELLIPSIS)
556
557
558import hashlib
559
# optimal size depends on file system and maybe on hasher.block_size
_BUFFER_SIZE = 2 ** 16


# TODO: accept also open file object
def file_md5(filename):
    """Get MD5 (check) sum of a file.

    The file is read in the binary mode by chunks of `_BUFFER_SIZE` bytes,
    so the whole content is never held in memory at once.

    :param filename: name of (path to) the file

    :return: MD5 sum of the file content as a string of hexadecimal digits
    """
    digest = hashlib.md5()
    with open(filename, 'rb') as stream:
        # read() returns empty bytes at the end of the file
        # which is the sentinel stopping the iteration
        for chunk in iter(lambda: stream.read(_BUFFER_SIZE), b''):
            digest.update(chunk)
    return digest.hexdigest()
574
575
def text_file_md5(filename, exclude_lines=None, exclude_re=None,
                  prepend_lines=None, append_lines=None):
    """Get a MD5 (check) sum of a text file.

    Works in the same way as `file_md5()` function but ignores newlines
    characters and excludes lines from the file as well as prepend or
    append them if requested.

    :param filename: name of (path to) the text file
    :param exclude_lines: list of strings to be excluded
        (newline characters should not be part of the strings,
        although a string which includes the newline at the end of the line
        is accepted too)
    :param exclude_re: regular expression string;
        lines matching this regular expression will not be considered
        (the expression is matched from the beginning of the line)
    :param prepend_lines: list of lines to be prepended to the file
        before computing the sum (used as they are, no newline is added)
    :param append_lines: list of lines to be appended to the file
        before computing the sum (used as they are, no newline is added)

    :return: MD5 sum as a string of hexadecimal digits
    """
    hasher = hashlib.md5()
    regexp = re.compile(exclude_re) if exclude_re else None

    def update(text):
        """Add text to the sum, text is encoded first except for Python 2."""
        hasher.update(text if sys.version_info[0] == 2 else encode(text))

    for line in prepend_lines or ():
        update(line)
    with open(filename, 'r') as f:
        for line in f:
            # replace platform newlines by standard newline
            if os.linesep != '\n':
                line = line.rstrip(os.linesep) + '\n'
            # lines read from the file end with a newline while the strings
            # to exclude are given without it, so compare the content of
            # the line without the newline (the line as it is compared too,
            # to accept also strings which contain the newline)
            if exclude_lines and (line in exclude_lines or
                                  line.rstrip('\r\n') in exclude_lines):
                continue
            if regexp is not None and regexp.match(line):
                continue
            update(line)
    for line in append_lines or ():
        update(line)
    return hasher.hexdigest()
613
614
def files_equal_md5(filename_a, filename_b):
    """Test if two files have the same content by comparing their MD5 sums.

    :param filename_a: name of (path to) the first file
    :param filename_b: name of (path to) the second file

    :return: `True` if the MD5 sums are the same, `False` otherwise
    """
    sum_a = file_md5(filename_a)
    sum_b = file_md5(filename_b)
    return sum_a == sum_b
618
619
def main():  # pragma: no cover
    """Run the doctests and return the number of failed ones."""
    return doctest.testmod().failed


if __name__ == '__main__':  # pragma: no cover
    # number of failed tests is used as the exit code
    sys.exit(main())
628