# -*- coding: utf-8 -*-
"""GRASS Python testing framework checkers

Copyright (C) 2014 by the GRASS Development Team
This program is free software under the GNU General Public
License (>=v2). Read the file COPYING that comes with GRASS GIS
for details.

:authors: Vaclav Petras, Soeren Gebbert
"""

import os
import sys
import re
import doctest
import hashlib

try:
    from grass.script.utils import decode, encode, _get_encoding
except (ImportError, AttributeError):
    # The GRASS Python library could not be imported. This import has to be
    # guarded in the same way as the KeyValue import below, otherwise it
    # fails first and the KeyValue fallback is never reached.
    # The stand-ins below cover only what this module needs.
    import locale

    def _get_encoding():
        """Return the preferred locale encoding, UTF-8 when it is unknown."""
        return locale.getpreferredencoding(False) or 'UTF-8'

    def decode(bytes_, encoding=None):
        """Return text for `bytes_`; non-bytes objects are returned as is."""
        if isinstance(bytes_, bytes):
            return bytes_.decode(encoding or _get_encoding())
        return bytes_

    def encode(string, encoding=None):
        """Return bytes for `string`; bytes objects are returned as is."""
        if isinstance(string, bytes):
            return string
        return string.encode(encoding or _get_encoding())

try:
    from grass.script.core import KeyValue
except (ImportError, AttributeError):
    # TODO: we are silent about the error and use an object with different
    # interface, should be replaced by central keyvalue module
    # this can happen when translations are not available
    # TODO: grass should survive or give better error when translations
    # are not available
    # even the lazy loading after first _ call would be interesting
    # File "...grass/script/core.py", line 40, in <module>
    # AttributeError: 'NoneType' object has no attribute 'endswith'
    KeyValue = dict

# alternative term to check(er(s)) would be compare


def _unify_names(value, lookup):
    """Replace synonyms in `value` by the first (canonical) name of a group.

    :param value: a string, a list (possibly nested) of strings, or any
        other object; objects which are neither are returned unchanged
    :param lookup: list of lists, each inner list holds all spellings of
        one name with the canonical spelling first

    :return: new value, lists are rebuilt so the input is never modified
    """
    if isinstance(value, list):
        return [_unify_names(item, lookup) for item in value]
    if isinstance(value, str):
        for names in lookup:
            if value in names:
                return names[0]
    return value


def unify_projection(dic):
    """Unifies names of projections.

    Some projections are referred using different names like
    'Universal Transverse Mercator' and 'Universe Transverse Mercator'.
    This function replaces synonyms by a unified name.

    Example of common typo in UTM replaced by correct spelling::

        >>> unify_projection({'name': ['Universe Transverse Mercator']})
        {'name': ['Universal Transverse Mercator']}

    The name can be also a single string (this is what
    `text_to_keyvalue()` produces for a line with one value)::

        >>> unify_projection({'name': 'Universe Transverse Mercator'})
        {'name': 'Universal Transverse Mercator'}

    The dictionary passed in is never modified and a dictionary without
    the ``name`` key is just copied.

    :param dic: The dictionary containing information about projection

    :return: The dictionary with the new values if needed or a copy of old one
    """
    # the lookup variable is a list of list, each list contains all the
    # possible name for a projection system
    lookup = [['Universal Transverse Mercator',
               'Universe Transverse Mercator']]
    dic = dict(dic)
    if 'name' in dic:
        dic['name'] = _unify_names(dic['name'], lookup)
    return dic


def unify_units(dic):
    """Unifies names of units.

    Some units have different spelling although they are the same units.
    This functions replaces different spelling options by unified one.

    Example of British English spelling replaced by US English spelling::

        >>> unify_units({'units': ['metres'], 'unit': ['metre']})  # doctest: +SKIP
        {'units': ['meters'], 'unit': ['meter']}

    Values of the ``unit`` and ``units`` keys can be strings or lists
    of strings. The dictionary passed in is never modified and missing
    keys are ignored.

    :param dic: The dictionary containing information about units

    :return: The dictionary with the new values if needed or a copy of old one
    """
    # the lookup variable is a list of list, each list contains all the
    # possible name for a units
    lookup = [['meter', 'metre'], ['meters', 'metres'],
              ['Meter', 'Metre'], ['Meters', 'Metres'],
              ['kilometer', 'kilometre'], ['kilometers', 'kilometres'],
              ['Kilometer', 'Kilometre'], ['Kilometers', 'Kilometres'],
              ]
    dic = dict(dic)
    for key in ('unit', 'units'):
        if key in dic:
            dic[key] = _unify_names(dic[key], lookup)
    return dic


def value_from_string(value):
    """Create value of a most fitting type from a string.

    Type conversions are applied in order ``int``, ``float``, ``string``
    where string is no conversion.

    >>> value_from_string('1')
    1
    >>> value_from_string('5.6')
    5.6
    >>> value_from_string('  5.6\t  ')
    5.6
    >>> value_from_string('hello')
    'hello'

    :param value: string to convert

    :return: ``int``, ``float`` or the string stripped from whitespace
    """
    # We first try integer then float because
    # int('1.0') is ValueError (although int(1.0) is not)
    # while float('1') is not
    try:
        return int(value)
    except ValueError:
        pass
    try:
        return float(value)
    except ValueError:
        # not a number at all,
        # strip strings from whitespace (expecting spaces and tabs)
        return value.strip()


# TODO: what is the default separator?
def text_to_keyvalue(text, sep=":", val_sep=",", functions=None,
                     skip_invalid=False, skip_empty=False,
                     from_string=value_from_string):
    """Convert text to key-value pairs (dictionary-like KeyValue object).

    Converts a key-value text file, where entries are separated
    by newlines and the key and value are separated by `sep`,
    into a key-value dictionary and discovers/uses the correct
    data types (float, int or string) for values.

    Besides key-value pairs it also parses values itself. Value is created
    with the best fitting type using `value_from_string()` function by default.
    When val_sep is present in value part, the resulting value is
    a list of values.

    :param text: string to convert
    :param sep: character that separates the keys and values
    :param val_sep: character that separates the values of a single key
    :param functions: list of functions to apply on the resulting dictionary
    :param skip_invalid: skip all lines which does not contain separator
    :param skip_empty: skip empty lines
    :param from_string: a function used to convert strings to values,
        use ``lambda x: x`` for no conversion

    :return: a dictionary representation of text
    :return type: grass.script.core.KeyValue or dict

    :raises ValueError: for an empty line (unless `skip_empty` is set) and
        for a non-empty line without separator (unless `skip_invalid`
        is set)

    And example of converting text with text, floats, integers and list
    to a dictionary::

        >>> sorted(text_to_keyvalue('''a: Hello
        ... b: 1.0
        ... c: 1,2,3,4,5
        ... d : hello,8,0.1''').items())  # sorted items from the dictionary
        [('a', 'Hello'), ('b', 1.0), ('c', [1, 2, 3, 4, 5]), ('d', ['hello', 8, 0.1])]

    .. warning::
        And empty string is a valid input because empty dictionary is a valid
        dictionary. You need to test this separately according
        to the circumstances.
    """
    kvdict = KeyValue()
    functions = [] if functions is None else functions
    key = None

    # splitting according to universal newlines approach
    # TODO: add also general split with vsep
    for line in text.splitlines():
        if sep not in line:
            # lines with no separator (empty or invalid)
            if not line:
                if not skip_empty:
                    # TODO: here should go _ for translation
                    # TODO: the error message is not really informative
                    # in case of skipping lines we may get here with no key
                    msg = ("Empty line in the parsed text.")
                    if kvdict:
                        # key is the one from previous line
                        msg = ("Empty line in the parsed text."
                               " Previous line's key is <%s>") % key
                    raise ValueError(msg)
            elif not skip_invalid:
                # line contains something but not separator
                # TODO: here should go _ for translation
                raise ValueError(("Line <{l}> does not contain"
                                  " separator <{s}>.").format(l=line, s=sep))
            # if we get here we are silently ignoring the line
            # because it is invalid (does not contain key-value separator) or
            # because it is empty
            continue

        key, value = line.split(sep, 1)
        key = key.strip()
        # this strip may not be necessary, we strip each item in list
        # and also if there is only one value
        value = value.strip()
        if val_sep in value:
            # lists
            kvdict[key] = [from_string(item) for item in value.split(val_sep)]
        else:
            # single values
            kvdict[key] = from_string(value)

    for function in functions:
        kvdict = function(kvdict)
    return kvdict


# TODO: decide if there should be some default for precision
# TODO: define standard precisions for DCELL, FCELL, CELL, mm, ft, cm, ...
# TODO: decide if None is valid, and use some default or no compare
# TODO: is None a valid value for precision?
def values_equal(value_a, value_b, precision=0.000001):
    """Test if two values are equal, numbers are compared using precision.

    Two numbers are equal when the absolute value of their difference is
    not greater than `precision`. When both numbers are integers, precision
    is applied only if its integer part is greater than zero. Lists are
    equal when they have the same length and all their items are equal
    in the sense of this function. Everything else is compared using
    the ``!=`` operator.

    >>> values_equal(1.022, 1.02, precision=0.01)
    True
    >>> values_equal([1.2, 5.3, 6.8], [1.1, 5.2, 6.9], precision=0.2)
    True
    >>> values_equal(7, 5, precision=2)
    True
    >>> values_equal(1, 5.9, precision=10)
    True
    >>> values_equal('Hello', 'hello')
    False

    :param value_a: first value
    :param value_b: second value
    :param precision: maximal accepted difference of two numbers

    :return: `True` if the values are considered equal, `False` otherwise
    """
    a_is_float = isinstance(value_a, float)
    b_is_float = isinstance(value_b, float)
    a_is_int = isinstance(value_a, int)
    b_is_int = isinstance(value_b, int)

    if (a_is_float and b_is_float) or \
            (a_is_float and b_is_int) or (b_is_float and a_is_int):
        # two floats or one float and one int, None is not accepted
        # (in Python 3 comparing None to a number raises TypeError)
        # we apply precision also to int-float comparison
        # rather than converting both to integer
        # (as in the original function from grass.script.core)
        return not abs(value_a - value_b) > float(precision)

    if a_is_int and b_is_int and precision and int(precision) > 0:
        # both int but precision applies for them
        return not abs(value_a - value_b) > precision

    if isinstance(value_a, list) and isinstance(value_b, list):
        if len(value_a) != len(value_b):
            return False
        # apply this function for comparison of items in the list
        return all(values_equal(item_a, item_b, precision)
                   for item_a, item_b in zip(value_a, value_b))

    return not value_a != value_b


def keyvalue_equals(dict_a, dict_b, precision,
                    def_equal=values_equal, key_equal=None,
                    a_is_subset=False):
    """Compare two dictionaries.

    .. note::
        Always use keyword arguments for all parameters with defaults.
        It is a good idea to use keyword arguments also for the first
        two parameters.

    An example of key-value texts comparison::

        >>> keyvalue_equals(text_to_keyvalue('''a: Hello
        ... b: 1.0
        ... c: 1,2,3,4,5
        ... d: hello,8,0.1'''),
        ... text_to_keyvalue('''a: Hello
        ... b: 1.1
        ... c: 1,22,3,4,5
        ... d: hello,8,0.1'''), precision=0.1)
        False

    :param dict_a: first dictionary
    :param dict_b: second dictionary
    :param precision: precision with which the floating point values
        are compared (passed to equality functions)
    :param callable def_equal: function used for comparison by default
    :param dict key_equal: dictionary of functions used for comparison
        of specific keys, `def_equal` is used for the rest,
        keys in dictionary are keys in `dict_a` and `dict_b` dictionaries,
        values are the functions used to compare the given key
    :param a_is_subset: `True` if `dict_a` is a subset of `dict_b`,
        `False` otherwise

    :return: `True` if identical, `False` if different

    Use `diff_keyvalue()` to get information about differences.
    You can use this function to find out if there is a difference and then
    use `diff_keyvalue()` to determine all the differences between
    dictionaries.
    """
    key_equal = {} if key_equal is None else key_equal

    if not a_is_subset and sorted(dict_a.keys()) != sorted(dict_b.keys()):
        return False

    # iterate over subset or just any if not a_is_subset
    for key in dict_a:
        # check for missing keys in superset
        if a_is_subset and key not in dict_b:
            return False
        # compare matching keys
        compare = key_equal.get(key, def_equal)
        if not compare(dict_a[key], dict_b[key], precision):
            return False
    return True


# TODO: should the return depend on the a_is_subset parameter?
# this function must have the same interface and behavior as keyvalue_equals
def diff_keyvalue(dict_a, dict_b, precision,
                  def_equal=values_equal, key_equal=None,
                  a_is_subset=False):
    """Determine the difference of two dictionaries.

    The function returns missing keys and different values for common keys::

        >>> a = {'c': 2, 'b': 3, 'a': 4}
        >>> b = {'c': 1, 'b': 3, 'd': 5}
        >>> diff_keyvalue(a, b, precision=0)
        (['d'], ['a'], [('c', 2, 1)])

    You can provide only a subset of values in dict_a, in this case
    first item in tuple is an empty list::

        >>> diff_keyvalue(a, b, a_is_subset=True, precision=0)
        ([], ['a'], [('c', 2, 1)])

    This function behaves the same as `keyvalue_equals()`.

    :param dict_a: first dictionary
    :param dict_b: second dictionary
    :param precision: precision passed to the equality functions
    :param callable def_equal: function used for comparison by default
    :param dict key_equal: functions used for comparison of specific keys
    :param a_is_subset: `True` if keys missing in `dict_a` should not
        be reported

    :returns: A tuple of lists, first is list of missing keys in dict_a,
        second missing keys in dict_b and third is a list of mismatched
        values as tuples (key, value_from_a, value_from_b)
    :rtype: (list, list, list)

    Comparing to the Python ``difflib`` package this function does not create
    any difference output. It just returns the dictionaries.
    Comparing to the Python ``unittest`` ``assertDictEqual()``,
    this function does not issue error or exception, it just determines
    what is the difference.
    """
    key_equal = {} if key_equal is None else key_equal

    missing_in_a = []
    if not a_is_subset:
        missing_in_a = [key for key in dict_b if key not in dict_a]

    missing_in_b = []
    mismatched = []
    # iterate over a, so we know that it is in a
    for key in dict_a:
        if key not in dict_b:
            missing_in_b.append(key)
            continue
        compare = key_equal.get(key, def_equal)
        if not compare(dict_a[key], dict_b[key], precision):
            mismatched.append((key, dict_a[key], dict_b[key]))

    return sorted(missing_in_a), sorted(missing_in_b), sorted(mismatched)


def proj_info_equals(text_a, text_b):
    """Test if two PROJ_INFO texts are equal.

    Projection names are unified using `unify_projection()`, values of
    the ``+towgs84`` key are compared by their sums and all the other
    values are compared by `values_equal()` with precision 0.000001.

    :param text_a: first PROJ_INFO text
    :param text_b: second PROJ_INFO text

    :return: `True` if the texts are considered equal, `False` otherwise
    """
    def compare_sums(list_a, list_b, precision):
        """Compare difference of sums of two list using precision"""
        # derived from the code in grass.script.core
        def total(values):
            # a key with just one value is parsed as a number, not as a list
            return sum(values) if isinstance(values, list) else values
        # a boolean must be returned also for the equal case, returning
        # nothing (None) would be understood as a difference
        return not abs(total(list_a) - total(list_b)) > precision
    sep = ':'
    val_sep = ','
    # NOTE(review): PROJ_INFO files may spell this key without the leading
    # plus sign, confirm that '+towgs84' is the key which is really used
    key_equal = {'+towgs84': compare_sums}
    dict_a = text_to_keyvalue(text_a, sep=sep, val_sep=val_sep,
                              functions=[unify_projection])
    dict_b = text_to_keyvalue(text_b, sep=sep, val_sep=val_sep,
                              functions=[unify_projection])
    return keyvalue_equals(dict_a, dict_b,
                           precision=0.000001,
                           def_equal=values_equal,
                           key_equal=key_equal)


def proj_units_equals(text_a, text_b):
    """Test if two PROJ_UNITS texts are equal.

    Spelling of units is unified using `unify_units()`, values of the
    ``unit`` and ``units`` keys are compared case-insensitively and all
    the other values are compared by `values_equal()` with precision
    0.000001.

    :param text_a: first PROJ_UNITS text
    :param text_b: second PROJ_UNITS text

    :return: `True` if the texts are considered equal, `False` otherwise
    """
    def lowercase_equals(string_a, string_b, precision=None):
        # we don't need a warning for unused precision
        # pylint: disable=W0613
        """Test equality of two strings ignoring their case using ``lower()``.

        Lists are compared item by item, values which are not strings
        are compared directly.

        Precision is accepted as require by `keyvalue_equals()` but ignored.
        """
        if isinstance(string_a, list) and isinstance(string_b, list):
            if len(string_a) != len(string_b):
                return False
            return all(lowercase_equals(item_a, item_b)
                       for item_a, item_b in zip(string_a, string_b))
        if isinstance(string_a, str) and isinstance(string_b, str):
            return string_a.lower() == string_b.lower()
        return string_a == string_b
    sep = ':'
    val_sep = ','
    key_equal = {'unit': lowercase_equals, 'units': lowercase_equals}
    dict_a = text_to_keyvalue(text_a, sep=sep, val_sep=val_sep,
                              functions=[unify_units])
    dict_b = text_to_keyvalue(text_b, sep=sep, val_sep=val_sep,
                              functions=[unify_units])
    return keyvalue_equals(dict_a, dict_b,
                           precision=0.000001,
                           def_equal=values_equal,
                           key_equal=key_equal)


# TODO: support also float (with E, e, inf, nan, ...?) and int (###, ##.)
# http://hg.python.org/cpython/file/943d3e289ab4/Lib/decimal.py#l6098
# perhaps a separate function?
# alternative names: looks like, correspond with/to
# TODO: change checking over lines?
# TODO: change parameter order?
# TODO: the behavior with last \n is strange but now using DOTALL and $
def check_text_ellipsis(reference, actual):
    r"""Test if text matches a reference where ``...`` stands for any text.

    The ellipsis (three dots) in `reference` matches one or more arbitrary
    characters in `actual`, the rest of the text has to be the same.

    >>> check_text_ellipsis("Vector map <...> contains ... points.",
    ...                     "Vector map <bridges> contains 5268 points.")
    True
    >>> check_text_ellipsis("user: ...\\nname: elevation",
    ...                     "user: some_user\\nname: elevation")
    True
    >>> check_text_ellipsis("user: ...\\nname: elevation",
    ...                     "user: \\nname: elevation")
    False

    The ellipsis is always considered even if it is followed by another
    dots. Consequently, a dot at the end of the sentence with preceding
    ellipsis will work as well as a line filled with undefined number of dots.

    >>> check_text_ellipsis("The result is ....",
    ...                     "The result is 25.")
    True
    >>> check_text_ellipsis("max ..... ...",
    ...                     "max ....... 6")
    True

    However, there is no way how to express that the dot should be in the
    beginning and the ellipsis is at the end of the group of dots.

    >>> check_text_ellipsis("The result is ....",
    ...                     "The result is .25")
    False

    The matching goes over lines (TODO: should this be changed?):
    >>> check_text_ellipsis("a=11\nb=...", "a=11\nb=22\n")
    True

    This function is based on regular expression containing .+ but no other
    regular expression matching will be done.

    >>> check_text_ellipsis("Result: [569] (...)",
    ...                     "Result: 9 (too high)")
    False

    :param reference: expected text which may contain ellipses
    :param actual: text to be checked

    :return: `True` if `actual` matches `reference`, `False` otherwise
    """
    # escape everything so that no regular expression syntax
    # from the reference is applied
    ref_escaped = re.escape(reference)
    # replace each escaped ... by the only allowed wildcard
    ref_regexp = re.sub(r'\\\.\\\.\\\.', '.+', ref_escaped) + "$"
    return bool(re.match(ref_regexp, actual, re.DOTALL))


def check_text_ellipsis_doctest(reference, actual):
    """Test if text matches a reference using the doctest ellipsis rules.

    >>> check_text_ellipsis_doctest("user: ...\\nname: elevation",
    ...                     "user: some_user\\nname: elevation")
    True
    >>> check_text_ellipsis_doctest("user: ...\\nname: elevation",
    ...                     "user: \\nname: elevation")
    True

    This function is using doctest's function to check the result, so we
    will discuss here how the underlying function behaves.

    >>> checker = doctest.OutputChecker()
    >>> checker.check_output("user: some_user\\nname: elevation",
    ...                      "user: some_user\\nname: elevation",
    ...                      optionflags=None)
    True
    >>> checker.check_output("user: user1\\nname: elevation",
    ...                      "user: some_user\\nname: elevation",
    ...                      optionflags=doctest.ELLIPSIS)
    False
    >>> checker.check_output("user: ...\\nname: elevation",
    ...                      "user: some_user\\nname: elevation",
    ...                      optionflags=doctest.ELLIPSIS)
    True

    The ellipsis matches also an empty string, so the following matches:

    >>> checker.check_output("user: ...\\nname: elevation",
    ...                      "user: \\nname: elevation",
    ...                      optionflags=doctest.ELLIPSIS)
    True

    It is robust concerning misspelled matching string but does not allow
    ellipsis followed by a dot, e.g. at the end of the sentence:

    >>> checker.check_output("user: ....\\nname: elevation",
    ...                      "user: some_user\\nname: elevation",
    ...                      optionflags=doctest.ELLIPSIS)
    False

    :param reference: expected text which may contain ellipses
    :param actual: text to be checked

    :return: `True` if `actual` matches `reference`, `False` otherwise
    """
    # this can be also global
    checker = doctest.OutputChecker()
    return checker.check_output(reference, actual,
                                optionflags=doctest.ELLIPSIS)


# optimal size depends on file system and maybe on hasher.block_size
_BUFFER_SIZE = 2**16


# TODO: accept also open file object
def file_md5(filename):
    """Get MD5 (check) sum of a file.

    The file is read in binary mode in chunks of `_BUFFER_SIZE` bytes.

    :param filename: name of (path to) the file

    :return: the sum as a string of hexadecimal digits
    """
    hasher = hashlib.md5()
    with open(filename, 'rb') as f:
        # read() returns empty bytes at the end of the file
        for buf in iter(lambda: f.read(_BUFFER_SIZE), b''):
            hasher.update(buf)
    return hasher.hexdigest()


def text_file_md5(filename, exclude_lines=None, exclude_re=None,
                  prepend_lines=None, append_lines=None):
    """Get a MD5 (check) sum of a text file.

    Works in the same way as `file_md5()` function but ignores newlines
    characters and excludes lines from the file as well as prepend or
    append them if requested.

    :param filename: name of (path to) the text file
    :param exclude_lines: list of strings to be excluded
        (newline characters should not be part of the strings,
        although strings with the trailing newline are matched as well)
    :param exclude_re: regular expression string;
        lines matching this regular expression will not be considered
    :param prepend_lines: list of lines to be prepended to the file
        before computing the sum
    :param append_lines: list of lines to be appended to the file
        before computing the sum

    :return: the sum as a string of hexadecimal digits
    """
    def feed(text):
        # in Python 2 the lines are already byte strings
        hasher.update(text if sys.version_info[0] == 2 else encode(text))

    hasher = hashlib.md5()
    regexp = re.compile(exclude_re) if exclude_re else None
    for line in prepend_lines or []:
        feed(line)
    with open(filename, 'r') as f:
        for line in f:
            # replace platform newlines by standard newline
            if os.linesep != '\n':
                line = line.rstrip(os.linesep) + '\n'
            # the line read from the file ends with a newline but
            # the strings to exclude are given without it
            if exclude_lines and (line in exclude_lines or
                                  line.rstrip('\r\n') in exclude_lines):
                continue
            if regexp and regexp.match(line):
                continue
            feed(line)
    for line in append_lines or []:
        feed(line)
    return hasher.hexdigest()


def files_equal_md5(filename_a, filename_b):
    """Check equality of two files according to their MD5 sums

    :param filename_a: name of (path to) the first file
    :param filename_b: name of (path to) the second file

    :return: `True` if the sums are the same, `False` otherwise
    """
    return file_md5(filename_a) == file_md5(filename_b)


def main():  # pragma: no cover
    """Run the doctest

    :return: number of failed doctests
    """
    ret = doctest.testmod()
    return ret.failed


if __name__ == '__main__':  # pragma: no cover
    sys.exit(main())