# -*- coding: utf-8 -*-

""" pyKwalify - core.py """

# python std lib
import datetime
import json
import logging
import os
import re
import sys
import traceback
import time
from io import open
from importlib.machinery import SourceFileLoader

# pyKwalify imports
import pykwalify
from pykwalify.compat import unicode, nativestr, basestring
from pykwalify.errors import CoreError, SchemaError, NotMappingError, NotSequenceError
from pykwalify.rule import Rule
from pykwalify.types import is_scalar, is_string, tt

# 3rd party imports
from dateutil.parser import parse
from pykwalify.compat import yml
from ruamel.yaml.constructor import Constructor

log = logging.getLogger(__name__)


class Core(object):
    """ Core class of pyKwalify """

    def __init__(self, source_file=None, schema_files=None, source_data=None, schema_data=None, extensions=None, strict_rule_validation=False,
                 fix_ruby_style_regex=False, allow_assertions=False, file_encoding=None, schema_file_obj=None, data_file_obj=None):
        """
        Load the data to validate and the schema to validate it against.

        Data and schema can each come from a file object, from file path(s) or from
        already loaded python data. File paths take precedence over file objects, and
        the `source_data` / `schema_data` values are only used when nothing was loaded
        from a file.

        :param source_file:
            Path to a .json, .yaml or .yml file with the data to validate.
        :param schema_files:
            List of paths to .json, .yaml or .yml schema files. All files are merged into
            one schema and a top level key may only be defined in one of them.
        :param source_data:
            Already loaded data, used when no source file/file object was loaded.
        :param schema_data:
            Already loaded schema, used when no schema file/file object was loaded.
        :param extensions:
            List of paths to python files that should be imported and available via 'func' keyword.
            This list of extensions can be set manually or they should be provided by the `--extension`
            flag from the cli. This list should not contain files specified by the `extensions` list keyword
            that can be defined at the top level of the schema.
        :param strict_rule_validation:
            Stored on the instance and propagated to sub validators.
        :param fix_ruby_style_regex:
            If True, surrounding slashes of ruby style /<regex>/ patterns are trimmed before matching.
        :param allow_assertions:
            Must be True for the 'assert' keyword to be allowed in a schema.
        :param file_encoding:
            Encoding passed to open() when reading source and schema files.
        :param schema_file_obj:
            File like object whose content is loaded as yaml and used as schema.
        :param data_file_obj:
            File like object whose content is loaded as yaml and used as source data.
        :raises CoreError:
            If a file is missing, has an unknown format, no data/schema could be loaded
            or the extensions are not a list of file paths.
        """
        if schema_files is None:
            schema_files = []
        if extensions is None:
            extensions = []

        log.debug(u"source_file: %s", source_file)
        log.debug(u"schema_file: %s", schema_files)
        log.debug(u"source_data: %s", source_data)
        log.debug(u"schema_data: %s", schema_data)
        log.debug(u"extension files: %s", extensions)

        self.source = None
        self.schema = None
        self.validation_errors = None
        self.validation_errors_exceptions = None
        self.root_rule = None
        self.extensions = extensions
        self.errors = []
        self.strict_rule_validation = strict_rule_validation
        self.fix_ruby_style_regex = fix_ruby_style_regex
        self.allow_assertions = allow_assertions

        # Patch in all the normal python types into the yaml load instance so we can use all the
        # internal python types in the yaml loading.
        yml.constructor.add_constructor('tag:yaml.org,2002:python/bool', Constructor.construct_yaml_bool)
        yml.constructor.add_constructor('tag:yaml.org,2002:python/complex', Constructor.construct_python_complex)
        yml.constructor.add_constructor('tag:yaml.org,2002:python/dict', Constructor.construct_yaml_map)
        yml.constructor.add_constructor('tag:yaml.org,2002:python/float', Constructor.construct_yaml_float)
        yml.constructor.add_constructor('tag:yaml.org,2002:python/int', Constructor.construct_yaml_int)
        yml.constructor.add_constructor('tag:yaml.org,2002:python/list', Constructor.construct_yaml_seq)
        yml.constructor.add_constructor('tag:yaml.org,2002:python/long', Constructor.construct_python_long)
        yml.constructor.add_constructor('tag:yaml.org,2002:python/none', Constructor.construct_yaml_null)
        yml.constructor.add_constructor('tag:yaml.org,2002:python/str', Constructor.construct_python_str)
        yml.constructor.add_constructor('tag:yaml.org,2002:python/tuple', Constructor.construct_python_tuple)
        yml.constructor.add_constructor('tag:yaml.org,2002:python/unicode', Constructor.construct_python_unicode)

        if data_file_obj:
            try:
                self.source = yml.load(data_file_obj.read())
            except Exception as e:
                # Chain the original exception so the real cause is not lost
                raise CoreError("Unable to load data_file_obj input") from e

        if schema_file_obj:
            try:
                self.schema = yml.load(schema_file_obj.read())
            except Exception as e:
                raise CoreError("Unable to load schema_file_obj") from e

        if source_file is not None:
            if not os.path.exists(source_file):
                raise CoreError(u"Provided source_file do not exists on disk: {0}".format(source_file))

            with open(source_file, "r", encoding=file_encoding) as stream:
                if source_file.endswith(".json"):
                    self.source = json.load(stream)
                elif source_file.endswith(".yaml") or source_file.endswith('.yml'):
                    self.source = yml.load(stream)
                else:
                    raise CoreError(u"Unable to load source_file. Unknown file format of specified file path: {0}".format(source_file))

        if not isinstance(schema_files, list):
            raise CoreError(u"schema_files must be of list type")

        # Merge all schema files into one single file for easy parsing
        if len(schema_files) > 0:
            schema_data = {}
            for f in schema_files:
                if not os.path.exists(f):
                    raise CoreError(u"Provided source_file do not exists on disk : {0}".format(f))

                with open(f, "r", encoding=file_encoding) as stream:
                    if f.endswith(".json"):
                        data = json.load(stream)
                    elif f.endswith(".yaml") or f.endswith(".yml"):
                        data = yml.load(stream)
                        if not data:
                            raise CoreError(u"No data loaded from file : {0}".format(f))
                    else:
                        raise CoreError(u"Unable to load file : {0} : Unknown file format. Supported file endings is [.json, .yaml, .yml]".format(f))

                    for key in data.keys():
                        if key in schema_data.keys():
                            raise CoreError(u"Parsed key : {0} : two times in schema files...".format(key))

                    # update() instead of dict(a, **b) because the latter only accepts string keys
                    schema_data.update(data)

            self.schema = schema_data

        # Nothing was loaded so try the source_data variable
        if self.source is None:
            log.debug(u"No source file loaded, trying source data variable")
            self.source = source_data
        if self.schema is None:
            log.debug(u"No schema file loaded, trying schema data variable")
            self.schema = schema_data

        # Test if anything was loaded
        if self.source is None:
            raise CoreError(u"No source file/data was loaded")
        if self.schema is None:
            raise CoreError(u"No schema file/data was loaded")

        if not isinstance(self.extensions, list):
            raise CoreError(u"Specified extensions must be a list of file paths")

        # Merge any extensions defined in the schema with the provided list of extensions from the cli.
        # A new list is built so the list object passed in by the caller is never mutated.
        self.extensions = self.extensions + list(self.schema.get('extensions', None) or [])

        # Path like objects are accepted as well because os.path can handle them
        if not all(isinstance(e, str) or hasattr(e, "__fspath__") for e in self.extensions):
            raise CoreError(u"Specified extensions must be a list of file paths")

        self._load_extensions()

        if self.strict_rule_validation:
            log.info("Using strict rule keywords validation...")

    def _load_extensions(self):
        """
        Load all extension files into the namespace pykwalify.ext

        Every path in self.extensions is made absolute, checked for existence and
        loaded as a python module. The loaded modules are stored, in order, in
        self.loaded_extensions.

        :raises CoreError: If an extension file can't be found on disk.
        """
        log.debug(u"loading all extensions : %s", self.extensions)

        self.loaded_extensions = []

        for f in self.extensions:
            if not os.path.isabs(f):
                f = os.path.abspath(f)

            if not os.path.exists(f):
                raise CoreError(u"Extension file: {0} not found on disk".format(f))

            # NOTE(review): Loader.load_module() is deprecated in the python docs in favour
            # of exec_module(). Kept as is to not change how the module gets registered.
            self.loaded_extensions.append(SourceFileLoader("", f).load_module())

        log.debug(self.loaded_extensions)
        log.debug([dir(m) for m in self.loaded_extensions])

    def validate(self, raise_exception=True):
        """
        Validate the loaded source data against the loaded schema.

        After the run, self.validation_errors holds all errors as strings and
        self.validation_errors_exceptions holds the collected error objects.

        :param raise_exception:
            If True a SchemaError is raised when any validation error was found,
            otherwise the errors are only logged.
        :return: The source data that was validated.
        :raises SchemaError: If validation failed and raise_exception is True.
        """
        log.debug(u"starting core")

        self._start_validate(self.source)
        self.validation_errors = [unicode(error) for error in self.errors]
        self.validation_errors_exceptions = self.errors

        if self.errors is None or len(self.errors) == 0:
            log.info(u"validation.valid")
        else:
            log.error(u"validation.invalid")
            log.error(u" --- All found errors ---")
            log.error(self.validation_errors)
            if raise_exception:
                raise SchemaError(u"Schema validation failed:\n - {error_msg}.".format(
                    error_msg=u'.\n - '.join(self.validation_errors)))
            else:
                log.error(u"Errors found but will not raise exception...")

        # Return validated data
        return self.source

    def _start_validate(self, value=None):
        """
        Register all partial schemas, build the root rule and validate value against it.

        Top level schema keys that starts with 'schema;' are parsed into rules, stored in
        pykwalify.partial_schemas and removed from self.schema. The remaining keys
        make up the root rule. self.errors is reset before validation starts.
        """
        path = ""
        self.errors = []
        done = []

        s = {}

        # Look for schema; tags so they can be parsed before the root rule is parsed
        for k, v in self.schema.items():
            if k.startswith("schema;"):
                log.debug(u"Found partial schema; : %s", v)
                r = Rule(schema=v)
                log.debug(u" Partial schema : %s", r)
                pykwalify.partial_schemas[k.split(";", 1)[1]] = r
            else:
                # readd all items that is not schema; so they can be parsed
                s[k] = v

        self.schema = s

        log.debug(u"Building root rule object")
        root_rule = Rule(schema=self.schema)
        self.root_rule = root_rule
        log.debug(u"Done building root rule")
        log.debug(u"Root rule: %s", self.root_rule)

        self._validate(value, root_rule, path, done)

    def _validate(self, value, rule, path, done):
        """
        Validate value against rule and dispatch to the include, sequence,
        mapping or scalar validator depending on what the rule defines.

        A missing value for a required or non nullable rule is recorded as an
        error and stops the validation of this value.
        """
        log.debug(u"Core validate")
        log.debug(u" Root validate : Rule: %s", rule)
        log.debug(u" Root validate : Rule_type: %s", rule.type)
        log.debug(u" Root validate : Seq: %s", rule.sequence)
        log.debug(u" Root validate : Map: %s", rule.mapping)
        log.debug(u" Root validate : Done: %s", done)

        if rule.required and value is None and not rule.type == 'none':
            self.errors.append(SchemaError.SchemaErrorEntry(
                msg=u"required.novalue : '{path}'",
                path=path,
                value=value.encode('unicode_escape') if value else value,
            ))
            return

        if not rule.nullable and value is None and not rule.type == 'none':
            self.errors.append(SchemaError.SchemaErrorEntry(
                msg=u"nullable.novalue : '{path}'",
                path=path,
                value=value.encode('unicode_escape') if value else value,
            ))
            return

        log.debug(u" ? ValidateRule: %s", rule)
        if rule.include_name is not None:
            self._validate_include(value, rule, path, done=None)
        elif rule.sequence is not None:
            self._validate_sequence(value, rule, path, done=None)
        elif rule.mapping is not None or rule.allowempty_map:
            self._validate_mapping(value, rule, path, done=None)
        else:
            self._validate_scalar(value, rule, path, done=None)

    def _handle_func(self, value, rule, path, done=None):
        """
        Helper function that should check if func is specified for this rule and
        then handle it for all cases in a generic way.

        The first loaded extension that has an attribute named as rule.func is called
        with (value, rule, path). A return value other than True/None is recorded as a
        validation error and any falsy return value raises CoreError.

        :raises CoreError:
            If the function returned a falsy value or was not found in any extension.
        """
        func = rule.func

        # func keyword is not defined so nothing to do
        if not func:
            return

        found_method = False

        for extension in self.loaded_extensions:
            method = getattr(extension, func, None)
            if method:
                found_method = True

                # No exception will should be caught. If one is raised it should bubble up all the way.
                ret = method(value, rule, path)
                if ret is not True and ret is not None:
                    msg = '%s. Path: {path}' % unicode(ret)
                    self.errors.append(SchemaError.SchemaErrorEntry(
                        msg=msg,
                        path=path,
                        value=None))

                # If False or None or some other object that is interpreted as False
                if not ret:
                    raise CoreError(u"Error when running extension function : {0}".format(func))

                # Only run the first matched function. Since loading order is determined
                # it should be easy to determine which file is used before others
                break

        if not found_method:
            raise CoreError(u"Did not find method '{0}' in any loaded extension file".format(func))

    def _validate_include(self, value, rule, path, done=None):
        """
        Validate value against the partial schema that rule.include_name points to.

        An error is recorded if no partial schema with that name is registered in
        pykwalify.partial_schemas.
        """
        # TODO: It is difficult to get a good test case to trigger this if case
        if rule.include_name is None:
            self.errors.append(SchemaError.SchemaErrorEntry(
                msg=u'Include name not valid',
                path=path,
                value=value.encode('unicode_escape')))
            return
        include_name = rule.include_name
        partial_schema_rule = pykwalify.partial_schemas.get(include_name)
        if not partial_schema_rule:
            self.errors.append(SchemaError.SchemaErrorEntry(
                msg=u"Cannot find partial schema with name '{include_name}'. Existing partial schemas: '{existing_schemas}'. Path: '{path}'",
                path=path,
                value=value,
                include_name=include_name,
                existing_schemas=", ".join(sorted(pykwalify.partial_schemas.keys()))))
            return

        self._validate(value, partial_schema_rule, path, done)

    def _validate_sequence(self, value, rule, path, done=None):
        """
        Validate a list value against all sub rules in rule.sequence.

        Every item is validated against every sub rule in a separate Core object so
        the errors per item and rule can be tracked. rule.matching ('any', 'all' or '*')
        decides which of those results that makes an item valid. Uniqueness constraints
        and the range of the sequence length are also checked.

        :raises CoreError: If the rule has no sequence sub rules.
        """
        log.debug(u"Core Validate sequence")
        log.debug(u" Sequence : Data: %s", value)
        log.debug(u" Sequence : Rule: %s", rule)
        log.debug(u" Sequence : RuleType: %s", rule.type)
        log.debug(u" Sequence : Path: %s", path)
        log.debug(u" Sequence : Seq: %s", rule.sequence)
        log.debug(u" Sequence : Map: %s", rule.mapping)

        if len(rule.sequence) <= 0:
            raise CoreError(u"Sequence must contains atleast one item : {0}".format(path))

        if value is None:
            log.debug(u" * Core seq: sequence data is None")
            return

        if not isinstance(value, list):
            if isinstance(value, str):
                value = value.encode('unicode_escape')
            self.errors.append(SchemaError.SchemaErrorEntry(
                u"Value '{value}' is not a list. Value path: '{path}'",
                path,
                value,
            ))
            return

        # Handle 'func' argument on this sequence
        self._handle_func(value, rule, path, done)

        ok_values = []
        error_tracker = []

        # Keyed on the error message so the same broken constraint is only reported once
        unique_errors = {}
        map_unique_errors = {}

        for i, item in enumerate(value):
            processed = []

            for r in rule.sequence:
                tmp_errors = []

                try:
                    # Create a sub core object to enable error tracking that do not
                    # collide with this Core objects errors
                    tmp_core = Core(source_data={}, schema_data={})
                    tmp_core.fix_ruby_style_regex = self.fix_ruby_style_regex
                    tmp_core.allow_assertions = self.allow_assertions
                    tmp_core.strict_rule_validation = self.strict_rule_validation
                    tmp_core.loaded_extensions = self.loaded_extensions
                    tmp_core._validate(item, r, "{0}/{1}".format(path, i), done)
                    tmp_errors = tmp_core.errors
                except NotMappingError:
                    # For example: If one type was specified as 'map' but data
                    # was 'str' a exception will be thrown but we should ignore it
                    pass
                except NotSequenceError:
                    # For example: If one type was specified as 'seq' but data
                    # was 'str' a exception will be thrown but we should ignore it
                    pass

                processed.append(tmp_errors)

                if r.type == "map":
                    log.debug(u" * Found map inside sequence")
                    unique_keys = []

                    if r.mapping is None:
                        log.debug(u" + No rule to apply, prolly because of allowempty: True")
                        # NOTE(review): this returns from the whole sequence validation, not
                        # only from this sub rule. Kept as is.
                        return

                    for k, _rule in r.mapping.items():
                        log.debug(u" * Key: %s", k)
                        log.debug(u" * Rule: %s", _rule)

                        if _rule.unique or _rule.ident:
                            unique_keys.append(k)

                    if len(unique_keys) > 0:
                        for v in unique_keys:
                            table = {}
                            for j, V in enumerate(value):
                                # Items that is not a mapping have no keys to compare. The wrong
                                # type is reported by the validation of the item itself.
                                if not isinstance(V, dict):
                                    continue

                                # If key do not exists it should be ignored by unique because that is not a broken constraint
                                val = V.get(v, None)

                                if val is None:
                                    continue

                                if val in table:
                                    curr_path = "{0}/{1}/{2}".format(path, j, v)
                                    prev_path = "{0}/{1}/{2}".format(path, table[val], v)
                                    s = SchemaError.SchemaErrorEntry(
                                        msg=u"Value '{duplicate}' is not unique. Previous path: '{prev_path}'. Path: '{path}'",
                                        path=curr_path,
                                        value=value,
                                        duplicate=val,
                                        prev_path=prev_path,
                                    )
                                    map_unique_errors[s.__repr__()] = s
                                else:
                                    table[val] = j
                elif r.unique:
                    log.debug(u" * Found unique value in sequence")
                    table = {}

                    for j, val in enumerate(value):
                        if val is None:
                            continue

                        if val in table:
                            curr_path = "{0}/{1}".format(path, j)
                            prev_path = "{0}/{1}".format(path, table[val])
                            s = SchemaError.SchemaErrorEntry(
                                msg=u"Value '{duplicate}' is not unique. Previous path: '{prev_path}'. Path: '{path}'",
                                path=curr_path,
                                value=value,
                                duplicate=val,
                                prev_path=prev_path,
                            )
                            unique_errors[s.__repr__()] = s
                        else:
                            table[val] = j

            error_tracker.append(processed)
            no_errors = [len(_errors) == 0 for _errors in processed]

            if rule.matching == "any":
                log.debug(u" * any rule %s", True in no_errors)
                ok_values.append(True in no_errors)
            elif rule.matching == "all":
                log.debug(u" * all rule %s", all(no_errors))
                ok_values.append(all(no_errors))
            elif rule.matching == "*":
                log.debug(u" * star rule %s", "...")
                ok_values.append(True)

        # NOTE(review): iterating the dicts yields the keys, so the error message strings
        # and not the SchemaErrorEntry objects are appended here. Kept as is to not change
        # the content of the error lists; confirm if the entry objects should be used instead.
        for _error in unique_errors:
            self.errors.append(_error)

        for _error in map_unique_errors:
            self.errors.append(_error)

        log.debug(u" * ok : %s", ok_values)

        # All values must pass the validation, otherwise add the parsed errors
        # to the global error list and throw up some error.
        if not all(ok_values):
            # Ignore checking for '*' type because it should allways go through
            if rule.matching == "any":
                log.debug(u" * Value: %s did not validate against one or more sequence schemas", value)
            elif rule.matching == "all":
                log.debug(u" * Value: %s did not validate against all possible sequence schemas", value)

            for i, is_ok in enumerate(ok_values):
                if not is_ok:
                    for error in error_tracker[i]:
                        for e in error:
                            self.errors.append(e)

        log.debug(u" * Core seq: validation recursivley done...")

        if rule.range is not None:
            rr = rule.range

            self._validate_range(
                rr.get("max"),
                rr.get("min"),
                rr.get("max-ex"),
                rr.get("min-ex"),
                len(value),
                path,
                "seq",
            )

    def _validate_mapping(self, value, rule, path, done=None):
        """
        Validate a dict value against the key rules in rule.mapping.

        Checks the range of the number of keys, that all required keys are present,
        fills in default values for missing keys and validates every value against
        the rule of its key, the default rule ('=') or the matching regex rules.
        """
        log.debug(u"Validate mapping")
        log.debug(u" Mapping : Data: %s", value)
        log.debug(u" Mapping : Rule: %s", rule)
        log.debug(u" Mapping : RuleType: %s", rule.type)
        log.debug(u" Mapping : Path: %s", path)
        log.debug(u" Mapping : Seq: %s", rule.sequence)
        log.debug(u" Mapping : Map: %s", rule.mapping)

        if not isinstance(value, dict):
            self.errors.append(SchemaError.SchemaErrorEntry(
                u"Value '{value}' is not a dict. Value path: '{path}'",
                path,
                value,
            ))
            return

        if rule.mapping is None:
            log.debug(u" + No rule to apply, prolly because of allowempty: True")
            return

        # Handle 'func' argument on this mapping
        self._handle_func(value, rule, path, done)

        m = rule.mapping
        log.debug(u" Mapping: Rule-Mapping: %s", m)

        if rule.range is not None:
            r = rule.range

            self._validate_range(
                r.get("max"),
                r.get("min"),
                r.get("max-ex"),
                r.get("min-ex"),
                len(value),
                path,
                "map",
            )

        for k, rr in m.items():
            # Handle if the value of the key contains a include keyword
            if rr.include_name is not None:
                include_name = rr.include_name
                partial_schema_rule = pykwalify.partial_schemas.get(include_name)

                if not partial_schema_rule:
                    self.errors.append(SchemaError.SchemaErrorEntry(
                        msg=u"Cannot find partial schema with name '{include_name}'. Existing partial schemas: '{existing_schemas}'. Path: '{path}'",
                        path=path,
                        value=value,
                        include_name=include_name,
                        existing_schemas=", ".join(sorted(pykwalify.partial_schemas.keys()))))
                    return

                rr = partial_schema_rule

            # Find out if this is a regex rule
            is_regex_rule = False
            required_regex = ""
            for regex_rule in rule.regex_mappings:
                if k == "regex;({})".format(regex_rule.map_regex_rule) or k == "re;({})".format(regex_rule.map_regex_rule):
                    is_regex_rule = True
                    required_regex = regex_rule.map_regex_rule

            # Check for the presense of the required key
            is_present = False
            if not is_regex_rule:
                is_present = k in value
            else:
                is_present = any([re.search(required_regex, str(v)) for v in value])

            # Specifying =: as key is considered the "default" if no other keys match
            if rr.required and not is_present and k != "=":
                self.errors.append(SchemaError.SchemaErrorEntry(
                    msg=u"Cannot find required key '{key}'. Path: '{path}'",
                    path=path,
                    value=value,
                    key=k))
            if k not in value and rr.default is not None:
                value[k] = rr.default

        for k, v in value.items():
            # If no other case was a match, check if a default mapping is valid/present and use
            # that one instead
            r = m.get(k, m.get('='))
            log.debug(u" Mapping-value : %s", m)
            log.debug(u" Mapping-value : %s %s", k, v)
            log.debug(u" Mapping-value : %s", r)

            regex_mappings = [(regex_rule, re.search(regex_rule.map_regex_rule, str(k))) for regex_rule in rule.regex_mappings]
            log.debug(u" Mapping-value: Mapping Regex matches: %s", regex_mappings)

            if r is not None:
                # validate recursively
                log.debug(u" Mapping-value: Core Map: validate recursively: %s", r)
                self._validate(v, r, u"{0}/{1}".format(path, k), done)
            elif any(regex_mappings):
                sub_regex_result = []

                # Found at least one that matches a mapping regex
                for mm in regex_mappings:
                    if mm[1]:
                        log.debug(u" Mapping-value: Matching regex patter: %s", mm[0])
                        self._validate(v, mm[0], "{0}/{1}".format(path, k), done)
                        sub_regex_result.append(True)
                    else:
                        sub_regex_result.append(False)

                if rule.matching_rule == "any":
                    if any(sub_regex_result):
                        log.debug(u" Mapping-value: Matched at least one regex")
                    else:
                        log.debug(u" Mapping-value: No regex matched")
                        self.errors.append(SchemaError.SchemaErrorEntry(
                            msg=u"Key '{key}' does not match any regex '{regex}'. Path: '{path}'",
                            path=path,
                            value=value,
                            key=k,
                            regex="' or '".join(sorted([mm[0].map_regex_rule for mm in regex_mappings]))))
                elif rule.matching_rule == "all":
                    if all(sub_regex_result):
                        log.debug(u" Mapping-value: Matched all regex rules")
                    else:
                        log.debug(u" Mapping-value: Did not match all regex rules")
                        self.errors.append(SchemaError.SchemaErrorEntry(
                            msg=u"Key '{key}' does not match all regex '{regex}'. Path: '{path}'",
                            path=path,
                            value=value,
                            key=k,
                            regex="' and '".join(sorted([mm[0].map_regex_rule for mm in regex_mappings]))))
                else:
                    log.debug(u" Mapping-value: No mapping rule defined")
            else:
                if not rule.allowempty_map:
                    self.errors.append(SchemaError.SchemaErrorEntry(
                        msg=u"Key '{key}' was not defined. Path: '{path}'",
                        path=path,
                        value=value,
                        key=k))

    def _validate_scalar(self, value, rule, path, done=None):
        """
        Validate a scalar value against rule.

        Runs, in order, the func, assert, enum, type, pattern, range, length,
        timestamp and date checks that are defined on the rule. Validation stops
        early if the value is None or if it is not of the type the rule defines.

        :return: True if value is None, otherwise None.
        :raises CoreError: If a range or date check is done on a non scalar value.
        """
        log.debug(u"Validate scalar")
        log.debug(u" Scalar : Value : %s", value)
        log.debug(u" Scalar : Rule : %s", rule)
        log.debug(u" Scalar : RuleType : %s", rule.type)
        log.debug(u" Scalar : Path %s", path)

        # Handle 'func' argument on this scalar
        self._handle_func(value, rule, path, done)

        if rule.assertion is not None:
            self._validate_assert(rule, value, path)

        if value is None:
            return True

        if rule.enum is not None and value not in rule.enum:
            self.errors.append(SchemaError.SchemaErrorEntry(
                msg=u"Enum '{value}' does not exist. Path: '{path}' Enum: {enum_values}",
                path=path,
                value=nativestr(value) if tt['str'](value) else value,
                enum_values=rule.enum,
            ))

        # Set default value
        # NOTE(review): can never trigger because of the early return above when value is None
        if rule.default and value is None:
            value = rule.default

        if not self._validate_scalar_type(value, rule.type, path):
            return

        if value is None:
            return

        if rule.pattern is not None:
            #
            # Try to trim away the surrounding slashes around ruby style /<regex>/ if they are defined.
            # This is a quirk from ruby that they define regex patterns with surrounding slashes.
            # Docs on how ruby regex works can be found here: https://ruby-doc.org/core-2.4.0/Regexp.html
            # The original ruby implementation uses this code to validate patterns
            # unless value.to_s =~ rule.regexp
            # Because python do not work with surrounding slashes we have to trim them away in order to make the regex work
            #
            if rule.pattern.startswith('/') and rule.pattern.endswith('/') and self.fix_ruby_style_regex:
                rule.pattern = rule.pattern[1:-1]
                log.debug("Trimming slashes around ruby style regex. New pattern value: '{0}'".format(rule.pattern))

            try:
                log.debug("Matching pattern '{0}' to regex '{1}".format(rule.pattern, value))
                res = re.match(rule.pattern, value, re.UNICODE)
            except TypeError:
                # Values that is not a string can't be matched against a pattern
                res = None

            if res is None:  # Not matching
                self.errors.append(SchemaError.SchemaErrorEntry(
                    msg=u"Value '{value}' does not match pattern '{pattern}'. Path: '{path}'",
                    path=path,
                    value=nativestr(str(value)),
                    pattern=rule._pattern))
            else:
                log.debug("Pattern matched...")

        if rule.range is not None:
            if not is_scalar(value):
                raise CoreError(u"value is not a valid scalar")

            r = rule.range

            # For values that have a length the range applies to the length. A separate
            # variable is used so the checks below still see the original value.
            try:
                range_value = len(value)
            except TypeError:
                range_value = value

            self._validate_range(
                r.get("max"),
                r.get("min"),
                r.get("max-ex"),
                r.get("min-ex"),
                range_value,
                path,
                "scalar",
            )

        if rule.length is not None:
            self._validate_length(
                rule.length,
                value,
                path,
                'scalar',
            )

        # Validate timestamp
        if rule.type == "timestamp":
            self._validate_scalar_timestamp(value, path)

        if rule.type == "date":
            if not is_scalar(value):
                raise CoreError(u'value is not a valid scalar')
            date_format = rule.format
            self._validate_scalar_date(value, date_format, path)

    def _validate_scalar_timestamp(self, timestamp_value, path):
        """
        Validate that timestamp_value is a valid timestamp.

        Numbers, and strings that contains an integer, must be within 1 and 2147483647.
        Other strings must be non empty and parsable by dateutil. datetime objects are
        always valid and all other types are invalid.
        """
        def _check_int_timestamp_boundaries(timestamp):
            """
            Record an error if the numeric timestamp is outside of the allowed boundaries.
            """
            # NOTE(review): a value of 0 is rejected here as well, even if the message says "below 0"
            if timestamp < 1:
                # Timestamp integers can't be negative
                self.errors.append(SchemaError.SchemaErrorEntry(
                    msg=u"Integer value of timestamp can't be below 0",
                    path=path,
                    value=timestamp,
                    timestamp=str(timestamp),
                ))
            if timestamp > 2147483647:
                # Timestamp integers can't be above the upper limit of
                # 32 bit integers
                self.errors.append(SchemaError.SchemaErrorEntry(
                    msg=u"Integer value of timestamp can't be above 2147483647",
                    path=path,
                    value=timestamp,
                    timestamp=str(timestamp),
                ))

        if isinstance(timestamp_value, (int, float)):
            _check_int_timestamp_boundaries(timestamp_value)
        elif isinstance(timestamp_value, datetime.datetime):
            # Datetime objects currently have nothing to validate.
            # In the future, more options will be added to datetime validation
            pass
        elif isinstance(timestamp_value, basestring):
            v = timestamp_value.strip()

            # parse("") will give a valid date but it should not be
            # considered a valid timestamp
            if v == "":
                self.errors.append(SchemaError.SchemaErrorEntry(
                    msg=u"Timestamp value is empty. Path: '{path}'",
                    path=path,
                    value=nativestr(timestamp_value),
                    timestamp=nativestr(timestamp_value)))
            else:
                # A string can contain a valid unit timestamp integer. Check if it is valid and validate it
                try:
                    int_v = int(v)
                    _check_int_timestamp_boundaries(int_v)
                except ValueError:
                    # Just continue to parse it as a timestamp
                    try:
                        parse(timestamp_value)
                        # If it can be parsed then it is valid
                    except Exception:
                        self.errors.append(SchemaError.SchemaErrorEntry(
                            msg=u"Timestamp: '{timestamp}'' is invalid. Path: '{path}'",
                            path=path,
                            value=nativestr(timestamp_value),
                            timestamp=nativestr(timestamp_value)))
        else:
            self.errors.append(SchemaError.SchemaErrorEntry(
                msg=u"Not a valid timestamp",
                path=path,
                value=timestamp_value,
                timestamp=timestamp_value,
            ))

    def _validate_scalar_date(self, date_value, date_formats, path):
        """
        Validate that date_value is a valid date.

        A string is valid if it matches at least one of date_formats, or if it can be
        parsed by dateutil when no formats are given. date and datetime objects are
        always valid and all other types are invalid.
        """
        log.debug(u"Validate date : %(value)s : %(format)s : %(path)s" % {
            'value': date_value,
            'format': date_formats,
            'path': path,
        })

        if isinstance(date_value, str):
            # If a date_format is specefied then use strptime on all formats
            # If no date_format is specefied then use dateutils.parse() to test the value
            log.debug(date_formats)

            if date_formats:
                # Run through all date_formats and it is valid if atleast one of them passed time.strptime() parsing
                valid = False
                for date_format in date_formats:
                    try:
                        time.strptime(date_value, date_format)
                        valid = True
                        # One matching format is enough
                        break
                    except ValueError:
                        pass

                if not valid:
                    # date_format is the last format that was tried
                    self.errors.append(SchemaError.SchemaErrorEntry(
                        msg=u"Not a valid date: {value} format: {format}. Path: '{path}'",
                        path=path,
                        value=date_value,
                        format=date_format,
                    ))
                    return
            else:
                try:
                    parse(date_value)
                except (ValueError, OverflowError):
                    # dateutil raises OverflowError for dates that do not fit in the system integer types
                    self.errors.append(SchemaError.SchemaErrorEntry(
                        msg=u"Not a valid date: {value} Path: '{path}'",
                        path=path,
                        value=date_value,
                    ))
        elif isinstance(date_value, (datetime.date, datetime.datetime)):
            # If the object already is a datetime or date object it passes validation
            pass
        else:
            # If value is any other type then raise error
            self.errors.append(SchemaError.SchemaErrorEntry(
                msg=u"Not a valid date: {value} date must be a string or a datetime.date not a '{type}'",
                path=path,
                value=date_value,
                type=type(date_value).__name__,
            ))

    def _validate_length(self, rule, value, path, prefix):
        """
        Validate that the length of the string value is within the limits in rule.

        :param rule: Dict that can contain the keys 'max', 'min', 'max-ex' and 'min-ex'.
        :raises CoreError: If value is not a string.
        """
        if not is_string(value):
            raise CoreError("Value: '{0}' must be a 'str' type for length check to work".format(value))

        value_length = len(str(value))
        max_, min_, max_ex, min_ex = rule.get('max'), rule.get('min'), rule.get('max-ex'), rule.get('min-ex')

        log.debug(
            u"Validate length : %s : %s : %s : %s : %s : %s",
            max_, min_, max_ex, min_ex, value, path,
        )

        if max_ is not None and max_ < value_length:
            self.errors.append(SchemaError.SchemaErrorEntry(
                msg=u"Value: '{value_str}' has length of '{value}', greater than max limit '{max_}'. Path: '{path}'",
                value_str=value,
                path=path,
                value=len(value),
                prefix=prefix,
                max_=max_))

        if min_ is not None and min_ > value_length:
            self.errors.append(SchemaError.SchemaErrorEntry(
                msg=u"Value: '{value_str}' has length of '{value}', less than min limit '{min_}'. Path: '{path}'",
                value_str=value,
                path=path,
                value=len(value),
                prefix=prefix,
                min_=min_))

        if max_ex is not None and max_ex <= value_length:
            self.errors.append(SchemaError.SchemaErrorEntry(
                msg=u"Value: '{value_str}' has length of '{value}', greater than max_ex limit '{max_ex}'. Path: '{path}'",
                value_str=value,
                path=path,
                value=len(value),
                prefix=prefix,
                max_ex=max_ex))

        if min_ex is not None and min_ex >= value_length:
            self.errors.append(SchemaError.SchemaErrorEntry(
                msg=u"Value: '{value_str}' has length of '{value}', less than min_ex limit '{min_ex}'. Path: '{path}'",
                value_str=value,
                path=path,
                value=len(value),
                prefix=prefix,
                min_ex=min_ex))

    def _validate_assert(self, rule, value, path):
        """
        Run the python expression in rule.assertion with value available as 'val'.

        A failing assertion is recorded as a validation error.

        SECURITY: the expression from the schema is executed with exec() so schemas
        from untrusted sources must never be used together with allow_assertions.

        :raises CoreError: If assertions is not allowed on this Core object.
        :raises Exception: If the expression raises anything else than AssertionError.
        """
        if not self.allow_assertions:
            raise CoreError('To allow usage of keyword "assert" you must use cli flag "--allow-assertions" or set the keyword "allow_assert" in Core class')

        # The value is handed over via the namespace and not formatted into the source
        # code. That way strings with quotes or newlines work and the validated data
        # can never be executed as code.
        assertion_string = "assert {0}".format(rule.assertion)
        try:
            exec(assertion_string, {}, {"val": value})
        except AssertionError:
            self.errors.append(SchemaError.SchemaErrorEntry(
                msg=u"Value: '{0}' assertion expression failed ({1})".format(value, rule.assertion),
                path=path,
                value=value,
            ))
            return
        except Exception as err:
            error_class = err.__class__.__name__
            # Exceptions can be raised without any arguments
            detail = err.args[0] if err.args else ''
            cl, exc, tb = sys.exc_info()
            line_number = traceback.extract_tb(tb)[-1][1]
            raise Exception("Unknown error during assertion\n{0}\n{1}\n{2}\n{3}\n{4}\n{5}".format(
                error_class, detail, cl, exc, tb, line_number,
            )) from err

    def _validate_range(self, max_, min_, max_ex, min_ex, value, path, prefix):
        """
        Validate that value is within range values.

        :param max_: Inclusive upper limit or None.
        :param min_: Inclusive lower limit or None.
        :param max_ex: Exclusive upper limit or None.
        :param min_ex: Exclusive lower limit or None.
        :param value: The number to check.
        :param prefix: Name of the checked type, used in the error messages.
        :raises CoreError: If value is not an int or float.
        """
        if not isinstance(value, int) and not isinstance(value, float):
            raise CoreError("Value must be a integer type")

        log.debug(
            u"Validate range : %s : %s : %s : %s : %s : %s",
            max_,
            min_,
            max_ex,
            min_ex,
            value,
            path,
        )

        if max_ is not None and max_ < value:
            self.errors.append(SchemaError.SchemaErrorEntry(
                msg=u"Type '{prefix}' has size of '{value}', greater than max limit '{max_}'. Path: '{path}'",
                path=path,
                value=nativestr(value) if tt['str'](value) else value,
                prefix=prefix,
                max_=max_))

        if min_ is not None and min_ > value:
            self.errors.append(SchemaError.SchemaErrorEntry(
                msg=u"Type '{prefix}' has size of '{value}', less than min limit '{min_}'. Path: '{path}'",
                path=path,
                value=nativestr(value) if tt['str'](value) else value,
                prefix=prefix,
                min_=min_))

        if max_ex is not None and max_ex <= value:
            self.errors.append(SchemaError.SchemaErrorEntry(
                msg=u"Type '{prefix}' has size of '{value}', greater than or equals to max limit(exclusive) '{max_ex}'. Path: '{path}'",
                path=path,
                value=nativestr(value) if tt['str'](value) else value,
                prefix=prefix,
                max_ex=max_ex))

        if min_ex is not None and min_ex >= value:
            self.errors.append(SchemaError.SchemaErrorEntry(
                msg=u"Type '{prefix}' has size of '{value}', less than or equals to min limit(exclusive) '{min_ex}'. Path: '{path}'",
                path=path,
                value=nativestr(value) if tt['str'](value) else value,
                prefix=prefix,
                min_ex=min_ex))

    def _validate_scalar_type(self, value, t, path):
        """
        Validate that value is of the scalar type named t.

        :param t: Name of a type check in the pykwalify.types.tt mapping.
        :return: True if value is of the type, False if an error was recorded.
        :raises CoreError: If t is not a known type check.
        """
        log.debug(u" # Core scalar: validating scalar type : %s", t)
        log.debug(u" # Core scalar: scalar type: %s", type(value))

        try:
            if not tt[t](value):
                self.errors.append(SchemaError.SchemaErrorEntry(
                    msg=u"Value '{value}' is not of type '{scalar_type}'. Path: '{path}'",
                    path=path,
                    value=unicode(value) if tt['str'](value) else value,
                    scalar_type=t))
                return False
            return True
        except KeyError as e:
            # Type not found in valid types mapping
            log.debug(e)
            raise CoreError(u"Unknown type check: {0!s} : {1!s} : {2!s}".format(path, value, t))