1# -*- coding: utf-8 -*-
2
3""" pyKwalify - core.py """
4
5# python std lib
6import datetime
7import json
8import logging
9import os
10import re
11import sys
12import traceback
13import time
14from io import open
15from importlib.machinery import SourceFileLoader
16
17# pyKwalify imports
18import pykwalify
19from pykwalify.compat import unicode, nativestr, basestring
20from pykwalify.errors import CoreError, SchemaError, NotMappingError, NotSequenceError
21from pykwalify.rule import Rule
22from pykwalify.types import is_scalar, is_string, tt
23
24# 3rd party imports
25from dateutil.parser import parse
26from pykwalify.compat import yml
27from ruamel.yaml.constructor import Constructor
28
29log = logging.getLogger(__name__)
30
31
32class Core(object):
33    """ Core class of pyKwalify """
34
35    def __init__(self, source_file=None, schema_files=None, source_data=None, schema_data=None, extensions=None, strict_rule_validation=False,
36                 fix_ruby_style_regex=False, allow_assertions=False, file_encoding=None, schema_file_obj=None, data_file_obj=None):
37        """
38        :param extensions:
            List of paths to python files that should be imported and made available via the 'func' keyword.
            This list of extensions can be set manually or provided via the `--extension`
            flag from the cli. It should not contain files specified by the `extensions` list keyword
            that can be defined at the top level of the schema.
43        """
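        # Typical usage, as a minimal sketch (file names below are illustrative only):
        #
        #   from pykwalify.core import Core
        #
        #   c = Core(source_file="data.yaml", schema_files=["schema.yaml"])
        #   c.validate(raise_exception=True)
        #
        # Already parsed data can be passed via source_data/schema_data, and open file
        # objects via data_file_obj/schema_file_obj.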
44        if schema_files is None:
45            schema_files = []
46        if extensions is None:
47            extensions = []
48
49        log.debug(u"source_file: %s", source_file)
        log.debug(u"schema_files: %s", schema_files)
51        log.debug(u"source_data: %s", source_data)
52        log.debug(u"schema_data: %s", schema_data)
53        log.debug(u"extension files: %s", extensions)
54
55        self.source = None
56        self.schema = None
57        self.validation_errors = None
58        self.validation_errors_exceptions = None
59        self.root_rule = None
60        self.extensions = extensions
61        self.errors = []
62        self.strict_rule_validation = strict_rule_validation
63        self.fix_ruby_style_regex = fix_ruby_style_regex
64        self.allow_assertions = allow_assertions
65
66        # Patch in all the normal python types into the yaml load instance so we can use all the
67        # internal python types in the yaml loading.
68        yml.constructor.add_constructor('tag:yaml.org,2002:python/bool', Constructor.construct_yaml_bool)
69        yml.constructor.add_constructor('tag:yaml.org,2002:python/complex', Constructor.construct_python_complex)
70        yml.constructor.add_constructor('tag:yaml.org,2002:python/dict', Constructor.construct_yaml_map)
71        yml.constructor.add_constructor('tag:yaml.org,2002:python/float', Constructor.construct_yaml_float)
72        yml.constructor.add_constructor('tag:yaml.org,2002:python/int', Constructor.construct_yaml_int)
73        yml.constructor.add_constructor('tag:yaml.org,2002:python/list', Constructor.construct_yaml_seq)
74        yml.constructor.add_constructor('tag:yaml.org,2002:python/long', Constructor.construct_python_long)
75        yml.constructor.add_constructor('tag:yaml.org,2002:python/none', Constructor.construct_yaml_null)
76        yml.constructor.add_constructor('tag:yaml.org,2002:python/str', Constructor.construct_python_str)
77        yml.constructor.add_constructor('tag:yaml.org,2002:python/tuple', Constructor.construct_python_tuple)
78        yml.constructor.add_constructor('tag:yaml.org,2002:python/unicode', Constructor.construct_python_unicode)
79
80        if data_file_obj:
81            try:
82                self.source = yml.load(data_file_obj.read())
83            except Exception as e:
84                raise CoreError("Unable to load data_file_obj input")
85
86        if schema_file_obj:
87            try:
88                self.schema = yml.load(schema_file_obj.read())
89            except Exception as e:
90                raise CoreError("Unable to load schema_file_obj")
91
92        if source_file is not None:
93            if not os.path.exists(source_file):
                raise CoreError(u"Provided source_file does not exist on disk: {0}".format(source_file))
95
96            with open(source_file, "r", encoding=file_encoding) as stream:
97                if source_file.endswith(".json"):
98                    self.source = json.load(stream)
99                elif source_file.endswith(".yaml") or source_file.endswith('.yml'):
100                    self.source = yml.load(stream)
101                else:
102                    raise CoreError(u"Unable to load source_file. Unknown file format of specified file path: {0}".format(source_file))
103
104        if not isinstance(schema_files, list):
105            raise CoreError(u"schema_files must be of list type")
106
107        # Merge all schema files into one single file for easy parsing
108        if len(schema_files) > 0:
109            schema_data = {}
110            for f in schema_files:
111                if not os.path.exists(f):
                    raise CoreError(u"Provided schema_file does not exist on disk : {0}".format(f))
113
114                with open(f, "r", encoding=file_encoding) as stream:
115                    if f.endswith(".json"):
116                        data = json.load(stream)
117                    elif f.endswith(".yaml") or f.endswith(".yml"):
118                        data = yml.load(stream)
119                        if not data:
120                            raise CoreError(u"No data loaded from file : {0}".format(f))
121                    else:
                        raise CoreError(u"Unable to load file : {0} : Unknown file format. Supported file endings are [.json, .yaml, .yml]".format(f))
123
124                    for key in data.keys():
125                        if key in schema_data.keys():
                            raise CoreError(u"Parsed key : {0} : more than once in schema files...".format(key))
127
128                    schema_data = dict(schema_data, **data)
129
130            self.schema = schema_data
131
132        # Nothing was loaded so try the source_data variable
133        if self.source is None:
134            log.debug(u"No source file loaded, trying source data variable")
135            self.source = source_data
136        if self.schema is None:
137            log.debug(u"No schema file loaded, trying schema data variable")
138            self.schema = schema_data
139
140        # Test if anything was loaded
141        if self.source is None:
142            raise CoreError(u"No source file/data was loaded")
143        if self.schema is None:
144            raise CoreError(u"No schema file/data was loaded")
145
146        # Merge any extensions defined in the schema with the provided list of extensions from the cli
147        for f in self.schema.get('extensions', []):
148            self.extensions.append(f)
149
        if not isinstance(self.extensions, list) or not all(isinstance(e, str) for e in self.extensions):
151            raise CoreError(u"Specified extensions must be a list of file paths")
152
153        self._load_extensions()
154
155        if self.strict_rule_validation:
156            log.info("Using strict rule keywords validation...")
157
158    def _load_extensions(self):
159        """
160        Load all extension files into the namespace pykwalify.ext
161        """
162        log.debug(u"loading all extensions : %s", self.extensions)
163
164        self.loaded_extensions = []
165
166        for f in self.extensions:
167            if not os.path.isabs(f):
168                f = os.path.abspath(f)
169
170            if not os.path.exists(f):
171                raise CoreError(u"Extension file: {0} not found on disk".format(f))
172
173            self.loaded_extensions.append(SourceFileLoader("", f).load_module())
174
175        log.debug(self.loaded_extensions)
176        log.debug([dir(m) for m in self.loaded_extensions])
177
178    def validate(self, raise_exception=True):
        """
        Run validation of the loaded source data against the loaded schema.
        Returns the validated data and raises SchemaError on failure unless raise_exception is False.
        """
181        log.debug(u"starting core")
182
183        self._start_validate(self.source)
184        self.validation_errors = [unicode(error) for error in self.errors]
185        self.validation_errors_exceptions = self.errors
186
187        if self.errors is None or len(self.errors) == 0:
188            log.info(u"validation.valid")
189        else:
190            log.error(u"validation.invalid")
191            log.error(u" --- All found errors ---")
192            log.error(self.validation_errors)
193            if raise_exception:
194                raise SchemaError(u"Schema validation failed:\n - {error_msg}.".format(
195                    error_msg=u'.\n - '.join(self.validation_errors)))
196            else:
197                log.error(u"Errors found but will not raise exception...")
198
199        # Return validated data
200        return self.source
201
202    def _start_validate(self, value=None):
        """
        Parse out any partial schemas, build the root rule and run validation of the given value against it.
        """
205        path = ""
206        self.errors = []
207        done = []
208
209        s = {}
210
211        # Look for schema; tags so they can be parsed before the root rule is parsed
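        # A partial schema is declared with a top level "schema;<name>" key and referenced
        # elsewhere with the include keyword. A rough schema sketch:
        #
        #   schema;person:
        #     type: map
        #     mapping:
        #       name:
        #         type: str
        #   type: seq
        #   sequence:
        #     - include: person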
212        for k, v in self.schema.items():
213            if k.startswith("schema;"):
214                log.debug(u"Found partial schema; : %s", v)
215                r = Rule(schema=v)
216                log.debug(u" Partial schema : %s", r)
217                pykwalify.partial_schemas[k.split(";", 1)[1]] = r
218            else:
                # re-add all items that are not partial schemas so they can be parsed
220                s[k] = v
221
222        self.schema = s
223
224        log.debug(u"Building root rule object")
225        root_rule = Rule(schema=self.schema)
226        self.root_rule = root_rule
227        log.debug(u"Done building root rule")
228        log.debug(u"Root rule: %s", self.root_rule)
229
230        self._validate(value, root_rule, path, done)
231
232    def _validate(self, value, rule, path, done):
        """
        Dispatch validation of the value to the include, sequence, mapping or scalar validator based on the rule.
        """
235        log.debug(u"Core validate")
236        log.debug(u" Root validate : Rule: %s", rule)
237        log.debug(u" Root validate : Rule_type: %s", rule.type)
238        log.debug(u" Root validate : Seq: %s", rule.sequence)
239        log.debug(u" Root validate : Map: %s", rule.mapping)
240        log.debug(u" Root validate : Done: %s", done)
241
        if rule.required and value is None and rule.type != 'none':
243            self.errors.append(SchemaError.SchemaErrorEntry(
244                msg=u"required.novalue : '{path}'",
245                path=path,
246                value=value.encode('unicode_escape') if value else value,
247            ))
248            return
249
        if not rule.nullable and value is None and rule.type != 'none':
251            self.errors.append(SchemaError.SchemaErrorEntry(
252                msg=u"nullable.novalue : '{path}'",
253                path=path,
254                value=value.encode('unicode_escape') if value else value,
255            ))
256            return
257
258        log.debug(u" ? ValidateRule: %s", rule)
259        if rule.include_name is not None:
260            self._validate_include(value, rule, path, done=None)
261        elif rule.sequence is not None:
262            self._validate_sequence(value, rule, path, done=None)
263        elif rule.mapping is not None or rule.allowempty_map:
264            self._validate_mapping(value, rule, path, done=None)
265        else:
266            self._validate_scalar(value, rule, path, done=None)
267
268    def _handle_func(self, value, rule, path, done=None):
269        """
270        Helper function that should check if func is specified for this rule and
271        then handle it for all cases in a generic way.
272        """
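        # Extension functions are looked up by name in the loaded extension modules and are
        # called as method(value, rule, path). Per the logic below: returning True passes,
        # any other truthy return value is recorded as a validation error, and a falsy
        # return (including None) raises CoreError. A rough sketch of an extension file
        # (the function name is illustrative):
        #
        #   def check_name(value, rule, path):
        #       return value != ""
        #
        # which a schema would reference with "func: check_name".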
273        func = rule.func
274
275        # func keyword is not defined so nothing to do
276        if not func:
277            return
278
279        found_method = False
280
281        for extension in self.loaded_extensions:
282            method = getattr(extension, func, None)
283            if method:
284                found_method = True
285
                # No exception should be caught here. If one is raised it should bubble up all the way.
287                ret = method(value, rule, path)
288                if ret is not True and ret is not None:
289                    msg = '%s. Path: {path}' % unicode(ret)
290                    self.errors.append(SchemaError.SchemaErrorEntry(
291                                    msg=msg,
292                                    path=path,
293                                    value=None))
294
295                # If False or None or some other object that is interpreted as False
296                if not ret:
297                    raise CoreError(u"Error when running extension function : {0}".format(func))
298
                # Only run the first matched function. Since loading order is deterministic
                # it should be easy to determine which file is used before others
301                break
302
303        if not found_method:
304            raise CoreError(u"Did not find method '{0}' in any loaded extension file".format(func))
305
306    def _validate_include(self, value, rule, path, done=None):
        """
        Validate the value against the partial schema referenced by the include keyword on the rule.
        """
309        # TODO: It is difficult to get a good test case to trigger this if case
310        if rule.include_name is None:
311            self.errors.append(SchemaError.SchemaErrorEntry(
312                msg=u'Include name not valid',
313                path=path,
314                value=value.encode('unicode_escape')))
315            return
316        include_name = rule.include_name
317        partial_schema_rule = pykwalify.partial_schemas.get(include_name)
318        if not partial_schema_rule:
319            self.errors.append(SchemaError.SchemaErrorEntry(
320                msg=u"Cannot find partial schema with name '{include_name}'. Existing partial schemas: '{existing_schemas}'. Path: '{path}'",
321                path=path,
322                value=value,
323                include_name=include_name,
324                existing_schemas=", ".join(sorted(pykwalify.partial_schemas.keys()))))
325            return
326
327        self._validate(value, partial_schema_rule, path, done)
328
329    def _validate_sequence(self, value, rule, path, done=None):
        """
        Validate the value against a sequence rule.
        """
332        log.debug(u"Core Validate sequence")
333        log.debug(u" Sequence : Data: %s", value)
334        log.debug(u" Sequence : Rule: %s", rule)
335        log.debug(u" Sequence : RuleType: %s", rule.type)
336        log.debug(u" Sequence : Path: %s", path)
337        log.debug(u" Sequence : Seq: %s", rule.sequence)
338        log.debug(u" Sequence : Map: %s", rule.mapping)
339
340        if len(rule.sequence) <= 0:
            raise CoreError(u"Sequence must contain at least one item : {0}".format(path))
342
343        if value is None:
344            log.debug(u" * Core seq: sequence data is None")
345            return
346
347        if not isinstance(value, list):
348            if isinstance(value, str):
349                value = value.encode('unicode_escape')
350            self.errors.append(SchemaError.SchemaErrorEntry(
351                u"Value '{value}' is not a list. Value path: '{path}'",
352                path,
353                value,
354            ))
355            return
356
357        # Handle 'func' argument on this sequence
358        self._handle_func(value, rule, path, done)
359
360        ok_values = []
361        error_tracker = []
362
363        unique_errors = {}
364        map_unique_errors = {}
365
366        for i, item in enumerate(value):
367            processed = []
368
369            for r in rule.sequence:
370                tmp_errors = []
371
372                try:
                    # Create a sub core object to enable error tracking that does not
                    # collide with this Core object's errors
375                    tmp_core = Core(source_data={}, schema_data={})
376                    tmp_core.fix_ruby_style_regex = self.fix_ruby_style_regex
377                    tmp_core.allow_assertions = self.allow_assertions
378                    tmp_core.strict_rule_validation = self.strict_rule_validation
379                    tmp_core.loaded_extensions = self.loaded_extensions
380                    tmp_core._validate(item, r, "{0}/{1}".format(path, i), done)
381                    tmp_errors = tmp_core.errors
382                except NotMappingError:
                    # For example: If one type was specified as 'map' but the data
                    # was 'str', an exception will be thrown but we should ignore it
                    pass
                except NotSequenceError:
                    # For example: If one type was specified as 'seq' but the data
                    # was 'str', an exception will be thrown but we should ignore it
                    pass
390
391                processed.append(tmp_errors)
392
393                if r.type == "map":
394                    log.debug(u" * Found map inside sequence")
395                    unique_keys = []
396
397                    if r.mapping is None:
                        log.debug(u" + No rule to apply, probably because of allowempty: True")
399                        return
400
401                    for k, _rule in r.mapping.items():
402                        log.debug(u" * Key: %s", k)
403                        log.debug(u" * Rule: %s", _rule)
404
405                        if _rule.unique or _rule.ident:
406                            unique_keys.append(k)
407
408                    if len(unique_keys) > 0:
409                        for v in unique_keys:
410                            table = {}
411                            for j, V in enumerate(value):
                                # If the key does not exist it should be ignored by unique because that is not a broken constraint
413                                val = V.get(v, None)
414
415                                if val is None:
416                                    continue
417
418                                if val in table:
419                                    curr_path = "{0}/{1}/{2}".format(path, j, v)
420                                    prev_path = "{0}/{1}/{2}".format(path, table[val], v)
421                                    s = SchemaError.SchemaErrorEntry(
422                                        msg=u"Value '{duplicate}' is not unique. Previous path: '{prev_path}'. Path: '{path}'",
423                                        path=curr_path,
424                                        value=value,
425                                        duplicate=val,
426                                        prev_path=prev_path,
427                                    )
428                                    map_unique_errors[s.__repr__()] = s
429                                else:
430                                    table[val] = j
431                elif r.unique:
432                    log.debug(u" * Found unique value in sequence")
433                    table = {}
434
435                    for j, val in enumerate(value):
436                        if val is None:
437                            continue
438
439                        if val in table:
440                            curr_path = "{0}/{1}".format(path, j)
441                            prev_path = "{0}/{1}".format(path, table[val])
442                            s = SchemaError.SchemaErrorEntry(
443                                msg=u"Value '{duplicate}' is not unique. Previous path: '{prev_path}'. Path: '{path}'",
444                                path=curr_path,
445                                value=value,
446                                duplicate=val,
447                                prev_path=prev_path,
448                            )
449                            unique_errors[s.__repr__()] = s
450                        else:
451                            table[val] = j
452
453            error_tracker.append(processed)
454            no_errors = []
455            for _errors in processed:
456                no_errors.append(len(_errors) == 0)
457
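            # The 'matching' keyword controls how each item must relate to the listed
            # sequence rules: "any" needs at least one rule to pass, "all" needs every
            # rule to pass and "*" accepts the item regardless. A rough schema sketch:
            #
            #   type: seq
            #   matching: any
            #   sequence:
            #     - type: str
            #     - type: int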
458            if rule.matching == "any":
459                log.debug(u" * any rule %s", True in no_errors)
460                ok_values.append(True in no_errors)
461            elif rule.matching == "all":
                log.debug(u" * all rule %s", all(no_errors))
463                ok_values.append(all(no_errors))
464            elif rule.matching == "*":
                log.debug(u" * star rule")
466                ok_values.append(True)
467
        for _error in unique_errors:
            self.errors.append(unique_errors[_error])

        for _error in map_unique_errors:
            self.errors.append(map_unique_errors[_error])
473
474        log.debug(u" * ok : %s", ok_values)
475
        # All values must pass the validation, otherwise add the parsed errors
        # to the global error list and report them.
478        if not all(ok_values):
            # Ignore checking for '*' type because it should always go through
480            if rule.matching == "any":
481                log.debug(u" * Value: %s did not validate against one or more sequence schemas", value)
482            elif rule.matching == "all":
483                log.debug(u" * Value: %s did not validate against all possible sequence schemas", value)
484
485            for i, is_ok in enumerate(ok_values):
486                if not is_ok:
487                    for error in error_tracker[i]:
488                        for e in error:
489                            self.errors.append(e)
490
        log.debug(u" * Core seq: validation recursively done...")
492
493        if rule.range is not None:
494            rr = rule.range
495
496            self._validate_range(
497                rr.get("max"),
498                rr.get("min"),
499                rr.get("max-ex"),
500                rr.get("min-ex"),
501                len(value),
502                path,
503                "seq",
504            )
505
506    def _validate_mapping(self, value, rule, path, done=None):
        """
        Validate the value against a mapping rule.
        """
509        log.debug(u"Validate mapping")
510        log.debug(u" Mapping : Data: %s", value)
511        log.debug(u" Mapping : Rule: %s", rule)
512        log.debug(u" Mapping : RuleType: %s", rule.type)
513        log.debug(u" Mapping : Path: %s", path)
514        log.debug(u" Mapping : Seq: %s", rule.sequence)
515        log.debug(u" Mapping : Map: %s", rule.mapping)
516
517        if not isinstance(value, dict):
518            self.errors.append(SchemaError.SchemaErrorEntry(
519                u"Value '{value}' is not a dict. Value path: '{path}'",
520                path,
521                value,
522            ))
523            return
524
525        if rule.mapping is None:
            log.debug(u" + No rule to apply, probably because of allowempty: True")
527            return
528
529        # Handle 'func' argument on this mapping
530        self._handle_func(value, rule, path, done)
531
532        m = rule.mapping
533        log.debug(u"   Mapping: Rule-Mapping: %s", m)
534
535        if rule.range is not None:
536            r = rule.range
537
538            self._validate_range(
539                r.get("max"),
540                r.get("min"),
541                r.get("max-ex"),
542                r.get("min-ex"),
543                len(value),
544                path,
545                "map",
546            )
547
548        for k, rr in m.items():
            # Handle if the value of the key contains an include keyword
550            if rr.include_name is not None:
551                include_name = rr.include_name
552                partial_schema_rule = pykwalify.partial_schemas.get(include_name)
553
554                if not partial_schema_rule:
555                    self.errors.append(SchemaError.SchemaErrorEntry(
556                        msg=u"Cannot find partial schema with name '{include_name}'. Existing partial schemas: '{existing_schemas}'. Path: '{path}'",
557                        path=path,
558                        value=value,
559                        include_name=include_name,
560                        existing_schemas=", ".join(sorted(pykwalify.partial_schemas.keys()))))
561                    return
562
563                rr = partial_schema_rule
564
565            # Find out if this is a regex rule
566            is_regex_rule = False
567            required_regex = ""
568            for regex_rule in rule.regex_mappings:
569                if k == "regex;({})".format(regex_rule.map_regex_rule) or k == "re;({})".format(regex_rule.map_regex_rule):
570                    is_regex_rule = True
571                    required_regex = regex_rule.map_regex_rule
572
            # Check for the presence of the required key
574            is_present = False
575            if not is_regex_rule:
576                is_present = k in value
577            else:
578                is_present = any([re.search(required_regex, str(v)) for v in value])
579
580            # Specifying =: as key is considered the "default" if no other keys match
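            # A rough sketch of a mapping that uses '=' as a catch-all rule for keys that
            # are not listed explicitly:
            #
            #   mapping:
            #     name:
            #       type: str
            #     =:
            #       type: any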
581            if rr.required and not is_present and k != "=":
582                self.errors.append(SchemaError.SchemaErrorEntry(
583                    msg=u"Cannot find required key '{key}'. Path: '{path}'",
584                    path=path,
585                    value=value,
586                    key=k))
587            if k not in value and rr.default is not None:
588                value[k] = rr.default
589
590        for k, v in value.items():
591            # If no other case was a match, check if a default mapping is valid/present and use
592            # that one instead
593            r = m.get(k, m.get('='))
594            log.debug(u"  Mapping-value : %s", m)
595            log.debug(u"  Mapping-value : %s %s", k, v)
596            log.debug(u"  Mapping-value : %s", r)
597
598            regex_mappings = [(regex_rule, re.search(regex_rule.map_regex_rule, str(k))) for regex_rule in rule.regex_mappings]
599            log.debug(u"  Mapping-value: Mapping Regex matches: %s", regex_mappings)
600
601            if r is not None:
602                # validate recursively
603                log.debug(u"  Mapping-value: Core Map: validate recursively: %s", r)
604                self._validate(v, r, u"{0}/{1}".format(path, k), done)
605            elif any(regex_mappings):
606                sub_regex_result = []
607
608                # Found at least one that matches a mapping regex
609                for mm in regex_mappings:
610                    if mm[1]:
                        log.debug(u"  Mapping-value: Matching regex pattern: %s", mm[0])
612                        self._validate(v, mm[0], "{0}/{1}".format(path, k), done)
613                        sub_regex_result.append(True)
614                    else:
615                        sub_regex_result.append(False)
616
617                if rule.matching_rule == "any":
618                    if any(sub_regex_result):
619                        log.debug(u"  Mapping-value: Matched at least one regex")
620                    else:
621                        log.debug(u"  Mapping-value: No regex matched")
622                        self.errors.append(SchemaError.SchemaErrorEntry(
623                            msg=u"Key '{key}' does not match any regex '{regex}'. Path: '{path}'",
624                            path=path,
625                            value=value,
626                            key=k,
627                            regex="' or '".join(sorted([mm[0].map_regex_rule for mm in regex_mappings]))))
628                elif rule.matching_rule == "all":
629                    if all(sub_regex_result):
630                        log.debug(u"  Mapping-value: Matched all regex rules")
631                    else:
632                        log.debug(u"  Mapping-value: Did not match all regex rules")
633                        self.errors.append(SchemaError.SchemaErrorEntry(
634                            msg=u"Key '{key}' does not match all regex '{regex}'. Path: '{path}'",
635                            path=path,
636                            value=value,
637                            key=k,
638                            regex="' and '".join(sorted([mm[0].map_regex_rule for mm in regex_mappings]))))
639                else:
640                    log.debug(u"  Mapping-value: No mapping rule defined")
641            else:
642                if not rule.allowempty_map:
643                    self.errors.append(SchemaError.SchemaErrorEntry(
644                        msg=u"Key '{key}' was not defined. Path: '{path}'",
645                        path=path,
646                        value=value,
647                        key=k))
648
649    def _validate_scalar(self, value, rule, path, done=None):
        """
        Validate the value against a scalar rule (type, enum, pattern, range, length, etc.).
        """
652        log.debug(u"Validate scalar")
653        log.debug(u" Scalar : Value : %s", value)
654        log.debug(u" Scalar : Rule :  %s", rule)
655        log.debug(u" Scalar : RuleType : %s", rule.type)
656        log.debug(u" Scalar : Path %s", path)
657
658        # Handle 'func' argument on this scalar
659        self._handle_func(value, rule, path, done)
660
661        if rule.assertion is not None:
662            self._validate_assert(rule, value, path)
663
664        if value is None:
665            return True
666
667        if rule.enum is not None and value not in rule.enum:
668            self.errors.append(SchemaError.SchemaErrorEntry(
669                msg=u"Enum '{value}' does not exist. Path: '{path}' Enum: {enum_values}",
670                path=path,
671                value=nativestr(value) if tt['str'](value) else value,
672                enum_values=rule.enum,
673            ))
674
675        # Set default value
676        if rule.default and value is None:
677            value = rule.default
678
679        if not self._validate_scalar_type(value, rule.type, path):
680            return
681
682        if value is None:
683            return
684
685        if rule.pattern is not None:
            #
            # Try to trim away the surrounding slashes around ruby style /<regex>/ if they are defined.
            # This is a quirk from ruby, where regex patterns are defined with surrounding slashes.
            # Docs on how ruby regex works can be found here: https://ruby-doc.org/core-2.4.0/Regexp.html
            # The original ruby implementation uses this code to validate patterns
            #   unless value.to_s =~ rule.regexp
            # Because python regex patterns do not use surrounding slashes, we have to trim them away in order to make the regex work
            #
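            # For example, with fix_ruby_style_regex enabled, a schema pattern written as
            # /foo|bar/ is matched as the python regex foo|bar.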
694            if rule.pattern.startswith('/') and rule.pattern.endswith('/') and self.fix_ruby_style_regex:
695                rule.pattern = rule.pattern[1:-1]
696                log.debug("Trimming slashes around ruby style regex. New pattern value: '{0}'".format(rule.pattern))
697
698            try:
                log.debug("Matching value '{1}' against pattern '{0}'".format(rule.pattern, value))
700                res = re.match(rule.pattern, value, re.UNICODE)
701            except TypeError:
702                res = None
703
704            if res is None:  # Not matching
705                self.errors.append(SchemaError.SchemaErrorEntry(
706                    msg=u"Value '{value}' does not match pattern '{pattern}'. Path: '{path}'",
707                    path=path,
708                    value=nativestr(str(value)),
709                    pattern=rule._pattern))
710            else:
711                log.debug("Pattern matched...")
712
713        if rule.range is not None:
714            if not is_scalar(value):
715                raise CoreError(u"value is not a valid scalar")
716
717            r = rule.range
718
719            try:
720                v = len(value)
721                value = v
722            except Exception:
723                pass
724
725            self._validate_range(
726                r.get("max"),
727                r.get("min"),
728                r.get("max-ex"),
729                r.get("min-ex"),
730                value,
731                path,
732                "scalar",
733            )
734
735        if rule.length is not None:
736            self._validate_length(
737                rule.length,
738                value,
739                path,
740                'scalar',
741            )
742
743        # Validate timestamp
744        if rule.type == "timestamp":
745            self._validate_scalar_timestamp(value, path)
746
747        if rule.type == "date":
748            if not is_scalar(value):
749                raise CoreError(u'value is not a valid scalar')
750            date_format = rule.format
751            self._validate_scalar_date(value, date_format, path)
752
753    def _validate_scalar_timestamp(self, timestamp_value, path):
        """
        Validate that the value is a valid timestamp given as an integer/float, a datetime object or a parseable string.
        """
756        def _check_int_timestamp_boundaries(timestamp):
            """
            Validate that an integer timestamp is within the range of a 32 bit unix timestamp.
            """
            if timestamp < 1:
                # Integer timestamps must be positive (above 0)
                self.errors.append(SchemaError.SchemaErrorEntry(
                    msg=u"Integer value of timestamp can't be below 1",
763                    path=path,
764                    value=timestamp,
765                    timestamp=str(timestamp),
766                ))
767            if timestamp > 2147483647:
768                # Timestamp integers can't be above the upper limit of
769                # 32 bit integers
770                self.errors.append(SchemaError.SchemaErrorEntry(
771                    msg=u"Integer value of timestamp can't be above 2147483647",
772                    path=path,
773                    value=timestamp,
774                    timestamp=str(timestamp),
775                ))
776
777        if isinstance(timestamp_value, (int, float)):
778            _check_int_timestamp_boundaries(timestamp_value)
779        elif isinstance(timestamp_value, datetime.datetime):
780            # Datetime objects currently have nothing to validate.
781            # In the future, more options will be added to datetime validation
782            pass
783        elif isinstance(timestamp_value, basestring):
784            v = timestamp_value.strip()
785
786            # parse("") will give a valid date but it should not be
787            # considered a valid timestamp
788            if v == "":
789                self.errors.append(SchemaError.SchemaErrorEntry(
790                    msg=u"Timestamp value is empty. Path: '{path}'",
791                    path=path,
792                    value=nativestr(timestamp_value),
793                    timestamp=nativestr(timestamp_value)))
794            else:
                # A string can contain a valid unix timestamp integer. Check if it is valid and validate it
796                try:
797                    int_v = int(v)
798                    _check_int_timestamp_boundaries(int_v)
799                except ValueError:
800                    # Just continue to parse it as a timestamp
801                    try:
802                        parse(timestamp_value)
803                        # If it can be parsed then it is valid
804                    except Exception:
805                        self.errors.append(SchemaError.SchemaErrorEntry(
                            msg=u"Timestamp: '{timestamp}' is invalid. Path: '{path}'",
807                            path=path,
808                            value=nativestr(timestamp_value),
809                            timestamp=nativestr(timestamp_value)))
810        else:
811            self.errors.append(SchemaError.SchemaErrorEntry(
812                msg=u"Not a valid timestamp",
813                path=path,
814                value=timestamp_value,
815                timestamp=timestamp_value,
816            ))
817
818    def _validate_scalar_date(self, date_value, date_formats, path):
819        log.debug(u"Validate date : %(value)s : %(format)s : %(path)s" % {
820            'value': date_value,
821            'format': date_formats,
822            'path': path,
823        })
824
825        if isinstance(date_value, str):
            # If a date_format is specified then use strptime on all formats
            # If no date_format is specified then use dateutil's parse() to test the value
828            log.debug(date_formats)
829
830            if date_formats:
                # Run through all date_formats and the value is valid if at least one of them passes time.strptime() parsing
832                valid = False
833                for date_format in date_formats:
834                    try:
835                        time.strptime(date_value, date_format)
836                        valid = True
837                    except ValueError:
838                        pass
839
840                if not valid:
841                    self.errors.append(SchemaError.SchemaErrorEntry(
842                        msg=u"Not a valid date: {value} format: {format}. Path: '{path}'",
843                        path=path,
844                        value=date_value,
                        format=date_formats,
846                    ))
847                    return
848            else:
849                try:
850                    parse(date_value)
851                except ValueError:
852                    self.errors.append(SchemaError.SchemaErrorEntry(
853                        msg=u"Not a valid date: {value} Path: '{path}'",
854                        path=path,
855                        value=date_value,
856                    ))
857        elif isinstance(date_value, (datetime.date, datetime.datetime)):
858            # If the object already is a datetime or date object it passes validation
859            pass
860        else:
            # If the value is any other type then report an error
862            self.errors.append(SchemaError.SchemaErrorEntry(
863                msg=u"Not a valid date: {value} date must be a string or a datetime.date not a '{type}'",
864                path=path,
865                value=date_value,
866                type=type(date_value).__name__,
867            ))
868
869    def _validate_length(self, rule, value, path, prefix):
870        if not is_string(value):
871            raise CoreError("Value: '{0}' must be a 'str' type for length check to work".format(value))
872
873        value_length = len(str(value))
874        max_, min_, max_ex, min_ex = rule.get('max'), rule.get('min'), rule.get('max-ex'), rule.get('min-ex')
875
        log.debug(
            u"Validate length : %s : %s : %s : %s : %s : %s",
            max_, min_, max_ex, min_ex, value, path,
        )
880
881        if max_ is not None and max_ < value_length:
882            self.errors.append(SchemaError.SchemaErrorEntry(
883                msg=u"Value: '{value_str}' has length of '{value}', greater than max limit '{max_}'. Path: '{path}'",
884                value_str=value,
885                path=path,
886                value=len(value),
887                prefix=prefix,
888                max_=max_))
889
890        if min_ is not None and min_ > value_length:
891            self.errors.append(SchemaError.SchemaErrorEntry(
                msg=u"Value: '{value_str}' has length of '{value}', less than min limit '{min_}'. Path: '{path}'",
893                value_str=value,
894                path=path,
895                value=len(value),
896                prefix=prefix,
897                min_=min_))
898
899        if max_ex is not None and max_ex <= value_length:
900            self.errors.append(SchemaError.SchemaErrorEntry(
                msg=u"Value: '{value_str}' has length of '{value}', greater than or equal to max_ex limit '{max_ex}'. Path: '{path}'",
902                value_str=value,
903                path=path,
904                value=len(value),
905                prefix=prefix,
906                max_ex=max_ex))
907
908        if min_ex is not None and min_ex >= value_length:
909            self.errors.append(SchemaError.SchemaErrorEntry(
                msg=u"Value: '{value_str}' has length of '{value}', less than or equal to min_ex limit '{min_ex}'. Path: '{path}'",
911                value_str=value,
912                path=path,
913                value=len(value),
914                prefix=prefix,
915                min_ex=min_ex))
916
917    def _validate_assert(self, rule, value, path):
918        if not self.allow_assertions:
            raise CoreError('To allow usage of keyword "assert" you must use cli flag "--allow-assertions" or set the keyword "allow_assertions" in Core class')
920
921        # Small hack to make strings work as a value.
922        if isinstance(value, str):
923            assert_value_str = '"{0}"'.format(value)
924        else:
925            assert_value_str = '{0}'.format(value)
926
927        assertion_string = "val = {0}; assert {1}".format(assert_value_str, rule.assertion)
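        # For example, a rule with 'assert: val > 0' and the value 5 builds and executes
        # the string "val = 5; assert val > 0".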
928        try:
929            exec(assertion_string, {}, {})
930        except AssertionError:
931            self.errors.append(SchemaError.SchemaErrorEntry(
932                msg=u"Value: '{0}' assertion expression failed ({1})".format(value, rule.assertion),
933                path=path,
934                value=value,
935            ))
936            return
937        except Exception as err:
938            error_class = err.__class__.__name__
939            detail = err.args[0]
940            cl, exc, tb = sys.exc_info()
941            line_number = traceback.extract_tb(tb)[-1][1]
942            raise Exception("Unknown error during assertion\n{0}\n{1}\n{2}\n{3}\n{4}\n{5}".format(
943                error_class, detail, cl, exc, tb, line_number,
944            ))
945
946    def _validate_range(self, max_, min_, max_ex, min_ex, value, path, prefix):
947        """
948        Validate that value is within range values.
949        """
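        # The 'range' keyword in a schema maps onto these arguments, roughly:
        #
        #   range:
        #     min: 1       # value must be >= 1
        #     max: 10      # value must be <= 10
        #     min-ex: 0    # value must be > 0
        #     max-ex: 11   # value must be < 11
        #
        # For sequences and mappings the range is checked against their length.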
        if not isinstance(value, (int, float)):
            raise CoreError("Value must be an integer or a float type")
952
953        log.debug(
954            u"Validate range : %s : %s : %s : %s : %s : %s",
955            max_,
956            min_,
957            max_ex,
958            min_ex,
959            value,
960            path,
961        )
962
        if max_ is not None and max_ < value:
            self.errors.append(SchemaError.SchemaErrorEntry(
                msg=u"Type '{prefix}' has size of '{value}', greater than max limit '{max_}'. Path: '{path}'",
                path=path,
                value=nativestr(value) if tt['str'](value) else value,
                prefix=prefix,
                max_=max_))

        if min_ is not None and min_ > value:
            self.errors.append(SchemaError.SchemaErrorEntry(
                msg=u"Type '{prefix}' has size of '{value}', less than min limit '{min_}'. Path: '{path}'",
                path=path,
                value=nativestr(value) if tt['str'](value) else value,
                prefix=prefix,
                min_=min_))

        if max_ex is not None and max_ex <= value:
            self.errors.append(SchemaError.SchemaErrorEntry(
                msg=u"Type '{prefix}' has size of '{value}', greater than or equal to max limit (exclusive) '{max_ex}'. Path: '{path}'",
                path=path,
                value=nativestr(value) if tt['str'](value) else value,
                prefix=prefix,
                max_ex=max_ex))

        if min_ex is not None and min_ex >= value:
            self.errors.append(SchemaError.SchemaErrorEntry(
                msg=u"Type '{prefix}' has size of '{value}', less than or equal to min limit (exclusive) '{min_ex}'. Path: '{path}'",
                path=path,
                value=nativestr(value) if tt['str'](value) else value,
                prefix=prefix,
                min_ex=min_ex))
994
995    def _validate_scalar_type(self, value, t, path):
        """
        Validate that the value is of the given scalar type 't' as defined in pykwalify.types.
        """
998        log.debug(u" # Core scalar: validating scalar type : %s", t)
999        log.debug(u" # Core scalar: scalar type: %s", type(value))
1000
1001        try:
1002            if not tt[t](value):
1003                self.errors.append(SchemaError.SchemaErrorEntry(
1004                    msg=u"Value '{value}' is not of type '{scalar_type}'. Path: '{path}'",
1005                    path=path,
1006                    value=unicode(value) if tt['str'](value) else value,
1007                    scalar_type=t))
1008                return False
1009            return True
1010        except KeyError as e:
1011            # Type not found in valid types mapping
1012            log.debug(e)
1013            raise CoreError(u"Unknown type check: {0!s} : {1!s} : {2!s}".format(path, value, t))
1014