1# cython: language_level=3str
2
3import array
4import numbers
5try:
6    from collections.abc import Mapping, Sequence
7except ImportError:
8    # python2
9    from collections import Mapping, Sequence
10
11from . import const
12from ._six import long, is_str, iterkeys, itervalues
13from ._schema import (
14    extract_record_type, extract_logical_type, schema_name, parse_schema
15)
16from ._logical_writers import LOGICAL_WRITERS
17from ._schema_common import UnknownType
18from ._validate_common import ValidationError, ValidationErrorData
19
# C integer aliases available for typed declarations in this module.
# NOTE(review): the alias names assume `int` is 32 bits wide and
# `long long` is 64 bits wide on the target platform -- confirm for
# unusual ABIs.
ctypedef int int32
ctypedef unsigned int uint32
ctypedef unsigned long long ulong64
ctypedef long long long64

# Range limits copied from `const` into typed C variables when the module
# is initialised; validate_int and validate_long compare candidate values
# against these bounds.
cdef int32 INT_MIN_VALUE = const.INT_MIN_VALUE
cdef int32 INT_MAX_VALUE = const.INT_MAX_VALUE
cdef long64 LONG_MIN_VALUE = const.LONG_MIN_VALUE
cdef long64 LONG_MAX_VALUE = const.LONG_MAX_VALUE
29
30
cdef inline bint validate_null(datum):
    """Tell whether ``datum`` is the ``None`` singleton."""
    if datum is None:
        return True
    return False
33
34
cdef inline bint validate_boolean(datum):
    """Tell whether ``datum`` is a ``bool`` instance.

    Integers such as ``0`` and ``1`` are not accepted, because they are
    not instances of ``bool``.
    """
    if isinstance(datum, bool):
        return True
    return False
37
38
cdef inline bint validate_string(datum):
    """Report whether the ``is_str`` helper accepts ``datum``.

    The decision is delegated entirely to ``is_str``; no additional
    checks are made here.
    """
    cdef bint accepted = is_str(datum)
    return accepted
41
42
cdef inline bint validate_bytes(datum):
    """Tell whether ``datum`` is a ``bytes`` or ``bytearray`` instance."""
    return isinstance(datum, bytes) or isinstance(datum, bytearray)
45
46
cdef inline bint validate_int(datum):
    """Tell whether ``datum`` is an integer inside the 'int' range.

    The value must be an instance of ``int``, ``long`` or
    ``numbers.Integral``, must lie within
    ``INT_MIN_VALUE <= datum <= INT_MAX_VALUE`` and must not be a ``bool``.
    """
    if not isinstance(datum, (int, long, numbers.Integral)):
        return False
    if not (INT_MIN_VALUE <= datum <= INT_MAX_VALUE):
        return False
    # bool is a subclass of int, so it has to be rejected explicitly.
    return not isinstance(datum, bool)
53
54
cdef inline bint validate_long(datum):
    """Tell whether ``datum`` is an integer inside the 'long' range.

    The value must be an instance of ``int``, ``long`` or
    ``numbers.Integral``, must lie within
    ``LONG_MIN_VALUE <= datum <= LONG_MAX_VALUE`` and must not be a
    ``bool``.
    """
    if not isinstance(datum, (int, long, numbers.Integral)):
        return False
    if not (LONG_MIN_VALUE <= datum <= LONG_MAX_VALUE):
        return False
    # bool is a subclass of int, so it has to be rejected explicitly.
    return not isinstance(datum, bool)
61
62
cdef inline bint validate_float(datum):
    """Tell whether ``datum`` is a real number other than a ``bool``.

    Accepts instances of ``int``, ``long``, ``float`` and
    ``numbers.Real``. No range check is applied.
    """
    if not isinstance(datum, (int, long, float, numbers.Real)):
        return False
    # bool is a subclass of int, so it has to be rejected explicitly.
    return not isinstance(datum, bool)
68
69
cdef inline bint validate_fixed(datum, dict schema):
    """Tell whether ``datum`` is binary data of the schema's exact size.

    ``datum`` must be a ``bytes`` or ``bytearray`` instance whose length
    equals ``schema['size']``. The size is only looked up once the type
    check has passed.
    """
    if not isinstance(datum, (bytes, bytearray)):
        return False
    return len(datum) == schema['size']
75
76
cdef inline bint validate_enum(datum, dict schema):
    """Tell whether ``datum`` is a member of ``schema['symbols']``."""
    symbols = schema['symbols']
    return datum in symbols
79
80
cdef inline bint validate_array(
    datum,
    dict schema,
    dict named_schemas,
    str parent_ns='',
    bint raise_errors=True,
) except -1:
    """Validate every element of a sequence against ``schema['items']``.

    ``datum`` must be a ``Sequence`` or an ``array.array`` and must not be
    accepted by ``is_str``; otherwise False is returned without looking at
    the elements. Elements are checked in order with ``_validate`` and the
    first failing element ends the check. An empty sequence is valid.

    ``parent_ns`` is forwarded to ``_validate`` as the ``field`` argument.
    Exceptions raised by ``_validate`` propagate to the caller.
    """
    if isinstance(datum, (Sequence, array.array)) and not is_str(datum):
        for element in datum:
            # The item schema is looked up per element, so an empty
            # sequence never touches schema['items'].
            element_ok = _validate(
                element,
                schema['items'],
                named_schemas,
                parent_ns,
                raise_errors,
            )
            if not element_ok:
                return False
        return True
    return False
98
99
cdef inline bint validate_map(
    object datum,
    dict schema,
    dict named_schemas,
    str parent_ns='',
    bint raise_errors=True,
) except -1:
    """Validate a mapping's keys and values against a map schema.

    ``datum`` must be a ``Mapping``. All keys are checked with ``is_str``
    first; only when every key passes are the values validated against
    ``schema['values']`` with ``_validate``. The first failure ends the
    check with False. An empty mapping is valid.

    ``parent_ns`` is forwarded to ``_validate`` as the ``field`` argument.
    Exceptions raised by ``_validate`` propagate to the caller.
    """
    if isinstance(datum, Mapping):
        # Pass 1: every key has to be accepted by is_str.
        for key in iterkeys(datum):
            if not is_str(key):
                return False

        # Pass 2: every value has to satisfy the 'values' schema.
        for value in itervalues(datum):
            value_ok = _validate(
                value,
                schema['values'],
                named_schemas,
                parent_ns,
                raise_errors,
            )
            if not value_ok:
                return False
        return True
    return False
121
122
cdef inline bint validate_record(
    object datum,
    dict schema,
    dict named_schemas,
    str parent_ns='',
    bint raise_errors=True,
) except -1:
    """Validate a mapping against each field of a record schema.

    ``datum`` must be a ``Mapping``. For every entry of
    ``schema['fields']`` the value stored under the field's name is
    validated against the field's ``'type'``; when the key is absent the
    field's ``'default'`` is used instead (``None`` if the field declares
    no default). The first failing field ends the check with False.

    The ``field`` passed to ``_validate`` is ``'<prefix>.<field name>'``,
    where the prefix is the second item returned by
    ``schema_name(schema, parent_ns)``.
    """
    if not isinstance(datum, Mapping):
        return False

    _, prefix = schema_name(schema, parent_ns)
    for spec in schema['fields']:
        field_name = spec['name']
        value = datum.get(field_name, spec.get('default'))
        field_ok = _validate(
            value,
            spec['type'],
            named_schemas,
            '{}.{}'.format(prefix, field_name),
            raise_errors,
        )
        if not field_ok:
            return False
    return True
141
142
cdef inline bint validate_union(
    object datum,
    list schema,
    dict named_schemas,
    str parent_ns=None,
    bint raise_errors=True,
) except -1:
    """Validate ``datum`` against the branches of a union schema.

    Two input forms are handled:

    * A tuple is unpacked as ``(name, value)``. The first branch whose
      name equals ``name`` is the only one tried, and the outcome of
      validating ``value`` against it is returned. For branches whose
      record type is ``'record'`` the name is ``branch["name"]``; for any
      other branch the branch itself is compared. False is returned when
      no branch matches. A tuple that does not have exactly two items
      makes the unpacking raise ``ValueError``.
    * Any other datum is tried against each branch in order and the first
      branch that validates wins.

    When no branch validates, the ``ValidationError`` data collected from
    the failed attempts is re-raised as one ``ValidationError`` if
    ``raise_errors`` is true; otherwise False is returned.
    """
    cdef list errors = []

    if isinstance(datum, tuple):
        branch_name, datum = datum
        for candidate in schema:
            candidate_name = (
                candidate["name"]
                if extract_record_type(candidate) == 'record'
                else candidate
            )
            if candidate_name == branch_name:
                return _validate(
                    datum,
                    candidate,
                    named_schemas,
                    parent_ns,
                    raise_errors,
                )
        # No branch carried the requested name.
        return False

    for branch in schema:
        try:
            matched = _validate(
                datum,
                branch,
                named_schemas,
                parent_ns,
                raise_errors,
            )
        except ValidationError as exc:
            # Remember why this branch failed and move on to the next.
            errors.extend(exc.errors)
        else:
            if matched:
                # We exit on the first passing type in Unions
                return True

    if raise_errors:
        raise ValidationError(*errors)
    return False
180
181
cpdef _validate(
    object datum,
    object schema,
    dict named_schemas,
    str field='',
    bint raise_errors=True,
):
    """Validate ``datum`` against an already parsed ``schema``.

    If the schema declares a logical type that has an entry in
    ``LOGICAL_WRITERS``, ``datum`` is first passed through that callable
    and the converted value is what gets validated (and reported on
    failure).

    The record type extracted from the schema selects the validator.
    A record type that is not handled explicitly is looked up in
    ``named_schemas`` and validated against the schema stored there.

    Parameters
    ----------
    datum
        Value to validate.
    schema
        Parsed schema to validate against.
    named_schemas : dict
        Maps schema names to schemas, used to resolve named references.
    field : str, optional
        Field path recorded in the error data and handed to the nested
        validators.
    raise_errors : bool, optional
        When true a failed validation raises ``ValidationError``; when
        false the function returns False instead.

    Returns
    -------
    bool
        True when the datum is valid, False otherwise.

    Raises
    ------
    UnknownType
        If the record type is neither handled here nor present in
        ``named_schemas``.
    ValidationError
        If validation fails and ``raise_errors`` is true.
    """
    record_type = extract_record_type(schema)
    result = None

    logical_type = extract_logical_type(schema)
    converter = LOGICAL_WRITERS.get(logical_type) if logical_type else None
    if converter:
        datum = converter(datum, schema)

    # explicit, so that cython is faster, but only for Base Validators
    if record_type == 'null':
        result = validate_null(datum)
    elif record_type == 'boolean':
        result = validate_boolean(datum)
    elif record_type == 'string':
        result = validate_string(datum)
    elif record_type == 'int':
        result = validate_int(datum)
    elif record_type == 'long':
        result = validate_long(datum)
    elif record_type in ('float', 'double'):
        result = validate_float(datum)
    elif record_type == 'bytes':
        result = validate_bytes(datum)
    elif record_type == 'fixed':
        result = validate_fixed(datum, schema)
    elif record_type == 'enum':
        result = validate_enum(datum, schema)
    elif record_type == 'array':
        result = validate_array(
            datum, schema, named_schemas, field, raise_errors
        )
    elif record_type == 'map':
        result = validate_map(
            datum, schema, named_schemas, field, raise_errors
        )
    elif record_type in ('union', 'error_union'):
        result = validate_union(
            datum, schema, named_schemas, field, raise_errors
        )
    elif record_type in ('record', 'error', 'request'):
        result = validate_record(
            datum, schema, named_schemas, field, raise_errors
        )
    elif record_type in named_schemas:
        # Named reference: validate against the schema it points to.
        result = _validate(
            datum,
            named_schemas[record_type],
            named_schemas,
            field,
            raise_errors,
        )
    else:
        raise UnknownType(record_type)

    if result is False:
        if raise_errors:
            raise ValidationError(ValidationErrorData(datum, schema, field))

    return bool(result)
262
263
cpdef validate(object datum, object schema, str field='',
               bint raise_errors=True):
    """Parse ``schema`` and validate ``datum`` against it.

    Parameters
    ----------
    datum
        Value to validate.
    schema
        Schema to validate against; it is passed through ``parse_schema``
        with ``_force=True`` before use.
    field : str, optional
        Field path passed on to ``_validate``.
    raise_errors : bool, optional
        When true, invalid data raises ``ValidationError``; when false the
        function returns False for invalid data.

    Returns
    -------
    bool
        The result of ``_validate`` for the parsed schema.
    """
    # parse_schema receives this dict as `_named_schemas`; the same dict
    # is then used by _validate to resolve named references.
    schema_registry = {}
    parsed = parse_schema(
        schema,
        _force=True,
        _named_schemas=schema_registry,
    )
    return _validate(
        datum,
        parsed,
        schema_registry,
        field,
        raise_errors,
    )
271
272
cpdef validate_many(records, schema, bint raise_errors=True):
    """Validate every record in ``records`` against ``schema``.

    The schema is parsed once (``parse_schema`` with ``_force=True``) and
    each record is then checked with ``_validate``. A ``ValidationError``
    raised for one record does not stop the loop: its error data is
    collected and the remaining records are still validated.

    Parameters
    ----------
    records
        Iterable of values to validate.
    schema
        Schema to validate against.
    raise_errors : bool, optional
        When true and at least one error was collected, a single
        ``ValidationError`` carrying all collected error data is raised
        after the last record.

    Returns
    -------
    bool
        True when every recorded result is true (also for an empty
        ``records``), False otherwise.
    """
    cdef bint outcome
    cdef list collected_errors = []
    cdef list outcomes = []

    schema_registry = {}
    parsed = parse_schema(
        schema,
        _force=True,
        _named_schemas=schema_registry,
    )

    for item in records:
        try:
            outcome = _validate(
                item, parsed, schema_registry, raise_errors=raise_errors
            )
        except ValidationError as exc:
            collected_errors.extend(exc.errors)
        else:
            outcomes.append(outcome)

    if raise_errors and collected_errors:
        raise ValidationError(*collected_errors)
    return all(outcomes)
292