import array
import numbers
try:
    from collections.abc import Mapping, Sequence
except ImportError:
    # python2
    from collections import Mapping, Sequence

from fastavro.const import (
    INT_MAX_VALUE, INT_MIN_VALUE, LONG_MAX_VALUE, LONG_MIN_VALUE
)
from ._validate_common import ValidationError, ValidationErrorData
from .schema import (
    extract_record_type, extract_logical_type, schema_name, parse_schema
)
from .logical_writers import LOGICAL_WRITERS
from .six import long, is_str, iterkeys, itervalues
from ._schema_common import UnknownType


def validate_null(datum, **kwargs):
    """
    Checks that the data value is None.

    Parameters
    ----------
    datum: Any
        Data being validated
    kwargs: Any
        Unused kwargs
    """
    return datum is None


def validate_boolean(datum, **kwargs):
    """
    Check that the data value is a bool instance.

    Parameters
    ----------
    datum: Any
        Data being validated
    kwargs: Any
        Unused kwargs
    """
    return isinstance(datum, bool)


def validate_string(datum, **kwargs):
    """
    Check that the data value is a string, using the
    six helpers for Python version compatibility.

    Parameters
    ----------
    datum: Any
        Data being validated
    kwargs: Any
        Unused kwargs
    """
    return is_str(datum)


def validate_bytes(datum, **kwargs):
    """
    Check that the data value is a python bytes or bytearray instance.

    Parameters
    ----------
    datum: Any
        Data being validated
    kwargs: Any
        Unused kwargs
    """
    return isinstance(datum, (bytes, bytearray))


def validate_int(datum, **kwargs):
    """
    Check that the data value is a non floating
    point number that fits in an Int32.

    Int32 = INT_MIN_VALUE <= datum <= INT_MAX_VALUE

    conditional python types: int, long, numbers.Integral

    Parameters
    ----------
    datum: Any
        Data being validated
    kwargs: Any
        Unused kwargs
    """
    return (
        isinstance(datum, (int, long, numbers.Integral))
        and INT_MIN_VALUE <= datum <= INT_MAX_VALUE
        # bool is a subclass of int, so it must be rejected explicitly
        and not isinstance(datum, bool)
    )


def validate_long(datum, **kwargs):
    """
    Check that the data value is a non floating
    point number that fits in an Int64.

    Int64 = LONG_MIN_VALUE <= datum <= LONG_MAX_VALUE

    conditional python types: int, long, numbers.Integral

    Parameters
    ----------
    datum: Any
        Data being validated
    kwargs: Any
        Unused kwargs
    """
    return (
        isinstance(datum, (int, long, numbers.Integral))
        and LONG_MIN_VALUE <= datum <= LONG_MAX_VALUE
        # bool is a subclass of int, so it must be rejected explicitly
        and not isinstance(datum, bool)
    )


def validate_float(datum, **kwargs):
    """
    Check that the data value is a floating
    point number or double precision.

    conditional python types
    (int, long, float, numbers.Real)

    Parameters
    ----------
    datum: Any
        Data being validated
    kwargs: Any
        Unused kwargs
    """
    return (
        isinstance(datum, (int, long, float, numbers.Real))
        # bool is a subclass of int, so it must be rejected explicitly
        and not isinstance(datum, bool)
    )


def validate_fixed(datum, schema, **kwargs):
    """
    Check that the data value is fixed width bytes,
    matching the schema['size'] exactly!

    Both bytes and bytearray are accepted, consistent with validate_bytes.

    Parameters
    ----------
    datum: Any
        Data being validated
    schema: dict
        Schema
    kwargs: Any
        Unused kwargs
    """
    return (
        isinstance(datum, (bytes, bytearray))
        and len(datum) == schema['size']
    )


def validate_enum(datum, schema, **kwargs):
    """
    Check that the data value matches one of the enum symbols.

    i.e "blue" in ["red", "green", "blue"]

    Parameters
    ----------
    datum: Any
        Data being validated
    schema: dict
        Schema
    kwargs: Any
        Unused kwargs
    """
    return datum in schema['symbols']


def validate_array(
    datum, schema, named_schemas, parent_ns=None, raise_errors=True
):
    """
    Check that the data list values all match schema['items'].

    Parameters
    ----------
    datum: Any
        Data being validated
    schema: dict
        Schema
    named_schemas: dict
        Mapping of named schemas, passed through to nested validation
    parent_ns: str
        parent namespace
    raise_errors: bool
        If true, raises ValidationError on invalid data
    """
    return (
        isinstance(datum, (Sequence, array.array)) and
        # strings are Sequences too, but they are not valid arrays
        not is_str(datum) and
        all(
            _validate(
                datum=item,
                schema=schema['items'],
                named_schemas=named_schemas,
                field=parent_ns,
                raise_errors=raise_errors,
            ) for item in datum
        )
    )


def validate_map(
    datum, schema, named_schemas, parent_ns=None, raise_errors=True
):
    """
    Check that the data is a Map(k,v) with string keys,
    matching values to schema['values'] type.

    Parameters
    ----------
    datum: Any
        Data being validated
    schema: dict
        Schema
    named_schemas: dict
        Mapping of named schemas, passed through to nested validation
    parent_ns: str
        parent namespace
    raise_errors: bool
        If true, raises ValidationError on invalid data
    """
    return (
        isinstance(datum, Mapping) and
        all(is_str(key) for key in iterkeys(datum)) and
        all(
            _validate(
                datum=value,
                schema=schema['values'],
                named_schemas=named_schemas,
                field=parent_ns,
                raise_errors=raise_errors,
            ) for value in itervalues(datum)
        )
    )


def validate_record(
    datum, schema, named_schemas, parent_ns=None, raise_errors=True
):
    """
    Check that the data is a Mapping type with all schema defined fields
    validated as True.

    A field missing from the datum is validated using the field's
    'default' (or None when the field declares no default).

    Parameters
    ----------
    datum: Any
        Data being validated
    schema: dict
        Schema
    named_schemas: dict
        Mapping of named schemas, passed through to nested validation
    parent_ns: str
        parent namespace
    raise_errors: bool
        If true, raises ValidationError on invalid data
    """
    _, namespace = schema_name(schema, parent_ns)
    return (
        isinstance(datum, Mapping) and
        all(
            _validate(
                datum=datum.get(f['name'], f.get('default')),
                schema=f['type'],
                named_schemas=named_schemas,
                # field path reported in ValidationErrorData on failure
                field='{}.{}'.format(namespace, f['name']),
                raise_errors=raise_errors,
            ) for f in schema['fields']
        )
    )


def validate_union(
    datum, schema, named_schemas, parent_ns=None, raise_errors=True
):
    """
    Check that the data validates against one of the schemas listed
    in the union.

    If the datum is a tuple it is unpacked as (name, datum) and only the
    union member matching that name is used: for record members the name
    is compared to the member's "name", otherwise to the member itself.
    If no member matches, False is returned.

    Otherwise each member is tried in order and the first one that
    validates wins.

    Parameters
    ----------
    datum: Any
        Data being validated
    schema: list
        Union schema (the list of member schemas)
    named_schemas: dict
        Mapping of named schemas, passed through to nested validation
    parent_ns: str
        parent namespace
    raise_errors: bool
        If true, raises ValidationError on invalid data
    """
    if isinstance(datum, tuple):
        (name, datum) = datum
        for candidate in schema:
            # A local name distinct from the imported schema_name() helper
            if extract_record_type(candidate) == 'record':
                candidate_name = candidate["name"]
            else:
                candidate_name = candidate
            if candidate_name == name:
                return _validate(
                    datum,
                    schema=candidate,
                    named_schemas=named_schemas,
                    field=parent_ns,
                    raise_errors=raise_errors,
                )
        # No union member carries the requested name
        return False

    errors = []
    for s in schema:
        try:
            ret = _validate(
                datum,
                schema=s,
                named_schemas=named_schemas,
                field=parent_ns,
                raise_errors=raise_errors,
            )
            if ret:
                # We exit on the first passing type in Unions
                return True
        except ValidationError as e:
            # Keep every member's errors so the final error is complete
            errors.extend(e.errors)
    if raise_errors:
        raise ValidationError(*errors)
    return False


# Dispatch table from Avro type name to its validator function
VALIDATORS = {
    'null': validate_null,
    'boolean': validate_boolean,
    'string': validate_string,
    'int': validate_int,
    'long': validate_long,
    'float': validate_float,
    'double': validate_float,
    'bytes': validate_bytes,
    'fixed': validate_fixed,
    'enum': validate_enum,
    'array': validate_array,
    'map': validate_map,
    'union': validate_union,
    'error_union': validate_union,
    'record': validate_record,
    'error': validate_record,
    'request': validate_record
}


def _validate(datum, schema, named_schemas, field=None, raise_errors=True):
    """
    Validate a datum against an already parsed schema.

    Parameters
    ----------
    datum: Any
        Data being validated
    schema: Any
        Parsed schema
    named_schemas: dict
        Mapping of named schemas used to resolve type names that have no
        entry in VALIDATORS
    field: str, optional
        Record field being validated
    raise_errors: bool, optional
        If true, ValidationError is raised when the result is False

    Raises
    ------
    UnknownType
        If the type has no validator and is not in named_schemas
    ValidationError
        If raise_errors is true and the datum is invalid
    """
    # This function expects the schema to already be parsed
    record_type = extract_record_type(schema)
    result = None

    logical_type = extract_logical_type(schema)
    if logical_type:
        # Convert the datum with the logical type's writer, when one is
        # registered, before running the validator on it
        prepare = LOGICAL_WRITERS.get(logical_type)
        if prepare:
            datum = prepare(datum, schema)

    validator = VALIDATORS.get(record_type)
    if validator:
        result = validator(datum, schema=schema,
                           named_schemas=named_schemas,
                           parent_ns=field,
                           raise_errors=raise_errors)
    elif record_type in named_schemas:
        # Reference to a named schema: validate against its definition
        result = _validate(
            datum,
            schema=named_schemas[record_type],
            named_schemas=named_schemas,
            field=field,
            raise_errors=raise_errors,
        )
    else:
        raise UnknownType(record_type)

    if raise_errors and result is False:
        raise ValidationError(ValidationErrorData(datum, schema, field))

    return result


def validate(datum, schema, field=None, raise_errors=True):
    """
    Determine if a python datum is an instance of a schema.

    Parameters
    ----------
    datum: Any
        Data being validated
    schema: dict
        Schema
    field: str, optional
        Record field being validated
    raise_errors: bool, optional
        If true, errors are raised for invalid data. If false, a simple
        True (valid) or False (invalid) result is returned


    Example::

        from fastavro.validation import validate
        schema = {...}
        record = {...}
        validate(record, schema)
    """
    named_schemas = {}
    parsed_schema = parse_schema(
        schema, _force=True, _named_schemas=named_schemas
    )
    return _validate(datum, parsed_schema, named_schemas, field, raise_errors)


def validate_many(records, schema, raise_errors=True):
    """
    Validate a list of data!

    Parameters
    ----------
    records: iterable
        List of records to validate
    schema: dict
        Schema
    raise_errors: bool, optional
        If true, errors are raised for invalid data. If false, a simple
        True (valid) or False (invalid) result is returned


    Example::

        from fastavro.validation import validate_many
        schema = {...}
        records = [{...}, {...}, ...]
        validate_many(records, schema)
    """
    named_schemas = {}
    parsed_schema = parse_schema(
        schema, _force=True, _named_schemas=named_schemas
    )
    errors = []
    results = []
    for record in records:
        try:
            results.append(
                _validate(
                    record,
                    parsed_schema,
                    named_schemas,
                    raise_errors=raise_errors
                )
            )
        except ValidationError as e:
            # Collect errors from every record before raising
            errors.extend(e.errors)
    if raise_errors and errors:
        raise ValidationError(*errors)
    return all(results)