1import pytest 2import pathlib 3from frictionless import validate, Detector, Layout, Check, errors, helpers 4 5 6IS_UNIX = not helpers.is_platform("windows") 7 8 9# General 10 11 12def test_validate(): 13 report = validate({"path": "data/table.csv"}) 14 assert report.valid 15 16 17def test_validate_invalid_source(): 18 report = validate("bad.json", type="resource") 19 assert report.flatten(["code", "note"]) == [ 20 [ 21 "resource-error", 22 'cannot extract metadata "bad.json" because "[Errno 2] No such file or directory: \'bad.json\'"', 23 ] 24 ] 25 26 27def test_validate_invalid_resource(): 28 report = validate({"path": "data/table.csv", "schema": "bad"}) 29 assert report.flatten(["code", "note"]) == [ 30 [ 31 "schema-error", 32 'cannot extract metadata "bad" because "[Errno 2] No such file or directory: \'bad\'"', 33 ] 34 ] 35 36 37def test_validate_invalid_resource_original(): 38 report = validate({"path": "data/table.csv"}, original=True) 39 assert report.flatten(["code", "note"]) == [ 40 [ 41 "resource-error", 42 '"{\'path\': \'data/table.csv\'} is not valid under any of the given schemas" at "" in metadata and at "oneOf" in profile', 43 ] 44 ] 45 46 47def test_validate_invalid_table(): 48 report = validate({"path": "data/invalid.csv"}) 49 assert report.flatten(["rowPosition", "fieldPosition", "code"]) == [ 50 [None, 3, "blank-label"], 51 [None, 4, "duplicate-label"], 52 [2, 3, "missing-cell"], 53 [2, 4, "missing-cell"], 54 [3, 3, "missing-cell"], 55 [3, 4, "missing-cell"], 56 [4, None, "blank-row"], 57 [5, 5, "extra-cell"], 58 ] 59 60 61def test_validate_resource_with_schema_as_string(): 62 report = validate({"path": "data/table.csv", "schema": "data/schema.json"}) 63 assert report.valid 64 65 66def test_validate_from_path(): 67 report = validate("data/table.csv") 68 assert report.valid 69 70 71def test_validate_invalid(): 72 report = validate("data/invalid.csv") 73 assert report.flatten(["rowPosition", "fieldPosition", "code"]) == [ 74 [None, 3, "blank-label"], 75 [None, 4, "duplicate-label"], 76 [2, 3, "missing-cell"], 77 [2, 4, "missing-cell"], 78 [3, 3, "missing-cell"], 79 [3, 4, "missing-cell"], 80 [4, None, "blank-row"], 81 [5, 5, "extra-cell"], 82 ] 83 84 85def test_validate_blank_headers(): 86 report = validate("data/blank-headers.csv") 87 assert report.flatten(["rowPosition", "fieldPosition", "code"]) == [ 88 [None, 2, "blank-label"], 89 ] 90 91 92def test_validate_duplicate_headers(): 93 report = validate("data/duplicate-headers.csv") 94 assert report.flatten(["rowPosition", "fieldPosition", "code"]) == [ 95 [None, 3, "duplicate-label"], 96 [None, 5, "duplicate-label"], 97 ] 98 99 100def test_validate_defective_rows(): 101 report = validate("data/defective-rows.csv") 102 assert report.flatten(["rowPosition", "fieldPosition", "code"]) == [ 103 [2, 3, "missing-cell"], 104 [3, 4, "extra-cell"], 105 ] 106 107 108def test_validate_blank_rows(): 109 report = validate("data/blank-rows.csv") 110 assert report.flatten(["rowPosition", "fieldPosition", "code"]) == [ 111 [4, None, "blank-row"], 112 ] 113 114 115def test_validate_blank_rows_multiple(): 116 report = validate("data/blank-rows-multiple.csv") 117 assert report.flatten(["rowPosition", "fieldPosition", "code"]) == [ 118 [4, None, "blank-row"], 119 [5, None, "blank-row"], 120 [6, None, "blank-row"], 121 [7, None, "blank-row"], 122 [8, None, "blank-row"], 123 [9, None, "blank-row"], 124 [10, None, "blank-row"], 125 [11, None, "blank-row"], 126 [12, None, "blank-row"], 127 [13, None, "blank-row"], 128 [14, None, "blank-row"], 129 ] 130 131 132def test_validate_blank_cell_not_required(): 133 report = validate("data/blank-cells.csv") 134 assert report.valid 135 136 137def test_validate_no_data(): 138 report = validate("data/empty.csv") 139 assert report.flatten(["code", "note"]) == [ 140 ["source-error", "the source is empty"], 141 ] 142 143 144def test_validate_no_rows(): 145 report = validate("data/without-rows.csv") 146 assert report.valid 147 148 149def test_validate_no_rows_with_compression(): 150 report = validate("data/without-rows.csv.zip") 151 assert report.valid 152 153 154def test_validate_task_error(): 155 report = validate("data/table.csv", limit_rows="bad") 156 assert report.flatten(["code"]) == [ 157 ["task-error"], 158 ] 159 160 161def test_validate_source_invalid(): 162 # Reducing sample size to get raise on iter, not on open 163 detector = Detector(sample_size=1) 164 report = validate([["h"], [1], "bad"], detector=detector) 165 assert report.flatten(["rowPosition", "fieldPosition", "code"]) == [ 166 [None, None, "source-error"], 167 ] 168 169 170def test_validate_source_pathlib_path_table(): 171 report = validate(pathlib.Path("data/table.csv")) 172 assert report.valid 173 174 175# Scheme 176 177 178def test_validate_scheme(): 179 report = validate("data/table.csv", scheme="file") 180 assert report.valid 181 182 183def test_validate_scheme_invalid(): 184 report = validate("bad://data/table.csv") 185 assert report.flatten(["code", "note"]) == [ 186 ["scheme-error", 'cannot create loader "bad". Try installing "frictionless-bad"'], 187 ] 188 189 190# Format 191 192 193def test_validate_format(): 194 report = validate("data/table.csv", format="csv") 195 assert report.valid 196 197 198def test_validate_format_non_tabular(): 199 report = validate("data/table.bad") 200 assert report.valid 201 202 203# Encoding 204 205 206def test_validate_encoding(): 207 report = validate("data/table.csv", encoding="utf-8") 208 assert report.valid 209 210 211def test_validate_encoding_invalid(): 212 report = validate("data/latin1.csv", encoding="utf-8") 213 assert not report.valid 214 if IS_UNIX: 215 assert report.flatten(["code", "note"]) == [ 216 [ 217 "encoding-error", 218 "'utf-8' codec can't decode byte 0xa9 in position 20: invalid start byte", 219 ], 220 ] 221 222 223# Compression 224 225 226def test_validate_compression(): 227 report = validate("data/table.csv.zip") 228 assert report.valid 229 230 231def test_validate_compression_explicit(): 232 report = validate("data/table.csv.zip", compression="zip") 233 assert report.valid 234 235 236def test_validate_compression_invalid(): 237 report = validate("data/table.csv.zip", compression="bad") 238 assert report.flatten(["code", "note"]) == [ 239 ["compression-error", 'compression "bad" is not supported'], 240 ] 241 242 243# Dialect 244 245 246def test_validate_dialect_delimiter(): 247 report = validate("data/delimiter.csv", dialect={"delimiter": ";"}) 248 assert report.valid 249 assert report.task.resource.stats["rows"] == 2 250 251 252# Layout 253 254 255def test_validate_layout_none(): 256 layout = Layout(header=False) 257 report = validate("data/without-headers.csv", layout=layout) 258 assert report.valid 259 assert report.task.resource.stats["rows"] == 3 260 assert report.task.resource.layout.header is False 261 assert report.task.resource.labels == [] 262 assert report.task.resource.header == ["field1", "field2"] 263 264 265def test_validate_layout_none_extra_cell(): 266 layout = Layout(header=False) 267 report = validate("data/without-headers-extra.csv", layout=layout) 268 assert report.task.resource.stats["rows"] == 3 269 assert report.task.resource.layout.header is False 270 assert report.task.resource.labels == [] 271 assert report.task.resource.header == ["field1", "field2"] 272 assert report.flatten(["rowPosition", "fieldPosition", "code"]) == [ 273 [3, 3, "extra-cell"], 274 ] 275 276 277def test_validate_layout_number(): 278 layout = Layout(header_rows=[2]) 279 report = validate("data/matrix.csv", layout=layout) 280 assert report.task.resource.header == ["11", "12", "13", "14"] 281 assert report.valid 282 283 284def test_validate_layout_list_of_numbers(): 285 layout = Layout(header_rows=[2, 3, 4]) 286 report = validate("data/matrix.csv", layout=layout) 287 assert report.task.resource.header == ["11 21 31", "12 22 32", "13 23 33", "14 24 34"] 288 assert report.valid 289 290 291def test_validate_layout_list_of_numbers_and_headers_join(): 292 layout = Layout(header_rows=[2, 3, 4], header_join=".") 293 report = validate("data/matrix.csv", layout=layout) 294 assert report.task.resource.header == ["11.21.31", "12.22.32", "13.23.33", "14.24.34"] 295 assert report.valid 296 297 298def test_validate_layout_pick_fields(): 299 layout = Layout(pick_fields=[2, "f3"]) 300 report = validate("data/matrix.csv", layout=layout) 301 assert report.task.resource.header == ["f2", "f3"] 302 assert report.task.resource.stats["rows"] == 4 303 assert report.task.valid 304 305 306def test_validate_layout_pick_fields_regex(): 307 layout = Layout(pick_fields=["<regex>f[23]"]) 308 report = validate("data/matrix.csv", layout=layout) 309 assert report.task.resource.header == ["f2", "f3"] 310 assert report.task.resource.stats["rows"] == 4 311 assert report.task.valid 312 313 314def test_validate_layout_skip_fields(): 315 layout = Layout(skip_fields=[1, "f4"]) 316 report = validate("data/matrix.csv", layout=layout) 317 assert report.task.resource.header == ["f2", "f3"] 318 assert report.task.resource.stats["rows"] == 4 319 assert report.task.valid 320 321 322def test_validate_layout_skip_fields_regex(): 323 layout = Layout(skip_fields=["<regex>f[14]"]) 324 report = validate("data/matrix.csv", layout=layout) 325 assert report.task.resource.header == ["f2", "f3"] 326 assert report.task.resource.stats["rows"] == 4 327 assert report.task.valid 328 329 330def test_validate_layout_limit_fields(): 331 layout = Layout(limit_fields=1) 332 report = validate("data/matrix.csv", layout=layout) 333 assert report.task.resource.header == ["f1"] 334 assert report.task.resource.stats["rows"] == 4 335 assert report.task.valid 336 337 338def test_validate_layout_offset_fields(): 339 layout = Layout(offset_fields=3) 340 report = validate("data/matrix.csv", layout=layout) 341 assert report.task.resource.header == ["f4"] 342 assert report.task.resource.stats["rows"] == 4 343 assert report.task.valid 344 345 346def test_validate_layout_limit_and_offset_fields(): 347 layout = Layout(limit_fields=2, offset_fields=1) 348 report = validate("data/matrix.csv", layout=layout) 349 assert report.task.resource.header == ["f2", "f3"] 350 assert report.task.resource.stats["rows"] == 4 351 assert report.task.valid 352 353 354def test_validate_layout_pick_rows(): 355 layout = Layout(pick_rows=[1, 3, "31"]) 356 report = validate("data/matrix.csv", layout=layout) 357 assert report.task.resource.header == ["f1", "f2", "f3", "f4"] 358 assert report.task.resource.stats["rows"] == 2 359 assert report.task.valid 360 361 362def test_validate_layout_pick_rows_regex(): 363 layout = Layout(pick_rows=["<regex>[f23]1"]) 364 report = validate("data/matrix.csv", layout=layout) 365 assert report.task.resource.header == ["f1", "f2", "f3", "f4"] 366 assert report.task.resource.stats["rows"] == 2 367 assert report.task.valid 368 369 370def test_validate_layout_skip_rows(): 371 layout = Layout(skip_rows=[2, "41"]) 372 report = validate("data/matrix.csv", layout=layout) 373 assert report.task.resource.header == ["f1", "f2", "f3", "f4"] 374 assert report.task.resource.stats["rows"] == 2 375 assert report.task.valid 376 377 378def test_validate_layout_skip_rows_regex(): 379 layout = Layout(skip_rows=["<regex>[14]1"]) 380 report = validate("data/matrix.csv", layout=layout) 381 assert report.task.resource.header == ["f1", "f2", "f3", "f4"] 382 assert report.task.resource.stats["rows"] == 2 383 assert report.task.valid 384 385 386def test_validate_layout_skip_rows_blank(): 387 layout = Layout(skip_rows=["<blank>"]) 388 report = validate("data/blank-rows.csv", layout=layout) 389 assert report.task.resource.header == ["id", "name", "age"] 390 assert report.task.resource.stats["rows"] == 2 391 assert report.task.valid 392 393 394def test_validate_layout_pick_rows_and_fields(): 395 layout = Layout(pick_rows=[1, 3, "31"], pick_fields=[2, "f3"]) 396 report = validate("data/matrix.csv", layout=layout) 397 assert report.task.resource.header == ["f2", "f3"] 398 assert report.task.resource.stats["rows"] == 2 399 assert report.task.valid 400 401 402def test_validate_layout_skip_rows_and_fields(): 403 layout = Layout(skip_rows=[2, "41"], skip_fields=[1, "f4"]) 404 report = validate("data/matrix.csv", layout=layout) 405 assert report.task.resource.header == ["f2", "f3"] 406 assert report.task.resource.stats["rows"] == 2 407 assert report.task.valid 408 409 410def test_validate_layout_limit_rows(): 411 layout = Layout(limit_rows=1) 412 report = validate("data/matrix.csv", layout=layout) 413 assert report.task.resource.header == ["f1", "f2", "f3", "f4"] 414 assert report.task.resource.stats["rows"] == 1 415 assert report.task.valid 416 417 418def test_validate_layout_offset_rows(): 419 layout = Layout(offset_rows=3) 420 report = validate("data/matrix.csv", layout=layout) 421 assert report.task.resource.header == ["f1", "f2", "f3", "f4"] 422 assert report.task.resource.stats["rows"] == 1 423 assert report.task.valid 424 425 426def test_validate_layout_limit_and_offset_rows(): 427 layout = Layout(limit_rows=2, offset_rows=1) 428 report = validate("data/matrix.csv", layout=layout) 429 assert report.task.resource.header == ["f1", "f2", "f3", "f4"] 430 assert report.task.resource.stats["rows"] == 2 431 assert report.task.valid 432 433 434def test_validate_layout_invalid_limit_rows(): 435 layout = Layout(limit_rows=2) 436 report = validate("data/invalid.csv", layout=layout) 437 assert report.flatten(["rowPosition", "fieldPosition", "code"]) == [ 438 [None, 3, "blank-label"], 439 [None, 4, "duplicate-label"], 440 [2, 3, "missing-cell"], 441 [2, 4, "missing-cell"], 442 [3, 3, "missing-cell"], 443 [3, 4, "missing-cell"], 444 ] 445 446 447def test_validate_layout_structure_errors_with_limit_rows(): 448 layout = Layout(limit_rows=3) 449 report = validate("data/structure-errors.csv", layout=layout) 450 assert report.flatten(["rowPosition", "fieldPosition", "code"]) == [ 451 [4, None, "blank-row"], 452 ] 453 454 455# Schema 456 457 458def test_validate_schema_invalid(): 459 source = [["name", "age"], ["Alex", "33"]] 460 schema = {"fields": [{"name": "name"}, {"name": "age", "type": "bad"}]} 461 report = validate(source, schema=schema) 462 assert report.flatten(["code", "note"]) == [ 463 [ 464 "field-error", 465 "\"{'name': 'age', 'type': 'bad'} is not valid under any of the given schemas\" at \"\" in metadata and at \"anyOf\" in profile", 466 ], 467 ] 468 469 470def test_validate_schema_invalid_json(): 471 report = validate("data/table.csv", schema="data/invalid.json") 472 assert report.flatten(["rowPosition", "fieldPosition", "code"]) == [ 473 [None, None, "schema-error"], 474 ] 475 476 477def test_validate_schema_extra_headers_and_cells(): 478 schema = {"fields": [{"name": "id", "type": "integer"}]} 479 report = validate("data/table.csv", schema=schema) 480 assert report.flatten(["rowPosition", "fieldPosition", "code"]) == [ 481 [None, 2, "extra-label"], 482 [2, 2, "extra-cell"], 483 [3, 2, "extra-cell"], 484 ] 485 486 487def test_validate_schema_multiple_errors(): 488 source = "data/schema-errors.csv" 489 schema = "data/schema-valid.json" 490 report = validate(source, schema=schema, pick_errors=["#row"], limit_errors=3) 491 assert report.task.partial 492 assert report.task.flatten(["rowPosition", "fieldPosition", "code"]) == [ 493 [4, 1, "type-error"], 494 [4, 2, "constraint-error"], 495 [4, 3, "constraint-error"], 496 ] 497 498 499def test_validate_schema_min_length_constraint(): 500 source = [["row", "word"], [2, "a"], [3, "ab"], [4, "abc"], [5, "abcd"], [6]] 501 schema = { 502 "fields": [ 503 {"name": "row", "type": "integer"}, 504 {"name": "word", "type": "string", "constraints": {"minLength": 2}}, 505 ] 506 } 507 report = validate(source, schema=schema, pick_errors=["constraint-error"]) 508 assert report.flatten(["rowPosition", "fieldPosition", "code"]) == [ 509 [2, 2, "constraint-error"], 510 ] 511 512 513def test_validate_schema_max_length_constraint(): 514 source = [["row", "word"], [2, "a"], [3, "ab"], [4, "abc"], [5, "abcd"], [6]] 515 schema = { 516 "fields": [ 517 {"name": "row", "type": "integer"}, 518 {"name": "word", "type": "string", "constraints": {"maxLength": 2}}, 519 ] 520 } 521 report = validate(source, schema=schema, pick_errors=["constraint-error"]) 522 assert report.flatten(["rowPosition", "fieldPosition", "code"]) == [ 523 [4, 2, "constraint-error"], 524 [5, 2, "constraint-error"], 525 ] 526 527 528def test_validate_schema_minimum_constraint(): 529 source = [["row", "score"], [2, 1], [3, 2], [4, 3], [5, 4], [6]] 530 schema = { 531 "fields": [ 532 {"name": "row", "type": "integer"}, 533 {"name": "score", "type": "integer", "constraints": {"minimum": 2}}, 534 ] 535 } 536 report = validate(source, schema=schema, pick_errors=["constraint-error"]) 537 assert report.flatten(["rowPosition", "fieldPosition", "code"]) == [ 538 [2, 2, "constraint-error"], 539 ] 540 541 542def test_validate_schema_maximum_constraint(): 543 source = [["row", "score"], [2, 1], [3, 2], [4, 3], [5, 4], [6]] 544 schema = { 545 "fields": [ 546 {"name": "row", "type": "integer"}, 547 {"name": "score", "type": "integer", "constraints": {"maximum": 2}}, 548 ] 549 } 550 report = validate(source, schema=schema, pick_errors=["constraint-error"]) 551 assert report.flatten(["rowPosition", "fieldPosition", "code"]) == [ 552 [4, 2, "constraint-error"], 553 [5, 2, "constraint-error"], 554 ] 555 556 557def test_validate_schema_foreign_key_error_self_referencing(): 558 source = { 559 "path": "data/nested.csv", 560 "schema": { 561 "fields": [ 562 {"name": "id", "type": "integer"}, 563 {"name": "cat", "type": "integer"}, 564 {"name": "name", "type": "string"}, 565 ], 566 "foreignKeys": [ 567 {"fields": "cat", "reference": {"resource": "", "fields": "id"}} 568 ], 569 }, 570 } 571 report = validate(source) 572 assert report.valid 573 574 575def test_validate_schema_foreign_key_error_self_referencing_invalid(): 576 source = { 577 "path": "data/nested-invalid.csv", 578 "schema": { 579 "fields": [ 580 {"name": "id", "type": "integer"}, 581 {"name": "cat", "type": "integer"}, 582 {"name": "name", "type": "string"}, 583 ], 584 "foreignKeys": [ 585 {"fields": "cat", "reference": {"resource": "", "fields": "id"}} 586 ], 587 }, 588 } 589 report = validate(source) 590 assert report.flatten(["rowPosition", "fieldPosition", "code"]) == [ 591 [6, None, "foreign-key-error"], 592 ] 593 594 595def test_validate_schema_unique_error(): 596 report = validate( 597 "data/unique-field.csv", 598 schema="data/unique-field.json", 599 pick_errors=["unique-error"], 600 ) 601 assert report.flatten(["rowPosition", "fieldPosition", "code"]) == [ 602 [10, 1, "unique-error"], 603 ] 604 605 606def test_validate_schema_unique_error_and_type_error(): 607 source = [ 608 ["id", "unique_number"], 609 ["a1", 100], 610 ["a2", "bad"], 611 ["a3", 100], 612 ] 613 schema = { 614 "fields": [ 615 {"name": "id"}, 616 {"name": "unique_number", "type": "number", "constraints": {"unique": True}}, 617 ] 618 } 619 report = validate(source, schema=schema) 620 assert report.flatten(["rowPosition", "fieldPosition", "code"]) == [ 621 [3, 2, "type-error"], 622 [4, 2, "unique-error"], 623 ] 624 625 626def test_validate_schema_primary_key_error(): 627 report = validate( 628 "data/unique-field.csv", 629 schema="data/unique-field.json", 630 pick_errors=["primary-key-error"], 631 ) 632 assert report.flatten(["rowPosition", "fieldPosition", "code"]) == [ 633 [10, None, "primary-key-error"], 634 ] 635 636 637def test_validate_schema_primary_key_and_unique_error(): 638 report = validate( 639 "data/unique-field.csv", 640 schema="data/unique-field.json", 641 ) 642 assert report.flatten(["rowPosition", "fieldPosition", "code"]) == [ 643 [10, 1, "unique-error"], 644 [10, None, "primary-key-error"], 645 ] 646 647 648def test_validate_schema_primary_key_error_composite(): 649 source = [ 650 ["id", "name"], 651 [1, "Alex"], 652 [1, "John"], 653 ["", "Paul"], 654 [1, "John"], 655 ["", None], 656 ] 657 schema = { 658 "fields": [ 659 {"name": "id", "type": "integer"}, 660 {"name": "name", "type": "string"}, 661 ], 662 "primaryKey": ["id", "name"], 663 } 664 report = validate(source, schema=schema) 665 assert report.flatten(["rowPosition", "fieldPosition", "code"]) == [ 666 [5, None, "primary-key-error"], 667 [6, None, "blank-row"], 668 [6, None, "primary-key-error"], 669 ] 670 671 672# Stats 673 674 675def test_validate_stats_hash(): 676 hash = "6c2c61dd9b0e9c6876139a449ed87933" 677 report = validate("data/table.csv", stats={"hash": hash}) 678 if IS_UNIX: 679 assert report.task.valid 680 681 682def test_validate_stats_hash_invalid(): 683 hash = "6c2c61dd9b0e9c6876139a449ed87933" 684 report = validate("data/table.csv", stats={"hash": "bad"}) 685 if IS_UNIX: 686 assert report.flatten(["code", "note"]) == [ 687 ["hash-count-error", 'expected md5 is "bad" and actual is "%s"' % hash], 688 ] 689 690 691def test_validate_stats_hash_md5(): 692 hash = "6c2c61dd9b0e9c6876139a449ed87933" 693 report = validate("data/table.csv", stats={"hash": hash}) 694 if IS_UNIX: 695 assert report.task.valid 696 697 698def test_validate_stats_hash_md5_invalid(): 699 hash = "6c2c61dd9b0e9c6876139a449ed87933" 700 report = validate("data/table.csv", stats={"hash": "bad"}) 701 if IS_UNIX: 702 assert report.flatten(["code", "note"]) == [ 703 ["hash-count-error", 'expected md5 is "bad" and actual is "%s"' % hash], 704 ] 705 706 707def test_validate_stats_hash_sha1(): 708 hash = "db6ea2f8ff72a9e13e1d70c28ed1c6b42af3bb0e" 709 report = validate("data/table.csv", hashing="sha1", stats={"hash": hash}) 710 if IS_UNIX: 711 assert report.task.valid 712 713 714def test_validate_stats_hash_sha1_invalid(): 715 hash = "db6ea2f8ff72a9e13e1d70c28ed1c6b42af3bb0e" 716 report = validate("data/table.csv", hashing="sha1", stats={"hash": "bad"}) 717 if IS_UNIX: 718 assert report.flatten(["code", "note"]) == [ 719 ["hash-count-error", 'expected sha1 is "bad" and actual is "%s"' % hash], 720 ] 721 722 723def test_validate_stats_hash_sha256(): 724 hash = "a1fd6c5ff3494f697874deeb07f69f8667e903dd94a7bc062dd57550cea26da8" 725 report = validate("data/table.csv", hashing="sha256", stats={"hash": hash}) 726 if IS_UNIX: 727 assert report.task.valid 728 729 730def test_validate_stats_hash_sha256_invalid(): 731 hash = "a1fd6c5ff3494f697874deeb07f69f8667e903dd94a7bc062dd57550cea26da8" 732 report = validate("data/table.csv", hashing="sha256", stats={"hash": "bad"}) 733 if IS_UNIX: 734 assert report.flatten(["code", "note"]) == [ 735 [ 736 "hash-count-error", 737 'expected sha256 is "bad" and actual is "%s"' % hash, 738 ], 739 ] 740 741 742def test_validate_stats_hash_sha512(): 743 hash = "d52e3f5f5693894282f023b9985967007d7984292e9abd29dca64454500f27fa45b980132d7b496bc84d336af33aeba6caf7730ec1075d6418d74fb8260de4fd" 744 report = validate("data/table.csv", hashing="sha512", stats={"hash": hash}) 745 if IS_UNIX: 746 assert report.task.valid 747 748 749def test_validate_stats_hash_sha512_invalid(): 750 hash = "d52e3f5f5693894282f023b9985967007d7984292e9abd29dca64454500f27fa45b980132d7b496bc84d336af33aeba6caf7730ec1075d6418d74fb8260de4fd" 751 report = validate("data/table.csv", hashing="sha512", stats={"hash": "bad"}) 752 if IS_UNIX: 753 assert report.flatten(["code", "note"]) == [ 754 [ 755 "hash-count-error", 756 'expected sha512 is "bad" and actual is "%s"' % hash, 757 ], 758 ] 759 760 761def test_validate_stats_bytes(): 762 report = validate("data/table.csv", stats={"bytes": 30}) 763 if IS_UNIX: 764 assert report.task.valid 765 766 767def test_validate_stats_bytes_invalid(): 768 report = validate("data/table.csv", stats={"bytes": 40}) 769 assert report.task.error.get("rowPosition") is None 770 assert report.task.error.get("fieldPosition") is None 771 if IS_UNIX: 772 assert report.flatten(["code", "note"]) == [ 773 ["byte-count-error", 'expected is "40" and actual is "30"'], 774 ] 775 776 777def test_validate_stats_rows(): 778 report = validate("data/table.csv", stats={"rows": 2}) 779 if IS_UNIX: 780 assert report.task.valid 781 782 783def test_validate_stats_rows_invalid(): 784 report = validate("data/table.csv", stats={"rows": 3}) 785 assert report.task.error.get("rowPosition") is None 786 assert report.task.error.get("fieldPosition") is None 787 if IS_UNIX: 788 assert report.flatten(["code", "note"]) == [ 789 ["row-count-error", 'expected is "3" and actual is "2"'], 790 ] 791 792 793# Detector 794 795 796def test_validate_detector_sync_schema(): 797 schema = { 798 "fields": [ 799 {"name": "id", "type": "integer"}, 800 {"name": "name", "type": "string"}, 801 ], 802 } 803 detector = Detector(schema_sync=True) 804 report = validate("data/sync-schema.csv", schema=schema, detector=detector) 805 assert report.valid 806 assert report.task.resource.schema == { 807 "fields": [ 808 {"name": "name", "type": "string"}, 809 {"name": "id", "type": "integer"}, 810 ], 811 } 812 813 814def test_validate_detector_sync_schema_invalid(): 815 source = [["LastName", "FirstName", "Address"], ["Test", "Tester", "23 Avenue"]] 816 schema = {"fields": [{"name": "id"}, {"name": "FirstName"}, {"name": "LastName"}]} 817 detector = Detector(schema_sync=True) 818 report = validate(source, schema=schema, detector=detector) 819 assert report.valid 820 821 822def test_validate_detector_headers_errors(): 823 source = [ 824 ["id", "last_name", "first_name", "language"], 825 [1, "Alex", "John", "English"], 826 [2, "Peters", "John", "Afrikaans"], 827 [3, "Smith", "Paul", None], 828 ] 829 schema = { 830 "fields": [ 831 {"name": "id", "type": "number"}, 832 {"name": "language", "constraints": {"required": True}}, 833 {"name": "country"}, 834 ] 835 } 836 detector = Detector(schema_sync=True) 837 report = validate(source, schema=schema, detector=detector) 838 assert report.flatten(["rowPosition", "fieldPosition", "code"]) == [ 839 [4, 4, "constraint-error"], 840 ] 841 842 843def test_validate_detector_patch_schema(): 844 detector = Detector(schema_patch={"missingValues": ["-"]}) 845 report = validate("data/table.csv", detector=detector) 846 assert report.valid 847 assert report.task.resource.schema == { 848 "fields": [ 849 {"name": "id", "type": "integer"}, 850 {"name": "name", "type": "string"}, 851 ], 852 "missingValues": ["-"], 853 } 854 855 856def test_validate_detector_patch_schema_fields(): 857 detector = Detector( 858 schema_patch={"fields": {"id": {"type": "string"}}, "missingValues": ["-"]} 859 ) 860 report = validate("data/table.csv", detector=detector) 861 assert report.valid 862 assert report.task.resource.schema == { 863 "fields": [{"name": "id", "type": "string"}, {"name": "name", "type": "string"}], 864 "missingValues": ["-"], 865 } 866 867 868def test_validate_detector_infer_type_string(): 869 detector = Detector(field_type="string") 870 report = validate("data/table.csv", detector=detector) 871 assert report.valid 872 assert report.task.resource.schema == { 873 "fields": [{"name": "id", "type": "string"}, {"name": "name", "type": "string"}], 874 } 875 876 877def test_validate_detector_infer_type_any(): 878 detector = Detector(field_type="any") 879 report = validate("data/table.csv", detector=detector) 880 assert report.valid 881 assert report.task.resource.schema == { 882 "fields": [{"name": "id", "type": "any"}, {"name": "name", "type": "any"}], 883 } 884 885 886def test_validate_detector_infer_names(): 887 detector = Detector(field_names=["id", "name"]) 888 report = validate( 889 "data/without-headers.csv", 890 layout={"header": False}, 891 detector=detector, 892 ) 893 assert report.task.resource.schema["fields"][0]["name"] == "id" 894 assert report.task.resource.schema["fields"][1]["name"] == "name" 895 assert report.task.resource.stats["rows"] == 3 896 assert report.task.resource.labels == [] 897 assert report.task.resource.header == ["id", "name"] 898 assert report.valid 899 900 901# Validation 902 903 904def test_validate_pick_errors(): 905 report = validate("data/invalid.csv", pick_errors=["blank-label", "blank-row"]) 906 assert report.task.scope == ["blank-label", "blank-row"] 907 assert report.flatten(["rowPosition", "fieldPosition", "code"]) == [ 908 [None, 3, "blank-label"], 909 [4, None, "blank-row"], 910 ] 911 912 913def test_validate_pick_errors_tags(): 914 report = validate("data/invalid.csv", pick_errors=["#header"]) 915 assert report.task.scope == [ 916 "blank-header", 917 "extra-label", 918 "missing-label", 919 "blank-label", 920 "duplicate-label", 921 "incorrect-label", 922 ] 923 assert report.flatten(["rowPosition", "fieldPosition", "code"]) == [ 924 [None, 3, "blank-label"], 925 [None, 4, "duplicate-label"], 926 ] 927 928 929def test_validate_skip_errors(): 930 report = validate("data/invalid.csv", skip_errors=["blank-label", "blank-row"]) 931 assert report.flatten(["rowPosition", "fieldPosition", "code"]) == [ 932 [None, 4, "duplicate-label"], 933 [2, 3, "missing-cell"], 934 [2, 4, "missing-cell"], 935 [3, 3, "missing-cell"], 936 [3, 4, "missing-cell"], 937 [5, 5, "extra-cell"], 938 ] 939 940 941def test_validate_skip_errors_tags(): 942 report = validate("data/invalid.csv", skip_errors=["#header"]) 943 assert report.flatten(["rowPosition", "fieldPosition", "code"]) == [ 944 [2, 3, "missing-cell"], 945 [2, 4, "missing-cell"], 946 [3, 3, "missing-cell"], 947 [3, 4, "missing-cell"], 948 [4, None, "blank-row"], 949 [5, 5, "extra-cell"], 950 ] 951 952 953def test_validate_invalid_limit_errors(): 954 report = validate("data/invalid.csv", limit_errors=3) 955 assert report.task.partial 956 assert report.flatten(["rowPosition", "fieldPosition", "code"]) == [ 957 [None, 3, "blank-label"], 958 [None, 4, "duplicate-label"], 959 [2, 3, "missing-cell"], 960 ] 961 962 963def test_validate_structure_errors_with_limit_errors(): 964 report = validate("data/structure-errors.csv", limit_errors=3) 965 assert report.task.partial 966 assert report.flatten(["rowPosition", "fieldPosition", "code"]) == [ 967 [4, None, "blank-row"], 968 [5, 4, "extra-cell"], 969 [5, 5, "extra-cell"], 970 ] 971 972 973@pytest.mark.ci 974def test_validate_limit_memory(): 975 source = lambda: ([integer] for integer in range(1, 100000000)) 976 schema = {"fields": [{"name": "integer", "type": "integer"}], "primaryKey": "integer"} 977 layout = Layout(header=False) 978 report = validate(source, schema=schema, layout=layout, limit_memory=50) 979 assert report.flatten(["code", "note"]) == [ 980 ["task-error", 'exceeded memory limit "50MB"'] 981 ] 982 983 984@pytest.mark.ci 985def test_validate_limit_memory_small(): 986 source = lambda: ([integer] for integer in range(1, 100000000)) 987 schema = {"fields": [{"name": "integer", "type": "integer"}], "primaryKey": "integer"} 988 layout = Layout(header=False) 989 report = validate(source, schema=schema, layout=layout, limit_memory=1) 990 assert report.flatten(["code", "note"]) == [ 991 ["task-error", 'exceeded memory limit "1MB"'] 992 ] 993 994 995def test_validate_custom_check(): 996 997 # Create check 998 class custom(Check): 999 def validate_row(self, row): 1000 yield errors.BlankRowError( 1001 note="", 1002 cells=list(map(str, row.values())), 1003 row_number=row.row_number, 1004 row_position=row.row_position, 1005 ) 1006 1007 # Validate resource 1008 report = validate("data/table.csv", checks=[custom()]) 1009 assert report.flatten(["rowPosition", "fieldPosition", "code"]) == [ 1010 [2, None, "blank-row"], 1011 [3, None, "blank-row"], 1012 ] 1013 1014 1015def test_validate_custom_check_with_arguments(): 1016 1017 # Create check 1018 class custom(Check): 1019 def __init__(self, descriptor=None, *, row_position=None): 1020 self.setinitial("rowPosition", row_position) 1021 super().__init__(descriptor) 1022 1023 def validate_row(self, row): 1024 yield errors.BlankRowError( 1025 note="", 1026 cells=list(map(str, row.values())), 1027 row_number=row.row_number, 1028 row_position=self.get("rowPosition") or row.row_position, 1029 ) 1030 1031 # Validate resource 1032 report = validate("data/table.csv", checks=[custom(row_position=1)]) 1033 assert report.flatten(["rowPosition", "fieldPosition", "code"]) == [ 1034 [1, None, "blank-row"], 1035 [1, None, "blank-row"], 1036 ] 1037 1038 1039def test_validate_custom_check_function_based(): 1040 1041 # Create check 1042 def custom(row): 1043 yield errors.BlankRowError( 1044 note="", 1045 cells=list(map(str, row.values())), 1046 row_number=row.row_number, 1047 row_position=row.row_position, 1048 ) 1049 1050 # Validate resource 1051 report = validate("data/table.csv", checks=[custom]) 1052 assert report.flatten(["rowPosition", "fieldPosition", "code"]) == [ 1053 [2, None, "blank-row"], 1054 [3, None, "blank-row"], 1055 ] 1056 1057 1058def test_validate_custom_check_bad_name(): 1059 report = validate("data/table.csv", checks=[{"code": "bad"}]) 1060 assert report.flatten(["code", "note"]) == [ 1061 ["check-error", 'cannot create check "bad". Try installing "frictionless-bad"'], 1062 ] 1063 1064 1065# Issues 1066 1067 1068def test_validate_infer_fields_issue_223(): 1069 source = [["name1", "name2"], ["123", "abc"], ["456", "def"], ["789", "ghi"]] 1070 detector = Detector(schema_patch={"fields": {"name": {"type": "string"}}}) 1071 report = validate(source, detector=detector) 1072 assert report.valid 1073 1074 1075def test_validate_infer_fields_issue_225(): 1076 source = [["name1", "name2"], ["123", None], ["456", None], ["789"]] 1077 detector = Detector(schema_patch={"fields": {"name": {"type": "string"}}}) 1078 report = validate(source, detector=detector) 1079 assert report.flatten(["rowPosition", "fieldPosition", "code"]) == [ 1080 [4, 2, "missing-cell"], 1081 ] 1082 1083 1084def test_validate_fails_with_wrong_encoding_issue_274(): 1085 # For now, by default encoding is detected incorectly by chardet 1086 report = validate("data/encoding-issue-274.csv", encoding="utf-8") 1087 assert report.valid 1088 1089 1090def test_validate_wide_table_with_order_fields_issue_277(): 1091 source = "data/issue-277.csv" 1092 schema = "data/issue-277.json" 1093 detector = Detector(schema_sync=True) 1094 report = validate(source, schema=schema, detector=detector) 1095 assert report.flatten(["rowPosition", "fieldPosition", "code"]) == [ 1096 [49, 50, "constraint-error"], 1097 [68, 50, "constraint-error"], 1098 [69, 50, "constraint-error"], 1099 ] 1100 1101 1102def test_validate_invalid_table_schema_issue_304(): 1103 source = [["name", "age"], ["Alex", "33"]] 1104 schema = {"fields": [{"name": "name"}, {"name": "age", "type": "bad"}]} 1105 report = validate(source, schema=schema) 1106 assert report.flatten(["code", "note"]) == [ 1107 [ 1108 "field-error", 1109 "\"{'name': 'age', 'type': 'bad'} is not valid under any of the given schemas\" at \"\" in metadata and at \"anyOf\" in profile", 1110 ], 1111 ] 1112 1113 1114def test_validate_table_is_invalid_issue_312(): 1115 report = validate("data/issue-312.xlsx") 1116 assert report.flatten(["rowPosition", "fieldPosition", "code"]) == [ 1117 [None, 3, "blank-label"], 1118 [None, 4, "duplicate-label"], 1119 [None, 5, "blank-label"], 1120 [5, None, "blank-row"], 1121 ] 1122 1123 1124def test_validate_order_fields_issue_313(): 1125 source = "data/issue-313.xlsx" 1126 layout = Layout(pick_fields=[1, 2, 3, 4, 5]) 1127 schema = { 1128 "fields": [ 1129 {"name": "Column_1", "type": "string"}, 1130 {"name": "Column_2", "type": "string", "constraints": {"required": True}}, 1131 {"name": "Column_3", "type": "string"}, 1132 {"name": "Column_4", "type": "string"}, 1133 {"name": "Column_5", "type": "string"}, 1134 ] 1135 } 1136 detector = Detector(schema_sync=True) 1137 report = validate(source, layout=layout, schema=schema, detector=detector) 1138 assert report.valid 1139 1140 1141def test_validate_missing_local_file_raises_scheme_error_issue_315(): 1142 report = validate("bad-path.csv") 1143 assert report.flatten(["code", "note"]) == [ 1144 ["scheme-error", "[Errno 2] No such file or directory: 'bad-path.csv'"], 1145 ] 1146 1147 1148def test_validate_inline_not_a_binary_issue_349(): 1149 with open("data/table.csv") as source: 1150 report = validate(source) 1151 assert report.valid 1152 1153 1154def test_validate_newline_inside_label_issue_811(): 1155 report = validate("data/issue-811.csv") 1156 assert report.valid 1157 1158 1159def test_validate_resource_from_json_format_issue_827(): 1160 report = validate(path="data/table.json") 1161 assert report.valid 1162 1163 1164def test_validate_resource_none_is_not_iterable_enum_constraint_issue_833(): 1165 report = validate("data/issue-833.csv", schema="data/issue-833.json") 1166 assert report.valid 1167 1168 1169def test_validate_resource_header_row_has_first_number_issue_870(): 1170 report = validate("data/issue-870.xlsx", layout={"limitRows": 5}) 1171 assert report.valid 1172 1173 1174def test_validate_resource_descriptor_type_invalid(): 1175 report = validate(descriptor="data/table.csv") 1176 assert report.flatten() == [[1, None, None, "resource-error"]] 1177