1import pytest
2import pathlib
3from frictionless import validate, Detector, Layout, Check, errors, helpers
4
5
# True on POSIX platforms. Several expected values below (file hashes, byte
# counts, decode-error notes) depend on LF line endings, so those asserts
# are guarded to run only on unix-like systems.
IS_UNIX = not helpers.is_platform("windows")
7
8
9# General
10
11
def test_validate():
    """A minimal resource descriptor pointing at a good CSV validates cleanly."""
    assert validate({"path": "data/table.csv"}).valid
15
16
def test_validate_invalid_source():
    """A missing descriptor file surfaces as a single resource-error."""
    report = validate("bad.json", type="resource")
    assert report.flatten(["code", "note"]) == [
        [
            "resource-error",
            'cannot extract metadata "bad.json" because "[Errno 2] No such file or directory: \'bad.json\'"',
        ]
    ]
25
26
def test_validate_invalid_resource():
    """A schema path that does not exist is reported as a schema-error."""
    report = validate({"path": "data/table.csv", "schema": "bad"})
    assert report.flatten(["code", "note"]) == [
        [
            "schema-error",
            'cannot extract metadata "bad" because "[Errno 2] No such file or directory: \'bad\'"',
        ]
    ]
35
36
def test_validate_invalid_resource_original():
    """With original=True the raw descriptor is checked against the profile
    (no inferred defaults), so a bare path descriptor fails profile oneOf."""
    report = validate({"path": "data/table.csv"}, original=True)
    assert report.flatten(["code", "note"]) == [
        [
            "resource-error",
            '"{\'path\': \'data/table.csv\'} is not valid under any of the given schemas" at "" in metadata and at "oneOf" in profile',
        ]
    ]
45
46
def test_validate_invalid_table():
    """The canonical invalid fixture yields the full mix of header/cell/row errors."""
    report = validate({"path": "data/invalid.csv"})
    assert report.flatten(["rowPosition", "fieldPosition", "code"]) == [
        [None, 3, "blank-label"],
        [None, 4, "duplicate-label"],
        [2, 3, "missing-cell"],
        [2, 4, "missing-cell"],
        [3, 3, "missing-cell"],
        [3, 4, "missing-cell"],
        [4, None, "blank-row"],
        [5, 5, "extra-cell"],
    ]
59
60
def test_validate_resource_with_schema_as_string():
    """A schema given as a file path is loaded and applied successfully."""
    descriptor = {"path": "data/table.csv", "schema": "data/schema.json"}
    assert validate(descriptor).valid
64
65
def test_validate_from_path():
    """A plain string path is accepted directly as the source."""
    assert validate("data/table.csv").valid
69
70
def test_validate_invalid():
    """Same fixture as test_validate_invalid_table, validated via string path."""
    report = validate("data/invalid.csv")
    assert report.flatten(["rowPosition", "fieldPosition", "code"]) == [
        [None, 3, "blank-label"],
        [None, 4, "duplicate-label"],
        [2, 3, "missing-cell"],
        [2, 4, "missing-cell"],
        [3, 3, "missing-cell"],
        [3, 4, "missing-cell"],
        [4, None, "blank-row"],
        [5, 5, "extra-cell"],
    ]
83
84
def test_validate_blank_headers():
    """An empty header cell is reported as blank-label at its field position."""
    report = validate("data/blank-headers.csv")
    assert report.flatten(["rowPosition", "fieldPosition", "code"]) == [
        [None, 2, "blank-label"],
    ]
90
91
def test_validate_duplicate_headers():
    """Repeated header labels are each flagged as duplicate-label."""
    report = validate("data/duplicate-headers.csv")
    assert report.flatten(["rowPosition", "fieldPosition", "code"]) == [
        [None, 3, "duplicate-label"],
        [None, 5, "duplicate-label"],
    ]
98
99
def test_validate_defective_rows():
    """Short and long rows map to missing-cell and extra-cell respectively."""
    report = validate("data/defective-rows.csv")
    assert report.flatten(["rowPosition", "fieldPosition", "code"]) == [
        [2, 3, "missing-cell"],
        [3, 4, "extra-cell"],
    ]
106
107
def test_validate_blank_rows():
    """A fully empty data row produces a blank-row error with no field position."""
    report = validate("data/blank-rows.csv")
    assert report.flatten(["rowPosition", "fieldPosition", "code"]) == [
        [4, None, "blank-row"],
    ]
113
114
def test_validate_blank_rows_multiple():
    """Every one of the consecutive blank rows (positions 4 through 14) is reported."""
    report = validate("data/blank-rows-multiple.csv")
    expected = [[position, None, "blank-row"] for position in range(4, 15)]
    assert report.flatten(["rowPosition", "fieldPosition", "code"]) == expected
130
131
def test_validate_blank_cell_not_required():
    """Blank cells are fine when no required constraint applies."""
    assert validate("data/blank-cells.csv").valid
135
136
def test_validate_no_data():
    """A completely empty file is a source-error, not a tabular error."""
    report = validate("data/empty.csv")
    assert report.flatten(["code", "note"]) == [
        ["source-error", "the source is empty"],
    ]
142
143
def test_validate_no_rows():
    """A header-only table (no data rows) is still valid."""
    assert validate("data/without-rows.csv").valid
147
148
def test_validate_no_rows_with_compression():
    """A header-only table stays valid when read out of a zip archive."""
    assert validate("data/without-rows.csv.zip").valid
152
153
def test_validate_task_error():
    """A bad option value (non-integer limit_rows) is wrapped as a task-error."""
    report = validate("data/table.csv", limit_rows="bad")
    assert report.flatten(["code"]) == [
        ["task-error"],
    ]
159
160
def test_validate_source_invalid():
    """A malformed inline row raises during iteration (not open) thanks to the
    tiny sample size, and is reported as a source-error."""
    # Reducing sample size to get raise on iter, not on open
    detector = Detector(sample_size=1)
    report = validate([["h"], [1], "bad"], detector=detector)
    assert report.flatten(["rowPosition", "fieldPosition", "code"]) == [
        [None, None, "source-error"],
    ]
168
169
def test_validate_source_pathlib_path_table():
    """A pathlib.Path source is accepted just like a string path."""
    source = pathlib.Path("data/table.csv")
    assert validate(source).valid
173
174
175# Scheme
176
177
def test_validate_scheme():
    """Explicitly passing the file scheme does not change the outcome."""
    assert validate("data/table.csv", scheme="file").valid
181
182
def test_validate_scheme_invalid():
    """An unknown URL scheme yields a scheme-error with an install hint."""
    report = validate("bad://data/table.csv")
    assert report.flatten(["code", "note"]) == [
        ["scheme-error", 'cannot create loader "bad". Try installing "frictionless-bad"'],
    ]
188
189
190# Format
191
192
def test_validate_format():
    """Explicitly passing the csv format does not change the outcome."""
    assert validate("data/table.csv", format="csv").valid
196
197
def test_validate_format_non_tabular():
    """An unrecognized extension falls back to file validation and passes."""
    report = validate("data/table.bad")
    assert report.valid
201
202
203# Encoding
204
205
def test_validate_encoding():
    """Explicitly passing utf-8 for a utf-8 file does not change the outcome."""
    assert validate("data/table.csv", encoding="utf-8").valid
209
210
def test_validate_encoding_invalid():
    """Forcing utf-8 on latin-1 data fails; the exact decode note is
    byte-position dependent so it is only asserted on unix line endings."""
    report = validate("data/latin1.csv", encoding="utf-8")
    assert not report.valid
    if IS_UNIX:
        assert report.flatten(["code", "note"]) == [
            [
                "encoding-error",
                "'utf-8' codec can't decode byte 0xa9 in position 20: invalid start byte",
            ],
        ]
221
222
223# Compression
224
225
def test_validate_compression():
    """Zip compression is auto-detected from the file extension."""
    assert validate("data/table.csv.zip").valid
229
230
def test_validate_compression_explicit():
    """Explicitly naming the zip compression also works."""
    assert validate("data/table.csv.zip", compression="zip").valid
234
235
def test_validate_compression_invalid():
    """An unsupported compression name yields a compression-error."""
    report = validate("data/table.csv.zip", compression="bad")
    assert report.flatten(["code", "note"]) == [
        ["compression-error", 'compression "bad" is not supported'],
    ]
241
242
243# Dialect
244
245
def test_validate_dialect_delimiter():
    """A custom delimiter passed via dialect is honored during parsing."""
    report = validate("data/delimiter.csv", dialect={"delimiter": ";"})
    assert report.valid
    assert report.task.resource.stats["rows"] == 2
250
251
252# Layout
253
254
def test_validate_layout_none():
    """header=False treats every row as data and synthesizes field names."""
    layout = Layout(header=False)
    report = validate("data/without-headers.csv", layout=layout)
    assert report.valid
    assert report.task.resource.stats["rows"] == 3
    assert report.task.resource.layout.header is False
    assert report.task.resource.labels == []
    assert report.task.resource.header == ["field1", "field2"]
263
264
def test_validate_layout_none_extra_cell():
    """With header=False, a too-long row is still reported as extra-cell."""
    layout = Layout(header=False)
    report = validate("data/without-headers-extra.csv", layout=layout)
    assert report.task.resource.stats["rows"] == 3
    assert report.task.resource.layout.header is False
    assert report.task.resource.labels == []
    assert report.task.resource.header == ["field1", "field2"]
    assert report.flatten(["rowPosition", "fieldPosition", "code"]) == [
        [3, 3, "extra-cell"],
    ]
275
276
def test_validate_layout_number():
    """header_rows selects which physical row supplies the labels."""
    layout = Layout(header_rows=[2])
    report = validate("data/matrix.csv", layout=layout)
    assert report.task.resource.header == ["11", "12", "13", "14"]
    assert report.valid
282
283
def test_validate_layout_list_of_numbers():
    """Multiple header rows are joined with a space by default."""
    layout = Layout(header_rows=[2, 3, 4])
    report = validate("data/matrix.csv", layout=layout)
    assert report.task.resource.header == ["11 21 31", "12 22 32", "13 23 33", "14 24 34"]
    assert report.valid
289
290
def test_validate_layout_list_of_numbers_and_headers_join():
    """header_join overrides the separator used to merge header rows."""
    layout = Layout(header_rows=[2, 3, 4], header_join=".")
    report = validate("data/matrix.csv", layout=layout)
    assert report.task.resource.header == ["11.21.31", "12.22.32", "13.23.33", "14.24.34"]
    assert report.valid
296
297
def test_validate_layout_pick_fields():
    """pick_fields accepts positions and names interchangeably."""
    layout = Layout(pick_fields=[2, "f3"])
    report = validate("data/matrix.csv", layout=layout)
    assert report.task.resource.header == ["f2", "f3"]
    assert report.task.resource.stats["rows"] == 4
    assert report.task.valid
304
305
def test_validate_layout_pick_fields_regex():
    """A <regex> prefix matches field names by pattern."""
    layout = Layout(pick_fields=["<regex>f[23]"])
    report = validate("data/matrix.csv", layout=layout)
    assert report.task.resource.header == ["f2", "f3"]
    assert report.task.resource.stats["rows"] == 4
    assert report.task.valid
312
313
def test_validate_layout_skip_fields():
    """skip_fields drops fields by position or name."""
    layout = Layout(skip_fields=[1, "f4"])
    report = validate("data/matrix.csv", layout=layout)
    assert report.task.resource.header == ["f2", "f3"]
    assert report.task.resource.stats["rows"] == 4
    assert report.task.valid
320
321
def test_validate_layout_skip_fields_regex():
    """skip_fields also supports the <regex> pattern syntax."""
    layout = Layout(skip_fields=["<regex>f[14]"])
    report = validate("data/matrix.csv", layout=layout)
    assert report.task.resource.header == ["f2", "f3"]
    assert report.task.resource.stats["rows"] == 4
    assert report.task.valid
328
329
def test_validate_layout_limit_fields():
    """limit_fields truncates the table to the first N fields."""
    layout = Layout(limit_fields=1)
    report = validate("data/matrix.csv", layout=layout)
    assert report.task.resource.header == ["f1"]
    assert report.task.resource.stats["rows"] == 4
    assert report.task.valid
336
337
def test_validate_layout_offset_fields():
    """offset_fields skips the first N fields."""
    layout = Layout(offset_fields=3)
    report = validate("data/matrix.csv", layout=layout)
    assert report.task.resource.header == ["f4"]
    assert report.task.resource.stats["rows"] == 4
    assert report.task.valid
344
345
def test_validate_layout_limit_and_offset_fields():
    """limit and offset compose: skip one field, then keep two."""
    layout = Layout(limit_fields=2, offset_fields=1)
    report = validate("data/matrix.csv", layout=layout)
    assert report.task.resource.header == ["f2", "f3"]
    assert report.task.resource.stats["rows"] == 4
    assert report.task.valid
352
353
def test_validate_layout_pick_rows():
    """pick_rows selects rows by position or by first-cell value."""
    layout = Layout(pick_rows=[1, 3, "31"])
    report = validate("data/matrix.csv", layout=layout)
    assert report.task.resource.header == ["f1", "f2", "f3", "f4"]
    assert report.task.resource.stats["rows"] == 2
    assert report.task.valid
360
361
def test_validate_layout_pick_rows_regex():
    """pick_rows supports <regex> matching on the first-cell value."""
    layout = Layout(pick_rows=["<regex>[f23]1"])
    report = validate("data/matrix.csv", layout=layout)
    assert report.task.resource.header == ["f1", "f2", "f3", "f4"]
    assert report.task.resource.stats["rows"] == 2
    assert report.task.valid
368
369
def test_validate_layout_skip_rows():
    """skip_rows drops rows by position or first-cell value."""
    layout = Layout(skip_rows=[2, "41"])
    report = validate("data/matrix.csv", layout=layout)
    assert report.task.resource.header == ["f1", "f2", "f3", "f4"]
    assert report.task.resource.stats["rows"] == 2
    assert report.task.valid
376
377
def test_validate_layout_skip_rows_regex():
    """skip_rows supports <regex> matching on the first-cell value."""
    layout = Layout(skip_rows=["<regex>[14]1"])
    report = validate("data/matrix.csv", layout=layout)
    assert report.task.resource.header == ["f1", "f2", "f3", "f4"]
    assert report.task.resource.stats["rows"] == 2
    assert report.task.valid
384
385
def test_validate_layout_skip_rows_blank():
    """The special <blank> token silently drops blank rows instead of erroring."""
    layout = Layout(skip_rows=["<blank>"])
    report = validate("data/blank-rows.csv", layout=layout)
    assert report.task.resource.header == ["id", "name", "age"]
    assert report.task.resource.stats["rows"] == 2
    assert report.task.valid
392
393
def test_validate_layout_pick_rows_and_fields():
    """Row and field picking can be applied together."""
    layout = Layout(pick_rows=[1, 3, "31"], pick_fields=[2, "f3"])
    report = validate("data/matrix.csv", layout=layout)
    assert report.task.resource.header == ["f2", "f3"]
    assert report.task.resource.stats["rows"] == 2
    assert report.task.valid
400
401
def test_validate_layout_skip_rows_and_fields():
    """Row and field skipping can be applied together."""
    layout = Layout(skip_rows=[2, "41"], skip_fields=[1, "f4"])
    report = validate("data/matrix.csv", layout=layout)
    assert report.task.resource.header == ["f2", "f3"]
    assert report.task.resource.stats["rows"] == 2
    assert report.task.valid
408
409
def test_validate_layout_limit_rows():
    """limit_rows caps the number of data rows read."""
    layout = Layout(limit_rows=1)
    report = validate("data/matrix.csv", layout=layout)
    assert report.task.resource.header == ["f1", "f2", "f3", "f4"]
    assert report.task.resource.stats["rows"] == 1
    assert report.task.valid
416
417
def test_validate_layout_offset_rows():
    """offset_rows skips the first N data rows."""
    layout = Layout(offset_rows=3)
    report = validate("data/matrix.csv", layout=layout)
    assert report.task.resource.header == ["f1", "f2", "f3", "f4"]
    assert report.task.resource.stats["rows"] == 1
    assert report.task.valid
424
425
def test_validate_layout_limit_and_offset_rows():
    """limit and offset compose for rows: skip one, then keep two."""
    layout = Layout(limit_rows=2, offset_rows=1)
    report = validate("data/matrix.csv", layout=layout)
    assert report.task.resource.header == ["f1", "f2", "f3", "f4"]
    assert report.task.resource.stats["rows"] == 2
    assert report.task.valid
432
433
def test_validate_layout_invalid_limit_rows():
    """limit_rows also truncates error reporting: blank-row and extra-cell
    errors past row 3 of the invalid fixture are never seen."""
    layout = Layout(limit_rows=2)
    report = validate("data/invalid.csv", layout=layout)
    assert report.flatten(["rowPosition", "fieldPosition", "code"]) == [
        [None, 3, "blank-label"],
        [None, 4, "duplicate-label"],
        [2, 3, "missing-cell"],
        [2, 4, "missing-cell"],
        [3, 3, "missing-cell"],
        [3, 4, "missing-cell"],
    ]
445
446
def test_validate_layout_structure_errors_with_limit_rows():
    """Structure errors within the row limit are still reported."""
    layout = Layout(limit_rows=3)
    report = validate("data/structure-errors.csv", layout=layout)
    assert report.flatten(["rowPosition", "fieldPosition", "code"]) == [
        [4, None, "blank-row"],
    ]
453
454
455# Schema
456
457
def test_validate_schema_invalid():
    """An unknown field type fails schema profile validation as a field-error."""
    source = [["name", "age"], ["Alex", "33"]]
    schema = {"fields": [{"name": "name"}, {"name": "age", "type": "bad"}]}
    report = validate(source, schema=schema)
    assert report.flatten(["code", "note"]) == [
        [
            "field-error",
            "\"{'name': 'age', 'type': 'bad'} is not valid under any of the given schemas\" at \"\" in metadata and at \"anyOf\" in profile",
        ],
    ]
468
469
def test_validate_schema_invalid_json():
    """A schema file containing invalid JSON surfaces as a schema-error."""
    report = validate("data/table.csv", schema="data/invalid.json")
    assert report.flatten(["rowPosition", "fieldPosition", "code"]) == [
        [None, None, "schema-error"],
    ]
475
476
def test_validate_schema_extra_headers_and_cells():
    """Columns beyond the declared schema yield extra-label plus extra-cell
    for every data row."""
    schema = {"fields": [{"name": "id", "type": "integer"}]}
    report = validate("data/table.csv", schema=schema)
    assert report.flatten(["rowPosition", "fieldPosition", "code"]) == [
        [None, 2, "extra-label"],
        [2, 2, "extra-cell"],
        [3, 2, "extra-cell"],
    ]
485
486
def test_validate_schema_multiple_errors():
    """limit_errors truncates reporting and marks the task as partial."""
    source = "data/schema-errors.csv"
    schema = "data/schema-valid.json"
    report = validate(source, schema=schema, pick_errors=["#row"], limit_errors=3)
    assert report.task.partial
    assert report.task.flatten(["rowPosition", "fieldPosition", "code"]) == [
        [4, 1, "type-error"],
        [4, 2, "constraint-error"],
        [4, 3, "constraint-error"],
    ]
497
498
def test_validate_schema_min_length_constraint():
    """minLength flags only the too-short value; a missing cell is not
    a constraint violation."""
    source = [["row", "word"], [2, "a"], [3, "ab"], [4, "abc"], [5, "abcd"], [6]]
    schema = {
        "fields": [
            {"name": "row", "type": "integer"},
            {"name": "word", "type": "string", "constraints": {"minLength": 2}},
        ]
    }
    report = validate(source, schema=schema, pick_errors=["constraint-error"])
    assert report.flatten(["rowPosition", "fieldPosition", "code"]) == [
        [2, 2, "constraint-error"],
    ]
511
512
def test_validate_schema_max_length_constraint():
    """maxLength flags every value longer than the limit."""
    source = [["row", "word"], [2, "a"], [3, "ab"], [4, "abc"], [5, "abcd"], [6]]
    schema = {
        "fields": [
            {"name": "row", "type": "integer"},
            {"name": "word", "type": "string", "constraints": {"maxLength": 2}},
        ]
    }
    report = validate(source, schema=schema, pick_errors=["constraint-error"])
    assert report.flatten(["rowPosition", "fieldPosition", "code"]) == [
        [4, 2, "constraint-error"],
        [5, 2, "constraint-error"],
    ]
526
527
def test_validate_schema_minimum_constraint():
    """minimum flags only the value below the bound."""
    source = [["row", "score"], [2, 1], [3, 2], [4, 3], [5, 4], [6]]
    schema = {
        "fields": [
            {"name": "row", "type": "integer"},
            {"name": "score", "type": "integer", "constraints": {"minimum": 2}},
        ]
    }
    report = validate(source, schema=schema, pick_errors=["constraint-error"])
    assert report.flatten(["rowPosition", "fieldPosition", "code"]) == [
        [2, 2, "constraint-error"],
    ]
540
541
def test_validate_schema_maximum_constraint():
    """maximum flags every value above the bound."""
    source = [["row", "score"], [2, 1], [3, 2], [4, 3], [5, 4], [6]]
    schema = {
        "fields": [
            {"name": "row", "type": "integer"},
            {"name": "score", "type": "integer", "constraints": {"maximum": 2}},
        ]
    }
    report = validate(source, schema=schema, pick_errors=["constraint-error"])
    assert report.flatten(["rowPosition", "fieldPosition", "code"]) == [
        [4, 2, "constraint-error"],
        [5, 2, "constraint-error"],
    ]
555
556
def test_validate_schema_foreign_key_error_self_referencing():
    """A self-referencing foreign key (empty resource name) validates when
    every cat value exists as an id."""
    source = {
        "path": "data/nested.csv",
        "schema": {
            "fields": [
                {"name": "id", "type": "integer"},
                {"name": "cat", "type": "integer"},
                {"name": "name", "type": "string"},
            ],
            "foreignKeys": [
                {"fields": "cat", "reference": {"resource": "", "fields": "id"}}
            ],
        },
    }
    report = validate(source)
    assert report.valid
573
574
def test_validate_schema_foreign_key_error_self_referencing_invalid():
    """A cat value with no matching id is reported as a foreign-key-error."""
    source = {
        "path": "data/nested-invalid.csv",
        "schema": {
            "fields": [
                {"name": "id", "type": "integer"},
                {"name": "cat", "type": "integer"},
                {"name": "name", "type": "string"},
            ],
            "foreignKeys": [
                {"fields": "cat", "reference": {"resource": "", "fields": "id"}}
            ],
        },
    }
    report = validate(source)
    assert report.flatten(["rowPosition", "fieldPosition", "code"]) == [
        [6, None, "foreign-key-error"],
    ]
593
594
def test_validate_schema_unique_error():
    """A unique constraint violation is reported at the duplicating row."""
    report = validate(
        "data/unique-field.csv",
        schema="data/unique-field.json",
        pick_errors=["unique-error"],
    )
    assert report.flatten(["rowPosition", "fieldPosition", "code"]) == [
        [10, 1, "unique-error"],
    ]
604
605
def test_validate_schema_unique_error_and_type_error():
    """A cell that fails its type check does not count toward uniqueness;
    only the later genuine duplicate triggers unique-error."""
    source = [
        ["id", "unique_number"],
        ["a1", 100],
        ["a2", "bad"],
        ["a3", 100],
    ]
    schema = {
        "fields": [
            {"name": "id"},
            {"name": "unique_number", "type": "number", "constraints": {"unique": True}},
        ]
    }
    report = validate(source, schema=schema)
    assert report.flatten(["rowPosition", "fieldPosition", "code"]) == [
        [3, 2, "type-error"],
        [4, 2, "unique-error"],
    ]
624
625
def test_validate_schema_primary_key_error():
    """A duplicated primary key is reported as a row-level primary-key-error."""
    report = validate(
        "data/unique-field.csv",
        schema="data/unique-field.json",
        pick_errors=["primary-key-error"],
    )
    assert report.flatten(["rowPosition", "fieldPosition", "code"]) == [
        [10, None, "primary-key-error"],
    ]
635
636
def test_validate_schema_primary_key_and_unique_error():
    """Without pick_errors, the same row yields both unique-error and
    primary-key-error."""
    report = validate(
        "data/unique-field.csv",
        schema="data/unique-field.json",
    )
    assert report.flatten(["rowPosition", "fieldPosition", "code"]) == [
        [10, 1, "unique-error"],
        [10, None, "primary-key-error"],
    ]
646
647
def test_validate_schema_primary_key_error_composite():
    """Composite primary keys compare the full tuple; a repeated pair and a
    blank row both violate the key."""
    source = [
        ["id", "name"],
        [1, "Alex"],
        [1, "John"],
        ["", "Paul"],
        [1, "John"],
        ["", None],
    ]
    schema = {
        "fields": [
            {"name": "id", "type": "integer"},
            {"name": "name", "type": "string"},
        ],
        "primaryKey": ["id", "name"],
    }
    report = validate(source, schema=schema)
    assert report.flatten(["rowPosition", "fieldPosition", "code"]) == [
        [5, None, "primary-key-error"],
        [6, None, "blank-row"],
        [6, None, "primary-key-error"],
    ]
670
671
672# Stats
673
674
def test_validate_stats_hash():
    """A matching declared hash (md5 by default) validates; unix-only because
    the digest depends on LF line endings."""
    hash = "6c2c61dd9b0e9c6876139a449ed87933"
    report = validate("data/table.csv", stats={"hash": hash})
    if IS_UNIX:
        assert report.task.valid
680
681
def test_validate_stats_hash_invalid():
    """A wrong declared hash yields hash-count-error naming the actual digest.
    NOTE(review): duplicates test_validate_stats_hash_md5_invalid below."""
    hash = "6c2c61dd9b0e9c6876139a449ed87933"
    report = validate("data/table.csv", stats={"hash": "bad"})
    if IS_UNIX:
        assert report.flatten(["code", "note"]) == [
            ["hash-count-error", 'expected md5 is "bad" and actual is "%s"' % hash],
        ]
689
690
def test_validate_stats_hash_md5():
    """md5 is the default hashing algorithm; a matching digest validates."""
    hash = "6c2c61dd9b0e9c6876139a449ed87933"
    report = validate("data/table.csv", stats={"hash": hash})
    if IS_UNIX:
        assert report.task.valid
696
697
def test_validate_stats_hash_md5_invalid():
    """A wrong md5 digest is reported with expected vs actual values."""
    hash = "6c2c61dd9b0e9c6876139a449ed87933"
    report = validate("data/table.csv", stats={"hash": "bad"})
    if IS_UNIX:
        assert report.flatten(["code", "note"]) == [
            ["hash-count-error", 'expected md5 is "bad" and actual is "%s"' % hash],
        ]
705
706
def test_validate_stats_hash_sha1():
    """hashing="sha1" switches the digest algorithm; a match validates."""
    hash = "db6ea2f8ff72a9e13e1d70c28ed1c6b42af3bb0e"
    report = validate("data/table.csv", hashing="sha1", stats={"hash": hash})
    if IS_UNIX:
        assert report.task.valid
712
713
def test_validate_stats_hash_sha1_invalid():
    """A wrong sha1 digest is reported naming the sha1 algorithm."""
    hash = "db6ea2f8ff72a9e13e1d70c28ed1c6b42af3bb0e"
    report = validate("data/table.csv", hashing="sha1", stats={"hash": "bad"})
    if IS_UNIX:
        assert report.flatten(["code", "note"]) == [
            ["hash-count-error", 'expected sha1 is "bad" and actual is "%s"' % hash],
        ]
721
722
def test_validate_stats_hash_sha256():
    """hashing="sha256" with a matching digest validates."""
    hash = "a1fd6c5ff3494f697874deeb07f69f8667e903dd94a7bc062dd57550cea26da8"
    report = validate("data/table.csv", hashing="sha256", stats={"hash": hash})
    if IS_UNIX:
        assert report.task.valid
728
729
def test_validate_stats_hash_sha256_invalid():
    """A wrong sha256 digest is reported naming the sha256 algorithm."""
    hash = "a1fd6c5ff3494f697874deeb07f69f8667e903dd94a7bc062dd57550cea26da8"
    report = validate("data/table.csv", hashing="sha256", stats={"hash": "bad"})
    if IS_UNIX:
        assert report.flatten(["code", "note"]) == [
            [
                "hash-count-error",
                'expected sha256 is "bad" and actual is "%s"' % hash,
            ],
        ]
740
741
def test_validate_stats_hash_sha512():
    """hashing="sha512" with a matching digest validates."""
    hash = "d52e3f5f5693894282f023b9985967007d7984292e9abd29dca64454500f27fa45b980132d7b496bc84d336af33aeba6caf7730ec1075d6418d74fb8260de4fd"
    report = validate("data/table.csv", hashing="sha512", stats={"hash": hash})
    if IS_UNIX:
        assert report.task.valid
747
748
def test_validate_stats_hash_sha512_invalid():
    """A wrong sha512 digest is reported naming the sha512 algorithm."""
    hash = "d52e3f5f5693894282f023b9985967007d7984292e9abd29dca64454500f27fa45b980132d7b496bc84d336af33aeba6caf7730ec1075d6418d74fb8260de4fd"
    report = validate("data/table.csv", hashing="sha512", stats={"hash": "bad"})
    if IS_UNIX:
        assert report.flatten(["code", "note"]) == [
            [
                "hash-count-error",
                'expected sha512 is "bad" and actual is "%s"' % hash,
            ],
        ]
759
760
def test_validate_stats_bytes():
    """A matching declared byte count validates (unix-only: LF endings)."""
    report = validate("data/table.csv", stats={"bytes": 30})
    if IS_UNIX:
        assert report.task.valid
765
766
def test_validate_stats_bytes_invalid():
    """A wrong byte count yields a file-level byte-count-error with no
    row/field position."""
    report = validate("data/table.csv", stats={"bytes": 40})
    assert report.task.error.get("rowPosition") is None
    assert report.task.error.get("fieldPosition") is None
    if IS_UNIX:
        assert report.flatten(["code", "note"]) == [
            ["byte-count-error", 'expected is "40" and actual is "30"'],
        ]
775
776
def test_validate_stats_rows():
    """A matching declared row count validates."""
    report = validate("data/table.csv", stats={"rows": 2})
    if IS_UNIX:
        assert report.task.valid
781
782
def test_validate_stats_rows_invalid():
    """A wrong row count yields a table-level row-count-error with no
    row/field position."""
    report = validate("data/table.csv", stats={"rows": 3})
    assert report.task.error.get("rowPosition") is None
    assert report.task.error.get("fieldPosition") is None
    if IS_UNIX:
        assert report.flatten(["code", "note"]) == [
            ["row-count-error", 'expected is "3" and actual is "2"'],
        ]
791
792
793# Detector
794
795
def test_validate_detector_sync_schema():
    """schema_sync reorders the declared fields to match the actual header."""
    schema = {
        "fields": [
            {"name": "id", "type": "integer"},
            {"name": "name", "type": "string"},
        ],
    }
    detector = Detector(schema_sync=True)
    report = validate("data/sync-schema.csv", schema=schema, detector=detector)
    assert report.valid
    assert report.task.resource.schema == {
        "fields": [
            {"name": "name", "type": "string"},
            {"name": "id", "type": "integer"},
        ],
    }
812
813
def test_validate_detector_sync_schema_invalid():
    """With schema_sync, declared fields absent from the header are dropped,
    so a partially mismatched schema still validates."""
    source = [["LastName", "FirstName", "Address"], ["Test", "Tester", "23 Avenue"]]
    schema = {"fields": [{"name": "id"}, {"name": "FirstName"}, {"name": "LastName"}]}
    detector = Detector(schema_sync=True)
    report = validate(source, schema=schema, detector=detector)
    assert report.valid
820
821
def test_validate_detector_headers_errors():
    """With schema_sync, constraints on synced fields still apply: the missing
    language value at row 4 fails its required constraint."""
    source = [
        ["id", "last_name", "first_name", "language"],
        [1, "Alex", "John", "English"],
        [2, "Peters", "John", "Afrikaans"],
        [3, "Smith", "Paul", None],
    ]
    schema = {
        "fields": [
            {"name": "id", "type": "number"},
            {"name": "language", "constraints": {"required": True}},
            {"name": "country"},
        ]
    }
    detector = Detector(schema_sync=True)
    report = validate(source, schema=schema, detector=detector)
    assert report.flatten(["rowPosition", "fieldPosition", "code"]) == [
        [4, 4, "constraint-error"],
    ]
841
842
def test_validate_detector_patch_schema():
    """schema_patch merges extra keys into the inferred schema."""
    detector = Detector(schema_patch={"missingValues": ["-"]})
    report = validate("data/table.csv", detector=detector)
    assert report.valid
    assert report.task.resource.schema == {
        "fields": [
            {"name": "id", "type": "integer"},
            {"name": "name", "type": "string"},
        ],
        "missingValues": ["-"],
    }
854
855
def test_validate_detector_patch_schema_fields():
    """schema_patch can also override individual field definitions by name."""
    detector = Detector(
        schema_patch={"fields": {"id": {"type": "string"}}, "missingValues": ["-"]}
    )
    report = validate("data/table.csv", detector=detector)
    assert report.valid
    assert report.task.resource.schema == {
        "fields": [{"name": "id", "type": "string"}, {"name": "name", "type": "string"}],
        "missingValues": ["-"],
    }
866
867
def test_validate_detector_infer_type_string():
    """field_type="string" forces every inferred field to string."""
    detector = Detector(field_type="string")
    report = validate("data/table.csv", detector=detector)
    assert report.valid
    assert report.task.resource.schema == {
        "fields": [{"name": "id", "type": "string"}, {"name": "name", "type": "string"}],
    }
875
876
def test_validate_detector_infer_type_any():
    """field_type="any" forces every inferred field to the any type."""
    detector = Detector(field_type="any")
    report = validate("data/table.csv", detector=detector)
    assert report.valid
    assert report.task.resource.schema == {
        "fields": [{"name": "id", "type": "any"}, {"name": "name", "type": "any"}],
    }
884
885
def test_validate_detector_infer_names():
    """field_names supplies names for a headerless table; labels stay empty
    while the computed header uses the provided names."""
    detector = Detector(field_names=["id", "name"])
    report = validate(
        "data/without-headers.csv",
        layout={"header": False},
        detector=detector,
    )
    assert report.task.resource.schema["fields"][0]["name"] == "id"
    assert report.task.resource.schema["fields"][1]["name"] == "name"
    assert report.task.resource.stats["rows"] == 3
    assert report.task.resource.labels == []
    assert report.task.resource.header == ["id", "name"]
    assert report.valid
899
900
901# Validation
902
903
def test_validate_pick_errors():
    """pick_errors restricts both the reported errors and the task scope."""
    report = validate("data/invalid.csv", pick_errors=["blank-label", "blank-row"])
    assert report.task.scope == ["blank-label", "blank-row"]
    assert report.flatten(["rowPosition", "fieldPosition", "code"]) == [
        [None, 3, "blank-label"],
        [4, None, "blank-row"],
    ]
911
912
def test_validate_pick_errors_tags():
    """A #tag in pick_errors expands to every error code carrying that tag."""
    report = validate("data/invalid.csv", pick_errors=["#header"])
    assert report.task.scope == [
        "blank-header",
        "extra-label",
        "missing-label",
        "blank-label",
        "duplicate-label",
        "incorrect-label",
    ]
    assert report.flatten(["rowPosition", "fieldPosition", "code"]) == [
        [None, 3, "blank-label"],
        [None, 4, "duplicate-label"],
    ]
927
928
def test_validate_skip_errors():
    """skip_errors removes the listed codes from the report."""
    report = validate("data/invalid.csv", skip_errors=["blank-label", "blank-row"])
    assert report.flatten(["rowPosition", "fieldPosition", "code"]) == [
        [None, 4, "duplicate-label"],
        [2, 3, "missing-cell"],
        [2, 4, "missing-cell"],
        [3, 3, "missing-cell"],
        [3, 4, "missing-cell"],
        [5, 5, "extra-cell"],
    ]
939
940
def test_validate_skip_errors_tags():
    """A #tag in skip_errors suppresses every error code carrying that tag."""
    result = validate("data/invalid.csv", skip_errors=["#header"])
    expected = [
        [2, 3, "missing-cell"],
        [2, 4, "missing-cell"],
        [3, 3, "missing-cell"],
        [3, 4, "missing-cell"],
        [4, None, "blank-row"],
        [5, 5, "extra-cell"],
    ]
    assert result.flatten(["rowPosition", "fieldPosition", "code"]) == expected
951
952
def test_validate_invalid_limit_errors():
    """limit_errors truncates reporting and marks the task as partial."""
    result = validate("data/invalid.csv", limit_errors=3)
    assert result.task.partial
    expected = [
        [None, 3, "blank-label"],
        [None, 4, "duplicate-label"],
        [2, 3, "missing-cell"],
    ]
    assert result.flatten(["rowPosition", "fieldPosition", "code"]) == expected
961
962
def test_validate_structure_errors_with_limit_errors():
    """Structure errors also respect limit_errors and set the partial flag."""
    result = validate("data/structure-errors.csv", limit_errors=3)
    assert result.task.partial
    expected = [
        [4, None, "blank-row"],
        [5, 4, "extra-cell"],
        [5, 5, "extra-cell"],
    ]
    assert result.flatten(["rowPosition", "fieldPosition", "code"]) == expected
971
972
@pytest.mark.ci
def test_validate_limit_memory():
    """Validation aborts with a task-error once limit_memory (in MB) is exceeded."""

    def source():
        # Effectively unbounded single-column stream of integers
        return ([integer] for integer in range(1, 100000000))

    schema = {"fields": [{"name": "integer", "type": "integer"}], "primaryKey": "integer"}
    result = validate(source, schema=schema, layout=Layout(header=False), limit_memory=50)
    assert result.flatten(["code", "note"]) == [
        ["task-error", 'exceeded memory limit "50MB"']
    ]
982
983
@pytest.mark.ci
def test_validate_limit_memory_small():
    """Even a tiny 1MB limit_memory triggers the memory task-error."""

    def source():
        # Effectively unbounded single-column stream of integers
        return ([integer] for integer in range(1, 100000000))

    schema = {"fields": [{"name": "integer", "type": "integer"}], "primaryKey": "integer"}
    result = validate(source, schema=schema, layout=Layout(header=False), limit_memory=1)
    assert result.flatten(["code", "note"]) == [
        ["task-error", 'exceeded memory limit "1MB"']
    ]
993
994
def test_validate_custom_check():
    """A user-defined Check subclass contributes its errors to the report."""

    class custom(Check):
        def validate_row(self, row):
            # Emit one blank-row error per data row, regardless of content
            error = errors.BlankRowError(
                note="",
                cells=[str(cell) for cell in row.values()],
                row_number=row.row_number,
                row_position=row.row_position,
            )
            yield error

    result = validate("data/table.csv", checks=[custom()])
    assert result.flatten(["rowPosition", "fieldPosition", "code"]) == [
        [2, None, "blank-row"],
        [3, None, "blank-row"],
    ]
1013
1014
def test_validate_custom_check_with_arguments():
    """A custom Check can accept keyword options and use them while validating."""

    class custom(Check):
        def __init__(self, descriptor=None, *, row_position=None):
            # Store the option on the descriptor before delegating to the base class
            self.setinitial("rowPosition", row_position)
            super().__init__(descriptor)

        def validate_row(self, row):
            # Report the configured position instead of the row's real one
            error = errors.BlankRowError(
                note="",
                cells=[str(cell) for cell in row.values()],
                row_number=row.row_number,
                row_position=self.get("rowPosition") or row.row_position,
            )
            yield error

    result = validate("data/table.csv", checks=[custom(row_position=1)])
    assert result.flatten(["rowPosition", "fieldPosition", "code"]) == [
        [1, None, "blank-row"],
        [1, None, "blank-row"],
    ]
1037
1038
def test_validate_custom_check_function_based():
    """A plain generator function is accepted as a check, not only Check subclasses."""

    def custom(row):
        # Emit one blank-row error per data row, regardless of content
        error = errors.BlankRowError(
            note="",
            cells=[str(cell) for cell in row.values()],
            row_number=row.row_number,
            row_position=row.row_position,
        )
        yield error

    result = validate("data/table.csv", checks=[custom])
    assert result.flatten(["rowPosition", "fieldPosition", "code"]) == [
        [2, None, "blank-row"],
        [3, None, "blank-row"],
    ]
1056
1057
def test_validate_custom_check_bad_name():
    """An unknown check code yields a check-error suggesting a plugin install."""
    result = validate("data/table.csv", checks=[{"code": "bad"}])
    expected_note = 'cannot create check "bad". Try installing "frictionless-bad"'
    assert result.flatten(["code", "note"]) == [["check-error", expected_note]]
1063
1064
1065# Issues
1066
1067
def test_validate_infer_fields_issue_223():
    """schema_patch by field name merges with inferred fields (issue #223)."""
    rows = [["name1", "name2"], ["123", "abc"], ["456", "def"], ["789", "ghi"]]
    patch = Detector(schema_patch={"fields": {"name": {"type": "string"}}})
    assert validate(rows, detector=patch).valid
1073
1074
def test_validate_infer_fields_issue_225():
    """Patched schema still detects a short row as a missing cell (issue #225)."""
    rows = [["name1", "name2"], ["123", None], ["456", None], ["789"]]
    patch = Detector(schema_patch={"fields": {"name": {"type": "string"}}})
    result = validate(rows, detector=patch)
    assert result.flatten(["rowPosition", "fieldPosition", "code"]) == [
        [4, 2, "missing-cell"],
    ]
1082
1083
def test_validate_fails_with_wrong_encoding_issue_274():
    """An explicit encoding overrides chardet's detection (issue #274)."""
    # By default the encoding is detected incorrectly by chardet, so pass it explicitly
    result = validate("data/encoding-issue-274.csv", encoding="utf-8")
    assert result.valid
1088
1089
def test_validate_wide_table_with_order_fields_issue_277():
    """schema_sync reorders a wide table's fields to match the schema (issue #277)."""
    result = validate(
        "data/issue-277.csv",
        schema="data/issue-277.json",
        detector=Detector(schema_sync=True),
    )
    expected = [
        [49, 50, "constraint-error"],
        [68, 50, "constraint-error"],
        [69, 50, "constraint-error"],
    ]
    assert result.flatten(["rowPosition", "fieldPosition", "code"]) == expected
1100
1101
def test_validate_invalid_table_schema_issue_304():
    """A bad field type in the schema produces a field-error (issue #304)."""
    rows = [["name", "age"], ["Alex", "33"]]
    bad_schema = {"fields": [{"name": "name"}, {"name": "age", "type": "bad"}]}
    result = validate(rows, schema=bad_schema)
    expected_note = (
        "\"{'name': 'age', 'type': 'bad'} is not valid under any of the given schemas\""
        ' at "" in metadata and at "anyOf" in profile'
    )
    assert result.flatten(["code", "note"]) == [["field-error", expected_note]]
1112
1113
def test_validate_table_is_invalid_issue_312():
    """Header and row problems in an xlsx file are all reported (issue #312)."""
    result = validate("data/issue-312.xlsx")
    expected = [
        [None, 3, "blank-label"],
        [None, 4, "duplicate-label"],
        [None, 5, "blank-label"],
        [5, None, "blank-row"],
    ]
    assert result.flatten(["rowPosition", "fieldPosition", "code"]) == expected
1122
1123
def test_validate_order_fields_issue_313():
    """pick_fields plus schema_sync validates columns in schema order (issue #313)."""
    schema = {
        "fields": [
            {"name": "Column_1", "type": "string"},
            {"name": "Column_2", "type": "string", "constraints": {"required": True}},
            {"name": "Column_3", "type": "string"},
            {"name": "Column_4", "type": "string"},
            {"name": "Column_5", "type": "string"},
        ]
    }
    result = validate(
        "data/issue-313.xlsx",
        layout=Layout(pick_fields=[1, 2, 3, 4, 5]),
        schema=schema,
        detector=Detector(schema_sync=True),
    )
    assert result.valid
1139
1140
def test_validate_missing_local_file_raises_scheme_error_issue_315():
    """A nonexistent local path is reported as a scheme-error (issue #315)."""
    result = validate("bad-path.csv")
    expected_note = "[Errno 2] No such file or directory: 'bad-path.csv'"
    assert result.flatten(["code", "note"]) == [["scheme-error", expected_note]]
1146
1147
def test_validate_inline_not_a_binary_issue_349():
    """A text-mode file object is accepted as a validation source (issue #349)."""
    with open("data/table.csv") as source:
        result = validate(source)
    assert result.valid
1152
1153
def test_validate_newline_inside_label_issue_811():
    """A header label containing a newline does not break validation (issue #811)."""
    assert validate("data/issue-811.csv").valid
1157
1158
def test_validate_resource_from_json_format_issue_827():
    """A JSON data file passed via the path keyword validates cleanly (issue #827)."""
    assert validate(path="data/table.json").valid
1162
1163
def test_validate_resource_none_is_not_iterable_enum_constraint_issue_833():
    """An enum constraint with missing values does not crash validation (issue #833)."""
    result = validate("data/issue-833.csv", schema="data/issue-833.json")
    assert result.valid
1167
1168
def test_validate_resource_header_row_has_first_number_issue_870():
    """A numeric first header cell validates with a row limit applied (issue #870)."""
    result = validate("data/issue-870.xlsx", layout={"limitRows": 5})
    assert result.valid
1172
1173
def test_validate_resource_descriptor_type_invalid():
    """Passing a CSV path as a descriptor is reported as a resource-error."""
    result = validate(descriptor="data/table.csv")
    assert result.flatten() == [[1, None, None, "resource-error"]]
1177