import sys
import petl
import typer
from typing import List
from ..validate import validate
from ..detector import Detector
from ..layout import Layout
from .main import program
from .. import helpers
from . import common


@program.command(name="validate")
def program_validate(
    # Source
    source: List[str] = common.source,
    type: str = common.type,
    # File
    path: str = common.path,
    scheme: str = common.scheme,
    format: str = common.format,
    hashing: str = common.hashing,
    encoding: str = common.encoding,
    innerpath: str = common.innerpath,
    compression: str = common.compression,
    # Control
    control: str = common.control,
    # Dialect
    dialect: str = common.dialect,
    # Layout
    header_rows: str = common.header_rows,
    header_join: str = common.header_join,
    pick_fields: str = common.pick_fields,
    skip_fields: str = common.skip_fields,
    limit_fields: int = common.limit_fields,
    offset_fields: int = common.offset_fields,
    pick_rows: str = common.pick_rows,
    skip_rows: str = common.skip_rows,
    limit_rows: int = common.limit_rows,
    offset_rows: int = common.offset_rows,
    # Schema
    schema: str = common.schema,
    # Stats
    stats_hash: str = common.stats_hash,
    stats_bytes: int = common.stats_bytes,
    stats_fields: int = common.stats_fields,
    stats_rows: int = common.stats_rows,
    # Detector
    buffer_size: int = common.buffer_size,
    sample_size: int = common.sample_size,
    field_type: str = common.field_type,
    field_names: str = common.field_names,
    field_confidence: float = common.field_confidence,
    field_float_numbers: bool = common.field_float_numbers,
    field_missing_values: str = common.field_missing_values,
    schema_sync: bool = common.schema_sync,
    # Command
    basepath: str = common.basepath,
    pick_errors: str = common.pick_errors,
    skip_errors: str = common.skip_errors,
    limit_errors: int = common.limit_errors,
    limit_memory: int = common.limit_memory,
    original: bool = common.original,
    parallel: bool = common.parallel,
    yaml: bool = common.yaml,
    json: bool = common.json,
):
68    """
69    Validate a data source.
70
71    Based on the inferred data source type it will validate resource or package.
72    Default output format is YAML with a front matter.
73    """
74
75    # Support stdin
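    # When neither a source nor a path is given and stdin is piped, read raw
    # bytes from stdin and treat them as the single source.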
    is_stdin = False
    if not source and not path:
        if not sys.stdin.isatty():
            is_stdin = True
            source = [sys.stdin.buffer.read()]

    # Validate input
    if not source and not path:
        message = 'Providing "source" or "path" is required'
        typer.secho(message, err=True, fg=typer.colors.RED, bold=True)
        raise typer.Exit(1)

    # Normalize parameters
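    # Typer collects `source` as a list: keep a list only when several sources
    # are given, unwrap a single item to a scalar, and fall back to None.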
    source = list(source) if len(source) > 1 else (source[0] if source else None)
    control = helpers.parse_json_string(control)
    dialect = helpers.parse_json_string(dialect)
    header_rows = helpers.parse_csv_string(header_rows, convert=int)
    pick_fields = helpers.parse_csv_string(pick_fields, convert=int, fallback=True)
    skip_fields = helpers.parse_csv_string(skip_fields, convert=int, fallback=True)
    pick_rows = helpers.parse_csv_string(pick_rows, convert=int, fallback=True)
    skip_rows = helpers.parse_csv_string(skip_rows, convert=int, fallback=True)
    field_names = helpers.parse_csv_string(field_names)
    field_missing_values = helpers.parse_csv_string(field_missing_values)
    pick_errors = helpers.parse_csv_string(pick_errors)
    skip_errors = helpers.parse_csv_string(skip_errors)

    # Prepare layout
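    # The `or None` guard keeps the layout undefined when no layout flags were
    # given (an empty, mapping-like Layout evaluates falsy).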
    layout = (
        Layout(
            header_rows=header_rows,
            header_join=header_join,
            pick_fields=pick_fields,
            skip_fields=skip_fields,
            limit_fields=limit_fields,
            offset_fields=offset_fields,
            pick_rows=pick_rows,
            skip_rows=skip_rows,
            limit_rows=limit_rows,
            offset_rows=offset_rows,
        )
        or None
    )

    # Prepare stats
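    # remove_non_values drops the stats entries that were not provided; if the
    # resulting dict is empty, `or None` leaves stats undefined as well.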
    stats = (
        helpers.remove_non_values(
            dict(
                hash=stats_hash,
                bytes=stats_bytes,
                fields=stats_fields,
                rows=stats_rows,
            )
        )
        or None
    )

    # Prepare detector
    detector = Detector(
        **helpers.remove_non_values(
            dict(
                buffer_size=buffer_size,
                sample_size=sample_size,
                field_type=field_type,
                field_names=field_names,
                field_confidence=field_confidence,
                field_float_numbers=field_float_numbers,
                field_missing_values=field_missing_values,
                schema_sync=schema_sync,
            )
        )
    )

    # Prepare options
    options = helpers.remove_non_values(
        dict(
            type=type,
            # Spec
            path=path,
            scheme=scheme,
            format=format,
            hashing=hashing,
            encoding=encoding,
            innerpath=innerpath,
            compression=compression,
            control=control,
            dialect=dialect,
            layout=layout,
            schema=schema,
            stats=stats,
            # Extra
            basepath=basepath,
            detector=detector,
            pick_errors=pick_errors,
            skip_errors=skip_errors,
            limit_errors=limit_errors,
            limit_memory=limit_memory,
            original=original,
            parallel=parallel,
        )
    )

    # Validate source
    try:
        report = validate(source, **options)
    except Exception as exception:
        typer.secho(str(exception), err=True, fg=typer.colors.RED, bold=True)
        raise typer.Exit(1)

    # Return JSON
    if json:
        content = report.to_json()
        typer.secho(content)
        raise typer.Exit()

    # Return YAML
    if yaml:
        content = report.to_yaml().strip()
        typer.secho(content)
        raise typer.Exit()

    # Return report
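    # Top-level errors (not attached to a single task) are printed first as a
    # code/message table rendered with petl.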
    if report.errors:
        content = []
        if is_stdin:
            source = "stdin"
        prefix = "invalid"
        typer.secho(f"# {'-'*len(prefix)}", bold=True)
        typer.secho(f"# {prefix}: {source}", bold=True)
        typer.secho(f"# {'-'*len(prefix)}", bold=True)
        for error in report.errors:
            content.append([error.code, error.message])
        typer.secho(
            str(
                petl.util.vis.lookall(
                    [["code", "message"]] + content, vrepr=str, style="simple"
                )
            )
        )

    # Return tables
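    # Each task gets its own valid/invalid header; invalid tasks also print a
    # table of row/field positions, error codes, and messages.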
    prev_invalid = False
    for number, task in enumerate(report.tasks, start=1):
        if number != 1 and prev_invalid:
            typer.secho("")
        prefix = "valid" if task.valid else "invalid"
        source = task.resource.path
        if is_stdin:
            source = "stdin"
        typer.secho(f"# {'-'*len(prefix)}", bold=True)
        typer.secho(f"# {prefix}: {source}", bold=True)
        typer.secho(f"# {'-'*len(prefix)}", bold=True)
        if task.errors:
            prev_invalid = True
            typer.secho("")
            content = []
            for error in task.errors:
                content.append(
                    [
                        error.get("rowPosition", ""),
                        error.get("fieldPosition", ""),
                        error.code,
                        error.message,
                    ]
                )
            typer.secho(
                str(
                    petl.util.vis.lookall(
                        [["row", "field", "code", "message"]] + content,
                        vrepr=str,
                        style="simple",
                    )
                )
            )

    # Return retcode (0 when the report is valid, 1 otherwise)
    raise typer.Exit(code=int(not report.valid))