import sys
import petl
import typer
from typing import List
from ..validate import validate
from ..detector import Detector
from ..layout import Layout
from .main import program
from .. import helpers
from . import common


def _echo_banner(prefix, source):
    # Print the three-line "# <prefix>: <source>" heading in bold; the dashed
    # rules above and below are sized to the length of the prefix.
    rule = f"# {'-'*len(prefix)}"
    typer.secho(rule, bold=True)
    typer.secho(f"# {prefix}: {source}", bold=True)
    typer.secho(rule, bold=True)


# NOTE: the docstring of `program_validate` is kept short and unchanged on
# purpose, because Typer renders a command's docstring as its `--help` text.
# Maintainer-facing documentation therefore lives in comments.
#
# Flow of the command:
#   1. read the source from stdin when neither "source" nor "path" is given
#      and stdin is not a TTY;
#   2. exit with code 1 if there is still nothing to validate;
#   3. parse the JSON/CSV string options and assemble layout, stats, detector
#      and the keyword options passed to `validate`;
#   4. run `validate`; any exception is printed in red to stderr and the
#      command exits with code 1;
#   5. print the report as JSON (`--json`), as YAML (`--yaml`), or as
#      human-readable tables; the table mode exits with 0 when the report is
#      valid and 1 otherwise, the JSON/YAML modes exit with the default code.
@program.command(name="validate")
def program_validate(
    # Source
    source: List[str] = common.source,
    type: str = common.type,
    # File
    path: str = common.path,
    scheme: str = common.scheme,
    format: str = common.format,
    hashing: str = common.hashing,
    encoding: str = common.encoding,
    innerpath: str = common.innerpath,
    compression: str = common.compression,
    # Control
    control: str = common.control,
    # Dialect
    dialect: str = common.dialect,
    # Layout
    header_rows: str = common.header_rows,
    header_join: str = common.header_join,
    pick_fields: str = common.pick_fields,
    skip_fields: str = common.skip_fields,
    limit_fields: int = common.limit_fields,
    offset_fields: int = common.offset_fields,
    pick_rows: str = common.pick_rows,
    skip_rows: str = common.skip_rows,
    limit_rows: int = common.limit_rows,
    offset_rows: int = common.offset_rows,
    # Schema
    schema: str = common.schema,
    # Stats
    stats_hash: str = common.stats_hash,
    stats_bytes: int = common.stats_bytes,
    stats_fields: int = common.stats_fields,
    stats_rows: int = common.stats_rows,
    # Detector
    buffer_size: int = common.buffer_size,
    sample_size: int = common.sample_size,
    field_type: str = common.field_type,
    field_names: str = common.field_names,
    field_confidence: float = common.field_confidence,
    field_float_numbers: bool = common.field_float_numbers,
    field_missing_values: str = common.field_missing_values,
    schema_sync: bool = common.schema_sync,
    # Command
    basepath: str = common.basepath,
    pick_errors: str = common.pick_errors,
    skip_errors: str = common.skip_errors,
    limit_errors: int = common.limit_errors,
    limit_memory: int = common.limit_memory,
    original: bool = common.original,
    parallel: bool = common.parallel,
    yaml: bool = common.yaml,
    json: bool = common.json,
):
    """
    Validate a data source.

    Based on the inferred data source type it will validate resource or package.
    Default output format is YAML with a front matter.
    """

    # Support stdin
    is_stdin = False
    if not source and not path:
        if not sys.stdin.isatty():
            is_stdin = True
            source = [sys.stdin.buffer.read()]

    # Validate input
    if not source and not path:
        message = 'Providing "source" or "path" is required'
        typer.secho(message, err=True, fg=typer.colors.RED, bold=True)
        raise typer.Exit(1)

    # Normalize parameters
    # A single positional source is unwrapped, several are kept as a list and
    # an absent one becomes None (only "path" was given).
    # NOTE(review): depending on how `common.source` is declared, an omitted
    # source may arrive as None rather than an empty list, so emptiness is
    # checked before `len()` is ever called.
    if not source:
        source = None
    elif len(source) == 1:
        source = source[0]
    else:
        source = list(source)
    control = helpers.parse_json_string(control)
    dialect = helpers.parse_json_string(dialect)
    header_rows = helpers.parse_csv_string(header_rows, convert=int)
    pick_fields = helpers.parse_csv_string(pick_fields, convert=int, fallback=True)
    skip_fields = helpers.parse_csv_string(skip_fields, convert=int, fallback=True)
    pick_rows = helpers.parse_csv_string(pick_rows, convert=int, fallback=True)
    skip_rows = helpers.parse_csv_string(skip_rows, convert=int, fallback=True)
    field_names = helpers.parse_csv_string(field_names)
    field_missing_values = helpers.parse_csv_string(field_missing_values)
    pick_errors = helpers.parse_csv_string(pick_errors)
    skip_errors = helpers.parse_csv_string(skip_errors)

    # Prepare layout
    # A falsy Layout is replaced by None so that it is not forwarded at all.
    layout = (
        Layout(
            header_rows=header_rows,
            header_join=header_join,
            pick_fields=pick_fields,
            skip_fields=skip_fields,
            limit_fields=limit_fields,
            offset_fields=offset_fields,
            pick_rows=pick_rows,
            skip_rows=skip_rows,
            limit_rows=limit_rows,
            offset_rows=offset_rows,
        )
        or None
    )

    # Prepare stats
    stats = (
        helpers.remove_non_values(
            dict(
                hash=stats_hash,
                bytes=stats_bytes,
                fields=stats_fields,
                rows=stats_rows,
            )
        )
        or None
    )

    # Prepare detector
    detector = Detector(
        **helpers.remove_non_values(
            dict(
                buffer_size=buffer_size,
                sample_size=sample_size,
                field_type=field_type,
                field_names=field_names,
                field_confidence=field_confidence,
                field_float_numbers=field_float_numbers,
                field_missing_values=field_missing_values,
                schema_sync=schema_sync,
            )
        )
    )

    # Prepare options
    options = helpers.remove_non_values(
        dict(
            type=type,
            # Spec
            path=path,
            scheme=scheme,
            format=format,
            hashing=hashing,
            encoding=encoding,
            innerpath=innerpath,
            compression=compression,
            control=control,
            dialect=dialect,
            layout=layout,
            schema=schema,
            stats=stats,
            # Extra
            basepath=basepath,
            detector=detector,
            pick_errors=pick_errors,
            skip_errors=skip_errors,
            limit_errors=limit_errors,
            limit_memory=limit_memory,
            original=original,
            parallel=parallel,
        )
    )

    # Validate source
    # This is the command's top-level boundary: any failure is reported to the
    # user on stderr and turned into exit code 1.
    try:
        report = validate(source, **options)
    except Exception as exception:
        typer.secho(str(exception), err=True, fg=typer.colors.RED, bold=True)
        raise typer.Exit(1)

    # Return JSON
    if json:
        content = report.to_json()
        typer.secho(content)
        raise typer.Exit()

    # Return YAML
    if yaml:
        content = report.to_yaml().strip()
        typer.secho(content)
        raise typer.Exit()

    # Return report
    # Report-level errors are listed in their own table before the tasks.
    if report.errors:
        if is_stdin:
            source = "stdin"
        _echo_banner("invalid", source)
        content = [[error.code, error.message] for error in report.errors]
        typer.secho(
            str(
                petl.util.vis.lookall(
                    [["code", "message"]] + content, vrepr=str, style="simple"
                )
            )
        )

    # Return tables
    # NOTE(review): `prev_invalid` is never reset to False, so once one task
    # has errors every later task is preceded by an empty line; confirm that
    # this spacing is intended.
    prev_invalid = False
    for number, task in enumerate(report.tasks, start=1):
        if number != 1 and prev_invalid:
            typer.secho("")
        prefix = "valid" if task.valid else "invalid"
        source = task.resource.path
        if is_stdin:
            source = "stdin"
        _echo_banner(prefix, source)
        if task.errors:
            prev_invalid = True
            typer.secho("")
            content = [
                [
                    error.get("rowPosition", ""),
                    error.get("fieldPosition", ""),
                    error.code,
                    error.message,
                ]
                for error in task.errors
            ]
            typer.secho(
                str(
                    petl.util.vis.lookall(
                        [["row", "field", "code", "message"]] + content,
                        vrepr=str,
                        style="simple",
                    )
                )
            )

    # Return retcode
    raise typer.Exit(code=int(not report.valid))