import typing
from .metadata import Metadata
from . import settings
from . import helpers
from . import errors


def _passes_filters(pick_items, skip_items, is_hit):
    """Tell whether a candidate survives a pick list and a skip list.

    Parameters:
        pick_items (list?): compiled pick items; ignored when empty/None
        skip_items (list?): compiled skip items; ignored when empty/None
        is_hit (callable): predicate telling whether one item matches

    Returns:
        bool: True if the candidate matches at least one pick item (when a
            pick list is given) and matches no skip item
    """
    # Using any() instead of flipping a flag per matching item ensures that
    # two matching items cannot cancel each other out, and that a skip match
    # can never re-include a candidate already rejected by the pick list.
    if pick_items and not any(is_hit(item) for item in pick_items):
        return False
    if skip_items and any(is_hit(item) for item in skip_items):
        return False
    return True


class Layout(Metadata):
    """Layout representation

    API      | Usage
    -------- | --------
    Public   | `from frictionless import Layout`

    Parameters:
        descriptor? (str|dict): layout descriptor
        header? (bool): whether there is a header row
        header_rows? (int[]): row numbers making up the header
        header_join? (str): joiner used to merge multi-row headers
        header_case? (bool): whether the header is case sensitive
        pick_fields? ((str|int)[]): what fields to pick
        skip_fields? ((str|int)[]): what fields to skip
        limit_fields? (int): amount of fields
        offset_fields? (int): from what field to start
        pick_rows? ((str|int)[]): what rows to pick
        skip_rows? ((str|int)[]): what rows to skip
        limit_rows? (int): amount of rows
        offset_rows? (int): from what row to start
    """

    def __init__(
        self,
        descriptor=None,
        *,
        header=None,
        header_rows=None,
        header_join=None,
        header_case=None,
        pick_fields=None,
        skip_fields=None,
        limit_fields=None,
        offset_fields=None,
        pick_rows=None,
        skip_rows=None,
        limit_rows=None,
        offset_rows=None,
    ):
        self.setinitial("header", header)
        self.setinitial("headerRows", header_rows)
        self.setinitial("headerJoin", header_join)
        self.setinitial("headerCase", header_case)
        self.setinitial("pickFields", pick_fields)
        self.setinitial("skipFields", skip_fields)
        self.setinitial("limitFields", limit_fields)
        self.setinitial("offsetFields", offset_fields)
        self.setinitial("pickRows", pick_rows)
        self.setinitial("skipRows", skip_rows)
        self.setinitial("limitRows", limit_rows)
        self.setinitial("offsetRows", offset_rows)
        super().__init__(descriptor)

    @Metadata.property
    def header(self):
        """
        Returns:
            bool: if there is a header row
        """
        return self.get("header", settings.DEFAULT_HEADER)

    @Metadata.property
    def header_rows(self):
        """
        Returns:
            int[]: header rows (empty when there is no header)
        """
        if not self.header:
            return []
        return self.get("headerRows", settings.DEFAULT_HEADER_ROWS)

    @Metadata.property
    def header_join(self):
        """
        Returns:
            str: header joiner
        """
        return self.get("headerJoin", settings.DEFAULT_HEADER_JOIN)

    @Metadata.property
    def header_case(self):
        """
        Returns:
            bool: header case sensitive
        """
        return self.get("headerCase", settings.DEFAULT_HEADER_CASE)

    @Metadata.property
    def pick_fields(self):
        """
        Returns:
            (str|int)[]?: pick fields
        """
        return self.get("pickFields")

    @Metadata.property
    def skip_fields(self):
        """
        Returns:
            (str|int)[]?: skip fields
        """
        return self.get("skipFields")

    @Metadata.property
    def limit_fields(self):
        """
        Returns:
            int?: limit fields
        """
        return self.get("limitFields")

    @Metadata.property
    def offset_fields(self):
        """
        Returns:
            int?: offset fields
        """
        return self.get("offsetFields")

    @Metadata.property
    def pick_rows(self):
        """
        Returns:
            (str|int)[]?: pick rows
        """
        return self.get("pickRows")

    @Metadata.property
    def skip_rows(self):
        """
        Returns:
            (str|int)[]?: skip rows
        """
        return self.get("skipRows")

    @Metadata.property
    def limit_rows(self):
        """
        Returns:
            int?: limit rows
        """
        return self.get("limitRows")

    @Metadata.property
    def offset_rows(self):
        """
        Returns:
            int?: offset rows
        """
        return self.get("offsetRows")

    @Metadata.property(write=False)
    def is_field_filtering(self):
        """
        Returns:
            bool: whether there is a field filtering
        """
        return (
            self.pick_fields is not None
            or self.skip_fields is not None
            or self.limit_fields is not None
            or self.offset_fields is not None
        )

    @Metadata.property(write=False)
    def pick_fields_compiled(self):
        """
        Returns:
            re?: compiled pick fields
        """
        return helpers.compile_regex(self.pick_fields)

    @Metadata.property(write=False)
    def skip_fields_compiled(self):
        """
        Returns:
            re?: compiled skip fields
        """
        return helpers.compile_regex(self.skip_fields)

    @Metadata.property(write=False)
    def pick_rows_compiled(self):
        """
        Returns:
            re?: compiled pick rows
        """
        return helpers.compile_regex(self.pick_rows)

    @Metadata.property(write=False)
    def skip_rows_compiled(self):
        """
        Returns:
            re?: compiled skip rows
        """
        return helpers.compile_regex(self.skip_rows)

    # Expand

    def expand(self):
        """Expand metadata"""
        self.setdefault("header", self.header)
        self.setdefault("headerRows", self.header_rows)
        self.setdefault("headerJoin", self.header_join)
        self.setdefault("headerCase", self.header_case)

    # Read

    def read_labels(self, sample):
        """Read labels and their field positions from a sample

        Parameters:
            sample (any[][]): list of rows, each row being a list of cells

        Returns:
            (str[], int[]): labels and 1-based positions of the kept fields;
                without a header the labels are empty and every position of
                the first sample row is returned
        """

        # No header
        if not self.header:
            # An empty sample has no first row to measure
            width = len(sample[0]) if sample else 0
            return [], list(range(1, width + 1))

        # Collect lists
        header_rows = self.header_rows
        last_header_row = max(header_rows, default=0)
        lists = []
        row_number = 0
        for row_position, cells in enumerate(sample, start=1):
            if self.read_filter_rows(cells, row_position=row_position):
                row_number += 1
                if row_number in header_rows:
                    lists.append(helpers.stringify_label(cells))
                if row_number >= last_header_row:
                    break

        # Get labels
        raw_labels = []
        prev_cells = {}
        header_join = self.header_join
        for cells in lists:
            for index, cell in enumerate(cells):
                # A cell repeating the last value seen in this column is not
                # joined again
                if prev_cells.get(index) == cell:
                    continue
                prev_cells[index] = cell
                if len(raw_labels) <= index:
                    raw_labels.append(cell)
                    continue
                raw_labels[index] = header_join.join([raw_labels[index], cell])

        # Filter labels
        labels = []
        field_positions = []
        limit = self.limit_fields
        offset = self.offset_fields or 0
        for field_position, label in enumerate(raw_labels, start=1):
            if self.read_filter_fields(label, field_position=field_position):
                if offset:
                    offset -= 1
                    continue
                labels.append(label)
                field_positions.append(field_position)
                if limit and limit <= len(labels):
                    break

        return labels, field_positions

    def read_fragment(self, sample):
        """Read the data rows (non-header rows) from a sample

        Parameters:
            sample (any[][]): list of rows, each row being a list of cells

        Returns:
            (any[][], int[]): filtered data rows and their 1-based positions
                in the sample
        """

        # Collect fragment
        fragment = []
        row_number = 0
        fragment_positions = []
        header_rows = self.header_rows
        field_positions = self.read_labels(sample)[1]
        for row_position, cells in enumerate(sample, start=1):
            if self.read_filter_rows(cells, row_position=row_position):
                row_number += 1
                # Rows before the first header row are not data
                if header_rows and row_number < header_rows[0]:
                    continue
                if row_number in header_rows:
                    continue
                cells = self.read_filter_cells(cells, field_positions=field_positions)
                fragment_positions.append(row_position)
                fragment.append(cells)

        return fragment, fragment_positions

    def read_filter_fields(self, label, *, field_position):
        """Tell whether a field passes the pick/skip fields filters

        Parameters:
            label (str): field label
            field_position (int): 1-based field position

        Returns:
            bool: True if the field is kept
        """

        def is_hit(item):
            if item == "<blank>" and label == "":
                return True
            if isinstance(item, str):
                return item == label
            if isinstance(item, int):
                return item == field_position
            if isinstance(item, typing.Pattern):
                return bool(item.match(label))
            return False

        return _passes_filters(
            self.pick_fields_compiled, self.skip_fields_compiled, is_hit
        )

    def read_filter_rows(self, cells, *, row_position):
        """Tell whether a row passes the pick/skip rows filters

        String and regex items are matched against the first cell of the row.

        Parameters:
            cells (any[]): row cells
            row_position (int): 1-based row position

        Returns:
            bool: True if the row is kept
        """
        first_cell = cells[0] if cells else None
        first_cell = "" if first_cell is None else str(first_cell)

        def is_hit(item):
            if item == "<blank>":
                # Only "" and None are blank; falsy values such as 0 or False
                # are real data
                return all(cell in ("", None) for cell in cells)
            if isinstance(item, str):
                return item == first_cell or bool(
                    item and first_cell.startswith(item)
                )
            if isinstance(item, int):
                return item == row_position
            if isinstance(item, typing.Pattern):
                return bool(item.match(first_cell))
            return False

        return _passes_filters(
            self.pick_rows_compiled, self.skip_rows_compiled, is_hit
        )

    def read_filter_cells(self, cells, *, field_positions):
        """Keep only the cells whose position is among the given positions

        Parameters:
            cells (any[]): row cells
            field_positions (int[]): 1-based positions of the fields to keep

        Returns:
            any[]: filtered cells; the input is returned unchanged when no
                field filtering is configured
        """
        if self.is_field_filtering:
            return [
                cell
                for field_position, cell in enumerate(cells, start=1)
                if field_position in field_positions
            ]
        return cells

    # Metadata

    metadata_Error = errors.LayoutError
    metadata_profile = {  # type: ignore
        "type": "object",
        "additionalProperties": False,
        "properties": {
            "header": {"type": "boolean"},
            "headerRows": {"type": "array", "items": {"type": "number"}},
            "headerJoin": {"type": "string"},
            "headerCase": {"type": "boolean"},
            "pickFields": {"type": "array"},
            "skipFields": {"type": "array"},
            "limitFields": {"type": "number", "minimum": 1},
            "offsetFields": {"type": "number", "minimum": 1},
            "pickRows": {"type": "array"},
            "skipRows": {"type": "array"},
            "limitRows": {"type": "number", "minimum": 1},
            "offsetRows": {"type": "number", "minimum": 1},
        },
    }