1import typing
2from .metadata import Metadata
3from . import settings
4from . import helpers
5from . import errors
6
7
8class Layout(Metadata):
9    """Layout representation
10
11    API      | Usage
12    -------- | --------
13    Public   | `from frictionless import Layout`
14
    Parameters:
        descriptor? (str|dict): layout descriptor
        header? (bool): whether there is a header row
        header_rows? (int[]): row numbers of the header rows
        header_join? (str): string used to join labels of a multiline header
        header_case? (bool): header case sensitivity flag
        pick_fields? ((str|int)[]): what fields to pick
        skip_fields? ((str|int)[]): what fields to skip
        limit_fields? (int): amount of fields
        offset_fields? (int): from what field to start
        pick_rows? ((str|int)[]): what rows to pick
        skip_rows? ((str|int)[]): what rows to skip
        limit_rows? (int): amount of rows
        offset_rows? (int): from what row to start
25    """
26
27    def __init__(
28        self,
29        descriptor=None,
30        *,
31        header=None,
32        header_rows=None,
33        header_join=None,
34        header_case=None,
35        pick_fields=None,
36        skip_fields=None,
37        limit_fields=None,
38        offset_fields=None,
39        pick_rows=None,
40        skip_rows=None,
41        limit_rows=None,
42        offset_rows=None,
43    ):
44        self.setinitial("header", header)
45        self.setinitial("headerRows", header_rows)
46        self.setinitial("headerJoin", header_join)
47        self.setinitial("headerCase", header_case)
48        self.setinitial("pickFields", pick_fields)
49        self.setinitial("skipFields", skip_fields)
50        self.setinitial("limitFields", limit_fields)
51        self.setinitial("offsetFields", offset_fields)
52        self.setinitial("pickRows", pick_rows)
53        self.setinitial("skipRows", skip_rows)
54        self.setinitial("limitRows", limit_rows)
55        self.setinitial("offsetRows", offset_rows)
56        super().__init__(descriptor)
57
58    @Metadata.property
59    def header(self):
60        """
61        Returns:
62            bool: if there is a header row
63        """
64        return self.get("header", settings.DEFAULT_HEADER)
65
66    @Metadata.property
67    def header_rows(self):
68        """
69        Returns:
70            int[]: header rows
71        """
72        if not self.header:
73            return []
74        return self.get("headerRows", settings.DEFAULT_HEADER_ROWS)
75
76    @Metadata.property
77    def header_join(self):
78        """
79        Returns:
80            str: header joiner
81        """
82        return self.get("headerJoin", settings.DEFAULT_HEADER_JOIN)
83
84    @Metadata.property
85    def header_case(self):
86        """
87        Returns:
88            str: header case sensitive
89        """
90        return self.get("headerCase", settings.DEFAULT_HEADER_CASE)
91
92    @Metadata.property
93    def pick_fields(self):
94        """
95        Returns:
96            (str|int)[]?: pick fields
97        """
98        return self.get("pickFields")
99
100    @Metadata.property
101    def skip_fields(self):
102        """
103        Returns:
104            (str|int)[]?: skip fields
105        """
106        return self.get("skipFields")
107
108    @Metadata.property
109    def limit_fields(self):
110        """
111        Returns:
112            int?: limit fields
113        """
114        return self.get("limitFields")
115
116    @Metadata.property
117    def offset_fields(self):
118        """
119        Returns:
120            int?: offset fields
121        """
122        return self.get("offsetFields")
123
124    @Metadata.property
125    def pick_rows(self):
126        """
127        Returns:
128            (str|int)[]?: pick rows
129        """
130        return self.get("pickRows")
131
132    @Metadata.property
133    def skip_rows(self):
134        """
135        Returns:
136            (str|int)[]?: skip rows
137        """
138        return self.get("skipRows")
139
140    @Metadata.property
141    def limit_rows(self):
142        """
143        Returns:
144            int?: limit rows
145        """
146        return self.get("limitRows")
147
148    @Metadata.property
149    def offset_rows(self):
150        """
151        Returns:
152            int?: offset rows
153        """
154        return self.get("offsetRows")
155
156    @Metadata.property(write=False)
157    def is_field_filtering(self):
158        """
159        Returns:
160            bool: whether there is a field filtering
161        """
162        return (
163            self.pick_fields is not None
164            or self.skip_fields is not None
165            or self.limit_fields is not None
166            or self.offset_fields is not None
167        )
168
169    @Metadata.property(write=False)
170    def pick_fields_compiled(self):
171        """
172        Returns:
173            re?: compiled pick fields
174        """
175        return helpers.compile_regex(self.pick_fields)
176
177    @Metadata.property(write=False)
178    def skip_fields_compiled(self):
179        """
180        Returns:
181            re?: compiled skip fields
182        """
183        return helpers.compile_regex(self.skip_fields)
184
185    @Metadata.property(write=False)
186    def pick_rows_compiled(self):
187        """
188        Returns:
189            re?: compiled pick rows
190        """
191        return helpers.compile_regex(self.pick_rows)
192
193    @Metadata.property(write=False)
194    def skip_rows_compiled(self):
195        """
196        Returns:
197            re?: compiled skip fields
198        """
199        return helpers.compile_regex(self.skip_rows)
200
201    # Expand
202
203    def expand(self):
204        """Expand metadata"""
205        self.setdefault("header", self.header)
206        self.setdefault("headerRows", self.header_rows)
207        self.setdefault("headerJoin", self.header_join)
208        self.setdefault("headerCase", self.header_case)
209
210    # Read
211
212    def read_labels(self, sample):
213
214        # Collect lists
215        lists = []
216        row_number = 0
217        for row_position, cells in enumerate(sample, start=1):
218            if self.read_filter_rows(cells, row_position=row_position):
219                row_number += 1
220                if row_number in self.header_rows:
221                    lists.append(helpers.stringify_label(cells))
222                if row_number >= max(self.header_rows, default=0):
223                    break
224
225        # No header
226        if not self.header:
227            return [], list(range(1, len(sample[0]) + 1))
228
229        # Get labels
230        raw_labels = []
231        prev_cells = {}
232        for cells in lists:
233            for index, cell in enumerate(cells):
234                if prev_cells.get(index) == cell:
235                    continue
236                prev_cells[index] = cell
237                if len(raw_labels) <= index:
238                    raw_labels.append(cell)
239                    continue
240                raw_labels[index] = self.header_join.join([raw_labels[index], cell])
241
242        # Filter labels
243        labels = []
244        field_positions = []
245        limit = self.limit_fields
246        offset = self.offset_fields or 0
247        for field_position, label in enumerate(raw_labels, start=1):
248            if self.read_filter_fields(label, field_position=field_position):
249                if offset:
250                    offset -= 1
251                    continue
252                labels.append(label)
253                field_positions.append(field_position)
254                if limit and limit <= len(labels):
255                    break
256
257        return labels, field_positions
258
259    def read_fragment(self, sample):
260
261        # Collect fragment
262        fragment = []
263        row_number = 0
264        fragment_positions = []
265        field_positions = self.read_labels(sample)[1]
266        for row_position, cells in enumerate(sample, start=1):
267            if self.read_filter_rows(cells, row_position=row_position):
268                row_number += 1
269                if self.header_rows and row_number < self.header_rows[0]:
270                    continue
271                if row_number in self.header_rows:
272                    continue
273                cells = self.read_filter_cells(cells, field_positions=field_positions)
274                fragment_positions.append(row_position)
275                fragment.append(cells)
276
277        return fragment, fragment_positions
278
279    def read_filter_fields(self, label, *, field_position):
280        match = True
281        for name in ["pick", "skip"]:
282            if name == "pick":
283                items = self.pick_fields_compiled
284            else:
285                items = self.skip_fields_compiled
286            if not items:
287                continue
288            match = match and name == "skip"
289            for item in items:
290                if item == "<blank>" and label == "":
291                    match = not match
292                elif isinstance(item, str) and item == label:
293                    match = not match
294                elif isinstance(item, int) and item == field_position:
295                    match = not match
296                elif isinstance(item, typing.Pattern) and item.match(label):
297                    match = not match
298        return match
299
300    def read_filter_rows(self, cells, *, row_position):
301        match = True
302        cell = cells[0] if cells else None
303        cell = "" if cell is None else str(cell)
304        for name in ["pick", "skip"]:
305            if name == "pick":
306                items = self.pick_rows_compiled
307            else:
308                items = self.skip_rows_compiled
309            if not items:
310                continue
311            match = match and name == "skip"
312            for item in items:
313                if item == "<blank>":
314                    if not any(cell for cell in cells if cell not in ["", None]):
315                        match = not match
316                elif isinstance(item, str):
317                    if item == cell or (item and cell.startswith(item)):
318                        match = not match
319                elif isinstance(item, int) and item == row_position:
320                    match = not match
321                elif isinstance(item, typing.Pattern) and item.match(cell):
322                    match = not match
323        return match
324
325    def read_filter_cells(self, cells, *, field_positions):
326        if self.is_field_filtering:
327            result = []
328            for field_position, cell in enumerate(cells, start=1):
329                if field_position in field_positions:
330                    result.append(cell)
331            return result
332        return cells
333
334    # Metadata
335
    # Error class tied to this metadata class
    # NOTE(review): presumably used by `Metadata` when reporting invalid
    # descriptors - confirm in the base class
    metadata_Error = errors.LayoutError
    # Descriptor profile in JSON-Schema-like form: only the keys listed
    # under "properties" are declared, and "additionalProperties" is False.
    # The limit/offset options declare a minimum of 1.
    metadata_profile = {  # type: ignore
        "type": "object",
        "additionalProperties": False,
        "properties": {
            # Header options
            "header": {"type": "boolean"},
            "headerRows": {"type": "array", "items": {"type": "number"}},
            "headerJoin": {"type": "string"},
            "headerCase": {"type": "boolean"},
            # Field filtering options
            "pickFields": {"type": "array"},
            "skipFields": {"type": "array"},
            "limitFields": {"type": "number", "minimum": 1},
            "offsetFields": {"type": "number", "minimum": 1},
            # Row filtering options
            "pickRows": {"type": "array"},
            "skipRows": {"type": "array"},
            "limitRows": {"type": "number", "minimum": 1},
            "offsetRows": {"type": "number", "minimum": 1},
        },
    }
355