1"""
2    pyexcel.core
3    ~~~~~~~~~~~~~~~~~~~
4
5    A list of pyexcel signature functions
6
7    :copyright: (c) 2015-2020 by Onni Software Ltd.
8    :license: New BSD License
9"""
10import re
11
12from pyexcel import constants as constants
13from pyexcel import docstrings as docs
14from pyexcel.book import Book, to_book
15from pyexcel.sheet import Sheet
16from pyexcel._compact import OrderedDict, append_doc, zip_longest
17from pyexcel.internal import core as sources
18
19from pyexcel_io import manager as manager
20
# Regular expression matched against keyword names: it recognises names that
# begin with ``dest_`` and captures the remainder of the name in group 1.
STARTS_WITH_DEST = "^dest_(.*)"
# Error text raised by isave_as when it is given a Sheet-only parameter.
SAVE_AS_EXCEPTION = (
    "This function does not accept parameters for "
    + "pyexcel.Sheet. Please use pyexcel.save_as instead."
)
26
27
@append_doc(docs.GET_SHEET)
def get_sheet(**keywords):
    """
    Build a :class:`Sheet` from the excel source described by the keywords

    Keywords listed in ``constants.VALID_SHEET_PARAMETERS`` are passed to
    the :class:`Sheet` constructor; all remaining keywords are passed to
    the source reader.
    """
    # Pull the Sheet-only options out first so the reader never sees them.
    sheet_options = {
        name: keywords.pop(name)
        for name in constants.VALID_SHEET_PARAMETERS
        if name in keywords
    }
    stream = sources.get_sheet_stream(**keywords)
    return Sheet(stream.payload, stream.name, **sheet_options)
40
41
@append_doc(docs.GET_BOOK)
def get_book(**keywords):
    """
    Build a :class:`Book` from the excel source described by the keywords

    The book stream is converted to a dictionary, and the stream's
    ``filename`` and ``path`` are carried over to the new book.
    """
    stream = sources.get_book_stream(**keywords)
    sheets = stream.to_dict()
    return Book(sheets, filename=stream.filename, path=stream.path)
54
55
@append_doc(docs.IGET_BOOK)
def iget_book(**keywords):
    """
    Return a :class:`BookStream` for an excel source, opened on demand

    The primary use case is listing every sheet name without pulling
    the sheets themselves into memory.
    """
    lazy_book = sources.get_book_stream(on_demand=True, **keywords)
    return lazy_book
65
66
@append_doc(docs.SAVE_AS)
def save_as(**keywords):
    """
    Copy one sheet from a data source into a destination

    Keywords prefixed with ``dest_`` describe the destination; the rest
    describe the source. Source keywords listed in
    ``constants.VALID_SHEET_PARAMETERS`` are passed to :class:`Sheet`.
    """
    target_kwargs, origin_kwargs = _split_keywords(**keywords)
    # Sheet-only options must not reach the source reader.
    sheet_options = {
        name: origin_kwargs.pop(name)
        for name in constants.VALID_SHEET_PARAMETERS
        if name in origin_kwargs
    }
    stream = sources.get_sheet_stream(**origin_kwargs)
    # An explicit dest_sheet_name overrides the name read from the source.
    target_name = target_kwargs.get("sheet_name", stream.name)
    sheet = Sheet(stream.payload, target_name, **sheet_options)
    return sources.save_sheet(sheet, **target_kwargs)
83
84
@append_doc(docs.ISAVE_AS)
def isave_as(**keywords):
    """
    Copy one sheet from a data source into a destination with less memory

    It is similar to :meth:`pyexcel.save_as` except that it does not
    accept parameters for :class:`pyexcel.Sheet`; passing one raises an
    exception. The source is opened on demand, so it reads while it
    writes.
    """
    target_kwargs, origin_kwargs = _split_keywords(**keywords)
    # Sheet parameters cannot be honoured here, so reject them up front.
    if any(
        name in origin_kwargs for name in constants.VALID_SHEET_PARAMETERS
    ):
        raise Exception(SAVE_AS_EXCEPTION)
    stream = sources.get_sheet_stream(on_demand=True, **origin_kwargs)
    if "sheet_name" in target_kwargs:
        # An explicit dest_sheet_name renames the stream before saving.
        stream.name = target_kwargs["sheet_name"]
    return sources.save_sheet(stream, **target_kwargs)
102
103
@append_doc(docs.SAVE_BOOK_AS)
def save_book_as(**keywords):
    """
    Copy a whole book from a data source into a destination

    Keywords prefixed with ``dest_`` describe the destination; the rest
    describe the source. The book stream is converted with ``to_book``
    before it is saved.
    """
    target_kwargs, origin_kwargs = _split_keywords(**keywords)
    whole_book = to_book(sources.get_book_stream(**origin_kwargs))
    return sources.save_book(whole_book, **target_kwargs)
113
114
@append_doc(docs.ISAVE_BOOK_AS)
def isave_book_as(**keywords):
    """
    Copy a whole book from a data source into a destination, on demand

    It is similar to :meth:`pyexcel.save_book_as` but it reads while it
    writes. This function provides some speedup, but the output data is
    not made uniform.
    """
    target_kwargs, origin_kwargs = _split_keywords(**keywords)
    lazy_book = sources.get_book_stream(on_demand=True, **origin_kwargs)
    result = sources.save_book(lazy_book, **target_kwargs)
    return result
127
128
@append_doc(docs.GET_ARRAY)
def get_array(**keywords):
    """
    Read an excel source and return its content as an array

    Takes the same parameters as :meth:`~pyexcel.get_sheet`; the sheet
    that call produces is converted with ``to_array``.
    """
    return get_sheet(**keywords).to_array()
139
140
@append_doc(docs.GET_DICT)
def get_dict(name_columns_by_row=0, **keywords):
    """
    Read an excel source and return its content as a dictionary

    Takes the same parameters as :meth:`~pyexcel.get_sheet`; the sheet
    that call produces is converted with ``to_dict``.

    Specifically:
    name_columns_by_row : the row whose values become the dictionary keys.
    It defaults to 0, i.e. the first row.

    To key by column index 0 instead, call::

        get_dict(name_columns_by_row=-1, name_rows_by_column=0)

    """
    return get_sheet(
        name_columns_by_row=name_columns_by_row, **keywords
    ).to_dict()
160
161
@append_doc(docs.GET_RECORDS)
def get_records(name_columns_by_row=0, **keywords):
    """
    Read an excel source and return its content as a list of records

    Takes the same parameters as :meth:`~pyexcel.get_sheet`; the sheet
    that call produces is converted with ``to_records`` and the result
    is collected into a list.

    Specifically:
    name_columns_by_row : the row whose values become the dictionary keys.
    It defaults to 0, i.e. the first row.

    To key by column index 0 instead, call::

        get_records(name_columns_by_row=-1, name_rows_by_column=0)

    """
    records = get_sheet(
        name_columns_by_row=name_columns_by_row, **keywords
    ).to_records()
    return list(records)
181
182
@append_doc(docs.IGET_ARRAY)
def iget_array(**keywords):
    """
    Return the payload of an on-demand sheet stream for an excel source

    It is similar to :meth:`pyexcel.get_array` but has a smaller memory
    footprint: the payload is meant to be iterated as a two dimensional
    array rather than loaded up front.
    """
    return sources.get_sheet_stream(on_demand=True, **keywords).payload
193
194
@append_doc(docs.IGET_RECORDS)
def iget_records(custom_headers=None, **keywords):
    """
    Lazily yield one record (an ordered dictionary) per data row

    It is similar to :meth:`pyexcel.get_records` but has a smaller memory
    footprint. The headers must be in the first row, and the data matrix
    should be of equal length; where a row and the headers differ in
    length, the gap is filled with ``constants.DEFAULT_NA``. When
    ``custom_headers`` is given, each record holds those keys only, in
    that order.
    """
    stream = sources.get_sheet_stream(on_demand=True, **keywords)
    rows = iter(stream.payload)
    try:
        # The first row is consumed as the header and never yielded.
        headers = next(rows)
    except StopIteration:
        # Empty source: there is nothing to yield.
        return
    for row in rows:
        pairs = zip_longest(headers, row, fillvalue=constants.DEFAULT_NA)
        if not custom_headers:
            # default order
            yield OrderedDict(pairs)
            continue
        # custom order: look each requested header up by name
        by_name = dict(pairs)
        yield OrderedDict((name, by_name[name]) for name in custom_headers)
225
226
@append_doc(docs.GET_BOOK_DICT)
def get_book_dict(**keywords):
    """
    Read an excel source and return a dictionary of two dimensional arrays

    Takes the same parameters as :meth:`~pyexcel.get_book`; the book that
    call produces is converted with ``to_dict``.
    """
    return get_book(**keywords).to_dict()
237
238
def get_io_type(file_type):
    """
    Return the io stream type, string or bytes, for a file type

    Falls back to ``"string"`` when the io manager reports ``None`` for
    the given file type.
    """
    detected = manager.get_io_type(file_type)
    return "string" if detected is None else detected
247
248
def _split_keywords(**keywords):
    """
    Separate destination keywords from source keywords

    A keyword whose name matches ``STARTS_WITH_DEST`` is stored under the
    captured remainder of its name in the first dictionary; every other
    keyword is stored unchanged in the second.

    Returns a ``(dest_keywords, source_keywords)`` tuple.
    """
    dest_pattern = re.compile(STARTS_WITH_DEST)
    prefixed, plain = {}, {}
    for name in keywords:
        hit = dest_pattern.match(name)
        if hit is None:
            plain[name] = keywords[name]
        else:
            # group 1 is the keyword name with the prefix removed
            prefixed[hit.group(1)] = keywords[name]
    return prefixed, plain
260