"""
    pyexcel.core
    ~~~~~~~~~~~~~~~~~~~

    A list of pyexcel signature functions

    :copyright: (c) 2015-2020 by Onni Software Ltd.
    :license: New BSD License
"""
import re

from pyexcel import constants as constants
from pyexcel import docstrings as docs
from pyexcel.book import Book, to_book
from pyexcel.sheet import Sheet
from pyexcel._compact import OrderedDict, append_doc, zip_longest
from pyexcel.internal import core as sources

from pyexcel_io import manager as manager

# A keyword starting with "dest_" addresses the destination; group 1 of the
# match is the keyword name with that prefix removed.
STARTS_WITH_DEST = "^dest_(.*)"
SAVE_AS_EXCEPTION = (
    "This function does not accept parameters for "
    + "pyexcel.Sheet. Please use pyexcel.save_as instead."
)


@append_doc(docs.GET_SHEET)
def get_sheet(**keywords):
    """
    Get an instance of :class:`Sheet` from an excel source
    """
    # Sheet constructor parameters are taken out first so that only the
    # remaining keywords reach the source reader.
    sheet_params = _pop_sheet_parameters(keywords)
    named_content = sources.get_sheet_stream(**keywords)
    return Sheet(named_content.payload, named_content.name, **sheet_params)


@append_doc(docs.GET_BOOK)
def get_book(**keywords):
    """
    Get an instance of :class:`Book` from an excel source
    """
    book_stream = sources.get_book_stream(**keywords)
    return Book(
        book_stream.to_dict(),
        filename=book_stream.filename,
        path=book_stream.path,
    )


@append_doc(docs.IGET_BOOK)
def iget_book(**keywords):
    """
    Get an instance of :class:`BookStream` from an excel source

    First use case is to get all sheet names without extracting
    the sheets into memory.
    """
    return sources.get_book_stream(on_demand=True, **keywords)


@append_doc(docs.SAVE_AS)
def save_as(**keywords):
    """
    Save a sheet from a data source to another one
    """
    dest_keywords, source_keywords = _split_keywords(**keywords)
    sheet_params = _pop_sheet_parameters(source_keywords)
    sheet_stream = sources.get_sheet_stream(**source_keywords)
    # "dest_sheet_name" overrides the name read from the source; it is left
    # in dest_keywords, so it is also forwarded to save_sheet.
    output_sheet_name = dest_keywords.get("sheet_name", sheet_stream.name)
    sheet = Sheet(sheet_stream.payload, output_sheet_name, **sheet_params)
    return sources.save_sheet(sheet, **dest_keywords)


@append_doc(docs.ISAVE_AS)
def isave_as(**keywords):
    """
    Save a sheet from a data source to another one with less memory

    It is similar to :meth:`pyexcel.save_as` except that it does
    not accept parameters for :class:`pyexcel.Sheet`. And it reads
    while it writes.
    """
    dest_keywords, source_keywords = _split_keywords(**keywords)
    # No Sheet instance is built here, so Sheet parameters cannot be
    # honoured and are rejected up front.
    if any(
        field in source_keywords
        for field in constants.VALID_SHEET_PARAMETERS
    ):
        raise Exception(SAVE_AS_EXCEPTION)
    sheet = sources.get_sheet_stream(on_demand=True, **source_keywords)
    if "sheet_name" in dest_keywords:
        sheet.name = dest_keywords["sheet_name"]
    return sources.save_sheet(sheet, **dest_keywords)


@append_doc(docs.SAVE_BOOK_AS)
def save_book_as(**keywords):
    """
    Save a book from a data source to another one
    """
    dest_keywords, source_keywords = _split_keywords(**keywords)
    book_stream = sources.get_book_stream(**source_keywords)
    book = to_book(book_stream)
    return sources.save_book(book, **dest_keywords)


@append_doc(docs.ISAVE_BOOK_AS)
def isave_book_as(**keywords):
    """
    Save a book from a data source to another one

    It is similar to :meth:`pyexcel.save_book_as` but it reads
    while it writes. This function provides some speedup but
    the output data is not made uniform.
    """
    dest_keywords, source_keywords = _split_keywords(**keywords)
    book = sources.get_book_stream(on_demand=True, **source_keywords)
    return sources.save_book(book, **dest_keywords)


@append_doc(docs.GET_ARRAY)
def get_array(**keywords):
    """
    Obtain an array from an excel source

    It accepts the same parameters as :meth:`~pyexcel.get_sheet`
    but returns an array instead.
    """
    sheet = get_sheet(**keywords)
    return sheet.to_array()


@append_doc(docs.GET_DICT)
def get_dict(name_columns_by_row=0, **keywords):
    """
    Obtain a dictionary from an excel source

    It accepts the same parameters as :meth:`~pyexcel.get_sheet`
    but returns a dictionary instead.

    Specifically:
    name_columns_by_row : specify a row to be a dictionary key.
    It defaults to 0, the first row.

    If you would use a column index 0 instead, you should do::

        get_dict(name_columns_by_row=-1, name_rows_by_column=0)

    """
    sheet = get_sheet(name_columns_by_row=name_columns_by_row, **keywords)
    return sheet.to_dict()


@append_doc(docs.GET_RECORDS)
def get_records(name_columns_by_row=0, **keywords):
    """
    Obtain a list of records from an excel source

    It accepts the same parameters as :meth:`~pyexcel.get_sheet`
    but returns a list of dictionaries (records) instead.

    Specifically:
    name_columns_by_row : specify a row to be a dictionary key.
    It defaults to 0, the first row.

    If you would use a column index 0 instead, you should do::

        get_records(name_columns_by_row=-1, name_rows_by_column=0)

    """
    sheet = get_sheet(name_columns_by_row=name_columns_by_row, **keywords)
    return list(sheet.to_records())


@append_doc(docs.IGET_ARRAY)
def iget_array(**keywords):
    """
    Obtain a generator of a two dimensional array from an excel source

    It is similar to :meth:`pyexcel.get_array` but it has a smaller
    memory footprint.
    """
    sheet_stream = sources.get_sheet_stream(on_demand=True, **keywords)
    return sheet_stream.payload


@append_doc(docs.IGET_RECORDS)
def iget_records(custom_headers=None, **keywords):
    """
    Obtain a generator of a list of records from an excel source

    It is similar to :meth:`pyexcel.get_records` but it has a smaller
    memory footprint and requires the headers to be in the first row. And
    the data matrix should be of equal length. It should consume less
    memory and should work well with large files.
    """
    sheet_stream = sources.get_sheet_stream(on_demand=True, **keywords)
    headers = None
    for row_index, row in enumerate(sheet_stream.payload):
        if row_index == 0:
            # The first row only supplies the keys; it is never yielded.
            headers = row
            continue
        # Short rows (or short header rows) are padded with DEFAULT_NA.
        record = OrderedDict(
            zip_longest(headers, row, fillvalue=constants.DEFAULT_NA)
        )
        if custom_headers:
            # custom order: keep only the requested columns, in the
            # requested order. A name absent from the header row raises
            # KeyError.
            record = OrderedDict(
                (name, record[name]) for name in custom_headers
            )
        yield record


@append_doc(docs.GET_BOOK_DICT)
def get_book_dict(**keywords):
    """
    Obtain a dictionary of two dimensional arrays

    It accepts the same parameters as :meth:`~pyexcel.get_book`
    but returns a dictionary instead.
    """
    book = get_book(**keywords)
    return book.to_dict()


def get_io_type(file_type):
    """
    Return the io stream types, string or bytes
    """
    io_type = manager.get_io_type(file_type)
    # Fall back to "string" when the manager reports no io type.
    return "string" if io_type is None else io_type


def _pop_sheet_parameters(keywords):
    """
    Remove and return the :class:`Sheet` constructor parameters

    Every name of ``constants.VALID_SHEET_PARAMETERS`` found in
    ``keywords`` is popped from it, so ``keywords`` is modified in place.
    """
    return {
        field: keywords.pop(field)
        for field in constants.VALID_SHEET_PARAMETERS
        if field in keywords
    }


def _split_keywords(**keywords):
    """
    Separate destination keywords from source keywords

    Keys matching ``STARTS_WITH_DEST`` go to the destination dictionary
    with the ``dest_`` prefix removed; all other keys go to the source
    dictionary unchanged. Returns ``(dest_keywords, source_keywords)``.
    """
    dest_keywords = {}
    source_keywords = {}
    for key, value in keywords.items():
        matched = re.match(STARTS_WITH_DEST, key)
        if matched:
            dest_keywords[matched.group(1)] = value
        else:
            source_keywords[key] = value
    return dest_keywords, source_keywords