"""
    pyexcel_io.io
    ~~~~~~~~~~~~~~~~~~~

    The io interface to file extensions

    :copyright: (c) 2014-2020 by Onni Software Ltd.
    :license: New BSD License, see LICENSE for more details
"""
import os
import warnings
from types import GeneratorType

from pyexcel_io import constants
from pyexcel_io.reader import Reader
from pyexcel_io.writer import Writer
from pyexcel_io.plugins import OLD_READERS, OLD_WRITERS
from pyexcel_io._compact import isstream
from pyexcel_io.exceptions import (
    NoSupportingPluginFound,
    SupportingPluginAvailableButNotInstalled,
)


def iget_data(afile, file_type=None, **keywords):
    """Get data from an excel file source

    The data has not gone into memory yet. If you use dedicated partial read
    plugins, such as pyexcel-xlsxr, pyexcel-odsr, you will notice
    the memory consumption drop when you work with big files.

    :param afile: a file name, a file stream or actual content
    :param sheet_name: the name of the sheet to be loaded
    :param sheet_index: the index of the sheet to be loaded
    :param sheets: a list of sheet to be loaded
    :param file_type: used only when filename is not a physical file name
    :param force_file_type: used only when filename refers to a physical file
                            and it is intended to open it as forced file type.
    :param library: explicitly name a library for use.
                    e.g. library='pyexcel-ods'
    :param auto_detect_float: defaults to True
    :param auto_detect_int: defaults to True
    :param auto_detect_datetime: defaults to True
    :param ignore_infinity: defaults to True
    :param ignore_nan_text: various forms of 'NaN', 'nan' are ignored
    :param default_float_nan: choose one form of 'NaN', 'nan'
    :param pep_0515_off: turn off pep 0515. default to True.
    :param keep_trailing_empty_cells: keep trailing columns. default to False
    :param keywords: any other library specific parameters
    :returns: a tuple ``(data, reader)``. ``data`` is the dictionary built
              by :func:`load_data` in streaming mode; ``reader`` is the
              reader that produced it and is handed back still open, so
              closing it is left to the caller.
    """
    return _get_data(afile, file_type=file_type, streaming=True, **keywords)


def get_data(afile, file_type=None, streaming=None, **keywords):
    """Get data from an excel file source

    :param afile: a file name, a file stream or actual content
    :param sheet_name: the name of the sheet to be loaded
    :param sheet_index: the index of the sheet to be loaded
    :param sheets: a list of sheet to be loaded
    :param file_type: used only when filename is not a physical file name
    :param force_file_type: used only when filename refers to a physical file
                            and it is intended to open it as forced file type.
    :param streaming: no longer honoured. The data is always read fully
                      into lists; passing True only triggers a warning
                      that points to :func:`iget_data`.
    :param library: explicitly name a library for use.
                    e.g. library='pyexcel-ods'
    :param auto_detect_float: defaults to True
    :param auto_detect_int: defaults to True
    :param auto_detect_datetime: defaults to True
    :param ignore_infinity: defaults to True
    :param ignore_nan_text: various forms of 'NaN', 'nan' are ignored
    :param default_float_nan: choose one form of 'NaN', 'nan'
    :param pep_0515_off: turn off pep 0515. default to True.
    :param keep_trailing_empty_cells: keep trailing columns. default to False
    :param keywords: any other library specific parameters
    :returns: an ordered dictionary
    """
    if streaming is True:
        # stacklevel=2 attributes the warning to the code calling get_data
        warnings.warn("Please use iget_data instead", stacklevel=2)
    data, _ = _get_data(
        afile, file_type=file_type, streaming=False, **keywords
    )
    return data


def _get_data(afile, file_type=None, **keywords):
    """Map ``afile`` onto the matching ``load_data`` keyword and load it

    * a stream becomes ``file_stream``; csv is assumed when no file type
      is given
    * ``None`` or anything given without a file type becomes ``file_name``
    * everything else is passed as in-memory ``file_content``

    :returns: whatever :func:`load_data` returns, i.e. ``(data, reader)``
    """
    if isstream(afile):
        keywords.update(
            file_stream=afile,
            file_type=file_type or constants.FILE_FORMAT_CSV,
        )
    elif afile is None or file_type is None:
        keywords.update(file_name=afile, file_type=file_type)
    else:
        keywords.update(file_content=afile, file_type=file_type)
    return load_data(**keywords)


def save_data(afile, data, file_type=None, **keywords):
    """Save data to an excel file source

    Your data must be a dictionary

    :param afile: actual file name, a file stream or actual content
    :param data: a dictionary but an ordered dictionary is preferred.
                 A list or a generator is accepted too and is stored as
                 a single sheet named ``constants.DEFAULT_SHEET_NAME``.
    :param file_type: used only when filename is not a physical file name.
                      For a stream without a file type, csv is assumed.
    :param force_file_type: used only when filename refers to a physical file
                            and it is intended to open it as forced file type.
    :param library: explicitly name a library for use.
                    e.g. library='pyexcel-ods'
    :param keywords: any other parameters that python csv module's
                     `fmtparams <https://docs.python.org/release/3.1.5/library/csv.html#dialects-and-formatting-parameters>`_
    """  # noqa
    if isinstance(data, (list, GeneratorType)):
        # bare rows: wrap them up as a one-sheet book
        single_sheet_in_book = True
        to_store = {constants.DEFAULT_SHEET_NAME: data}
    else:
        single_sheet_in_book = len(list(data.keys())) == 1
        to_store = data

    if isstream(afile):
        if file_type is None:
            file_type = constants.FILE_FORMAT_CSV
        keywords.update(file_stream=afile, file_type=file_type)
    else:
        keywords.update(file_name=afile, file_type=file_type)
    keywords["single_sheet_in_book"] = single_sheet_in_book

    with get_writer(**keywords) as writer:
        writer.write(to_store)


def load_data(
    file_name=None,
    file_content=None,
    file_stream=None,
    file_type=None,
    force_file_type=None,
    sheet_name=None,
    sheet_index=None,
    sheets=None,
    library=None,
    streaming=False,
    **keywords
):
    """Load data from any supported excel formats

    Exactly one of ``file_name``, ``file_content`` and ``file_stream``
    must be given.

    :param file_name: actual file name
    :param file_content: actual content held in memory
    :param file_stream: a file stream
    :param file_type: used only when filename is not a physical file name.
                      When omitted, it falls back to ``force_file_type``
                      and then to the text after the last dot in
                      ``file_name``.
    :param force_file_type: used only when filename refers to a physical file
                            and it is intended to open it as forced file type.
    :param sheet_name: the name of the sheet to be loaded
    :param sheet_index: the index of the sheet to be loaded
    :param sheets: a list of sheet to be loaded
    :param library: explicitly name a library for use.
    :param streaming: when it is exactly ``False`` (the default), every
                      sheet is turned into a list and the reader is closed.
                      Otherwise the sheets are returned as the reader
                      produced them and the reader stays open.
    :param keywords: any other parameters, handed to the reader on open
    :returns: a tuple ``(result, reader)``; ``reader`` is ``None`` once it
              has been closed here.
    :raises IOError: when not exactly one input source is given, when no
                     source is usable, or, after no plugin was found for a
                     file name, when that path does not exist or is not a
                     file
    :raises Exception: when the file type has to be derived from
                       ``file_name`` but that is not a string
    """
    provided_inputs = [
        source
        for source in (file_name, file_content, file_stream)
        if source is not None
    ]
    if len(provided_inputs) != 1:
        raise IOError(constants.MESSAGE_ERROR_02)

    if file_type is None:
        if force_file_type:
            file_type = force_file_type
        else:
            try:
                file_type = file_name.split(".")[-1]
            except AttributeError:
                raise Exception(constants.MESSAGE_FILE_NAME_SHOULD_BE_STRING)

    # old style plugins take precedence; otherwise use the Reader class
    try:
        reader = OLD_READERS.get_a_plugin(file_type, library)
    except (NoSupportingPluginFound, SupportingPluginAvailableButNotInstalled):
        reader = Reader(file_type, library)

    try:
        if file_name:
            reader.open(file_name, **keywords)
        elif file_content:
            reader.open_content(file_content, **keywords)
        elif file_stream:
            reader.open_stream(file_stream, **keywords)
        else:
            raise IOError("Unrecognized options")

        try:
            if sheet_name:
                result = reader.read_sheet_by_name(sheet_name)
            elif sheet_index is not None:
                result = reader.read_sheet_by_index(sheet_index)
            elif sheets is not None:
                result = reader.read_many(sheets)
            else:
                result = reader.read_all()

            if streaming is False:
                # pull the lazily produced rows into memory
                for key in result:
                    result[key] = list(result[key])
        except Exception:
            # the reader was opened successfully above: release it rather
            # than leaking it when reading fails half way
            reader.close()
            raise

        if streaming is False:
            reader.close()
            reader = None

        return result, reader
    except NoSupportingPluginFound:
        if not file_name:
            raise
        # a missing plugin may just be the symptom of a bad path;
        # report the path problem when there is one
        if not os.path.exists(file_name):
            raise IOError(constants.MESSAGE_FILE_DOES_NOT_EXIST % file_name)
        if not os.path.isfile(file_name):
            raise IOError(constants.MESSAGE_NOT_FILE_FORMATTER % file_name)
        raise


def get_writer(
    file_name=None,
    file_stream=None,
    file_type=None,
    library=None,
    force_file_type=None,
    **keywords
):
    """find a suitable writer

    Exactly one of ``file_name`` and ``file_stream`` must be given.

    :param file_name: the target file name. When ``file_type`` is given
                      as well, it is passed to the writer's
                      ``open_content`` instead of ``open``.
    :param file_stream: the target stream
    :param file_type: the file type to write. When omitted for a file name,
                      it falls back to ``force_file_type`` and then to the
                      text after the last dot in ``file_name``.
    :param library: explicitly name a library for use.
    :param force_file_type: the file type to use for a file name that
                            comes without ``file_type``
    :param keywords: any other parameters, handed to the writer on open
    :returns: the writer, opened on the given target
    :raises IOError: when not exactly one target is given
    :raises Exception: when the file type has to be derived from
                       ``file_name`` but that is not a string
    """
    provided_inputs = [
        target for target in (file_name, file_stream) if target is not None
    ]
    if len(provided_inputs) != 1:
        raise IOError(constants.MESSAGE_ERROR_02)

    file_type_given = True

    if file_type is None and file_name:
        if force_file_type:
            file_type = force_file_type
        else:
            try:
                file_type = file_name.split(".")[-1]
            except AttributeError:
                raise Exception(constants.MESSAGE_FILE_NAME_SHOULD_BE_STRING)

        file_type_given = False

    # old style plugins take precedence; otherwise use the Writer class
    try:
        writer = OLD_WRITERS.get_a_plugin(file_type, library)
    except (NoSupportingPluginFound, SupportingPluginAvailableButNotInstalled):
        writer = Writer(file_type, library)

    if file_name:
        if file_type_given:
            writer.open_content(file_name, **keywords)
        else:
            writer.open(file_name, **keywords)
    elif file_stream:
        writer.open_stream(file_stream, **keywords)
    # NOTE: a missing target was rejected by the check at the top; a target
    # that is falsy without being None (e.g. an empty file name) gets here
    # and the writer is returned without having been opened.
    return writer


# backward compatibility
store_data = save_data