1"""
2    pyexcel_xlsr
3    ~~~~~~~~~~~~~~~~~~~
4
5    The lower level xls/xlsm file format handler using xlrd
6
7    :copyright: (c) 2016-2021 by Onni Software Ltd
8    :license: New BSD License
9"""
10import datetime
11
12import xlrd
13from pyexcel_io.service import has_no_digits_in_float
14from pyexcel_io.plugin_api import ISheet, IReader
15
16XLS_KEYWORDS = [
17    "filename",
18    "logfile",
19    "verbosity",
20    "use_mmap",
21    "file_contents",
22    "encoding_override",
23    "formatting_info",
24    "on_demand",
25    "ragged_rows",
26]
27DEFAULT_ERROR_VALUE = "#N/A"
28
29
30class MergedCell(object):
31    def __init__(self, row_low, row_high, column_low, column_high):
32        self.__rl = row_low
33        self.__rh = row_high
34        self.__cl = column_low
35        self.__ch = column_high
36        self.value = None
37
38    def register_cells(self, registry):
39        for rowx in range(self.__rl, self.__rh):
40            for colx in range(self.__cl, self.__ch):
41                key = "%s-%s" % (rowx, colx)
42                registry[key] = self
43
44
45class XLSheet(ISheet):
46    """
47    xls, xlsx, xlsm sheet reader
48
49    Currently only support first sheet in the file
50    """
51
52    def __init__(self, sheet, auto_detect_int=True, date_mode=0, **keywords):
53        self.__auto_detect_int = auto_detect_int
54        self.__hidden_cols = []
55        self.__hidden_rows = []
56        self.__merged_cells = {}
57        self._book_date_mode = date_mode
58        self.xls_sheet = sheet
59        self._keywords = keywords
60        if keywords.get("detect_merged_cells") is True:
61            for merged_cell_ranges in sheet.merged_cells:
62                merged_cells = MergedCell(*merged_cell_ranges)
63                merged_cells.register_cells(self.__merged_cells)
64        if keywords.get("skip_hidden_row_and_column") is True:
65            for col_index, info in self.xls_sheet.colinfo_map.items():
66                if info.hidden == 1:
67                    self.__hidden_cols.append(col_index)
68            for row_index, info in self.xls_sheet.rowinfo_map.items():
69                if info.hidden == 1:
70                    self.__hidden_rows.append(row_index)
71
72    @property
73    def name(self):
74        return self.xls_sheet.name
75
76    def row_iterator(self):
77        number_of_rows = self.xls_sheet.nrows - len(self.__hidden_rows)
78        return range(number_of_rows)
79
80    def column_iterator(self, row):
81        number_of_columns = self.xls_sheet.ncols - len(self.__hidden_cols)
82        for column in range(number_of_columns):
83            yield self.cell_value(row, column)
84
85    def cell_value(self, row, column):
86        """
87        Random access to the xls cells
88        """
89        if self._keywords.get("skip_hidden_row_and_column") is True:
90            row, column = self._offset_hidden_indices(row, column)
91        cell_type = self.xls_sheet.cell_type(row, column)
92        value = self.xls_sheet.cell_value(row, column)
93
94        if cell_type == xlrd.XL_CELL_DATE:
95            value = xldate_to_python_date(value, self._book_date_mode)
96        elif cell_type == xlrd.XL_CELL_NUMBER and self.__auto_detect_int:
97            if has_no_digits_in_float(value):
98                value = int(value)
99        elif cell_type == xlrd.XL_CELL_ERROR:
100            value = DEFAULT_ERROR_VALUE
101
102        if self.__merged_cells:
103            merged_cell = self.__merged_cells.get("%s-%s" % (row, column))
104            if merged_cell:
105                if merged_cell.value:
106                    value = merged_cell.value
107                else:
108                    merged_cell.value = value
109        return value
110
111    def _offset_hidden_indices(self, row, column):
112        row = calculate_offsets(row, self.__hidden_rows)
113        column = calculate_offsets(column, self.__hidden_cols)
114        return row, column
115
116
117def calculate_offsets(incoming_index, hidden_indices):
118    offset = 0
119    for index in hidden_indices:
120        if index <= (incoming_index + offset):
121            offset += 1
122    return incoming_index + offset
123
124
125class XLSReader(IReader):
126    """
127    XLSBook reader
128
129    It reads xls, xlsm, xlsx work book
130    """
131
132    def __init__(self, file_type, **keywords):
133        self.__skip_hidden_sheets = keywords.get("skip_hidden_sheets", True)
134        self.__skip_hidden_row_column = keywords.get(
135            "skip_hidden_row_and_column", True
136        )
137        self.__detect_merged_cells = keywords.get("detect_merged_cells", False)
138        self._keywords = keywords
139        xlrd_params = self._extract_xlrd_params()
140        if self.__skip_hidden_row_column and file_type == "xls":
141            xlrd_params["formatting_info"] = True
142        if self.__detect_merged_cells:
143            xlrd_params["formatting_info"] = True
144
145        self.content_array = []
146        self.xls_book = self.get_xls_book(**xlrd_params)
147        for sheet in self.xls_book.sheets():
148            if self.__skip_hidden_sheets and sheet.visibility != 0:
149                continue
150            self.content_array.append(sheet)
151
152    def read_sheet(self, index):
153        native_sheet = self.content_array[index]
154        sheet = XLSheet(
155            native_sheet, date_mode=self.xls_book.datemode, **self._keywords
156        )
157        return sheet
158
159    def close(self):
160        if self.xls_book:
161            self.xls_book.release_resources()
162            self.xls_book = None
163
164    def get_xls_book(self, **xlrd_params):
165        xls_book = xlrd.open_workbook(**xlrd_params)
166        return xls_book
167
168    def _extract_xlrd_params(self):
169        params = {}
170        if self._keywords is not None:
171            for key in list(self._keywords.keys()):
172                if key in XLS_KEYWORDS:
173                    params[key] = self._keywords.pop(key)
174        return params
175
176
177class XLSInFile(XLSReader):
178    def __init__(self, file_name, file_type, **keywords):
179        super().__init__(file_type, filename=file_name, **keywords)
180
181
182class XLSInContent(XLSReader):
183    def __init__(self, file_content, file_type, **keywords):
184        super().__init__(file_type, file_contents=file_content, **keywords)
185
186
187class XLSInMemory(XLSReader):
188    def __init__(self, file_stream, file_type, **keywords):
189        file_stream.seek(0)
190        super().__init__(
191            file_type, file_contents=file_stream.read(), **keywords
192        )
193
194
195def xldate_to_python_date(value, date_mode):
196    """
197    convert xl date to python date
198    """
199    date_tuple = xlrd.xldate_as_tuple(value, date_mode)
200
201    ret = None
202    if date_tuple == (0, 0, 0, 0, 0, 0):
203        ret = datetime.datetime(1900, 1, 1, 0, 0, 0)
204    elif date_tuple[0:3] == (0, 0, 0):
205        ret = datetime.time(date_tuple[3], date_tuple[4], date_tuple[5])
206    elif date_tuple[3:6] == (0, 0, 0):
207        ret = datetime.date(date_tuple[0], date_tuple[1], date_tuple[2])
208    else:
209        ret = datetime.datetime(
210            date_tuple[0],
211            date_tuple[1],
212            date_tuple[2],
213            date_tuple[3],
214            date_tuple[4],
215            date_tuple[5],
216        )
217    return ret
218