1from __future__ import absolute_import
2# Copyright (c) 2010-2019 openpyxl
3
4"""
5Collection of utilities used within the package and also available for client code
6"""
7import re
8
9from openpyxl.compat import basestring
10from .exceptions import CellCoordinatesException
11
12# constants
13COORD_RE = re.compile(r'^[$]?([A-Za-z]{1,3})[$]?(\d+)$')
14COL_RANGE = """[A-Z]{1,3}:[A-Z]{1,3}:"""
15ROW_RANGE = r"""\d+:\d+:"""
16RANGE_EXPR = r"""
17[$]?(?P<min_col>[A-Za-z]{1,3})?
18[$]?(?P<min_row>\d+)?
19(:[$]?(?P<max_col>[A-Za-z]{1,3})?
20[$]?(?P<max_row>\d+)?)?
21"""
22ABSOLUTE_RE = re.compile('^' + RANGE_EXPR +'$', re.VERBOSE)
23SHEET_TITLE = r"""
24(('(?P<quoted>([^']|'')*)')|(?P<notquoted>[^'^ ^!]*))!"""
25SHEETRANGE_RE = re.compile("""{0}(?P<cells>{1})(?=,?)""".format(
26    SHEET_TITLE, RANGE_EXPR), re.VERBOSE)
27
28
29def get_column_interval(start, end):
30    """
31    Given the start and end columns, return all the columns in the series.
32
33    The start and end columns can be either column letters or 1-based
34    indexes.
35    """
36    if isinstance(start, basestring):
37        start = column_index_from_string(start)
38    if isinstance(end, basestring):
39        end = column_index_from_string(end)
40    return [get_column_letter(x) for x in range(start, end + 1)]
41
42
43def coordinate_from_string(coord_string):
44    """Convert a coordinate string like 'B12' to a tuple ('B', 12)"""
45    match = COORD_RE.match(coord_string)
46    if not match:
47        msg = 'Invalid cell coordinates (%s)' % coord_string
48        raise CellCoordinatesException(msg)
49    column, row = match.groups()
50    row = int(row)
51    if not row:
52        msg = "There is no row 0 (%s)" % coord_string
53        raise CellCoordinatesException(msg)
54    return column, row
55
56
57def absolute_coordinate(coord_string):
58    """Convert a coordinate to an absolute coordinate string (B12 -> $B$12)"""
59    m = ABSOLUTE_RE.match(coord_string)
60    if not m:
61        raise ValueError("{0} is not a valid coordinate range".format(
62            coord_string))
63
64    d = m.groupdict('')
65    for k, v in d.items():
66        if v:
67            d[k] = "${0}".format(v)
68
69    if d['max_col'] or d['max_row']:
70        fmt = "{min_col}{min_row}:{max_col}{max_row}"
71    else:
72        fmt = "{min_col}{min_row}"
73    return fmt.format(**d)
74
75
76def _get_column_letter(col_idx):
77    """Convert a column number into a column letter (3 -> 'C')
78
79    Right shift the column col_idx by 26 to find column letters in reverse
80    order.  These numbers are 1-based, and can be converted to ASCII
81    ordinals by adding 64.
82
83    """
84    # these indicies corrospond to A -> ZZZ and include all allowed
85    # columns
86    if not 1 <= col_idx <= 18278:
87        raise ValueError("Invalid column index {0}".format(col_idx))
88    letters = []
89    while col_idx > 0:
90        col_idx, remainder = divmod(col_idx, 26)
91        # check for exact division and borrow if needed
92        if remainder == 0:
93            remainder = 26
94            col_idx -= 1
95        letters.append(chr(remainder+64))
96    return ''.join(reversed(letters))
97
98
99_COL_STRING_CACHE = {}
100_STRING_COL_CACHE = {}
101for i in range(1, 18279):
102    col = _get_column_letter(i)
103    _STRING_COL_CACHE[i] = col
104    _COL_STRING_CACHE[col] = i
105
106
107def get_column_letter(idx,):
108    """Convert a column index into a column letter
109    (3 -> 'C')
110    """
111    try:
112        return _STRING_COL_CACHE[idx]
113    except KeyError:
114        raise ValueError("Invalid column index {0}".format(idx))
115
116
117def column_index_from_string(str_col):
118    """Convert a column name into a numerical index
119    ('A' -> 1)
120    """
121    # we use a function argument to get indexed name lookup
122    try:
123        return _COL_STRING_CACHE[str_col.upper()]
124    except KeyError:
125        raise ValueError("{0} is not a valid column name".format(str_col))
126
127
128def range_boundaries(range_string):
129    """
130    Convert a range string into a tuple of boundaries:
131    (min_col, min_row, max_col, max_row)
132    Cell coordinates will be converted into a range with the cell at both end
133    """
134    msg = "{0} is not a valid coordinate or range".format(range_string)
135    m = ABSOLUTE_RE.match(range_string)
136    if not m:
137        raise ValueError(msg)
138
139    min_col, min_row, sep, max_col, max_row = m.groups()
140
141    if sep:
142        cols = min_col, max_col
143        rows = min_row, max_row
144
145        if not (
146            all(cols + rows) or
147            all(cols) and not any(rows) or
148            all(rows) and not any(cols)
149        ):
150            raise ValueError(msg)
151
152    if min_col is not None:
153        min_col = column_index_from_string(min_col)
154
155    if min_row is not None:
156        min_row = int(min_row)
157
158    if max_col is not None:
159        max_col = column_index_from_string(max_col)
160    else:
161        max_col = min_col
162
163    if max_row is not None:
164        max_row = int(max_row)
165    else:
166        max_row = min_row
167
168    return min_col, min_row, max_col, max_row
169
170
171def rows_from_range(range_string):
172    """
173    Get individual addresses for every cell in a range.
174    Yields one row at a time.
175    """
176    min_col, min_row, max_col, max_row = range_boundaries(range_string)
177    rows = range(min_row, max_row + 1)
178    cols = [get_column_letter(col) for col in range(min_col, max_col + 1)]
179    for row in rows:
180        yield tuple('{0}{1}'.format(col, row) for col in cols)
181
182
183def cols_from_range(range_string):
184    """
185    Get individual addresses for every cell in a range.
186    Yields one row at a time.
187    """
188    min_col, min_row, max_col, max_row = range_boundaries(range_string)
189    rows = range(min_row, max_row+1)
190    cols = (get_column_letter(col) for col in range(min_col, max_col+1))
191    for col in cols:
192        yield tuple('{0}{1}'.format(col, row) for row in rows)
193
194
195def coordinate_to_tuple(coordinate):
196    """
197    Convert an Excel style coordinate to (row, colum) tuple
198    """
199    match = COORD_RE.split(coordinate)
200    col, row = match[1:3]
201    return int(row), _COL_STRING_CACHE[col]
202
203
204def range_to_tuple(range_string):
205    """
206    Convert a worksheet range to the sheetname and maximum and minimum
207    coordinate indices
208    """
209    m = SHEETRANGE_RE.match(range_string)
210    if m is None:
211        raise ValueError("Value must be of the form sheetname!A1:E4")
212    sheetname = m.group("quoted") or m.group("notquoted")
213    cells = m.group("cells")
214    boundaries = range_boundaries(cells)
215    return sheetname, boundaries
216
217
218def quote_sheetname(sheetname):
219    """
220    Add quotes around sheetnames if they contain spaces.
221    """
222    if "'" in sheetname:
223        sheetname = sheetname.replace("'", "''")
224
225    sheetname = u"'{0}'".format(sheetname)
226    return sheetname
227