1from __future__ import absolute_import 2# Copyright (c) 2010-2019 openpyxl 3 4""" 5Collection of utilities used within the package and also available for client code 6""" 7import re 8 9from openpyxl.compat import basestring 10from .exceptions import CellCoordinatesException 11 12# constants 13COORD_RE = re.compile(r'^[$]?([A-Za-z]{1,3})[$]?(\d+)$') 14COL_RANGE = """[A-Z]{1,3}:[A-Z]{1,3}:""" 15ROW_RANGE = r"""\d+:\d+:""" 16RANGE_EXPR = r""" 17[$]?(?P<min_col>[A-Za-z]{1,3})? 18[$]?(?P<min_row>\d+)? 19(:[$]?(?P<max_col>[A-Za-z]{1,3})? 20[$]?(?P<max_row>\d+)?)? 21""" 22ABSOLUTE_RE = re.compile('^' + RANGE_EXPR +'$', re.VERBOSE) 23SHEET_TITLE = r""" 24(('(?P<quoted>([^']|'')*)')|(?P<notquoted>[^'^ ^!]*))!""" 25SHEETRANGE_RE = re.compile("""{0}(?P<cells>{1})(?=,?)""".format( 26 SHEET_TITLE, RANGE_EXPR), re.VERBOSE) 27 28 29def get_column_interval(start, end): 30 """ 31 Given the start and end columns, return all the columns in the series. 32 33 The start and end columns can be either column letters or 1-based 34 indexes. 35 """ 36 if isinstance(start, basestring): 37 start = column_index_from_string(start) 38 if isinstance(end, basestring): 39 end = column_index_from_string(end) 40 return [get_column_letter(x) for x in range(start, end + 1)] 41 42 43def coordinate_from_string(coord_string): 44 """Convert a coordinate string like 'B12' to a tuple ('B', 12)""" 45 match = COORD_RE.match(coord_string) 46 if not match: 47 msg = 'Invalid cell coordinates (%s)' % coord_string 48 raise CellCoordinatesException(msg) 49 column, row = match.groups() 50 row = int(row) 51 if not row: 52 msg = "There is no row 0 (%s)" % coord_string 53 raise CellCoordinatesException(msg) 54 return column, row 55 56 57def absolute_coordinate(coord_string): 58 """Convert a coordinate to an absolute coordinate string (B12 -> $B$12)""" 59 m = ABSOLUTE_RE.match(coord_string) 60 if not m: 61 raise ValueError("{0} is not a valid coordinate range".format( 62 coord_string)) 63 64 d = m.groupdict('') 65 for k, v in d.items(): 66 if v: 67 d[k] = "${0}".format(v) 68 69 if d['max_col'] or d['max_row']: 70 fmt = "{min_col}{min_row}:{max_col}{max_row}" 71 else: 72 fmt = "{min_col}{min_row}" 73 return fmt.format(**d) 74 75 76def _get_column_letter(col_idx): 77 """Convert a column number into a column letter (3 -> 'C') 78 79 Right shift the column col_idx by 26 to find column letters in reverse 80 order. These numbers are 1-based, and can be converted to ASCII 81 ordinals by adding 64. 82 83 """ 84 # these indicies corrospond to A -> ZZZ and include all allowed 85 # columns 86 if not 1 <= col_idx <= 18278: 87 raise ValueError("Invalid column index {0}".format(col_idx)) 88 letters = [] 89 while col_idx > 0: 90 col_idx, remainder = divmod(col_idx, 26) 91 # check for exact division and borrow if needed 92 if remainder == 0: 93 remainder = 26 94 col_idx -= 1 95 letters.append(chr(remainder+64)) 96 return ''.join(reversed(letters)) 97 98 99_COL_STRING_CACHE = {} 100_STRING_COL_CACHE = {} 101for i in range(1, 18279): 102 col = _get_column_letter(i) 103 _STRING_COL_CACHE[i] = col 104 _COL_STRING_CACHE[col] = i 105 106 107def get_column_letter(idx,): 108 """Convert a column index into a column letter 109 (3 -> 'C') 110 """ 111 try: 112 return _STRING_COL_CACHE[idx] 113 except KeyError: 114 raise ValueError("Invalid column index {0}".format(idx)) 115 116 117def column_index_from_string(str_col): 118 """Convert a column name into a numerical index 119 ('A' -> 1) 120 """ 121 # we use a function argument to get indexed name lookup 122 try: 123 return _COL_STRING_CACHE[str_col.upper()] 124 except KeyError: 125 raise ValueError("{0} is not a valid column name".format(str_col)) 126 127 128def range_boundaries(range_string): 129 """ 130 Convert a range string into a tuple of boundaries: 131 (min_col, min_row, max_col, max_row) 132 Cell coordinates will be converted into a range with the cell at both end 133 """ 134 msg = "{0} is not a valid coordinate or range".format(range_string) 135 m = ABSOLUTE_RE.match(range_string) 136 if not m: 137 raise ValueError(msg) 138 139 min_col, min_row, sep, max_col, max_row = m.groups() 140 141 if sep: 142 cols = min_col, max_col 143 rows = min_row, max_row 144 145 if not ( 146 all(cols + rows) or 147 all(cols) and not any(rows) or 148 all(rows) and not any(cols) 149 ): 150 raise ValueError(msg) 151 152 if min_col is not None: 153 min_col = column_index_from_string(min_col) 154 155 if min_row is not None: 156 min_row = int(min_row) 157 158 if max_col is not None: 159 max_col = column_index_from_string(max_col) 160 else: 161 max_col = min_col 162 163 if max_row is not None: 164 max_row = int(max_row) 165 else: 166 max_row = min_row 167 168 return min_col, min_row, max_col, max_row 169 170 171def rows_from_range(range_string): 172 """ 173 Get individual addresses for every cell in a range. 174 Yields one row at a time. 175 """ 176 min_col, min_row, max_col, max_row = range_boundaries(range_string) 177 rows = range(min_row, max_row + 1) 178 cols = [get_column_letter(col) for col in range(min_col, max_col + 1)] 179 for row in rows: 180 yield tuple('{0}{1}'.format(col, row) for col in cols) 181 182 183def cols_from_range(range_string): 184 """ 185 Get individual addresses for every cell in a range. 186 Yields one row at a time. 187 """ 188 min_col, min_row, max_col, max_row = range_boundaries(range_string) 189 rows = range(min_row, max_row+1) 190 cols = (get_column_letter(col) for col in range(min_col, max_col+1)) 191 for col in cols: 192 yield tuple('{0}{1}'.format(col, row) for row in rows) 193 194 195def coordinate_to_tuple(coordinate): 196 """ 197 Convert an Excel style coordinate to (row, colum) tuple 198 """ 199 match = COORD_RE.split(coordinate) 200 col, row = match[1:3] 201 return int(row), _COL_STRING_CACHE[col] 202 203 204def range_to_tuple(range_string): 205 """ 206 Convert a worksheet range to the sheetname and maximum and minimum 207 coordinate indices 208 """ 209 m = SHEETRANGE_RE.match(range_string) 210 if m is None: 211 raise ValueError("Value must be of the form sheetname!A1:E4") 212 sheetname = m.group("quoted") or m.group("notquoted") 213 cells = m.group("cells") 214 boundaries = range_boundaries(cells) 215 return sheetname, boundaries 216 217 218def quote_sheetname(sheetname): 219 """ 220 Add quotes around sheetnames if they contain spaces. 221 """ 222 if "'" in sheetname: 223 sheetname = sheetname.replace("'", "''") 224 225 sheetname = u"'{0}'".format(sheetname) 226 return sheetname 227