import datetime
import io
from os import linesep
import re
import sys

from toml.tz import TomlTz

# Python 2 / Python 3 compatibility aliases used throughout this module.
if sys.version_info < (3,):
    _range = xrange  # noqa: F821
else:
    unicode = str
    _range = range
    basestring = str
    unichr = chr


def _detect_pathlib_path(p):
    """Return True if ``p`` is a ``pathlib.PurePath`` (Python >= 3.4 only)."""
    if (3, 4) <= sys.version_info:
        import pathlib
        return isinstance(p, pathlib.PurePath)
    return False


def _ispath(p):
    """Return True if ``p`` is a bytes/str filename or a pathlib path."""
    if isinstance(p, (bytes, basestring)):
        return True
    return _detect_pathlib_path(p)


def _getpath(p):
    """Convert a path-like object into something ``io.open`` accepts."""
    if (3, 6) <= sys.version_info:
        import os
        return os.fspath(p)
    if _detect_pathlib_path(p):
        return str(p)
    return p


# FileNotFoundError does not exist on Python 2; fall back to IOError there.
try:
    FNFError = FileNotFoundError
except NameError:
    FNFError = IOError


TIME_RE = re.compile(r"([0-9]{2}):([0-9]{2}):([0-9]{2})(\.([0-9]{3,6}))?")


class TomlDecodeError(ValueError):
    """Base toml Exception / Error.

    Attributes:
        msg: The unformatted error message
        doc: The TOML document being parsed
        pos: Character index in ``doc`` where the error was detected
        lineno: 1-based line number derived from ``pos``
        colno: Column derived from ``pos``
    """

    def __init__(self, msg, doc, pos):
        lineno = doc.count('\n', 0, pos) + 1
        colno = pos - doc.rfind('\n', 0, pos)
        emsg = '{} (line {} column {} char {})'.format(msg, lineno, colno, pos)
        ValueError.__init__(self, emsg)
        self.msg = msg
        self.doc = doc
        self.pos = pos
        self.lineno = lineno
        self.colno = colno


# Matches a TOML number, which allows underscores for readability
_number_with_underscores = re.compile('([0-9])(_([0-9]))*')


class CommentValue(object):
    """A decoded value paired with the comment that accompanied it.

    Args:
        val: The wrapped value
        comment: The comment text
        beginline: True if the comment started its own line; the stored
            comment is then prefixed with a newline instead of a space
        _dict: The table class, used by ``dump`` to recognise tables
    """

    def __init__(self, val, comment, beginline, _dict):
        self.val = val
        separator = "\n" if beginline else " "
        self.comment = separator + comment
        self._dict = _dict

    def __getitem__(self, key):
        return self.val[key]

    def __setitem__(self, key, value):
        self.val[key] = value

    def dump(self, dump_value_func):
        """Serialise the value with ``dump_value_func`` plus its comment.

        For table values the comment is emitted before the value, otherwise
        it is appended after it.
        """
        retstr = dump_value_func(self.val)
        if isinstance(self.val, self._dict):
            return self.comment + "\n" + unicode(retstr)
        else:
            return unicode(retstr) + self.comment


def _strictly_valid_num(n):
    """Return False if ``n`` breaks the underscore / leading-zero rules.

    This only checks the placement of underscores and leading zeroes; it
    does not verify that ``n`` is otherwise a well-formed number.
    """
    n = n.strip()
    if not n:
        return False
    if n[0] == '_':
        return False
    if n[-1] == '_':
        return False
    if "_." in n or "._" in n:
        return False
    if len(n) == 1:
        return True
    if n[0] == '0' and n[1] not in ['.', 'o', 'b', 'x']:
        return False
    if n[0] == '+' or n[0] == '-':
        n = n[1:]
        if len(n) > 1 and n[0] == '0' and n[1] != '.':
            return False
    if '__' in n:
        return False
    return True


def load(f, _dict=dict, decoder=None):
    """Parses named file or files as toml and returns a dictionary

    Args:
        f: Path to the file to open, array of files to read into single dict
           or a file descriptor
        _dict: (optional) Specifies the class of the returned toml dictionary
        decoder: The decoder to use

    Returns:
        Parsed toml file represented as a dictionary

    Raises:
        TypeError -- When f is invalid type
        TomlDecodeError: Error while decoding toml
        IOError / FileNotFoundError -- When an array with no valid (existing)
        (Python 2 / Python 3)          file paths is passed
    """

    if _ispath(f):
        with io.open(_getpath(f), encoding='utf-8') as ffile:
            return loads(ffile.read(), _dict, decoder)
    elif isinstance(f, list):
        from os import path as op
        from warnings import warn
        if not any(op.exists(path) for path in f):
            error_msg = "Load expects a list to contain filenames only."
            error_msg += linesep
            error_msg += ("The list needs to contain the path of at least one "
                          "existing file.")
            raise FNFError(error_msg)
        if decoder is None:
            decoder = TomlDecoder(_dict)
        d = decoder.get_empty_table()
        # Later files override keys of earlier ones; missing files only warn.
        for path in f:
            if op.exists(path):
                d.update(load(path, _dict, decoder))
            else:
                warn("Non-existent filename in list with at least one valid "
                     "filename")
        return d
    else:
        try:
            return loads(f.read(), _dict, decoder)
        except AttributeError:
            raise TypeError("You can only load a file descriptor, filename or "
                            "list")


_groupname_re = re.compile(r'^[A-Za-z0-9_-]+$')


def loads(s, _dict=dict, decoder=None):
    """Parses string as toml

    The document is processed in two passes.  The first pass walks the text
    character by character to validate key names, blank out comments and
    join multi-line arrays onto one line.  The second pass walks the
    resulting lines and fills the result table via the decoder.

    Args:
        s: String to be parsed
        _dict: (optional) Specifies the class of the returned toml dictionary
        decoder: (optional) The decoder to use; defaults to
            ``TomlDecoder(_dict)``

    Returns:
        Parsed toml file represented as a dictionary

    Raises:
        TypeError: When a non-string is passed
        TomlDecodeError: Error while decoding toml
    """

    implicitgroups = []
    if decoder is None:
        decoder = TomlDecoder(_dict)
    retval = decoder.get_empty_table()
    currentlevel = retval
    if not isinstance(s, basestring):
        raise TypeError("Expecting something like a string")

    if not isinstance(s, unicode):
        s = s.decode('utf8')

    original = s
    sl = list(s)
    sl_len = len(sl)
    openarr = 0
    openstring = False
    openstrchar = ""
    multilinestr = False
    arrayoftables = False
    beginline = True
    keygroup = False
    dottedkey = False
    # 0: not in a key, 1: inside a key name, 2: after a key, before '='
    keyname = 0
    key = ''
    prev_key = ''
    line_no = 1

    # ---- First pass: character-level scan -------------------------------
    for i, item in enumerate(sl):
        # Blank out the '\r' of a CRLF pair.  The bounds check keeps a lone
        # trailing '\r' from indexing past the end of the document.
        if item == '\r' and i + 1 < sl_len and sl[i + 1] == '\n':
            sl[i] = ' '
            continue
        if keyname:
            key += item
            if item == '\n':
                raise TomlDecodeError("Key name found without value."
                                      " Reached end of line.", original, i)
            if openstring:
                if item == openstrchar:
                    # A quote preceded by an odd number of backslashes is
                    # escaped and does not close the quoted key.
                    oddbackslash = False
                    k = 1
                    while i >= k and sl[i - k] == '\\':
                        oddbackslash = not oddbackslash
                        k += 1
                    if not oddbackslash:
                        keyname = 2
                        openstring = False
                        openstrchar = ""
                continue
            elif keyname == 1:
                if item.isspace():
                    keyname = 2
                    continue
                elif item == '.':
                    dottedkey = True
                    continue
                elif item.isalnum() or item == '_' or item == '-':
                    continue
                elif (dottedkey and sl[i - 1] == '.' and
                      (item == '"' or item == "'")):
                    openstring = True
                    openstrchar = item
                    continue
            elif keyname == 2:
                if item.isspace():
                    if dottedkey:
                        # '' at end of input: falls through to the
                        # "Reached end of file" error below the loop.
                        nextitem = sl[i + 1] if i + 1 < sl_len else ''
                        if not nextitem.isspace() and nextitem != '.':
                            keyname = 1
                    continue
                if item == '.':
                    dottedkey = True
                    nextitem = sl[i + 1] if i + 1 < sl_len else ''
                    if not nextitem.isspace() and nextitem != '.':
                        keyname = 1
                    continue
            if item == '=':
                keyname = 0
                prev_key = key[:-1].rstrip()
                key = ''
                dottedkey = False
            else:
                raise TomlDecodeError("Found invalid character in key name: '" +
                                      item + "'. Try quoting the key name.",
                                      original, i)
        if item == "'" and openstrchar != '"':
            # Count consecutive single quotes to detect ''' delimiters.
            k = 1
            try:
                while sl[i - k] == "'":
                    k += 1
                    if k == 3:
                        break
            except IndexError:
                pass
            if k == 3:
                multilinestr = not multilinestr
                openstring = multilinestr
            else:
                openstring = not openstring
            if openstring:
                openstrchar = "'"
            else:
                openstrchar = ""
        if item == '"' and openstrchar != "'":
            oddbackslash = False
            k = 1
            tripquote = False
            try:
                while sl[i - k] == '"':
                    k += 1
                    if k == 3:
                        tripquote = True
                        break
                if k == 1 or (k == 3 and tripquote):
                    while sl[i - k] == '\\':
                        oddbackslash = not oddbackslash
                        k += 1
            except IndexError:
                pass
            if not oddbackslash:
                if tripquote:
                    multilinestr = not multilinestr
                    openstring = multilinestr
                else:
                    openstring = not openstring
            if openstring:
                openstrchar = '"'
            else:
                openstrchar = ""
        if item == '#' and (not openstring and not keygroup and
                            not arrayoftables):
            # Blank out the comment up to the end of the line and hand the
            # text to the decoder.
            j = i
            comment = ""
            try:
                while sl[j] != '\n':
                    comment += s[j]
                    sl[j] = ' '
                    j += 1
            except IndexError:
                break
            if not openarr:
                decoder.preserve_comment(line_no, prev_key, comment, beginline)
        if item == '[' and (not openstring and not keygroup and
                            not arrayoftables):
            if beginline:
                if len(sl) > i + 1 and sl[i + 1] == '[':
                    arrayoftables = True
                else:
                    keygroup = True
            else:
                openarr += 1
        if item == ']' and not openstring:
            if keygroup:
                keygroup = False
            elif arrayoftables:
                if sl[i - 1] == ']':
                    arrayoftables = False
            else:
                openarr -= 1
        if item == '\n':
            if openstring or multilinestr:
                if not multilinestr:
                    raise TomlDecodeError("Unbalanced quotes", original, i)
                if ((sl[i - 1] == "'" or sl[i - 1] == '"') and (
                        sl[i - 2] == sl[i - 1])):
                    sl[i] = sl[i - 1]
                    if sl[i - 3] == sl[i - 1]:
                        sl[i - 3] = ' '
            elif openarr:
                # Join multi-line arrays onto a single logical line.
                sl[i] = ' '
            else:
                beginline = True
            line_no += 1
        elif beginline and sl[i] != ' ' and sl[i] != '\t':
            beginline = False
            if not keygroup and not arrayoftables:
                if sl[i] == '=':
                    raise TomlDecodeError("Found empty keyname. ", original, i)
                keyname = 1
                key += item
    if keyname:
        raise TomlDecodeError("Key name found without value."
                              " Reached end of file.", original, len(s))
    if openstring:  # reached EOF and have an unterminated string
        raise TomlDecodeError("Unterminated string found."
                              " Reached end of file.", original, len(s))

    # ---- Second pass: line-level decoding -------------------------------
    s = ''.join(sl)
    s = s.split('\n')
    multikey = None
    multilinestr = ""
    multibackslash = False
    pos = 0
    for idx, line in enumerate(s):
        if idx > 0:
            pos += len(s[idx - 1]) + 1

        decoder.embed_comments(idx, currentlevel)

        if not multilinestr or multibackslash or '\n' not in multilinestr:
            line = line.strip()
        if line == "" and (not multikey or multibackslash):
            continue
        if multikey:
            # Continuation of a multi-line value started on an earlier line.
            multilinestr += line
            multibackslash = False
            closed = False
            if multilinestr[0] == '[':
                # Slice instead of index: ``line`` may be empty here.
                closed = line[-1:] == ']'
            elif len(line) > 2:
                closed = (line[-1] == multilinestr[0] and
                          line[-2] == multilinestr[0] and
                          line[-3] == multilinestr[0])
            if closed:
                try:
                    value, vtype = decoder.load_value(multilinestr)
                except ValueError as err:
                    raise TomlDecodeError(str(err), original, pos)
                currentlevel[multikey] = value
                multikey = None
                multilinestr = ""
            else:
                k = len(multilinestr) - 1
                while k > -1 and multilinestr[k] == '\\':
                    multibackslash = not multibackslash
                    k -= 1
                if multibackslash:
                    multilinestr = multilinestr[:-1]
                else:
                    multilinestr += "\n"
            continue
        if line[0] == '[':
            arrayoftables = False
            if len(line) == 1:
                raise TomlDecodeError("Opening key group bracket on line by "
                                      "itself.", original, pos)
            if line[1] == '[':
                arrayoftables = True
                line = line[2:]
                splitstr = ']]'
            else:
                line = line[1:]
                splitstr = ']'
            # Find the closing bracket, skipping brackets in quoted names.
            i = 1
            quotesplits = decoder._get_split_on_quotes(line)
            quoted = False
            for quotesplit in quotesplits:
                if not quoted and splitstr in quotesplit:
                    break
                i += quotesplit.count(splitstr)
                quoted = not quoted
            line = line.split(splitstr, i)
            if len(line) < i + 1 or line[-1].strip() != "":
                raise TomlDecodeError("Key group not on a line by itself.",
                                      original, pos)
            groups = splitstr.join(line[:-1]).split('.')
            i = 0
            while i < len(groups):
                groups[i] = groups[i].strip()
                if len(groups[i]) > 0 and (groups[i][0] == '"' or
                                           groups[i][0] == "'"):
                    # A quoted name may contain dots: re-join the pieces
                    # until the closing quote is found.
                    groupstr = groups[i]
                    j = i + 1
                    while ((not groupstr[0] == groupstr[-1]) or
                           len(groupstr) == 1):
                        j += 1
                        if j > len(groups) + 2:
                            raise TomlDecodeError("Invalid group name '" +
                                                  groupstr + "' Something " +
                                                  "went wrong.", original, pos)
                        groupstr = '.'.join(groups[i:j]).strip()
                    groups[i] = groupstr[1:-1]
                    groups[i + 1:j] = []
                else:
                    if not _groupname_re.match(groups[i]):
                        raise TomlDecodeError("Invalid group name '" +
                                              groups[i] + "'. Try quoting it.",
                                              original, pos)
                i += 1
            currentlevel = retval
            for i, group in enumerate(groups):
                if group == "":
                    raise TomlDecodeError("Can't have a keygroup with an empty "
                                          "name", original, pos)
                try:
                    currentlevel[group]
                    if i == len(groups) - 1:
                        if group in implicitgroups:
                            implicitgroups.remove(group)
                            if arrayoftables:
                                raise TomlDecodeError("An implicitly defined "
                                                      "table can't be an array",
                                                      original, pos)
                        elif arrayoftables:
                            currentlevel[group].append(
                                decoder.get_empty_table())
                        else:
                            raise TomlDecodeError("What? " + group +
                                                  " already exists?" +
                                                  str(currentlevel),
                                                  original, pos)
                except TypeError:
                    # currentlevel is a list (array of tables): descend into
                    # its most recent element.
                    currentlevel = currentlevel[-1]
                    if group not in currentlevel:
                        currentlevel[group] = decoder.get_empty_table()
                        if i == len(groups) - 1 and arrayoftables:
                            currentlevel[group] = [decoder.get_empty_table()]
                except KeyError:
                    if i != len(groups) - 1:
                        implicitgroups.append(group)
                    currentlevel[group] = decoder.get_empty_table()
                    if i == len(groups) - 1 and arrayoftables:
                        currentlevel[group] = [decoder.get_empty_table()]
                currentlevel = currentlevel[group]
                if arrayoftables:
                    try:
                        currentlevel = currentlevel[-1]
                    except KeyError:
                        pass
        elif line[0] == "{":
            if line[-1] != "}":
                raise TomlDecodeError("Line breaks are not allowed in inline "
                                      "objects", original, pos)
            try:
                decoder.load_inline_object(line, currentlevel, multikey,
                                           multibackslash)
            except ValueError as err:
                raise TomlDecodeError(str(err), original, pos)
        elif "=" in line:
            try:
                ret = decoder.load_line(line, currentlevel, multikey,
                                        multibackslash)
            except ValueError as err:
                raise TomlDecodeError(str(err), original, pos)
            if ret is not None:
                multikey, multilinestr, multibackslash = ret
    return retval


def _load_date(val):
    """Parse ``val`` as a TOML date or date-time.

    Returns:
        A ``datetime.date`` for 10-character values, a ``datetime.datetime``
        otherwise, or None if ``val`` cannot be parsed as either.
    """
    microsecond = 0
    tz = None
    try:
        if len(val) > 19:
            if val[19] == '.':
                if val[-1].upper() == 'Z':
                    subsecondval = val[20:-1]
                    tzval = "Z"
                else:
                    subsecondvalandtz = val[20:]
                    if '+' in subsecondvalandtz:
                        splitpoint = subsecondvalandtz.index('+')
                        subsecondval = subsecondvalandtz[:splitpoint]
                        tzval = subsecondvalandtz[splitpoint:]
                    elif '-' in subsecondvalandtz:
                        splitpoint = subsecondvalandtz.index('-')
                        subsecondval = subsecondvalandtz[:splitpoint]
                        tzval = subsecondvalandtz[splitpoint:]
                    else:
                        tzval = None
                        subsecondval = subsecondvalandtz
                if tzval is not None:
                    tz = TomlTz(tzval)
                # Scale the fractional digits to microseconds.
                microsecond = int(int(subsecondval) *
                                  (10 ** (6 - len(subsecondval))))
            else:
                tz = TomlTz(val[19:])
    except ValueError:
        tz = None
    if "-" not in val[1:]:
        return None
    try:
        if len(val) == 10:
            d = datetime.date(
                int(val[:4]), int(val[5:7]),
                int(val[8:10]))
        else:
            d = datetime.datetime(
                int(val[:4]), int(val[5:7]),
                int(val[8:10]), int(val[11:13]),
                int(val[14:16]), int(val[17:19]), microsecond, tz)
    except ValueError:
        return None
    return d


def _ends_with_odd_backslashes(text):
    """Return True if ``text`` ends with an odd number of backslashes."""
    return (len(text) - len(text.rstrip('\\'))) % 2 == 1


def _load_unicode_escapes(v, hexbytes, prefix):
    """Decode unicode escapes and append the result to ``v``.

    Args:
        v: The text preceding the first occurrence of ``prefix``
        hexbytes: The remaining pieces of the string after splitting it on
            ``prefix``; each starts with the hex digits of one escape
        prefix: ``"\\\\u"`` (4 hex digits) or ``"\\\\U"`` (8 hex digits)

    Returns:
        ``v`` with every piece appended, escapes replaced by their character

    Raises:
        ValueError: If an escape is not followed by exactly the required
            number of hex digits, or names a surrogate code point
    """
    # An odd run of backslashes before the prefix means the prefix's own
    # backslash is escaped, so the following text is literal.
    skip = _ends_with_odd_backslashes(v)
    hxblen = 8 if prefix == "\\U" else 4
    for hx in hexbytes:
        if skip:
            skip = _ends_with_odd_backslashes(hx)
            v += prefix
            v += hx
            continue
        hxb = hx[:hxblen].lower()
        if len(hxb) != hxblen or hxb.strip('0123456789abcdef'):
            raise ValueError("Invalid escape sequence: " + hxb)
        if hxb[0] == "d" and hxb[1].strip('01234567'):
            raise ValueError("Invalid escape sequence: " + hxb +
                             ". Only scalar unicode points are allowed.")
        v += unichr(int(hxb, 16))
        rest = hx[len(hxb):]
        v += unicode(rest)
        # The text after this escape decides whether the next prefix is a
        # real escape or follows an escaped backslash.
        skip = _ends_with_odd_backslashes(rest)
    return v


# Unescape TOML string values.
# content after the \
_escapes = ['0', 'b', 'f', 'n', 'r', 't', '"']
# What it should be replaced by
_escapedchars = ['\0', '\b', '\f', '\n', '\r', '\t', '\"']
# Used for substitution
_escape_to_escapedchars = dict(zip(_escapes, _escapedchars))


def _unescape(v):
    """Unescape characters in a TOML string.

    Raises:
        ValueError: If a backslash is followed by a character that is not a
            known escape, a backslash, ``u`` or ``U``
    """
    i = 0
    backslash = False
    while i < len(v):
        if backslash:
            backslash = False
            if v[i] in _escapes:
                v = v[:i - 1] + _escape_to_escapedchars[v[i]] + v[i + 1:]
            elif v[i] == '\\':
                v = v[:i - 1] + v[i:]
            elif v[i] == 'u' or v[i] == 'U':
                i += 1
            else:
                raise ValueError("Reserved escape sequence used")
            continue
        elif v[i] == '\\':
            backslash = True
        i += 1
    return v


class InlineTableDict(object):
    """Marker base class used to tag inline tables.

    It is not a dict itself; ``TomlDecoder.get_empty_inline_table`` combines
    it with the configured table class.
    """


class TomlDecoder(object):
    """Decodes individual TOML lines and values into Python objects.

    Args:
        _dict: The class instantiated for every table
    """

    def __init__(self, _dict=dict):
        self._dict = _dict

    def get_empty_table(self):
        """Return a new, empty table of the configured class."""
        return self._dict()

    def get_empty_inline_table(self):
        """Return a new, empty table that is also an ``InlineTableDict``."""
        class DynamicInlineTableDict(self._dict, InlineTableDict):
            """Concrete sentinel subclass for inline tables.
            It is a subclass of _dict which is passed in dynamically at load
            time

            It is also a subclass of InlineTableDict
            """

        return DynamicInlineTableDict()

    def load_inline_object(self, line, currentlevel, multikey=False,
                           multibackslash=False):
        """Decode the inline table ``line`` into ``currentlevel``.

        Args:
            line: The inline table text including the surrounding braces
            currentlevel: The table that receives the decoded pairs
            multikey: Passed through to ``load_line``
            multibackslash: Passed through to ``load_line``

        Raises:
            ValueError: If a ``key = value`` pair is malformed or a value is
                empty
        """
        candidate_groups = line[1:-1].split(",")
        groups = []
        if len(candidate_groups) == 1 and not candidate_groups[0].strip():
            candidate_groups.pop()
        while len(candidate_groups) > 0:
            candidate_group = candidate_groups.pop(0)
            try:
                _, value = candidate_group.split('=', 1)
            except ValueError:
                raise ValueError("Invalid inline table encountered")
            value = value.strip()
            if not value:
                # Checked up front so indexing ``value`` below cannot fail.
                raise ValueError("Invalid inline table value encountered")
            if ((value[0] == value[-1] and value[0] in ('"', "'")) or (
                    value[0] in '-0123456789' or
                    value in ('true', 'false') or
                    (value[0] == "[" and value[-1] == "]") or
                    (value[0] == '{' and value[-1] == '}'))):
                groups.append(candidate_group)
            elif len(candidate_groups) > 0:
                # The comma was part of the value: re-join with the next
                # piece and try again.
                candidate_groups[0] = (candidate_group + "," +
                                       candidate_groups[0])
            else:
                raise ValueError("Invalid inline table value encountered")
        for group in groups:
            status = self.load_line(group, currentlevel, multikey,
                                    multibackslash)
            if status is not None:
                break

    def _get_split_on_quotes(self, line):
        """Split ``line`` into segments at its quote characters."""
        doublequotesplits = line.split('"')
        quoted = False
        quotesplits = []
        if len(doublequotesplits) > 1 and "'" in doublequotesplits[0]:
            singlequotesplits = doublequotesplits[0].split("'")
            doublequotesplits = doublequotesplits[1:]
            while len(singlequotesplits) % 2 == 0 and len(doublequotesplits):
                singlequotesplits[-1] += '"' + doublequotesplits[0]
                doublequotesplits = doublequotesplits[1:]
                if "'" in singlequotesplits[-1]:
                    singlequotesplits = (singlequotesplits[:-1] +
                                         singlequotesplits[-1].split("'"))
            quotesplits += singlequotesplits
        for doublequotesplit in doublequotesplits:
            if quoted:
                quotesplits.append(doublequotesplit)
            else:
                quotesplits += doublequotesplit.split("'")
                quoted = not quoted
        return quotesplits

    def load_line(self, line, currentlevel, multikey, multibackslash):
        """Decode one ``key = value`` line into ``currentlevel``.

        Returns:
            None when the pair was stored, or a
            ``(multikey, multilinestr, multibackslash)`` tuple when the
            value is a multi-line string that continues on later lines.

        Raises:
            ValueError: If the value is invalid or the key already exists
        """
        # Locate the '=' that separates key and value, ignoring any '='
        # inside quoted segments.
        i = 1
        quotesplits = self._get_split_on_quotes(line)
        quoted = False
        for quotesplit in quotesplits:
            if not quoted and '=' in quotesplit:
                break
            i += quotesplit.count('=')
            quoted = not quoted
        pair = line.split('=', i)
        strictly_valid = _strictly_valid_num(pair[-1])
        if _number_with_underscores.match(pair[-1]):
            pair[-1] = pair[-1].replace('_', '')
        while len(pair[-1]) and (pair[-1][0] != ' ' and pair[-1][0] != '\t' and
                                 pair[-1][0] != "'" and pair[-1][0] != '"' and
                                 pair[-1][0] != '[' and pair[-1][0] != '{' and
                                 pair[-1].strip() != 'true' and
                                 pair[-1].strip() != 'false'):
            try:
                float(pair[-1])
                break
            except ValueError:
                pass
            if _load_date(pair[-1]) is not None:
                break
            if TIME_RE.match(pair[-1]):
                break
            i += 1
            prev_val = pair[-1]
            pair = line.split('=', i)
            if prev_val == pair[-1]:
                raise ValueError("Invalid date or number")
            if strictly_valid:
                strictly_valid = _strictly_valid_num(pair[-1])
        pair = ['='.join(pair[:-1]).strip(), pair[-1].strip()]
        if '.' in pair[0]:
            # Dotted key: descend through (and create) intermediate tables.
            if '"' in pair[0] or "'" in pair[0]:
                quotesplits = self._get_split_on_quotes(pair[0])
                quoted = False
                levels = []
                for quotesplit in quotesplits:
                    if quoted:
                        levels.append(quotesplit)
                    else:
                        levels += [level.strip() for level in
                                   quotesplit.split('.')]
                    quoted = not quoted
            else:
                levels = pair[0].split('.')
            while levels[-1] == "":
                levels = levels[:-1]
            for level in levels[:-1]:
                if level == "":
                    continue
                if level not in currentlevel:
                    currentlevel[level] = self.get_empty_table()
                currentlevel = currentlevel[level]
            pair[0] = levels[-1].strip()
        elif (pair[0][0] == '"' or pair[0][0] == "'") and \
                (pair[0][-1] == pair[0][0]):
            pair[0] = _unescape(pair[0][1:-1])
        k, koffset = self._load_line_multiline_str(pair[1])
        if k > -1:
            while k > -1 and pair[1][k + koffset] == '\\':
                multibackslash = not multibackslash
                k -= 1
            if multibackslash:
                multilinestr = pair[1][:-1]
            else:
                multilinestr = pair[1] + "\n"
            multikey = pair[0]
        else:
            value, vtype = self.load_value(pair[1], strictly_valid)
        try:
            currentlevel[pair[0]]
            raise ValueError("Duplicate keys!")
        except TypeError:
            raise ValueError("Duplicate keys!")
        except KeyError:
            if multikey:
                return multikey, multilinestr, multibackslash
            else:
                currentlevel[pair[0]] = value

    def _load_line_multiline_str(self, p):
        """Detect a multi-line string that is still open at the end of ``p``.

        Returns:
            ``(index, offset)``.  ``index`` is -1 when ``p`` does not end in
            an unterminated triple-quoted string; otherwise it is the index
            of the last character of that string.  ``offset`` is the length
            of the array prefix stripped from ``p`` before checking.
        """
        poffset = 0
        if len(p) < 3:
            return -1, poffset
        if p[0] == '[' and (p.strip()[-1] != ']' and
                            self._load_array_isstrarray(p)):
            newp = p[1:].strip().split(',')
            # Re-join pieces that do not start a string.  Slicing keeps an
            # empty piece (trailing comma) from raising IndexError.
            while len(newp) > 1 and newp[-1][:1] not in ('"', "'"):
                newp = newp[:-2] + [newp[-2] + ',' + newp[-1]]
            newp = newp[-1]
            poffset = len(p) - len(newp)
            p = newp
        if p[0] != '"' and p[0] != "'":
            return -1, poffset
        if p[1] != p[0] or p[2] != p[0]:
            return -1, poffset
        if len(p) > 5 and p[-1] == p[0] and p[-2] == p[0] and p[-3] == p[0]:
            return -1, poffset
        return len(p) - 1, poffset

    def load_value(self, v, strictly_valid=True):
        """Decode a single TOML value.

        Args:
            v: The value text
            strictly_valid: False if the text was already found to break the
                underscore / leading-zero rules for numbers

        Returns:
            A ``(value, type_name)`` tuple where ``type_name`` is one of
            "bool", "str", "array", "inline_object", "time", "date", "int"
            or "float".

        Raises:
            ValueError: If ``v`` is empty or not a valid value
        """
        if not v:
            raise ValueError("Empty value is invalid")
        if v == 'true':
            return (True, "bool")
        elif v.lower() == 'true':
            raise ValueError("Only all lowercase booleans allowed")
        elif v == 'false':
            return (False, "bool")
        elif v.lower() == 'false':
            raise ValueError("Only all lowercase booleans allowed")
        elif v[0] == '"' or v[0] == "'":
            quotechar = v[0]
            testv = v[1:].split(quotechar)
            triplequote = False
            triplequotecount = 0
            if len(testv) > 1 and testv[0] == '' and testv[1] == '':
                testv = testv[2:]
                triplequote = True
            # Reject any text that follows the closing quote(s).
            closed = False
            for tv in testv:
                if tv == '':
                    if triplequote:
                        triplequotecount += 1
                    else:
                        closed = True
                else:
                    oddbackslash = False
                    try:
                        i = -1
                        j = tv[i]
                        while j == '\\':
                            oddbackslash = not oddbackslash
                            i -= 1
                            j = tv[i]
                    except IndexError:
                        pass
                    if not oddbackslash:
                        if closed:
                            raise ValueError("Found tokens after a closed " +
                                             "string. Invalid TOML.")
                        else:
                            if not triplequote or triplequotecount > 1:
                                closed = True
                            else:
                                triplequotecount = 0
            if quotechar == '"':
                # Only double-quoted strings process escape sequences.
                escapeseqs = v.split('\\')[1:]
                backslash = False
                for i in escapeseqs:
                    if i == '':
                        backslash = not backslash
                    else:
                        if i[0] not in _escapes and (i[0] != 'u' and
                                                     i[0] != 'U' and
                                                     not backslash):
                            raise ValueError("Reserved escape sequence used")
                        if backslash:
                            backslash = False
                for prefix in ["\\u", "\\U"]:
                    if prefix in v:
                        hexbytes = v.split(prefix)
                        v = _load_unicode_escapes(hexbytes[0], hexbytes[1:],
                                                  prefix)
                v = _unescape(v)
            if len(v) > 1 and v[1] == quotechar and (len(v) < 3 or
                                                     v[1] == v[2]):
                v = v[2:-2]
            return (v[1:-1], "str")
        elif v[0] == '[':
            return (self.load_array(v), "array")
        elif v[0] == '{':
            inline_object = self.get_empty_inline_table()
            self.load_inline_object(v, inline_object)
            return (inline_object, "inline_object")
        elif TIME_RE.match(v):
            h, m, s, _, ms = TIME_RE.match(v).groups()
            # ``ms`` holds 3-6 fractional digits; right-pad to six so the
            # value is expressed in microseconds (".5" style fractions are
            # scaled, e.g. "999" -> 999000).
            microsecond = int(ms.ljust(6, '0')) if ms else 0
            parsed_time = datetime.time(int(h), int(m), int(s), microsecond)
            return (parsed_time, "time")
        else:
            parsed_date = _load_date(v)
            if parsed_date is not None:
                return (parsed_date, "date")
            if not strictly_valid:
                raise ValueError("Weirdness with leading zeroes or "
                                 "underscores in your number.")
            itype = "int"
            neg = False
            if v[0] == '-':
                neg = True
                v = v[1:]
            elif v[0] == '+':
                v = v[1:]
            v = v.replace('_', '')
            lowerv = v.lower()
            if '.' in v or ('x' not in v and ('e' in v or 'E' in v)):
                if '.' in v and v.split('.', 1)[1] == '':
                    raise ValueError("This float is missing digits after "
                                     "the point")
                if v[0] not in '0123456789':
                    raise ValueError("This float doesn't have a leading "
                                     "digit")
                v = float(v)
                itype = "float"
            elif len(lowerv) == 3 and (lowerv == 'inf' or lowerv == 'nan'):
                v = float(v)
                itype = "float"
            if itype == "int":
                # Base 0 lets int() honour the 0x / 0o / 0b prefixes.
                v = int(v, 0)
            if neg:
                return (0 - v, itype)
            return (v, itype)

    def bounded_string(self, s):
        """Return True if ``s`` ends with an unescaped copy of its first char.

        An empty string counts as bounded.
        """
        if len(s) == 0:
            return True
        if s[-1] != s[0]:
            return False
        i = -2
        backslash = False
        while len(s) + i > 0:
            if s[i] == "\\":
                backslash = not backslash
                i -= 1
            else:
                break
        return not backslash

    def _load_array_isstrarray(self, a):
        """Return True if the first element of array text ``a`` is a string."""
        a = a[1:-1].strip()
        if a != '' and (a[0] == '"' or a[0] == "'"):
            return True
        return False

    def load_array(self, a):
        """Decode the array text ``a`` into a list.

        Raises:
            ValueError: If the elements do not all decode to the same type
        """
        atype = None
        retval = []
        a = a.strip()
        if '[' not in a[1:-1] or "" != a[1:-1].split('[')[0].strip():
            strarray = self._load_array_isstrarray(a)
            if not a[1:-1].strip().startswith('{'):
                a = a[1:-1].split(',')
            else:
                # a is an inline object, we must find the matching parenthesis
                # to define groups
                new_a = []
                start_group_index = 1
                end_group_index = 2
                open_bracket_count = 1 if a[start_group_index] == '{' else 0
                in_str = False
                while end_group_index < len(a[1:]):
                    if a[end_group_index] == '"' or a[end_group_index] == "'":
                        if in_str:
                            backslash_index = end_group_index - 1
                            while (backslash_index > -1 and
                                   a[backslash_index] == '\\'):
                                in_str = not in_str
                                backslash_index -= 1
                        in_str = not in_str
                    if not in_str and a[end_group_index] == '{':
                        open_bracket_count += 1
                    if in_str or a[end_group_index] != '}':
                        end_group_index += 1
                        continue
                    elif a[end_group_index] == '}' and open_bracket_count > 1:
                        open_bracket_count -= 1
                        end_group_index += 1
                        continue

                    # Increase end_group_index by 1 to get the closing bracket
                    end_group_index += 1

                    new_a.append(a[start_group_index:end_group_index])

                    # The next start index is at least after the closing
                    # bracket, a closing bracket can be followed by a comma
                    # since we are in an array.
                    start_group_index = end_group_index + 1
                    while (start_group_index < len(a[1:]) and
                           a[start_group_index] != '{'):
                        start_group_index += 1
                    end_group_index = start_group_index + 1
                a = new_a
            b = 0
            if strarray:
                # Re-join string elements that were split on a comma inside
                # the string.
                while b < len(a) - 1:
                    ab = a[b].strip()
                    while (not self.bounded_string(ab) or
                           (len(ab) > 2 and
                            ab[0] == ab[1] == ab[2] and
                            ab[-2] != ab[0] and
                            ab[-3] != ab[0])):
                        a[b] = a[b] + ',' + a[b + 1]
                        ab = a[b].strip()
                        if b < len(a) - 2:
                            a = a[:b + 1] + a[b + 2:]
                        else:
                            a = a[:b + 1]
                    b += 1
        else:
            # Array of arrays: split on commas at bracket depth zero only.
            al = list(a[1:-1])
            a = []
            openarr = 0
            j = 0
            for i, char in enumerate(al):
                if char == '[':
                    openarr += 1
                elif char == ']':
                    openarr -= 1
                elif char == ',' and not openarr:
                    a.append(''.join(al[j:i]))
                    j = i + 1
            a.append(''.join(al[j:]))
        for element in a:
            element = element.strip()
            if element != '':
                nval, ntype = self.load_value(element)
                if atype:
                    if ntype != atype:
                        raise ValueError("Not a homogeneous array")
                else:
                    atype = ntype
                retval.append(nval)
        return retval

    def preserve_comment(self, line_no, key, comment, beginline):
        """Hook called for each comment found; the base decoder ignores it."""
        pass

    def embed_comments(self, idx, currentlevel):
        """Hook called before each line is decoded; a no-op in the base."""
        pass


class TomlPreserveCommentDecoder(TomlDecoder):
    """Decoder that keeps comments by wrapping values in ``CommentValue``."""

    def __init__(self, _dict=dict):
        # Maps the line number passed to preserve_comment to a
        # (key, comment, beginline) tuple.
        self.saved_comments = {}
        super(TomlPreserveCommentDecoder, self).__init__(_dict)

    def preserve_comment(self, line_no, key, comment, beginline):
        """Remember ``comment`` for later attachment to ``key``."""
        self.saved_comments[line_no] = (key, comment, beginline)

    def embed_comments(self, idx, currentlevel):
        """Attach the comment saved under ``idx`` to its key, if possible.

        The comment is dropped when its key is not present in
        ``currentlevel`` (for example a comment that precedes the first key
        of the document), since there is no value to wrap.
        """
        if idx not in self.saved_comments:
            return

        key, comment, beginline = self.saved_comments[idx]
        if key not in currentlevel:
            return
        currentlevel[key] = CommentValue(currentlevel[key], comment, beginline,
                                         self._dict)