#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
r""" A JSON data encoder and decoder.

 This Python module implements the JSON (http://json.org/) data
 encoding format; a subset of ECMAScript (aka JavaScript) for encoding
 primitive data types (numbers, strings, booleans, lists, and
 associative arrays) in a language-neutral simple text-based syntax.

 It can encode or decode between JSON formatted strings and native
 Python data types.  Normally you would use the encode() and decode()
 functions defined by this module, but if you want more control over
 the processing you can use the JSON class.

 This implementation tries to conform as completely as possible to all
 intricacies of the standards.  It can operate in strict
 mode (which only allows JSON-compliant syntax) or a non-strict mode
 (which allows much more of the whole ECMAScript permitted syntax).
 This includes complete support for Unicode strings (including
 surrogate-pairs for non-BMP characters), and all number formats
 including negative zero and IEEE 754 non-numbers such as NaN or
 Infinity.

 The JSON/ECMAScript to Python type mappings are:
    ---JSON---             ---Python---
    null                   None
    undefined              undefined  (note 1)
    Boolean (true,false)   bool  (True or False)
    Integer                int or long  (note 2)
    Float                  float
    String                 str or unicode  ( "..." or u"..." )
    Array [a, ...]         list  ( [...] )
    Object {a:b, ...}      dict  ( {...} )

 -- Note 1. an 'undefined' object is declared in this module which
    represents the native Python value for this type when in
    non-strict mode.

 -- Note 2. some ECMAScript integers may be up-converted to Python
    floats, such as 1e+40.  Also integer -0 is converted to
    float -0, so as to preserve the sign (which ECMAScript requires).

 -- Note 3. numbers requiring more significant digits than can be
    represented by the Python float type will be converted into a
    Python Decimal type, from the standard 'decimal' module.

 In addition, when operating in non-strict mode, several IEEE 754
 non-numbers are also handled, and are mapped to specific Python
 objects declared in this module:

     NaN (not a number)     nan    (float('nan'))
     Infinity, +Infinity    inf    (float('inf'))
     -Infinity              neginf (float('-inf'))

 When encoding Python objects into JSON, you may use types other than
 native lists or dictionaries, as long as they support the minimal
 interfaces required of all sequences or mappings.  This means you can
 use generators and iterators, tuples, UserDict subclasses, etc.

 To make it easier to produce JSON encoded representations of user
 defined classes, if the object has a method named json_equivalent(),
 then it will call that method and attempt to encode the object
 returned from it instead.  It will do this recursively as needed and
 before any attempt to encode the object using its default
 strategies.  Note that any json_equivalent() method should return
 "equivalent" Python objects to be encoded, not an already-encoded
 JSON-formatted string.  There is no such aid provided to decode
 JSON back into user-defined classes as that would dramatically
 complicate the interface.

 When decoding strings with this module it may operate in either
 strict or non-strict mode.  The strict mode only allows syntax which
 is conforming to RFC 7159 (JSON), while the non-strict mode allows much
 more of the permissible ECMAScript syntax.
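
 As a quick, illustrative sketch of the two module-level helper
 functions (the exact output formatting may differ slightly from what
 is shown here):

     import demjson
     demjson.decode( '[1, 2, 3]' )     # -> [1, 2, 3]
     demjson.encode( {'a': 1} )        # -> '{"a":1}'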

 The following are permitted when processing in NON-STRICT mode:

    * Unicode format control characters are allowed anywhere in the input.
    * All Unicode line terminator characters are recognized.
    * All Unicode white space characters are recognized.
    * The 'undefined' keyword is recognized.
    * Hexadecimal number literals are recognized (e.g., 0xA6, 0177).
    * String literals may use either single or double quote marks.
    * Strings may contain \x (hexadecimal) escape sequences, as well as the
      \v and \0 escape sequences.
    * Lists may have omitted (elided) elements, e.g., [,,,,,], with
      missing elements interpreted as 'undefined' values.
    * Object properties (dictionary keys) can be of any of the
      types: string literals, numbers, or identifiers (the latter of
      which are treated as if they are string literals)---as permitted
      by ECMAScript.  JSON only permits string literals as keys.

 Concerning non-strict and non-ECMAScript allowances:

    * Octal numbers: If you allow the 'octal_numbers' behavior (which
      is never enabled by default), then you can use octal integers
      and octal character escape sequences (per the ECMAScript
      standard Annex B.1.2).  This behavior is allowed, if enabled,
      because it was valid JavaScript at one time.

    * Multi-line string literals:  Strings which are more than one
      line long (contain embedded raw newline characters) are never
      permitted.  This is neither valid JSON nor ECMAScript.  Some other
      JSON implementations may allow this, but this module considers
      that behavior to be a mistake.

 References:
    * JSON (JavaScript Object Notation)
      <http://json.org/>
    * RFC 7159. The application/json Media Type for JavaScript Object Notation (JSON)
      <http://www.ietf.org/rfc/rfc7159.txt>
    * ECMA-262 3rd edition (1999)
      <http://www.ecma-international.org/publications/files/ecma-st/ECMA-262.pdf>
    * IEEE 754-1985: Standard for Binary Floating-Point Arithmetic.
      <http://www.cs.berkeley.edu/~ejr/Projects/ieee754/>

"""

__author__ = "Deron Meranda <http://deron.meranda.us/>"
__homepage__ = "http://deron.meranda.us/python/demjson/"

__date__ = "2015-12-22"
__version__ = "2.2.4"
__version_info__ = ( 2, 2, 4 )   # Will be converted into a namedtuple below

__credits__ = """Copyright (c) 2006-2015 Deron E. Meranda <http://deron.meranda.us/>

Licensed under GNU LGPL (GNU Lesser General Public License) version 3.0
or later.  See LICENSE.txt included with this software.

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU Lesser General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>
or <http://www.fsf.org/licensing/>.

"""

# ----------------------------------------------------------------------

# Set demjson version
try:
    from collections import namedtuple as _namedtuple
    __version_info__ = _namedtuple('version_info', ['major', 'minor', 'micro'])( *__version_info__ )
except ImportError:
    raise ImportError("demjson %s requires Python 2.6 or later" % __version__ )

version, version_info = __version__, __version_info__


# Determine Python version
_py_major, _py_minor = None, None
def _get_pyver():
    global _py_major, _py_minor
    import sys
    vi = sys.version_info
    try:
        _py_major, _py_minor = vi.major, vi.minor
    except AttributeError:
        _py_major, _py_minor = vi[0], vi[1]
_get_pyver()

# ----------------------------------------------------------------------
# Useful global constants

content_type = 'application/json'
file_ext = 'json'


class _dummy_context_manager(object):
    """A context manager that does nothing on entry or exit."""
    def __enter__(self):
        pass
    def __exit__(self, exc_type, exc_val, exc_tb):
        return False
_dummy_context_manager = _dummy_context_manager()


# ----------------------------------------------------------------------
# Decimal and float types.
#
# If a JSON number can not be stored in a Python float without losing
# precision and Python has the decimal type, then we will try to
# use decimal instead of float.  To make this determination we need to
# know the limits of the float type, but Python doesn't provide an easy
# way to determine the largest floating-point number it supports.  So,
# we determine the precision and scale of the float type by testing it.

try:
    # decimal module was introduced in Python 2.4
    import decimal
except ImportError:
    decimal = None


def determine_float_limits( number_type=float ):
    """Determines the precision and range of the given float type.

    The passed in 'number_type' argument should refer to the type of
    floating-point number.  It should be either the built-in 'float',
    or a decimal context or constructor; i.e., one of:

        # 1. FLOAT TYPE
        determine_float_limits( float )

        # 2. DEFAULT DECIMAL CONTEXT
        determine_float_limits( decimal.Decimal )

        # 3. CUSTOM DECIMAL CONTEXT
        ctx = decimal.Context( prec=75 )
        determine_float_limits( ctx )

    Returns a named tuple with components:

        ( significant_digits,
          max_exponent,
          min_exponent )

    Where:
        * significant_digits -- maximum number of *decimal* digits
          that can be represented without any loss of precision.
          This is conservative, so if there are 16 1/2 digits, it
          will return 16, not 17.

        * max_exponent -- The maximum exponent (power of 10) that can
          be represented before an overflow (or rounding to
          infinity) occurs.

        * min_exponent -- The minimum exponent (negative power of 10)
          that can be represented before either an underflow
          (rounding to zero) or a subnormal result (loss of
          precision) occurs.  Note this is conservative, as
          subnormal numbers are excluded.

    """
    if decimal:
        numeric_exceptions = (ValueError,decimal.Overflow,decimal.Underflow)
    else:
        numeric_exceptions = (ValueError,)

    if decimal and number_type == decimal.Decimal:
        number_type = decimal.DefaultContext

    if decimal and isinstance(number_type, decimal.Context):
        # Passed a decimal Context, extract the bound creator function.
        create_num = number_type.create_decimal
        decimal_ctx = decimal.localcontext(number_type)
        is_zero_or_subnormal = lambda n: n.is_zero() or n.is_subnormal()
    elif number_type == float:
        create_num = number_type
        decimal_ctx = _dummy_context_manager
        is_zero_or_subnormal = lambda n: n==0
    else:
        raise TypeError("Expected a float type, e.g., float or decimal context")

    with decimal_ctx:
        zero = create_num('0.0')

        # Find significant digits by comparing floats of increasing
        # number of digits, differing in the last digit only, until
        # they numerically compare as being equal.
        sigdigits = None
        n = 0
        while True:
            n = n + 1
            pfx = '0.' + '1'*n
            a = create_num( pfx + '0')
            for sfx in '123456789':  # Check all possible last digits to
                                     # avoid any partial-decimal.
                b = create_num( pfx + sfx )
                if (a+zero) == (b+zero):
                    sigdigits = n
                    break
            if sigdigits:
                break

        # Find exponent limits.  First find order of magnitude and
        # then use a binary search to find the exact exponent.
        base = '1.' + '1'*(sigdigits-1)
        base0 = '1.' + '1'*(sigdigits-2)
        minexp, maxexp = None, None

        for expsign in ('+','-'):
            minv = 0; maxv = 10
            # First find order of magnitude of exponent limit
            while True:
                try:
                    s = base + 'e' + expsign + str(maxv)
                    s0 = base0 + 'e' + expsign + str(maxv)
                    f = create_num( s ) + zero
                    f0 = create_num( s0 ) + zero
                except numeric_exceptions:
                    f = None
                if not f or not str(f)[0].isdigit() or is_zero_or_subnormal(f) or f==f0:
                    break
                else:
                    minv = maxv
                    maxv = maxv * 10

            # Now do a binary search to find exact limit
            while True:
                if minv+1 == maxv:
                    if expsign=='+':
                        maxexp = minv
                    else:
                        minexp = minv
                    break
                elif maxv < minv:
                    if expsign=='+':
                        maxexp = None
                    else:
                        minexp = None
                    break
                m = (minv + maxv) // 2
                try:
                    s = base + 'e' + expsign + str(m)
                    s0 = base0 + 'e' + expsign + str(m)
                    f = create_num( s ) + zero
                    f0 = create_num( s0 ) + zero
                except numeric_exceptions:
                    f = None
                else:
                    if not f or not str(f)[0].isdigit():
                        f = None
                    elif is_zero_or_subnormal(f) or f==f0:
                        f = None
                if not f:
                    # infinite
                    maxv = m
                else:
                    minv = m

    return _namedtuple('float_limits', ['significant_digits', 'max_exponent', 'min_exponent'])( sigdigits, maxexp, -minexp )


float_sigdigits, float_maxexp, float_minexp = determine_float_limits( float )


# For backwards compatibility with older demjson versions:
def determine_float_precision():
    v = determine_float_limits( float )
    return ( v.significant_digits, v.max_exponent )

# ----------------------------------------------------------------------
# The undefined value.
#
# ECMAScript has an undefined value (similar to yet distinct from null).
# Neither Python nor strict JSON supports undefined, but to allow
# JavaScript behavior we must simulate it.
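#
# As an illustrative sketch (not executed here): when decoding in
# non-strict mode, an elided array element such as the middle item of
# "[1,,3]" is represented by the 'undefined' singleton defined below,
# giving a result along the lines of [1, undefined, 3].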

class _undefined_class(object):
    """Represents the ECMAScript 'undefined' value."""
    __slots__ = []
    def __repr__(self):
        return self.__module__ + '.undefined'
    def __str__(self):
        return 'undefined'
    def __nonzero__(self):
        return False
undefined = _undefined_class()
syntax_error = _undefined_class()   # same as undefined, but has separate identity
del _undefined_class


# ----------------------------------------------------------------------
# Non-Numbers: NaN, Infinity, -Infinity
#
# ECMAScript has official support for non-number floats, although
# strict JSON does not.  Python doesn't either.  So to support the
# full JavaScript behavior we must try to add them into Python, which
# is unfortunately a bit of black magic.  If our python implementation
# happens to be built on top of IEEE 754 we can probably trick python
# into using real floats.  Otherwise we must simulate it with classes.

def _nonnumber_float_constants():
    """Try to return the NaN, Infinity, and -Infinity float values.

    This is necessarily complex because there is no standard
    platform-independent way to do this in Python as the language
    (as opposed to some implementation of it) doesn't discuss
    non-numbers.  We try various strategies from the best to the
    worst.

    If this Python interpreter uses the IEEE 754 floating point
    standard then the returned values will probably be real instances
    of the 'float' type.  Otherwise a custom class object is returned
    which will attempt to simulate the correct behavior as much as
    possible.

    """
    try:
        # First, try (mostly portable) float constructor.  Works under
        # Linux x86 (gcc) and some Unices.
        nan = float('nan')
        inf = float('inf')
        neginf = float('-inf')
    except ValueError:
        try:
            # Try the AIX (PowerPC) float constructors
            nan = float('NaNQ')
            inf = float('INF')
            neginf = float('-INF')
        except ValueError:
            try:
                # Next, try binary unpacking.  Should work under
                # platforms using IEEE 754 floating point.
                import struct, sys
                xnan = '7ff8000000000000'.decode('hex')   # Quiet NaN
                xinf = '7ff0000000000000'.decode('hex')
                xcheck = 'bdc145651592979d'.decode('hex') # -3.14159e-11
                # Could use float.__getformat__, but it is a new python feature,
                # so we use sys.byteorder.
                if sys.byteorder == 'big':
                    nan = struct.unpack('d', xnan)[0]
                    inf = struct.unpack('d', xinf)[0]
                    check = struct.unpack('d', xcheck)[0]
                else:
                    nan = struct.unpack('d', xnan[::-1])[0]
                    inf = struct.unpack('d', xinf[::-1])[0]
                    check = struct.unpack('d', xcheck[::-1])[0]
                neginf = - inf
                if check != -3.14159e-11:
                    raise ValueError('Unpacking raw IEEE 754 floats does not work')
            except (ValueError, TypeError):
                # Punt, make some fake classes to simulate.  These are
                # not perfect though.  For instance nan * 1.0 == nan,
                # as expected, but 1.0 * nan == 0.0, which is wrong.
                class nan(float):
                    """An approximation of the NaN (not a number) floating point number."""
                    def __repr__(self): return 'nan'
                    def __str__(self): return 'nan'
                    def __add__(self,x): return self
                    def __radd__(self,x): return self
                    def __sub__(self,x): return self
                    def __rsub__(self,x): return self
                    def __mul__(self,x): return self
                    def __rmul__(self,x): return self
                    def __div__(self,x): return self
                    def __rdiv__(self,x): return self
                    def __divmod__(self,x): return (self,self)
                    def __rdivmod__(self,x): return (self,self)
                    def __mod__(self,x): return self
                    def __rmod__(self,x): return self
                    def __pow__(self,exp): return self
                    def __rpow__(self,exp): return self
                    def __neg__(self): return self
                    def __pos__(self): return self
                    def __abs__(self): return self
                    def __lt__(self,x): return False
                    def __le__(self,x): return False
                    def __eq__(self,x): return False
                    def __ne__(self,x): return True
                    def __ge__(self,x): return False
                    def __gt__(self,x): return False
                    def __complex__(self,*a): raise NotImplementedError('NaN can not be converted to a complex')
                if decimal:
                    nan = decimal.Decimal('NaN')
                else:
                    nan = nan()
                class inf(float):
                    """An approximation of the +Infinity floating point number."""
                    def __repr__(self): return 'inf'
                    def __str__(self): return 'inf'
                    def __add__(self,x): return self
                    def __radd__(self,x): return self
                    def __sub__(self,x): return self
                    def __rsub__(self,x): return self
                    def __mul__(self,x):
                        if x is neginf or x < 0:
                            return neginf
                        elif x == 0:
                            return nan
                        else:
                            return self
                    def __rmul__(self,x): return self.__mul__(x)
                    def __div__(self,x):
                        if x == 0:
                            raise ZeroDivisionError('float division')
                        elif x < 0:
                            return neginf
                        else:
                            return self
                    def __rdiv__(self,x):
                        if x is inf or x is neginf or x is nan:
                            return nan
                        return 0.0
                    def __divmod__(self,x):
                        if x == 0:
                            raise ZeroDivisionError('float divmod()')
                        elif x < 0:
                            return (nan,nan)
                        else:
                            return (self,self)
                    def __rdivmod__(self,x):
                        if x is inf or x is neginf or x is nan:
                            return (nan, nan)
                        return (0.0, x)
                    def __mod__(self,x):
                        if x == 0:
                            raise ZeroDivisionError('float modulo')
                        else:
                            return nan
                    def __rmod__(self,x):
                        if x is inf or x is neginf or x is nan:
                            return nan
                        return x
                    def __pow__(self, exp):
                        if exp == 0:
                            return 1.0
                        else:
                            return self
                    def __rpow__(self, x):
                        if -1 < x < 1: return 0.0
                        elif x == 1.0: return 1.0
                        elif x is nan or x is neginf or x < 0:
                            return nan
                        else:
                            return self
                    def __neg__(self): return neginf
                    def __pos__(self): return self
                    def __abs__(self): return self
                    def __lt__(self,x): return False
                    def __le__(self,x):
                        if x is self:
                            return True
                        else:
                            return False
                    def __eq__(self,x):
                        if x is self:
                            return True
                        else:
                            return False
                    def __ne__(self,x):
                        if x is self:
                            return False
                        else:
                            return True
                    def __ge__(self,x): return True
                    def __gt__(self,x): return True
                    def __complex__(self,*a): raise NotImplementedError('Infinity can not be converted to a complex')
                if decimal:
                    inf = decimal.Decimal('Infinity')
                else:
                    inf = inf()
                class neginf(float):
                    """An approximation of the -Infinity floating point number."""
                    def __repr__(self): return '-inf'
                    def __str__(self): return '-inf'
                    def __add__(self,x): return self
                    def __radd__(self,x): return self
                    def __sub__(self,x): return self
                    def __rsub__(self,x): return self
                    def __mul__(self,x):
                        if x is self or x < 0:
                            return inf
                        elif x == 0:
                            return nan
                        else:
                            return self
                    def __rmul__(self,x): return self.__mul__(x)
                    def __div__(self,x):
                        if x == 0:
                            raise ZeroDivisionError('float division')
                        elif x < 0:
                            return inf
                        else:
                            return self
                    def __rdiv__(self,x):
                        if x is inf or x is neginf or x is nan:
                            return nan
                        return -0.0
                    def __divmod__(self,x):
                        if x == 0:
                            raise ZeroDivisionError('float divmod()')
                        elif x < 0:
                            return (nan,nan)
                        else:
                            return (self,self)
                    def __rdivmod__(self,x):
                        if x is inf or x is neginf or x is nan:
                            return (nan, nan)
                        return (-0.0, x)
                    def __mod__(self,x):
                        if x == 0:
                            raise ZeroDivisionError('float modulo')
                        else:
                            return nan
                    def __rmod__(self,x):
                        if x is inf or x is neginf or x is nan:
                            return nan
                        return x
                    def __pow__(self,exp):
                        if exp == 0:
                            return 1.0
                        else:
                            return self
                    def __rpow__(self, x):
                        if x is nan or x is inf or x is neginf:
                            return nan
                        return 0.0
                    def __neg__(self): return inf
                    def __pos__(self): return self
                    def __abs__(self): return inf
                    def __lt__(self,x): return True
                    def __le__(self,x): return True
                    def __eq__(self,x):
                        if x is self:
                            return True
                        else:
                            return False
                    def __ne__(self,x):
                        if x is self:
                            return False
                        else:
                            return True
                    def __ge__(self,x):
                        if x is self:
                            return True
                        else:
                            return False
                    def __gt__(self,x): return False
                    def __complex__(self,*a): raise NotImplementedError('-Infinity can not be converted to a complex')
                if decimal:
                    neginf = decimal.Decimal('-Infinity')
                else:
                    neginf = neginf(0)
    return nan, inf, neginf

nan, inf, neginf = _nonnumber_float_constants()
del _nonnumber_float_constants


# ----------------------------------------------------------------------
# Integers

class json_int( (1L).__class__ ):   # Have to specify base this way to satisfy 2to3
    """A subclass of the Python int/long that remembers its format (hex,octal,etc).

    Initialize it the same as an int, but it also accepts an additional keyword
    argument 'number_format' which should be one of the NUMBER_FORMAT_* values.
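
    For example (an illustrative sketch), json_int('0x1f', 16,
    number_format=NUMBER_FORMAT_HEX) compares equal to 31 but remembers
    its radix, so its json_format() method renders it back as '0x1f'.
    The general form is: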

        n = json_int( x[, base, number_format=NUMBER_FORMAT_DECIMAL] )

    """
    def __new__(cls, *args, **kwargs):
        if 'number_format' in kwargs:
            number_format = kwargs['number_format']
            del kwargs['number_format']
            if number_format not in (NUMBER_FORMAT_DECIMAL, NUMBER_FORMAT_HEX, NUMBER_FORMAT_OCTAL, NUMBER_FORMAT_LEGACYOCTAL, NUMBER_FORMAT_BINARY):
                raise TypeError("json_int(): Invalid value for number_format argument")
        else:
            number_format = NUMBER_FORMAT_DECIMAL
        obj = super(json_int,cls).__new__(cls,*args,**kwargs)
        obj._jsonfmt = number_format
        return obj

    @property
    def number_format(self):
        """The original radix format of the number"""
        return self._jsonfmt

    def json_format(self):
        """Returns the integer value formatted as a JSON literal"""
        fmt = self._jsonfmt
        if fmt == NUMBER_FORMAT_HEX:
            return format(self, '#x')
        elif fmt == NUMBER_FORMAT_OCTAL:
            return format(self, '#o')
        elif fmt == NUMBER_FORMAT_BINARY:
            return format(self, '#b')
        elif fmt == NUMBER_FORMAT_LEGACYOCTAL:
            if self==0:
                return '0'   # For some reason Python's int doesn't do '00'
            elif self < 0:
                return '-0%o' % (-self)
            else:
                return '0%o' % self
        else:
            return str(self)

# ----------------------------------------------------------------------
# String processing helpers

def skipstringsafe( s, start=0, end=None ):
    i = start
    if end is None:
        end = len(s)
    unsafe = helpers.unsafe_string_chars
    while i < end and s[i] not in unsafe:
        i += 1
    return i

def skipstringsafe_slow( s, start=0, end=None ):
    i = start
    if end is None:
        end = len(s)
    while i < end:
        c = s[i]
        if c == '"' or c == "'" or c == '\\' or ord(c) <= 0x1f:
            break
        i += 1
    return i

def extend_list_with_sep( orig_seq, extension_seq, sepchar='' ):
    if not sepchar:
        orig_seq.extend( extension_seq )
    else:
        for i, x in enumerate(extension_seq):
            if i > 0:
                orig_seq.append( sepchar )
            orig_seq.append( x )

def extend_and_flatten_list_with_sep( orig_seq, extension_seq, separator='' ):
    for i, part in enumerate(extension_seq):
        if i > 0 and separator:
            orig_seq.append( separator )
        orig_seq.extend( part )


# ----------------------------------------------------------------------
# Unicode UTF-32
# ----------------------------------------------------------------------

def _make_raw_bytes( byte_list ):
    """Takes a list of byte values (numbers) and returns a bytes object (Python 3) or string (Python 2).
    """
    if _py_major >= 3:
        b = bytes( byte_list )
    else:
        b = ''.join(chr(n) for n in byte_list)
    return b

import codecs

class utf32(codecs.CodecInfo):
    """Unicode UTF-32 and UCS4 encoding/decoding support.

    This is for older Pythons which did not have UTF-32 codecs.

    JSON requires that all JSON implementations must support the
    UTF-32 encoding (as well as UTF-8 and UTF-16).  But earlier
    versions of Python did not provide a UTF-32 codec, so we must
    implement UTF-32 ourselves in case we need it.
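
    For example (an illustrative sketch), utf32.utf32be_encode(u'A')
    should return a 4-byte big-endian sequence 00 00 00 41 together
    with a character count of 1; no BOM is written by default.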

    See http://en.wikipedia.org/wiki/UTF-32

    """
    BOM_UTF32_BE = _make_raw_bytes([ 0, 0, 0xFE, 0xFF ])   #'\x00\x00\xfe\xff'
    BOM_UTF32_LE = _make_raw_bytes([ 0xFF, 0xFE, 0, 0 ])   #'\xff\xfe\x00\x00'

    @staticmethod
    def lookup( name ):
        """A standard Python codec lookup function for UCS4/UTF32.

        If it recognizes an encoding name it returns a CodecInfo
        structure which contains the various encoder and decoder
        functions to use.

        """
        ci = None
        name = name.upper()
        if name in ('UCS4BE','UCS-4BE','UCS-4-BE','UTF32BE','UTF-32BE','UTF-32-BE'):
            ci = codecs.CodecInfo( utf32.utf32be_encode, utf32.utf32be_decode, name='utf-32be')
        elif name in ('UCS4LE','UCS-4LE','UCS-4-LE','UTF32LE','UTF-32LE','UTF-32-LE'):
            ci = codecs.CodecInfo( utf32.utf32le_encode, utf32.utf32le_decode, name='utf-32le')
        elif name in ('UCS4','UCS-4','UTF32','UTF-32'):
            ci = codecs.CodecInfo( utf32.encode, utf32.decode, name='utf-32')
        return ci

    @staticmethod
    def encode( obj, errors='strict', endianness=None, include_bom=True ):
        """Encodes a Unicode string into a UTF-32 encoded byte string.

        Returns a tuple: (byte string, num_chars)

        The errors argument should be one of 'strict', 'ignore', or 'replace'.

        The endianness should be one of:
            * 'B', '>', or 'big'     -- Big endian
            * 'L', '<', or 'little'  -- Little endian
            * None                   -- Default, from sys.byteorder

        If include_bom is true a Byte-Order Mark will be written to
        the beginning of the string, otherwise it will be omitted.

        """
        import sys, struct

        # Make a container that can store bytes
        if _py_major >= 3:
            f = bytearray()
            write = f.extend
            def tobytes():
                return bytes(f)
        else:
            try:
                import cStringIO as sio
            except ImportError:
                import StringIO as sio
            f = sio.StringIO()
            write = f.write
            tobytes = f.getvalue

        if not endianness:
            endianness = sys.byteorder

        if endianness.upper()[0] in ('B>'):
            big_endian = True
        elif endianness.upper()[0] in ('L<'):
            big_endian = False
        else:
            raise ValueError("Invalid endianness %r: expected 'big', 'little', or None" % endianness)

        pack = struct.pack
        packspec = '>L' if big_endian else '<L'

        num_chars = 0

        if include_bom:
            if big_endian:
                write( utf32.BOM_UTF32_BE )
            else:
                write( utf32.BOM_UTF32_LE )
            num_chars += 1

        for pos, c in enumerate(obj):
            n = ord(c)
            if 0xD800 <= n <= 0xDFFF:  # surrogate codepoints are prohibited by UTF-32
                if errors == 'ignore':
                    continue   # drop the offending character entirely
                elif errors == 'replace':
                    n = 0xFFFD
                else:
                    raise UnicodeEncodeError('utf32',obj,pos,pos+1,"surrogate code points from U+D800 to U+DFFF are not allowed")
            write( pack( packspec, n) )
            num_chars += 1

        return (tobytes(), num_chars)

    @staticmethod
    def utf32le_encode( obj, errors='strict', include_bom=False ):
        """Encodes a Unicode string into a UTF-32LE (little endian) encoded byte string."""
        return utf32.encode( obj, errors=errors, endianness='L', include_bom=include_bom )

    @staticmethod
    def utf32be_encode( obj, errors='strict', include_bom=False ):
        """Encodes a Unicode string into a UTF-32BE (big endian) encoded byte string."""
        return utf32.encode( obj, errors=errors, endianness='B', include_bom=include_bom )

    @staticmethod
    def decode( obj, errors='strict', endianness=None ):
        """Decodes a UTF-32 byte string into a Unicode string.

        Returns a tuple: (unicode string, num_bytes)

        The errors argument should be one of 'strict', 'ignore',
        'replace', 'backslashreplace', or 'xmlcharrefreplace'.

        The endianness should either be None (for auto-guessing), or a
        word that starts with 'B' (big) or 'L' (little).

        Will detect a Byte-Order Mark.  If a BOM is found and endianness
        is also set, then the two must match.

        If neither a BOM is found nor endianness is set, then big
        endian order is assumed.

        """
        import struct, sys
        maxunicode = sys.maxunicode
        unpack = struct.unpack

        # Detect BOM
        if obj.startswith( utf32.BOM_UTF32_BE ):
            bom_endianness = 'B'
            start = len(utf32.BOM_UTF32_BE)
        elif obj.startswith( utf32.BOM_UTF32_LE ):
            bom_endianness = 'L'
            start = len(utf32.BOM_UTF32_LE)
        else:
            bom_endianness = None
            start = 0

        num_bytes = start

        if endianness is None:
            if bom_endianness is None:
                endianness = sys.byteorder.upper()[0]   # Assume platform default
            else:
                endianness = bom_endianness
        else:
            endianness = endianness[0].upper()
            if bom_endianness and endianness != bom_endianness:
                raise UnicodeDecodeError('utf32',obj,0,start,'BOM does not match expected byte order')

        # Check for truncated last character
        if ((len(obj)-start) % 4) != 0:
            raise UnicodeDecodeError('utf32',obj,start,len(obj),
                                     'Data length not a multiple of 4 bytes')

        # Start decoding characters
        chars = []
        packspec = '>L' if endianness=='B' else '<L'
        i = 0
        for i in range(start, len(obj), 4):
            seq = obj[i:i+4]
            n = unpack( packspec, seq )[0]
            num_bytes += 4

            if n > maxunicode or (0xD800 <= n <= 0xDFFF):
                if errors == 'strict':
                    raise UnicodeDecodeError('utf32',obj,i,i+4,'Invalid code point U+%04X' % n)
                elif errors == 'replace':
                    chars.append( unichr(0xFFFD) )
                elif errors == 'backslashreplace':
                    if n > 0xffff:
                        esc = "\\U%08x" % (n,)
                    else:
                        esc = "\\u%04x" % (n,)
                    for esc_c in esc:
                        chars.append( esc_c )
                elif errors == 'xmlcharrefreplace':
                    esc = "&#%d;" % (n,)
                    for esc_c in esc:
                        chars.append( esc_c )
                else: # ignore
                    pass
            else:
                chars.append( helpers.safe_unichr(n) )
        return (u''.join( chars ), num_bytes)

    @staticmethod
    def utf32le_decode( obj, errors='strict' ):
        """Decodes a UTF-32LE (little endian) byte string into a Unicode string."""
        return utf32.decode( obj, errors=errors, endianness='L' )

    @staticmethod
    def utf32be_decode( obj, errors='strict' ):
        """Decodes a UTF-32BE (big endian) byte string into a Unicode string."""
        return utf32.decode( obj, errors=errors, endianness='B' )


# ----------------------------------------------------------------------
# Helper functions
# ----------------------------------------------------------------------

def _make_unsafe_string_chars():
    import unicodedata
    unsafe = []
    for c in [unichr(i) for i in range(0x100)]:
        if c == u'"' or c == u'\\' \
                or unicodedata.category( c ) in ['Cc','Cf','Zl','Zp']:
            unsafe.append( c )
    return u''.join( unsafe )

class helpers(object):
    """A set of utility functions."""

    hexdigits = '0123456789ABCDEFabcdef'
    octaldigits = '01234567'
    unsafe_string_chars = _make_unsafe_string_chars()

    import sys
    maxunicode = sys.maxunicode

    always_use_custom_codecs = False   # If True use demjson's codecs
                                       # before system codecs.  This
                                       # is mainly here for testing.

    javascript_reserved_words = frozenset([
        # Keywords (plus "let")  (ECMAScript 6 section 11.6.2.1)
        'break','case','catch','class','const','continue',
        'debugger','default','delete','do','else','export',
        'extends','finally','for','function','if','import',
        'in','instanceof','let','new','return','super',
        'switch','this','throw','try','typeof','var','void',
        'while','with','yield',
        # Future reserved words (ECMAScript 6 section 11.6.2.2)
        'enum','implements','interface','package',
        'private','protected','public','static',
        # null/boolean literals
        'null','true','false'
        ])

    @staticmethod
    def make_raw_bytes( byte_list ):
        """Constructs a byte array (bytes in Python 3, str in Python 2) from a list of byte values (0-255).

        """
        return _make_raw_bytes( byte_list )

    @staticmethod
    def is_hex_digit( c ):
        """Determines if the given character is a valid hexadecimal digit (0-9, a-f, A-F)."""
        return (c in helpers.hexdigits)

    @staticmethod
    def is_octal_digit( c ):
        """Determines if the given character is a valid octal digit (0-7)."""
        return (c in helpers.octaldigits)

    @staticmethod
    def is_binary_digit( c ):
        """Determines if the given character is a valid binary digit (0 or 1)."""
        return (c == '0' or c == '1')

    @staticmethod
    def char_is_json_ws( c ):
        """Determines if the given character is a JSON white-space character"""
        return c in ' \t\n\r'

    @staticmethod
    def safe_unichr( codepoint ):
        """Just like Python's unichr() but works in narrow-Unicode Pythons."""
        if codepoint >= 0x10000 and codepoint > helpers.maxunicode:
            # Narrow-Unicode python, construct a UTF-16 surrogate pair.
            w1, w2 = helpers.make_surrogate_pair( codepoint )
            if w2 is None:
                c = unichr(w1)
            else:
                c = unichr(w1) + unichr(w2)
        else:
            c = unichr(codepoint)
        return c

    @staticmethod
    def char_is_unicode_ws( c ):
        """Determines if the given character is a Unicode space character"""
        if not isinstance(c,unicode):
            c = unicode(c)
        if c in u' \t\n\r\f\v':
            return True
        import unicodedata
        return unicodedata.category(c) == 'Zs'

    @staticmethod
    def char_is_json_eol( c ):
        """Determines if the given character is a JSON line separator"""
        return c in '\n\r'

    @staticmethod
    def char_is_unicode_eol( c ):
        """Determines if the given character is a Unicode line or
        paragraph separator.  These correspond to CR and LF as well as
        Unicode characters in the Zl or Zp categories.

        """
        return c in u'\r\n\u2028\u2029'

    @staticmethod
    def char_is_identifier_leader( c ):
        """Determines if the character may be the first character of a
        JavaScript identifier.
        """
        return c.isalpha() or c in '_$'

    @staticmethod
    def char_is_identifier_tail( c ):
        """Determines if the character may be part of a JavaScript
        identifier.
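
        For example (illustrative), char_is_identifier_leader(u'$') and
        char_is_identifier_tail(u'9') are both True, so a key such as
        $x9 can be parsed as an identifier in non-strict mode.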
        """
        return c.isalnum() or c in u'_$\u200c\u200d'

    @staticmethod
    def extend_and_flatten_list_with_sep( orig_seq, extension_seq, separator='' ):
        for i, part in enumerate(extension_seq):
            if i > 0 and separator:
                orig_seq.append( separator )
            orig_seq.extend( part )

    @staticmethod
    def strip_format_control_chars( txt ):
        """Filters out all Unicode format control characters from the string.

        ECMAScript permits any Unicode "format control characters" to
        appear at any place in the source code.  They are to be
        ignored as if they are not there before any other lexical
        tokenization occurs.  Note that JSON does not allow them,
        except within string literals.

        * Ref. ECMAScript section 7.1.
        * http://en.wikipedia.org/wiki/Unicode_control_characters

        There are dozens of Format Control Characters, for example:
            U+00AD   SOFT HYPHEN
            U+200B   ZERO WIDTH SPACE
            U+2060   WORD JOINER

        """
        import unicodedata
        txt2 = filter( lambda c: unicodedata.category(unicode(c)) != 'Cf', txt )

        # 2to3 NOTE: The following is needed to work around a broken
        # Python3 conversion in which filter() will be transformed
        # into a list rather than a string.
        if not isinstance(txt2,basestring):
            txt2 = u''.join(txt2)
        return txt2

    @staticmethod
    def lookup_codec( encoding ):
        """Wrapper around codecs.lookup().

        Returns None if codec not found, rather than raising a LookupError.
        """
        import codecs
        if isinstance( encoding, codecs.CodecInfo ):
            return encoding
        encoding = encoding.lower()
        if helpers.always_use_custom_codecs:
            # Try custom utf32 first, then standard python codecs
            cdk = utf32.lookup(encoding)
            if not cdk:
                try:
                    cdk = codecs.lookup( encoding )
                except LookupError:
                    cdk = None
        else:
            # Try standard python codecs first, then custom utf32
            try:
                cdk = codecs.lookup( encoding )
            except LookupError:
                cdk = utf32.lookup( encoding )
        return cdk

    @staticmethod
    def auto_detect_encoding( s ):
        """Takes a string (or byte array) and tries to determine the Unicode encoding it is in.

        Returns the encoding name, as a string.
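
        For example (an illustrative sketch), a byte string beginning
        with the four bytes 00 00 00 7B (a "{" in UTF-32BE, with no
        BOM) should be reported as 'utf-32be', per the RFC 4627
        heuristics used below.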

        """
        if not s or len(s)==0:
            return "utf-8"

        # Get the byte values of up to the first 4 bytes
        ords = []
        for i in range(0, min(len(s),4)):
            x = s[i]
            if isinstance(x, basestring):
                x = ord(x)
            ords.append( x )

        # Look for BOM marker
        import sys, codecs
        bom2, bom3, bom4 = None, None, None
        if len(s) >= 2:
            bom2 = s[:2]
        if len(s) >= 3:
            bom3 = s[:3]
        if len(s) >= 4:
            bom4 = s[:4]

        # Assign values of first four bytes to: a, b, c, d; and last byte to: z
        a, b, c, d, z = None, None, None, None, None
        if len(s) >= 1:
            a = ords[0]
        if len(s) >= 2:
            b = ords[1]
        if len(s) >= 3:
            c = ords[2]
        if len(s) >= 4:
            d = ords[3]

        z = s[-1]
        if isinstance(z, basestring):
            z = ord(z)

        if bom4 and ( (hasattr(codecs,'BOM_UTF32_LE') and bom4 == codecs.BOM_UTF32_LE) or
                      (bom4 == utf32.BOM_UTF32_LE) ):
            encoding = 'utf-32le'
            s = s[4:]
        elif bom4 and ( (hasattr(codecs,'BOM_UTF32_BE') and bom4 == codecs.BOM_UTF32_BE) or
                        (bom4 == utf32.BOM_UTF32_BE) ):
            encoding = 'utf-32be'
            s = s[4:]
        elif bom2 and bom2 == codecs.BOM_UTF16_LE:
            encoding = 'utf-16le'
            s = s[2:]
        elif bom2 and bom2 == codecs.BOM_UTF16_BE:
            encoding = 'utf-16be'
            s = s[2:]
        elif bom3 and bom3 == codecs.BOM_UTF8:
            encoding = 'utf-8'
            s = s[3:]

        # No BOM, so autodetect encoding used by looking at first four
        # bytes according to RFC 4627 section 3.  The first and last bytes
        # in a JSON document will be ASCII.  The second byte will be ASCII
        # unless the first byte was a quotation mark.

        elif len(s)>=4 and a==0 and b==0 and c==0 and d!=0: # UTF-32BE  (0 0 0 x)
            encoding = 'utf-32be'
        elif len(s)>=4 and a!=0 and b==0 and c==0 and d==0 and z==0: # UTF-32LE  (x 0 0 0 [... 0])
            encoding = 'utf-32le'
        elif len(s)>=2 and a==0 and b!=0: # UTF-16BE  (0 x)
            encoding = 'utf-16be'
        elif len(s)>=2 and a!=0 and b==0 and z==0: # UTF-16LE  (x 0 [... 0])
            encoding = 'utf-16le'
        elif ord('\t') <= a <= 127:
            # First byte appears to be ASCII, so guess UTF-8.
            encoding = 'utf8'
        else:
            raise ValueError("Can not determine the Unicode encoding for byte stream")

        return encoding

    @staticmethod
    def unicode_decode( txt, encoding=None ):
        """Takes a string (or byte array) and tries to convert it to a Unicode string.

        Returns a named tuple: (string, codec, bom)

        The 'encoding' argument, if supplied, should either be the name
        of a character encoding, or an instance of codecs.CodecInfo.  If
        the encoding argument is None or "auto" then the encoding is
        automatically determined, if possible.

        Any BOM (Byte Order Mark) that is found at the beginning of the
        input will be stripped off and placed in the 'bom' portion of
        the returned value.
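
        For example (an illustrative sketch), decoding the byte string
        b'{"a":1}' with no BOM should auto-detect UTF-8 and return
        roughly (u'{"a":1}', <utf-8 codec>, None).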

        """
        if isinstance(txt, unicode):
            res = _namedtuple('DecodedString',['string','codec','bom'])( txt, None, None )
        else:
            if encoding is None or encoding == 'auto':
                encoding = helpers.auto_detect_encoding( txt )

            cdk = helpers.lookup_codec( encoding )
            if not cdk:
                raise LookupError("Can not find codec for encoding %r" % encoding)

            try:
                # Determine if codec takes arguments; try a decode of nothing
                cdk.decode( helpers.make_raw_bytes([]), errors='strict' )
            except TypeError:
                cdk_kw = {}   # This codec doesn't like the errors argument
            else:
                cdk_kw = {'errors': 'strict'}

            unitxt, numbytes = cdk.decode( txt, **cdk_kw )  # DO THE DECODE HERE!

            # Remove BOM if present
            if len(unitxt) > 0 and unitxt[0] == u'\uFEFF':
                bom = cdk.encode(unitxt[0])[0]
                unitxt = unitxt[1:]
            elif len(unitxt) > 0 and unitxt[0] == u'\uFFFE': # Reversed BOM
                raise UnicodeDecodeError(cdk.name,txt,0,0,"Wrong byte order, found reversed BOM U+FFFE")
            else:
                bom = None

            res = _namedtuple('DecodedString',['string','codec','bom'])( unitxt, cdk, bom )
        return res

    @staticmethod
    def surrogate_pair_as_unicode( c1, c2 ):
        """Takes a pair of unicode surrogates and returns the equivalent unicode character.

        The input pair must be a surrogate pair, with c1 in the range
        U+D800 to U+DBFF and c2 in the range U+DC00 to U+DFFF.

        """
        n1, n2 = ord(c1), ord(c2)
        if n1 < 0xD800 or n1 > 0xDBFF or n2 < 0xDC00 or n2 > 0xDFFF:
            raise JSONDecodeError('illegal Unicode surrogate pair',(c1,c2))
        a = n1 - 0xD800
        b = n2 - 0xDC00
        v = (a << 10) | b
        v += 0x10000
        return helpers.safe_unichr(v)

    @staticmethod
    def unicode_as_surrogate_pair( c ):
        """Takes a single unicode character and returns a sequence of surrogate pairs.

        The output of this function is a tuple consisting of one or two unicode
        characters, such that if the input character is outside the BMP range
        then the output is a two-character surrogate pair representing that character.

        If the input character is inside the BMP then the output tuple will have
        just a single character...the same one.
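
        For example (illustrative), for the non-BMP character U+1D11E
        this should return the surrogate pair whose code points are
        0xD834 and 0xDD1E (computed by make_surrogate_pair() below),
        while for u'A' it simply returns (u'A',).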

        """
        n = ord(c)
        w1, w2 = helpers.make_surrogate_pair(n)
        if w2 is None:
            return (unichr(w1),)
        else:
            return (unichr(w1), unichr(w2))

    @staticmethod
    def make_surrogate_pair( codepoint ):
        """Given a Unicode codepoint (int) returns a 2-tuple of surrogate codepoints."""
        if codepoint < 0x10000:
            return (codepoint,None)   # in BMP, surrogate pair not required
        v = codepoint - 0x10000
        vh = (v >> 10) & 0x3ff   # highest 10 bits
        vl = v & 0x3ff           # lowest 10 bits
        w1 = 0xD800 | vh
        w2 = 0xDC00 | vl
        return (w1, w2)

    @staticmethod
    def isnumbertype( obj ):
        """Is the object of a Python number type (excluding complex)?"""
        return isinstance(obj, (int,long,float)) \
               and not isinstance(obj, bool) \
               or obj is nan or obj is inf or obj is neginf \
               or (decimal and isinstance(obj, decimal.Decimal))

    @staticmethod
    def is_negzero( n ):
        """Is the number value a negative zero?"""
        if isinstance( n, float ):
            return n == 0.0 and repr(n).startswith('-')
        elif decimal and isinstance( n, decimal.Decimal ):
            return n.is_zero() and n.is_signed()
        else:
            return False

    @staticmethod
    def is_nan( n ):
        """Is the number a NaN (not-a-number)?"""
        if isinstance( n, float ):
            return n is nan or n.hex() == 'nan' or n != n
        elif decimal and isinstance( n, decimal.Decimal ):
            return n.is_nan()
        else:
            return False

    @staticmethod
    def is_infinite( n ):
        """Is the number infinite?"""
        if isinstance( n, float ):
            return n is inf or n is neginf or n.hex() in ('inf','-inf')
        elif decimal and isinstance( n, decimal.Decimal ):
            return n.is_infinite()
        else:
            return False

    @staticmethod
    def isstringtype( obj ):
        """Is the object of a Python string type?"""
        if isinstance(obj, basestring):
            return True
        # Must also check for some other pseudo-string types
        import types, UserString
        return isinstance(obj, types.StringTypes) \
               or isinstance(obj, UserString.UserString)
        ## or isinstance(obj, UserString.MutableString)

    @staticmethod
    def decode_hex( hexstring ):
        """Decodes a hexadecimal string into its integer value."""
        # We don't use the builtin 'hex' codec in python since it can
        # not handle odd numbers of digits, nor raise the same type
        # of exceptions we want to.
        n = 0
        for c in hexstring:
            if '0' <= c <= '9':
                d = ord(c) - ord('0')
            elif 'a' <= c <= 'f':
                d = ord(c) - ord('a') + 10
            elif 'A' <= c <= 'F':
                d = ord(c) - ord('A') + 10
            else:
                raise ValueError('Not a hexadecimal number', hexstring)
            # Could use ((n << 4 ) | d), but python 2.3 issues a FutureWarning.
            n = (n * 16) + d
        return n

    @staticmethod
    def decode_octal( octalstring ):
        """Decodes an octal string into its integer value."""
        n = 0
        for c in octalstring:
            if '0' <= c <= '7':
                d = ord(c) - ord('0')
            else:
                raise ValueError('Not an octal number', octalstring)
            # Could use ((n << 3 ) | d), but python 2.3 issues a FutureWarning.
            n = (n * 8) + d
        return n

    @staticmethod
    def decode_binary( binarystring ):
        """Decodes a binary string into its integer value."""
        n = 0
        for c in binarystring:
            if c == '0':
                d = 0
            elif c == '1':
                d = 1
            else:
                raise ValueError('Not a binary number', binarystring)
            # Could use ((n << 1 ) | d), but python 2.3 issues a FutureWarning.
            n = (n * 2) + d
        return n

    @staticmethod
    def format_timedelta_iso( td ):
        """Encodes a datetime.timedelta into ISO-8601 Time Period format.
        """
        d = td.days
        s = td.seconds
        ms = td.microseconds
        m, s = divmod(s,60)
        h, m = divmod(m,60)
        a = ['P']
        if d:
            a.append( '%dD' % d )
        if h or m or s or ms:
            a.append( 'T' )
        if h:
            a.append( '%dH' % h )
        if m:
            a.append( '%dM' % m )
        if s or ms:
            if ms:
                a.append( '%d.%06dS' % (s,ms) )
            else:
                a.append( '%dS' % s )
        if len(a)==1:
            a.append('T0S')
        return ''.join(a)


# ----------------------------------------------------------------------
# File position indicator
# ----------------------------------------------------------------------

class position_marker(object):
    """A position marks a specific place in a text document.
    It consists of the following attributes:

        * line - The line number, starting at 1
        * column - The column on the line, starting at 0
        * char_position - The number of characters from the start of
          the document, starting at 0
        * text_after - (optional) a short excerpt of the text of the
          document starting at the current position

    Lines are separated by any Unicode line separator character.  As an
    exception a CR+LF character pair is treated as being a single line
    separator demarcation.

    Columns are simply a measure of the number of characters after the
    start of a new line, starting at 0.  Visual effects caused by
    Unicode characters such as combining characters, bidirectional
    text, zero-width characters and so on do not affect the
    computation of the column regardless of visual appearance.

    The char_position is a count of the number of characters since the
    beginning of the document, starting at 0.  As used within the
    buffered_stream class, if the document starts with a Unicode Byte
    Order Mark (BOM), the BOM prefix is NOT INCLUDED in the count.

    """
    def __init__(self, offset=0, line=1, column=0, text_after=None):
        self.__char_position = offset
        self.__line = line
        self.__column = column
        self.__text_after = text_after
        self.__at_end = False
        self.__last_was_cr = False

    @property
    def line(self):
        """The current line within the document, starts at 1."""
        return self.__line
    @property
    def column(self):
        """The current character column within the current line,
        starts at 0.
        """
        return self.__column
    @property
    def char_position(self):
        """The current character offset from the beginning of the
        document, starts at 0.
        """
        return self.__char_position

    @property
    def at_start(self):
        """Returns True if the position is at the start of the document."""
        return (self.char_position == 0)

    @property
    def at_end(self):
        """Returns True if the position is at the end of the document.

        This property must be set by the user.
        """
        return self.__at_end

    @at_end.setter
    def at_end(self, b):
        """Sets the at_end property to True or False.
        """
        self.__at_end = bool(b)

    @property
    def text_after(self):
        """Returns a textual excerpt starting at the current position.

        This property must be set by the user.
        """
        return self.__text_after

    @text_after.setter
    def text_after(self, value):
        """Sets the text_after property to a given string.
        """
        self.__text_after = value

    def __repr__(self):
        s = "%s(offset=%r,line=%r,column=%r" \
            % (self.__class__.__name__,
               self.__char_position,
               self.__line,
               self.__column)
        if self.text_after:
            s += ",text_after=%r" % (self.text_after,)
        s += ")"
        return s

    def describe(self, show_text=True):
        """Returns a human-readable description of the position, in English."""
        s = "line %d, column %d, offset %d" % (self.__line,
                                               self.__column,
                                               self.__char_position)
        if self.at_start:
            s += " (AT-START)"
        elif self.at_end:
            s += " (AT-END)"
        if show_text and self.text_after:
            s += ", text %r" % (self.text_after,)
        return s

    def __str__(self):
        """Same as the describe() function."""
        return self.describe( show_text=True )

    def copy( self ):
        """Create a copy of the position object."""
        p = self.__class__()
        p.__char_position = self.__char_position
        p.__line = self.__line
        p.__column = self.__column
        p.text_after = self.__text_after
        p.at_end = self.at_end
        p.__last_was_cr = self.__last_was_cr
        return p

    def rewind( self ):
        """Set the position to the start of the document."""
        if not self.at_start:
            self.text_after = None
            self.at_end = False
            self.__char_position = 0
            self.__line = 1
            self.__column = 0
            self.__last_was_cr = False

    def advance( self, s ):
        """Advance the position from its current place according to
        the given string of characters.

        """
        if s:
            self.text_after = None
            for c in s:
                self.__char_position += 1
                if c == '\n' and self.__last_was_cr:
                    self.__last_was_cr = False
                elif helpers.char_is_unicode_eol(c):
                    self.__line += 1
                    self.__column = 0
                    self.__last_was_cr = (c == '\r')
                else:
                    self.__column += 1
                    self.__last_was_cr = False

# ----------------------------------------------------------------------
# Buffered Stream Reader
# ----------------------------------------------------------------------

class buffered_stream(object):
    """A helper class for the JSON parser.

    It allows for reading an input document, while handling some
    low-level Unicode issues as well as tracking the current position
    in terms of line and column position.
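
    A minimal illustrative sketch of its use:

        b = buffered_stream( u'[1,2]' )
        b.pop()                # returns u'[' and advances
        b.peek()               # returns u'1' without advancing
        b.position.column      # now 1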

    """
    def __init__(self, txt='', encoding=None):
        self.reset()
        self.set_text( txt, encoding )

    def reset(self):
        """Clears the state to nothing."""
        self.__pos = position_marker()
        self.__saved_pos = []   # Stack of saved positions
        self.__bom = helpers.make_raw_bytes([])  # contains copy of byte-order mark, if any
        self.__codec = None      # The CodecInfo
        self.__encoding = None   # The name of the codec's encoding
        self.__input_is_bytes = False
        self.__rawbuf = None
        self.__raw_bytes = None
        self.__cmax = 0
        self.num_ws_skipped = 0

    def save_position(self):
        self.__saved_pos.append( self.__pos.copy() )
        return True

    def clear_saved_position(self):
        if self.__saved_pos:
            self.__saved_pos.pop()
            return True
        else:
            return False

    def restore_position(self):
        try:
            old_pos = self.__saved_pos.pop()   # Can raise IndexError
        except IndexError, err:
            raise IndexError("Attempt to restore buffer position that was never saved")
        else:
            self.__pos = old_pos
            return True

    def _find_codec(self, encoding):
        if encoding is None:
            self.__codec = None
            self.__encoding = None
        elif isinstance(encoding, codecs.CodecInfo):
            self.__codec = encoding
            self.__encoding = self.__codec.name
        else:
            self.__encoding = encoding
            self.__codec = helpers.lookup_codec( encoding )
            if not self.__codec:
                raise JSONDecodeError('no codec available for character encoding',encoding)
        return self.__codec

    def set_text( self, txt, encoding=None ):
        """Changes the input text document and rewinds the position to
        the start of the new document.

        """
        import sys
        self.rewind()
        self.__codec = None
        self.__bom = None
        self.__rawbuf = u''
        self.__cmax = 0   # max number of chars in input
        try:
            decoded = helpers.unicode_decode( txt, encoding )
        except JSONError:
            raise
        except Exception, err:
            # Re-raise as a JSONDecodeError
            e2 = sys.exc_info()
            newerr = JSONDecodeError("a Unicode decoding error occurred")
            # Simulate Python 3's: "raise X from Y" exception chaining
            newerr.__cause__ = err
            newerr.__traceback__ = e2[2]
            raise newerr
        else:
            self.__codec = decoded.codec
            self.__bom = decoded.bom
            self.__rawbuf = decoded.string
            self.__cmax = len(self.__rawbuf)

    def __repr__(self):
        return '<%s at %r text %r>' % (self.__class__.__name__, self.__pos, self.text_context)

    def rewind(self):
        """Resets the position back to the start of the input text."""
        self.__pos.rewind()

    @property
    def codec(self):
        """The codec object used to perform Unicode decoding, or None."""
        return self.__codec

    @property
    def bom(self):
        """The Unicode Byte-Order Mark (BOM), if any, that was present
        at the start of the input text.  The returned BOM is a string
        of the raw bytes, and is not Unicode-decoded.

        """
        return self.__bom

    @property
    def cpos(self):
        """The current character offset from the start of the document."""
        return self.__pos.char_position

    @property
    def position(self):
        """The current position (as a position_marker object).
        Returns a copy.

        """
        p = self.__pos.copy()
        p.text_after = self.text_context
        p.at_end = self.at_end
        return p

    @property
    def at_start(self):
        """Returns True if the position is currently at the start of
        the document, or False otherwise.

        """
        return self.__pos.at_start

    @property
    def at_end(self):
        """Returns True if the position is currently at the end of the
        document, or False otherwise.

        """
        c = self.peek()
        return (not c)

    def at_ws(self, allow_unicode_whitespace=True):
        """Returns True if the current position contains a white-space
        character.

        """
        c = self.peek()
        if not c:
            return False
        elif allow_unicode_whitespace:
            return helpers.char_is_unicode_ws(c)
        else:
            return helpers.char_is_json_ws(c)

    def at_eol(self, allow_unicode_eol=True):
        """Returns True if the current position contains an
        end-of-line control character.

        """
        c = self.peek()
        if not c:
            return True   # End of file is treated as end of line
        elif allow_unicode_eol:
            return helpers.char_is_unicode_eol(c)
        else:
            return helpers.char_is_json_eol(c)

    def peek( self, offset=0 ):
        """Returns the character at the current position, or at a
        given offset away from the current position.  If the position
        is beyond the limits of the document size, then an empty
        string '' is returned.

        """
        i = self.cpos + offset
        if i < 0 or i >= self.__cmax:
            return ''
        return self.__rawbuf[i]

    def peekstr( self, span=1, offset=0 ):
        """Returns one or more characters starting at the current
        position, or at a given offset away from the current position,
        and continuing for the given span length.  If the offset and
        span go outside the limit of the current document size, then
        the returned string may be shorter than the requested span
        length.

        """
        i = self.cpos + offset
        j = i + span
        if i < 0 or i >= self.__cmax:
            return ''
        return self.__rawbuf[i : j]

    @property
    def text_context( self, context_size = 20 ):
        """A short human-readable textual excerpt of the document at
        the current position, in English.

        """
        context_size = max( context_size, 4 )
        s = self.peekstr(context_size + 1)
        if not s:
            return ''
        if len(s) > context_size:
            s = s[:context_size - 3] + "..."
        return s

    def startswith( self, s ):
        """Determines if the text at the current position starts with
        the given string.

        See also method: pop_if_startswith()

        """
        s2 = self.peekstr( len(s) )
        return s == s2

    def skip( self, span=1 ):
        """Advances the current position by one (or the given number)
        of characters.  Will not advance beyond the end of the
        document.  Returns the number of characters skipped.

        """
        i = self.cpos
        self.__pos.advance( self.peekstr(span) )
        return self.cpos - i

    def skipuntil( self, testfn ):
        """Advances the current position until a given predicate test
        function succeeds, or the end of the document is reached.

        Returns the actual number of characters skipped.

        The provided test function should take a single unicode
        character and return a boolean value, such as:

            lambda c : c == '.'    # Skip to next period
# Skip to next period 1844 1845 See also methods: skipwhile() and popuntil() 1846 1847 """ 1848 i = self.cpos 1849 while True: 1850 c = self.peek() 1851 if not c or testfn(c): 1852 break 1853 else: 1854 self.__pos.advance(c) 1855 return self.cpos - i 1856 1857 def skipwhile( self, testfn ): 1858 """Advances the current position until a given predicate test 1859 function fails, or the end of the document is reached. 1860 1861 Returns the actual number of characters skipped. 1862 1863 The provided test function should take a single unicode 1864 character and return a boolean value, such as: 1865 1866 lambda c : c.isdigit() # Skip all digits 1867 1868 See also methods: skipuntil() and popwhile() 1869 1870 """ 1871 return self.skipuntil( lambda c: not testfn(c) ) 1872 1873 def skip_to_next_line( self, allow_unicode_eol=True ): 1874 """Advances the current position to the start of the next 1875 line. Will not advance beyond the end of the file. Note that 1876 the two-character sequence CR+LF is recognized as being just a 1877 single end-of-line marker. 1878 1879 """ 1880 ln = self.__pos.line 1881 while True: 1882 c = self.pop() 1883 if not c or self.__pos.line > ln: 1884 if c == '\r' and self.peek() == '\n': 1885 self.skip() 1886 break 1887 1888 def skipws( self, allow_unicode_whitespace=True ): 1889 """Advances the current position past all whitespace, or until 1890 the end of the document is reached. 1891 1892 """ 1893 if allow_unicode_whitespace: 1894 n = self.skipwhile( helpers.char_is_unicode_ws ) 1895 else: 1896 n = self.skipwhile( helpers.char_is_json_ws ) 1897 self.num_ws_skipped += n 1898 return n 1899 1900 def pop( self ): 1901 """Returns the character at the current position and advances 1902 the position to the next character. At the end of the 1903 document this function returns an empty string. 1904 1905 """ 1906 c = self.peek() 1907 if c: 1908 self.__pos.advance( c ) 1909 return c 1910 1911 def popstr( self, span=1, offset=0 ): 1912 """Returns a string of one or more characters starting at the 1913 current position, and advances the position to the following 1914 character after the span. Will not go beyond the end of the 1915 document, so the returned string may be shorter than the 1916 requested span. 1917 1918 """ 1919 s = self.peekstr(span) 1920 if s: 1921 self.__pos.advance( s ) 1922 return s 1923 1924 def popif( self, testfn ): 1925 """Just like the pop() function, but only returns the 1926 character if the given predicate test function succeeds. 1927 """ 1928 c = self.peek() 1929 if c and testfn(c): 1930 self.__pos.advance( c ) 1931 return c 1932 return '' 1933 1934 def pop_while_in( self, chars ): 1935 """Pops a sequence of characters at the current position 1936 as long as each of them is in the given set of characters. 1937 1938 """ 1939 if not isinstance( chars, (set,frozenset)): 1940 cset = set( chars ) 1941 c = self.peek() 1942 if c and c in cset: 1943 s = self.popwhile( lambda c: c and c in cset ) 1944 return s 1945 return None 1946 1947 def pop_identifier( self, match=None ): 1948 """Pops the sequence of characters at the current position 1949 that match the syntax for a JavaScript identifier. 1950 1951 """ 1952 c = self.peek() 1953 if c and helpers.char_is_identifier_leader(c): 1954 s = self.popwhile( helpers.char_is_identifier_tail ) 1955 return s 1956 return None 1957 1958 def pop_if_startswith( self, s ): 1959 """Pops the sequence of characters if they match the given string. 
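
        For example (an illustrative sketch):

            buf = buffered_stream( u'null,1' )
            buf.pop_if_startswith( u'null' )   # -> u'null', position advances
            buf.pop_if_startswith( u'true' )   # -> None, position unchanged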

        See also method: startswith()

        """
        s2 = self.peekstr( len(s) )
        if s2 != s:
            return None
        self.__pos.advance( s2 )
        return s2

    def popwhile( self, testfn, maxchars=None ):
        """Pops all the characters starting at the current position as
        long as each character passes the given predicate function
        test.  If maxchars is a numeric value instead of None then
        no more than that number of characters will be popped
        regardless of the predicate test.

        See also methods: skipwhile() and popuntil()

        """
        s = []
        i = 0
        while maxchars is None or i < maxchars:
            c = self.popif( testfn )
            if not c:
                break
            s.append( c )
            i += 1
        return ''.join(s)

    def popuntil( self, testfn, maxchars=None ):
        """Just like the popwhile() method except the predicate function
        should return True to stop the sequence rather than False.

        See also methods: skipuntil() and popwhile()

        """
        return self.popwhile( lambda c: not testfn(c), maxchars=maxchars )

    def __getitem__( self, index ):
        """Returns the character at the given index relative to the current position.

        If the index goes beyond the end of the input, or prior to the
        start when negative, then '' is returned.

        If the index provided is a slice object, then that range of
        characters is returned as a string.  Note that a stride value
        other than 1 is not supported in the slice.  To use a slice, do:

            s = my_stream[ 1:4 ]

        """
        if isinstance( index, slice ):
            return self.peekstr( index.stop - index.start, index.start )
        else:
            return self.peek( index )


# ----------------------------------------------------------------------
# Exception classes.
# ----------------------------------------------------------------------

class JSONException(Exception):
    """Base class for all JSON-related exceptions.
    """
    pass

class JSONSkipHook(JSONException):
    """An exception to be raised by user-defined code within hook
    callbacks to indicate the callback does not want to handle the
    situation.

    """
    pass

class JSONStopProcessing(JSONException):
    """Can be raised from anywhere, including inside a hook function, to
    cause the entire encode or decode process to immediately stop
    with an error.

    """
    pass

class JSONAbort(JSONException):
    pass

class JSONError(JSONException):
    """Base class for all JSON-related errors.

    In addition to standard Python exceptions, these exceptions may
    also have additional properties:

      * severity - One of: 'fatal', 'error', 'warning', 'info'
      * position - An indication of the position in the input where the error occurred.
      * outer_position - A secondary position (optional) that gives
        the location of the outer data item in which the error
        occurred, such as the beginning of a string or an array.
      * context_description - A string that identifies the context
        in which the error occurred.  Default is "Context".
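
    For example, a decoding error may be caught and displayed like this
    (an illustrative sketch; the malformed input and the filename are
    made up):

        try:
            obj = decode( '{"a": 1,' )    # truncated JSON document
        except JSONDecodeError as err:
            print( err.pretty_description( filename='example.json' ) )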
2059 """ 2060 severities = frozenset(['fatal','error','warning','info']) 2061 def __init__(self, message, *args, **kwargs ): 2062 self.severity = 'error' 2063 self._position = None 2064 self.outer_position = None 2065 self.context_description = None 2066 for kw,val in kwargs.items(): 2067 if kw == 'severity': 2068 if val not in self.severities: 2069 raise TypeError("%s given invalid severity %r" % (self.__class__.__name__, val)) 2070 self.severity = val 2071 elif kw == 'position': 2072 self.position = val 2073 elif kw == 'outer_position': 2074 self.outer_position = val 2075 elif kw == 'context_description' or kw=='context': 2076 self.context_description = val 2077 else: 2078 raise TypeError("%s does not accept %r keyword argument" % (self.__class__.__name__, kw)) 2079 super( JSONError, self ).__init__( message, *args ) 2080 self.message = message 2081 2082 @property 2083 def position(self): 2084 return self._position 2085 @position.setter 2086 def position(self, pos): 2087 if pos == 0: 2088 self._position = 0 #position_marker() # start of input 2089 else: 2090 self._position = pos 2091 2092 def __repr__(self): 2093 s = "%s(%r" % (self.__class__.__name__, self.message) 2094 for a in self.args[1:]: 2095 s += ", %r" % (a,) 2096 if self.position: 2097 s += ", position=%r" % (self.position,) 2098 if self.outer_position: 2099 s += ", outer_position=%r" % (self.outer_position,) 2100 s += ", severity=%r)" % (self.severity,) 2101 return s 2102 2103 def pretty_description(self, show_positions=True, filename=None): 2104 if filename: 2105 pfx = filename.rstrip().rstrip(':') + ':' 2106 else: 2107 pfx = '' 2108 # Print file position as numeric abbreviation 2109 err = pfx 2110 if self.position == 0: 2111 err += '0:0:' 2112 elif self.position: 2113 err += '%d:%d:' % (self.position.line, self.position.column) 2114 else: 2115 err += ' ' 2116 # Print severity and main error message 2117 err += " %s: %s" % (self.severity.capitalize(), self.message) 2118 if len(self.args) > 1: 2119 err += ': ' 2120 for anum, a in enumerate(self.args[1:]): 2121 if anum > 1: 2122 err += ', ' 2123 astr = repr(a) 2124 if len(astr) > 30: 2125 astr = astr[:30] + '...' 2126 err += astr 2127 # Print out exception chain 2128 e2 = self 2129 while e2: 2130 if hasattr(e2,'__cause__') and isinstance(e2.__cause__,Exception): 2131 e2 = e2.__cause__ 2132 e2desc = str(e2).strip() 2133 if not e2desc: 2134 e2desc = repr(e2).strip() 2135 err += "\n | Cause: %s" % e2desc.strip().replace('\n','\n | ') 2136 else: 2137 e2 = None 2138 # Show file position 2139 if show_positions and self.position is not None: 2140 if self.position == 0: 2141 err += "\n | At start of input" 2142 else: 2143 err += "\n | At %s" % (self.position.describe(show_text=False),) 2144 if self.position.text_after: 2145 err += "\n | near text: %r" % (self.position.text_after,) 2146 # Show context 2147 if show_positions and self.outer_position: 2148 if self.context_description: 2149 cdesc = self.context_description.capitalize() 2150 else: 2151 cdesc = "Context" 2152 err += "\n | %s started at %s" % (cdesc, self.outer_position.describe(show_text=False),) 2153 if self.outer_position.text_after: 2154 err += "\n | with text: %r" % (self.outer_position.text_after,) 2155 return err 2156 2157class JSONDecodeError(JSONError): 2158 """An exception class raised when a JSON decoding error (syntax error) occurs.""" 2159 pass 2160 2161class JSONDecodeHookError(JSONDecodeError): 2162 """An exception that occured within a decoder hook. 
2163 2164 The original exception is available in the 'hook_exception' attribute. 2165 """ 2166 def __init__(self, hook_name, exc_info, encoded_obj, *args, **kwargs): 2167 self.hook_name = hook_name 2168 if not exc_info: 2169 exc_info = (None, None, None) 2170 exc_type, self.hook_exception, self.hook_traceback = exc_info 2171 self.object_type = type(encoded_obj) 2172 msg = "Hook %s raised %r while decoding type <%s>" % (hook_name, self.hook_exception.__class__.__name__, self.object_type.__name__) 2173 if len(args) >= 1: 2174 msg += ": " + args[0] 2175 args = args[1:] 2176 super(JSONDecodeHookError,self).__init__(msg, *args,**kwargs) 2177 2178class JSONEncodeError(JSONError): 2179 """An exception class raised when a python object can not be encoded as a JSON string.""" 2180 pass 2181 2182class JSONEncodeHookError(JSONEncodeError): 2183 """An exception that occured within an encoder hook. 2184 2185 The original exception is available in the 'hook_exception' attribute. 2186 """ 2187 def __init__(self, hook_name, exc_info, encoded_obj, *args, **kwargs): 2188 self.hook_name = hook_name 2189 if not exc_info: 2190 exc_info = (None, None, None) 2191 exc_type, self.hook_exception, self.hook_traceback = exc_info 2192 self.object_type = type(encoded_obj) 2193 msg = "Hook %s raised %r while encoding type <%s>" % (self.hook_name, self.hook_exception.__class__.__name__, self.object_type.__name__) 2194 if len(args) >= 1: 2195 msg += ": " + args[0] 2196 args = args[1:] 2197 super(JSONEncodeHookError,self).__init__(msg, *args, **kwargs) 2198 2199 2200#---------------------------------------------------------------------- 2201# Encoder state object 2202#---------------------------------------------------------------------- 2203 2204class encode_state(object): 2205 """An internal transient object used during JSON encoding to 2206 record the current construction state. 2207 2208 """ 2209 def __init__(self, jsopts=None, parent=None ): 2210 import sys 2211 self.chunks = [] 2212 if not parent: 2213 self.parent = None 2214 self.nest_level = 0 2215 self.options = jsopts 2216 self.escape_unicode_test = False # or a function f(unichar)=>True/False 2217 else: 2218 self.parent = parent 2219 self.nest_level = parent.nest_level + 1 2220 self.escape_unicode_test = parent.escape_unicode_test 2221 self.options = parent.options 2222 2223 def make_substate(self): 2224 return encode_state( parent=self ) 2225 2226 def join_substate(self, other_state): 2227 self.chunks.extend( other_state.chunks ) 2228 other_state.chunks = [] 2229 2230 def append(self, s): 2231 """Adds a string to the end of the current JSON document""" 2232 self.chunks.append(s) 2233 2234 def combine(self): 2235 """Returns the accumulated string and resets the state to empty""" 2236 s = ''.join( self.chunks ) 2237 self.chunks = [] 2238 return s 2239 2240 def __eq__(self, other_state): 2241 return self.nest_level == other_state.nest_level and \ 2242 self.chunks == other_state.chunks 2243 2244 def __lt__(self, other_state): 2245 if self.nest_level != other_state.nest_level: 2246 return self.nest_level < other_state.nest_level 2247 return self.chunks < other_state.chunks 2248 2249 2250#---------------------------------------------------------------------- 2251# Decoder statistics 2252#---------------------------------------------------------------------- 2253 2254class decode_statistics(object): 2255 """An object that records various statistics about a decoded JSON document. 
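
    During a decode an instance of this class is kept in the 'stats'
    attribute of the decode_state object.  For example (an illustrative
    sketch; 'total_chars' is assigned explicitly only because
    pretty_description() reads it):

        stats = decode_statistics()
        stats.num_ints += 1
        stats.total_chars = 100
        print( stats.pretty_description( prefix='  | ' ) )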
2256 2257 """ 2258 int8_max = 0x7f 2259 int8_min = - 0x7f - 1 2260 int16_max = 0x7fff 2261 int16_min = - 0x7fff - 1 2262 int32_max = 0x7fffffff 2263 int32_min = - 0x7fffffff - 1 2264 int64_max = 0x7fffffffffffffff 2265 int64_min = - 0x7fffffffffffffff - 1 2266 2267 double_int_max = 2**53 - 1 2268 double_int_min = - (2**53 - 1) 2269 2270 def __init__(self): 2271 # Nesting 2272 self.max_depth = 0 2273 self.max_items_in_array = 0 2274 self.max_items_in_object = 0 2275 # Integer stats 2276 self.num_ints = 0 2277 self.num_ints_8bit = 0 2278 self.num_ints_16bit = 0 2279 self.num_ints_32bit = 0 2280 self.num_ints_53bit = 0 # ints which will overflow IEEE doubles 2281 self.num_ints_64bit = 0 2282 self.num_ints_long = 0 2283 self.num_negative_zero_ints = 0 2284 # Floating-point stats 2285 self.num_negative_zero_floats = 0 2286 self.num_floats = 0 2287 self.num_floats_decimal = 0 # overflowed 'float' 2288 # String stats 2289 self.num_strings = 0 2290 self.max_string_length = 0 2291 self.total_string_length = 0 2292 self.min_codepoint = None 2293 self.max_codepoint = None 2294 # Other data type stats 2295 self.num_arrays = 0 2296 self.num_objects = 0 2297 self.num_bools = 0 2298 self.num_nulls = 0 2299 self.num_undefineds = 0 2300 self.num_nans = 0 2301 self.num_infinities = 0 2302 self.num_comments = 0 2303 self.num_identifiers = 0 # JavaScript identifiers 2304 self.num_excess_whitespace = 0 2305 2306 @property 2307 def num_infinites(self): 2308 """Misspelled 'num_infinities' for backwards compatibility""" 2309 return self.num_infinities 2310 2311 def pretty_description(self, prefix=''): 2312 import unicodedata 2313 lines = [ 2314 "Number of integers:", 2315 " 8-bit: %5d (%d to %d)" % (self.num_ints_8bit, self.int8_min, self.int8_max), 2316 " 16-bit: %5d (%d to %d)" % (self.num_ints_16bit, self.int16_min, self.int16_max), 2317 " 32-bit: %5d (%d to %d)" % (self.num_ints_32bit, self.int32_min, self.int32_max), 2318 " > 53-bit: %5d (%d to %d - overflows JavaScript)" % (self.num_ints_53bit, self.double_int_min, self.double_int_max), 2319 " 64-bit: %5d (%d to %d)" % (self.num_ints_64bit, self.int64_min, self.int64_max), 2320 " > 64 bit: %5d (not portable, may require a \"Big Num\" package)" % self.num_ints_long, 2321 " total ints: %5d" % self.num_ints, 2322 " Num -0: %5d (negative-zero integers are not portable)" % self.num_negative_zero_ints, 2323 "Number of floats:", 2324 " doubles: %5d" % self.num_floats, 2325 " > doubles: %5d (will overflow IEEE doubles)" % self.num_floats_decimal, 2326 " total flts: %5d" % (self.num_floats + self.num_floats_decimal), 2327 " Num -0.0: %5d (negative-zero floats are usually portable)" % self.num_negative_zero_floats, 2328 "Number of:", 2329 " nulls: %5d" % self.num_nulls, 2330 " booleans: %5d" % self.num_bools, 2331 " arrays: %5d" % self.num_arrays, 2332 " objects: %5d" % self.num_objects, 2333 "Strings:", 2334 " number: %5d strings" % self.num_strings, 2335 " max length: %5d characters" % self.max_string_length, 2336 " total chars: %5d across all strings" % self.total_string_length, 2337 ] 2338 2339 if self.min_codepoint is not None: 2340 cp = 'U+%04X' % self.min_codepoint 2341 try: 2342 charname = unicodedata.name(unichr(self.min_codepoint)) 2343 except ValueError: 2344 charname = '? 
UNKNOWN CHARACTER' 2345 lines.append(" min codepoint: %6s (%s)" % (cp, charname)) 2346 else: 2347 lines.append(" min codepoint: %6s" % ('n/a',)) 2348 2349 if self.max_codepoint is not None: 2350 cp = 'U+%04X' % self.max_codepoint 2351 try: 2352 charname = unicodedata.name(unichr(self.max_codepoint)) 2353 except ValueError: 2354 charname = '? UNKNOWN CHARACTER' 2355 lines.append(" max codepoint: %6s (%s)" % (cp, charname)) 2356 else: 2357 lines.append(" max codepoint: %6s" % ('n/a',)) 2358 2359 lines.extend([ 2360 "Other JavaScript items:", 2361 " NaN: %5d" % self.num_nans, 2362 " Infinite: %5d" % self.num_infinities, 2363 " undefined: %5d" % self.num_undefineds, 2364 " Comments: %5d" % self.num_comments, 2365 " Identifiers: %5d" % self.num_identifiers, 2366 "Max items in any array: %5d" % self.max_items_in_array, 2367 "Max keys in any object: %5d" % self.max_items_in_object, 2368 "Max nesting depth: %5d" % self.max_depth, 2369 ]) 2370 if self.total_chars == 0: 2371 lines.append("Unnecessary whitespace: 0 of 0 characters") 2372 else: 2373 lines.append( 2374 "Unnecessary whitespace: %5d of %d characters (%.2f%%)" \ 2375 % (self.num_excess_whitespace, self.total_chars, 2376 self.num_excess_whitespace * 100.0 / self.total_chars) ) 2377 if prefix: 2378 return '\n'.join([ prefix+s for s in lines ]) + '\n' 2379 else: 2380 return '\n'.join( lines ) + '\n' 2381 2382 2383#---------------------------------------------------------------------- 2384# Decoder state object 2385#---------------------------------------------------------------------- 2386 2387class decode_state(object): 2388 """An internal transient object used during JSON decoding to 2389 record the current parsing state and error messages. 2390 2391 """ 2392 def __init__(self, options=None): 2393 self.reset() 2394 self.options = options 2395 2396 def reset(self): 2397 """Clears all errors, statistics, and input text.""" 2398 self.buf = None 2399 self.errors = [] 2400 self.obj = None 2401 self.cur_depth = 0 # how deep in nested structures are we? 
        self.stats = decode_statistics()
        self._have_warned_nonbmp = False
        self._have_warned_long_string = False
        self._have_warned_max_depth = False

    @property
    def should_stop(self):
        if self.has_fatal:
            return True
        return False

    @property
    def has_errors(self):
        """Have any errors been seen already?"""
        return len([err for err in self.errors if err.severity in ('fatal','error')]) > 0

    @property
    def has_fatal(self):
        """Have any fatal errors been seen already?"""
        return len([err for err in self.errors if err.severity in ('fatal',)]) > 0

    def set_input( self, txt, encoding=None ):
        """Initialize the state by setting the input document text."""
        import sys
        self.reset()
        try:
            self.buf = buffered_stream( txt, encoding=encoding )
        except JSONError as err:
            err.position = 0   # set position to start of file
            err.severity = 'fatal'
            self.push_exception( err )
        except Exception as err:
            # Re-raise as JSONDecodeError
            e2 = sys.exc_info()
            newerr = JSONDecodeError("Error while reading input", position=0, severity='fatal')
            # Simulate Python 3's: "raise X from Y" exception chaining
            newerr.__cause__ = err
            newerr.__traceback__ = e2[2]
            self.push_exception( newerr )
            self.buf = None
        else:
            if self.buf.bom:
                self.push_cond( self.options.bom,
                                "JSON document was prefixed by a BOM (Byte Order Mark)",
                                self.buf.bom )
        if not self.buf:
            self.push_fatal( "Aborting, can not read JSON document.", position=0 )

    def push_exception(self, exc):
        """Add an already-built exception to the error list."""
        self.errors.append(exc)


    def push_fatal(self, message, *args, **kwargs):
        """Create a fatal error."""
        kwargs['severity'] = 'fatal'
        self.__push_err( message, *args, **kwargs)

    def push_error(self, message, *args, **kwargs):
        """Create an error."""
        kwargs['severity'] = 'error'
        self.__push_err( message, *args, **kwargs)

    def push_warning(self, message, *args, **kwargs):
        """Create a warning."""
        kwargs['severity'] = 'warning'
        self.__push_err( message, *args, **kwargs)

    def push_info(self, message, *args, **kwargs):
        """Create an informational message."""
        kwargs['severity'] = 'info'
        self.__push_err( message, *args, **kwargs)

    def push_cond(self, behavior_value, message, *args, **kwargs):
        """Creates a conditional error or warning message.

        The behavior value (from json_options) controls whether
        a message will be pushed and whether it is an error
        or warning message.
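
        For example, a decoder check might look something like this (an
        illustrative sketch; 'state' is a decode_state whose options were
        supplied by the decoder):

            state.push_cond( state.options.comments,
                             "Comments are not allowed in strict JSON" )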
2478 2479 """ 2480 if behavior_value == ALLOW: 2481 return 2482 elif behavior_value == WARN: 2483 kwargs['severity'] = 'warning' 2484 else: 2485 kwargs['severity'] = 'error' 2486 self.__push_err( message, *args, **kwargs ) 2487 2488 def __push_err(self, message, *args, **kwargs): 2489 """Stores an error in the error list.""" 2490 position = None 2491 outer_position = None 2492 severity = 'error' 2493 context_description = None 2494 for kw, val in kwargs.items(): 2495 if kw == 'position': position = val 2496 elif kw == 'outer_position': outer_position = val 2497 elif kw == 'severity': severity = val 2498 elif kw == 'context_description' or kw == 'context': 2499 context_description=val 2500 else: 2501 raise TypeError('Unknown keyword argument',kw) 2502 if position is None and self.buf: 2503 position = self.buf.position # Current position 2504 err = JSONDecodeError( message, position=position, outer_position=outer_position, context_description=context_description, severity=severity, *args) 2505 self.push_exception( err ) 2506 2507 def update_depth_stats(self, **kwargs): 2508 st = self.stats 2509 st.max_depth = max(st.max_depth, self.cur_depth) 2510 if not self._have_warned_max_depth and self.cur_depth > self.options.warn_max_depth: 2511 self._have_warned_max_depth = True 2512 self.push_cond( self.options.non_portable, 2513 "Arrays or objects nested deeper than %d levels may not be portable" \ 2514 % self.options.warn_max_depth ) 2515 2516 def update_string_stats(self, s, **kwargs): 2517 st = self.stats 2518 st.num_strings += 1 2519 st.max_string_length = max(st.max_string_length, len(s)) 2520 st.total_string_length += len(s) 2521 if self.options.warn_string_length and len(s) > self.options.warn_string_length and not self._have_warned_long_string: 2522 self._have_warned_long_string = True 2523 self.push_cond( self.options.non_portable, 2524 "Strings longer than %d may not be portable" % self.options.warn_string_length, 2525 **kwargs ) 2526 if len(s) > 0: 2527 mincp = ord(min(s)) 2528 maxcp = ord(max(s)) 2529 if st.min_codepoint is None: 2530 st.min_codepoint = mincp 2531 st.max_codepoint = maxcp 2532 else: 2533 st.min_codepoint = min( st.min_codepoint, mincp ) 2534 st.max_codepoint = max( st.max_codepoint, maxcp ) 2535 if maxcp > 0xffff and not self._have_warned_nonbmp: 2536 self._have_warned_nonbmp = True 2537 self.push_cond( self.options.non_portable, 2538 "Strings containing non-BMP characters (U+%04X) may not be portable" % maxcp, 2539 **kwargs ) 2540 2541 def update_negzero_int_stats(self, **kwargs): 2542 st = self.stats 2543 st.num_negative_zero_ints += 1 2544 if st.num_negative_zero_ints == 1: # Only warn once 2545 self.push_cond( self.options.non_portable, 2546 "Negative zero (-0) integers are usually not portable", 2547 **kwargs ) 2548 2549 def update_negzero_float_stats(self, **kwargs): 2550 st = self.stats 2551 st.num_negative_zero_floats += 1 2552 if st.num_negative_zero_floats == 1: # Only warn once 2553 self.push_cond( self.options.non_portable, 2554 "Negative zero (-0.0) numbers may not be portable", 2555 **kwargs) 2556 2557 def update_float_stats(self, float_value, **kwargs): 2558 st = self.stats 2559 if 'sign' in kwargs: 2560 del kwargs['sign'] 2561 2562 if helpers.is_negzero( float_value ): 2563 self.update_negzero_float_stats( **kwargs ) 2564 2565 if helpers.is_infinite( float_value ): 2566 st.num_infinities += 1 2567 2568 if isinstance(float_value, decimal.Decimal): 2569 st.num_floats_decimal += 1 2570 if st.num_floats_decimal == 1: # Only warn once 2571 self.push_cond( 
self.options.non_portable, 2572 "Floats larger or more precise than an IEEE \"double\" may not be portable", 2573 **kwargs) 2574 elif isinstance(float_value, float): 2575 st.num_floats += 1 2576 2577 2578 def update_integer_stats(self, int_value, **kwargs ): 2579 sign=kwargs.get('sign', 1) 2580 if 'sign' in kwargs: 2581 del kwargs['sign'] 2582 2583 if int_value == 0 and sign < 0: 2584 self.update_negzero_int_stats( **kwargs ) 2585 2586 if sign < 0: 2587 int_value = - int_value 2588 2589 st = self.stats 2590 st.num_ints += 1 2591 if st.int8_min <= int_value <= st.int8_max: 2592 st.num_ints_8bit += 1 2593 elif st.int16_min <= int_value <= st.int16_max: 2594 st.num_ints_16bit += 1 2595 elif st.int32_min <= int_value <= st.int32_max: 2596 st.num_ints_32bit += 1 2597 elif st.int64_min <= int_value <= st.int64_max: 2598 st.num_ints_64bit += 1 2599 else: 2600 st.num_ints_long += 1 2601 2602 if int_value < st.double_int_min or st.double_int_max < int_value: 2603 st.num_ints_53bit += 1 2604 if st.num_ints_53bit == 1: # Only warn once 2605 self.push_cond( self.options.non_portable, 2606 "Integers larger than 53-bits are not portable", 2607 **kwargs ) 2608 2609 2610# ---------------------------------------------------------------------- 2611# JSON strictness options 2612# ---------------------------------------------------------------------- 2613 2614STRICTNESS_STRICT = 'strict' 2615STRICTNESS_WARN = 'warn' 2616STRICTNESS_TOLERANT = 'tolerant' 2617 2618ALLOW = 'allow' 2619WARN = 'warn' 2620FORBID = 'forbid' 2621 2622# For float_type option 2623NUMBER_AUTO = 'auto' 2624NUMBER_FLOAT = 'float' 2625NUMBER_DECIMAL = 'decimal' 2626 2627# For json_int class 2628NUMBER_FORMAT_DECIMAL = 'decimal' 2629NUMBER_FORMAT_HEX = 'hex' 2630NUMBER_FORMAT_LEGACYOCTAL = 'legacyoctal' 2631NUMBER_FORMAT_OCTAL = 'octal' 2632NUMBER_FORMAT_BINARY = 'binary' 2633 2634 2635class _behaviors_metaclass(type): 2636 """Meta class used to establish a set of "behavior" options. 2637 2638 Classes that use this meta class must defined a class-level 2639 variable called '_behaviors' that is a list of tuples, each of 2640 which describes one behavior and is like: (behavior_name, 2641 documentation). Also define a second class-level variable called 2642 '_behavior_values' which is a list of the permitted values for 2643 each behavior, each being strings. 
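
    For example, a class using this meta class might declare (an
    illustrative sketch only):

        _behavior_values = ('allow', 'warn', 'forbid')
        _behaviors = (
            ("comments", "JavaScript comments"),
            ("hex_numbers", "Hexadecimal numbers"),
        )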
2644 2645 For each behavior (e.g., pretty), and for each value (e.g., 2646 yes) the following methods/properties will be created: 2647 2648 * pretty - value of 'pretty' behavior (read-write) 2649 * ispretty_yes - returns True if 'pretty' is 'yes' 2650 2651 For each value (e.g., pink) the following methods/properties 2652 will be created: 2653 2654 * all_behaviors - set of all behaviors (read-only) 2655 * pink_behaviors - set of behaviors with value of 'pink' (read-only) 2656 * set_all('pink') 2657 * set_all_pink() - set all behaviors to value of 'pink' 2658 2659 """ 2660 def __new__(cls, clsname, bases, attrs): 2661 values = attrs.get('_behavior_values') 2662 attrs['values'] = property( lambda self: set(self._behavior_values), doc='Set of possible behavior values') 2663 behaviors = attrs.get('_behaviors') 2664 2665 def get_behavior(self, name): 2666 """Returns the value for a given behavior""" 2667 try: 2668 return getattr( self, '_behavior_'+name ) 2669 except AttributeError: 2670 raise ValueError('Unknown behavior',name) 2671 attrs['get_behavior'] = get_behavior 2672 2673 def set_behavior(self, name, value): 2674 """Changes the value for a given behavior""" 2675 if value not in self._behavior_values: 2676 raise ValueError('Unknown value for behavior',value) 2677 varname = '_behavior_'+name 2678 if hasattr(self,varname): 2679 setattr( self, varname, value ) 2680 else: 2681 raise ValueError('Unknown behavior',name) 2682 attrs['set_behavior'] = set_behavior 2683 2684 def describe_behavior(self,name): 2685 """Returns documentation about a given behavior.""" 2686 for n, doc in self._behaviors: 2687 if n==name: 2688 return doc 2689 else: 2690 raise AttributeError('No such behavior',name) 2691 attrs['describe_behavior'] = describe_behavior 2692 2693 for name, doc in behaviors: 2694 attrs['_behavior_'+name] = True 2695 for v in values: 2696 vs = v + '_' + name 2697 def getx(self,name=name,forval=v): 2698 return self.get_behavior(name) == forval 2699 attrs['is_'+v+'_'+name] = property(getx,doc=v.capitalize()+' '+doc) 2700 # method value_name() 2701 fnset = lambda self,_name=name,_value=v: self.set_behavior(_name,_value) 2702 fnset.__name__ = v+'_'+name 2703 fnset.__doc__ = 'Set behavior ' + name + ' to ' + v + "." 
2704 attrs[fnset.__name__] = fnset 2705 def get_value_for_behavior(self,name=name): 2706 return self.get_behavior(name) 2707 def set_value_for_behavior(self,value,name=name): 2708 self.set_behavior(name,value) 2709 attrs[name] = property(get_value_for_behavior,set_value_for_behavior,doc=doc) 2710 2711 @property 2712 def all_behaviors(self): 2713 """Returns the names of all known behaviors.""" 2714 return set([t[0] for t in self._behaviors]) 2715 attrs['all_behaviors'] = all_behaviors 2716 2717 def set_all(self,value): 2718 """Changes all behaviors to have the given value.""" 2719 if value not in self._behavior_values: 2720 raise ValueError('Unknown behavior',value) 2721 for name in self.all_behaviors: 2722 setattr(self, '_behavior_'+name, value) 2723 attrs['set_all'] = set_all 2724 2725 def is_all(self,value): 2726 """Determines if all the behaviors have the given value.""" 2727 if value not in self._behavior_values: 2728 raise ValueError('Unknown behavior',value) 2729 for name in self.all_behaviors: 2730 if getattr(self, '_behavior_'+name) != value: 2731 return False 2732 return True 2733 attrs['is_all'] = is_all 2734 2735 for v in values: 2736 # property value_behaviors 2737 def getbehaviorsfor(self,value=v): 2738 return set([name for name in self.all_behaviors if getattr(self,name)==value]) 2739 attrs[v+'_behaviors'] = property(getbehaviorsfor,doc='Return the set of behaviors with the value '+v+'.') 2740 # method set_all_value() 2741 setfn = lambda self,_value=v: set_all(self,_value) 2742 setfn.__name__ = 'set_all_'+v 2743 setfn.__doc__ = 'Set all behaviors to value ' + v + "." 2744 attrs[setfn.__name__] = setfn 2745 # property is_all_value 2746 attrs['is_all_'+v] = property( lambda self,v=v: is_all(self,v), doc='Determines if all the behaviors have the value '+v+'.') 2747 def behaviors_eq(self, other): 2748 """Determines if two options objects are equivalent.""" 2749 if self.all_behaviors != other.all_behaviors: 2750 return False 2751 return self.allowed_behaviors == other.allowed_behaviors 2752 attrs['__eq__'] = behaviors_eq 2753 2754 return super(_behaviors_metaclass, cls).__new__(cls, clsname, bases, attrs) 2755 2756 2757SORT_NONE = 'none' 2758SORT_PRESERVE = 'preserve' 2759SORT_ALPHA = 'alpha' 2760SORT_ALPHA_CI = 'alpha_ci' 2761SORT_SMART = 'smart' 2762 2763sorting_methods = { 2764 SORT_NONE: "Do not sort, resulting order may be random", 2765 SORT_PRESERVE: "Preserve original order when reformatting", 2766 SORT_ALPHA: "Sort strictly alphabetically", 2767 SORT_ALPHA_CI: "Sort alphabetically case-insensitive", 2768 SORT_SMART: "Sort alphabetically and numerically (DEFAULT)" 2769} 2770sorting_method_aliases = { 2771 'ci': SORT_ALPHA_CI 2772} 2773def smart_sort_transform( key ): 2774 numfmt = '%012d' 2775 digits = '0123456789' 2776 zero = ord('0') 2777 if not key: 2778 key = '' 2779 elif isinstance( key, (int,long) ): 2780 key = numfmt % key 2781 elif isinstance( key, basestring ): 2782 keylen = len(key) 2783 words = [] 2784 i=0 2785 while i < keylen: 2786 if key[i] in digits: 2787 num = 0 2788 while i < keylen and key[i] in digits: 2789 num *= 10 2790 num += ord(key[i]) - zero 2791 i += 1 2792 words.append( numfmt % num ) 2793 else: 2794 words.append( key[i].upper() ) 2795 i += 1 2796 key = ''.join(words) 2797 else: 2798 key = str(key) 2799 return key 2800 2801# Find Enum type (introduced in Python 3.4) 2802try: 2803 from enum import Enum as _enum 2804except ImportError: 2805 _enum = None 2806# Find OrderedDict type 2807try: 2808 from collections import OrderedDict as _OrderedDict 
2809except ImportError: 2810 _OrderedDict = None 2811 2812 2813class json_options(object): 2814 """Options to determine how strict the decoder or encoder should be.""" 2815 2816 __metaclass__ = _behaviors_metaclass 2817 _behavior_values = (ALLOW, WARN, FORBID) 2818 _behaviors = ( 2819 ("all_numeric_signs", 2820 "Numbers may be prefixed by any \'+\' and \'-\', e.g., +4, -+-+77"), 2821 ("any_type_at_start", 2822 "A JSON document may start with any type, not just arrays or objects"), 2823 ("comments", 2824 "JavaScript comments, both /*...*/ and //... styles"), 2825 ("control_char_in_string", 2826 "Strings may contain raw control characters without \\u-escaping"), 2827 ("hex_numbers", 2828 "Hexadecimal numbers, e.g., 0x1f"), 2829 ("binary_numbers", 2830 "Binary numbers, e.g., 0b1001"), 2831 ("octal_numbers", 2832 "New-style octal numbers, e.g., 0o731 (see leading-zeros for legacy octals)"), 2833 ("initial_decimal_point", 2834 "Floating-point numbers may start with a decimal point (no units digit)"), 2835 ("extended_unicode_escapes", 2836 "Extended Unicode escape sequence \\u{..} for non-BMP characters"), 2837 ("js_string_escapes", 2838 "All JavaScript character \\-escape sequences may be in strings"), 2839 ("leading_zeros", 2840 "Numbers may have extra leading zeros (see --leading-zero-radix option)"), 2841 ("non_numbers", 2842 "Non-numbers may be used, such as NaN or Infinity"), 2843 ("nonescape_characters", 2844 "Unknown character \\-escape sequences stand for that character (\\Q -> 'Q')"), 2845 ("identifier_keys", 2846 "JavaScript identifiers are converted to strings when used as object keys"), 2847 ("nonstring_keys", 2848 "Value types other than strings (or identifiers) may be used as object keys"), 2849 ("omitted_array_elements", 2850 "Arrays may have omitted/elided elements, e.g., [1,,3] == [1,undefined,3]"), 2851 ("single_quoted_strings", 2852 "Strings may be delimited with both double (\") and single (\') quotation marks"), 2853 ("trailing_comma", 2854 "A final comma may end the list of array or object members"), 2855 ("trailing_decimal_point", 2856 "Floating-point number may end with a decimal point and no following fractional digits"), 2857 ("undefined_values", 2858 "The JavaScript 'undefined' value may be used"), 2859 ("format_control_chars", 2860 "Unicode \"format control characters\" may appear in the input"), 2861 ("unicode_whitespace", 2862 "Treat any Unicode whitespace character as valid whitespace"), 2863 # Never legal 2864 ("leading_zeros", 2865 "Numbers may have leading zeros"), 2866 # Normally warnings 2867 ("duplicate_keys", 2868 "Objects may have repeated keys"), 2869 ("zero_byte", 2870 "Strings may contain U+0000, which may not be safe for C-based programs"), 2871 ("bom", 2872 "A JSON document may start with a Unicode BOM (Byte Order Mark)"), 2873 ("non_portable", 2874 "Anything technically valid but likely to cause data portablibity issues"), 2875 ) # end behavior list 2876 2877 def reset_to_defaults(self): 2878 # Plain attrs (other than above behaviors) are simply copied 2879 # by value, either during initialization (via keyword 2880 # arguments) or via the copy() method. 
2881 self._plain_attrs = ['leading_zero_radix', 2882 'encode_namedtuple_as_object', 2883 'encode_enum_as', 2884 'encode_compactly', 2885 'escape_unicode', 2886 'always_escape_chars', 2887 'warn_string_length', 2888 'warn_max_depth', 2889 'int_as_float', 2890 'decimal_context', 2891 'float_type', 2892 'keep_format', 2893 'date_format', 2894 'datetime_format', 2895 'time_format', 2896 'timedelta_format', 2897 'sort_keys', 2898 'indent_amount', 'indent_tab_width', 'indent_limit', 2899 'max_items_per_line', 2900 'py2str_encoding' ] 2901 2902 self.strictness = STRICTNESS_WARN 2903 self._leading_zero_radix = 8 # via property: leading_zero_radix 2904 self._sort_keys = SORT_SMART # via property: sort_keys 2905 2906 self.int_as_float = False 2907 self.float_type = NUMBER_AUTO 2908 self.decimal_context = (decimal.DefaultContext if decimal else None) 2909 self.keep_format = False # keep track of when numbers are hex, octal, etc. 2910 2911 self.encode_namedtuple_as_object = True 2912 self._encode_enum_as = 'name' # via property 2913 self.encode_compactly = True 2914 self.escape_unicode = False 2915 self.always_escape_chars = None # None, or a set of Unicode characters to always escape 2916 2917 self.warn_string_length = 0xfffd # with 16-bit length prefix 2918 self.warn_max_depth = 64 2919 2920 self.date_format = 'iso' # or strftime format 2921 self.datetime_format = 'iso' # or strftime format 2922 self.time_format = 'iso' # or strftime format 2923 self.timedelta_format = 'iso' # or 'hms' 2924 2925 self.sort_keys = SORT_ALPHA 2926 self.indent_amount = 2 2927 self.indent_tab_width = 0 # 0, or number of equivalent spaces 2928 self.indent_limit = None 2929 self.max_items_per_line = 1 # When encoding how many items per array/object 2930 # before breaking into multiple lines 2931 # For interpreting Python 2 'str' types: 2932 if _py_major == 2: 2933 self.py2str_encoding = 'ascii' 2934 else: 2935 self.py2str_encoding = None 2936 2937 def __init__(self, **kwargs): 2938 """Set JSON encoding and decoding options. 2939 2940 If 'strict' is set to True, then only strictly-conforming JSON 2941 output will be produced. Note that this means that some types 2942 of values may not be convertable and will result in a 2943 JSONEncodeError exception. 2944 2945 If 'compactly' is set to True, then the resulting string will 2946 have all extraneous white space removed; if False then the 2947 string will be "pretty printed" with whitespace and indentation 2948 added to make it more readable. 2949 2950 If 'escape_unicode' is set to True, then all non-ASCII characters 2951 will be represented as a unicode escape sequence; if False then 2952 the actual real unicode character will be inserted if possible. 2953 2954 The 'escape_unicode' can also be a function, which when called 2955 with a single argument of a unicode character will return True 2956 if the character should be escaped or False if it should not. 

        """
        self.reset_to_defaults()

        if 'strict' in kwargs:
            # Do this keyword first, so other keywords may override specific behaviors
            self.strictness = kwargs['strict']

        for kw,val in kwargs.items():
            if kw == 'compactly':  # alias for 'encode_compactly'
                self.encode_compactly = val
            elif kw == 'strict':
                pass   # Already handled
            elif kw == 'warnings':
                if not val:
                    self.suppress_warnings()
            elif kw == 'html_safe' or kw == 'xml_safe':
                if bool(val):
                    if self.always_escape_chars is None:
                        self.always_escape_chars = set(u'<>/&')
                    else:
                        self.always_escape_chars.update( set(u'<>/&') )
            elif kw == 'always_escape':
                if val:
                    if self.always_escape_chars is None:
                        self.always_escape_chars = set(val)
                    else:
                        self.always_escape_chars.update( set(val) )
            elif kw == 'int_as_float':
                self.int_as_float = bool(val)
            elif kw == 'keep_format':
                self.keep_format = bool(val)
            elif kw == 'float_type':
                if val in (NUMBER_AUTO, NUMBER_FLOAT, NUMBER_DECIMAL):
                    self.float_type = val
                else:
                    raise ValueError("Unknown option %r for argument %r to initialize %s" % (val,kw,self.__class__.__name__))
            elif kw == 'decimal' or kw == 'decimal_context':
                if decimal:
                    if not val or val == 'default':
                        self.decimal_context = decimal.DefaultContext
                    elif val == 'basic':
                        self.decimal_context = decimal.BasicContext
                    elif val == 'extended':
                        self.decimal_context = decimal.ExtendedContext
                    elif isinstance(val, decimal.Context):
                        self.decimal_context = val
                    elif isinstance(val,(int,long)) or val[0].isdigit():
                        prec = int(val)
                        self.decimal_context = decimal.Context( prec=prec )
                    else:
                        raise ValueError("Option for %r should be a decimal.Context, a number of significant digits, or one of 'default','basic', or 'extended'." % (kw,))
            elif kw in ('allow','warn','forbid','prevent','deny'):
                action = {'allow':ALLOW, 'warn':WARN, 'forbid':FORBID, 'prevent':FORBID, 'deny':FORBID}[ kw ]
                if isinstance(val,basestring):
                    val = [b.replace('-','_') for b in val.replace(',',' ').split()]
                for behavior in val:
                    self.set_behavior( behavior, action )
            elif kw.startswith('allow_') or kw.startswith('forbid_') or kw.startswith('prevent_') or kw.startswith('deny_') or kw.startswith('warn_'):
                action, behavior = kw.split('_',1)
                if action == 'allow':
                    if val:
                        self.set_behavior( behavior, ALLOW )
                    else:
                        self.set_behavior( behavior, FORBID )
                elif action in ('forbid','prevent','deny'):
                    if val:
                        self.set_behavior( behavior, FORBID )
                    else:
                        self.set_behavior( behavior, ALLOW )
                elif action == 'warn':
                    if val:
                        self.set_behavior( behavior, WARN )
                    else:
                        self.set_behavior( behavior, ALLOW )
            elif kw in self._plain_attrs:
                setattr(self, kw, val)
            else:
                raise ValueError("Unknown keyword argument %r to initialize %s" % (kw,self.__class__.__name__))

    def copy(self):
        other = self.__class__()
        other.copy_from( self )
        return other

    def copy_from(self, other):
        if self is other:
            return   # Myself!
3045 3046 self.strictness = other.strictness # sets behaviors in bulk 3047 3048 for name in self.all_behaviors: 3049 self.set_behavior( name, other.get_behavior(name) ) 3050 3051 for name in self._plain_attrs: 3052 val = getattr(other,name) 3053 if isinstance(val, set): 3054 val = val.copy() 3055 elif decimal and isinstance(val, decimal.Decimal): 3056 val = val.copy() 3057 3058 setattr(self, name, val) 3059 3060 3061 def spaces_to_next_indent_level( self, min_spaces=1, subtract=0 ): 3062 n = self.indent_amount - subtract 3063 if n < 0: 3064 n = 0 3065 n = max( min_spaces, n ) 3066 return ' ' * n 3067 3068 def indentation_for_level( self, level=0 ): 3069 """Returns a whitespace string used for indenting.""" 3070 if self.indent_limit is not None and level > self.indent_limit: 3071 n = self.indent_limit 3072 else: 3073 n = level 3074 n *= self.indent_amount 3075 if self.indent_tab_width: 3076 tw, sw = divmod(n, self.indent_tab_width) 3077 return '\t'*tw + ' '*sw 3078 else: 3079 return ' ' * n 3080 3081 def set_indent( self, num_spaces, tab_width=0, limit=None ): 3082 """Changes the indentation properties when outputting JSON in non-compact mode. 3083 3084 'num_spaces' is the number of spaces to insert for each level 3085 of indentation, which defaults to 2. 3086 3087 'tab_width', if not 0, is the number of spaces which is equivalent 3088 to one tab character. Tabs will be output where possible rather 3089 than runs of spaces. 3090 3091 'limit', if not None, is the maximum indentation level after 3092 which no further indentation will be output. 3093 3094 """ 3095 n = int(num_spaces) 3096 if n < 0: 3097 raise ValueError("indentation amount can not be negative",n) 3098 self.indent_amount = n 3099 self.indent_tab_width = tab_width 3100 self.indent_limit = limit 3101 3102 @property 3103 def sort_keys(self): 3104 """The method used to sort dictionary keys when encoding JSON 3105 """ 3106 return self._sort_keys 3107 @sort_keys.setter 3108 def sort_keys(self, method): 3109 if not method: 3110 self._sort_keys = SORT_NONE 3111 elif callable(method): 3112 self._sort_keys = method 3113 elif method in sorting_methods: 3114 self._sort_keys = method 3115 elif method in sorting_method_aliases: # alias 3116 self._sort_keys = sorting_method_aliases[method] 3117 elif method == True: 3118 self._sort_keys = SORT_ALPHA 3119 else: 3120 raise ValueError("Not a valid sorting method: %r" % method) 3121 3122 @property 3123 def encode_enum_as(self): 3124 """The strategy for encoding Python Enum values. 
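
        Must be one of 'name', 'qname', or 'value' (see the setter below).
        For example (an illustrative sketch; how the chosen strategy is
        applied is up to the encoder):

            import enum
            class Color(enum.Enum):
                red = 1

            opts = json_options( encode_enum_as='value' )   # request value-based encoding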
3125 """ 3126 return self._encode_enum_as 3127 @encode_enum_as.setter 3128 def encode_enum_as(self, val): 3129 if val not in ('name','qname','value'): 3130 raise ValueError("encode_enum_as must be one of 'name','qname', or 'value'") 3131 self._encode_enum_as = val 3132 3133 @property 3134 def zero_float(self): 3135 """The numeric value 0.0, either a float or a decimal.""" 3136 if decimal and self.float_type == NUMBER_DECIMAL: 3137 return self.decimal_context.create_decimal('0.0') 3138 else: 3139 return 0.0 3140 @property 3141 def negzero_float(self): 3142 """The numeric value -0.0, either a float or a decimal.""" 3143 if decimal and self.float_type == NUMBER_DECIMAL: 3144 return self.decimal_context.create_decimal('-0.0') 3145 else: 3146 return -0.0 3147 3148 @property 3149 def nan(self): 3150 """The numeric value NaN, either a float or a decimal.""" 3151 if decimal and self.float_type == NUMBER_DECIMAL: 3152 return self.decimal_context.create_decimal('NaN') 3153 else: 3154 return nan 3155 @property 3156 def inf(self): 3157 """The numeric value Infinity, either a float or a decimal.""" 3158 if decimal and self.float_type == NUMBER_DECIMAL: 3159 return self.decimal_context.create_decimal('Infinity') 3160 else: 3161 return inf 3162 @property 3163 def neginf(self): 3164 """The numeric value -Infinity, either a float or a decimal.""" 3165 if decimal and self.float_type == NUMBER_DECIMAL: 3166 return self.decimal_context.create_decimal('-Infinity') 3167 else: 3168 return neginf 3169 3170 3171 def make_int( self, s, sign=None, number_format=NUMBER_FORMAT_DECIMAL ): 3172 """Makes an integer value according to the current options. 3173 3174 First argument should be a string representation of the number, 3175 or an integer. 3176 3177 Returns a number value, which could be an int, float, or decimal. 
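
        For example (an illustrative sketch assuming the default options,
        where 'int_as_float' and 'keep_format' are both False):

            opts = json_options()
            opts.make_int( "42" )              # -> 42
            opts.make_int( "42", sign='-' )    # -> -42
            opts.make_int( "0", sign='-' )     # -> -0.0  (the sign of zero is preserved)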
3178 3179 """ 3180 if isinstance(sign, (int,long)): 3181 if sign < 0: 3182 sign = '-' 3183 else: 3184 sign = '+' 3185 if isinstance(s,basestring): 3186 if s.startswith('-') or s.startswith('+'): 3187 sign = s[0] 3188 s = s[1:] 3189 3190 if self.int_as_float: 3191 # Making a float/decimal 3192 if isinstance(s, (int,long)): 3193 if self.float_type == NUMBER_DECIMAL: 3194 n = self.decimal_context.create_decimal( s ) 3195 if sign=='-': 3196 n = n.copy_negate() 3197 elif s == 0 and sign=='-': 3198 n = self.negzero_float 3199 elif -999999999999999 <= s <= 999999999999999: 3200 n = float(s) 3201 if sign=='-': 3202 n *= -1 3203 else: 3204 n = float(s) 3205 if (n == inf or int(n) != s) and self.float_type != NUMBER_FLOAT: 3206 n = self.decimal_context.create_decimal( s ) 3207 if sign=='-': 3208 n = n.copy_negate() 3209 elif sign=='-': 3210 n *= -1 3211 else: # not already an int 3212 n = self.make_float( s, sign ) 3213 n2 = self.make_float( s[:-1] + ('9' if s[-1]<='5' else '0'), sign ) 3214 if (n==inf or n==n2) and self.float_type != NUMBER_FLOAT: 3215 n = self.make_decimal( s, sign ) 3216 elif isinstance( s, (int,long) ): 3217 # already an integer 3218 n = s 3219 if sign=='-': 3220 if n == 0: 3221 n = self.negzero_float 3222 else: 3223 n *= -1 3224 else: 3225 # Making an actual integer 3226 try: 3227 n = int( s ) 3228 except ValueError: 3229 n = self.nan 3230 else: 3231 if sign=='-': 3232 if n==0: 3233 n = self.negzero_float 3234 else: 3235 n *= -1 3236 if isinstance(n,(int,long)) and self.keep_format: 3237 n = json_int(n, number_format=number_format) 3238 return n 3239 3240 3241 def make_decimal( self, s, sign='+' ): 3242 """Converts a string into a decimal or float value.""" 3243 if not decimal or self.float_type == NUMBER_FLOAT: 3244 return self.make_float( s, sign ) 3245 3246 if s.startswith('-') or s.startswith('+'): 3247 sign = s[0] 3248 s = s[1:] 3249 elif isinstance(sign, (int,long)): 3250 if sign < 0: 3251 sign = '-' 3252 else: 3253 sign = '+' 3254 3255 try: 3256 f = self.decimal_context.create_decimal( s ) 3257 except decimal.InvalidOperation: 3258 f = self.decimal_context.create_decimal( 'NaN' ) 3259 except decimal.Overflow: 3260 if sign=='-': 3261 f = self.decimal_context.create_decimal( '-Infinity' ) 3262 else: 3263 f = self.decimal_context.create_decimal( 'Infinity' ) 3264 else: 3265 if sign=='-': 3266 f = f.copy_negate() 3267 return f 3268 3269 def make_float( self, s, sign='+' ): 3270 """Converts a string into a float or decimal value.""" 3271 if decimal and self.float_type == NUMBER_DECIMAL: 3272 return self.make_decimal( s, sign ) 3273 3274 if s.startswith('-') or s.startswith('+'): 3275 sign = s[0] 3276 s = s[1:] 3277 elif isinstance(sign, (int,long)): 3278 if sign < 0: 3279 sign = '-' 3280 else: 3281 sign = '+' 3282 3283 try: 3284 f = float(s) 3285 except ValueError: 3286 f = nan 3287 else: 3288 if sign=='-': 3289 f *= -1 3290 return f 3291 3292 @property 3293 def leading_zero_radix(self): 3294 """The radix to be used for numbers with leading zeros. 
8 or 10 3295 """ 3296 return self._leading_zero_radix 3297 @leading_zero_radix.setter 3298 def leading_zero_radix(self, radix): 3299 if isinstance(radix,basestring): 3300 try: 3301 radix = int(radix) 3302 except ValueError: 3303 radix = radix.lower() 3304 if radix=='octal' or radix=='oct' or radix=='8': 3305 radix = 8 3306 elif radix=='decimal' or radix=='dec': 3307 radix = 10 3308 if radix not in (8,10): 3309 raise ValueError("Radix must either be 8 (octal) or 10 (decimal)") 3310 self._leading_zero_radix = radix 3311 @property 3312 def leading_zero_radix_as_word(self): 3313 return {8:'octal', 10:'decimal'}[ self._leading_zero_radix ] 3314 3315 def suppress_warnings(self): 3316 for name in self.warn_behaviors: 3317 self.set_behavior(name, 'allow') 3318 3319 @property 3320 def allow_or_warn_behaviors(self): 3321 """Returns the set of all behaviors that are not forbidden (i.e., are allowed or warned).""" 3322 return self.allow_behaviors.union( self.warn_behaviors ) 3323 3324 @property 3325 def strictness(self): 3326 return self._strictness 3327 3328 @strictness.setter 3329 def strictness(self, strict): 3330 """Changes whether the options should be re-configured for strict JSON conformance.""" 3331 if strict == STRICTNESS_WARN: 3332 self._strictness = STRICTNESS_WARN 3333 self.set_all_warn() 3334 elif strict == STRICTNESS_STRICT or strict is True: 3335 self._strictness = STRICTNESS_STRICT 3336 self.keep_format = False 3337 self.set_all_forbid() 3338 self.warn_duplicate_keys() 3339 self.warn_zero_byte() 3340 self.warn_bom() 3341 self.warn_non_portable() 3342 elif strict == STRICTNESS_TOLERANT or strict is False: 3343 self._strictness = STRICTNESS_TOLERANT 3344 self.set_all_allow() 3345 self.warn_duplicate_keys() 3346 self.warn_zero_byte() 3347 self.warn_leading_zeros() 3348 self.leading_zero_radix = 8 3349 self.warn_bom() 3350 self.allow_non_portable() 3351 else: 3352 raise ValueError("Unknown strictness options %r" % strict) 3353 self.allow_any_type_at_start() 3354 3355 3356# ---------------------------------------------------------------------- 3357# The main JSON encoder/decoder class. 3358# ---------------------------------------------------------------------- 3359 3360class JSON(object): 3361 """An encoder/decoder for JSON data streams. 3362 3363 Usually you will call the encode() or decode() methods. The other 3364 methods are for lower-level processing. 3365 3366 Whether the JSON parser runs in strict mode (which enforces exact 3367 compliance with the JSON spec) or the more forgiving non-string mode 3368 can be affected by setting the 'strict' argument in the object's 3369 initialization; or by assigning True or False to the 'strict' 3370 property of the object. 3371 3372 You can also adjust a finer-grained control over strictness by 3373 allowing or forbidding specific behaviors. You can get a list of 3374 all the available behaviors by accessing the 'behaviors' property. 3375 Likewise the 'allowed_behaviors' and 'forbidden_behaviors' list which 3376 behaviors will be allowed and which will not. Call the allow() 3377 or forbid() methods to adjust these. 
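
    For example (an illustrative sketch; the behavior names used are among
    those defined by json_options, and the methods shown are the
    per-behavior methods generated on the options object):

        j = JSON( strict=False )
        j.options.forbid_comments()               # disallow /*...*/ and //... comments
        j.options.allow_single_quoted_strings()   # permit 'single quoted' strings
        print( sorted( j.options.forbid_behaviors ) )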
3378 3379 """ 3380 _string_quotes = '"\'' 3381 3382 _escapes_json = { # character escapes in JSON 3383 '"': '"', 3384 '/': '/', 3385 '\\': '\\', 3386 'b': '\b', 3387 'f': '\f', 3388 'n': '\n', 3389 'r': '\r', 3390 't': '\t', 3391 } 3392 3393 _escapes_js = { # character escapes in Javascript 3394 '"': '"', 3395 '\'': '\'', 3396 '\\': '\\', 3397 'b': '\b', 3398 'f': '\f', 3399 'n': '\n', 3400 'r': '\r', 3401 't': '\t', 3402 'v': '\v', 3403 '0': '\x00' 3404 } 3405 3406 # Following is a reverse mapping of escape characters, used when we 3407 # output JSON. Only those escapes which are always safe (e.g., in JSON) 3408 # are here. It won't hurt if we leave questionable ones out. 3409 _rev_escapes = {'\n': '\\n', 3410 '\t': '\\t', 3411 '\b': '\\b', 3412 '\r': '\\r', 3413 '\f': '\\f', 3414 '"': '\\"', 3415 '\\': '\\\\' } 3416 _optional_rev_escapes = { '/': '\\/' } # only escaped if forced to do so 3417 3418 json_syntax_characters = u"{}[]\"\\,:0123456789.-+abcdefghijklmnopqrstuvwxyz \t\n\r" 3419 3420 all_hook_names = ('decode_number', 'decode_float', 'decode_object', 3421 'decode_array', 'decode_string', 3422 'encode_value', 'encode_dict', 'encode_dict_key', 3423 'encode_sequence', 'encode_bytes', 'encode_default') 3424 3425 def __init__(self, **kwargs): 3426 """Creates a JSON encoder/decoder object. 3427 3428 You may pass encoding and decoding options either by passing 3429 an argument named 'json_options' with an instance of a 3430 json_options class; or with individual keyword/values that will 3431 be used to initialize a new json_options object. 3432 3433 You can also set hooks by using keyword arguments using the 3434 hook name; e.g., encode_dict=my_hook_func. 3435 3436 """ 3437 import sys, unicodedata, re 3438 3439 kwargs = kwargs.copy() 3440 # Initialize hooks 3441 for hookname in self.all_hook_names: 3442 if hookname in kwargs: 3443 self.set_hook( hookname, kwargs[hookname] ) 3444 del kwargs[hookname] 3445 else: 3446 self.set_hook( hookname, None ) 3447 3448 # Set options 3449 if 'json_options' in kwargs: 3450 self._options = kwargs['json_options'] 3451 else: 3452 self._options = json_options(**kwargs) 3453 3454 3455 # The following is a boolean map of the first 256 characters 3456 # which will quickly tell us which of those characters never 3457 # need to be escaped. 3458 3459 self._asciiencodable = \ 3460 [32 <= c < 128 \ 3461 and not self._rev_escapes.has_key(chr(c)) \ 3462 and not unicodedata.category(unichr(c)) in ['Cc','Cf','Zl','Zp'] 3463 for c in range(0,256)] 3464 3465 @property 3466 def options(self): 3467 """The optional behaviors used, e.g., the JSON conformance 3468 strictness. Returns an instance of json_options. 3469 3470 """ 3471 return self._options 3472 3473 3474 def clear_hook(self, hookname): 3475 """Unsets a hook callback, as previously set with set_hook().""" 3476 self.set_hook( hookname, None ) 3477 3478 def clear_all_hooks(self): 3479 """Unsets all hook callbacks, as previously set with set_hook().""" 3480 for hookname in self.all_hook_names: 3481 self.clear_hook( hookname ) 3482 3483 def set_hook(self, hookname, function): 3484 """Sets a user-defined callback function used during encoding or decoding. 3485 3486 The 'hookname' argument must be a string containing the name of 3487 one of the available hooks, listed below. 3488 3489 The 'function' argument must either be None, which disables the hook, 3490 or a callable function. Hooks do not stack, if you set a hook it will 3491 undo any previously set hook. 3492 3493 Netsted values. 
When decoding JSON that has nested objects or 3494 arrays, the decoding hooks will be called once for every 3495 corresponding value, even if nested. Generally the decoding 3496 hooks will be called from the inner-most value outward, and 3497 then left to right. 3498 3499 Skipping. Any hook function may raise a JSONSkipHook exception 3500 if it does not wish to handle the particular invocation. This 3501 will have the effect of skipping the hook for that particular 3502 value, as if the hook was net set. 3503 3504 AVAILABLE HOOKS: 3505 3506 * decode_string 3507 Called for every JSON string literal with the 3508 Python-equivalent string value as an argument. Expects to 3509 get a Python object in return. 3510 3511 * decode_float: 3512 Called for every JSON number that looks like a float (has 3513 a "."). The string representation of the number is passed 3514 as an argument. Expects to get a Python object in return. 3515 3516 * decode_number: 3517 Called for every JSON number. The string representation of 3518 the number is passed as an argument. Expects to get a 3519 Python object in return. NOTE: If the number looks like a 3520 float and the 'decode_float' hook is set, then this hook 3521 will not be called. 3522 3523 * decode_array: 3524 Called for every JSON array. A Python list is passed as 3525 the argument, and expects to get a Python object back. 3526 NOTE: this hook will get called for every array, even 3527 for nested arrays. 3528 3529 * decode_object: 3530 Called for every JSON object. A Python dictionary is passed 3531 as the argument, and expects to get a Python object back. 3532 NOTE: this hook will get called for every object, even 3533 for nested objects. 3534 3535 * encode_value: 3536 Called for every Python object which is to be encoded into JSON. 3537 3538 * encode_dict: 3539 Called for every Python dictionary or anything that looks 3540 like a dictionary. 3541 3542 * encode_dict_key: 3543 Called for every dictionary key. 3544 3545 * encode_sequence: 3546 Called for every Python sequence-like object that is not a 3547 dictionary or string. This includes lists and tuples. 3548 3549 * encode_bytes: 3550 Called for every Python bytes or bytearray type; or for 3551 any memoryview with a byte ('B') item type. (Python 3 only) 3552 3553 * encode_default: 3554 Called for any Python type which can not otherwise be converted 3555 into JSON, even after applying any other encoding hooks. 3556 3557 """ 3558 if hookname in self.all_hook_names: 3559 att = hookname + '_hook' 3560 if function != None and not callable(function): 3561 raise ValueError("Hook %r must be None or a callable function" % hookname) 3562 setattr( self, att, function ) 3563 else: 3564 raise ValueError("Unknown hook name %r" % hookname) 3565 3566 3567 def has_hook(self, hook_name): 3568 if not hook_name or hook_name not in self.all_hook_names: 3569 return False 3570 hook = getattr( self, hook_name + '_hook' ) 3571 return callable(hook) 3572 3573 3574 def call_hook(self, hook_name, input_object, position=None, *args, **kwargs): 3575 """Wrapper function to invoke a user-supplied hook function. 3576 3577 This will capture any exceptions raised by the hook and do something 3578 appropriate with it. 
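
        For example, a user-supplied hook may raise JSONSkipHook to decline
        a particular value.  An illustrative sketch of such a hook, which
        would be installed with set_hook('decode_float', my_decode_float):

            def my_decode_float( s ):
                if 'e' in s or 'E' in s:
                    raise JSONSkipHook   # let the default float handling proceed
                import decimal
                return decimal.Decimal( s )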
3579 3580 """ 3581 import sys 3582 if hook_name not in self.all_hook_names: 3583 raise AttributeError("No such hook %r" % hook_name) 3584 hook = getattr( self, hook_name + '_hook' ) 3585 if not callable(hook): 3586 raise TypeError("Hook is not callable: %r" % (hook,)) 3587 try: 3588 rval = hook( input_object, *args, **kwargs ) 3589 except JSONSkipHook: 3590 raise # Do nothing 3591 except Exception, err: 3592 exc_info = sys.exc_info() 3593 if hook_name.startswith('encode_'): 3594 ex_class = JSONEncodeHookError 3595 else: 3596 ex_class = JSONDecodeHookError 3597 3598 if isinstance(err, JSONStopProcessing): 3599 severity = 'fatal' 3600 else: 3601 severity = 'error' 3602 3603 newerr = ex_class( hook_name, exc_info, input_object, *args, position=position, severity=severity ) 3604 3605 # Simulate Python 3's: "raise X from Y" exception chaining 3606 newerr.__cause__ = err 3607 newerr.__traceback__ = exc_info[2] 3608 raise newerr 3609 return rval 3610 3611 3612 def isws(self, c): 3613 """Determines if the given character is considered as white space. 3614 3615 Note that Javscript is much more permissive on what it considers 3616 to be whitespace than does JSON. 3617 3618 Ref. ECMAScript section 7.2 3619 3620 """ 3621 if not self.options.unicode_whitespace: 3622 return c in ' \t\n\r' 3623 else: 3624 if not isinstance(c,unicode): 3625 c = unicode(c) 3626 if c in u' \t\n\r\f\v': 3627 return True 3628 import unicodedata 3629 return unicodedata.category(c) == 'Zs' 3630 3631 def islineterm(self, c): 3632 """Determines if the given character is considered a line terminator. 3633 3634 Ref. ECMAScript section 7.3 3635 3636 """ 3637 if c == '\r' or c == '\n': 3638 return True 3639 if c == u'\u2028' or c == u'\u2029': # unicodedata.category(c) in ['Zl', 'Zp'] 3640 return True 3641 return False 3642 3643 3644 def recover_parser(self, state): 3645 """Try to recover after a syntax error by locating the next "known" position.""" 3646 buf = state.buf 3647 buf.skipuntil( lambda c: c in ",:[]{}\"\';" or helpers.char_is_unicode_eol(c) ) 3648 stopchar = buf.peek() 3649 self.skipws(state) 3650 if buf.at_end: 3651 state.push_info("Could not recover parsing after previous error",position=buf.position) 3652 else: 3653 state.push_info("Recovering parsing after character %r" % stopchar, position=buf.position) 3654 return stopchar 3655 3656 3657 def decode_null(self, state): 3658 """Intermediate-level decoder for ECMAScript 'null' keyword. 3659 3660 Takes a string and a starting index, and returns a Python 3661 None object and the index of the next unparsed character. 3662 3663 """ 3664 buf = state.buf 3665 start_position = buf.position 3666 kw = buf.pop_identifier() 3667 if not kw or kw != 'null': 3668 state.push_error("Expected a 'null' keyword'", kw, position=start_position) 3669 else: 3670 state.stats.num_nulls += 1 3671 return None 3672 3673 def encode_undefined(self, state): 3674 """Produces the ECMAScript 'undefined' keyword.""" 3675 state.append('undefined') 3676 3677 def encode_null(self, state): 3678 """Produces the JSON 'null' keyword.""" 3679 state.append('null') 3680 3681 def decode_boolean(self, state): 3682 """Intermediate-level decode for JSON boolean literals. 3683 3684 Takes a string and a starting index, and returns a Python bool 3685 (True or False) and the index of the next unparsed character. 

        """
        buf = state.buf
        start_position = buf.position
        kw = buf.pop_identifier()
        if not kw or kw not in ('true','false'):
            state.push_error("Expected a 'true' or 'false' keyword", kw, position=start_position)
        else:
            state.stats.num_bools += 1
            return (kw == 'true')

    def encode_boolean(self, bval, state):
        """Encodes the Python boolean into a JSON Boolean literal."""
        state.append( 'true' if bool(bval) else 'false' )

    def decode_number(self, state):
        """Intermediate-level decoder for JSON numeric literals.

        Takes a string and a starting index, and returns a Python
        suitable numeric type and the index of the next unparsed character.

        The returned numeric type can be any of a Python int,
        long, or float. In addition some special non-numbers may
        also be returned such as nan, inf, and neginf (which are
        technically Python floats, but have no numeric value.)

        Ref. ECMAScript section 8.5.

        """
        buf = state.buf
        self.skipws(state)
        start_position = buf.position

        # Use external number parser hook if available
        if self.has_hook('decode_number') or self.has_hook('decode_float'):
            c = buf.peek()
            if c and c in '-+0123456789.':   # First chars for a number-like value
                buf.save_position()
                nbr = buf.pop_while_in( '-+0123456789abcdefABCDEF' 'NaN' 'Infinity.' )
                if '.' in nbr and self.has_hook('decode_float'):
                    hook_name = 'decode_float'
                elif self.has_hook('decode_number'):
                    hook_name = 'decode_number'
                else:
                    hook_name = None

                if hook_name:
                    try:
                        val = self.call_hook( hook_name, nbr, position=start_position )
                    except JSONSkipHook:
                        pass
                    except JSONError, err:
                        state.push_exception(err)
                        val = undefined
                    else:
                        buf.clear_saved_position()
                        return val
                # Hook didn't handle it, restore old position
                buf.restore_position()

        # Detect initial sign character(s)
        sign = +1
        sign_count = 0
        sign_saw_plus = False
        sign_saw_ws = False
        c = buf.peek()
        while c and c in '+-':
            if c == '-':
                sign = sign * -1
            elif c == '+':
                sign_saw_plus = True
            sign_count += 1
            buf.skip()
            if self.skipws_nocomments(state) > 0:
                sign_saw_ws = True
            c = buf.peek()

        if sign_count > 1 or sign_saw_plus:
            state.push_cond( self.options.all_numeric_signs,
                             'Numbers may only have a single "-" as a sign prefix',
                             position=start_position)
        if sign_saw_ws:
            state.push_error('Spaces may not appear between a +/- number sign and the digits', position=start_position)

        # Check for ECMAScript symbolic non-numbers
        if not c:
            state.push_error('Missing numeric value after sign', position=start_position)
            self.recover_parser(state)
            state.stats.num_undefineds += 1
            return undefined
        elif c.isalpha() or c in '_$':
            kw = buf.popwhile( lambda c: c.isalnum() or c in '_$' )
            if kw == 'NaN':
                state.push_cond( self.options.non_numbers,
                                 'NaN literals are not allowed in strict JSON',
                                 position=start_position)
                state.stats.num_nans += 1
                return self.options.nan
            elif kw == 'Infinity':
                state.push_cond( self.options.non_numbers,
                                 'Infinity literals are not allowed in strict JSON',
                                 position=start_position)
                state.stats.num_infinities += 1
                if sign < 0:
                    return self.options.neginf
                else:
                    return self.options.inf
            else:
state.push_error('Unknown numeric value keyword', kw, position=start_position) 3795 return undefined 3796 3797 # Check for radix-prefixed numbers 3798 elif c == '0' and (buf.peek(1) in [u'x',u'X']): 3799 # ----- HEX NUMBERS 0x123 3800 prefix = buf.popstr(2) 3801 digits = buf.popwhile( helpers.is_hex_digit ) 3802 state.push_cond( self.options.hex_numbers, 3803 'Hexadecimal literals are not allowed in strict JSON', prefix+digits, 3804 position=start_position ) 3805 if len(digits)==0: 3806 state.push_error('Hexadecimal number is invalid', position=start_position) 3807 self.recover_parser(state) 3808 return undefined 3809 ival = helpers.decode_hex( digits ) 3810 state.update_integer_stats( ival, sign=sign, position=start_position ) 3811 n = state.options.make_int( ival, sign, number_format=NUMBER_FORMAT_HEX ) 3812 return n 3813 elif c == '0' and (buf.peek(1) in [u'o','O']): 3814 # ----- NEW-STYLE OCTAL NUMBERS 0o123 3815 prefix = buf.popstr(2) 3816 digits = buf.popwhile( helpers.is_octal_digit ) 3817 state.push_cond( self.options.octal_numbers, 3818 "Octal literals are not allowed in strict JSON", prefix+digits, 3819 position=start_position ) 3820 if len(digits)==0: 3821 state.push_error("Octal number is invalid", position=start_position) 3822 self.recover_parser(state) 3823 return undefined 3824 ival = helpers.decode_octal( digits ) 3825 state.update_integer_stats( ival, sign=sign, position=start_position ) 3826 n = state.options.make_int( ival, sign, number_format=NUMBER_FORMAT_OCTAL ) 3827 return n 3828 elif c == '0' and (buf.peek(1) in [u'b','B']): 3829 # ----- NEW-STYLE BINARY NUMBERS 0b1101 3830 prefix = buf.popstr(2) 3831 digits = buf.popwhile( helpers.is_binary_digit ) 3832 state.push_cond( self.options.binary_numbers, 3833 "Binary literals are not allowed in strict JSON", prefix+digits, 3834 position=start_position ) 3835 if len(digits)==0: 3836 state.push_error("Binary number is invalid", position=start_position) 3837 self.recover_parser(state) 3838 return undefined 3839 ival = helpers.decode_binary( digits ) 3840 state.update_integer_stats( ival, sign=sign, position=start_position ) 3841 n = state.options.make_int( ival, sign, number_format=NUMBER_FORMAT_BINARY ) 3842 return n 3843 else: 3844 # ----- DECIMAL OR LEGACY-OCTAL NUMBER. 
            #       123, 0123
            # General syntax is:  \d+[\.\d+][e[+-]?\d+]
            number = buf.popwhile( lambda c: c in '0123456789.+-eE' )
            imax = len(number)
            if imax == 0:
                state.push_error('Missing numeric value', position=start_position)
            has_leading_zero = False
            units_digits = []     # digits making up whole number portion
            fraction_digits = []  # digits making up fractional portion
            exponent_digits = []  # digits making up exponent portion (excluding sign)
            esign = '+'           # sign of exponent
            sigdigits = 0         # number of significant digits (approximate)
            saw_decimal_point = False
            saw_exponent = False

            # Break number into parts in a first pass...use a mini state machine
            in_part = 'units'
            for i, c in enumerate(number):

                if c == '.':
                    if in_part != 'units':
                        state.push_error('Bad number', number, position=start_position)
                        self.recover_parser(state)
                        return undefined
                    in_part = 'fraction'
                    saw_decimal_point = True
                elif c in 'eE':
                    if in_part == 'exponent':
                        state.push_error('Bad number', number, position=start_position)
                        self.recover_parser(state)
                        return undefined
                    in_part = 'exponent'
                    saw_exponent = True
                elif c in '+-':
                    if in_part != 'exponent' or exponent_digits:
                        state.push_error('Bad number', number, position=start_position)
                        self.recover_parser(state)
                        return undefined
                    esign = c
                else:  # digit
                    if in_part == 'units':
                        units_digits.append( c )
                    elif in_part == 'fraction':
                        fraction_digits.append( c )
                    elif in_part == 'exponent':
                        exponent_digits.append( c )
            units_s = ''.join(units_digits)
            fraction_s = ''.join(fraction_digits)
            exponent_s = ''.join(exponent_digits)

            # Basic syntax rules checking
            is_integer = not (saw_decimal_point or saw_exponent)

            if not units_s and not fraction_s:
                state.push_error('Bad number', number, position=start_position)
                self.recover_parser(state)
                return undefined

            if saw_decimal_point and not fraction_s:
                state.push_cond( self.options.trailing_decimal_point,
                                 'Bad number, decimal point must be followed by at least one digit',
                                 number, position=start_position)
                fraction_s = '0'

            if saw_exponent and not exponent_s:
                state.push_error('Bad number, exponent is missing', number, position=start_position)
                self.recover_parser(state)
                return undefined

            if not units_s:
                state.push_cond( self.options.initial_decimal_point,
                                 'Bad number, decimal point must be preceded by at least one digit',
                                 number, position=start_position)
                units_s = '0'
            elif len(units_s) > 1 and units_s[0] == '0':
                has_leading_zero = True
                if self.options.is_forbid_leading_zeros:
                    state.push_cond( self.options.leading_zeros,
                                     'Numbers may not have extra leading zeros',
                                     number, position=start_position)
                elif self.options.is_warn_leading_zeros:
                    state.push_cond( self.options.leading_zeros,
                                     'Numbers may not have leading zeros; interpreting as %s' \
                                         % self.options.leading_zero_radix_as_word,
                                     number, position=start_position)

            # Estimate number of significant digits
            sigdigits = len( (units_s + fraction_s).replace('0',' ').strip() )

            # Handle legacy octal integers.
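            # Note: a leading zero (e.g., 0123) is only re-read in another
            # radix when the 'leading_zero_radix' option is set to 8 below;
            # otherwise the digits fall through and are treated as decimal.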
3934 if has_leading_zero and is_integer and self.options.leading_zero_radix == 8: 3935 # ----- LEGACY-OCTAL 0123 3936 try: 3937 ival = helpers.decode_octal( units_s ) 3938 except ValueError: 3939 state.push_error('Bad number, not a valid octal value', number, position=start_position) 3940 self.recover_parser(state) 3941 return self.options.nan # undefined 3942 state.update_integer_stats( ival, sign=sign, position=start_position ) 3943 n = state.options.make_int( ival, sign, number_format=NUMBER_FORMAT_LEGACYOCTAL ) 3944 return n 3945 3946 # Determine the exponential part 3947 if exponent_s: 3948 try: 3949 exponent = int(exponent_s) 3950 except ValueError: 3951 state.push_error('Bad number, bad exponent', number, position=start_position) 3952 self.recover_parser(state) 3953 return undefined 3954 if esign == '-': 3955 exponent = - exponent 3956 else: 3957 exponent = 0 3958 3959 # Try to make an int/long first. 3960 if not saw_decimal_point and exponent >= 0: 3961 # ----- A DECIMAL INTEGER 3962 ival = int(units_s) 3963 if exponent != 0: 3964 ival *= 10**exponent 3965 state.update_integer_stats( ival, sign=sign, position=start_position ) 3966 n = state.options.make_int( ival, sign ) 3967 else: 3968 # ----- A FLOATING-POINT NUMBER 3969 try: 3970 if exponent < float_minexp or exponent > float_maxexp or sigdigits > float_sigdigits: 3971 n = state.options.make_decimal( number, sign ) 3972 else: 3973 n = state.options.make_float( number, sign ) 3974 except ValueError as err: 3975 state.push_error('Bad number, %s' % err.message, number, position=start_position) 3976 n = undefined 3977 else: 3978 state.update_float_stats( n, sign=sign, position=start_position ) 3979 return n 3980 3981 3982 def encode_number(self, n, state): 3983 """Encodes a Python numeric type into a JSON numeric literal. 3984 3985 The special non-numeric values of float('nan'), float('inf') 3986 and float('-inf') are translated into appropriate JSON 3987 literals. 3988 3989 Note that Python complex types are not handled, as there is no 3990 ECMAScript equivalent type. 3991 3992 """ 3993 if isinstance(n, complex): 3994 if n.imag: 3995 raise JSONEncodeError('Can not encode a complex number that has a non-zero imaginary part',n) 3996 n = n.real 3997 3998 if isinstance(n, json_int): 3999 state.append( n.json_format() ) 4000 return 4001 4002 if isinstance(n, (int,long)): 4003 state.append( str(n) ) 4004 return 4005 4006 if decimal and isinstance(n, decimal.Decimal): 4007 if n.is_nan(): # Could be 'NaN' or 'sNaN' 4008 state.append( 'NaN' ) 4009 elif n.is_infinite(): 4010 if n.is_signed(): 4011 state.append( '-Infinity' ) 4012 else: 4013 state.append( 'Infinity' ) 4014 else: 4015 s = str(n).lower() 4016 if 'e' not in s and '.' not in s: 4017 s = s + '.0' 4018 state.append( s ) 4019 return 4020 4021 global nan, inf, neginf 4022 if n is nan: 4023 state.append( 'NaN' ) 4024 elif n is inf: 4025 state.append( 'Infinity' ) 4026 elif n is neginf: 4027 state.append( '-Infinity' ) 4028 elif isinstance(n, float): 4029 # Check for non-numbers. 4030 # In python nan == inf == -inf, so must use repr() to distinguish 4031 reprn = repr(n).lower() 4032 if ('inf' in reprn and '-' in reprn) or n == neginf: 4033 state.append( '-Infinity' ) 4034 elif 'inf' in reprn or n is inf: 4035 state.append( 'Infinity' ) 4036 elif 'nan' in reprn or n is nan: 4037 state.append( 'NaN' ) 4038 else: 4039 # A normal float. 
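                # repr() rather than str() is used so that enough
                # significant digits are emitted for the value to
                # round-trip exactly.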
4040 state.append( repr(n) ) 4041 else: 4042 raise TypeError('encode_number expected an integral, float, or decimal number type',type(n)) 4043 4044 4045 def decode_string(self, state): 4046 """Intermediate-level decoder for JSON string literals. 4047 4048 Takes a string and a starting index, and returns a Python 4049 string (or unicode string) and the index of the next unparsed 4050 character. 4051 4052 """ 4053 buf = state.buf 4054 self.skipws(state) 4055 quote = buf.peek() 4056 if quote == '"': 4057 pass 4058 elif quote == "'": 4059 state.push_cond( self.options.single_quoted_strings, 4060 'String literals must use double quotation marks in strict JSON' ) 4061 else: 4062 state.push_error('String literal must be properly quoted') 4063 return undefined 4064 4065 string_position = buf.position 4066 buf.skip() 4067 4068 if self.options.is_forbid_js_string_escapes: 4069 escapes = self._escapes_json 4070 else: 4071 escapes = self._escapes_js 4072 ccallowed = not self.options.is_forbid_control_char_in_string 4073 chunks = [] 4074 _append = chunks.append 4075 4076 # Used to track the last seen high-surrogate character 4077 high_surrogate = None 4078 highsur_position = None 4079 4080 # Used to track if errors occured so we don't keep reporting multiples 4081 had_lineterm_error = False 4082 4083 # Start looping character by character until the final quotation mark 4084 saw_final_quote = False 4085 should_stop = False 4086 while not saw_final_quote and not should_stop: 4087 if buf.at_end: 4088 state.push_error("String literal is not terminated", 4089 outer_position=string_position, context='String') 4090 break 4091 c = buf.peek() 4092 4093 # Make sure a high surrogate is immediately followed by a low surrogate 4094 if high_surrogate: 4095 if 0xdc00 <= ord(c) <= 0xdfff: 4096 low_surrogate = buf.pop() 4097 try: 4098 uc = helpers.surrogate_pair_as_unicode( high_surrogate, low_surrogate ) 4099 except ValueError as err: 4100 state.push_error( 'Illegal Unicode surrogate pair', (high_surrogate, low_surrogate), 4101 position=highsur_position, outer_position=string_position, 4102 context='String') 4103 should_stop = state.should_stop 4104 uc = u'\ufffd' # replacement char 4105 _append( uc ) 4106 high_surrogate = None 4107 highsur_position = None 4108 continue # ==== NEXT CHAR 4109 elif buf.peekstr(2) != '\\u': 4110 state.push_error('High unicode surrogate must be followed by a low surrogate', 4111 position=highsur_position, outer_position=string_position, 4112 context='String') 4113 should_stop = state.should_stop 4114 _append( u'\ufffd' ) # replacement char 4115 high_surrogate = None 4116 highsur_position = None 4117 4118 if c == quote: 4119 buf.skip() # skip over closing quote 4120 saw_final_quote = True 4121 break 4122 elif c == '\\': 4123 # Escaped character 4124 escape_position = buf.position 4125 buf.skip() # skip over backslash 4126 c = buf.peek() 4127 if not c: 4128 state.push_error('Escape in string literal is incomplete', position=escape_position, 4129 outer_position=string_position, context='String') 4130 should_stop = state.should_stop 4131 break 4132 elif helpers.is_octal_digit(c): 4133 # Handle octal escape codes first so special \0 doesn't kick in yet. 4134 # Follow Annex B.1.2 of ECMAScript standard. 
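                    # Per Annex B.1.2, escapes starting with \0-\3 may have up
                    # to three octal digits, while \4-\7 may have at most two,
                    # so the decoded value never exceeds \377 (255).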
4135 if '0' <= c <= '3': 4136 maxdigits = 3 4137 else: 4138 maxdigits = 2 4139 digits = buf.popwhile( helpers.is_octal_digit, maxchars=maxdigits ) 4140 n = helpers.decode_octal(digits) 4141 if n == 0: 4142 state.push_cond( self.options.zero_byte, 4143 'Zero-byte character (U+0000) in string may not be universally safe', 4144 "\\"+digits, position=escape_position, outer_position=string_position, 4145 context='String') 4146 else: # n != 0 4147 state.push_cond( self.options.octal_numbers, 4148 "JSON does not allow octal character escapes other than \"\\0\"", 4149 "\\"+digits, position=escape_position, outer_position=string_position, 4150 context='String') 4151 should_stop = state.should_stop 4152 if n < 128: 4153 _append( chr(n) ) 4154 else: 4155 _append( helpers.safe_unichr(n) ) 4156 elif escapes.has_key(c): 4157 buf.skip() 4158 _append( escapes[c] ) 4159 elif c == 'u' or c == 'x': 4160 buf.skip() 4161 esc_opener = '\\' + c 4162 esc_closer = '' 4163 if c == 'u': 4164 if buf.peek() == '{': 4165 buf.skip() 4166 esc_opener += '{' 4167 esc_closer = '}' 4168 maxdigits = None 4169 state.push_cond( self.options.extended_unicode_escapes, 4170 "JSON strings do not allow \\u{...} escapes", 4171 position=escape_position, outer_position=string_position, 4172 context='String') 4173 else: 4174 maxdigits = 4 4175 else: # c== 'x' 4176 state.push_cond( self.options.js_string_escapes, 4177 "JSON strings may not use the \\x hex-escape", 4178 position=escape_position, outer_position=string_position, 4179 context='String') 4180 should_stop = state.should_stop 4181 maxdigits = 2 4182 4183 digits = buf.popwhile( helpers.is_hex_digit, maxchars=maxdigits ) 4184 4185 if esc_closer: 4186 if buf.peek() != esc_closer: 4187 state.push_error( "Unicode escape sequence is missing closing \'%s\'" % esc_closer, esc_opener+digits, 4188 position=escape_position, outer_position=string_position, 4189 context='String') 4190 should_stop = state.should_stop 4191 else: 4192 buf.skip() 4193 4194 esc_sequence = esc_opener + digits + esc_closer 4195 4196 if not digits: 4197 state.push_error('numeric character escape sequence is truncated', esc_sequence, 4198 position=escape_position, outer_position=string_position, 4199 context='String') 4200 should_stop = state.should_stop 4201 codepoint = 0xfffd # replacement char 4202 else: 4203 if maxdigits and len(digits) != maxdigits: 4204 state.push_error('escape sequence has too few hexadecimal digits', esc_sequence, 4205 position=escape_position, outer_position=string_position, 4206 context='String') 4207 codepoint = helpers.decode_hex( digits ) 4208 4209 if codepoint > 0x10FFFF: 4210 state.push_error( 'Unicode codepoint is beyond U+10FFFF', esc_opener+digits+esc_closer, 4211 position=escape_position, outer_position=string_position, 4212 context='String') 4213 codepoint = 0xfffd # replacement char 4214 4215 if high_surrogate: 4216 # Decode surrogate pair and clear high surrogate 4217 low_surrogate = unichr(codepoint) 4218 try: 4219 uc = helpers.surrogate_pair_as_unicode( high_surrogate, low_surrogate ) 4220 except ValueError as err: 4221 state.push_error( 'Illegal Unicode surrogate pair', (high_surrogate, low_surrogate), position=highsur_position, 4222 outer_position=string_position, 4223 context='String') 4224 should_stop = state.should_stop 4225 uc = u'\ufffd' # replacement char 4226 _append( uc ) 4227 high_surrogate = None 4228 highsur_position = None 4229 elif codepoint < 128: 4230 # ASCII chars always go in as a str 4231 if codepoint==0: 4232 state.push_cond( self.options.zero_byte, 4233 
                                             'Zero-byte character (U+0000) in string may not be universally safe',
                                             position=escape_position, outer_position=string_position,
                                             context='String')
                            should_stop = state.should_stop
                        _append( chr(codepoint) )
                    elif 0xd800 <= codepoint <= 0xdbff:  # high surrogate
                        high_surrogate = unichr(codepoint)  # remember until we get to the low surrogate
                        highsur_position = escape_position.copy()
                    elif 0xdc00 <= codepoint <= 0xdfff:  # low surrogate
                        state.push_error('Low unicode surrogate must be preceded by a high surrogate', position=escape_position,
                                         outer_position=string_position,
                                         context='String')
                        should_stop = state.should_stop
                        _append( u'\ufffd' )  # replacement char
                    else:
                        # Other chars go in as a unicode char
                        _append( helpers.safe_unichr(codepoint) )
                else:
                    # Unknown escape sequence
                    state.push_cond( self.options.nonescape_characters,
                                     'String escape code is not allowed in strict JSON',
                                     '\\'+c, position=escape_position, outer_position=string_position,
                                     context='String')
                    should_stop = state.should_stop
                    _append( c )
                    buf.skip()
            elif ord(c) <= 0x1f:  # A control character
                if ord(c) == 0:
                    state.push_cond( self.options.zero_byte,
                                     'Zero-byte character (U+0000) in string may not be universally safe',
                                     position=buf.position, outer_position=string_position,
                                     context='String')
                    should_stop = state.should_stop
                if self.islineterm(c):
                    if not had_lineterm_error:
                        state.push_error('Line terminator characters must be escaped inside string literals',
                                         'U+%04X'%ord(c),
                                         position=buf.position, outer_position=string_position,
                                         context='String')
                        should_stop = state.should_stop
                        had_lineterm_error = True
                    _append( c )
                    buf.skip()
                elif ccallowed:
                    _append( c )
                    buf.skip()
                else:
                    state.push_error('Control characters must be escaped inside JSON string literals',
                                     'U+%04X'%ord(c),
                                     position=buf.position, outer_position=string_position,
                                     context='String')
                    should_stop = state.should_stop
                    buf.skip()
            elif 0xd800 <= ord(c) <= 0xdbff:  # a raw high surrogate
                high_surrogate = buf.pop()  # remember until we get to the low surrogate
                highsur_position = buf.position.copy()
            else:  # A normal character; not an escape sequence or end-quote.
                # Find a whole sequence of "safe" characters so we can append them
                # all at once rather than one at a time, for speed.
4292 chunk = buf.popwhile( lambda c: c not in helpers.unsafe_string_chars and c != quote ) 4293 if not chunk: 4294 _append( c ) 4295 buf.skip() 4296 else: 4297 _append( chunk ) 4298 4299 # Check proper string termination 4300 if high_surrogate: 4301 state.push_error('High unicode surrogate must be followed by a low surrogate', 4302 position=highsur_position, outer_position=string_position, 4303 context='String') 4304 _append( u'\ufffd' ) # replacement char 4305 high_surrogate = None 4306 highsur_position = None 4307 4308 if not saw_final_quote: 4309 state.push_error('String literal is not terminated with a quotation mark', position=buf.position, 4310 outer_position=string_position, 4311 context='String') 4312 4313 if state.should_stop: 4314 return undefined 4315 4316 # Compose the python string and update stats 4317 s = ''.join( chunks ) 4318 state.update_string_stats( s, position=string_position ) 4319 4320 # Call string hook 4321 if self.has_hook('decode_string'): 4322 try: 4323 s = self.call_hook( 'decode_string', s, position=string_position ) 4324 except JSONSkipHook: 4325 pass 4326 except JSONError, err: 4327 state.push_exception(err) 4328 s = undefined 4329 return s 4330 4331 def encode_string(self, s, state): 4332 """Encodes a Python string into a JSON string literal. 4333 4334 """ 4335 # Must handle instances of UserString specially in order to be 4336 # able to use ord() on it's simulated "characters". Also 4337 # convert Python2 'str' types to unicode strings first. 4338 import unicodedata, sys 4339 import UserString 4340 py2strenc = self.options.py2str_encoding 4341 if isinstance(s, UserString.UserString): 4342 def tochar(c): 4343 c2 = c.data 4344 if py2strenc and not isinstance(c2,unicode): 4345 return c2.decode( py2strenc ) 4346 else: 4347 return c2 4348 elif py2strenc and not isinstance(s,unicode): 4349 s = s.decode( py2strenc ) 4350 tochar = None 4351 else: 4352 # Could use "lambda c:c", but that is too slow. So we set to None 4353 # and use an explicit if test inside the loop. 4354 tochar = None 4355 4356 chunks = [] 4357 chunks.append('"') 4358 revesc = self._rev_escapes 4359 optrevesc = self._optional_rev_escapes 4360 asciiencodable = self._asciiencodable 4361 always_escape = state.options.always_escape_chars 4362 encunicode = state.escape_unicode_test 4363 i = 0 4364 imax = len(s) 4365 while i < imax: 4366 if tochar: 4367 c = tochar(s[i]) 4368 else: 4369 c = s[i] 4370 cord = ord(c) 4371 if cord < 256 and asciiencodable[cord] and isinstance(encunicode, bool) \ 4372 and not (always_escape and c in always_escape): 4373 # Contiguous runs of plain old printable ASCII can be copied 4374 # directly to the JSON output without worry (unless the user 4375 # has supplied a custom is-encodable function). 4376 j = i 4377 i += 1 4378 while i < imax: 4379 if tochar: 4380 c = tochar(s[i]) 4381 else: 4382 c = s[i] 4383 cord = ord(c) 4384 if cord < 256 and asciiencodable[cord] \ 4385 and not (always_escape and c in always_escape): 4386 i += 1 4387 else: 4388 break 4389 chunks.append( unicode(s[j:i]) ) 4390 elif revesc.has_key(c): 4391 # Has a shortcut escape sequence, like "\n" 4392 chunks.append(revesc[c]) 4393 i += 1 4394 elif cord <= 0x1F: 4395 # Always unicode escape ASCII-control characters 4396 chunks.append(r'\u%04x' % cord) 4397 i += 1 4398 elif 0xD800 <= cord <= 0xDFFF: 4399 # A raw surrogate character! 4400 # This should ONLY happen in "narrow" Python builds 4401 # where (sys.maxunicode == 65535) as Python itself 4402 # uses UTF-16. 
But for "wide" Python builds, a raw 4403 # surrogate should never happen. 4404 handled_raw_surrogates = False 4405 if sys.maxunicode == 0xFFFF and 0xD800 <= cord <= 0xDBFF and (i+1) < imax: 4406 # In a NARROW Python, output surrogate pair as-is 4407 hsurrogate = cord 4408 i += 1 4409 if tochar: 4410 c = tochar(s[i]) 4411 else: 4412 c = s[i] 4413 cord = ord(c) 4414 i += 1 4415 if 0xDC00 <= cord <= 0xDFFF: 4416 lsurrogate = cord 4417 chunks.append(r'\u%04x\u%04x' % (hsurrogate,lsurrogate)) 4418 handled_raw_surrogates = True 4419 if not handled_raw_surrogates: 4420 cname = 'U+%04X' % cord 4421 raise JSONEncodeError('can not include or escape a Unicode surrogate character',cname) 4422 elif cord <= 0xFFFF: 4423 # Other BMP Unicode character 4424 if always_escape and c in always_escape: 4425 doesc = True 4426 elif unicodedata.category( c ) in ['Cc','Cf','Zl','Zp']: 4427 doesc = True 4428 elif callable(encunicode): 4429 doesc = encunicode( c ) 4430 else: 4431 doesc = encunicode 4432 4433 if doesc: 4434 if optrevesc.has_key(c): 4435 chunks.append(optrevesc[c]) 4436 else: 4437 chunks.append(r'\u%04x' % cord) 4438 else: 4439 chunks.append( c ) 4440 i += 1 4441 else: # ord(c) >= 0x10000 4442 # Non-BMP Unicode 4443 if always_escape and c in always_escape: 4444 doesc = True 4445 elif unicodedata.category( c ) in ['Cc','Cf','Zl','Zp']: 4446 doesc = True 4447 elif callable(encunicode): 4448 doesc = encunicode( c ) 4449 else: 4450 doesc = encunicode 4451 4452 if doesc: 4453 for surrogate in helpers.unicode_as_surrogate_pair(c): 4454 chunks.append(r'\u%04x' % ord(surrogate)) 4455 else: 4456 chunks.append( c ) 4457 i += 1 4458 4459 4460 chunks.append('"') 4461 state.append( ''.join( chunks ) ) 4462 4463 4464 def decode_identifier(self, state, identifier_as_string=False): 4465 """Decodes an identifier/keyword. 
4466 4467 """ 4468 buf = state.buf 4469 self.skipws(state) 4470 start_position = buf.position 4471 obj = None 4472 4473 kw = buf.pop_identifier() 4474 4475 if not kw: 4476 state.push_error("Expected an identifier", position=start_position) 4477 elif kw == 'null': 4478 obj = None 4479 state.stats.num_nulls += 1 4480 elif kw == 'true': 4481 obj = True 4482 state.stats.num_bools += 1 4483 elif kw == 'false': 4484 obj = False 4485 state.stats.num_bools += 1 4486 elif kw == 'undefined': 4487 state.push_cond( self.options.undefined_values, 4488 "Strict JSON does not allow the 'undefined' keyword", 4489 kw, position=start_position) 4490 obj = undefined 4491 state.stats.num_undefineds += 1 4492 elif kw == 'NaN' or kw == 'Infinity': 4493 state.push_cond( self.options.non_numbers, 4494 "%s literals are not allowed in strict JSON" % kw, 4495 kw, position=start_position) 4496 if self.has_hook('decode_float'): 4497 try: 4498 val = self.call_hook( 'decode_float', kw, position=start_position ) 4499 except JSONSkipHook: 4500 pass 4501 except JSONError, err: 4502 state.push_exception(err) 4503 return undefined 4504 else: 4505 return val 4506 elif self.has_hook('decode_number'): 4507 try: 4508 val = self.call_hook( 'decode_number', kw, position=start_position ) 4509 except JSONSkipHook: 4510 pass 4511 except JSONError, err: 4512 state.push_exception(err) 4513 return undefined 4514 else: 4515 return val 4516 if kw == 'NaN': 4517 state.stats.num_nans += 1 4518 obj = state.options.nan 4519 else: 4520 state.stats.num_infinities += 1 4521 obj = state.options.inf 4522 else: 4523 # Convert unknown identifiers into strings 4524 if identifier_as_string: 4525 if kw in helpers.javascript_reserved_words: 4526 state.push_warning( "Identifier is a JavaScript reserved word", 4527 kw, position=start_position) 4528 state.push_cond( self.options.identifier_keys, 4529 "JSON does not allow identifiers to be used as strings", 4530 kw, position=start_position) 4531 state.stats.num_identifiers += 1 4532 obj = self.decode_javascript_identifier( kw ) 4533 else: 4534 state.push_error("Unknown identifier", kw, position=start_position) 4535 obj = undefined 4536 state.stats.num_identifiers += 1 4537 return obj 4538 4539 4540 def skip_comment(self, state): 4541 """Skips an ECMAScript comment, either // or /* style. 4542 4543 The contents of the comment are returned as a string, as well 4544 as the index of the character immediately after the comment. 4545 4546 """ 4547 buf = state.buf 4548 uniws = self.options.unicode_whitespace 4549 s = buf.peekstr(2) 4550 if s != '//' and s != '/*': 4551 return None 4552 state.push_cond( self.options.comments, 'Comments are not allowed in strict JSON' ) 4553 start_position = buf.position 4554 buf.skip(2) 4555 multiline = (s == '/*') 4556 saw_close = False 4557 while not buf.at_end: 4558 if multiline: 4559 if buf.peekstr(2) == '*/': 4560 buf.skip(2) 4561 saw_close = True 4562 break 4563 elif buf.peekstr(2) == '/*': 4564 state.push_error('Multiline /* */ comments may not nest', 4565 outer_position=start_position, 4566 context='Comment') 4567 else: 4568 if buf.at_eol( uniws ): 4569 buf.skip_to_next_line( uniws ) 4570 saw_close = True 4571 break 4572 buf.pop() 4573 4574 if not saw_close and multiline: 4575 state.push_error('Comment was never terminated', outer_position=start_position, 4576 context='Comment') 4577 state.stats.num_comments += 1 4578 4579 4580 def skipws_nocomments(self, state): 4581 """Skips whitespace (will not allow comments). 
4582 """ 4583 return state.buf.skipws( not self.options.is_forbid_unicode_whitespace ) 4584 4585 4586 def skipws(self, state): 4587 """Skips all whitespace, including comments and unicode whitespace 4588 4589 Takes a string and a starting index, and returns the index of the 4590 next non-whitespace character. 4591 4592 If the 'skip_comments' behavior is True and not running in 4593 strict JSON mode, then comments will be skipped over just like 4594 whitespace. 4595 4596 """ 4597 buf = state.buf 4598 uniws = not self.options.unicode_whitespace 4599 while not buf.at_end: 4600 c = buf.peekstr(2) 4601 if c == '/*' or c == '//': 4602 cmt = self.skip_comment( state ) 4603 elif buf.at_ws( uniws ): 4604 buf.skipws( uniws ) 4605 else: 4606 break 4607 4608 def decode_composite(self, state): 4609 """Intermediate-level JSON decoder for composite literal types (array and object). 4610 4611 """ 4612 if state.should_stop: 4613 return None 4614 buf = state.buf 4615 self.skipws(state) 4616 opener = buf.peek() 4617 if opener not in '{[': 4618 state.push_error('Composite data must start with "[" or "{"') 4619 return None 4620 start_position = buf.position 4621 buf.skip() 4622 if opener == '[': 4623 isdict = False 4624 closer = ']' 4625 obj = [] 4626 else: 4627 isdict = True 4628 closer = '}' 4629 if state.options.sort_keys == SORT_PRESERVE and _OrderedDict: 4630 obj = _OrderedDict() 4631 else: 4632 obj = {} 4633 num_items = 0 4634 self.skipws(state) 4635 4636 c = buf.peek() 4637 if c == closer: 4638 # empty composite 4639 buf.skip() 4640 done = True 4641 else: 4642 saw_value = False # set to false at beginning and after commas 4643 done = False 4644 while not done and not buf.at_end and not state.should_stop: 4645 self.skipws(state) 4646 c = buf.peek() 4647 if c == '': 4648 break # will report error futher down because done==False 4649 elif c == ',': 4650 if not saw_value: 4651 # no preceeding value, an elided (omitted) element 4652 if isdict: 4653 state.push_error('Can not omit elements of an object (dictionary)', 4654 outer_position=start_position, 4655 context='Object') 4656 else: 4657 state.push_cond( self.options.omitted_array_elements, 4658 'Can not omit elements of an array (list)', 4659 outer_position=start_position, 4660 context='Array') 4661 obj.append( undefined ) 4662 if state.stats: 4663 state.stats.num_undefineds += 1 4664 buf.skip() # skip over comma 4665 saw_value = False 4666 continue 4667 elif c == closer: 4668 if not saw_value: 4669 if isdict: 4670 state.push_cond( self.options.trailing_comma, 4671 'Strict JSON does not allow a final comma in an object (dictionary) literal', 4672 outer_position=start_position, 4673 context='Object') 4674 else: 4675 state.push_cond( self.options.trailing_comma, 4676 'Strict JSON does not allow a final comma in an array (list) literal', 4677 outer_position=start_position, 4678 context='Array') 4679 buf.skip() # skip over closer 4680 done = True 4681 break 4682 elif c in ']}': 4683 if isdict: 4684 cdesc='Object' 4685 else: 4686 cdesc='Array' 4687 state.push_error("Expected a '%c' but saw '%c'" % (closer,c), 4688 outer_position=start_position, context=cdesc) 4689 done = True 4690 break 4691 4692 if state.should_stop: 4693 break 4694 4695 # Decode the item/value 4696 value_position = buf.position 4697 4698 if isdict: 4699 val = self.decodeobj(state, identifier_as_string=True) 4700 else: 4701 val = self.decodeobj(state, identifier_as_string=False) 4702 4703 if val is syntax_error: 4704 recover_c = self.recover_parser(state) 4705 if recover_c not in ':': 4706 
continue 4707 4708 if state.should_stop: 4709 break 4710 4711 if saw_value: 4712 # Two values without a separating comma 4713 if isdict: 4714 cdesc='Object' 4715 else: 4716 cdesc='Array' 4717 state.push_error('Values must be separated by a comma', 4718 position=value_position, outer_position=start_position, 4719 context=cdesc) 4720 4721 saw_value = True 4722 self.skipws(state) 4723 4724 if state.should_stop: 4725 break 4726 4727 if isdict: 4728 skip_item = False 4729 key = val # Ref 11.1.5 4730 key_position = value_position 4731 if not helpers.isstringtype(key): 4732 if helpers.isnumbertype(key): 4733 state.push_cond( self.options.nonstring_keys, 4734 'JSON only permits string literals as object properties (keys)', 4735 position=key_position, outer_position=start_position, 4736 context='Object') 4737 else: 4738 state.push_error('Object properties (keys) must be string literals, numbers, or identifiers', 4739 position=key_position, outer_position=start_position, 4740 context='Object') 4741 skip_item = True 4742 c = buf.peek() 4743 if c != ':': 4744 state.push_error('Missing value for object property, expected ":"', 4745 position=value_position, outer_position=start_position, 4746 context='Object') 4747 buf.skip() # skip over colon 4748 self.skipws(state) 4749 4750 rval = self.decodeobj(state) 4751 self.skipws(state) 4752 if not skip_item: 4753 if key in obj: 4754 state.push_cond( self.options.duplicate_keys, 4755 'Object contains duplicate key', 4756 key, position=key_position, outer_position=start_position, 4757 context='Object') 4758 if key == '': 4759 state.push_cond( self.options.non_portable, 4760 'Using an empty string "" as an object key may not be portable', 4761 position=key_position, outer_position=start_position, 4762 context='Object') 4763 obj[ key ] = rval 4764 num_items += 1 4765 else: # islist 4766 obj.append( val ) 4767 num_items += 1 4768 # end while 4769 4770 if state.stats: 4771 if isdict: 4772 state.stats.max_items_in_object = max(state.stats.max_items_in_object, num_items) 4773 else: 4774 state.stats.max_items_in_array = max(state.stats.max_items_in_array, num_items) 4775 4776 if state.should_stop: 4777 return obj 4778 4779 # Make sure composite value is properly terminated 4780 if not done: 4781 if isdict: 4782 state.push_error('Object literal (dictionary) is not terminated', 4783 outer_position=start_position, context='Object') 4784 else: 4785 state.push_error('Array literal (list) is not terminated', 4786 outer_position=start_position, context='Array') 4787 4788 # Update stats and run hooks 4789 if isdict: 4790 state.stats.num_objects += 1 4791 if self.has_hook('decode_object'): 4792 try: 4793 obj = self.call_hook( 'decode_object', obj, position=start_position ) 4794 except JSONSkipHook: 4795 pass 4796 except JSONError, err: 4797 state.push_exception(err) 4798 obj = undefined 4799 else: 4800 state.stats.num_arrays += 1 4801 if self.has_hook('decode_array'): 4802 try: 4803 obj = self.call_hook( 'decode_array', obj, position=start_position ) 4804 except JSONSkipHook: 4805 pass 4806 except JSONError, err: 4807 state.push_exception(err) 4808 obj = undefined 4809 return obj 4810 4811 4812 def decode_javascript_identifier(self, name): 4813 """Convert a JavaScript identifier into a Python string object. 4814 4815 This method can be overriden by a subclass to redefine how JavaScript 4816 identifiers are turned into Python objects. By default this just 4817 converts them into strings. 
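
        For example, a hypothetical subclass (a sketch only, not part of
        this module) could tag identifiers so that they remain
        distinguishable from ordinary quoted strings:

            class MyJSON( JSON ):
                def decode_javascript_identifier( self, name ):
                    return u'identifier:' + name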
4818 4819 """ 4820 return name 4821 4822 4823 def decodeobj(self, state, identifier_as_string=False, at_document_start=False): 4824 """Intermediate-level JSON decoder. 4825 4826 Takes a string and a starting index, and returns a two-tuple consting 4827 of a Python object and the index of the next unparsed character. 4828 4829 If there is no value at all (empty string, etc), then None is 4830 returned instead of a tuple. 4831 4832 """ 4833 buf = state.buf 4834 obj = None 4835 self.skipws(state) 4836 if buf.at_end: 4837 state.push_error('Unexpected end of input') 4838 4839 c = buf.peek() 4840 if c in '{[': 4841 state.cur_depth += 1 4842 try: 4843 state.update_depth_stats() 4844 obj = self.decode_composite(state) 4845 finally: 4846 state.cur_depth -= 1 4847 else: 4848 if at_document_start: 4849 state.push_cond( self.options.any_type_at_start, 4850 'JSON document must start with an object or array type only' ) 4851 if c in self._string_quotes: 4852 obj = self.decode_string(state) 4853 elif c.isdigit() or c in '.+-': 4854 obj = self.decode_number(state) 4855 elif c.isalpha() or c in'_$': 4856 obj = self.decode_identifier(state, identifier_as_string=identifier_as_string) 4857 else: 4858 state.push_error('Can not decode value starting with character %r' % c) 4859 buf.skip() 4860 self.recover_parser(state) 4861 obj = syntax_error 4862 return obj 4863 4864 4865 def decode(self, txt, encoding=None, return_errors=False, return_stats=False): 4866 """Decodes a JSON-encoded string into a Python object. 4867 4868 The 'return_errors' parameter controls what happens if the 4869 input JSON has errors in it. 4870 4871 * False: the first error will be raised as a Python 4872 exception. If there are no errors then the corresponding 4873 Python object will be returned. 4874 4875 * True: the return value is always a 2-tuple: (object, error_list) 4876 4877 """ 4878 import sys 4879 state = decode_state( options=self.options ) 4880 4881 # Prepare the input 4882 state.set_input( txt, encoding=encoding ) 4883 4884 # Do the decoding 4885 if not state.has_errors: 4886 self.__sanity_check_start( state ) 4887 4888 if not state.has_errors: 4889 try: 4890 self._do_decode( state ) # DECODE! 4891 except JSONException, err: 4892 state.push_exception( err ) 4893 except Exception, err: # Mainly here to catch maximum recursion depth exceeded 4894 e2 = sys.exc_info() 4895 raise 4896 newerr = JSONDecodeError("An unexpected failure occured", severity='fatal', position=state.buf.position) 4897 newerr.__cause__ = err 4898 newerr.__traceback__ = e2[2] 4899 state.push_exception( newerr ) 4900 4901 if return_stats and state.buf: 4902 state.stats.num_excess_whitespace = state.buf.num_ws_skipped 4903 state.stats.total_chars = state.buf.position.char_position 4904 4905 # Handle the errors 4906 result_type = _namedtuple('json_results',['object','errors','stats']) 4907 4908 if return_errors: 4909 if return_stats: 4910 return result_type(state.obj, state.errors, state.stats) 4911 else: 4912 return result_type(state.obj, state.errors, None) 4913 else: 4914 # Don't cause warnings to raise an error 4915 errors = [err for err in state.errors if err.severity in ('fatal','error')] 4916 if errors: 4917 raise errors[0] 4918 if return_stats: 4919 return result_type(state.obj, None, state.stats) 4920 else: 4921 return state.obj 4922 4923 def __sanity_check_start(self, state): 4924 """Check that the document seems sane by looking at the first couple characters. 4925 4926 Check that the decoding seems sane. 
Per RFC 4627 section 3: 4927 "Since the first two characters of a JSON text will 4928 always be ASCII characters [RFC0020], ..." 4929 [WAS removed from RFC 7158, but still valid via the grammar.] 4930 4931 This check is probably not necessary, but it allows us to 4932 raise a suitably descriptive error rather than an obscure 4933 syntax error later on. 4934 4935 Note that the RFC requirements of two ASCII characters seems 4936 to be an incorrect statement as a JSON string literal may have 4937 as it's first character any unicode character. Thus the first 4938 two characters will always be ASCII, unless the first 4939 character is a quotation mark. And in non-strict mode we can 4940 also have a few other characters too. 4941 4942 """ 4943 is_sane = True 4944 unitxt = state.buf.peekstr(2) 4945 if len(unitxt) >= 2: 4946 first, second = unitxt[:2] 4947 if first in self._string_quotes: 4948 pass # second can be anything inside string literal 4949 else: 4950 if ((ord(first) < 0x20 or ord(first) > 0x7f) or \ 4951 (ord(second) < 0x20 or ord(second) > 0x7f)) and \ 4952 (not self.isws(first) and not self.isws(second)): 4953 # Found non-printable ascii, must check unicode 4954 # categories to see if the character is legal. 4955 # Only whitespace, line and paragraph separators, 4956 # and format control chars are legal here. 4957 import unicodedata 4958 catfirst = unicodedata.category(unicode(first)) 4959 catsecond = unicodedata.category(unicode(second)) 4960 if catfirst not in ('Zs','Zl','Zp','Cf') or \ 4961 catsecond not in ('Zs','Zl','Zp','Cf'): 4962 state.push_fatal( 'The input is gibberish, is the Unicode encoding correct?' ) 4963 return is_sane 4964 4965 def _do_decode(self, state): 4966 """This is the internal function that does the JSON decoding. 4967 4968 Called by the decode() method, after it has performed any Unicode decoding, etc. 4969 """ 4970 buf = state.buf 4971 self.skipws(state) 4972 4973 if buf.at_end: 4974 state.push_error('No value to decode') 4975 else: 4976 if state.options.decimal_context: 4977 dec_ctx = decimal.localcontext( state.options.decimal_context ) 4978 else: 4979 dec_ctx = _dummy_context_manager 4980 4981 with dec_ctx: 4982 state.obj = self.decodeobj(state, at_document_start=True ) 4983 4984 if not state.should_stop: 4985 # Make sure there's nothing at the end 4986 self.skipws(state) 4987 if not buf.at_end: 4988 state.push_error('Unexpected text after end of JSON value') 4989 4990 def _classify_for_encoding( self, obj ): 4991 import datetime 4992 c = 'other' 4993 if obj is None: 4994 c = 'null' 4995 elif obj is undefined: 4996 c = 'undefined' 4997 elif isinstance(obj,bool): 4998 c = 'bool' 4999 elif isinstance(obj, (int,long,float,complex)) or\ 5000 (decimal and isinstance(obj, decimal.Decimal)): 5001 c = 'number' 5002 elif isinstance(obj, basestring) or helpers.isstringtype(obj): 5003 c = 'string' 5004 else: 5005 if isinstance(obj,dict): 5006 c = 'dict' 5007 elif isinstance(obj,tuple) and hasattr(obj,'_asdict') and callable(obj._asdict): 5008 # Have a named tuple 5009 enc_nt = self.options.encode_namedtuple_as_object 5010 if enc_nt and (enc_nt is True or (callable(enc_nt) and enc_nt(obj))): 5011 c = 'namedtuple' 5012 else: 5013 c = 'sequence' 5014 elif isinstance(obj, (list,tuple,set,frozenset)): 5015 c = 'sequence' 5016 elif hasattr(obj,'iterkeys') or (hasattr(obj,'__getitem__') and hasattr(obj,'keys')): 5017 c = 'dict' 5018 elif isinstance(obj, datetime.datetime): 5019 # Check datetime before date because it is a subclass! 
5020 c = 'datetime' 5021 elif isinstance(obj, datetime.date): 5022 c = 'date' 5023 elif isinstance(obj, datetime.time): 5024 c = 'time' 5025 elif isinstance(obj, datetime.timedelta): 5026 c = 'timedelta' 5027 elif _py_major >= 3 and isinstance(obj,(bytes,bytearray)): 5028 c = 'bytes' 5029 elif _py_major >= 3 and isinstance(obj,memoryview): 5030 c = 'memoryview' 5031 elif _enum is not None and isinstance(obj,_enum): 5032 c = 'enum' 5033 else: 5034 c = 'other' 5035 return c 5036 5037 def encode(self, obj, encoding=None ): 5038 """Encodes the Python object into a JSON string representation. 5039 5040 This method will first attempt to encode an object by seeing 5041 if it has a json_equivalent() method. If so than it will 5042 call that method and then recursively attempt to encode 5043 the object resulting from that call. 5044 5045 Next it will attempt to determine if the object is a native 5046 type or acts like a squence or dictionary. If so it will 5047 encode that object directly. 5048 5049 Finally, if no other strategy for encoding the object of that 5050 type exists, it will call the encode_default() method. That 5051 method currently raises an error, but it could be overridden 5052 by subclasses to provide a hook for extending the types which 5053 can be encoded. 5054 5055 """ 5056 import sys, codecs 5057 5058 # Make a fresh encoding state 5059 state = encode_state( self.options ) 5060 5061 # Find the codec to use. CodecInfo will be in 'cdk' and name in 'encoding'. 5062 # 5063 # Also set the state's 'escape_unicode_test' property which is used to 5064 # determine what characters to \u-escape. 5065 if encoding is None: 5066 cdk = None 5067 elif isinstance(encoding, codecs.CodecInfo): 5068 cdk = encoding 5069 encoding = cdk.name 5070 else: 5071 cdk = helpers.lookup_codec( encoding ) 5072 if not cdk: 5073 raise JSONEncodeError('no codec available for character encoding',encoding) 5074 5075 if self.options.escape_unicode and callable(self.options.escape_unicode): 5076 # User-supplied repertoire test function 5077 state.escape_unicode_test = self.options.escape_unicode 5078 else: 5079 if self.options.escape_unicode==True or not cdk or cdk.name.lower() == 'ascii': 5080 # ASCII, ISO8859-1, or and Unknown codec -- \u escape anything not ASCII 5081 state.escape_unicode_test = lambda c: ord(c) >= 0x80 5082 elif cdk.name == 'iso8859-1': 5083 state.escape_unicode_test = lambda c: ord(c) >= 0x100 5084 elif cdk and cdk.name.lower().startswith('utf'): 5085 # All UTF-x encodings can do the whole Unicode repertoire, so 5086 # do nothing special. 5087 state.escape_unicode_test = False 5088 else: 5089 # An unusual codec. We need to test every character 5090 # to see if it is in the codec's repertoire to determine 5091 # if we should \u escape that character. 5092 enc_func = cdk.encode 5093 def escape_unicode_hardway( c ): 5094 try: 5095 enc_func( c ) 5096 except UnicodeEncodeError: 5097 return True 5098 else: 5099 return False 5100 state.escape_unicode_test = escape_unicode_hardway 5101 5102 # Make sure the encoding is not degenerate: it can encode the minimal 5103 # number of characters needed by the JSON syntax rules. 5104 if encoding is not None: 5105 try: 5106 output, nchars = cdk.encode( JSON.json_syntax_characters ) 5107 except UnicodeError, err: 5108 raise JSONEncodeError("Output encoding %s is not sufficient to encode JSON" % cdk.name) 5109 5110 # Do the JSON encoding! 
5111 self._do_encode( obj, state ) 5112 if not self.options.encode_compactly: 5113 state.append('\n') 5114 unitxt = state.combine() 5115 5116 # Do the final Unicode encoding 5117 if encoding is None: 5118 output = unitxt 5119 else: 5120 try: 5121 output, nchars = cdk.encode( unitxt ) 5122 except UnicodeEncodeError, err: 5123 # Re-raise as a JSONDecodeError 5124 e2 = sys.exc_info() 5125 newerr = JSONEncodeError("a Unicode encoding error occurred") 5126 # Simulate Python 3's: "raise X from Y" exception chaining 5127 newerr.__cause__ = err 5128 newerr.__traceback__ = e2[2] 5129 raise newerr 5130 return output 5131 5132 5133 def _do_encode(self, obj, state): 5134 """Internal encode function.""" 5135 obj_classification = self._classify_for_encoding( obj ) 5136 5137 if self.has_hook('encode_value'): 5138 orig_obj = obj 5139 try: 5140 obj = self.call_hook( 'encode_value', obj ) 5141 except JSONSkipHook: 5142 pass 5143 5144 if obj is not orig_obj: 5145 prev_cls = obj_classification 5146 obj_classification = self._classify_for_encoding( obj ) 5147 if obj_classification != prev_cls: 5148 # Got a different type of object, re-encode again 5149 self._do_encode( obj, state ) 5150 return 5151 5152 if hasattr(obj, 'json_equivalent'): 5153 success = self.encode_equivalent( obj, state ) 5154 if success: 5155 return 5156 5157 if obj_classification == 'null': 5158 self.encode_null( state ) 5159 elif obj_classification == 'undefined': 5160 if not self.options.is_forbid_undefined_values: 5161 self.encode_undefined( state ) 5162 else: 5163 raise JSONEncodeError('strict JSON does not permit "undefined" values') 5164 elif obj_classification == 'bool': 5165 self.encode_boolean( obj, state ) 5166 elif obj_classification == 'number': 5167 try: 5168 self.encode_number( obj, state ) 5169 except JSONEncodeError, err1: 5170 # Bad number, probably a complex with non-zero imaginary part. 5171 # Let the default encoders take a shot at encoding. 5172 try: 5173 self.try_encode_default(obj, state) 5174 except Exception, err2: 5175 # Default handlers couldn't deal with it, re-raise original exception. 5176 raise err1 5177 elif obj_classification == 'string': 5178 self.encode_string( obj, state ) 5179 elif obj_classification == 'enum': # Python 3.4 enum.Enum 5180 self.encode_enum( obj, state ) 5181 elif obj_classification == 'datetime': # Python datetime.datetime 5182 self.encode_datetime( obj, state ) 5183 elif obj_classification == 'date': # Python datetime.date 5184 self.encode_date( obj, state ) 5185 elif obj_classification == 'time': # Python datetime.time 5186 self.encode_time( obj, state ) 5187 elif obj_classification == 'timedelta': # Python datetime.time 5188 self.encode_timedelta( obj, state ) 5189 else: 5190 # Anything left is probably composite, or an unconvertable type. 
            self.encode_composite( obj, state )


    def encode_enum(self, val, state):
        """Encode a Python Enum value into JSON."""
        eas = self.options.encode_enum_as
        if eas == 'qname':
            self.encode_string( str(val), state )
        elif eas == 'value':
            self._do_encode( val.value, state )
        else:  # eas == 'name'
            self.encode_string( val.name, state )

    def encode_date(self, dt, state):
        fmt = self.options.date_format
        if not fmt or fmt == 'iso':
            fmt = '%Y-%m-%d'
        self.encode_string( dt.strftime(fmt), state )

    def encode_datetime(self, dt, state):
        fmt = self.options.datetime_format
        is_iso = not fmt or fmt == 'iso'
        if is_iso:
            if dt.microsecond == 0:
                fmt = '%Y-%m-%dT%H:%M:%S%z'
            else:
                fmt = '%Y-%m-%dT%H:%M:%S.%f%z'
        s = dt.strftime(fmt)
        if is_iso and (s.endswith('-00:00') or s.endswith('+00:00')):
            s = s[:-6] + 'Z'  # Change UTC to use 'Z' notation
        self.encode_string( s, state )

    def encode_time(self, t, state):
        fmt = self.options.datetime_format
        is_iso = not fmt or fmt == 'iso'
        if is_iso:
            if t.microsecond == 0:
                fmt = 'T%H:%M:%S%z'
            else:
                fmt = 'T%H:%M:%S.%f%z'
        s = t.strftime(fmt)
        if is_iso and (s.endswith('-00:00') or s.endswith('+00:00')):
            s = s[:-6] + 'Z'  # Change UTC to use 'Z' notation
        self.encode_string( s, state )

    def encode_timedelta(self, td, state):
        fmt = self.options.timedelta_format
        if not fmt or fmt == 'iso':
            s = helpers.format_timedelta_iso( td )
        elif fmt == 'hms':
            s = str(td)
        else:
            raise ValueError("Unknown timedelta_format %r" % fmt)
        self.encode_string( s, state )

    def encode_composite(self, obj, state, obj_classification=None):
        """Encodes just composite objects: dictionaries, lists, or sequences.

        Basically handles any python type for which iter() can create
        an iterator object.

        This method is not intended to be called directly. Use the
        encode() method instead.

        """
        import sys
        if not obj_classification:
            obj_classification = self._classify_for_encoding(obj)

        # Convert namedtuples to dictionaries
        if obj_classification == 'namedtuple':
            obj = obj._asdict()
            obj_classification = 'dict'

        # Convert 'unsigned byte' memory views into plain bytes
        if obj_classification == 'memoryview' and obj.format == 'B':
            obj = obj.tobytes()
            obj_classification = 'bytes'

        # Run hooks
        hook_name = None
        if obj_classification == 'dict':
            hook_name = 'encode_dict'
        elif obj_classification == 'sequence':
            hook_name = 'encode_sequence'
        elif obj_classification == 'bytes':
            hook_name = 'encode_bytes'

        if self.has_hook(hook_name):
            try:
                new_obj = self.call_hook( hook_name, obj )
            except JSONSkipHook:
                pass
            else:
                if new_obj is not obj:
                    obj = new_obj
                    prev_cls = obj_classification
                    obj_classification = self._classify_for_encoding( obj )
                    if obj_classification != prev_cls:
                        # Transformed to a different kind of object, call
                        # back to the general encode() method.
                        self._do_encode( obj, state )
                        return
                # Else, fall through

        # At this point we are dealing with either an object or an array
        isdict = (obj_classification == 'dict')

        # Get iterator
        it = None
        if isdict and hasattr(obj,'iterkeys'):
            try:
                it = obj.iterkeys()
            except AttributeError:
                pass
        else:
            try:
                it = iter(obj)
            except TypeError:
                pass

        # Convert each member to JSON
        if it is not None:
            # Try to get length, but don't fail if we can't
            try:
                numitems = len(obj)
            except TypeError:
                numitems = 0

            # Output the opening bracket or brace
            compactly = self.options.encode_compactly
            if not compactly:
                indent0 = self.options.indentation_for_level( state.nest_level )
                indent = self.options.indentation_for_level( state.nest_level+1 )

            spaces_after_opener = ''
            if isdict:
                opener = '{'
                closer = '}'
                if compactly:
                    dictcolon = ':'
                else:
                    dictcolon = ' : '
            else:
                opener = '['
                closer = ']'
            if not compactly:
                #opener = opener + ' '
                spaces_after_opener = self.options.spaces_to_next_indent_level(subtract=len(opener))

            state.append( opener )
            state.append( spaces_after_opener )

            # Now iterate through all the items and collect their representations
            parts = []       # Collects each of the members
            part_keys = []   # For dictionary key sorting, tuples (key,index)

            try:   # while not StopIteration
                part_idx = 0
                while True:
                    obj2 = it.next()
                    part_idx += 1   # Note, will start counting at 1
                    if obj2 is obj:
                        raise JSONEncodeError('trying to encode an infinite sequence',obj)
                    if isdict:
                        obj3 = obj[obj2]
                        # Dictionary key is in obj2 and value in obj3.

                        # Let any hooks transform the key.
                        if self.has_hook('encode_value'):
                            try:
                                newobj = self.call_hook( 'encode_value', obj2 )
                            except JSONSkipHook:
                                pass
                            else:
                                obj2 = newobj
                        if self.has_hook('encode_dict_key'):
                            try:
                                newkey = self.call_hook( 'encode_dict_key', obj2 )
                            except JSONSkipHook:
                                pass
                            else:
                                obj2 = newkey

                        # Check JSON restrictions on key types
                        if not helpers.isstringtype(obj2):
                            if helpers.isnumbertype(obj2):
                                if not self.options.is_allow_nonstring_keys:
                                    raise JSONEncodeError('object properties (dictionary keys) must be strings in strict JSON',obj2)
                            else:
                                raise JSONEncodeError('object properties (dictionary keys) can only be strings or numbers in ECMAScript',obj2)
                        part_keys.append( (obj2, part_idx-1) )

                    # Encode this item in the sequence and put into item_chunks
                    substate = state.make_substate()
                    self._do_encode( obj2, substate )
                    if isdict:
                        substate.append( dictcolon )
                        substate2 = substate.make_substate()
                        self._do_encode( obj3, substate2 )
                        substate.join_substate( substate2 )
                    parts.append( substate )
                    # Next item iteration
            except StopIteration:
                pass

            # Sort dictionary keys
            if isdict:
                srt = self.options.sort_keys
                if srt == SORT_PRESERVE:
                    if _OrderedDict and isinstance(obj,_OrderedDict):
                        srt = SORT_NONE   # Will keep order
                    else:
                        srt = SORT_SMART

                if not srt or srt in (SORT_NONE, SORT_PRESERVE):
                    srt = None
                elif callable(srt):
                    part_keys.sort( key=(lambda t: (srt(t[0]),t[0])) )
                elif srt == SORT_SMART:
                    part_keys.sort( key=(lambda t: (smart_sort_transform(t[0]),t[0])) )
                elif srt == SORT_ALPHA_CI:
                    part_keys.sort( key=(lambda t: (unicode(t[0]).upper(),t[0])) )
                elif srt or srt == SORT_ALPHA:
                    part_keys.sort( key=(lambda t: unicode(t[0])) )
                # Now make parts match the new sort order
                if srt is not None:
                    parts = [parts[pk[1]] for pk in part_keys]

            if compactly:
                sep = ','
            elif len(parts) <= self.options.max_items_per_line:
                sep = ', '
            else:
                #state.append(spaces_after_opener)
                state.append('\n' + indent)
                sep = ',\n' + indent

            for pnum, substate in enumerate(parts):
                if pnum > 0:
                    state.append( sep )
                state.join_substate( substate )

            if not compactly:
                if numitems > self.options.max_items_per_line:
                    state.append('\n' + indent0)
                else:
                    state.append(' ')
            state.append(closer)   # final '}' or ']'
        else: # Can't create an iterator for the object
            self.try_encode_default( obj, state )


    def encode_equivalent( self, obj, state ):
        """This method is used to encode user-defined class objects.

        The object being encoded should have a json_equivalent()
        method defined which returns another equivalent object which
        is easily JSON-encoded.  If the object in question has no
        json_equivalent() method then False is returned, so that the
        encoding will fall back to its other strategies.

        If a caller wishes to disable the calling of json_equivalent()
        methods, then subclass this class and override this method
        to just return False.
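
        For example (a hypothetical user-defined class, shown here only
        to illustrate the expected shape of a json_equivalent() method):

            class Point(object):
                def __init__(self, x, y):
                    self.x = x
                    self.y = y
                def json_equivalent(self):
                    # Return plain Python values to be encoded; do NOT
                    # return an already-encoded JSON string.
                    return {'x': self.x, 'y': self.y}

        Encoding a Point instance will then encode the returned
        dictionary just as it would any other mapping.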

        """
        if hasattr(obj, 'json_equivalent') \
               and callable(getattr(obj,'json_equivalent')):
            obj2 = obj.json_equivalent()
            if obj2 is obj:
                # Try to prevent careless infinite recursion
                raise JSONEncodeError('object has a json_equivalent() method that returns itself',obj)
            self._do_encode( obj2, state )
            return True
        else:
            return False

    def try_encode_default( self, obj, state ):
        orig_obj = obj
        if self.has_hook('encode_default'):
            try:
                obj = self.call_hook( 'encode_default', obj )
            except JSONSkipHook:
                pass
            else:
                if obj is not orig_obj:
                    # Hook made a transformation, re-encode it
                    return self._do_encode( obj, state )

        # End of the road.
        raise JSONEncodeError('can not encode object into a JSON representation',obj)


# ------------------------------

def encode( obj, encoding=None, **kwargs ):
    r"""Encodes a Python object into a JSON-encoded string.

    * 'strict' (Boolean, default False)

        If 'strict' is set to True, then only strictly-conforming JSON
        output will be produced.  Note that this means that some types
        of values may not be convertible and will result in a
        JSONEncodeError exception.

    * 'compactly' (Boolean, default True)

        If 'compactly' is set to True, then the resulting string will
        have all extraneous white space removed; if False then the
        string will be "pretty printed" with whitespace and
        indentation added to make it more readable.

    * 'encode_namedtuple_as_object' (Boolean or callable, default True)

        If True, then objects of type namedtuple, or subclasses of
        'tuple' that have an _asdict() method, will be encoded as an
        object rather than an array.
        It can also be a predicate function that takes a namedtuple
        object as an argument and returns True or False.

    * 'indent_amount' (Integer, default 2)

        The number of spaces to output for each indentation level.
        If 'compactly' is True then indentation is ignored.

    * 'indent_limit' (Integer or None, default None)

        If not None, then this is the maximum limit of indentation
        levels, after which further indentation spaces are not
        inserted.  If None, then there is no limit.

    CONCERNING CHARACTER ENCODING:

    The 'encoding' argument should be one of:

        * None - The return will be a Unicode string.
        * encoding_name - A string which is the name of a known
              encoding, such as 'UTF-8' or 'ascii'.
        * codec - A CodecInfo object, such as that found by codecs.lookup().
              This allows you to use a custom codec as well as those
              built into Python.

    If an encoding is given (either by name or by codec), then the
    returned value will be a byte array (Python 3) or a 'str' string
    (Python 2), which represents the raw set of bytes.  Otherwise,
    if encoding is None, then the returned value will be a Unicode
    string.

    The 'escape_unicode' argument is used to determine which characters
    in string literals must be \u escaped.  Should be one of:

        * True  -- All non-ASCII characters are always \u escaped.
        * False -- Try to insert actual Unicode characters if possible.
        * function -- A user-supplied function that accepts a single
              unicode character and returns True or False; where True
              means to \u escape that character.

    Regardless of escape_unicode, certain characters will always be
    \u escaped.  Additionally, any characters not in the output encoding
    repertoire for the encoding codec will be \u escaped as well.

    """
    # Do the JSON encoding
    j = JSON( **kwargs )
    output = j.encode( obj, encoding )
    return output


def decode( txt, encoding=None, **kwargs ):
    """Decodes a JSON-encoded string into a Python object.

    == Optional arguments ==

    * 'encoding' (string, default None)

        This argument provides a hint regarding the character encoding
        that the input text is assumed to be in (if it is not already a
        unicode string type).

        If set to None then autodetection of the encoding is attempted
        (see discussion above).  Otherwise this argument should be the
        name of a registered codec (see the standard 'codecs' module).

    * 'strict' (Boolean, default False)

        If 'strict' is set to True, then input text that does not
        strictly conform to JSON will result in a JSONDecodeError
        exception.

    * 'return_errors' (Boolean, default False)

        Controls the return value from this function.  If False, then
        only the Python equivalent object is returned on success, or
        an error will be raised as an exception.

        If True then a 2-tuple is returned: (object, error_list).  The
        error_list will be an empty list [] if the decoding was
        successful, otherwise it will be a list of all the errors
        encountered.  Note that it is possible for an object to be
        returned even if errors were encountered.

    * 'return_stats' (Boolean, default False)

        Controls whether statistics about the decoded JSON document
        are returned (an instance of decode_statistics).

        If True, then the stats object will be added to the end of the
        tuple returned.  If return_errors is also set then a 3-tuple
        is returned, otherwise a 2-tuple is returned.

    * 'write_errors' (Boolean OR File-like object, default False)

        Controls what to do with errors.

        - If False, then the first decoding error is raised as an exception.
        - If True, then errors will be printed out to sys.stderr.
        - If a File-like object, then errors will be printed to that file.

        The write_errors and return_errors arguments can be set
        independently.

    * 'filename_for_errors' (string or None)

        Provides a filename to be used when writing error messages.

    * 'allow_xxx', 'warn_xxx', and 'forbid_xxx' (Booleans)

        These arguments allow for fine adjustments to be made to the
        'strict' argument, by allowing or forbidding specific
        syntaxes.

        There are many of these arguments, named by replacing the
        "xxx" with any number of possible behavior names (see the JSON
        class for more details).

        Each of these will allow (or forbid) the specific behavior,
        after the evaluation of the 'strict' argument.  For example,
        if strict=True then also passing 'allow_comments=True' will
        allow comments.  If strict=False then forbid_comments=True
        will allow everything except comments.

    Unicode decoding:
    -----------------
    The input string can be either a python string or a python unicode
    string (or a byte array in Python 3).  If it is already a unicode
    string, then it is assumed that no character set decoding is
    required.

    However, if you pass in a non-Unicode text string (a Python 2
    'str' type or a Python 3 'bytes' or 'bytearray') then an attempt
    will be made to auto-detect and decode the character encoding.
    This will be successful if the input was encoded in any of UTF-8,
    UTF-16 (BE or LE), or UTF-32 (BE or LE), and of course plain ASCII
    works too.

    Note though that if you know the character encoding, then you
    should convert to a unicode string yourself, or pass the codec
    name via the 'encoding' argument to avoid the guessing done by
    the auto-detection, as with

        python_object = demjson.decode( input_bytes, encoding='utf8' )

    Callback hooks:
    ---------------
    You may supply callback hooks by using the hook name as the
    named argument, such as:
        decode_float=decimal.Decimal

    See the hooks documentation on the JSON.set_hook() method.

    """
    import sys
    # Initialize the JSON object
    return_errors = False
    return_stats = False
    write_errors = False
    filename_for_errors = None
    write_stats = False

    kwargs = kwargs.copy()

    todel = []
    for kw,val in kwargs.items():
        if kw == "return_errors":
            return_errors = bool(val)
            todel.append(kw)
        elif kw == 'return_stats':
            return_stats = bool(val)
            todel.append(kw)
        elif kw == "write_errors":
            write_errors = val
            todel.append(kw)
        elif kw == "filename_for_errors":
            filename_for_errors = val
            todel.append(kw)
        elif kw == "write_stats":
            write_stats = val
            todel.append(kw)
        # next keyword argument
    for kw in todel:
        del kwargs[kw]

    j = JSON( **kwargs )

    # Now do the actual JSON decoding
    result = j.decode( txt,
                       encoding=encoding,
                       return_errors=(return_errors or write_errors),
                       return_stats=(return_stats or write_stats) )

    if write_errors:
        if write_errors is True:
            write_errors = sys.stderr
        for err in result.errors:
            write_errors.write( err.pretty_description(filename=filename_for_errors) + "\n" )

    if write_stats:
        if write_stats is True:
            write_stats = sys.stderr
        if result.stats:
            write_stats.write( "%s----- Begin JSON statistics\n" % filename_for_errors )
            write_stats.write( result.stats.pretty_description( prefix=" | " ) )
            write_stats.write( "%s----- End of JSON statistics\n" % filename_for_errors )
    return result



def encode_to_file( filename, obj, encoding='utf-8', overwrite=False, **kwargs ):
    """Encodes a Python object into JSON and writes into the given file.

    If no encoding is given, then UTF-8 will be used.

    See the encode() function for a description of other possible options.

    If the file already exists and the 'overwrite' option is not set
    to True, then the existing file will not be overwritten.
    (Note: there is a subtle race condition in this check, so it is
    still possible for a file to be overwritten.)

    """
    import os, errno
    if not encoding:
        encoding = 'utf-8'

    if not isinstance(filename,basestring) or not filename:
        raise TypeError("Expected a file name")

    if not overwrite and os.path.exists(filename):
        raise IOError(errno.EEXIST, "File exists: %r" % filename)

    jsondata = encode( obj, encoding=encoding, **kwargs )

    try:
        fp = open(filename, 'wb')
    except Exception:
        raise
    else:
        try:
            fp.write( jsondata )
        finally:
            fp.close()


def decode_file( filename, encoding=None, **kwargs ):
    """Decodes JSON found in the given file.

    See the decode() function for a description of other possible options.

    """
    if isinstance(filename,basestring):
        try:
            fp = open(filename, 'rb')
        except Exception:
            raise
        else:
            try:
                jsondata = fp.read()
            finally:
                fp.close()
    else:
        raise TypeError("Expected a file name")
    return decode( jsondata, encoding=encoding, **kwargs )


# ======================================================================

class jsonlint(object):
    """This class contains most of the logic for the "jsonlint" command.

    You generally create an instance of this class, to define the
    program's environment, and then call the main() method.  A simple
    wrapper to turn this into a script might be:

        import sys, demjson
        if __name__ == '__main__':
            lint = demjson.jsonlint( sys.argv[0] )
            sys.exit( lint.main( sys.argv[1:] ) )

    """
    _jsonlint_usage = r"""Usage: %(program_name)s [<options> ...] [--] inputfile.json ...

With no input filename, or "-", it will read from standard input.

The return status will be 0 if the file is conforming JSON (per the
RFC 7159 specification), or non-zero otherwise.

GENERAL OPTIONS:

 -v | --verbose    Show details of lint checking
 -q | --quiet      Don't show any output (except for reformatting)

STRICTNESS OPTIONS (WARNINGS AND ERRORS):

 -W | --tolerant   Be tolerant, but warn about non-conformance (default)
 -s | --strict     Be strict in what is considered conforming JSON
 -S | --nonstrict  Be tolerant in what is considered conforming JSON

 --allow=...      -\
 --warn=...        |-- These options let you pick specific behaviors.
 --forbid=...     -/   Use --help-behaviors for more

STATISTICS OPTIONS:

 --stats       Show statistics about JSON document

REFORMATTING OPTIONS:

 -f | --format             Reformat the JSON text (if conforming) to stdout
 -F | --format-compactly
        Reformat the JSON similar to -f, but do so compactly by
        removing all unnecessary whitespace

 -o filename | --output filename
        The filename to which reformatted JSON is to be written.
        Without this option the standard output is used.

 --[no-]keep-format    Try to preserve numeric radix, e.g., hex, octal, etc.
 --html-safe           Escape characters that are not safe to embed in HTML/XML.

 --sort <kind>     How to sort object/dictionary keys, <kind> is one of:
%(sort_options_help)s

 --indent tabs | <nnn>    Number of spaces to use per indentation level,
                          or use tab characters if "tabs" given.

UNICODE OPTIONS:

 -e codec | --encoding=codec    Set both input and output encodings
 --input-encoding=codec         Set the input encoding
 --output-encoding=codec        Set the output encoding

 These options set the character encoding codec (e.g., "ascii",
 "utf-8", "utf-16").  The -e will set both the input and output
 encodings to the same thing.  The output encoding is used when
 reformatting with the -f or -F options.

 Unless set, the input encoding is guessed and the output
 encoding will be "utf-8".

OTHER OPTIONS:

 --recursion-limit=nnn      Set the Python recursion limit to number
 --leading-zero-radix=8|10  The radix to use for numbers with leading
                            zeros. 8=octal, 10=decimal.

REFORMATTING / PRETTY-PRINTING:

 When reformatting JSON with -f or -F, output is only produced if
 the input passed validation.  By default the reformatted JSON will
 be written to standard output, unless the -o option was given.

 The default output codec is UTF-8, unless an encoding option is
 provided.  Any Unicode characters will be output as literal
 characters if the encoding permits, otherwise they will be
 \u-escaped.  You can use "--output-encoding ascii" to force all
 Unicode characters to be escaped.

MORE INFORMATION:

 Use '%(program_name)s --version [-v]' to see versioning information.
 Use '%(program_name)s --copyright' to see author and copyright details.
 Use '%(program_name)s [-W|-s|-S] --help-behaviors' for help on specific checks.

 %(program_name)s is distributed as part of the "demjson" Python module.
 See %(homepage)s
"""
    SUCCESS_FAIL = 'E'
    SUCCESS_WARNING = 'W'
    SUCCESS_OK = 'OK'

    def __init__(self, program_name='jsonlint', stdin=None, stdout=None, stderr=None ):
        """Create an instance of a "jsonlint" program.

        You can optionally pass options to define the program's environment:

          * program_name  - the name of the program, usually sys.argv[0]
          * stdin   - the file object to use for input, default sys.stdin
          * stdout  - the file object to use for output, default sys.stdout
          * stderr  - the file object to use for error output, default sys.stderr

        After creating an instance, you typically call the main() method.

        """
        import os, sys
        self.program_path = program_name
        self.program_name = os.path.basename(program_name)
        if stdin:
            self.stdin = stdin
        else:
            self.stdin = sys.stdin

        if stdout:
            self.stdout = stdout
        else:
            self.stdout = sys.stdout

        if stderr:
            self.stderr = stderr
        else:
            self.stderr = sys.stderr

    @property
    def usage(self):
        """A multi-line string containing the program usage instructions.
        """
        sorthelp = '\n'.join([
            "  %12s - %s" % (sm, sd)
            for sm, sd in sorted(sorting_methods.items()) if sm != SORT_NONE ])
        return self._jsonlint_usage % {'program_name':self.program_name,
                                       'homepage':__homepage__,
                                       'sort_options_help': sorthelp }

    def _lintcheck_data( self,
                         jsondata,
                         verbose_fp=None,
                         reformat=False,
                         show_stats=False,
                         input_encoding=None, output_encoding=None, escape_unicode=True,
                         pfx='',
                         jsonopts=None ):
        global decode, encode
        success = self.SUCCESS_FAIL
        reformatted = None
        if show_stats:
            stats_fp = verbose_fp
        else:
            stats_fp = None
        try:
            results = decode( jsondata, encoding=input_encoding,
                              return_errors=True,
                              return_stats=True,
                              write_errors=verbose_fp,
                              write_stats=stats_fp,
                              filename_for_errors=pfx,
                              json_options=jsonopts )
        except JSONError, err:
            success = self.SUCCESS_FAIL
            if verbose_fp:
                verbose_fp.write('%s%s\n' % (pfx, err.pretty_description()) )
        except Exception, err:
            success = self.SUCCESS_FAIL
            if verbose_fp:
                verbose_fp.write('%s%s\n' % (pfx, str(err) ))
        else:
            errors = [err for err in results.errors if err.severity in ('fatal','error')]
            warnings = [err for err in results.errors if err.severity in ('warning',)]
            if errors:
                success = self.SUCCESS_FAIL
            elif warnings:
                success = self.SUCCESS_WARNING
            else:
                success = self.SUCCESS_OK

            if reformat:
                encopts = jsonopts.copy()
                encopts.strictness = STRICTNESS_TOLERANT
                if reformat == 'compactly':
                    encopts.encode_compactly = True
                else:
                    encopts.encode_compactly = False

                reformatted = encode(results.object, encoding=output_encoding, json_options=encopts)

        return (success, reformatted)


    def _lintcheck( self, filename, output_filename,
                    verbose=False,
                    reformat=False,
                    show_stats=False,
                    input_encoding=None, output_encoding=None, escape_unicode=True,
                    jsonopts=None ):
        import sys
        verbose_fp = None

        if not filename or filename == "-":
            pfx = '<stdin>: '
            jsondata = self.stdin.read()
            if verbose:
                verbose_fp = self.stderr
        else:
            pfx = '%s: ' % filename
            try:
                fp = open( filename, 'rb' )
                jsondata = fp.read()
                fp.close()
            except IOError, err:
                self.stderr.write('%s: %s\n' % (pfx, str(err)) )
                return self.SUCCESS_FAIL
            if verbose:
                verbose_fp = self.stdout

        success, reformatted = self._lintcheck_data(
            jsondata,
            verbose_fp=verbose_fp,
            reformat=reformat,
            show_stats=show_stats,
            input_encoding=input_encoding, output_encoding=output_encoding,
            pfx=pfx,
            jsonopts=jsonopts )

        if success != self.SUCCESS_FAIL and reformat:
            if output_filename:
                try:
                    fp = open( output_filename, 'wb' )
                    fp.write( reformatted )
                except IOError, err:
                    self.stderr.write('%s: %s\n' % (pfx, str(err)) )
                    success = False
            else:
                if hasattr(sys.stdout,'buffer'):   # To write binary data rather than strings
                    self.stdout.buffer.write( reformatted )
                else:
                    self.stdout.write( reformatted )
        elif success == self.SUCCESS_OK and verbose_fp:
            verbose_fp.write('%sok\n' % pfx)
        elif success == self.SUCCESS_WARNING and verbose_fp:
            verbose_fp.write('%sok, with warnings\n' % pfx)
        elif verbose_fp:
            verbose_fp.write("%shas errors\n" % pfx)

        return success

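
    # Illustrative sketch (not part of the class API): the helper methods
    # above can also be driven directly, without going through main().
    # The file name and the options shown are arbitrary examples.
    #
    #     lint = jsonlint( 'jsonlint' )
    #     rc = lint._lintcheck( 'config.json', output_filename=None,
    #                           verbose=True,
    #                           jsonopts=json_options( strict=STRICTNESS_WARN ) )
    #     # rc is normally one of jsonlint.SUCCESS_OK, SUCCESS_WARNING,
    #     # or SUCCESS_FAIL.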

    def main( self, argv ):
        """The main routine for program "jsonlint".

        Should be called with sys.argv[1:] as its sole argument.

        Note sys.argv[0] which normally contains the program name
        should not be passed to main(); instead this class itself
        is initialized with sys.argv[0].

        Use "--help" for usage syntax, or consult the 'usage' member.

        """
        import sys, os, getopt, unicodedata

        recursion_limit = None
        success = True
        verbose = 'auto'   # one of 'auto', True, or False
        reformat = False
        show_stats = False
        output_filename = None
        input_encoding = None
        output_encoding = 'utf-8'

        kwoptions = {   # Will be used to initialize json_options
            "sort_keys": SORT_SMART,
            "strict": STRICTNESS_WARN,
            "keep_format": True,
            "decimal_context": 100,
            }

        try:
            opts, args = getopt.getopt( argv,
                                        'vqfFe:o:sSW',
                                        ['verbose','quiet',
                                         'format','format-compactly',
                                         'stats',
                                         'output=',
                                         'strict','nonstrict','warn','tolerant',
                                         'html-safe','xml-safe',
                                         'encoding=',
                                         'input-encoding=','output-encoding=',
                                         'sort=',
                                         'recursion-limit=',
                                         'leading-zero-radix=',
                                         'keep-format',
                                         'no-keep-format',
                                         'indent=',
                                         'indent-amount=',
                                         'indent-limit=',
                                         'indent-tab-width=',
                                         'max-items-per-line=',
                                         'allow=', 'warn=', 'forbid=', 'deny=',
                                         'help', 'help-behaviors',
                                         'version','copyright'] )
        except getopt.GetoptError, err:
            self.stderr.write( "Error: %s.  Use \"%s --help\" for usage information.\n" \
                                   % (err.msg, self.program_name) )
            return 1

        # Set verbose before looking at any other options
        for opt, val in opts:
            if opt in ('-v', '--verbose'):
                verbose = True

        # Process all options
        for opt, val in opts:
            if opt in ('-h', '--help'):
                self.stdout.write( self.usage )
                return 0
            elif opt == '--help-behaviors':
                self.stdout.write("""
BEHAVIOR OPTIONS:

This set of options lets you control which checks are to be performed.
They may be turned on or off by listing them as arguments to one of
the options --allow, --warn, or --forbid ; for example:

    %(program_name)s --allow comments,hex-numbers --forbid duplicate-keys

""" % {"program_name":self.program_name})
                self.stdout.write("The default shown is for %s mode\n\n" % kwoptions['strict'])
                self.stdout.write('%-7s %-25s %s\n' % ("Default", "Behavior_name", "Description"))
                self.stdout.write('-'*7 + ' ' + '-'*25 + ' ' + '-'*50 + '\n')
                j = json_options( **kwoptions )
                for behavior in sorted(j.all_behaviors):
                    v = j.get_behavior( behavior )
                    desc = j.describe_behavior( behavior )
                    self.stdout.write('%-7s %-25s %s\n' % (v.lower(), behavior.replace('_','-'), desc))
                return 0
            elif opt == '--version':
                self.stdout.write( '%s (%s) version %s (%s)\n' \
                                       % (self.program_name, __name__, __version__, __date__) )
                if verbose == True:
                    self.stdout.write( 'demjson from %r\n' % (__file__,) )
                if verbose == True:
                    self.stdout.write( 'Python version: %s\n' % (sys.version.replace('\n',' '),) )
                    self.stdout.write( 'This python implementation supports:\n' )
                    self.stdout.write( '  * Max unicode: U+%X\n' % (sys.maxunicode,) )
                    self.stdout.write( '  * Unicode version: %s\n' % (unicodedata.unidata_version,) )
                    self.stdout.write( '  * Floating-point significant digits: %d\n' % (float_sigdigits,) )
                    self.stdout.write( '  * Floating-point max 10^exponent: %d\n' % (float_maxexp,) )
                    if str(0.0)==str(-0.0):
                        szero = 'No'
                    else:
                        szero = 'Yes'
                    self.stdout.write( '  * Floating-point has signed-zeros: %s\n' % (szero,) )
                    if decimal:
                        has_dec = 'Yes'
                    else:
                        has_dec = 'No'
                    self.stdout.write( '  * Decimal (bigfloat) support: %s\n' % (has_dec,) )
                return 0
            elif opt == '--copyright':
                self.stdout.write( "%s is distributed as part of the \"demjson\" python package.\n" \
                                       % (self.program_name,) )
                self.stdout.write( "See %s\n\n\n" % (__homepage__,) )
                self.stdout.write( __credits__ )
                return 0
            elif opt in ('-v', '--verbose'):
                verbose = True
            elif opt in ('-q', '--quiet'):
                verbose = False
            elif opt in ('-s', '--strict'):
                kwoptions['strict'] = STRICTNESS_STRICT
                kwoptions['keep_format'] = False
            elif opt in ('-S', '--nonstrict'):
                kwoptions['strict'] = STRICTNESS_TOLERANT
            elif opt in ('-W', '--tolerant'):
                kwoptions['strict'] = STRICTNESS_WARN
            elif opt in ('-f', '--format'):
                reformat = True
                kwoptions['encode_compactly'] = False
            elif opt in ('-F', '--format-compactly'):
                kwoptions['encode_compactly'] = True
                reformat = 'compactly'
            elif opt in ('--stats',):
                show_stats = True
            elif opt in ('-o', '--output'):
                output_filename = val
            elif opt in ('-e','--encoding'):
                input_encoding = val
                output_encoding = val
                escape_unicode = False
            elif opt in ('--output-encoding',):
                output_encoding = val
                escape_unicode = False
            elif opt in ('--input-encoding',):
                input_encoding = val
            elif opt in ('--html-safe','--xml-safe'):
                kwoptions['html_safe'] = True
            elif opt in ('--allow','--warn','--forbid'):
                action = opt[2:]
                if action in kwoptions:
                    kwoptions[action] += "," + val
                else:
                    kwoptions[action] = val
            elif opt in ('--keep-format',):
                kwoptions['keep_format'] = True
            elif opt in ('--no-keep-format',):
                kwoptions['keep_format'] = False
            elif opt == '--leading-zero-radix':
                kwoptions['leading_zero_radix'] = val
            elif opt in ('--indent', '--indent-amount'):
                if val in ('tab','tabs'):
                    kwoptions['indent_amount'] = 8
                    kwoptions['indent_tab_width'] = 8
                else:
                    try:
                        kwoptions['indent_amount'] = int(val)
                    except ValueError:
                        self.stderr.write("Indentation amount must be a number\n")
                        return 1
            elif opt == '--indent-tab-width':
                try:
                    kwoptions['indent_tab_width'] = int(val)
                except ValueError:
                    self.stderr.write("Indentation tab width must be a number\n")
                    return 1
            elif opt == '--max-items-per-line':
                try:
                    kwoptions['max_items_per_line'] = int(val)
                except ValueError:
                    self.stderr.write("Max items per line must be a number\n")
                    return 1
            elif opt == '--sort':
                val = val.lower()
                if val == 'alpha':
                    kwoptions['sort_keys'] = SORT_ALPHA
                elif val == 'alpha_ci':
                    kwoptions['sort_keys'] = SORT_ALPHA_CI
                elif val == 'preserve':
                    kwoptions['sort_keys'] = SORT_PRESERVE
                else:
                    kwoptions['sort_keys'] = SORT_SMART
            elif opt == '--recursion-limit':
                try:
                    recursion_limit = int(val)
                except ValueError:
                    self.stderr.write("Recursion limit must be a number: %r\n" % val)
                    return 1
                else:
                    max_limit = 100000
                    old_limit = sys.getrecursionlimit()
                    if recursion_limit > max_limit:
                        self.stderr.write("Recursion limit must be a number between %d and %d\n" % (old_limit,max_limit))
                        return 1
                    elif recursion_limit > old_limit:
                        sys.setrecursionlimit( recursion_limit )
            else:
                self.stderr.write('Unknown option %r\n' % opt)
                return 1

        # Make the JSON options
        kwoptions['decimal_context'] = 100
        jsonopts = json_options( **kwoptions )

        # Now decode each file...
        if not args:
            args = [None]

        for fn in args:
            try:
                rc = self._lintcheck( fn, output_filename=output_filename,
                                      verbose=verbose,
                                      reformat=reformat,
                                      show_stats=show_stats,
                                      input_encoding=input_encoding,
                                      output_encoding=output_encoding,
                                      jsonopts=jsonopts )
                if rc != self.SUCCESS_OK:
                    # Warnings or errors should result in failure.  If
                    # checking multiple files, do not change a
                    # previous error back to ok.
                    success = False
            except KeyboardInterrupt, err:
                sys.stderr.write("\njsonlint interrupted!\n")
                sys.exit(1)

        if not success:
            return 1
        return 0

# end file
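
# ----------------------------------------------------------------------
# Illustrative usage of the module-level helpers above (shown as comments
# only so that importing this module is unaffected).  The file names and
# data values are arbitrary examples, not part of the API:
#
#     import demjson
#
#     # Python object -> JSON text
#     compact = demjson.encode( {"id": 1, "tags": ["a", "b"]} )
#     pretty  = demjson.encode( {"id": 1, "tags": ["a", "b"]}, compactly=False )
#
#     # JSON text -> Python object
#     obj = demjson.decode( '{"id": 1, "tags": ["a", "b"]}' )
#
#     # Collect any errors rather than raising on the first one; per the
#     # decode() documentation this returns an (object, error_list) tuple.
#     obj, errs = demjson.decode( '[1, 2, 3]', return_errors=True )
#
#     # File helpers
#     demjson.encode_to_file( "out.json", obj, overwrite=True )
#     obj2 = demjson.decode_file( "out.json" )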