# -*- coding:utf-8 -*-
'''
Tests for the ijson parsing backends and the backend independent helpers.
'''
from __future__ import unicode_literals
import unittest
from io import BytesIO, StringIO
from decimal import Decimal
import threading
from importlib import import_module

from ijson import common
from ijson.backends.python import basic_parse, Lexer
from ijson.compat import IS_PY2


# Main fixture: one document exercising every scalar type plus nesting.
# The "string" value mixes JSON \u escapes with raw UTF-8 bytes.
JSON = b'''
{
  "docs": [
    {
      "null": null,
      "boolean": false,
      "true": true,
      "integer": 0,
      "double": 0.5,
      "exponent": 1.0e+2,
      "long": 10000000000,
      "string": "\\u0441\\u0442\\u0440\\u043e\\u043a\\u0430 - \xd1\x82\xd0\xb5\xd1\x81\xd1\x82"
    },
    {
      "meta": [[1], {}]
    },
    {
      "meta": {"key": "value"}
    },
    {
      "meta": null
    }
  ]
}
'''
# Events that basic_parse is expected to produce for JSON, in order.
JSON_EVENTS = [
    ('start_map', None),
        ('map_key', 'docs'),
        ('start_array', None),
            ('start_map', None),
                ('map_key', 'null'),
                ('null', None),
                ('map_key', 'boolean'),
                ('boolean', False),
                ('map_key', 'true'),
                ('boolean', True),
                ('map_key', 'integer'),
                ('number', 0),
                ('map_key', 'double'),
                ('number', Decimal('0.5')),
                ('map_key', 'exponent'),
                ('number', 100),
                ('map_key', 'long'),
                ('number', 10000000000),
                ('map_key', 'string'),
                ('string', 'строка - тест'),
            ('end_map', None),
            ('start_map', None),
                ('map_key', 'meta'),
                ('start_array', None),
                    ('start_array', None),
                        ('number', 1),
                    ('end_array', None),
                    ('start_map', None),
                    ('end_map', None),
                ('end_array', None),
            ('end_map', None),
            ('start_map', None),
                ('map_key', 'meta'),
                ('start_map', None),
                    ('map_key', 'key'),
                    ('string', 'value'),
                ('end_map', None),
            ('end_map', None),
            ('start_map', None),
                ('map_key', 'meta'),
                ('null', None),
            ('end_map', None),
        ('end_array', None),
    ('end_map', None),
]
SCALAR_JSON = b'0'
INVALID_JSONS = [
    b'["key", "value",]',       # trailing comma
    b'["key" "value"]',         # no comma
    b'{"key": "value",}',       # trailing comma
    b'{"key": "value" "key"}',  # no comma
    b'{"key" "value"}',         # no colon
    b'invalid',                 # unknown lexeme
    b'[1, 2] dangling junk'     # dangling junk
]
# The "dangling junk" document; skipped for the 'YajlParse' case below.
YAJL1_PASSING_INVALID = INVALID_JSONS[6]
INCOMPLETE_JSONS = [
    b'',
    b'"test',
    b'[',
    b'[1',
    b'[1,',
    b'{',
    b'{"key"',
    b'{"key":',
    b'{"key": "value"',
    b'{"key": "value",',
]
# Raw literal: the backslashes below are JSON escapes, not Python ones.
STRINGS_JSON = br'''
{
  "str1": "",
  "str2": "\"",
  "str3": "\\",
  "str4": "\\\\",
  "special\t": "\b\f\n\r\t"
}
'''
NUMBERS_JSON = b'[1, 1.0, 1E2]'
# Backslashes are doubled so the bytes hold the literal JSON escapes
# \uD83D\uDCA9 without relying on an invalid escape in a bytes literal.
SURROGATE_PAIRS_JSON = b'"\\uD83D\\uDCA9"'


class Parse(object):
    '''
    Base class for parsing tests that is used to create test cases for each
    available backends.

    Concrete subclasses are generated below and provide a ``backend``
    attribute holding the backend module under test.
    '''
    def test_basic_parse(self):
        events = list(self.backend.basic_parse(BytesIO(JSON)))
        self.assertEqual(events, JSON_EVENTS)

    def test_basic_parse_threaded(self):
        # Exceptions raised inside a thread (assertion failures included)
        # do not reach the thread that called join(), so they are collected
        # here and re-raised; otherwise this test could never fail.
        errors = []

        def run():
            try:
                self.test_basic_parse()
            except Exception as exc:
                errors.append(exc)

        thread = threading.Thread(target=run)
        thread.start()
        thread.join()
        if errors:
            raise errors[0]

    def test_scalar(self):
        events = list(self.backend.basic_parse(BytesIO(SCALAR_JSON)))
        self.assertEqual(events, [('number', 0)])

    def test_strings(self):
        events = list(self.backend.basic_parse(BytesIO(STRINGS_JSON)))
        strings = [value for event, value in events if event == 'string']
        self.assertEqual(strings, ['', '"', '\\', '\\\\', '\b\f\n\r\t'])
        self.assertIn(('map_key', 'special\t'), events)

    def test_surrogate_pairs(self):
        event = next(self.backend.basic_parse(BytesIO(SURROGATE_PAIRS_JSON)))
        parsed_string = event[1]
        # \uD83D\uDCA9 is the UTF-16 surrogate pair for U+1F4A9.
        self.assertEqual(parsed_string, '\U0001F4A9')

    def test_numbers(self):
        events = list(self.backend.basic_parse(BytesIO(NUMBERS_JSON)))
        types = [type(value) for event, value in events if event == 'number']
        self.assertEqual(types, [int, Decimal, Decimal])

    def test_invalid(self):
        for doc in INVALID_JSONS:
            # Yajl1 doesn't complain about additional data after the end
            # of a parsed object. Skipping this test.
            if (self.__class__.__name__ == 'YajlParse'
                    and doc == YAJL1_PASSING_INVALID):
                continue
            with self.assertRaises(common.JSONError):
                list(self.backend.basic_parse(BytesIO(doc)))

    def test_incomplete(self):
        for doc in INCOMPLETE_JSONS:
            with self.assertRaises(common.IncompleteJSONError):
                list(self.backend.basic_parse(BytesIO(doc)))

    def test_utf8_split(self):
        # A buffer ending right after the first 0xd1 byte splits a
        # multi-byte UTF-8 sequence across two reads.
        buf_size = JSON.index(b'\xd1') + 1
        try:
            list(self.backend.basic_parse(BytesIO(JSON), buf_size=buf_size))
        except UnicodeDecodeError:
            self.fail('UnicodeDecodeError raised')

    def test_lazy(self):
        # shouldn't fail since iterator is not exhausted
        self.backend.basic_parse(BytesIO(INVALID_JSONS[0]))

    def test_boundary_lexeme(self):
        # Buffer boundary falls inside the 'false' lexeme.
        buf_size = JSON.index(b'false') + 1
        events = list(self.backend.basic_parse(BytesIO(JSON), buf_size=buf_size))
        self.assertEqual(events, JSON_EVENTS)

    def test_boundary_whitespace(self):
        # Buffer boundary falls right after a whitespace character.
        buf_size = JSON.index(b' ') + 1
        events = list(self.backend.basic_parse(BytesIO(JSON), buf_size=buf_size))
        self.assertEqual(events, JSON_EVENTS)

    def test_api(self):
        self.assertTrue(list(self.backend.items(BytesIO(JSON), '')))
        self.assertTrue(list(self.backend.parse(BytesIO(JSON))))


# Generating real TestCase classes for each importable backend
for name in ['python', 'yajl', 'yajl2', 'yajl2_cffi']:
    # Only the import itself is allowed to fail; backends that cannot be
    # imported are silently left out of the test run.
    try:
        backend = import_module('ijson.backends.%s' % name)
    except ImportError:
        continue

    classname = '%sParse' % ''.join(p.capitalize() for p in name.split('_'))
    if IS_PY2:
        # unicode_literals is active, so the name is encoded back to bytes
        # for type() when running on Python 2.
        classname = classname.encode('ascii')

    globals()[classname] = type(
        classname,
        (unittest.TestCase, Parse),
        {'backend': backend},
    )


class Common(unittest.TestCase):
    '''
    Backend independent tests. They all use basic_parse imported explicitly from
    the python backend to generate parsing events.
    '''
    def test_object_builder(self):
        builder = common.ObjectBuilder()
        for event, value in basic_parse(BytesIO(JSON)):
            builder.event(event, value)
        self.assertEqual(builder.value, {
            'docs': [
                {
                    'string': 'строка - тест',
                    'null': None,
                    'boolean': False,
                    'true': True,
                    'integer': 0,
                    'double': Decimal('0.5'),
                    'exponent': 100,
                    'long': 10000000000,
                },
                {
                    'meta': [[1], {}],
                },
                {
                    'meta': {'key': 'value'},
                },
                {
                    'meta': None,
                },
            ],
        })

    def test_scalar_builder(self):
        builder = common.ObjectBuilder()
        for event, value in basic_parse(BytesIO(SCALAR_JSON)):
            builder.event(event, value)
        self.assertEqual(builder.value, 0)

    def test_parse(self):
        events = common.parse(basic_parse(BytesIO(JSON)))
        events = [
            value
            for prefix, event, value in events
            if prefix == 'docs.item.meta.item.item'
        ]
        self.assertEqual(events, [1])

    def test_items(self):
        events = basic_parse(BytesIO(JSON))
        meta = list(common.items(common.parse(events), 'docs.item.meta'))
        self.assertEqual(meta, [
            [[1], {}],
            {'key': 'value'},
            None,
        ])


class Stream(unittest.TestCase):
    '''
    Checks that Lexer accepts both byte streams and text streams.
    '''
    def test_bytes(self):
        lexer = Lexer(BytesIO(JSON))
        self.assertEqual(next(lexer)[1], '{')

    def test_string(self):
        lexer = Lexer(StringIO(JSON.decode('utf-8')))
        self.assertEqual(next(lexer)[1], '{')


if __name__ == '__main__':
    unittest.main()