1 /*
2 Copyright (c) 2012 John-Anthony Owens
3
4 Permission is hereby granted, free of charge, to any person obtaining a
5 copy of this software and associated documentation files (the "Software"),
6 to deal in the Software without restriction, including without limitation
7 the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 and/or sell copies of the Software, and to permit persons to whom the
9 Software is furnished to do so, subject to the following conditions:
10
11 The above copyright notice and this permission notice shall be included
12 in all copies or substantial portions of the Software.
13
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
20 IN THE SOFTWARE.
21 */
22
23 #include <stdlib.h>
24 #include <string.h>
25
26 /* Ensure uint32_t type (compiler-dependent). */
27 #if defined(_MSC_VER)
28 typedef unsigned __int32 uint32_t;
29 #else
30 #include <stdint.h>
31 #endif
32
33 /* Ensure SIZE_MAX defined. */
34 #ifndef SIZE_MAX
35 #define SIZE_MAX ((size_t)-1)
36 #endif
37
38 /* Mark APIs for export (as opposed to import) when we build this file. */
39 #define JSON_BUILDING
40 #include <formats/jsonsax_full.h>
41
/* Default allocation constants. These size the fixed arrays embedded in the
   parser (defaultTokenBytes) and in the grammarian (defaultStack). */
#define DEFAULT_TOKEN_BYTES_LENGTH 64 /* MUST be a power of 2 */
#define DEFAULT_SYMBOL_STACK_SIZE 32 /* MUST be a power of 2 */

/* Types for readability. */
typedef unsigned char byte; /* unsigned storage unit used for raw bytes and small enums */
typedef uint32_t Codepoint; /* wide enough for MAX_CODEPOINT and EOF_CODEPOINT */
49
/* Especially-relevant Unicode codepoints. */
#define U_(x) ((Codepoint)(x)) /* casts a constant to the Codepoint type */
#define NULL_CODEPOINT U_(0x0000)
#define BACKSPACE_CODEPOINT U_(0x0008)
#define TAB_CODEPOINT U_(0x0009)
#define LINE_FEED_CODEPOINT U_(0x000A)
#define FORM_FEED_CODEPOINT U_(0x000C)
#define CARRIAGE_RETURN_CODEPOINT U_(0x000D)
#define FIRST_NON_CONTROL_CODEPOINT U_(0x0020)
#define DELETE_CODEPOINT U_(0x007F)
/* The first non-ASCII codepoint is also the first one that needs 2 bytes
   in UTF-8, hence the two names for the same value. */
#define FIRST_NON_ASCII_CODEPOINT U_(0x0080)
#define FIRST_2_BYTE_UTF8_CODEPOINT U_(0x0080)
#define FIRST_3_BYTE_UTF8_CODEPOINT U_(0x0800)
#define LINE_SEPARATOR_CODEPOINT U_(0x2028)
#define PARAGRAPH_SEPARATOR_CODEPOINT U_(0x2029)
#define BOM_CODEPOINT U_(0xFEFF)
#define REPLACEMENT_CHARACTER_CODEPOINT U_(0xFFFD)
/* Likewise, the first codepoint outside the BMP is the first one that
   needs 4 bytes in UTF-8. */
#define FIRST_NON_BMP_CODEPOINT U_(0x10000)
#define FIRST_4_BYTE_UTF8_CODEPOINT U_(0x10000)
#define MAX_CODEPOINT U_(0x10FFFF)
/* Lies above MAX_CODEPOINT, so it can never equal a real codepoint;
   presumably used as an end-of-input sentinel - confirm at the use sites. */
#define EOF_CODEPOINT U_(0xFFFFFFFF)
71
/* Bit-masking macros: keep only the low 3, 4, 5 or 6 bits of x. */
#define BOTTOM_3_BITS(x) ((x) & 0x7)
#define BOTTOM_4_BITS(x) ((x) & 0xF)
#define BOTTOM_5_BITS(x) ((x) & 0x1F)
#define BOTTOM_6_BITS(x) ((x) & 0x3F)

/* Bit-flag macros. flagstype is the integer type of x; the explicit cast
   narrows the mask (in particular the result of ~, which may be wider than
   flagstype) back to that type before it is combined with x. */
#define GET_FLAGS(x, f) ((x) & (f)) /* nonzero if any bit of f is set in x */
#define SET_FLAGS_ON(flagstype, x, f) do { (x) |= (flagstype)(f); } while (0)
#define SET_FLAGS_OFF(flagstype, x, f) do { (x) &= (flagstype)~(f); } while (0)
/* Turns the bits of f on when cond is true and off otherwise. */
#define SET_FLAGS(flagstype, x, f, cond) do { if (cond) (x) |= (flagstype)(f); else (x) &= (flagstype)~(f); } while (0)

/* UTF-8 byte-related macros: classify a byte by its leading bit pattern. */
#define IS_UTF8_SINGLE_BYTE(b) (((b) & 0x80) == 0) /* 0xxxxxxx */
#define IS_UTF8_CONTINUATION_BYTE(b) (((b) & 0xC0) == 0x80) /* 10xxxxxx */
#define IS_UTF8_FIRST_BYTE_OF_2(b) (((b) & 0xE0) == 0xC0) /* 110xxxxx */
#define IS_UTF8_FIRST_BYTE_OF_3(b) (((b) & 0xF0) == 0xE0) /* 1110xxxx */
#define IS_UTF8_FIRST_BYTE_OF_4(b) (((b) & 0xF8) == 0xF0) /* 11110xxx */
90
/* Unicode codepoint-related macros. */

/* Nonzero if c is a Unicode noncharacter: the last two codepoints of any
   plane (U+nFFFE and U+nFFFF) or the contiguous range U+FDD0 - U+FDEF.
   The plane test has to look at the low 16 bits; masking only the low
   8 bits would misclassify ordinary characters such as U+00FE and U+00FF. */
#define IS_NONCHARACTER(c) ((((c) & 0xFFFE) == 0xFFFE) || (((c) >= 0xFDD0) && ((c) <= 0xFDEF)))

/* Surrogate tests. All surrogates lie in U+D800 - U+DFFF; leading ones
   occupy the lower half (U+D800 - U+DBFF) and trailing ones the upper
   half (U+DC00 - U+DFFF). */
#define IS_SURROGATE(c) (((c) & 0xFFFFF800) == 0xD800)
#define IS_LEADING_SURROGATE(c) (((c) & 0xFFFFFC00) == 0xD800)
#define IS_TRAILING_SURROGATE(c) (((c) & 0xFFFFFC00) == 0xDC00)

/* Converts between a codepoint above the BMP and its surrogate pair packed
   into 32 bits (leading surrogate in the high 16 bits, trailing surrogate
   in the low 16 bits). Both rely on unsigned 32-bit wrap-around, so the
   argument must be an unsigned 32-bit value. */
#define CODEPOINT_FROM_SURROGATES(hi_lo) ((((hi_lo) >> 16) << 10) + ((hi_lo) & 0xFFFF) + 0xFCA02400)
#define SURROGATES_FROM_CODEPOINT(c) ((((c) << 6) & 0x7FF0000) + ((c) & 0x3FF) + 0xD7C0DC00)

/* Yields 1 << (enc / 2). Presumably enc is a JSON_Encoding value numbered so
   that this gives 1 for UTF-8, 2 for UTF-16 and 4 for UTF-32 - confirm
   against the public header. A plain cast is used instead of UINT32_C so the
   macro also works when <stdint.h> is not included (the _MSC_VER branch at
   the top of this file). */
#define SHORTEST_ENCODING_SEQUENCE(enc) ((uint32_t)1 << ((enc) >> 1))
#define LONGEST_ENCODING_SEQUENCE 4
100
/* Internal types that alias enum types in the public API.
   By using byte to represent these values internally,
   we can guarantee minimal storage size and avoid compiler
   warnings when using values of the type in switch statements
   that don't have (or need) a default case. */
typedef byte Encoding; /* compared against JSON_UTF8, JSON_UTF16LE, ... in this file */
typedef byte Error; /* presumably a public error code - confirm against the header */
typedef byte TokenAttributes; /* presumably public token attribute flags - confirm against the header */
109
110 /******************** Default Memory Suite ********************/
111
DefaultReallocHandler(void * userData,void * ptr,size_t size)112 static void* JSON_CALL DefaultReallocHandler(void* userData, void* ptr, size_t size)
113 {
114 (void)userData; /* unused */
115 return realloc(ptr, size);
116 }
117
DefaultFreeHandler(void * userData,void * ptr)118 static void JSON_CALL DefaultFreeHandler(void* userData, void* ptr)
119 {
120 (void)userData; /* unused */
121 free(ptr);
122 }
123
/* Memory suite built from the two handlers above with no user data.
   NOTE(review): the initializer is positional; presumably the field order
   in JSON_MemorySuite is userData, realloc, free - confirm in the header. */
static const JSON_MemorySuite defaultMemorySuite = { NULL, &DefaultReallocHandler, &DefaultFreeHandler };
125
/* Grows a buffer to twice its current length using the given memory suite.

   pDefaultBuffer is the caller's built-in buffer. If pBuffer still points
   at it, a fresh block is allocated and the first length bytes are copied
   over, since that buffer cannot be passed to realloc. Otherwise pBuffer is
   resized with the suite's realloc.

   Returns the buffer of 2 * length bytes, or NULL if the doubled length is
   not representable in size_t or the allocation fails. On NULL the function
   itself does not release or modify the buffer that was passed in. */
static byte* DoubleBuffer(const JSON_MemorySuite* pMemorySuite, byte* pDefaultBuffer, byte* pBuffer, size_t length)
{
   const size_t doubledLength = length * 2;
   byte* pGrown;

   /* Unsigned wrap-around: the doubled length does not fit in size_t. */
   if (doubledLength < length)
      return NULL;

   /* Already on the heap: let the suite resize it. */
   if (pBuffer != pDefaultBuffer)
      return (byte*)pMemorySuite->realloc(pMemorySuite->userData, pBuffer, doubledLength);

   /* First growth: move the contents out of the built-in buffer. */
   pGrown = (byte*)pMemorySuite->realloc(pMemorySuite->userData, NULL, doubledLength);
   if (pGrown)
      memcpy(pGrown, pDefaultBuffer, length);
   return pGrown;
}
147
148 /******************** Unicode Decoder ********************/
149
/* Mutually-exclusive decoder states. */
/* The bits of DecoderState are laid out as follows:

   ---llldd

   - = unused (3 bits)
   l = expected total sequence length (3 bits)
   d = number of bytes decoded so far (2 bits)
*/

#define DECODER_RESET 0x00
#define DECODED_1_OF_2 0x09 /* 00001001 */
#define DECODED_1_OF_3 0x0D /* 00001101 */
#define DECODED_2_OF_3 0x0E /* 00001110 */
#define DECODED_1_OF_4 0x11 /* 00010001 */
#define DECODED_2_OF_4 0x12 /* 00010010 */
#define DECODED_3_OF_4 0x13 /* 00010011 */
typedef byte DecoderState;

/* Extracts the "bytes decoded so far" field (the low 2 bits) of a state. */
#define DECODER_STATE_BYTES(s) (size_t)((s) & 0x3)

/* Decoder data. */
typedef struct tag_DecoderData
{
   uint32_t bits; /* bits of the sequence accumulated so far */
   DecoderState state; /* byte alignment */
} DecoderData;
typedef DecoderData* Decoder;
178
/* The bits of DecoderOutput are laid out as follows:

   ------rrlllccccccccccccccccccccc

   - = unused (6 bits)
   r = result code (2 bits)
   l = sequence length (3 bits)
   c = codepoint (21 bits)
*/
/* Result codes. NOTE(review): judging by how Decoder_ProcessByte uses them,
   INCLUSIVE appears to mean the byte just processed is part of the invalid
   sequence and EXCLUSIVE that it is not - confirm with the caller. */
#define SEQUENCE_PENDING 0
#define SEQUENCE_COMPLETE 1
#define SEQUENCE_INVALID_INCLUSIVE 2
#define SEQUENCE_INVALID_EXCLUSIVE 3
typedef uint32_t DecoderResultCode;

/* Pack and unpack the three fields according to the layout above. */
#define DECODER_OUTPUT(r, l, c) (DecoderOutput)(((r) << 24) | ((l) << 21) | (c))
#define DECODER_RESULT_CODE(o) (DecoderResultCode)((DecoderOutput)(o) >> 24)
#define DECODER_SEQUENCE_LENGTH(o) (size_t)(((DecoderOutput)(o) >> 21) & 0x7)
#define DECODER_CODEPOINT(o) (Codepoint)((DecoderOutput)(o) & 0x001FFFFF)
typedef uint32_t DecoderOutput;
199
200 /* Decoder functions. */
201
/* Returns the decoder to its initial condition: no accumulated bits and
   no sequence in progress. */
static void Decoder_Reset(Decoder decoder)
{
   decoder->bits = 0;
   decoder->state = DECODER_RESET;
}
207
/* Returns 1 if the decoder is in the middle of a multi-byte sequence
   (any state other than DECODER_RESET), 0 otherwise. */
static int Decoder_SequencePending(Decoder decoder)
{
   if (decoder->state == DECODER_RESET)
      return 0;
   return 1;
}
212
/* Feeds one input byte to the decoder and reports what it produced.

   decoder  - decoder whose state and bits are read and updated.
   encoding - input encoding; one of JSON_UTF8, JSON_UTF16LE, JSON_UTF16BE,
              JSON_UTF32LE or JSON_UTF32BE. Any other value matches no case,
              so the decoder is reset and SEQUENCE_PENDING is returned.
   b        - the next input byte.

   Returns a DecoderOutput packing a result code, a sequence length and
   (for SEQUENCE_COMPLETE) the decoded codepoint.

   Control flow: paths that need more bytes store the new state and jump to
   noreset, returning the initial SEQUENCE_PENDING output. Every other path
   falls out of the switch and resets the decoder before returning. The one
   exception is an unpaired UTF-16 leading surrogate, which reports
   SEQUENCE_INVALID_EXCLUSIVE but also jumps to noreset so that the byte
   already consumed after the surrogate is kept as the start of the next
   sequence. */
static DecoderOutput Decoder_ProcessByte(Decoder decoder, Encoding encoding, byte b)
{
   DecoderOutput output = DECODER_OUTPUT(SEQUENCE_PENDING, 0, 0);
   switch (encoding)
   {
      case JSON_UTF8:
         /* When the input encoding is UTF-8, the decoded codepoint's bits are
            recorded in the bottom 3 bytes of bits as they are decoded.
            The top byte is not used. */
         switch (decoder->state)
         {
            case DECODER_RESET:
               if (IS_UTF8_SINGLE_BYTE(b))
                  output = DECODER_OUTPUT(SEQUENCE_COMPLETE, 1, b);
               else if (IS_UTF8_FIRST_BYTE_OF_2(b))
               {
                  /* UTF-8 2-byte sequences that are overlong encodings can be
                     detected from just the first byte (C0 or C1). */
                  decoder->bits = (uint32_t)BOTTOM_5_BITS(b) << 6;
                  if (decoder->bits < FIRST_2_BYTE_UTF8_CODEPOINT)
                     output = DECODER_OUTPUT(SEQUENCE_INVALID_INCLUSIVE, 1, 0);
                  else
                  {
                     decoder->state = DECODED_1_OF_2;
                     goto noreset;
                  }
               }
               else if (IS_UTF8_FIRST_BYTE_OF_3(b))
               {
                  decoder->bits = (uint32_t)BOTTOM_4_BITS(b) << 12;
                  decoder->state = DECODED_1_OF_3;
                  goto noreset;
               }
               else if (IS_UTF8_FIRST_BYTE_OF_4(b))
               {
                  /* Some UTF-8 4-byte sequences that encode out-of-range
                     codepoints can be detected from the first byte (F5 - F7).
                     Bytes F8 - FF do not match the 4-byte lead pattern and
                     are rejected by the final else below. */
                  decoder->bits = (uint32_t)BOTTOM_3_BITS(b) << 18;
                  if (decoder->bits > MAX_CODEPOINT)
                     output = DECODER_OUTPUT(SEQUENCE_INVALID_INCLUSIVE, 1, 0);
                  else
                  {
                     decoder->state = DECODED_1_OF_4;
                     goto noreset;
                  }
               }
               else
                  /* The byte is of the form 11111xxx or 10xxxxxx, and is not
                     a valid first byte for a UTF-8 sequence. */
                  output = DECODER_OUTPUT(SEQUENCE_INVALID_INCLUSIVE, 1, 0);
               break;

            case DECODED_1_OF_2:
               if (IS_UTF8_CONTINUATION_BYTE(b))
                  output = DECODER_OUTPUT(SEQUENCE_COMPLETE, 2, decoder->bits | BOTTOM_6_BITS(b));
               else
                  output = DECODER_OUTPUT(SEQUENCE_INVALID_EXCLUSIVE, 1, 0);
               break;

            case DECODED_1_OF_3:
               if (IS_UTF8_CONTINUATION_BYTE(b))
               {
                  /* UTF-8 3-byte sequences that are overlong
                   * encodings or encode surrogate codepoints
                   * can be detected after 2 bytes. */
                  decoder->bits |= (uint32_t)BOTTOM_6_BITS(b) << 6;
                  if ((decoder->bits < FIRST_3_BYTE_UTF8_CODEPOINT) ||
                        IS_SURROGATE(decoder->bits))
                     output = DECODER_OUTPUT(SEQUENCE_INVALID_EXCLUSIVE, 1, 0);
                  else
                  {
                     decoder->state = DECODED_2_OF_3;
                     goto noreset;
                  }
               }
               else
                  output = DECODER_OUTPUT(SEQUENCE_INVALID_EXCLUSIVE, 1, 0);
               break;

            case DECODED_2_OF_3:
               if (IS_UTF8_CONTINUATION_BYTE(b))
                  output = DECODER_OUTPUT(SEQUENCE_COMPLETE, 3, decoder->bits | BOTTOM_6_BITS(b));
               else
                  output = DECODER_OUTPUT(SEQUENCE_INVALID_EXCLUSIVE, 2, 0);
               break;

            case DECODED_1_OF_4:
               if (IS_UTF8_CONTINUATION_BYTE(b))
               {
                  /* UTF-8 4-byte sequences that are overlong encodings or encode
                     out-of-range codepoints can be detected after 2 bytes. */
                  decoder->bits |= (uint32_t)BOTTOM_6_BITS(b) << 12;
                  if ( (decoder->bits < FIRST_4_BYTE_UTF8_CODEPOINT) ||
                        (decoder->bits > MAX_CODEPOINT))
                     output = DECODER_OUTPUT(SEQUENCE_INVALID_EXCLUSIVE, 1, 0);
                  else
                  {
                     decoder->state = DECODED_2_OF_4;
                     goto noreset;
                  }
               }
               else
                  output = DECODER_OUTPUT(SEQUENCE_INVALID_EXCLUSIVE, 1, 0);
               break;

            case DECODED_2_OF_4:
               if (IS_UTF8_CONTINUATION_BYTE(b))
               {
                  decoder->bits |= (uint32_t)BOTTOM_6_BITS(b) << 6;
                  decoder->state = DECODED_3_OF_4;
                  goto noreset;
               }

               output = DECODER_OUTPUT(SEQUENCE_INVALID_EXCLUSIVE, 2, 0);
               break;

            case DECODED_3_OF_4:
               if (IS_UTF8_CONTINUATION_BYTE(b))
                  output = DECODER_OUTPUT(SEQUENCE_COMPLETE, 4, decoder->bits | BOTTOM_6_BITS(b));
               else
                  output = DECODER_OUTPUT(SEQUENCE_INVALID_EXCLUSIVE, 3, 0);
               break;
         }
         break;

      case JSON_UTF16LE:
         /* When the input encoding is UTF-16, the decoded codepoint's bits are
            recorded in the bottom 2 bytes of bits as they are decoded.
            If those 2 bytes form a leading surrogate, the decoder treats the
            surrogate pair as a single 4-byte sequence, shifts the leading
            surrogate into the high 2 bytes of bits, and decodes the
            trailing surrogate's bits in the bottom 2 bytes of bits. */
         switch (decoder->state)
         {
            case DECODER_RESET:
               /* Little-endian: the low byte of the code unit arrives first. */
               decoder->bits = b;
               decoder->state = DECODED_1_OF_2;
               goto noreset;

            case DECODED_1_OF_2:
               decoder->bits |= (uint32_t)b << 8;
               /* A trailing surrogate cannot appear on its own. */
               if (IS_TRAILING_SURROGATE(decoder->bits))
                  output = DECODER_OUTPUT(SEQUENCE_INVALID_INCLUSIVE, 2, 0);
               else if (IS_LEADING_SURROGATE(decoder->bits))
               {
                  /* A leading surrogate implies a 4-byte surrogate pair. */
                  decoder->bits <<= 16;
                  decoder->state = DECODED_2_OF_4;
                  goto noreset;
               }
               else
                  output = DECODER_OUTPUT(SEQUENCE_COMPLETE, 2, decoder->bits);
               break;

            case DECODED_2_OF_4:
               decoder->bits |= b;
               decoder->state = DECODED_3_OF_4;
               goto noreset;

            case DECODED_3_OF_4:
               decoder->bits |= (uint32_t)b << 8;
               if (!IS_TRAILING_SURROGATE(decoder->bits & 0xFFFF))
               {
                  /* A leading surrogate must be followed by a trailing one.
                     Treat the previous 3 bytes as an invalid 2-byte sequence
                     followed by the first byte of a new sequence. */
                  decoder->bits &= 0xFF; /* keep only the third byte (low byte of the new unit) */
                  decoder->state = DECODED_1_OF_2;
                  output = DECODER_OUTPUT(SEQUENCE_INVALID_EXCLUSIVE, 2, 0);
                  goto noreset;
               }

               output = DECODER_OUTPUT(SEQUENCE_COMPLETE, 4, CODEPOINT_FROM_SURROGATES(decoder->bits));
               break;
         }
         break;

      case JSON_UTF16BE:
         /* When the input encoding is UTF-16, the decoded codepoint's bits are
            recorded in the bottom 2 bytes of bits as they are decoded.
            If those 2 bytes form a leading surrogate, the decoder treats the
            surrogate pair as a single 4-byte sequence, shifts the leading
            surrogate into the high 2 bytes of bits, and decodes the
            trailing surrogate's bits in the bottom 2 bytes of bits. */
         switch (decoder->state)
         {
            case DECODER_RESET:
               /* Big-endian: the high byte of the code unit arrives first. */
               decoder->bits = (uint32_t)b << 8;
               decoder->state = DECODED_1_OF_2;
               goto noreset;

            case DECODED_1_OF_2:
               decoder->bits |= b;
               /* A trailing surrogate cannot appear on its own. */
               if (IS_TRAILING_SURROGATE(decoder->bits))
                  output = DECODER_OUTPUT(SEQUENCE_INVALID_INCLUSIVE, 2, 0);
               else if (IS_LEADING_SURROGATE(decoder->bits))
               {
                  /* A leading surrogate implies a 4-byte surrogate pair. */
                  decoder->bits <<= 16;
                  decoder->state = DECODED_2_OF_4;
                  goto noreset;
               }
               else
                  output = DECODER_OUTPUT(SEQUENCE_COMPLETE, 2, decoder->bits);
               break;

            case DECODED_2_OF_4:
               decoder->bits |= (uint32_t)b << 8;
               decoder->state = DECODED_3_OF_4;
               goto noreset;

            case DECODED_3_OF_4:
               decoder->bits |= b;
               if (!IS_TRAILING_SURROGATE(decoder->bits & 0xFFFF))
               {
                  /* A leading surrogate must be followed by a trailing one.
                     Treat the previous 3 bytes as an invalid 2-byte sequence
                     followed by the first byte of a new sequence. */
                  decoder->bits &= 0xFF00; /* keep only the third byte (high byte of the new unit) */
                  decoder->state = DECODED_1_OF_2;
                  output = DECODER_OUTPUT(SEQUENCE_INVALID_EXCLUSIVE, 2, 0);
                  goto noreset;
               }

               output = DECODER_OUTPUT(SEQUENCE_COMPLETE, 4,
                     CODEPOINT_FROM_SURROGATES(decoder->bits));
               break;
         }
         break;

      case JSON_UTF32LE:
         /* When the input encoding is UTF-32, the decoded codepoint's bits are
            recorded in bits as they are decoded. */
         switch (decoder->state)
         {
            case DECODER_RESET:
               decoder->state = DECODED_1_OF_4;
               decoder->bits = (uint32_t)b;
               goto noreset;

            case DECODED_1_OF_4:
               decoder->state = DECODED_2_OF_4;
               decoder->bits |= (uint32_t)b << 8;
               goto noreset;

            case DECODED_2_OF_4:
               decoder->state = DECODED_3_OF_4;
               decoder->bits |= (uint32_t)b << 16;
               goto noreset;

            case DECODED_3_OF_4:
               decoder->bits |= (uint32_t)b << 24;
               /* Surrogates and values above U+10FFFF are not valid codepoints. */
               output = (
                     IS_SURROGATE(decoder->bits) ||
                     (decoder->bits > MAX_CODEPOINT))
                  ? DECODER_OUTPUT(SEQUENCE_INVALID_INCLUSIVE, 4, 0)
                  : DECODER_OUTPUT(SEQUENCE_COMPLETE, 4, decoder->bits);
               break;
         }
         break;

      case JSON_UTF32BE:
         /* When the input encoding is UTF-32, the decoded codepoint's bits are
            recorded in bits as they are decoded. */
         switch (decoder->state)
         {
            case DECODER_RESET:
               decoder->state = DECODED_1_OF_4;
               decoder->bits = (uint32_t)b << 24;
               goto noreset;

            case DECODED_1_OF_4:
               decoder->state = DECODED_2_OF_4;
               decoder->bits |= (uint32_t)b << 16;
               goto noreset;

            case DECODED_2_OF_4:
               decoder->state = DECODED_3_OF_4;
               decoder->bits |= (uint32_t)b << 8;
               goto noreset;

            case DECODED_3_OF_4:
               decoder->bits |= b;
               /* Surrogates and values above U+10FFFF are not valid codepoints. */
               output = (IS_SURROGATE(decoder->bits) ||
                     (decoder->bits > MAX_CODEPOINT))
                  ? DECODER_OUTPUT(SEQUENCE_INVALID_INCLUSIVE, 4, 0)
                  : DECODER_OUTPUT(SEQUENCE_COMPLETE, 4, decoder->bits);
               break;
         }
         break;
   }

   /* Reset the decoder for the next sequence. */
   Decoder_Reset(decoder);

noreset:
   return output;
}
513
514 /******************** Unicode Encoder ********************/
515
516 /* This function makes the following assumptions about its input:
517
518 1. The c argument is a valid codepoint (U+0000 - U+10FFFF).
519 2. The encoding argument is not JSON_UnknownEncoding.
520 3. The pBytes argument points to an array of at least 4 bytes.
521 */
EncodeCodepoint(Codepoint c,Encoding encoding,byte * pBytes)522 static size_t EncodeCodepoint(Codepoint c, Encoding encoding, byte* pBytes)
523 {
524 size_t length = 0;
525 switch (encoding)
526 {
527 case JSON_UTF8:
528 if (c < FIRST_2_BYTE_UTF8_CODEPOINT)
529 {
530 pBytes[0] = (byte)c;
531 length = 1;
532 }
533 else if (c < FIRST_3_BYTE_UTF8_CODEPOINT)
534 {
535 pBytes[0] = (byte)(0xC0 | (c >> 6));
536 pBytes[1] = (byte)(0x80 | BOTTOM_6_BITS(c));
537 length = 2;
538 }
539 else if (c < FIRST_4_BYTE_UTF8_CODEPOINT)
540 {
541 pBytes[0] = (byte)(0xE0 | (c >> 12));
542 pBytes[1] = (byte)(0x80 | BOTTOM_6_BITS(c >> 6));
543 pBytes[2] = (byte)(0x80 | BOTTOM_6_BITS(c));
544 length = 3;
545 }
546 else
547 {
548 pBytes[0] = (byte)(0xF0 | (c >> 18));
549 pBytes[1] = (byte)(0x80 | BOTTOM_6_BITS(c >> 12));
550 pBytes[2] = (byte)(0x80 | BOTTOM_6_BITS(c >> 6));
551 pBytes[3] = (byte)(0x80 | BOTTOM_6_BITS(c));
552 length = 4;
553 }
554 break;
555
556 case JSON_UTF16LE:
557 if (c < FIRST_NON_BMP_CODEPOINT)
558 {
559 pBytes[0] = (byte)(c);
560 pBytes[1] = (byte)(c >> 8);
561 length = 2;
562 }
563 else
564 {
565 uint32_t surrogates = SURROGATES_FROM_CODEPOINT(c);
566
567 /* Leading surrogate. */
568 pBytes[0] = (byte)(surrogates >> 16);
569 pBytes[1] = (byte)(surrogates >> 24);
570
571 /* Trailing surrogate. */
572 pBytes[2] = (byte)(surrogates);
573 pBytes[3] = (byte)(surrogates >> 8);
574 length = 4;
575 }
576 break;
577
578 case JSON_UTF16BE:
579 if (c < FIRST_NON_BMP_CODEPOINT)
580 {
581 pBytes[1] = (byte)(c);
582 pBytes[0] = (byte)(c >> 8);
583 length = 2;
584 }
585 else
586 {
587 /* The codepoint requires a surrogate pair in UTF-16. */
588 uint32_t surrogates = SURROGATES_FROM_CODEPOINT(c);
589
590 /* Leading surrogate. */
591 pBytes[1] = (byte)(surrogates >> 16);
592 pBytes[0] = (byte)(surrogates >> 24);
593
594 /* Trailing surrogate. */
595 pBytes[3] = (byte)(surrogates);
596 pBytes[2] = (byte)(surrogates >> 8);
597 length = 4;
598 }
599 break;
600
601 case JSON_UTF32LE:
602 pBytes[0] = (byte)(c);
603 pBytes[1] = (byte)(c >> 8);
604 pBytes[2] = (byte)(c >> 16);
605 pBytes[3] = (byte)(c >> 24);
606 length = 4;
607 break;
608
609 case JSON_UTF32BE:
610 pBytes[3] = (byte)(c);
611 pBytes[2] = (byte)(c >> 8);
612 pBytes[1] = (byte)(c >> 16);
613 pBytes[0] = (byte)(c >> 24);
614 length = 4;
615 break;
616 }
617 return length;
618 }
619
620 /******************** JSON Lexer States ********************/
621
/* Mutually-exclusive lexer states. The regular states are numbered
   consecutively from 0; LEXER_ERROR is set apart at 255. */
#define LEXING_WHITESPACE 0
#define LEXING_LITERAL 1
#define LEXING_STRING 2
#define LEXING_STRING_ESCAPE 3
#define LEXING_STRING_HEX_ESCAPE_BYTE_1 4
#define LEXING_STRING_HEX_ESCAPE_BYTE_2 5
#define LEXING_STRING_HEX_ESCAPE_BYTE_3 6
#define LEXING_STRING_HEX_ESCAPE_BYTE_4 7
#define LEXING_STRING_HEX_ESCAPE_BYTE_5 8
#define LEXING_STRING_HEX_ESCAPE_BYTE_6 9
#define LEXING_STRING_HEX_ESCAPE_BYTE_7 10
#define LEXING_STRING_HEX_ESCAPE_BYTE_8 11
#define LEXING_STRING_TRAILING_SURROGATE_HEX_ESCAPE_BACKSLASH 12
#define LEXING_STRING_TRAILING_SURROGATE_HEX_ESCAPE_U 13
#define LEXING_NUMBER_AFTER_MINUS 14
#define LEXING_NUMBER_AFTER_LEADING_ZERO 15
#define LEXING_NUMBER_AFTER_LEADING_NEGATIVE_ZERO 16
#define LEXING_NUMBER_AFTER_X 17
#define LEXING_NUMBER_HEX_DIGITS 18
#define LEXING_NUMBER_DECIMAL_DIGITS 19
#define LEXING_NUMBER_AFTER_DOT 20
#define LEXING_NUMBER_FRACTIONAL_DIGITS 21
#define LEXING_NUMBER_AFTER_E 22
#define LEXING_NUMBER_AFTER_EXPONENT_SIGN 23
#define LEXING_NUMBER_EXPONENT_DIGITS 24
#define LEXING_COMMENT_AFTER_SLASH 25
#define LEXING_SINGLE_LINE_COMMENT 26
#define LEXING_MULTI_LINE_COMMENT 27
#define LEXING_MULTI_LINE_COMMENT_AFTER_STAR 28
#define LEXER_ERROR 255
typedef byte LexerState;
654
655 /******************** JSON Grammarian ********************/
656
657 /* The JSON grammar comprises the following productions:
658
659 1. VALUE => null
660 2. VALUE => boolean
661 3. VALUE => string
662 4. VALUE => number
663 5. VALUE => specialnumber
664 6. VALUE => { MEMBERS }
665 7. VALUE => [ ITEMS ]
666 8. MEMBERS => MEMBER MORE_MEMBERS
667 9. MEMBERS => e
668 10. MEMBER => string : VALUE
669 11. MORE_MEMBERS => , MEMBER MORE_MEMBERS
670 12. MORE_MEMBERS => e
671 13. ITEMS => ITEM MORE_ITEMS
672 14. ITEMS => e
673 15. ITEM => VALUE
674 16. MORE_ITEMS => , ITEM MORE_ITEMS
675 17. MORE_ITEMS => e
676
677 We implement a simple LL(1) parser based on this grammar, with events
678 emitted when certain non-terminals are replaced.
679 */
680
/* Mutually-exclusive grammar tokens and non-terminals. The values are defined
   so that the bottom 4 bits of a value can be used as an index into the
   grammar production rule table. */
#define T_NONE 0x00 /* tokens are in the form 0x0X */
#define T_NULL 0x01
#define T_TRUE 0x02
#define T_FALSE 0x03
#define T_STRING 0x04
#define T_NUMBER 0x05
#define T_NAN 0x06
#define T_INFINITY 0x07
#define T_NEGATIVE_INFINITY 0x08
#define T_LEFT_CURLY 0x09
#define T_RIGHT_CURLY 0x0A
#define T_LEFT_SQUARE 0x0B
#define T_RIGHT_SQUARE 0x0C
#define T_COLON 0x0D
#define T_COMMA 0x0E
#define NT_VALUE 0x10 /* non-terminals are in the form 0x1X */
#define NT_MEMBERS 0x11
#define NT_MEMBER 0x12
#define NT_MORE_MEMBERS 0x13
#define NT_ITEMS 0x14
#define NT_ITEM 0x15
#define NT_MORE_ITEMS 0x16
typedef byte Symbol;

/* Bit 0x10 is set in every NT_* value and clear in every T_* value, so it
   alone distinguishes the two kinds of symbol. */
#define IS_NONTERMINAL(s) ((s) & 0x10)
#define IS_TOKEN(s) !IS_NONTERMINAL(s)
710
/* Grammarian data: the symbol stack of the LL(1) parser. */
typedef struct tag_GrammarianData
{
   Symbol* pStack; /* initially set to defaultStack */
   size_t stackSize; /* capacity of pStack, in symbols */
   size_t stackUsed; /* symbols currently on the stack; the top is pStack[stackUsed - 1] */
   Symbol defaultStack[DEFAULT_SYMBOL_STACK_SIZE]; /* built-in storage used until the stack must grow */
} GrammarianData;
typedef GrammarianData* Grammarian;
720
/* Mutually-exclusive result codes returned by the grammarian
   after processing a token. */
#define ACCEPTED_TOKEN 0
#define REJECTED_TOKEN 1
#define SYMBOL_STACK_FULL 2 /* returned when the symbol stack could not be grown */
typedef uint32_t GrammarianResultCode;

/* Events emitted by the grammarian as a result of processing a
   token. Note that EMIT_ARRAY_ITEM always appears bitwise OR-ed
   with one of the other values. */
#define EMIT_NOTHING 0x00
#define EMIT_NULL 0x01
#define EMIT_BOOLEAN 0x02
#define EMIT_STRING 0x03
#define EMIT_NUMBER 0x04
#define EMIT_SPECIAL_NUMBER 0x05
#define EMIT_START_OBJECT 0x06
#define EMIT_END_OBJECT 0x07
#define EMIT_OBJECT_MEMBER 0x08
#define EMIT_START_ARRAY 0x09
#define EMIT_END_ARRAY 0x0A
#define EMIT_ARRAY_ITEM 0x10 /* may be combined with other values */
typedef byte GrammarEvent;

/* The bits of GrammarianOutput are laid out as follows:

   -rreeeee

   - = unused (1 bit)
   r = result code (2 bits)
   e = event (5 bits)
*/
/* Pack and unpack the two fields according to the layout above. */
#define GRAMMARIAN_OUTPUT(r, e) (GrammarianOutput)(((GrammarianResultCode)(r) << 5) | (GrammarEvent)(e))
#define GRAMMARIAN_RESULT_CODE(o) (GrammarianResultCode)((GrammarianOutput)(o) >> 5)
#define GRAMMARIAN_EVENT(o) (GrammarEvent)((GrammarianOutput)(o) & 0x1F)
typedef byte GrammarianOutput;
757
/* Grammar rule used by the grammarian to process a token. */
typedef struct tag_GrammarRule
{
   /* Symbol that replaces the non-terminal on top of the stack;
      T_NONE means the top symbol is simply popped. */
   Symbol symbolToPush1; /* byte alignment */
   /* Optional second symbol pushed above symbolToPush1; T_NONE for none. */
   Symbol symbolToPush2; /* byte alignment */
   /* Nonzero if the same token must be matched again against the new top
      of the stack after the rule has been applied. */
   byte reprocess;
   /* Event OR-ed into the grammarian's output when the rule is applied. */
   GrammarEvent emit; /* byte alignment */
} GrammarRule;
766
767 /* Grammarian functions. */
768
/* Prepares the grammarian to parse a new document.

   isInitialized is zero the first time the grammarian is set up; in that
   case the stack is pointed at the built-in defaultStack. On later resets
   whatever stack is currently in use, including one that has been enlarged
   on the heap, is kept as is. A client that wants that memory back has to
   free the grammarian and create a new one. */
static void Grammarian_Reset(Grammarian grammarian, int isInitialized)
{
   if (!isInitialized)
   {
      grammarian->stackSize = sizeof(grammarian->defaultStack);
      grammarian->pStack    = grammarian->defaultStack;
   }

   /* Parsing always begins with a lone NT_VALUE on the symbol stack. */
   grammarian->stackUsed = 1;
   grammarian->pStack[0] = NT_VALUE;
}
785
/* Releases the symbol stack through the memory suite's free handler,
   unless the grammarian is still using its built-in defaultStack, which
   was never allocated. */
static void Grammarian_FreeAllocations(Grammarian grammarian,
      const JSON_MemorySuite* pMemorySuite)
{
   if (grammarian->pStack == grammarian->defaultStack)
      return;

   pMemorySuite->free(pMemorySuite->userData, grammarian->pStack);
}
792
/* Returns 1 once the symbol stack is empty, i.e. no further tokens are
   expected, and 0 otherwise. */
static int Grammarian_FinishedDocument(Grammarian grammarian)
{
   return grammarian->stackUsed == 0;
}
797
Grammarian_ProcessToken(Grammarian grammarian,Symbol token,const JSON_MemorySuite * pMemorySuite)798 static GrammarianOutput Grammarian_ProcessToken(Grammarian grammarian,
799 Symbol token, const JSON_MemorySuite* pMemorySuite)
800 {
801 /* The order and number of the rows and columns in this table must
802 match the defined token and non-terminal symbol values.
803
804 The row index is the incoming token's Symbol value.
805
806 The column index is the bottom 4 bits of Symbol value of
807 the non-terminal at the top of the processing stack.
808 Since non-terminal Symbol values start at 0x10, taking
809 the bottom 4 bits yields a 0-based index. */
810 static const byte ruleLookup[15][7] =
811 {
812 /* V MS M MM IS I MI */
813 /* ---- */ { 0, 0, 0, 0, 0, 0, 0 },
814 /* null */ { 1, 0, 0, 0, 13, 15, 0 },
815 /* true */ { 2, 0, 0, 0, 13, 15, 0 },
816 /* false */ { 2, 0, 0, 0, 13, 15, 0 },
817 /* string */ { 3, 8, 10, 0, 13, 15, 0 },
818 /* number */ { 4, 0, 0, 0, 13, 15, 0 },
819 /* NaN */ { 5, 0, 0, 0, 13, 15, 0 },
820 /* Inf */ { 5, 0, 0, 0, 13, 15, 0 },
821 /* -Inf */ { 5, 0, 0, 0, 13, 15, 0 },
822 /* { */ { 6, 0, 0, 0, 13, 15, 0 },
823 /* } */ { 0, 9, 0, 12, 0, 0, 0 },
824 /* [ */ { 7, 0, 0, 0, 13, 15, 0 },
825 /* ] */ { 0, 0, 0, 0, 14, 0, 17 },
826 /* : */ { 0, 0, 0, 0, 0, 0, 0 },
827 /* , */ { 0, 0, 0, 11, 0, 0, 16 }
828 };
829
830 static const GrammarRule rules[17] =
831 {
832 /* 1. */ { T_NONE, T_NONE, 0, EMIT_NULL },
833 /* 2. */ { T_NONE, T_NONE, 0, EMIT_BOOLEAN },
834 /* 3. */ { T_NONE, T_NONE, 0, EMIT_STRING },
835 /* 4. */ { T_NONE, T_NONE, 0, EMIT_NUMBER },
836 /* 5. */ { T_NONE, T_NONE, 0, EMIT_SPECIAL_NUMBER },
837 /* 6. */ { T_RIGHT_CURLY, NT_MEMBERS, 0, EMIT_START_OBJECT },
838 /* 7. */ { T_RIGHT_SQUARE, NT_ITEMS, 0, EMIT_START_ARRAY },
839 /* 8. */ { NT_MORE_MEMBERS, NT_MEMBER, 1, EMIT_NOTHING },
840 /* 9. */ { T_NONE, T_NONE, 1, EMIT_END_OBJECT },
841 /* 10. */ { NT_VALUE, T_COLON, 0, EMIT_OBJECT_MEMBER },
842 /* 11. */ { NT_MORE_MEMBERS, NT_MEMBER, 0, EMIT_NOTHING },
843 /* 12. */ { T_NONE, T_NONE, 1, EMIT_END_OBJECT },
844 /* 13. */ { NT_MORE_ITEMS, NT_ITEM, 1, EMIT_NOTHING },
845 /* 14. */ { T_NONE, T_NONE, 1, EMIT_END_ARRAY },
846 /* 15. */ { NT_VALUE, T_NONE, 1, EMIT_ARRAY_ITEM },
847 /* 16. */ { NT_MORE_ITEMS, NT_ITEM, 0, EMIT_NOTHING },
848 /* 17. */ { T_NONE, T_NONE, 1, EMIT_END_ARRAY }
849 };
850
851 GrammarEvent emit = EMIT_NOTHING;
852
853 /* If the stack is empty, no more tokens were expected. */
854 if (Grammarian_FinishedDocument(grammarian))
855 return GRAMMARIAN_OUTPUT(REJECTED_TOKEN, EMIT_NOTHING);
856
857 for (;;)
858 {
859 Symbol topSymbol = grammarian->pStack[grammarian->stackUsed - 1];
860 if (IS_TOKEN(topSymbol))
861 {
862 if (topSymbol != token)
863 return GRAMMARIAN_OUTPUT(REJECTED_TOKEN, EMIT_NOTHING);
864 grammarian->stackUsed--;
865 break;
866 }
867 else
868 {
869 const GrammarRule* pRule = NULL;
870 byte ruleNumber = ruleLookup[token][BOTTOM_4_BITS(topSymbol)];
871
872 if (ruleNumber == 0)
873 return GRAMMARIAN_OUTPUT(REJECTED_TOKEN, EMIT_NOTHING);
874
875 pRule = &rules[ruleNumber - 1];
876
877 /* The rule removes the top symbol and does not replace it. */
878 if (pRule->symbolToPush1 == T_NONE)
879 grammarian->stackUsed--;
880 else
881 {
882 /* The rule replaces the top symbol with 1 or 2 symbols. */
883 grammarian->pStack[grammarian->stackUsed - 1] = pRule->symbolToPush1;
884 if (pRule->symbolToPush2 != T_NONE)
885 {
886 /* The rule replaces the top symbol with 2 symbols.
887 Make sure the stack has room for the second one. */
888 if (grammarian->stackUsed == grammarian->stackSize)
889 {
890 Symbol* pBiggerStack = DoubleBuffer(pMemorySuite,
891 grammarian->defaultStack, grammarian->pStack,
892 grammarian->stackSize);
893
894 if (!pBiggerStack)
895 return GRAMMARIAN_OUTPUT(SYMBOL_STACK_FULL, EMIT_NOTHING);
896
897 grammarian->pStack = pBiggerStack;
898 grammarian->stackSize *= 2;
899 }
900 grammarian->pStack[grammarian->stackUsed] = pRule->symbolToPush2;
901 grammarian->stackUsed++;
902 }
903 }
904 emit |= pRule->emit;
905 if (!pRule->reprocess)
906 break;
907 }
908 }
909
910 return GRAMMARIAN_OUTPUT(ACCEPTED_TOKEN, emit);
911 }
912
913 /******************** JSON Parser ********************/
914
915 #ifndef JSON_NO_PARSER
916
/* Combinable parser state flags. Each non-zero value is a distinct bit, so
   several can be held in one ParserState at the same time. */
#define PARSER_RESET 0x00
#define PARSER_STARTED 0x01
#define PARSER_FINISHED 0x02
#define PARSER_IN_PROTECTED_API 0x04
#define PARSER_IN_TOKEN_HANDLER 0x08
#define PARSER_AFTER_CARRIAGE_RETURN 0x10
typedef byte ParserState;

/* Combinable parser settings flags. Each non-zero value is a distinct bit;
   together they use all 8 low bits of ParserFlags. */
#define PARSER_DEFAULT_FLAGS 0x00
#define PARSER_ALLOW_BOM 0x01
#define PARSER_ALLOW_COMMENTS 0x02
#define PARSER_ALLOW_SPECIAL_NUMBERS 0x04
#define PARSER_ALLOW_HEX_NUMBERS 0x08
#define PARSER_REPLACE_INVALID 0x10
#define PARSER_TRACK_OBJECT_MEMBERS 0x20
#define PARSER_ALLOW_CONTROL_CHARS 0x40
#define PARSER_EMBEDDED_DOCUMENT 0x80
typedef byte ParserFlags;

/* Sentinel value for parser error location offset. */
#define ERROR_LOCATION_IS_TOKEN_START 0xFF
940
/* An object member name stored in an unordered, singly-linked-list, used for
   detecting duplicate member names. Note that the name string is not null-
   terminated. */
typedef struct tag_MemberName
{
   struct tag_MemberName* pNextName; /* next name in the same object's list */
   size_t length; /* presumably the number of bytes in pBytes - confirm at the allocation site */
   /* Declared with one element; presumably the node is over-allocated so
      that pBytes can hold the whole name - confirm at the allocation site. */
   byte pBytes[1]; /* variable-size buffer */
} MemberName;

/* An object's list of member names, and a pointer to the object's
   nearest ancestor object, if any. This is used as a stack. Because arrays
   do not have named items, they do not need to be recorded in the stack. */
typedef struct tag_MemberNames
{
   struct tag_MemberNames* pAncestor; /* nearest enclosing object's entry, if any */
   MemberName* pFirstName; /* head of this object's list of names */
} MemberNames;
959
960 /* A parser instance. */
961 struct JSON_Parser_Data
962 {
963 JSON_MemorySuite memorySuite; /* ptr alignment */
964 void* userData;
965 byte* pTokenBytes;
966 MemberNames* pMemberNames;
967 GrammarianData grammarianData; /* ptr alignment */
968 JSON_Parser_EncodingDetectedHandler encodingDetectedHandler; /* ptr alignment */
969 JSON_Parser_NullHandler nullHandler;
970 JSON_Parser_BooleanHandler booleanHandler;
971 JSON_Parser_StringHandler stringHandler;
972 JSON_Parser_NumberHandler numberHandler;
973 JSON_Parser_SpecialNumberHandler specialNumberHandler;
974 JSON_Parser_StartObjectHandler startObjectHandler;
975 JSON_Parser_EndObjectHandler endObjectHandler;
976 JSON_Parser_ObjectMemberHandler objectMemberHandler;
977 JSON_Parser_StartArrayHandler startArrayHandler;
978 JSON_Parser_EndArrayHandler endArrayHandler;
979 JSON_Parser_ArrayItemHandler arrayItemHandler;
980 uint32_t lexerBits;
981 DecoderData decoderData;
982 /* uint32 alignment */
983 size_t codepointLocationByte;
984 size_t codepointLocationLine;
985 size_t codepointLocationColumn;
986 size_t tokenLocationByte;
987 size_t tokenLocationLine;
988 size_t tokenLocationColumn;
989 size_t depth;
990 size_t tokenBytesLength;
991 size_t tokenBytesUsed;
992 size_t maxStringLength;
993 size_t maxNumberLength;
994 ParserState state; /* byte alignment */
995 ParserFlags flags; /* byte alignment */
996 Encoding inputEncoding; /* byte alignment */
997 Encoding stringEncoding; /* byte alignment */
998 Encoding numberEncoding; /* byte alignment */
999 Symbol token; /* byte alignment */
1000 TokenAttributes tokenAttributes; /* byte alignment */
1001 Error error; /* byte alignment */
1002 byte errorOffset;
1003 LexerState lexerState; /* byte alignment */
1004 byte defaultTokenBytes[DEFAULT_TOKEN_BYTES_LENGTH];
1005 };
1006
1007 /* Parser internal functions. */
1008
JSON_Parser_SetErrorAtCodepoint(JSON_Parser parser,Error error)1009 static void JSON_Parser_SetErrorAtCodepoint(JSON_Parser parser, Error error)
1010 {
1011 parser->error = error;
1012 }
1013
JSON_Parser_SetErrorAtStringEscapeSequenceStart(JSON_Parser parser,Error error,int codepointsAgo)1014 static void JSON_Parser_SetErrorAtStringEscapeSequenceStart(
1015 JSON_Parser parser, Error error, int codepointsAgo)
1016 {
1017 /* Note that backtracking from the current codepoint requires us to make
1018 three assumptions, which are always valid in the context of a string
1019 escape sequence:
1020
1021 1. The input encoding is not JSON_UnknownEncoding.
1022
1023 2 The codepoints we are backing up across are all in the range
1024 U+0000 - U+007F, aka ASCII, so we can assume the number of
1025 bytes comprising them based on the input encoding.
1026
1027 3. The codepoints we are backing up across do not include any
1028 line breaks, so we can assume that the line number stays the
1029 same and the column number can simply be decremented.
1030 */
1031 parser->error = error;
1032 parser->errorOffset = (byte)codepointsAgo;
1033 }
1034
JSON_Parser_SetErrorAtToken(JSON_Parser parser,Error error)1035 static void JSON_Parser_SetErrorAtToken(JSON_Parser parser, Error error)
1036 {
1037 parser->error = error;
1038 parser->errorOffset = ERROR_LOCATION_IS_TOKEN_START;
1039 }
1040
JSON_Parser_PushMemberNameList(JSON_Parser parser)1041 static JSON_Status JSON_Parser_PushMemberNameList(JSON_Parser parser)
1042 {
1043 MemberNames* pNames = (MemberNames*)parser->memorySuite.realloc(
1044 parser->memorySuite.userData, NULL, sizeof(MemberNames));
1045
1046 if (!pNames)
1047 {
1048 JSON_Parser_SetErrorAtCodepoint(parser, JSON_Error_OutOfMemory);
1049 return JSON_Failure;
1050 }
1051
1052 pNames->pAncestor = parser->pMemberNames;
1053 pNames->pFirstName = NULL;
1054 parser->pMemberNames = pNames;
1055 return JSON_Success;
1056 }
1057
/* Pops the top member-name list off the parser's stack, releasing every
   recorded name and then the list record itself. The ancestor list (which
   may be NULL) becomes the new top.

   The stack must not be empty: parser->pMemberNames is dereferenced
   unconditionally. */
static void JSON_Parser_PopMemberNameList(JSON_Parser parser)
{
    MemberNames* pTop      = parser->pMemberNames;
    MemberNames* pAncestor = pTop->pAncestor;
    MemberName*  pName;

    for (pName = pTop->pFirstName; pName; pName = pTop->pFirstName)
    {
        /* Fetch the link before the node is released. */
        MemberName* pRest = pName->pNextName;
        parser->memorySuite.free(parser->memorySuite.userData, pName);
        pTop->pFirstName = pRest;
    }

    parser->memorySuite.free(parser->memorySuite.userData, pTop);
    parser->pMemberNames = pAncestor;
}
1070
JSON_Parser_StartContainer(JSON_Parser parser,int isObject)1071 static JSON_Status JSON_Parser_StartContainer(JSON_Parser parser, int isObject)
1072 {
1073 if (isObject && GET_FLAGS(parser->flags, PARSER_TRACK_OBJECT_MEMBERS) &&
1074 !JSON_Parser_PushMemberNameList(parser))
1075 {
1076 return JSON_Failure;
1077 }
1078 parser->depth++;
1079 return JSON_Success;
1080 }
1081
/* Bookkeeping for leaving an object (isObject != 0) or an array: the
   nesting depth is decremented and, for objects with member tracking
   enabled, the object's member-name list is popped and released. */
static void JSON_Parser_EndContainer(JSON_Parser parser, int isObject)
{
    parser->depth--;

    if (!isObject)
        return;

    if (!GET_FLAGS(parser->flags, PARSER_TRACK_OBJECT_MEMBERS))
        return;

    JSON_Parser_PopMemberNameList(parser);
}
1090
JSON_Parser_AddMemberNameToList(JSON_Parser parser)1091 static JSON_Status JSON_Parser_AddMemberNameToList(JSON_Parser parser)
1092 {
1093 if (GET_FLAGS(parser->flags, PARSER_TRACK_OBJECT_MEMBERS))
1094 {
1095 MemberName* pName;
1096 for (pName = parser->pMemberNames->pFirstName; pName; pName = pName->pNextName)
1097 {
1098 if (pName->length == parser->tokenBytesUsed && !memcmp(pName->pBytes, parser->pTokenBytes, pName->length))
1099 {
1100 JSON_Parser_SetErrorAtToken(parser, JSON_Error_DuplicateObjectMember);
1101 return JSON_Failure;
1102 }
1103 }
1104 pName = (MemberName*)parser->memorySuite.realloc(parser->memorySuite.userData, NULL, sizeof(MemberName) + parser->tokenBytesUsed - 1);
1105 if (!pName)
1106 {
1107 JSON_Parser_SetErrorAtCodepoint(parser, JSON_Error_OutOfMemory);
1108 return JSON_Failure;
1109 }
1110 pName->pNextName = parser->pMemberNames->pFirstName;
1111 pName->length = parser->tokenBytesUsed;
1112 memcpy(pName->pBytes, parser->pTokenBytes, parser->tokenBytesUsed);
1113 parser->pMemberNames->pFirstName = pName;
1114 }
1115 return JSON_Success;
1116 }
1117
/* Puts every field of the parser (other than its memory suite) back into
   its default state.

   isInitialized == 0: the structure has never been set up, so the token
     buffer is pointed at the built-in defaultTokenBytes and the
     member-name stack is simply set to empty.

   isInitialized != 0: the parser is being reused. The token buffer (and,
     via Grammarian_Reset, the symbol stack) that may already have been
     allocated are kept; a client that wants that memory back must free
     the parser and create a new one. Any member-name lists still on the
     stack are released. */
static void JSON_Parser_ResetData(JSON_Parser parser, int isInitialized)
{
    /* Client-visible settings. */
    parser->userData        = NULL;
    parser->flags           = PARSER_DEFAULT_FLAGS;
    parser->inputEncoding   = JSON_UnknownEncoding;
    parser->stringEncoding  = JSON_UTF8;
    parser->numberEncoding  = JSON_UTF8;
    parser->maxStringLength = SIZE_MAX;
    parser->maxNumberLength = SIZE_MAX;

    /* Token and lexer state. */
    parser->token           = T_NONE;
    parser->tokenAttributes = 0;
    parser->tokenBytesUsed  = 0;
    parser->lexerState      = LEXING_WHITESPACE;
    parser->lexerBits       = 0;

    /* Error state. */
    parser->error       = JSON_Error_None;
    parser->errorOffset = 0;

    /* Locations and nesting depth. */
    parser->codepointLocationByte   = 0;
    parser->codepointLocationLine   = 0;
    parser->codepointLocationColumn = 0;
    parser->tokenLocationByte       = 0;
    parser->tokenLocationLine       = 0;
    parser->tokenLocationColumn     = 0;
    parser->depth                   = 0;

    if (isInitialized)
    {
        /* Keep the buffers we already own; only drop leftover name lists. */
        while (parser->pMemberNames)
            JSON_Parser_PopMemberNameList(parser);
    }
    else
    {
        parser->pTokenBytes      = parser->defaultTokenBytes;
        parser->tokenBytesLength = sizeof(parser->defaultTokenBytes);
        parser->pMemberNames     = NULL;
    }

    Decoder_Reset(&parser->decoderData);
    Grammarian_Reset(&parser->grammarianData, isInitialized);

    /* Unregister every client callback. */
    parser->encodingDetectedHandler = NULL;
    parser->nullHandler             = NULL;
    parser->booleanHandler          = NULL;
    parser->stringHandler           = NULL;
    parser->numberHandler           = NULL;
    parser->specialNumberHandler    = NULL;
    parser->startObjectHandler      = NULL;
    parser->endObjectHandler        = NULL;
    parser->objectMemberHandler     = NULL;
    parser->startArrayHandler       = NULL;
    parser->endArrayHandler         = NULL;
    parser->arrayItemHandler        = NULL;

    /* The state must be the very last thing to change. */
    parser->state = PARSER_RESET;
}
1177
/* Appends a null terminator to the token buffer without changing
   tokenBytesUsed. The terminator is as wide as the shortest sequence of
   the token's output encoding (the number encoding for T_NUMBER tokens,
   the string encoding otherwise).

   No bounds check is needed: whenever codepoints are recorded, the buffer
   is kept large enough to leave LONGEST_ENCODING_SEQUENCE spare bytes at
   its end. */
static void JSON_Parser_NullTerminateToken(JSON_Parser parser)
{
    static const byte zeroBytes[LONGEST_ENCODING_SEQUENCE] = { 0 };
    Encoding outputEncoding;

    if (parser->token == T_NUMBER)
        outputEncoding = parser->numberEncoding;
    else
        outputEncoding = parser->stringEncoding;

    memcpy(parser->pTokenBytes + parser->tokenBytesUsed,
           zeroBytes,
           (size_t)SHORTEST_ENCODING_SEQUENCE(outputEncoding));
}
1187
JSON_Parser_FlushParser(JSON_Parser parser)1188 static JSON_Status JSON_Parser_FlushParser(JSON_Parser parser)
1189 {
1190 /* The symbol stack should be empty when parsing finishes. */
1191 if (!Grammarian_FinishedDocument(&parser->grammarianData))
1192 {
1193 JSON_Parser_SetErrorAtCodepoint(parser, JSON_Error_ExpectedMoreTokens);
1194 return JSON_Failure;
1195 }
1196 return JSON_Success;
1197 }
1198
1199 typedef JSON_Parser_HandlerResult (JSON_CALL * JSON_Parser_SimpleTokenHandler)(JSON_Parser parser);
JSON_Parser_CallSimpleTokenHandler(JSON_Parser parser,JSON_Parser_SimpleTokenHandler handler)1200 static JSON_Status JSON_Parser_CallSimpleTokenHandler(JSON_Parser parser, JSON_Parser_SimpleTokenHandler handler)
1201 {
1202 if (handler)
1203 {
1204 JSON_Parser_HandlerResult result;
1205 SET_FLAGS_ON(ParserState, parser->state, PARSER_IN_TOKEN_HANDLER);
1206 result = handler(parser);
1207 SET_FLAGS_OFF(ParserState, parser->state, PARSER_IN_TOKEN_HANDLER);
1208 if (result != JSON_Parser_Continue)
1209 {
1210 JSON_Parser_SetErrorAtToken(parser, JSON_Error_AbortedByHandler);
1211 return JSON_Failure;
1212 }
1213 }
1214 return JSON_Success;
1215 }
1216
JSON_Parser_CallBooleanHandler(JSON_Parser parser)1217 static JSON_Status JSON_Parser_CallBooleanHandler(JSON_Parser parser)
1218 {
1219 if (parser->booleanHandler)
1220 {
1221 JSON_Parser_HandlerResult result;
1222 SET_FLAGS_ON(ParserState, parser->state, PARSER_IN_TOKEN_HANDLER);
1223 result = parser->booleanHandler(parser, parser->token == T_TRUE ? JSON_True : JSON_False);
1224 SET_FLAGS_OFF(ParserState, parser->state, PARSER_IN_TOKEN_HANDLER);
1225 if (result != JSON_Parser_Continue)
1226 {
1227 JSON_Parser_SetErrorAtToken(parser, JSON_Error_AbortedByHandler);
1228 return JSON_Failure;
1229 }
1230 }
1231 return JSON_Success;
1232 }
1233
JSON_Parser_CallStringHandler(JSON_Parser parser,int isObjectMember)1234 static JSON_Status JSON_Parser_CallStringHandler(JSON_Parser parser, int isObjectMember)
1235 {
1236 JSON_Parser_StringHandler handler = isObjectMember ? parser->objectMemberHandler : parser->stringHandler;
1237 if (handler)
1238 {
1239 JSON_Parser_HandlerResult result;
1240 JSON_Parser_NullTerminateToken(parser);
1241 SET_FLAGS_ON(ParserState, parser->state, PARSER_IN_TOKEN_HANDLER);
1242 result = handler(parser, (char*)parser->pTokenBytes, parser->tokenBytesUsed, parser->tokenAttributes);
1243 SET_FLAGS_OFF(ParserState, parser->state, PARSER_IN_TOKEN_HANDLER);
1244
1245 if (result != JSON_Parser_Continue)
1246 {
1247 JSON_Parser_SetErrorAtToken(parser,
1248 (isObjectMember && result == JSON_Parser_TreatAsDuplicateObjectMember)
1249 ? JSON_Error_DuplicateObjectMember
1250 : JSON_Error_AbortedByHandler);
1251 return JSON_Failure;
1252 }
1253 }
1254 return JSON_Success;
1255 }
1256
JSON_Parser_CallNumberHandler(JSON_Parser parser)1257 static JSON_Status JSON_Parser_CallNumberHandler(JSON_Parser parser)
1258 {
1259 if (parser->numberHandler)
1260 {
1261 JSON_Parser_HandlerResult result;
1262 JSON_Parser_NullTerminateToken(parser);
1263 SET_FLAGS_ON(ParserState, parser->state, PARSER_IN_TOKEN_HANDLER);
1264 result = parser->numberHandler(parser, (char*)parser->pTokenBytes,
1265 parser->tokenBytesUsed, parser->tokenAttributes);
1266
1267 SET_FLAGS_OFF(ParserState, parser->state, PARSER_IN_TOKEN_HANDLER);
1268
1269 if (result != JSON_Parser_Continue)
1270 {
1271 JSON_Parser_SetErrorAtToken(parser, JSON_Error_AbortedByHandler);
1272 return JSON_Failure;
1273 }
1274 }
1275 return JSON_Success;
1276 }
1277
JSON_Parser_CallSpecialNumberHandler(JSON_Parser parser)1278 static JSON_Status JSON_Parser_CallSpecialNumberHandler(JSON_Parser parser)
1279 {
1280 if (parser->specialNumberHandler)
1281 {
1282 JSON_Parser_HandlerResult result;
1283 SET_FLAGS_ON(ParserState, parser->state, PARSER_IN_TOKEN_HANDLER);
1284 result = parser->specialNumberHandler(parser, parser->token == T_NAN ? JSON_NaN :
1285 (parser->token == T_INFINITY ? JSON_Infinity : JSON_NegativeInfinity));
1286 SET_FLAGS_OFF(ParserState, parser->state, PARSER_IN_TOKEN_HANDLER);
1287
1288 if (result != JSON_Parser_Continue)
1289 {
1290 JSON_Parser_SetErrorAtToken(parser, JSON_Error_AbortedByHandler);
1291 return JSON_Failure;
1292 }
1293 }
1294 return JSON_Success;
1295 }
1296
JSON_Parser_HandleGrammarEvents(JSON_Parser parser,byte emit)1297 static JSON_Status JSON_Parser_HandleGrammarEvents(JSON_Parser parser, byte emit)
1298 {
1299 if (GET_FLAGS(emit, EMIT_ARRAY_ITEM))
1300 {
1301 if (!JSON_Parser_CallSimpleTokenHandler(parser, parser->arrayItemHandler))
1302 {
1303 return JSON_Failure;
1304 }
1305 SET_FLAGS_OFF(byte, emit, EMIT_ARRAY_ITEM);
1306 }
1307 switch (emit)
1308 {
1309 case EMIT_NULL:
1310 if (!JSON_Parser_CallSimpleTokenHandler(parser, parser->nullHandler))
1311 return JSON_Failure;
1312 break;
1313
1314 case EMIT_BOOLEAN:
1315 if (!JSON_Parser_CallBooleanHandler(parser))
1316 return JSON_Failure;
1317 break;
1318
1319 case EMIT_STRING:
1320 if (!JSON_Parser_CallStringHandler(parser, 0/* isObjectMember */))
1321 return JSON_Failure;
1322 break;
1323
1324 case EMIT_NUMBER:
1325 if (!JSON_Parser_CallNumberHandler(parser))
1326 return JSON_Failure;
1327 break;
1328
1329 case EMIT_SPECIAL_NUMBER:
1330 if (!JSON_Parser_CallSpecialNumberHandler(parser))
1331 return JSON_Failure;
1332 break;
1333
1334 case EMIT_START_OBJECT:
1335 if (!JSON_Parser_CallSimpleTokenHandler(parser, parser->startObjectHandler) ||
1336 !JSON_Parser_StartContainer(parser, 1/*isObject*/))
1337 return JSON_Failure;
1338 break;
1339
1340 case EMIT_END_OBJECT:
1341 JSON_Parser_EndContainer(parser, 1/*isObject*/);
1342 if (!JSON_Parser_CallSimpleTokenHandler(parser, parser->endObjectHandler))
1343 return JSON_Failure;
1344 break;
1345 case EMIT_OBJECT_MEMBER:
1346 if (!JSON_Parser_AddMemberNameToList(parser) || /* will fail if member is duplicate */
1347 !JSON_Parser_CallStringHandler(parser, 1 /* isObjectMember */))
1348 return JSON_Failure;
1349 break;
1350
1351 case EMIT_START_ARRAY:
1352 if (!JSON_Parser_CallSimpleTokenHandler(parser, parser->startArrayHandler) ||
1353 !JSON_Parser_StartContainer(parser, 0/*isObject*/))
1354 return JSON_Failure;
1355 break;
1356
1357 case EMIT_END_ARRAY:
1358 JSON_Parser_EndContainer(parser, 0/*isObject*/);
1359 if (!JSON_Parser_CallSimpleTokenHandler(parser, parser->endArrayHandler))
1360 return JSON_Failure;
1361 break;
1362 }
1363
1364 if (!parser->depth && GET_FLAGS(parser->flags, PARSER_EMBEDDED_DOCUMENT))
1365 {
1366 JSON_Parser_SetErrorAtCodepoint(parser, JSON_Error_StoppedAfterEmbeddedDocument);
1367 return JSON_Failure;
1368 }
1369 return JSON_Success;
1370 }
1371
JSON_Parser_ProcessToken(JSON_Parser parser)1372 static JSON_Status JSON_Parser_ProcessToken(JSON_Parser parser)
1373 {
1374 GrammarianOutput output;
1375 output = Grammarian_ProcessToken(&parser->grammarianData, parser->token, &parser->memorySuite);
1376 switch (GRAMMARIAN_RESULT_CODE(output))
1377 {
1378 case ACCEPTED_TOKEN:
1379 if (!JSON_Parser_HandleGrammarEvents(parser, GRAMMARIAN_EVENT(output)))
1380 return JSON_Failure;
1381 break;
1382
1383 case REJECTED_TOKEN:
1384 JSON_Parser_SetErrorAtToken(parser, JSON_Error_UnexpectedToken);
1385 return JSON_Failure;
1386
1387 case SYMBOL_STACK_FULL:
1388 JSON_Parser_SetErrorAtCodepoint(parser, JSON_Error_OutOfMemory);
1389 return JSON_Failure;
1390 }
1391
1392 /* Reset the lexer to prepare for the next token. */
1393 parser->lexerState = LEXING_WHITESPACE;
1394 parser->lexerBits = 0;
1395 parser->token = T_NONE;
1396 parser->tokenAttributes = 0;
1397 parser->tokenBytesUsed = 0;
1398 return JSON_Success;
1399 }
1400
1401 /* Lexer functions. */
1402
1403 static const byte expectedLiteralChars[] = { 'u', 'l', 'l', 0, 'r', 'u', 'e', 0, 'a', 'l', 's', 'e', 0, 'a', 'N', 0, 'n', 'f', 'i', 'n', 'i', 't', 'y', 0 };
1404
1405 #define NULL_LITERAL_EXPECTED_CHARS_START_INDEX 0
1406 #define TRUE_LITERAL_EXPECTED_CHARS_START_INDEX 4
1407 #define FALSE_LITERAL_EXPECTED_CHARS_START_INDEX 8
1408 #define NAN_LITERAL_EXPECTED_CHARS_START_INDEX 13
1409 #define INFINITY_LITERAL_EXPECTED_CHARS_START_INDEX 16
1410
1411 /* Forward declaration. */
1412 static JSON_Status JSON_Parser_FlushLexer(JSON_Parser parser);
1413 static JSON_Status JSON_Parser_ProcessCodepoint(
1414 JSON_Parser parser, Codepoint c, size_t encodedLength);
1415
JSON_Parser_HandleInvalidEncodingSequence(JSON_Parser parser,size_t encodedLength)1416 static JSON_Status JSON_Parser_HandleInvalidEncodingSequence(
1417 JSON_Parser parser, size_t encodedLength)
1418 {
1419 if (parser->token == T_STRING && GET_FLAGS(parser->flags, PARSER_REPLACE_INVALID))
1420 {
1421 /* Since we're inside a string token, replacing the invalid sequence
1422 with the Unicode replacement character as requested by the client
1423 is a viable way to avoid a parse failure. Outside a string token,
1424 such a replacement would simply trigger JSON_Error_UnknownToken
1425 when we tried to process the replacement character, so it's less
1426 confusing to stick with JSON_Error_InvalidEncodingSequence in that
1427 case. */
1428 SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_ContainsReplacedCharacter);
1429 return JSON_Parser_ProcessCodepoint(parser, REPLACEMENT_CHARACTER_CODEPOINT, encodedLength);
1430 }
1431 else if (!parser->depth && GET_FLAGS(parser->flags, PARSER_EMBEDDED_DOCUMENT))
1432 {
1433 /* Since we're parsing the top-level value of an embedded
1434 document, assume that the invalid encoding sequence we've
1435 encountered does not actually belong to the document, and
1436 finish parsing by pretending that we've encountered EOF
1437 instead of an invalid sequence. If the content is valid,
1438 this will fail with JSON_Error_StoppedAfterEmbeddedDocument;
1439 otherwise, it will fail with an appropriate error. */
1440 return (JSON_Status)(JSON_Parser_FlushLexer(parser) && JSON_Parser_FlushParser(parser));
1441 }
1442 JSON_Parser_SetErrorAtCodepoint(parser, JSON_Error_InvalidEncodingSequence);
1443 return JSON_Failure;
1444 }
1445
JSON_Parser_HandleInvalidNumber(JSON_Parser parser,Codepoint c,int codepointsSinceValidNumber,TokenAttributes attributesToRemove)1446 static JSON_Status JSON_Parser_HandleInvalidNumber(JSON_Parser parser,
1447 Codepoint c, int codepointsSinceValidNumber, TokenAttributes attributesToRemove)
1448 {
1449 SET_FLAGS_OFF(TokenAttributes, parser->tokenAttributes, attributesToRemove);
1450 if (!parser->depth && GET_FLAGS(parser->flags, PARSER_EMBEDDED_DOCUMENT))
1451 {
1452 /* The invalid number is the top-level value of an embedded document,
1453 and it has a prefix that can be interpreted as a valid number.
1454 We want to backtrack so that we are at the end of that prefix,
1455 and then process the valid token.
1456
1457 Note that backtracking requires us to make three assumptions, which
1458 are always valid in the context of a number token:
1459
1460 1. The input encoding is not JSON_UnknownEncoding.
1461
1462 2 The codepoints we are backing up across are all in the range
1463 U+0000 - U+007F, aka ASCII, so we can assume the number of
1464 bytes comprising them based on the input encoding.
1465
1466 3. The codepoints we are backing up across do not include any
1467 line breaks, so we can assume that the line number stays the
1468 same and the column number can simply be decremented.
1469
1470 For example:
1471
1472 "01" => "0"
1473 "123.!" => "123"
1474 "123e!" => "123"
1475 "123e+!" => "123"
1476 "123e-!" => "123"
1477 "1.2e!" => "1.2"
1478 "1.2e+!" => "1.2"
1479 "1.2e-!" => "1.2"
1480 */
1481 parser->codepointLocationByte -= (size_t)codepointsSinceValidNumber
1482 * (size_t)SHORTEST_ENCODING_SEQUENCE(parser->inputEncoding);
1483 parser->codepointLocationColumn -= (size_t)codepointsSinceValidNumber;
1484 parser->tokenBytesUsed -= (size_t)codepointsSinceValidNumber
1485 * (size_t)SHORTEST_ENCODING_SEQUENCE(parser->numberEncoding);
1486 return JSON_Parser_ProcessToken(parser); /* always fails */
1487 }
1488 /* Allow JSON_Parser_FlushLexer() to fail. */
1489 else if (c == EOF_CODEPOINT)
1490 return JSON_Success;
1491
1492 JSON_Parser_SetErrorAtToken(parser, JSON_Error_InvalidNumber);
1493 return JSON_Failure;
1494 }
1495
/* Begins a new token of kind `token`, remembering the current codepoint
   location (byte, line and column) as the location where the token
   starts. */
static void JSON_Parser_StartToken(JSON_Parser parser, Symbol token)
{
    parser->tokenLocationByte   = parser->codepointLocationByte;
    parser->tokenLocationLine   = parser->codepointLocationLine;
    parser->tokenLocationColumn = parser->codepointLocationColumn;
    parser->token               = token;
}
1503
JSON_Parser_ProcessCodepoint(JSON_Parser parser,Codepoint c,size_t encodedLength)1504 static JSON_Status JSON_Parser_ProcessCodepoint(JSON_Parser parser, Codepoint c, size_t encodedLength)
1505 {
1506 Encoding tokenEncoding;
1507 size_t maxTokenLength;
1508 int tokenFinished = 0;
1509 Codepoint codepointToRecord = EOF_CODEPOINT;
1510
1511 /* If the previous codepoint was U+000D (CARRIAGE RETURN), and the current
1512 codepoint is U+000A (LINE FEED), then treat the 2 codepoints as a single
1513 line break. */
1514 if (GET_FLAGS(parser->state, PARSER_AFTER_CARRIAGE_RETURN))
1515 {
1516 if (c == LINE_FEED_CODEPOINT)
1517 parser->codepointLocationLine--;
1518 SET_FLAGS_OFF(ParserState, parser->state, PARSER_AFTER_CARRIAGE_RETURN);
1519 }
1520
1521 reprocess:
1522
1523 switch (parser->lexerState)
1524 {
1525 case LEXING_WHITESPACE:
1526 if (c == '{')
1527 {
1528 JSON_Parser_StartToken(parser, T_LEFT_CURLY);
1529 tokenFinished = 1;
1530 }
1531 else if (c == '}')
1532 {
1533 JSON_Parser_StartToken(parser, T_RIGHT_CURLY);
1534 tokenFinished = 1;
1535 }
1536 else if (c == '[')
1537 {
1538 JSON_Parser_StartToken(parser, T_LEFT_SQUARE);
1539 tokenFinished = 1;
1540 }
1541 else if (c == ']')
1542 {
1543 JSON_Parser_StartToken(parser, T_RIGHT_SQUARE);
1544 tokenFinished = 1;
1545 }
1546 else if (c == ':')
1547 {
1548 JSON_Parser_StartToken(parser, T_COLON);
1549 tokenFinished = 1;
1550 }
1551 else if (c == ',')
1552 {
1553 JSON_Parser_StartToken(parser, T_COMMA);
1554 tokenFinished = 1;
1555 }
1556 else if (c == 'n')
1557 {
1558 JSON_Parser_StartToken(parser, T_NULL);
1559 parser->lexerBits = NULL_LITERAL_EXPECTED_CHARS_START_INDEX;
1560 parser->lexerState = LEXING_LITERAL;
1561 }
1562 else if (c == 't')
1563 {
1564 JSON_Parser_StartToken(parser, T_TRUE);
1565 parser->lexerBits = TRUE_LITERAL_EXPECTED_CHARS_START_INDEX;
1566 parser->lexerState = LEXING_LITERAL;
1567 }
1568 else if (c == 'f')
1569 {
1570 JSON_Parser_StartToken(parser, T_FALSE);
1571 parser->lexerBits = FALSE_LITERAL_EXPECTED_CHARS_START_INDEX;
1572 parser->lexerState = LEXING_LITERAL;
1573 }
1574 else if (c == '"')
1575 {
1576 JSON_Parser_StartToken(parser, T_STRING);
1577 parser->lexerState = LEXING_STRING;
1578 }
1579 else if (c == '-')
1580 {
1581 JSON_Parser_StartToken(parser, T_NUMBER);
1582 parser->tokenAttributes = JSON_IsNegative;
1583 codepointToRecord = '-';
1584 parser->lexerState = LEXING_NUMBER_AFTER_MINUS;
1585 goto recordNumberCodepointAndAdvance;
1586 }
1587 else if (c == '0')
1588 {
1589 JSON_Parser_StartToken(parser, T_NUMBER);
1590 codepointToRecord = '0';
1591 parser->lexerState = LEXING_NUMBER_AFTER_LEADING_ZERO;
1592 goto recordNumberCodepointAndAdvance;
1593 }
1594 else if (c >= '1' && c <= '9')
1595 {
1596 JSON_Parser_StartToken(parser, T_NUMBER);
1597 codepointToRecord = c;
1598 parser->lexerState = LEXING_NUMBER_DECIMAL_DIGITS;
1599 goto recordNumberCodepointAndAdvance;
1600 }
1601 else if (c == ' ' || c == TAB_CODEPOINT || c == LINE_FEED_CODEPOINT ||
1602 c == CARRIAGE_RETURN_CODEPOINT || c == EOF_CODEPOINT)
1603 {
1604 /* Ignore whitespace between tokens. */
1605 }
1606 else if (c == BOM_CODEPOINT && parser->codepointLocationByte == 0)
1607 {
1608 /* OK, we'll allow the BOM. */
1609 if (GET_FLAGS(parser->flags, PARSER_ALLOW_BOM)) { }
1610 else
1611 {
1612 JSON_Parser_SetErrorAtCodepoint(parser, JSON_Error_BOMNotAllowed);
1613 return JSON_Failure;
1614 }
1615 }
1616 else if (c == '/' && GET_FLAGS(parser->flags, PARSER_ALLOW_COMMENTS))
1617 {
1618 /* Comments are not real tokens, but we save the location
1619 of the comment as the token location in case of an error. */
1620 parser->tokenLocationByte = parser->codepointLocationByte;
1621 parser->tokenLocationLine = parser->codepointLocationLine;
1622 parser->tokenLocationColumn = parser->codepointLocationColumn;
1623 parser->lexerState = LEXING_COMMENT_AFTER_SLASH;
1624 }
1625 else if (c == 'N' && GET_FLAGS(parser->flags, PARSER_ALLOW_SPECIAL_NUMBERS))
1626 {
1627 JSON_Parser_StartToken(parser, T_NAN);
1628 parser->lexerBits = NAN_LITERAL_EXPECTED_CHARS_START_INDEX;
1629 parser->lexerState = LEXING_LITERAL;
1630 }
1631 else if (c == 'I' && GET_FLAGS(parser->flags, PARSER_ALLOW_SPECIAL_NUMBERS))
1632 {
1633 JSON_Parser_StartToken(parser, T_INFINITY);
1634 parser->lexerBits = INFINITY_LITERAL_EXPECTED_CHARS_START_INDEX;
1635 parser->lexerState = LEXING_LITERAL;
1636 }
1637 else
1638 {
1639 JSON_Parser_SetErrorAtCodepoint(parser, JSON_Error_UnknownToken);
1640 return JSON_Failure;
1641 }
1642 goto advance;
1643
1644 case LEXING_LITERAL:
1645 /* While lexing a literal we store an index into expectedLiteralChars
1646 in lexerBits. */
1647 if (expectedLiteralChars[parser->lexerBits])
1648 {
1649 /* The codepoint should match the next character in the literal. */
1650 if (c != expectedLiteralChars[parser->lexerBits])
1651 {
1652 JSON_Parser_SetErrorAtToken(parser, JSON_Error_UnknownToken);
1653 return JSON_Failure;
1654 }
1655 parser->lexerBits++;
1656
1657 /* If the literal is the top-level value of an embedded document,
1658 process it as soon as we consume its last expected codepoint.
1659 Normally we defer processing until the following codepoint
1660 has been examined, so that we can treat sequences like "nullx"
1661 as a single, unknown token rather than a null literal followed
1662 by an unknown token. */
1663 if (!parser->depth && GET_FLAGS(parser->flags, PARSER_EMBEDDED_DOCUMENT) &&
1664 !expectedLiteralChars[parser->lexerBits])
1665 tokenFinished = 1;
1666 }
1667 else
1668 {
1669 /* The literal should be finished, so the codepoint should not be
1670 a plausible JSON literal character, but rather EOF, whitespace,
1671 or the first character of the next token. */
1672 if ((c >= 'A' && c <= 'Z') ||
1673 (c >= 'a' && c <= 'z') ||
1674 (c >= '0' && c <= '9') ||
1675 (c == '_'))
1676 {
1677 JSON_Parser_SetErrorAtToken(parser, JSON_Error_UnknownToken);
1678 return JSON_Failure;
1679 }
1680 if (!JSON_Parser_ProcessToken(parser))
1681 return JSON_Failure;
1682 goto reprocess;
1683 }
1684 goto advance;
1685
1686 case LEXING_STRING:
1687 /* Allow JSON_Parser_FlushLexer() to fail. */
1688 if (c == EOF_CODEPOINT) { }
1689 else if (c == '"')
1690 tokenFinished = 1;
1691 else if (c == '\\')
1692 parser->lexerState = LEXING_STRING_ESCAPE;
1693 else if (c < 0x20 && !GET_FLAGS(parser->flags, PARSER_ALLOW_CONTROL_CHARS))
1694 {
1695 /* ASCII control characters (U+0000 - U+001F) are not allowed to
1696 appear unescaped in string values unless specifically allowed. */
1697 JSON_Parser_SetErrorAtCodepoint(parser, JSON_Error_UnescapedControlCharacter);
1698 return JSON_Failure;
1699 }
1700 else
1701 {
1702 codepointToRecord = c;
1703 goto recordStringCodepointAndAdvance;
1704 }
1705 goto advance;
1706
1707 case LEXING_STRING_ESCAPE:
1708 if (c == EOF_CODEPOINT)
1709 {
1710 /* Allow JSON_Parser_FlushLexer() to fail. */
1711 }
1712 else
1713 {
1714 if (c == 'u')
1715 parser->lexerState = LEXING_STRING_HEX_ESCAPE_BYTE_1;
1716 else
1717 {
1718 if (c == '"' || c == '\\' || c == '/')
1719 codepointToRecord = c;
1720 else if (c == 'b')
1721 codepointToRecord = BACKSPACE_CODEPOINT;
1722 else if (c == 't')
1723 codepointToRecord = TAB_CODEPOINT;
1724 else if (c == 'n')
1725 codepointToRecord = LINE_FEED_CODEPOINT;
1726 else if (c == 'f')
1727 codepointToRecord = FORM_FEED_CODEPOINT;
1728 else if (c == 'r')
1729 codepointToRecord = CARRIAGE_RETURN_CODEPOINT;
1730 else
1731 {
1732 /* The current codepoint location is the first character after
1733 the backslash that started the escape sequence. The error
1734 location should be the beginning of the escape sequence, 1
1735 character earlier. */
1736 JSON_Parser_SetErrorAtStringEscapeSequenceStart(parser, JSON_Error_InvalidEscapeSequence, 1);
1737 return JSON_Failure;
1738 }
1739 parser->lexerState = LEXING_STRING;
1740 goto recordStringCodepointAndAdvance;
1741 }
1742 }
1743 goto advance;
1744
1745 case LEXING_STRING_HEX_ESCAPE_BYTE_1:
1746 case LEXING_STRING_HEX_ESCAPE_BYTE_2:
1747 case LEXING_STRING_HEX_ESCAPE_BYTE_3:
1748 case LEXING_STRING_HEX_ESCAPE_BYTE_4:
1749 case LEXING_STRING_HEX_ESCAPE_BYTE_5:
1750 case LEXING_STRING_HEX_ESCAPE_BYTE_6:
1751 case LEXING_STRING_HEX_ESCAPE_BYTE_7:
1752 case LEXING_STRING_HEX_ESCAPE_BYTE_8:
1753 /* Allow JSON_Parser_FlushLexer() to fail. */
1754 if (c != EOF_CODEPOINT)
1755 {
1756 /* While lexing a string hex escape sequence we store the bytes
1757 of the escaped codepoint in the low 2 bytes of lexerBits. If
1758 the escape sequence represents a leading surrogate, we shift
1759 the leading surrogate into the high 2 bytes and lex a second
1760 hex escape sequence (which should be a trailing surrogate). */
1761 int byteNumber = (parser->lexerState - LEXING_STRING_HEX_ESCAPE_BYTE_1) & 0x3;
1762 uint32_t nibble;
1763 if (c >= '0' && c <= '9')
1764 nibble = c - '0';
1765 else if (c >= 'A' && c <= 'F')
1766 nibble = c - 'A' + 10;
1767 else if (c >= 'a' && c <= 'f')
1768 nibble = c - 'a' + 10;
1769 else
1770 {
1771 /* The current codepoint location is one of the 4 hex digit
1772 character slots in the hex escape sequence. The error
1773 location should be the beginning of the hex escape
1774 sequence, between 2 and 5 bytes earlier. */
1775 int codepointsAgo = 2 /* for "\u" */ + byteNumber;
1776 JSON_Parser_SetErrorAtStringEscapeSequenceStart(
1777 parser, JSON_Error_InvalidEscapeSequence, codepointsAgo);
1778 return JSON_Failure;
1779 }
1780 /* Store the hex digit's bits in the appropriate byte of lexerBits. */
1781 nibble <<= (3 - byteNumber) * 4 /* shift left by 12, 8, 4, 0 */ ;
1782 parser->lexerBits |= nibble;
1783 if (parser->lexerState == LEXING_STRING_HEX_ESCAPE_BYTE_4)
1784 {
1785 /* The escape sequence is complete. We need to check whether
1786 it represents a leading surrogate (which implies that it
1787 will be immediately followed by a hex-escaped trailing
1788 surrogate), a trailing surrogate (which is invalid), or a
1789 valid codepoint (which should simply be appended to the
1790 string token value). */
1791 if (IS_LEADING_SURROGATE(parser->lexerBits))
1792 {
1793 /* Shift the leading surrogate into the high 2 bytes of
1794 lexerBits so that the trailing surrogate can be stored
1795 in the low 2 bytes. */
1796 parser->lexerBits <<= 16;
1797 parser->lexerState = LEXING_STRING_TRAILING_SURROGATE_HEX_ESCAPE_BACKSLASH;
1798 }
1799 else if (IS_TRAILING_SURROGATE(parser->lexerBits))
1800 {
1801 /* The current codepoint location is the last hex digit
1802 of the hex escape sequence. The error location should
1803 be the beginning of the hex escape sequence, 5
1804 characters earlier. */
1805 JSON_Parser_SetErrorAtStringEscapeSequenceStart(
1806 parser, JSON_Error_UnpairedSurrogateEscapeSequence, 5);
1807 return JSON_Failure;
1808 }
1809 else
1810 {
1811 /* The escape sequence represents a BMP codepoint. */
1812 codepointToRecord = parser->lexerBits;
1813 parser->lexerBits = 0;
1814 parser->lexerState = LEXING_STRING;
1815 goto recordStringCodepointAndAdvance;
1816 }
1817 }
1818 else if (parser->lexerState == LEXING_STRING_HEX_ESCAPE_BYTE_8)
1819 {
1820 /* The second hex escape sequence is complete. We need to
1821 check whether it represents a trailing surrogate as
1822 expected. If so, the surrogate pair represents a single
1823 non-BMP codepoint. */
1824 if (!IS_TRAILING_SURROGATE(parser->lexerBits & 0xFFFF))
1825 {
1826 /* The current codepoint location is the last hex digit of
1827 the second hex escape sequence. The error location
1828 should be the beginning of the leading surrogate
1829 hex escape sequence, 11 characters earlier. */
1830 JSON_Parser_SetErrorAtStringEscapeSequenceStart(
1831 parser, JSON_Error_UnpairedSurrogateEscapeSequence, 11);
1832 return JSON_Failure;
1833 }
1834 /* The escape sequence represents a non-BMP codepoint. */
1835 codepointToRecord = CODEPOINT_FROM_SURROGATES(parser->lexerBits);
1836 parser->lexerBits = 0;
1837 parser->lexerState = LEXING_STRING;
1838 goto recordStringCodepointAndAdvance;
1839 }
1840 else
1841 parser->lexerState++;
1842 }
1843 goto advance;
1844
1845 case LEXING_STRING_TRAILING_SURROGATE_HEX_ESCAPE_BACKSLASH:
1846 if (c != EOF_CODEPOINT)
1847 {
1848 if (c != '\\')
1849 {
1850 /* The current codepoint location is the first character after
1851 the leading surrogate hex escape sequence. The error
1852 location should be the beginning of the leading surrogate
1853 hex escape sequence, 6 characters earlier. */
1854 JSON_Parser_SetErrorAtStringEscapeSequenceStart(
1855 parser, JSON_Error_UnpairedSurrogateEscapeSequence, 6);
1856 return JSON_Failure;
1857 }
1858 parser->lexerState = LEXING_STRING_TRAILING_SURROGATE_HEX_ESCAPE_U;
1859 }
1860 goto advance;
1861
1862 case LEXING_STRING_TRAILING_SURROGATE_HEX_ESCAPE_U:
1863 if (c != EOF_CODEPOINT)
1864 {
1865 if (c != 'u')
1866 {
1867 /* Distinguish between a totally bogus escape sequence
1868 and a valid one that just isn't the hex escape kind
1869 that we require for a trailing surrogate. The current
1870 codepoint location is the first character after the
1871 backslash that should have introduced the trailing
1872 surrogate hex escape sequence. */
1873 if (c == '"' || c == '\\' || c == '/' || c == 'b' ||
1874 c == 't' || c == 'n' || c == 'f' || c == 'r')
1875 {
1876 /* The error location should be at that beginning of the
1877 leading surrogate's hex escape sequence, 7 characters
1878 earlier. */
1879 JSON_Parser_SetErrorAtStringEscapeSequenceStart(
1880 parser, JSON_Error_UnpairedSurrogateEscapeSequence, 7);
1881 }
1882 else
1883 {
1884 /* The error location should be at that backslash, 1
1885 character earlier. */
1886 JSON_Parser_SetErrorAtStringEscapeSequenceStart(
1887 parser, JSON_Error_InvalidEscapeSequence, 1);
1888 }
1889 return JSON_Failure;
1890 }
1891 parser->lexerState = LEXING_STRING_HEX_ESCAPE_BYTE_5;
1892 }
1893 goto advance;
1894
1895 case LEXING_NUMBER_AFTER_MINUS:
1896 if (c == EOF_CODEPOINT)
1897 {
1898 /* Allow JSON_Parser_FlushLexer() to fail. */
1899 }
1900 else if (c == 'I' && GET_FLAGS(parser->flags, PARSER_ALLOW_SPECIAL_NUMBERS))
1901 {
1902 parser->token = T_NEGATIVE_INFINITY; /* changing horses mid-stream, so to speak */
1903 parser->lexerBits = INFINITY_LITERAL_EXPECTED_CHARS_START_INDEX;
1904 parser->lexerState = LEXING_LITERAL;
1905 }
1906 else
1907 {
1908 if (c == '0')
1909 {
1910 codepointToRecord = '0';
1911 parser->lexerState = LEXING_NUMBER_AFTER_LEADING_NEGATIVE_ZERO;
1912 goto recordNumberCodepointAndAdvance;
1913 }
1914 else if (c >= '1' && c <= '9')
1915 {
1916 codepointToRecord = c;
1917 parser->lexerState = LEXING_NUMBER_DECIMAL_DIGITS;
1918 goto recordNumberCodepointAndAdvance;
1919 }
1920 else
1921 {
1922 /* We trigger an unknown token error rather than an invalid number
1923 error so that "Foo" and "-Foo" trigger the same error. */
1924 JSON_Parser_SetErrorAtToken(parser, JSON_Error_UnknownToken);
1925 return JSON_Failure;
1926 }
1927 }
1928 goto advance;
1929
1930 case LEXING_NUMBER_AFTER_LEADING_ZERO:
1931 case LEXING_NUMBER_AFTER_LEADING_NEGATIVE_ZERO:
1932 if (c == '.')
1933 {
1934 codepointToRecord = '.';
1935 SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_ContainsDecimalPoint);
1936 parser->lexerState = LEXING_NUMBER_AFTER_DOT;
1937 goto recordNumberCodepointAndAdvance;
1938 }
1939 else if (c == 'e' || c == 'E')
1940 {
1941 codepointToRecord = c;
1942 SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_ContainsExponent);
1943 parser->lexerState = LEXING_NUMBER_AFTER_E;
1944 goto recordNumberCodepointAndAdvance;
1945 }
1946 else if (c >= '0' && c <= '9')
1947 {
1948 /* JSON does not allow the integer part of a number to have any
1949 digits after a leading zero. */
1950 if (!JSON_Parser_HandleInvalidNumber(parser, c, 0, 0))
1951 return JSON_Failure;
1952 }
1953 else if ((c == 'x' || c == 'X') &&
1954 parser->lexerState == LEXING_NUMBER_AFTER_LEADING_ZERO &&
1955 GET_FLAGS(parser->flags, PARSER_ALLOW_HEX_NUMBERS))
1956 {
1957 codepointToRecord = c;
1958 SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_IsHex);
1959 parser->lexerState = LEXING_NUMBER_AFTER_X;
1960 goto recordNumberCodepointAndAdvance;
1961 }
1962 else
1963 {
1964 /* The number is finished. */
1965 if (!JSON_Parser_ProcessToken(parser))
1966 return JSON_Failure;
1967 goto reprocess;
1968 }
1969 goto advance;
1970
1971 case LEXING_NUMBER_AFTER_X:
1972 if ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f'))
1973 {
1974 codepointToRecord = c;
1975 parser->lexerState = LEXING_NUMBER_HEX_DIGITS;
1976 goto recordNumberCodepointAndAdvance;
1977 }
1978 else if (!JSON_Parser_HandleInvalidNumber(parser, c, 1, JSON_IsHex))
1979 return JSON_Failure;
1980 goto advance;
1981
1982 case LEXING_NUMBER_HEX_DIGITS:
1983 if ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f'))
1984 {
1985 codepointToRecord = c;
1986 goto recordNumberCodepointAndAdvance;
1987 }
1988 /* The number is finished. */
1989 if (!JSON_Parser_ProcessToken(parser))
1990 return JSON_Failure;
1991 goto reprocess;
1992
1993 case LEXING_NUMBER_DECIMAL_DIGITS:
1994 if (c >= '0' && c <= '9')
1995 {
1996 codepointToRecord = c;
1997 goto recordNumberCodepointAndAdvance;
1998 }
1999 else if (c == '.')
2000 {
2001 codepointToRecord = '.';
2002 SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_ContainsDecimalPoint);
2003 parser->lexerState = LEXING_NUMBER_AFTER_DOT;
2004 goto recordNumberCodepointAndAdvance;
2005 }
2006 else if (c == 'e' || c == 'E')
2007 {
2008 codepointToRecord = c;
2009 SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_ContainsExponent);
2010 parser->lexerState = LEXING_NUMBER_AFTER_E;
2011 goto recordNumberCodepointAndAdvance;
2012 }
2013 /* The number is finished. */
2014 if (!JSON_Parser_ProcessToken(parser))
2015 return JSON_Failure;
2016 goto reprocess;
2017
2018 case LEXING_NUMBER_AFTER_DOT:
2019 if (c >= '0' && c <= '9')
2020 {
2021 codepointToRecord = c;
2022 parser->lexerState = LEXING_NUMBER_FRACTIONAL_DIGITS;
2023 goto recordNumberCodepointAndAdvance;
2024 }
2025 else if (!JSON_Parser_HandleInvalidNumber(parser, c, 1, JSON_ContainsDecimalPoint))
2026 return JSON_Failure;
2027 goto advance;
2028
2029 case LEXING_NUMBER_FRACTIONAL_DIGITS:
2030 if (c >= '0' && c <= '9')
2031 {
2032 codepointToRecord = c;
2033 goto recordNumberCodepointAndAdvance;
2034 }
2035 else if (c == 'e' || c == 'E')
2036 {
2037 codepointToRecord = c;
2038 SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_ContainsExponent);
2039 parser->lexerState = LEXING_NUMBER_AFTER_E;
2040 goto recordNumberCodepointAndAdvance;
2041 }
2042 /* The number is finished. */
2043 if (!JSON_Parser_ProcessToken(parser))
2044 return JSON_Failure;
2045 goto reprocess;
2046
2047 case LEXING_NUMBER_AFTER_E:
2048 if (c == '+')
2049 {
2050 codepointToRecord = c;
2051 parser->lexerState = LEXING_NUMBER_AFTER_EXPONENT_SIGN;
2052 goto recordNumberCodepointAndAdvance;
2053 }
2054 else if (c == '-')
2055 {
2056 codepointToRecord = c;
2057 SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_ContainsNegativeExponent);
2058 parser->lexerState = LEXING_NUMBER_AFTER_EXPONENT_SIGN;
2059 goto recordNumberCodepointAndAdvance;
2060 }
2061 else if (c >= '0' && c <= '9')
2062 {
2063 codepointToRecord = c;
2064 parser->lexerState = LEXING_NUMBER_EXPONENT_DIGITS;
2065 goto recordNumberCodepointAndAdvance;
2066 }
2067 else if (!JSON_Parser_HandleInvalidNumber(parser, c, 1, JSON_ContainsExponent))
2068 return JSON_Failure;
2069 goto advance;
2070
2071 case LEXING_NUMBER_AFTER_EXPONENT_SIGN:
2072 if (c >= '0' && c <= '9')
2073 {
2074 codepointToRecord = c;
2075 parser->lexerState = LEXING_NUMBER_EXPONENT_DIGITS;
2076 goto recordNumberCodepointAndAdvance;
2077 }
2078 else if (!JSON_Parser_HandleInvalidNumber(parser, c, 2, JSON_ContainsExponent | JSON_ContainsNegativeExponent))
2079 return JSON_Failure;
2080 goto advance;
2081
2082 case LEXING_NUMBER_EXPONENT_DIGITS:
2083 if (c >= '0' && c <= '9')
2084 {
2085 codepointToRecord = c;
2086 goto recordNumberCodepointAndAdvance;
2087 }
2088 /* The number is finished. */
2089 if (!JSON_Parser_ProcessToken(parser))
2090 return JSON_Failure;
2091 goto reprocess;
2092
2093 case LEXING_COMMENT_AFTER_SLASH:
2094 if (c == '/')
2095 parser->lexerState = LEXING_SINGLE_LINE_COMMENT;
2096 else if (c == '*')
2097 parser->lexerState = LEXING_MULTI_LINE_COMMENT;
2098 else
2099 {
2100 JSON_Parser_SetErrorAtToken(parser, JSON_Error_UnknownToken);
2101 return JSON_Failure;
2102 }
2103 goto advance;
2104
2105 case LEXING_SINGLE_LINE_COMMENT:
2106 if (c == CARRIAGE_RETURN_CODEPOINT || c == LINE_FEED_CODEPOINT || c == EOF_CODEPOINT)
2107 parser->lexerState = LEXING_WHITESPACE;
2108 goto advance;
2109
2110 case LEXING_MULTI_LINE_COMMENT:
2111 if (c == '*')
2112 parser->lexerState = LEXING_MULTI_LINE_COMMENT_AFTER_STAR;
2113 goto advance;
2114
2115 case LEXING_MULTI_LINE_COMMENT_AFTER_STAR:
2116 if (c == '/')
2117 parser->lexerState = LEXING_WHITESPACE;
2118 else if (c != '*')
2119 parser->lexerState = LEXING_MULTI_LINE_COMMENT;
2120 goto advance;
2121 }
2122
2123 recordStringCodepointAndAdvance:
2124
2125 tokenEncoding = parser->stringEncoding;
2126 maxTokenLength = parser->maxStringLength;
2127 if (!codepointToRecord)
2128 {
2129 SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_ContainsNullCharacter | JSON_ContainsControlCharacter);
2130 }
2131 else if (codepointToRecord < FIRST_NON_CONTROL_CODEPOINT)
2132 {
2133 SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_ContainsControlCharacter);
2134 }
2135 else if (codepointToRecord >= FIRST_NON_BMP_CODEPOINT)
2136 {
2137 SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_ContainsNonASCIICharacter | JSON_ContainsNonBMPCharacter);
2138 }
2139 else if (codepointToRecord >= FIRST_NON_ASCII_CODEPOINT)
2140 {
2141 SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_ContainsNonASCIICharacter);
2142 }
2143 goto recordCodepointAndAdvance;
2144
2145 recordNumberCodepointAndAdvance:
2146
2147 tokenEncoding = parser->numberEncoding;
2148 maxTokenLength = parser->maxNumberLength;
2149 goto recordCodepointAndAdvance;
2150
2151 recordCodepointAndAdvance:
2152
2153 /* We always ensure that there are LONGEST_ENCODING_SEQUENCE bytes
2154 available in the buffer for the next codepoint, so we don't have to
2155 check whether there is room when we decode a new codepoint, and if
2156 there isn't another codepoint, we have space already allocated for
2157 the encoded null terminator.*/
2158 parser->tokenBytesUsed += EncodeCodepoint(codepointToRecord, tokenEncoding, parser->pTokenBytes + parser->tokenBytesUsed);
2159 if (parser->tokenBytesUsed > maxTokenLength)
2160 {
2161 JSON_Parser_SetErrorAtToken(parser, parser->token == T_NUMBER ? JSON_Error_TooLongNumber : JSON_Error_TooLongString);
2162 return JSON_Failure;
2163 }
2164 if (parser->tokenBytesUsed > parser->tokenBytesLength - LONGEST_ENCODING_SEQUENCE)
2165 {
2166 byte* pBiggerBuffer = DoubleBuffer(&parser->memorySuite, parser->defaultTokenBytes, parser->pTokenBytes, parser->tokenBytesLength);
2167 if (!pBiggerBuffer)
2168 {
2169 JSON_Parser_SetErrorAtCodepoint(parser, JSON_Error_OutOfMemory);
2170 return JSON_Failure;
2171 }
2172 parser->pTokenBytes = pBiggerBuffer;
2173 parser->tokenBytesLength *= 2;
2174 }
2175 goto advance;
2176
2177 advance:
2178
2179 /* The current codepoint has been accepted, so advance the codepoint
2180 location counters accordingly. Note that the one time we don't
2181 do this is when the codepoint is EOF, which doesn't actually
2182 appear in the input stream. */
2183 if (c == CARRIAGE_RETURN_CODEPOINT)
2184 {
2185 SET_FLAGS_ON(ParserState, parser->state, PARSER_AFTER_CARRIAGE_RETURN);
2186 }
2187 if (c != EOF_CODEPOINT)
2188 {
2189 parser->codepointLocationByte += encodedLength;
2190 if (c == CARRIAGE_RETURN_CODEPOINT || c == LINE_FEED_CODEPOINT)
2191 {
2192 /* The next character will begin a new line. */
2193 parser->codepointLocationLine++;
2194 parser->codepointLocationColumn = 0;
2195 }
2196 else
2197 {
2198 /* The next character will be on the same line. */
2199 parser->codepointLocationColumn++;
2200 }
2201 }
2202
2203 if (tokenFinished && !JSON_Parser_ProcessToken(parser))
2204 return JSON_Failure;
2205
2206 return JSON_Success;
2207 }
2208
JSON_Parser_FlushLexer(JSON_Parser parser)2209 static JSON_Status JSON_Parser_FlushLexer(JSON_Parser parser)
2210 {
2211 /* Push the EOF codepoint to the lexer so that it can finish the pending
2212 token, if any. The EOF codepoint is never emitted by the decoder
2213 itself, since it is outside the Unicode range and therefore cannot
2214 be encoded in any of the possible input encodings. */
2215 if (!JSON_Parser_ProcessCodepoint(parser, EOF_CODEPOINT, 0))
2216 return JSON_Failure;
2217
2218 /* The lexer should be idle when parsing finishes. */
2219 if (parser->lexerState != LEXING_WHITESPACE)
2220 {
2221 JSON_Parser_SetErrorAtToken(parser, JSON_Error_IncompleteToken);
2222 return JSON_Failure;
2223 }
2224 return JSON_Success;
2225 }
2226
2227 /* Parser's decoder functions. */
2228
JSON_Parser_CallEncodingDetectedHandler(JSON_Parser parser)2229 static JSON_Status JSON_Parser_CallEncodingDetectedHandler(JSON_Parser parser)
2230 {
2231 if (parser->encodingDetectedHandler && parser->encodingDetectedHandler(parser) != JSON_Parser_Continue)
2232 {
2233 JSON_Parser_SetErrorAtCodepoint(parser, JSON_Error_AbortedByHandler);
2234 return JSON_Failure;
2235 }
2236 return JSON_Success;
2237 }
2238
2239 /* Forward declaration. */
2240 static JSON_Status JSON_Parser_ProcessInputBytes(JSON_Parser parser, const byte* pBytes, size_t length);
2241
JSON_Parser_ProcessUnknownByte(JSON_Parser parser,byte b)2242 static JSON_Status JSON_Parser_ProcessUnknownByte(JSON_Parser parser, byte b)
2243 {
2244 /* When the input encoding is unknown, the first 4 bytes of input are
2245 recorded in decoder.bits. */
2246 byte bytes[LONGEST_ENCODING_SEQUENCE];
2247
2248 switch (parser->decoderData.state)
2249 {
2250 case DECODER_RESET:
2251 parser->decoderData.state = DECODED_1_OF_4;
2252 parser->decoderData.bits = (uint32_t)b << 24;
2253 break;
2254
2255 case DECODED_1_OF_4:
2256 parser->decoderData.state = DECODED_2_OF_4;
2257 parser->decoderData.bits |= (uint32_t)b << 16;
2258 break;
2259
2260 case DECODED_2_OF_4:
2261 parser->decoderData.state = DECODED_3_OF_4;
2262 parser->decoderData.bits |= (uint32_t)b << 8;
2263 break;
2264
2265 case DECODED_3_OF_4:
2266 bytes[0] = (byte)(parser->decoderData.bits >> 24);
2267 bytes[1] = (byte)(parser->decoderData.bits >> 16);
2268 bytes[2] = (byte)(parser->decoderData.bits >> 8);
2269 bytes[3] = (byte)(b);
2270
2271 /* We try to match the following patterns in order, where .. is any
2272 byte value and nz is any non-zero byte value:
2273 EF BB BF .. => UTF-8 with BOM
2274 FF FE 00 00 => UTF-32LE with BOM
2275 FF FE nz 00 => UTF-16LE with BOM
2276 00 00 FE FF -> UTF-32BE with BOM
2277 FE FF .. .. => UTF-16BE with BOM
2278 nz nz .. .. => UTF-8
2279 nz 00 nz .. => UTF-16LE
2280 nz 00 00 00 => UTF-32LE
2281 00 nz .. .. => UTF-16BE
2282 00 00 00 nz => UTF-32BE
2283 .. .. .. .. => unknown encoding */
2284 if (bytes[0] == 0xEF && bytes[1] == 0xBB && bytes[2] == 0xBF)
2285 {
2286 /* EF BB BF .. */
2287 parser->inputEncoding = JSON_UTF8;
2288 }
2289 else if (bytes[0] == 0xFF && bytes[1] == 0xFE && bytes[3] == 0x00)
2290 {
2291 /* FF FE 00 00 or
2292 FF FE nz 00 */
2293 parser->inputEncoding = (bytes[2] == 0x00) ? JSON_UTF32LE : JSON_UTF16LE;
2294 }
2295 else if (bytes[0] == 0x00 && bytes[1] == 0x00 && bytes[2] == 0xFE && bytes[3] == 0xFF)
2296 {
2297 /* 00 00 FE FF */
2298 parser->inputEncoding = JSON_UTF32BE;
2299 }
2300 else if (bytes[0] == 0xFE && bytes[1] == 0xFF)
2301 {
2302 /* FE FF .. .. */
2303 parser->inputEncoding = JSON_UTF16BE;
2304 }
2305 else if (bytes[0] != 0x00)
2306 {
2307 /* nz .. .. .. */
2308 if (bytes[1] != 0x00)
2309 {
2310 /* nz nz .. .. */
2311 parser->inputEncoding = JSON_UTF8;
2312 }
2313 else if (bytes[2] != 0x00)
2314 {
2315 /* nz 00 nz .. */
2316 parser->inputEncoding = JSON_UTF16LE;
2317 }
2318 else if (bytes[3] == 0x00)
2319 {
2320 /* nz 00 00 00 */
2321 parser->inputEncoding = JSON_UTF32LE;
2322 }
2323 else
2324 {
2325 /* nz 00 00 nz => error */
2326 }
2327 }
2328 else if (bytes[1] != 0x00)
2329 {
2330 /* 00 nz .. .. */
2331 parser->inputEncoding = JSON_UTF16BE;
2332 }
2333 else if (bytes[2] == 0x00 && bytes[3] != 0x00)
2334 {
2335 /* 00 00 00 nz */
2336 parser->inputEncoding = JSON_UTF32BE;
2337 }
2338 else
2339 {
2340 /* 00 00 nz .. or
2341 00 00 00 00 => error */
2342 }
2343
2344 if (parser->inputEncoding == JSON_UnknownEncoding)
2345 return JSON_Parser_HandleInvalidEncodingSequence(parser, 4);
2346
2347 if (!JSON_Parser_CallEncodingDetectedHandler(parser))
2348 return JSON_Failure;
2349
2350 /* Reset the decoder before reprocessing the bytes. */
2351 Decoder_Reset(&parser->decoderData);
2352 return JSON_Parser_ProcessInputBytes(parser, bytes, 4);
2353 }
2354
2355 /* We don't have 4 bytes yet. */
2356 return JSON_Success;
2357 }
2358
JSON_Parser_ProcessInputBytes(JSON_Parser parser,const byte * pBytes,size_t length)2359 JSON_Status JSON_Parser_ProcessInputBytes(JSON_Parser parser, const byte* pBytes, size_t length)
2360 {
2361 /* Note that if length is 0, pBytes is allowed to be NULL. */
2362 size_t i = 0;
2363 while (parser->inputEncoding == JSON_UnknownEncoding && i < length)
2364 {
2365 if (!JSON_Parser_ProcessUnknownByte(parser, pBytes[i]))
2366 return JSON_Failure;
2367 i++;
2368 }
2369 while (i < length)
2370 {
2371 DecoderOutput output = Decoder_ProcessByte(
2372 &parser->decoderData, parser->inputEncoding, pBytes[i]);
2373 DecoderResultCode result = DECODER_RESULT_CODE(output);
2374 switch (result)
2375 {
2376 case SEQUENCE_PENDING:
2377 i++;
2378 break;
2379
2380 case SEQUENCE_COMPLETE:
2381 if (!JSON_Parser_ProcessCodepoint(
2382 parser, DECODER_CODEPOINT(output),
2383 DECODER_SEQUENCE_LENGTH(output)))
2384 return JSON_Failure;
2385 i++;
2386 break;
2387
2388 case SEQUENCE_INVALID_INCLUSIVE:
2389 i++;
2390 /* fallthrough */
2391 case SEQUENCE_INVALID_EXCLUSIVE:
2392 if (!JSON_Parser_HandleInvalidEncodingSequence(
2393 parser, DECODER_SEQUENCE_LENGTH(output)))
2394 return JSON_Failure;
2395 break;
2396 }
2397 }
2398 return JSON_Success;
2399 }
2400
JSON_Parser_FlushDecoder(JSON_Parser parser)2401 static JSON_Status JSON_Parser_FlushDecoder(JSON_Parser parser)
2402 {
2403 /* If the input was 1, 2, or 3 bytes long, and the input encoding was not
2404 explicitly specified by the client, we can sometimes make a reasonable
2405 guess. If the input was 1 or 3 bytes long, the only encoding that could
2406 possibly be valid JSON is UF-8. If the input was 2 bytes long, we try
2407 to match the following patterns in order, where .. is any byte value
2408 and nz is any non-zero byte value:
2409 FF FE => UTF-16LE with BOM
2410 FE FF => UTF-16BE with BOM
2411 nz nz => UTF-8
2412 nz 00 => UTF-16LE
2413 00 nz => UTF-16BE
2414 .. .. => unknown encoding
2415 */
2416 if (parser->inputEncoding == JSON_UnknownEncoding &&
2417 parser->decoderData.state != DECODER_RESET)
2418 {
2419 byte bytes[3];
2420 size_t length = 0;
2421 bytes[0] = (byte)(parser->decoderData.bits >> 24);
2422 bytes[1] = (byte)(parser->decoderData.bits >> 16);
2423 bytes[2] = (byte)(parser->decoderData.bits >> 8);
2424
2425 switch (parser->decoderData.state)
2426 {
2427 case DECODED_1_OF_4:
2428 parser->inputEncoding = JSON_UTF8;
2429 length = 1;
2430 break;
2431
2432 case DECODED_2_OF_4:
2433 /* FF FE */
2434 if (bytes[0] == 0xFF && bytes[1] == 0xFE)
2435 parser->inputEncoding = JSON_UTF16LE;
2436 /* FE FF */
2437 else if (bytes[0] == 0xFE && bytes[1] == 0xFF)
2438 parser->inputEncoding = JSON_UTF16BE;
2439 else if (bytes[0] != 0x00)
2440 {
2441 /* nz nz or
2442 nz 00 */
2443 parser->inputEncoding = bytes[1] ? JSON_UTF8 : JSON_UTF16LE;
2444 }
2445 /* 00 nz */
2446 else if (bytes[1] != 0x00)
2447 parser->inputEncoding = JSON_UTF16BE;
2448 /* 00 00 */
2449 else
2450 return JSON_Parser_HandleInvalidEncodingSequence(parser, 2);
2451 length = 2;
2452 break;
2453
2454 case DECODED_3_OF_4:
2455 parser->inputEncoding = JSON_UTF8;
2456 length = 3;
2457 break;
2458 }
2459
2460 if (!JSON_Parser_CallEncodingDetectedHandler(parser))
2461 return JSON_Failure;
2462
2463 /* Reset the decoder before reprocessing the bytes. */
2464 parser->decoderData.state = DECODER_RESET;
2465 parser->decoderData.bits = 0;
2466 if (!JSON_Parser_ProcessInputBytes(parser, bytes, length))
2467 return JSON_Failure;
2468 }
2469
2470 /* The decoder should be idle when parsing finishes. */
2471 if (Decoder_SequencePending(&parser->decoderData))
2472 return JSON_Parser_HandleInvalidEncodingSequence(
2473 parser, DECODER_STATE_BYTES(parser->decoderData.state));
2474 return JSON_Success;
2475 }
2476
2477 /* Parser API functions. */
2478
JSON_Parser_Create(const JSON_MemorySuite * pMemorySuite)2479 JSON_Parser JSON_CALL JSON_Parser_Create(const JSON_MemorySuite* pMemorySuite)
2480 {
2481 JSON_Parser parser;
2482 JSON_MemorySuite memorySuite;
2483
2484 if (pMemorySuite)
2485 {
2486 memorySuite = *pMemorySuite;
2487
2488 /* The full memory suite must be specified. */
2489 if (!memorySuite.realloc || !memorySuite.free)
2490 return NULL;
2491 }
2492 else
2493 memorySuite = defaultMemorySuite;
2494
2495 parser = (JSON_Parser)memorySuite.realloc(memorySuite.userData, NULL, sizeof(struct JSON_Parser_Data));
2496
2497 if (!parser)
2498 return NULL;
2499
2500 parser->memorySuite = memorySuite;
2501 JSON_Parser_ResetData(parser, 0/* isInitialized */);
2502 return parser;
2503 }
2504
JSON_Parser_Free(JSON_Parser parser)2505 JSON_Status JSON_CALL JSON_Parser_Free(JSON_Parser parser)
2506 {
2507 if (!parser || GET_FLAGS(parser->state, PARSER_IN_PROTECTED_API))
2508 return JSON_Failure;
2509
2510 SET_FLAGS_ON(ParserState, parser->state, PARSER_IN_PROTECTED_API);
2511
2512 if (parser->pTokenBytes != parser->defaultTokenBytes)
2513 parser->memorySuite.free(parser->memorySuite.userData, parser->pTokenBytes);
2514
2515 while (parser->pMemberNames)
2516 JSON_Parser_PopMemberNameList(parser);
2517
2518 Grammarian_FreeAllocations(&parser->grammarianData, &parser->memorySuite);
2519 parser->memorySuite.free(parser->memorySuite.userData, parser);
2520 return JSON_Success;
2521 }
2522
JSON_Parser_Reset(JSON_Parser parser)2523 JSON_Status JSON_CALL JSON_Parser_Reset(JSON_Parser parser)
2524 {
2525 if (!parser || GET_FLAGS(parser->state, PARSER_IN_PROTECTED_API))
2526 return JSON_Failure;
2527 SET_FLAGS_ON(ParserState, parser->state, PARSER_IN_PROTECTED_API);
2528 JSON_Parser_ResetData(parser, 1/* isInitialized */);
2529 /* Note that JSON_Parser_ResetData() unset PARSER_IN_PROTECTED_API for us. */
2530 return JSON_Success;
2531 }
2532
JSON_Parser_GetUserData(JSON_Parser parser)2533 void* JSON_CALL JSON_Parser_GetUserData(JSON_Parser parser)
2534 {
2535 return parser ? parser->userData : NULL;
2536 }
2537
JSON_Parser_SetUserData(JSON_Parser parser,void * userData)2538 JSON_Status JSON_CALL JSON_Parser_SetUserData(JSON_Parser parser, void* userData)
2539 {
2540 if (!parser)
2541 return JSON_Failure;
2542 parser->userData = userData;
2543 return JSON_Success;
2544 }
2545
JSON_Parser_GetInputEncoding(JSON_Parser parser)2546 JSON_Encoding JSON_CALL JSON_Parser_GetInputEncoding(JSON_Parser parser)
2547 {
2548 return parser ? (JSON_Encoding)parser->inputEncoding : JSON_UnknownEncoding;
2549 }
2550
JSON_Parser_SetInputEncoding(JSON_Parser parser,JSON_Encoding encoding)2551 JSON_Status JSON_CALL JSON_Parser_SetInputEncoding(JSON_Parser parser, JSON_Encoding encoding)
2552 {
2553 if ( !parser
2554 || encoding < JSON_UnknownEncoding
2555 || encoding > JSON_UTF32BE
2556 || GET_FLAGS(parser->state, PARSER_STARTED))
2557 return JSON_Failure;
2558 parser->inputEncoding = (Encoding)encoding;
2559 return JSON_Success;
2560 }
2561
JSON_Parser_GetStringEncoding(JSON_Parser parser)2562 JSON_Encoding JSON_CALL JSON_Parser_GetStringEncoding(JSON_Parser parser)
2563 {
2564 return parser ? (JSON_Encoding)parser->stringEncoding : JSON_UTF8;
2565 }
2566
JSON_Parser_SetStringEncoding(JSON_Parser parser,JSON_Encoding encoding)2567 JSON_Status JSON_CALL JSON_Parser_SetStringEncoding(JSON_Parser parser, JSON_Encoding encoding)
2568 {
2569 if (
2570 !parser
2571 || encoding <= JSON_UnknownEncoding
2572 || encoding > JSON_UTF32BE
2573 || GET_FLAGS(parser->state, PARSER_STARTED))
2574 return JSON_Failure;
2575 parser->stringEncoding = (Encoding)encoding;
2576 return JSON_Success;
2577 }
2578
JSON_Parser_GetMaxStringLength(JSON_Parser parser)2579 size_t JSON_CALL JSON_Parser_GetMaxStringLength(JSON_Parser parser)
2580 {
2581 return parser ? parser->maxStringLength : SIZE_MAX;
2582 }
2583
JSON_Parser_SetMaxStringLength(JSON_Parser parser,size_t maxLength)2584 JSON_Status JSON_CALL JSON_Parser_SetMaxStringLength(JSON_Parser parser, size_t maxLength)
2585 {
2586 if ( !parser
2587 || GET_FLAGS(parser->state, PARSER_STARTED))
2588 return JSON_Failure;
2589 parser->maxStringLength = maxLength;
2590 return JSON_Success;
2591 }
2592
JSON_Parser_GetNumberEncoding(JSON_Parser parser)2593 JSON_Encoding JSON_CALL JSON_Parser_GetNumberEncoding(JSON_Parser parser)
2594 {
2595 return parser ? (JSON_Encoding)parser->numberEncoding : JSON_UTF8;
2596 }
2597
JSON_Parser_SetNumberEncoding(JSON_Parser parser,JSON_Encoding encoding)2598 JSON_Status JSON_CALL JSON_Parser_SetNumberEncoding(JSON_Parser parser, JSON_Encoding encoding)
2599 {
2600 if (!parser || encoding <= JSON_UnknownEncoding || encoding > JSON_UTF32BE || GET_FLAGS(parser->state, PARSER_STARTED))
2601 return JSON_Failure;
2602 parser->numberEncoding = (Encoding)encoding;
2603 return JSON_Success;
2604 }
2605
JSON_Parser_GetMaxNumberLength(JSON_Parser parser)2606 size_t JSON_CALL JSON_Parser_GetMaxNumberLength(JSON_Parser parser)
2607 {
2608 return parser ? parser->maxNumberLength : SIZE_MAX;
2609 }
2610
JSON_Parser_SetMaxNumberLength(JSON_Parser parser,size_t maxLength)2611 JSON_Status JSON_CALL JSON_Parser_SetMaxNumberLength(JSON_Parser parser, size_t maxLength)
2612 {
2613 if ( !parser
2614 || GET_FLAGS(parser->state, PARSER_STARTED))
2615 return JSON_Failure;
2616 parser->maxNumberLength = maxLength;
2617 return JSON_Success;
2618 }
2619
JSON_Parser_GetAllowBOM(JSON_Parser parser)2620 JSON_Boolean JSON_CALL JSON_Parser_GetAllowBOM(JSON_Parser parser)
2621 {
2622 return (parser && GET_FLAGS(parser->flags, PARSER_ALLOW_BOM)) ? JSON_True : JSON_False;
2623 }
2624
JSON_Parser_SetAllowBOM(JSON_Parser parser,JSON_Boolean allowBOM)2625 JSON_Status JSON_CALL JSON_Parser_SetAllowBOM(JSON_Parser parser, JSON_Boolean allowBOM)
2626 {
2627 if (!parser || GET_FLAGS(parser->state, PARSER_STARTED))
2628 return JSON_Failure;
2629 SET_FLAGS(ParserFlags, parser->flags, PARSER_ALLOW_BOM, allowBOM);
2630 return JSON_Success;
2631 }
2632
JSON_Parser_GetAllowComments(JSON_Parser parser)2633 JSON_Boolean JSON_CALL JSON_Parser_GetAllowComments(JSON_Parser parser)
2634 {
2635 return (parser && GET_FLAGS(parser->flags, PARSER_ALLOW_COMMENTS)) ? JSON_True : JSON_False;
2636 }
2637
JSON_Parser_SetAllowComments(JSON_Parser parser,JSON_Boolean allowComments)2638 JSON_Status JSON_CALL JSON_Parser_SetAllowComments(JSON_Parser parser, JSON_Boolean allowComments)
2639 {
2640 if (!parser || GET_FLAGS(parser->state, PARSER_STARTED))
2641 return JSON_Failure;
2642 SET_FLAGS(ParserFlags, parser->flags, PARSER_ALLOW_COMMENTS, allowComments);
2643 return JSON_Success;
2644 }
2645
JSON_Parser_GetAllowSpecialNumbers(JSON_Parser parser)2646 JSON_Boolean JSON_CALL JSON_Parser_GetAllowSpecialNumbers(JSON_Parser parser)
2647 {
2648 return (parser && GET_FLAGS(parser->flags, PARSER_ALLOW_SPECIAL_NUMBERS)) ? JSON_True : JSON_False;
2649 }
2650
JSON_Parser_SetAllowSpecialNumbers(JSON_Parser parser,JSON_Boolean allowSpecialNumbers)2651 JSON_Status JSON_CALL JSON_Parser_SetAllowSpecialNumbers(JSON_Parser parser, JSON_Boolean allowSpecialNumbers)
2652 {
2653 if (!parser || GET_FLAGS(parser->state, PARSER_STARTED))
2654 return JSON_Failure;
2655 SET_FLAGS(ParserFlags, parser->flags, PARSER_ALLOW_SPECIAL_NUMBERS, allowSpecialNumbers);
2656 return JSON_Success;
2657 }
2658
JSON_Parser_GetAllowHexNumbers(JSON_Parser parser)2659 JSON_Boolean JSON_CALL JSON_Parser_GetAllowHexNumbers(JSON_Parser parser)
2660 {
2661 return (parser && GET_FLAGS(parser->flags, PARSER_ALLOW_HEX_NUMBERS)) ? JSON_True : JSON_False;
2662 }
2663
JSON_Parser_SetAllowHexNumbers(JSON_Parser parser,JSON_Boolean allowHexNumbers)2664 JSON_Status JSON_CALL JSON_Parser_SetAllowHexNumbers(JSON_Parser parser, JSON_Boolean allowHexNumbers)
2665 {
2666 if (!parser || GET_FLAGS(parser->state, PARSER_STARTED))
2667 return JSON_Failure;
2668 SET_FLAGS(ParserFlags, parser->flags, PARSER_ALLOW_HEX_NUMBERS, allowHexNumbers);
2669 return JSON_Success;
2670 }
2671
JSON_Parser_GetAllowUnescapedControlCharacters(JSON_Parser parser)2672 JSON_Boolean JSON_CALL JSON_Parser_GetAllowUnescapedControlCharacters(JSON_Parser parser)
2673 {
2674 return (parser && GET_FLAGS(parser->flags, PARSER_ALLOW_CONTROL_CHARS)) ? JSON_True : JSON_False;
2675 }
2676
JSON_Parser_SetAllowUnescapedControlCharacters(JSON_Parser parser,JSON_Boolean allowUnescapedControlCharacters)2677 JSON_Status JSON_CALL JSON_Parser_SetAllowUnescapedControlCharacters(JSON_Parser parser, JSON_Boolean allowUnescapedControlCharacters)
2678 {
2679 if (!parser || GET_FLAGS(parser->state, PARSER_STARTED))
2680 return JSON_Failure;
2681 SET_FLAGS(ParserFlags, parser->flags, PARSER_ALLOW_CONTROL_CHARS, allowUnescapedControlCharacters);
2682 return JSON_Success;
2683 }
2684
JSON_Parser_GetReplaceInvalidEncodingSequences(JSON_Parser parser)2685 JSON_Boolean JSON_CALL JSON_Parser_GetReplaceInvalidEncodingSequences(JSON_Parser parser)
2686 {
2687 return (parser && GET_FLAGS(parser->flags, PARSER_REPLACE_INVALID)) ? JSON_True : JSON_False;
2688 }
2689
JSON_Parser_SetReplaceInvalidEncodingSequences(JSON_Parser parser,JSON_Boolean replaceInvalidEncodingSequences)2690 JSON_Status JSON_CALL JSON_Parser_SetReplaceInvalidEncodingSequences(
2691 JSON_Parser parser, JSON_Boolean replaceInvalidEncodingSequences)
2692 {
2693 if (!parser || GET_FLAGS(parser->state, PARSER_STARTED))
2694 return JSON_Failure;
2695 SET_FLAGS(ParserFlags, parser->flags, PARSER_REPLACE_INVALID, replaceInvalidEncodingSequences);
2696 return JSON_Success;
2697 }
2698
JSON_Parser_GetTrackObjectMembers(JSON_Parser parser)2699 JSON_Boolean JSON_CALL JSON_Parser_GetTrackObjectMembers(JSON_Parser parser)
2700 {
2701 return (parser && GET_FLAGS(parser->flags, PARSER_TRACK_OBJECT_MEMBERS)) ? JSON_True : JSON_False;
2702 }
2703
JSON_Parser_SetTrackObjectMembers(JSON_Parser parser,JSON_Boolean trackObjectMembers)2704 JSON_Status JSON_CALL JSON_Parser_SetTrackObjectMembers(JSON_Parser parser, JSON_Boolean trackObjectMembers)
2705 {
2706 if (!parser || GET_FLAGS(parser->state, PARSER_STARTED))
2707 {
2708 return JSON_Failure;
2709 }
2710 SET_FLAGS(ParserFlags, parser->flags, PARSER_TRACK_OBJECT_MEMBERS, trackObjectMembers);
2711 return JSON_Success;
2712 }
2713
JSON_Parser_GetStopAfterEmbeddedDocument(JSON_Parser parser)2714 JSON_Boolean JSON_CALL JSON_Parser_GetStopAfterEmbeddedDocument(JSON_Parser parser)
2715 {
2716 return (parser && GET_FLAGS(parser->flags, PARSER_EMBEDDED_DOCUMENT)) ? JSON_True : JSON_False;
2717 }
2718
JSON_Parser_SetStopAfterEmbeddedDocument(JSON_Parser parser,JSON_Boolean stopAfterEmbeddedDocument)2719 JSON_Status JSON_CALL JSON_Parser_SetStopAfterEmbeddedDocument(
2720 JSON_Parser parser, JSON_Boolean stopAfterEmbeddedDocument)
2721 {
2722 if (!parser || GET_FLAGS(parser->state, PARSER_STARTED))
2723 {
2724 return JSON_Failure;
2725 }
2726 SET_FLAGS(ParserFlags, parser->flags, PARSER_EMBEDDED_DOCUMENT, stopAfterEmbeddedDocument);
2727 return JSON_Success;
2728 }
2729
JSON_Parser_GetError(JSON_Parser parser)2730 JSON_Error JSON_CALL JSON_Parser_GetError(JSON_Parser parser)
2731 {
2732 return parser ? (JSON_Error)parser->error : JSON_Error_None;
2733 }
2734
JSON_Parser_GetErrorLocation(JSON_Parser parser,JSON_Location * pLocation)2735 JSON_Status JSON_CALL JSON_Parser_GetErrorLocation(
2736 JSON_Parser parser, JSON_Location* pLocation)
2737 {
2738 if (!pLocation || !parser || parser->error == JSON_Error_None)
2739 return JSON_Failure;
2740
2741 if (parser->errorOffset == ERROR_LOCATION_IS_TOKEN_START)
2742 {
2743 pLocation->byte = parser->tokenLocationByte;
2744 pLocation->line = parser->tokenLocationLine;
2745 pLocation->column = parser->tokenLocationColumn;
2746 }
2747 else
2748 {
2749 pLocation->byte = parser->codepointLocationByte - (SHORTEST_ENCODING_SEQUENCE(parser->inputEncoding) * parser->errorOffset);
2750 pLocation->line = parser->codepointLocationLine;
2751 pLocation->column = parser->codepointLocationColumn - parser->errorOffset;
2752 }
2753 pLocation->depth = parser->depth;
2754 return JSON_Success;
2755 }
2756
JSON_Parser_GetTokenLocation(JSON_Parser parser,JSON_Location * pLocation)2757 JSON_Status JSON_CALL JSON_Parser_GetTokenLocation(
2758 JSON_Parser parser, JSON_Location* pLocation)
2759 {
2760 if (!parser || !pLocation || !GET_FLAGS(parser->state, PARSER_IN_TOKEN_HANDLER))
2761 return JSON_Failure;
2762
2763 pLocation->byte = parser->tokenLocationByte;
2764 pLocation->line = parser->tokenLocationLine;
2765 pLocation->column = parser->tokenLocationColumn;
2766 pLocation->depth = parser->depth;
2767 return JSON_Success;
2768 }
2769
JSON_Parser_GetAfterTokenLocation(JSON_Parser parser,JSON_Location * pLocation)2770 JSON_Status JSON_CALL JSON_Parser_GetAfterTokenLocation(
2771 JSON_Parser parser, JSON_Location* pLocation)
2772 {
2773 if (!parser || !pLocation || !GET_FLAGS(parser->state, PARSER_IN_TOKEN_HANDLER))
2774 return JSON_Failure;
2775
2776 pLocation->byte = parser->codepointLocationByte;
2777 pLocation->line = parser->codepointLocationLine;
2778 pLocation->column = parser->codepointLocationColumn;
2779 pLocation->depth = parser->depth;
2780 return JSON_Success;
2781 }
2782
JSON_Parser_GetEncodingDetectedHandler(JSON_Parser parser)2783 JSON_Parser_NullHandler JSON_CALL JSON_Parser_GetEncodingDetectedHandler(JSON_Parser parser)
2784 {
2785 return parser ? parser->encodingDetectedHandler : NULL;
2786 }
2787
JSON_Parser_SetEncodingDetectedHandler(JSON_Parser parser,JSON_Parser_EncodingDetectedHandler handler)2788 JSON_Status JSON_CALL JSON_Parser_SetEncodingDetectedHandler(
2789 JSON_Parser parser, JSON_Parser_EncodingDetectedHandler handler)
2790 {
2791 if (!parser)
2792 return JSON_Failure;
2793
2794 parser->encodingDetectedHandler = handler;
2795 return JSON_Success;
2796 }
2797
JSON_Parser_GetNullHandler(JSON_Parser parser)2798 JSON_Parser_NullHandler JSON_CALL JSON_Parser_GetNullHandler(JSON_Parser parser)
2799 {
2800 return parser ? parser->nullHandler : NULL;
2801 }
2802
JSON_Parser_SetNullHandler(JSON_Parser parser,JSON_Parser_NullHandler handler)2803 JSON_Status JSON_CALL JSON_Parser_SetNullHandler(
2804 JSON_Parser parser, JSON_Parser_NullHandler handler)
2805 {
2806 if (!parser)
2807 return JSON_Failure;
2808
2809 parser->nullHandler = handler;
2810 return JSON_Success;
2811 }
2812
JSON_Parser_GetBooleanHandler(JSON_Parser parser)2813 JSON_Parser_BooleanHandler JSON_CALL JSON_Parser_GetBooleanHandler(JSON_Parser parser)
2814 {
2815 return parser ? parser->booleanHandler : NULL;
2816 }
2817
JSON_Parser_SetBooleanHandler(JSON_Parser parser,JSON_Parser_BooleanHandler handler)2818 JSON_Status JSON_CALL JSON_Parser_SetBooleanHandler(
2819 JSON_Parser parser, JSON_Parser_BooleanHandler handler)
2820 {
2821 if (!parser)
2822 return JSON_Failure;
2823
2824 parser->booleanHandler = handler;
2825 return JSON_Success;
2826 }
2827
JSON_Parser_GetStringHandler(JSON_Parser parser)2828 JSON_Parser_StringHandler JSON_CALL JSON_Parser_GetStringHandler(JSON_Parser parser)
2829 {
2830 return parser ? parser->stringHandler : NULL;
2831 }
2832
JSON_Parser_SetStringHandler(JSON_Parser parser,JSON_Parser_StringHandler handler)2833 JSON_Status JSON_CALL JSON_Parser_SetStringHandler(
2834 JSON_Parser parser, JSON_Parser_StringHandler handler)
2835 {
2836 if (!parser)
2837 return JSON_Failure;
2838
2839 parser->stringHandler = handler;
2840 return JSON_Success;
2841 }
2842
JSON_Parser_GetNumberHandler(JSON_Parser parser)2843 JSON_Parser_NumberHandler JSON_CALL JSON_Parser_GetNumberHandler(JSON_Parser parser)
2844 {
2845 return parser ? parser->numberHandler : NULL;
2846 }
2847
JSON_Parser_SetNumberHandler(JSON_Parser parser,JSON_Parser_NumberHandler handler)2848 JSON_Status JSON_CALL JSON_Parser_SetNumberHandler(
2849 JSON_Parser parser, JSON_Parser_NumberHandler handler)
2850 {
2851 if (!parser)
2852 return JSON_Failure;
2853
2854 parser->numberHandler = handler;
2855 return JSON_Success;
2856 }
2857
JSON_Parser_GetSpecialNumberHandler(JSON_Parser parser)2858 JSON_Parser_SpecialNumberHandler JSON_CALL JSON_Parser_GetSpecialNumberHandler(JSON_Parser parser)
2859 {
2860 return parser ? parser->specialNumberHandler : NULL;
2861 }
2862
JSON_Parser_SetSpecialNumberHandler(JSON_Parser parser,JSON_Parser_SpecialNumberHandler handler)2863 JSON_Status JSON_CALL JSON_Parser_SetSpecialNumberHandler(
2864 JSON_Parser parser, JSON_Parser_SpecialNumberHandler handler)
2865 {
2866 if (!parser)
2867 return JSON_Failure;
2868 parser->specialNumberHandler = handler;
2869 return JSON_Success;
2870 }
2871
JSON_Parser_GetStartObjectHandler(JSON_Parser parser)2872 JSON_Parser_StartObjectHandler JSON_CALL JSON_Parser_GetStartObjectHandler(JSON_Parser parser)
2873 {
2874 return parser ? parser->startObjectHandler : NULL;
2875 }
2876
JSON_Parser_SetStartObjectHandler(JSON_Parser parser,JSON_Parser_StartObjectHandler handler)2877 JSON_Status JSON_CALL JSON_Parser_SetStartObjectHandler(
2878 JSON_Parser parser, JSON_Parser_StartObjectHandler handler)
2879 {
2880 if (!parser)
2881 return JSON_Failure;
2882
2883 parser->startObjectHandler = handler;
2884 return JSON_Success;
2885 }
2886
JSON_Parser_GetEndObjectHandler(JSON_Parser parser)2887 JSON_Parser_EndObjectHandler JSON_CALL JSON_Parser_GetEndObjectHandler(JSON_Parser parser)
2888 {
2889 return parser ? parser->endObjectHandler : NULL;
2890 }
2891
JSON_Parser_SetEndObjectHandler(JSON_Parser parser,JSON_Parser_EndObjectHandler handler)2892 JSON_Status JSON_CALL JSON_Parser_SetEndObjectHandler(
2893 JSON_Parser parser, JSON_Parser_EndObjectHandler handler)
2894 {
2895 if (!parser)
2896 return JSON_Failure;
2897
2898 parser->endObjectHandler = handler;
2899 return JSON_Success;
2900 }
2901
JSON_Parser_GetObjectMemberHandler(JSON_Parser parser)2902 JSON_Parser_ObjectMemberHandler JSON_CALL JSON_Parser_GetObjectMemberHandler(JSON_Parser parser)
2903 {
2904 return parser ? parser->objectMemberHandler : NULL;
2905 }
2906
JSON_Parser_SetObjectMemberHandler(JSON_Parser parser,JSON_Parser_ObjectMemberHandler handler)2907 JSON_Status JSON_CALL JSON_Parser_SetObjectMemberHandler(
2908 JSON_Parser parser, JSON_Parser_ObjectMemberHandler handler)
2909 {
2910 if (!parser)
2911 return JSON_Failure;
2912
2913 parser->objectMemberHandler = handler;
2914 return JSON_Success;
2915 }
2916
JSON_Parser_GetStartArrayHandler(JSON_Parser parser)2917 JSON_Parser_StartArrayHandler JSON_CALL JSON_Parser_GetStartArrayHandler(JSON_Parser parser)
2918 {
2919 return parser ? parser->startArrayHandler : NULL;
2920 }
2921
JSON_Parser_SetStartArrayHandler(JSON_Parser parser,JSON_Parser_StartArrayHandler handler)2922 JSON_Status JSON_CALL JSON_Parser_SetStartArrayHandler(
2923 JSON_Parser parser, JSON_Parser_StartArrayHandler handler)
2924 {
2925 if (!parser)
2926 return JSON_Failure;
2927
2928 parser->startArrayHandler = handler;
2929 return JSON_Success;
2930 }
2931
JSON_Parser_GetEndArrayHandler(JSON_Parser parser)2932 JSON_Parser_EndArrayHandler JSON_CALL JSON_Parser_GetEndArrayHandler(JSON_Parser parser)
2933 {
2934 return parser ? parser->endArrayHandler : NULL;
2935 }
2936
JSON_Parser_SetEndArrayHandler(JSON_Parser parser,JSON_Parser_EndArrayHandler handler)2937 JSON_Status JSON_CALL JSON_Parser_SetEndArrayHandler(
2938 JSON_Parser parser, JSON_Parser_EndArrayHandler handler)
2939 {
2940 if (!parser)
2941 return JSON_Failure;
2942
2943 parser->endArrayHandler = handler;
2944 return JSON_Success;
2945 }
2946
JSON_Parser_GetArrayItemHandler(JSON_Parser parser)2947 JSON_Parser_ArrayItemHandler JSON_CALL JSON_Parser_GetArrayItemHandler(JSON_Parser parser)
2948 {
2949 return parser ? parser->arrayItemHandler : NULL;
2950 }
2951
JSON_Parser_SetArrayItemHandler(JSON_Parser parser,JSON_Parser_ArrayItemHandler handler)2952 JSON_Status JSON_CALL JSON_Parser_SetArrayItemHandler(
2953 JSON_Parser parser, JSON_Parser_ArrayItemHandler handler)
2954 {
2955 if (!parser)
2956 return JSON_Failure;
2957
2958 parser->arrayItemHandler = handler;
2959 return JSON_Success;
2960 }
2961
JSON_Parser_Parse(JSON_Parser parser,const char * pBytes,size_t length,JSON_Boolean isFinal)2962 JSON_Status JSON_CALL JSON_Parser_Parse(JSON_Parser parser, const char* pBytes, size_t length, JSON_Boolean isFinal)
2963 {
2964 JSON_Status status = JSON_Failure;
2965 if (parser && (pBytes || !length) && !GET_FLAGS(parser->state, PARSER_FINISHED | PARSER_IN_PROTECTED_API))
2966 {
2967 int finishedParsing = 0;
2968 SET_FLAGS_ON(ParserState, parser->state, PARSER_STARTED | PARSER_IN_PROTECTED_API);
2969 if (JSON_Parser_ProcessInputBytes(parser, (const byte*)pBytes, length))
2970 {
2971 /* New input was parsed successfully. */
2972 if (isFinal)
2973 {
2974 /* Make sure there is nothing pending in the decoder, lexer,
2975 or parser. */
2976 if (JSON_Parser_FlushDecoder(parser) &&
2977 JSON_Parser_FlushLexer(parser) &&
2978 JSON_Parser_FlushParser(parser))
2979 status = JSON_Success;
2980
2981 finishedParsing = 1;
2982 }
2983 else
2984 status = JSON_Success;
2985 }
2986 else
2987 {
2988 /* New input failed to parse. */
2989 finishedParsing = 1;
2990 }
2991 if (finishedParsing)
2992 {
2993 SET_FLAGS_ON(ParserState, parser->state, PARSER_FINISHED);
2994 }
2995 SET_FLAGS_OFF(ParserState, parser->state, PARSER_IN_PROTECTED_API);
2996 }
2997 return status;
2998 }
2999
3000 #endif /* JSON_NO_PARSER */
3001
3002 /******************** JSON Writer ********************/
3003
3004 #ifndef JSON_NO_WRITER
3005
/* Combinable writer state flags (stored in JSON_Writer_Data.state).
 *
 * WRITER_RESET            - no state bits set.
 * WRITER_STARTED          - set once a token has been written; the writer
 *                           settings setters refuse changes while it is set.
 * WRITER_IN_PROTECTED_API - set for the duration of an API call that must
 *                           not be re-entered; JSON_Writer_Free() and
 *                           JSON_Writer_Reset() fail while it is set. */
#define WRITER_RESET 0x0
#define WRITER_STARTED 0x1
#define WRITER_IN_PROTECTED_API 0x2
typedef byte WriterState;

/* Combinable writer settings flags (stored in JSON_Writer_Data.flags).
 *
 * WRITER_DEFAULT_FLAGS    - no settings enabled.
 * WRITER_USE_CRLF         - toggled by JSON_Writer_SetUseCRLF().
 * WRITER_REPLACE_INVALID  - invalid encoding sequences in strings are
 *                           replaced by an escaped replacement character
 *                           instead of failing the write.
 * WRITER_ESCAPE_NON_ASCII - non-ASCII codepoints in strings are written as
 *                           hex escape sequences. */
#define WRITER_DEFAULT_FLAGS 0x0
#define WRITER_USE_CRLF 0x1
#define WRITER_REPLACE_INVALID 0x2
#define WRITER_ESCAPE_NON_ASCII 0x4
typedef byte WriterFlags;
3018
/* A writer instance. */
struct JSON_Writer_Data
{
   JSON_MemorySuite memorySuite;            /* allocator copied at creation; also frees the instance */
   void* userData;                          /* opaque caller pointer (Get/SetUserData) */
   WriterState state;                       /* WRITER_STARTED / WRITER_IN_PROTECTED_API bits */
   WriterFlags flags;                       /* WRITER_* settings bits */
   Encoding outputEncoding;                 /* encoding of emitted bytes; reset to JSON_UTF8 */
   Error error;                             /* last recorded error; JSON_Error_None when clear */
   GrammarianData grammarianData;           /* grammar state that accepts or rejects each token */
   JSON_Writer_OutputHandler outputHandler; /* receives output bytes; may be NULL */
};
3031
3032 /* Writer internal functions. */
3033
/* Restores every writer field except the memory suite to its default value.
 * isInitialized is forwarded to Grammarian_Reset(). The state field is
 * cleared last, which also drops WRITER_IN_PROTECTED_API for callers that
 * set it before resetting. */
static void JSON_Writer_ResetData(JSON_Writer writer, int isInitialized)
{
   Grammarian_Reset(&writer->grammarianData, isInitialized);

   writer->outputHandler  = NULL;
   writer->error          = JSON_Error_None;
   writer->outputEncoding = JSON_UTF8;
   writer->flags          = WRITER_DEFAULT_FLAGS;
   writer->userData       = NULL;

   /* Must remain the final assignment. */
   writer->state          = WRITER_RESET;
}
3044
/* Records error as the writer's current error code, overwriting any
 * previously stored value. */
static void JSON_Writer_SetError(JSON_Writer writer, Error error)
{
   writer->error = error;
}
3049
JSON_Writer_ProcessToken(JSON_Writer writer,Symbol token)3050 static JSON_Status JSON_Writer_ProcessToken(JSON_Writer writer, Symbol token)
3051 {
3052 GrammarianOutput output = Grammarian_ProcessToken(&writer->grammarianData, token, &writer->memorySuite);
3053 switch (GRAMMARIAN_RESULT_CODE(output))
3054 {
3055 case REJECTED_TOKEN:
3056 JSON_Writer_SetError(writer, JSON_Error_UnexpectedToken);
3057 return JSON_Failure;
3058
3059 case SYMBOL_STACK_FULL:
3060 JSON_Writer_SetError(writer, JSON_Error_OutOfMemory);
3061 return JSON_Failure;
3062 }
3063 return JSON_Success;
3064 }
3065
JSON_Writer_OutputBytes(JSON_Writer writer,const byte * pBytes,size_t length)3066 static JSON_Status JSON_Writer_OutputBytes(JSON_Writer writer, const byte* pBytes, size_t length)
3067 {
3068 if (writer->outputHandler && length)
3069 {
3070 if (writer->outputHandler(writer, (const char*)pBytes, length) != JSON_Writer_Continue)
3071 {
3072 JSON_Writer_SetError(writer, JSON_Error_AbortedByHandler);
3073 return JSON_Failure;
3074 }
3075 }
3076 return JSON_Success;
3077 }
3078
/* Decides how codepoint c must be written inside a JSON string.
 *
 * Returns:
 *   0    - c can be emitted as a normal encoding sequence;
 *   'u'  - c must be emitted as one or two \uXXXX hex escape sequences;
 *   else - the character that follows the backslash in a simple escape
 *          (for example 'n' for a line feed).
 *
 * When WRITER_ESCAPE_NON_ASCII is set, every codepoint at or above
 * FIRST_NON_ASCII_CODEPOINT is hex-escaped, including the first non-ASCII
 * codepoint itself. */
static Codepoint JSON_Writer_GetCodepointEscapeCharacter(JSON_Writer writer, Codepoint c)
{
   switch (c)
   {
      case BACKSPACE_CODEPOINT:
         return 'b';

      case TAB_CODEPOINT:
         return 't';

      case LINE_FEED_CODEPOINT:
         return 'n';

      case FORM_FEED_CODEPOINT:
         return 'f';

      case CARRIAGE_RETURN_CODEPOINT:
         return 'r';

      case '"':
         return '"';

      /* Forward slashes are deliberately not escaped. */

      case '\\':
         return '\\';

      case DELETE_CODEPOINT:
      case LINE_SEPARATOR_CODEPOINT:
      case PARAGRAPH_SEPARATOR_CODEPOINT:
         return 'u';

      default:
         if (c < FIRST_NON_CONTROL_CODEPOINT || IS_NONCHARACTER(c) ||
             (GET_FLAGS(writer->flags, WRITER_ESCAPE_NON_ASCII) && c >= FIRST_NON_ASCII_CODEPOINT))
            return 'u';
         break;
   }
   return 0;
}
3120
/* Small fixed-size staging buffer used to batch output bytes before they
 * are passed to the writer's output handler. */
typedef struct tag_WriteBufferData
{
   size_t used;     /* number of valid bytes currently held in bytes[] */
   byte bytes[256]; /* staged output, flushed via WriteBuffer_Flush() */
} WriteBufferData;
typedef WriteBufferData* WriteBuffer;
3127
/* Marks the buffer as empty; the stored bytes themselves are not cleared. */
static void WriteBuffer_Reset(WriteBuffer buffer)
{
   buffer->used = 0;
}
3132
WriteBuffer_Flush(WriteBuffer buffer,JSON_Writer writer)3133 static JSON_Status WriteBuffer_Flush(WriteBuffer buffer, JSON_Writer writer)
3134 {
3135 JSON_Status status = JSON_Writer_OutputBytes(writer, buffer->bytes, buffer->used);
3136 buffer->used = 0;
3137 return status;
3138 }
3139
WriteBuffer_WriteBytes(WriteBuffer buffer,JSON_Writer writer,const byte * pBytes,size_t length)3140 static JSON_Status WriteBuffer_WriteBytes(WriteBuffer buffer, JSON_Writer writer, const byte* pBytes, size_t length)
3141 {
3142 if (buffer->used + length > sizeof(buffer->bytes) &&
3143 !WriteBuffer_Flush(buffer, writer))
3144 return JSON_Failure;
3145
3146 memcpy(&buffer->bytes[buffer->used], pBytes, length);
3147 buffer->used += length;
3148 return JSON_Success;
3149 }
3150
WriteBuffer_WriteCodepoint(WriteBuffer buffer,JSON_Writer writer,Codepoint c)3151 static JSON_Status WriteBuffer_WriteCodepoint(WriteBuffer buffer, JSON_Writer writer, Codepoint c)
3152 {
3153 if (buffer->used + LONGEST_ENCODING_SEQUENCE > sizeof(buffer->bytes) &&
3154 !WriteBuffer_Flush(buffer, writer))
3155 return JSON_Failure;
3156
3157 buffer->used += EncodeCodepoint(c, writer->outputEncoding, &buffer->bytes[buffer->used]);
3158 return JSON_Success;
3159 }
3160
WriteBuffer_WriteHexEscapeSequence(WriteBuffer buffer,JSON_Writer writer,Codepoint c)3161 static JSON_Status WriteBuffer_WriteHexEscapeSequence(WriteBuffer buffer, JSON_Writer writer, Codepoint c)
3162 {
3163 if (c >= FIRST_NON_BMP_CODEPOINT)
3164 {
3165 /* Non-BMP codepoints must be hex-escaped by escaping the UTF-16
3166 surrogate pair for the codepoint. We put the leading surrogate
3167 in the low 16 bits of c so that it gets written first, then
3168 the second pass through the loop will write out the trailing
3169 surrogate. x*/
3170 c = SURROGATES_FROM_CODEPOINT(c);
3171 c = (c << 16) | (c >> 16);
3172 }
3173 do
3174 {
3175 static const byte hexDigits[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
3176 byte escapeSequence[6];
3177 int i;
3178 escapeSequence[0] = '\\';
3179 escapeSequence[1] = 'u';
3180 escapeSequence[2] = hexDigits[(c >> 12) & 0xF];
3181 escapeSequence[3] = hexDigits[(c >> 8) & 0xF];
3182 escapeSequence[4] = hexDigits[(c >> 4) & 0xF];
3183 escapeSequence[5] = hexDigits[c & 0xF];
3184 for (i = 0; i < sizeof(escapeSequence); i++)
3185 {
3186 if (!WriteBuffer_WriteCodepoint(buffer, writer, escapeSequence[i]))
3187 return JSON_Failure;
3188 }
3189 c >>= 16;
3190 } while (c);
3191 return JSON_Success;
3192 }
3193
/* Writes the bytes at pBytes (interpreted in the given input encoding) as a
 * quoted JSON string in the writer's output encoding.
 *
 * Each decoded codepoint is emitted verbatim, as a simple backslash escape,
 * or as a hex escape, as decided by
 * JSON_Writer_GetCodepointEscapeCharacter(). Invalid or truncated input
 * sequences are replaced by an escaped REPLACEMENT_CHARACTER_CODEPOINT when
 * WRITER_REPLACE_INVALID is set; otherwise the bytes staged so far are
 * flushed, JSON_Error_InvalidEncodingSequence is recorded, and JSON_Failure
 * is returned. */
static JSON_Status JSON_Writer_OutputString(JSON_Writer writer, const byte* pBytes, size_t length, Encoding encoding)
{
   /* A single quote character padded with zero bytes so that every
      supported encoding of '"' is a slice of the same array. The table is
      indexed by (outputEncoding - 1); presumably the order is UTF-8,
      UTF-16LE, UTF-16BE, UTF-32LE, UTF-32BE - confirm against the Encoding
      enumeration. */
   static const byte quoteUTF[] = { 0, 0, 0, '"', 0, 0, 0 };
   static const byte* const quoteEncodings[5] = { quoteUTF + 3, quoteUTF + 3, quoteUTF + 2, quoteUTF + 3, quoteUTF };

   const byte* pQuoteEncoded = quoteEncodings[writer->outputEncoding - 1];
   size_t minSequenceLength = (size_t)SHORTEST_ENCODING_SEQUENCE(writer->outputEncoding);
   DecoderData decoderData;
   WriteBufferData bufferData;
   size_t i = 0;

   WriteBuffer_Reset(&bufferData);

   /* Start quote. */
   if (!WriteBuffer_WriteBytes(&bufferData, writer, pQuoteEncoded, minSequenceLength))
      return JSON_Failure;

   /* String contents. */
   Decoder_Reset(&decoderData);
   while (i < length)
   {
      DecoderOutput output = Decoder_ProcessByte(&decoderData, encoding, pBytes[i]);
      DecoderResultCode result = DECODER_RESULT_CODE(output);
      Codepoint c;
      Codepoint escapeCharacter;
      switch (result)
      {
         case SEQUENCE_PENDING:
            /* More bytes are needed to finish the current codepoint. */
            i++;
            break;

         case SEQUENCE_COMPLETE:
            c = DECODER_CODEPOINT(output);
            escapeCharacter = JSON_Writer_GetCodepointEscapeCharacter(writer, c);
            switch (escapeCharacter)
            {
               case 0:
                  /* Output the codepoint as a normal encoding sequence. */
                  if (!WriteBuffer_WriteCodepoint(&bufferData, writer, c))
                     return JSON_Failure;
                  break;

               case 'u':
                  /* Output the codepoint as 1 or 2 hex escape sequences. */
                  if (!WriteBuffer_WriteHexEscapeSequence(&bufferData, writer, c))
                     return JSON_Failure;
                  break;

               default:
                  /* Output the codepoint as a simple escape sequence. */
                  if (!WriteBuffer_WriteCodepoint(&bufferData, writer, '\\') ||
                      !WriteBuffer_WriteCodepoint(&bufferData, writer, escapeCharacter))
                     return JSON_Failure;
                  break;
            }
            i++;
            break;

         case SEQUENCE_INVALID_INCLUSIVE:
            /* The current byte is consumed as part of the invalid
               sequence. */
            i++;
            /* fallthrough */
         case SEQUENCE_INVALID_EXCLUSIVE:
            /* In the exclusive case i is not advanced, so the current byte
               is processed again on the next iteration. */
            if (GET_FLAGS(writer->flags, WRITER_REPLACE_INVALID))
            {
               if (!WriteBuffer_WriteHexEscapeSequence(&bufferData, writer, REPLACEMENT_CHARACTER_CODEPOINT))
                  return JSON_Failure;
            }
            else
            {
               /* Output whatever valid bytes we've accumulated before failing.
                  The encoding error is only recorded when the flush itself
                  succeeded, so an error set during the flush is kept. */
               if (WriteBuffer_Flush(&bufferData, writer))
                  JSON_Writer_SetError(writer, JSON_Error_InvalidEncodingSequence);
               return JSON_Failure;
            }
            break;
      }
   }
   /* Input ended in the middle of an encoding sequence. */
   if (Decoder_SequencePending(&decoderData))
   {
      if (GET_FLAGS(writer->flags, WRITER_REPLACE_INVALID))
      {
         if (!WriteBuffer_WriteHexEscapeSequence(&bufferData, writer, REPLACEMENT_CHARACTER_CODEPOINT))
            return JSON_Failure;
      }
      else
      {
         /* Output whatever valid bytes we've accumulated before failing. */
         if (WriteBuffer_Flush(&bufferData, writer))
            JSON_Writer_SetError(writer, JSON_Error_InvalidEncodingSequence);
         return JSON_Failure;
      }
   }

   /* End quote. */
   if (!WriteBuffer_WriteBytes(&bufferData, writer, pQuoteEncoded, minSequenceLength) ||
       !WriteBuffer_Flush(&bufferData, writer))
      return JSON_Failure;
   return JSON_Success;
}
3293
/* Advances the number-lexing state machine by one codepoint.
 *
 * state is the current lexer state (LEXING_WHITESPACE before the first
 * character) and c is the next codepoint, or EOF_CODEPOINT to ask whether
 * the number may end here. Returns the next state: LEXER_ERROR when c is not
 * allowed, LEXING_WHITESPACE when EOF_CODEPOINT arrives at a point where the
 * number is complete. States not handled below are returned unchanged. */
static LexerState LexNumberCharacter(LexerState state, Codepoint c)
{
   const int isDigit        = (c >= '0' && c <= '9');
   const int isNonZeroDigit = (c >= '1' && c <= '9');
   const int isHexDigit     = isDigit || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f');
   const int isExponentMark = (c == 'e' || c == 'E');
   const int isEnd          = (c == EOF_CODEPOINT);

   switch (state)
   {
      case LEXING_WHITESPACE:
         if (c == '-')
            return LEXING_NUMBER_AFTER_MINUS;
         if (c == '0')
            return LEXING_NUMBER_AFTER_LEADING_ZERO;
         if (isNonZeroDigit)
            return LEXING_NUMBER_DECIMAL_DIGITS;
         return LEXER_ERROR;

      case LEXING_NUMBER_AFTER_MINUS:
         if (c == '0')
            return LEXING_NUMBER_AFTER_LEADING_NEGATIVE_ZERO;
         if (isNonZeroDigit)
            return LEXING_NUMBER_DECIMAL_DIGITS;
         return LEXER_ERROR;

      case LEXING_NUMBER_AFTER_LEADING_ZERO:
      case LEXING_NUMBER_AFTER_LEADING_NEGATIVE_ZERO:
         if (c == '.')
            return LEXING_NUMBER_AFTER_DOT;
         if (isExponentMark)
            return LEXING_NUMBER_AFTER_E;
         /* The hex prefix is only accepted after a non-negative zero. */
         if ((c == 'x' || c == 'X') && state == LEXING_NUMBER_AFTER_LEADING_ZERO)
            return LEXING_NUMBER_AFTER_X;
         if (isEnd)
            return LEXING_WHITESPACE;
         return LEXER_ERROR;

      case LEXING_NUMBER_AFTER_X:
         if (isHexDigit)
            return LEXING_NUMBER_HEX_DIGITS;
         return LEXER_ERROR;

      case LEXING_NUMBER_HEX_DIGITS:
         if (isHexDigit)
            return state;
         if (isEnd)
            return LEXING_WHITESPACE;
         return LEXER_ERROR;

      case LEXING_NUMBER_DECIMAL_DIGITS:
         if (isDigit)
            return state;
         if (c == '.')
            return LEXING_NUMBER_AFTER_DOT;
         if (isExponentMark)
            return LEXING_NUMBER_AFTER_E;
         if (isEnd)
            return LEXING_WHITESPACE;
         return LEXER_ERROR;

      case LEXING_NUMBER_AFTER_DOT:
         if (isDigit)
            return LEXING_NUMBER_FRACTIONAL_DIGITS;
         return LEXER_ERROR;

      case LEXING_NUMBER_FRACTIONAL_DIGITS:
         if (isDigit)
            return state;
         if (isExponentMark)
            return LEXING_NUMBER_AFTER_E;
         if (isEnd)
            return LEXING_WHITESPACE;
         return LEXER_ERROR;

      case LEXING_NUMBER_AFTER_E:
         if (c == '+' || c == '-')
            return LEXING_NUMBER_AFTER_EXPONENT_SIGN;
         if (isDigit)
            return LEXING_NUMBER_EXPONENT_DIGITS;
         return LEXER_ERROR;

      case LEXING_NUMBER_AFTER_EXPONENT_SIGN:
         if (isDigit)
            return LEXING_NUMBER_EXPONENT_DIGITS;
         return LEXER_ERROR;

      case LEXING_NUMBER_EXPONENT_DIGITS:
         if (isDigit)
            return state;
         if (isEnd)
            return LEXING_WHITESPACE;
         return LEXER_ERROR;

      default:
         break;
   }
   return state;
}
3414
JSON_Writer_OutputNumber(JSON_Writer writer,const byte * pBytes,size_t length,Encoding encoding)3415 static JSON_Status JSON_Writer_OutputNumber(JSON_Writer writer, const byte* pBytes, size_t length, Encoding encoding)
3416 {
3417 DecoderData decoderData;
3418 WriteBufferData bufferData;
3419 LexerState lexerState = LEXING_WHITESPACE;
3420 size_t i;
3421 Decoder_Reset(&decoderData);
3422 WriteBuffer_Reset(&bufferData);
3423 for (i = 0; i < length; i++)
3424 {
3425 DecoderOutput output = Decoder_ProcessByte(&decoderData, encoding, pBytes[i]);
3426 DecoderResultCode result = DECODER_RESULT_CODE(output);
3427 Codepoint c;
3428 switch (result)
3429 {
3430 case SEQUENCE_PENDING:
3431 break;
3432
3433 case SEQUENCE_COMPLETE:
3434 c = DECODER_CODEPOINT(output);
3435 lexerState = LexNumberCharacter(lexerState, c);
3436 if (lexerState == LEXER_ERROR)
3437 {
3438 /* Output whatever valid bytes we've accumulated before failing. */
3439 if (WriteBuffer_Flush(&bufferData, writer))
3440 JSON_Writer_SetError(writer, JSON_Error_InvalidNumber);
3441 return JSON_Failure;
3442 }
3443 if (!WriteBuffer_WriteCodepoint(&bufferData, writer, c))
3444 return JSON_Failure;
3445 break;
3446
3447 case SEQUENCE_INVALID_INCLUSIVE:
3448 case SEQUENCE_INVALID_EXCLUSIVE:
3449 /* Output whatever valid bytes we've accumulated before failing. */
3450 if (WriteBuffer_Flush(&bufferData, writer))
3451 JSON_Writer_SetError(writer, JSON_Error_InvalidEncodingSequence);
3452 return JSON_Failure;
3453 }
3454 }
3455 if (!WriteBuffer_Flush(&bufferData, writer))
3456 return JSON_Failure;
3457 if (Decoder_SequencePending(&decoderData))
3458 {
3459 JSON_Writer_SetError(writer, JSON_Error_InvalidEncodingSequence);
3460 return JSON_Failure;
3461 }
3462 if (LexNumberCharacter(lexerState, EOF_CODEPOINT) == LEXER_ERROR)
3463 {
3464 JSON_Writer_SetError(writer, JSON_Error_InvalidNumber);
3465 return JSON_Failure;
3466 }
3467 return JSON_Success;
3468 }
3469
3470 #define SPACES_PER_CHUNK 8
JSON_Writer_OutputSpaces(JSON_Writer writer,size_t numberOfSpaces)3471 static JSON_Status JSON_Writer_OutputSpaces(JSON_Writer writer, size_t numberOfSpaces)
3472 {
3473 static const byte spacesUTF8[SPACES_PER_CHUNK] = { ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ' };
3474 static const byte spacesUTF16[SPACES_PER_CHUNK * 2 + 1] = { 0, ' ', 0, ' ', 0, ' ', 0, ' ', 0, ' ', 0, ' ', 0, ' ', 0, ' ', 0 };
3475 static const byte spacesUTF32[SPACES_PER_CHUNK * 4 + 3] = { 0, 0, 0, ' ', 0, 0, 0, ' ', 0, 0, 0, ' ', 0, 0, 0, ' ', 0, 0, 0, ' ', 0, 0, 0, ' ', 0, 0, 0, ' ', 0, 0, 0, ' ', 0, 0, 0 };
3476 static const byte* const spacesEncodings[5] = { spacesUTF8, spacesUTF16 + 1, spacesUTF16, spacesUTF32 + 3, spacesUTF32 };
3477
3478 size_t encodedLength = (size_t)SHORTEST_ENCODING_SEQUENCE(writer->outputEncoding);
3479 const byte* encoded = spacesEncodings[writer->outputEncoding - 1];
3480 while (numberOfSpaces > SPACES_PER_CHUNK)
3481 {
3482 if (!JSON_Writer_OutputBytes(writer, encoded, SPACES_PER_CHUNK * encodedLength))
3483 return JSON_Failure;
3484 numberOfSpaces -= SPACES_PER_CHUNK;
3485 }
3486
3487 if (!JSON_Writer_OutputBytes(writer, encoded, numberOfSpaces * encodedLength))
3488 return JSON_Failure;
3489 return JSON_Success;
3490 }
3491
JSON_Writer_WriteSimpleToken(JSON_Writer writer,Symbol token,const byte * const * encodings,size_t length)3492 static JSON_Status JSON_Writer_WriteSimpleToken(JSON_Writer writer, Symbol token, const byte* const* encodings, size_t length)
3493 {
3494 JSON_Status status = JSON_Failure;
3495 if (writer && !GET_FLAGS(writer->state, WRITER_IN_PROTECTED_API) && writer->error == JSON_Error_None)
3496 {
3497 size_t encodedLength = length * (size_t)SHORTEST_ENCODING_SEQUENCE(writer->outputEncoding);
3498 SET_FLAGS_ON(WriterState, writer->state, WRITER_STARTED | WRITER_IN_PROTECTED_API);
3499 if (JSON_Writer_ProcessToken(writer, token) &&
3500 JSON_Writer_OutputBytes(writer, encodings[writer->outputEncoding - 1], encodedLength))
3501 status = JSON_Success;
3502 SET_FLAGS_OFF(WriterState, writer->state, WRITER_IN_PROTECTED_API);
3503 }
3504 return status;
3505 }
3506
3507 /* Writer API functions. */
3508
JSON_Writer_Create(const JSON_MemorySuite * pMemorySuite)3509 JSON_Writer JSON_CALL JSON_Writer_Create(const JSON_MemorySuite* pMemorySuite)
3510 {
3511 JSON_Writer writer;
3512 JSON_MemorySuite memorySuite;
3513 if (pMemorySuite)
3514 {
3515 memorySuite = *pMemorySuite;
3516 /* The full memory suite must be specified. */
3517 if (!memorySuite.realloc || !memorySuite.free)
3518 return NULL;
3519 }
3520 else
3521 memorySuite = defaultMemorySuite;
3522
3523 writer = (JSON_Writer)memorySuite.realloc(memorySuite.userData, NULL, sizeof(struct JSON_Writer_Data));
3524
3525 if (!writer)
3526 return NULL;
3527
3528 writer->memorySuite = memorySuite;
3529 JSON_Writer_ResetData(writer, 0/* isInitialized */);
3530 return writer;
3531 }
3532
JSON_Writer_Free(JSON_Writer writer)3533 JSON_Status JSON_CALL JSON_Writer_Free(JSON_Writer writer)
3534 {
3535 if (!writer || GET_FLAGS(writer->state, WRITER_IN_PROTECTED_API))
3536 return JSON_Failure;
3537
3538 SET_FLAGS_ON(WriterState, writer->state, WRITER_IN_PROTECTED_API);
3539 Grammarian_FreeAllocations(&writer->grammarianData, &writer->memorySuite);
3540 writer->memorySuite.free(writer->memorySuite.userData, writer);
3541 return JSON_Success;
3542 }
3543
JSON_Writer_Reset(JSON_Writer writer)3544 JSON_Status JSON_CALL JSON_Writer_Reset(JSON_Writer writer)
3545 {
3546 if (!writer || GET_FLAGS(writer->state, WRITER_IN_PROTECTED_API))
3547 return JSON_Failure;
3548
3549 SET_FLAGS_ON(WriterState, writer->state, WRITER_IN_PROTECTED_API);
3550 JSON_Writer_ResetData(writer, 1/* isInitialized */);
3551 /* Note that JSON_Writer_ResetData() unset WRITER_IN_PROTECTED_API for us. */
3552 return JSON_Success;
3553 }
3554
JSON_Writer_GetUserData(JSON_Writer writer)3555 void* JSON_CALL JSON_Writer_GetUserData(JSON_Writer writer)
3556 {
3557 return writer ? writer->userData : NULL;
3558 }
3559
JSON_Writer_SetUserData(JSON_Writer writer,void * userData)3560 JSON_Status JSON_CALL JSON_Writer_SetUserData(JSON_Writer writer, void* userData)
3561 {
3562 if (!writer)
3563 return JSON_Failure;
3564
3565 writer->userData = userData;
3566 return JSON_Success;
3567 }
3568
JSON_Writer_GetOutputEncoding(JSON_Writer writer)3569 JSON_Encoding JSON_CALL JSON_Writer_GetOutputEncoding(JSON_Writer writer)
3570 {
3571 return writer ? (JSON_Encoding)writer->outputEncoding : JSON_UTF8;
3572 }
3573
JSON_Writer_SetOutputEncoding(JSON_Writer writer,JSON_Encoding encoding)3574 JSON_Status JSON_CALL JSON_Writer_SetOutputEncoding(JSON_Writer writer, JSON_Encoding encoding)
3575 {
3576 if (!writer || GET_FLAGS(writer->state, WRITER_STARTED) || encoding <= JSON_UnknownEncoding || encoding > JSON_UTF32BE)
3577 return JSON_Failure;
3578
3579 writer->outputEncoding = (Encoding)encoding;
3580 return JSON_Success;
3581 }
3582
JSON_Writer_GetUseCRLF(JSON_Writer writer)3583 JSON_Boolean JSON_CALL JSON_Writer_GetUseCRLF(JSON_Writer writer)
3584 {
3585 return (writer && GET_FLAGS(writer->flags, WRITER_USE_CRLF)) ? JSON_True : JSON_False;
3586 }
3587
JSON_Writer_SetUseCRLF(JSON_Writer writer,JSON_Boolean useCRLF)3588 JSON_Status JSON_CALL JSON_Writer_SetUseCRLF(JSON_Writer writer, JSON_Boolean useCRLF)
3589 {
3590 if (!writer || GET_FLAGS(writer->state, WRITER_STARTED))
3591 return JSON_Failure;
3592
3593 SET_FLAGS(WriterFlags, writer->flags, WRITER_USE_CRLF, useCRLF);
3594 return JSON_Success;
3595 }
3596
JSON_Writer_GetReplaceInvalidEncodingSequences(JSON_Writer writer)3597 JSON_Boolean JSON_CALL JSON_Writer_GetReplaceInvalidEncodingSequences(JSON_Writer writer)
3598 {
3599 return (writer && GET_FLAGS(writer->flags, WRITER_REPLACE_INVALID)) ? JSON_True : JSON_False;
3600 }
3601
JSON_Writer_SetReplaceInvalidEncodingSequences(JSON_Writer writer,JSON_Boolean replaceInvalidEncodingSequences)3602 JSON_Status JSON_CALL JSON_Writer_SetReplaceInvalidEncodingSequences(JSON_Writer writer, JSON_Boolean replaceInvalidEncodingSequences)
3603 {
3604 if (!writer || GET_FLAGS(writer->state, WRITER_STARTED))
3605 return JSON_Failure;
3606
3607 SET_FLAGS(WriterFlags, writer->flags, WRITER_REPLACE_INVALID, replaceInvalidEncodingSequences);
3608 return JSON_Success;
3609 }
3610
JSON_Writer_GetEscapeAllNonASCIICharacters(JSON_Writer writer)3611 JSON_Boolean JSON_CALL JSON_Writer_GetEscapeAllNonASCIICharacters(JSON_Writer writer)
3612 {
3613 return (writer && GET_FLAGS(writer->flags, WRITER_ESCAPE_NON_ASCII)) ? JSON_True : JSON_False;
3614 }
3615
JSON_Writer_SetEscapeAllNonASCIICharacters(JSON_Writer writer,JSON_Boolean escapeAllNonASCIICharacters)3616 JSON_Status JSON_CALL JSON_Writer_SetEscapeAllNonASCIICharacters(JSON_Writer writer, JSON_Boolean escapeAllNonASCIICharacters)
3617 {
3618 if (!writer || GET_FLAGS(writer->state, WRITER_STARTED))
3619 return JSON_Failure;
3620
3621 SET_FLAGS(WriterFlags, writer->flags, WRITER_ESCAPE_NON_ASCII, escapeAllNonASCIICharacters);
3622 return JSON_Success;
3623 }
3624
JSON_Writer_GetError(JSON_Writer writer)3625 JSON_Error JSON_CALL JSON_Writer_GetError(JSON_Writer writer)
3626 {
3627 return writer ? (JSON_Error)writer->error : JSON_Error_None;
3628 }
3629
JSON_Writer_GetOutputHandler(JSON_Writer writer)3630 JSON_Writer_OutputHandler JSON_CALL JSON_Writer_GetOutputHandler(JSON_Writer writer)
3631 {
3632 return writer ? writer->outputHandler : NULL;
3633 }
3634
JSON_Writer_SetOutputHandler(JSON_Writer writer,JSON_Writer_OutputHandler handler)3635 JSON_Status JSON_CALL JSON_Writer_SetOutputHandler(JSON_Writer writer, JSON_Writer_OutputHandler handler)
3636 {
3637 if (!writer)
3638 return JSON_Failure;
3639
3640 writer->outputHandler = handler;
3641 return JSON_Success;
3642 }
3643
JSON_Writer_WriteNull(JSON_Writer writer)3644 JSON_Status JSON_CALL JSON_Writer_WriteNull(JSON_Writer writer)
3645 {
3646 static const byte nullUTF8[] = { 'n', 'u', 'l', 'l' };
3647 static const byte nullUTF16[] = { 0, 'n', 0, 'u', 0, 'l', 0, 'l', 0 };
3648 static const byte nullUTF32[] = { 0, 0, 0, 'n', 0, 0, 0, 'u', 0, 0, 0, 'l', 0, 0, 0, 'l', 0, 0, 0 };
3649 static const byte* const nullEncodings[5] = { nullUTF8, nullUTF16 + 1, nullUTF16, nullUTF32 + 3, nullUTF32 };
3650
3651 return JSON_Writer_WriteSimpleToken(writer, T_NULL, nullEncodings, sizeof(nullUTF8));
3652 }
3653
JSON_Writer_WriteBoolean(JSON_Writer writer,JSON_Boolean value)3654 JSON_Status JSON_CALL JSON_Writer_WriteBoolean(JSON_Writer writer, JSON_Boolean value)
3655 {
3656 static const byte trueUTF8[] = { 't', 'r', 'u', 'e' };
3657 static const byte trueUTF16[] = { 0, 't', 0, 'r', 0, 'u', 0, 'e', 0 };
3658 static const byte trueUTF32[] = { 0, 0, 0, 't', 0, 0, 0, 'r', 0, 0, 0, 'u', 0, 0, 0, 'e', 0, 0, 0 };
3659 static const byte* const trueEncodings[5] = { trueUTF8, trueUTF16 + 1, trueUTF16, trueUTF32 + 3, trueUTF32 };
3660
3661 static const byte falseUTF8[] = { 'f', 'a', 'l', 's', 'e' };
3662 static const byte falseUTF16[] = { 0, 'f', 0, 'a', 0, 'l', 0, 's', 0, 'e', 0 };
3663 static const byte falseUTF32[] = { 0, 0, 0, 'f', 0, 0, 0, 'a', 0, 0, 0, 'l', 0, 0, 0, 's', 0, 0, 0, 'e', 0, 0, 0 };
3664 static const byte* const falseEncodings[5] = { falseUTF8, falseUTF16 + 1, falseUTF16, falseUTF32 + 3, falseUTF32 };
3665
3666 Symbol token;
3667 const byte* const* encodings;
3668 size_t length;
3669 if (value)
3670 {
3671 token = T_TRUE;
3672 encodings = trueEncodings;
3673 length = sizeof(trueUTF8);
3674 }
3675 else
3676 {
3677 token = T_FALSE;
3678 encodings = falseEncodings;
3679 length = sizeof(falseUTF8);
3680 }
3681 return JSON_Writer_WriteSimpleToken(writer, token, encodings, length);
3682 }
3683
JSON_Writer_WriteString(JSON_Writer writer,const char * pValue,size_t length,JSON_Encoding encoding)3684 JSON_Status JSON_CALL JSON_Writer_WriteString(JSON_Writer writer, const char* pValue, size_t length, JSON_Encoding encoding)
3685 {
3686 JSON_Status status = JSON_Failure;
3687 if (writer && (pValue || !length) && encoding > JSON_UnknownEncoding && encoding <= JSON_UTF32BE &&
3688 !GET_FLAGS(writer->state, WRITER_IN_PROTECTED_API) && writer->error == JSON_Error_None)
3689 {
3690 SET_FLAGS_ON(WriterState, writer->state, WRITER_STARTED | WRITER_IN_PROTECTED_API);
3691 if (JSON_Writer_ProcessToken(writer, T_STRING))
3692 status = JSON_Writer_OutputString(writer, (const byte*)pValue, length, (Encoding)encoding);
3693
3694 SET_FLAGS_OFF(WriterState, writer->state, WRITER_IN_PROTECTED_API);
3695 }
3696 return status;
3697 }
3698
JSON_Writer_WriteNumber(JSON_Writer writer,const char * pValue,size_t length,JSON_Encoding encoding)3699 JSON_Status JSON_CALL JSON_Writer_WriteNumber(JSON_Writer writer, const char* pValue, size_t length, JSON_Encoding encoding)
3700 {
3701 JSON_Status status = JSON_Failure;
3702 if (writer && pValue && length && encoding > JSON_UnknownEncoding && encoding <= JSON_UTF32BE &&
3703 !GET_FLAGS(writer->state, WRITER_IN_PROTECTED_API) && writer->error == JSON_Error_None)
3704 {
3705 SET_FLAGS_ON(WriterState, writer->state, WRITER_STARTED | WRITER_IN_PROTECTED_API);
3706 if (JSON_Writer_ProcessToken(writer, T_NUMBER))
3707 status = JSON_Writer_OutputNumber(writer, (const byte*)pValue, length, (Encoding)encoding);
3708
3709 SET_FLAGS_OFF(WriterState, writer->state, WRITER_IN_PROTECTED_API);
3710 }
3711 return status;
3712 }
3713
JSON_Writer_WriteSpecialNumber(JSON_Writer writer,JSON_SpecialNumber value)3714 JSON_Status JSON_CALL JSON_Writer_WriteSpecialNumber(JSON_Writer writer, JSON_SpecialNumber value)
3715 {
3716 static const byte nanUTF8[] = { 'N', 'a', 'N' };
3717 static const byte nanUTF16[] = { 0, 'N', 0, 'a', 0, 'N', 0 };
3718 static const byte nanUTF32[] = { 0, 0, 0, 'N', 0, 0, 0, 'a', 0, 0, 0, 'N', 0, 0, 0 };
3719 static const byte* const nanEncodings[5] = { nanUTF8, nanUTF16 + 1, nanUTF16, nanUTF32 + 3, nanUTF32 };
3720
3721 static const byte ninfUTF8[] = { '-', 'I', 'n', 'f', 'i', 'n', 'i', 't', 'y' };
3722 static const byte ninfUTF16[] = { 0, '-', 0, 'I', 0, 'n', 0, 'f', 0, 'i', 0, 'n', 0, 'i', 0, 't', 0, 'y', 0 };
3723 static const byte ninfUTF32[] = { 0, 0, 0, '-', 0, 0, 0, 'I', 0, 0, 0, 'n', 0, 0, 0, 'f', 0, 0, 0, 'i', 0, 0, 0, 'n', 0, 0, 0, 'i', 0, 0, 0, 't', 0, 0, 0, 'y', 0, 0, 0 };
3724 static const byte* const infinityEncodings[5] = { ninfUTF8 + 1, ninfUTF16 + 3, ninfUTF16 + 2, ninfUTF32 + 7, ninfUTF32 + 4 };
3725 static const byte* const negativeInfinityEncodings[5] = { ninfUTF8, ninfUTF16 + 1, ninfUTF16, ninfUTF32 + 3, ninfUTF32 };
3726
3727 Symbol token;
3728 const byte* const* encodings;
3729 size_t length;
3730 if (value == JSON_Infinity)
3731 {
3732 token = T_INFINITY;
3733 encodings = infinityEncodings;
3734 length = sizeof(ninfUTF8) - 1/* - */;
3735 }
3736 else if (value == JSON_NegativeInfinity)
3737 {
3738 token = T_NEGATIVE_INFINITY;
3739 encodings = negativeInfinityEncodings;
3740 length = sizeof(ninfUTF8);
3741 }
3742 else
3743 {
3744 token = T_NAN;
3745 encodings = nanEncodings;
3746 length = sizeof(nanUTF8);
3747 }
3748 return JSON_Writer_WriteSimpleToken(writer, token, encodings, length);
3749 }
3750
JSON_Writer_WriteStartObject(JSON_Writer writer)3751 JSON_Status JSON_CALL JSON_Writer_WriteStartObject(JSON_Writer writer)
3752 {
3753 static const byte utf[] = { 0, 0, 0, '{', 0, 0, 0 };
3754 static const byte* const encodings[5] = { utf + 3, utf + 3, utf + 2, utf + 3, utf };
3755
3756 return JSON_Writer_WriteSimpleToken(writer, T_LEFT_CURLY, encodings, 1);
3757 }
3758
JSON_Writer_WriteEndObject(JSON_Writer writer)3759 JSON_Status JSON_CALL JSON_Writer_WriteEndObject(JSON_Writer writer)
3760 {
3761 static const byte utf[] = { 0, 0, 0, '}', 0, 0, 0 };
3762 static const byte* const encodings[5] = { utf + 3, utf + 3, utf + 2, utf + 3, utf };
3763
3764 return JSON_Writer_WriteSimpleToken(writer, T_RIGHT_CURLY, encodings, 1);
3765 }
3766
JSON_Writer_WriteStartArray(JSON_Writer writer)3767 JSON_Status JSON_CALL JSON_Writer_WriteStartArray(JSON_Writer writer)
3768 {
3769 static const byte utf[] = { 0, 0, 0, '[', 0, 0, 0 };
3770 static const byte* const encodings[5] = { utf + 3, utf + 3, utf + 2, utf + 3, utf };
3771
3772 return JSON_Writer_WriteSimpleToken(writer, T_LEFT_SQUARE, encodings, 1);
3773 }
3774
JSON_Writer_WriteEndArray(JSON_Writer writer)3775 JSON_Status JSON_CALL JSON_Writer_WriteEndArray(JSON_Writer writer)
3776 {
3777 static const byte utf[] = { 0, 0, 0, ']', 0, 0, 0 };
3778 static const byte* const encodings[5] = { utf + 3, utf + 3, utf + 2, utf + 3, utf };
3779
3780 return JSON_Writer_WriteSimpleToken(writer, T_RIGHT_SQUARE, encodings, 1);
3781 }
3782
JSON_Writer_WriteColon(JSON_Writer writer)3783 JSON_Status JSON_CALL JSON_Writer_WriteColon(JSON_Writer writer)
3784 {
3785 static const byte utf[] = { 0, 0, 0, ':', 0, 0, 0 };
3786 static const byte* const encodings[5] = { utf + 3, utf + 3, utf + 2, utf + 3, utf };
3787
3788 return JSON_Writer_WriteSimpleToken(writer, T_COLON, encodings, 1);
3789 }
3790
JSON_Writer_WriteComma(JSON_Writer writer)3791 JSON_Status JSON_CALL JSON_Writer_WriteComma(JSON_Writer writer)
3792 {
3793 static const byte utf[] = { 0, 0, 0, ',', 0, 0, 0 };
3794 static const byte* const encodings[5] = { utf + 3, utf + 3, utf + 2, utf + 3, utf };
3795
3796 return JSON_Writer_WriteSimpleToken(writer, T_COMMA, encodings, 1);
3797 }
3798
JSON_Writer_WriteSpace(JSON_Writer writer,size_t numberOfSpaces)3799 JSON_Status JSON_CALL JSON_Writer_WriteSpace(JSON_Writer writer, size_t numberOfSpaces)
3800 {
3801 JSON_Status status = JSON_Failure;
3802 if (writer && !GET_FLAGS(writer->state, WRITER_IN_PROTECTED_API) && writer->error == JSON_Error_None)
3803 {
3804 SET_FLAGS_ON(WriterState, writer->state, WRITER_STARTED | WRITER_IN_PROTECTED_API);
3805 status = JSON_Writer_OutputSpaces(writer, numberOfSpaces);
3806 SET_FLAGS_OFF(WriterState, writer->state, WRITER_IN_PROTECTED_API);
3807 }
3808 return status;
3809 }
3810
JSON_Writer_WriteNewLine(JSON_Writer writer)3811 JSON_Status JSON_CALL JSON_Writer_WriteNewLine(JSON_Writer writer)
3812 {
3813 static const byte lfUTF[] = { 0, 0, 0, LINE_FEED_CODEPOINT, 0, 0, 0 };
3814 static const byte* const lfEncodings[5] = { lfUTF + 3, lfUTF + 3, lfUTF + 2, lfUTF + 3, lfUTF };
3815
3816 static const byte crlfUTF8[] = { CARRIAGE_RETURN_CODEPOINT, LINE_FEED_CODEPOINT };
3817 static const byte crlfUTF16[] = { 0, CARRIAGE_RETURN_CODEPOINT, 0, LINE_FEED_CODEPOINT, 0 };
3818 static const byte crlfUTF32[] = { 0, 0, 0, CARRIAGE_RETURN_CODEPOINT, 0, 0, 0, LINE_FEED_CODEPOINT, 0, 0, 0 };
3819 static const byte* const crlfEncodings[5] = { crlfUTF8, crlfUTF16 + 1, crlfUTF16, crlfUTF32 + 3, crlfUTF32 };
3820
3821 JSON_Status status = JSON_Failure;
3822 if (writer && !GET_FLAGS(writer->state, WRITER_IN_PROTECTED_API) && writer->error == JSON_Error_None)
3823 {
3824 const byte* const* encodings;
3825 size_t length;
3826 size_t encodedLength;
3827 SET_FLAGS_ON(WriterState, writer->state, WRITER_STARTED | WRITER_IN_PROTECTED_API);
3828 if (GET_FLAGS(writer->flags, WRITER_USE_CRLF))
3829 {
3830 encodings = crlfEncodings;
3831 length = 2;
3832 }
3833 else
3834 {
3835 encodings = lfEncodings;
3836 length = 1;
3837 }
3838 encodedLength = length * (size_t)SHORTEST_ENCODING_SEQUENCE(writer->outputEncoding);
3839 if (JSON_Writer_OutputBytes(writer, encodings[writer->outputEncoding - 1], encodedLength))
3840 status = JSON_Success;
3841 SET_FLAGS_OFF(WriterState, writer->state, WRITER_IN_PROTECTED_API);
3842 }
3843 return status;
3844 }
3845
3846 #endif /* JSON_NO_WRITER */
3847
3848 /******************** Miscellaneous API ********************/
3849
JSON_LibraryVersion(void)3850 const JSON_Version* JSON_CALL JSON_LibraryVersion(void)
3851 {
3852 static JSON_Version version = { JSON_MAJOR_VERSION, JSON_MINOR_VERSION, JSON_MICRO_VERSION };
3853 return &version;
3854 }
3855
JSON_ErrorString(JSON_Error error)3856 const char* JSON_CALL JSON_ErrorString(JSON_Error error)
3857 {
3858 /* This array must match the order and number of the JSON_Error enum. */
3859 static const char* errorStrings[] =
3860 {
3861 /* JSON_Error_None */ "no error",
3862 /* JSON_Error_OutOfMemory */ "could not allocate enough memory",
3863 /* JSON_Error_AbortedByHandler */ "the operation was aborted by a handler",
3864 /* JSON_Error_BOMNotAllowed */ "the input begins with a byte-order mark (BOM), which is not allowed by RFC 4627",
3865 /* JSON_Error_InvalidEncodingSequence */ "the input contains a byte or sequence of bytes that is not valid for the input encoding",
3866 /* JSON_Error_UnknownToken */ "the input contains an unknown token",
3867 /* JSON_Error_UnexpectedToken */ "the input contains an unexpected token",
3868 /* JSON_Error_IncompleteToken */ "the input ends in the middle of a token",
3869 /* JSON_Error_MoreTokensExpected */ "the input ends when more tokens are expected",
3870 /* JSON_Error_UnescapedControlCharacter */ "the input contains a string containing an unescaped control character (U+0000 - U+001F)",
3871 /* JSON_Error_InvalidEscapeSequence */ "the input contains a string containing an invalid escape sequence",
3872 /* JSON_Error_UnpairedSurrogateEscapeSequence */ "the input contains a string containing an unmatched UTF-16 surrogate codepoint",
3873 /* JSON_Error_TooLongString */ "the input contains a string that is too long",
3874 /* JSON_Error_InvalidNumber */ "the input contains an invalid number",
3875 /* JSON_Error_TooLongNumber */ "the input contains a number that is too long",
3876 /* JSON_Error_DuplicateObjectMember */ "the input contains an object with duplicate members",
3877 /* JSON_Error_StoppedAfterEmbeddedDocument */ "the end of the embedded document was reached"
3878 };
3879 return ((unsigned int)error < (sizeof(errorStrings) / sizeof(errorStrings[0])))
3880 ? errorStrings[error]
3881 : "";
3882 }
3883
3884 static const uint32_t endianEncodings = (((uint32_t)JSON_UTF32BE) << 24) | (((uint32_t)JSON_UTF16BE) << 16) | (((uint32_t)JSON_UTF16LE) << 8) | ((uint32_t)JSON_UTF32LE);
3885
JSON_NativeUTF16Encoding(void)3886 JSON_Encoding JSON_CALL JSON_NativeUTF16Encoding(void)
3887 {
3888 return (JSON_Encoding)(((byte*)&endianEncodings)[1]);
3889 }
3890
JSON_NativeUTF32Encoding(void)3891 JSON_Encoding JSON_CALL JSON_NativeUTF32Encoding(void)
3892 {
3893 return (JSON_Encoding)(((byte*)&endianEncodings)[0]);
3894 }
3895