1 /*
2 Copyright (c) 2012 John-Anthony Owens
3
4 Permission is hereby granted, free of charge, to any person obtaining a
5 copy of this software and associated documentation files (the "Software"),
6 to deal in the Software without restriction, including without limitation
7 the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 and/or sell copies of the Software, and to permit persons to whom the
9 Software is furnished to do so, subject to the following conditions:
10
11 The above copyright notice and this permission notice shall be included
12 in all copies or substantial portions of the Software.
13
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
20 IN THE SOFTWARE.
21 */
22
23 #include <stdlib.h>
24 #include <string.h>
25
26 /* Ensure uint32_t type (compiler-dependent). */
27 #if defined(_MSC_VER)
28 typedef unsigned __int32 uint32_t;
29 #else
30 #include <stdint.h>
31 #endif
32
33 /* Ensure SIZE_MAX defined. */
34 #ifndef SIZE_MAX
35 #define SIZE_MAX ((size_t)-1)
36 #endif
37
38 /* Mark APIs for export (as opposed to import) when we build this file. */
39 #define JSON_BUILDING
40 #include <formats/jsonsax_full.h>
41
42 /* Default allocation constants. */
43 #define DEFAULT_TOKEN_BYTES_LENGTH 64 /* MUST be a power of 2 */
44 #define DEFAULT_SYMBOL_STACK_SIZE 32 /* MUST be a power of 2 */
45
46 /* Types for readability. */
47 typedef unsigned char byte;
48 typedef uint32_t Codepoint;
49
50 /* Especially-relevant Unicode codepoints. */
51 #define U_(x) ((Codepoint)(x))
52 #define NULL_CODEPOINT U_(0x0000)
53 #define BACKSPACE_CODEPOINT U_(0x0008)
54 #define TAB_CODEPOINT U_(0x0009)
55 #define LINE_FEED_CODEPOINT U_(0x000A)
56 #define FORM_FEED_CODEPOINT U_(0x000C)
57 #define CARRIAGE_RETURN_CODEPOINT U_(0x000D)
58 #define FIRST_NON_CONTROL_CODEPOINT U_(0x0020)
59 #define DELETE_CODEPOINT U_(0x007F)
60 #define FIRST_NON_ASCII_CODEPOINT U_(0x0080)
61 #define FIRST_2_BYTE_UTF8_CODEPOINT U_(0x0080)
62 #define FIRST_3_BYTE_UTF8_CODEPOINT U_(0x0800)
63 #define LINE_SEPARATOR_CODEPOINT U_(0x2028)
64 #define PARAGRAPH_SEPARATOR_CODEPOINT U_(0x2029)
65 #define BOM_CODEPOINT U_(0xFEFF)
66 #define REPLACEMENT_CHARACTER_CODEPOINT U_(0xFFFD)
67 #define FIRST_NON_BMP_CODEPOINT U_(0x10000)
68 #define FIRST_4_BYTE_UTF8_CODEPOINT U_(0x10000)
69 #define MAX_CODEPOINT U_(0x10FFFF)
70 #define EOF_CODEPOINT U_(0xFFFFFFFF)
71
72 /* Bit-masking macros. */
73 #define BOTTOM_3_BITS(x) ((x) & 0x7)
74 #define BOTTOM_4_BITS(x) ((x) & 0xF)
75 #define BOTTOM_5_BITS(x) ((x) & 0x1F)
76 #define BOTTOM_6_BITS(x) ((x) & 0x3F)
77
78 /* Bit-flag macros. */
79 #define GET_FLAGS(x, f) ((x) & (f))
80 #define SET_FLAGS_ON(flagstype, x, f) do { (x) |= (flagstype)(f); } while (0)
81 #define SET_FLAGS_OFF(flagstype, x, f) do { (x) &= (flagstype)~(f); } while (0)
82 #define SET_FLAGS(flagstype, x, f, cond) do { if (cond) (x) |= (flagstype)(f); else (x) &= (flagstype)~(f); } while (0)
83
84 /* UTF-8 byte-related macros. */
85 #define IS_UTF8_SINGLE_BYTE(b) (((b) & 0x80) == 0)
86 #define IS_UTF8_CONTINUATION_BYTE(b) (((b) & 0xC0) == 0x80)
87 #define IS_UTF8_FIRST_BYTE_OF_2(b) (((b) & 0xE0) == 0xC0)
88 #define IS_UTF8_FIRST_BYTE_OF_3(b) (((b) & 0xF0) == 0xE0)
89 #define IS_UTF8_FIRST_BYTE_OF_4(b) (((b) & 0xF8) == 0xF0)
90
/* Unicode codepoint-related macros. */
/* NOTE(review): IS_NONCHARACTER only inspects the low byte of c (mask 0xFE),
   so in addition to U+FDD0 - U+FDEF and the U+nFFFE / U+nFFFF pairs it is
   also true for every other codepoint whose low byte is 0xFE or 0xFF
   (e.g. U+00FE, U+00FF, U+01FE). The Unicode Standard defines only
   U+FDD0 - U+FDEF and U+nFFFE / U+nFFFF as noncharacters, which would be a
   0xFFFE mask - confirm against the macro's callers before changing it. */
#define IS_NONCHARACTER(c) ((((c) & 0xFE) == 0xFE) || (((c) >= 0xFDD0) && ((c) <= 0xFDEF)))
/* Surrogates are U+D800 - U+DFFF: leading ones are U+D800 - U+DBFF and
   trailing ones are U+DC00 - U+DFFF. */
#define IS_SURROGATE(c) (((c) & 0xFFFFF800) == 0xD800)
#define IS_LEADING_SURROGATE(c) (((c) & 0xFFFFFC00) == 0xD800)
#define IS_TRAILING_SURROGATE(c) (((c) & 0xFFFFFC00) == 0xDC00)
/* Both conversions use a packed 32-bit pair: leading surrogate in the high
   16 bits, trailing surrogate in the low 16 bits. The additive constants
   rely on 32-bit unsigned wrap-around to cancel the surrogate bias bits. */
#define CODEPOINT_FROM_SURROGATES(hi_lo) ((((hi_lo) >> 16) << 10) + ((hi_lo) & 0xFFFF) + 0xFCA02400)
#define SURROGATES_FROM_CODEPOINT(c) ((((c) << 6) & 0x7FF0000) + ((c) & 0x3FF) + 0xD7C0DC00)
#define SHORTEST_ENCODING_SEQUENCE(enc) (1U << ((enc) >> 1))
#define LONGEST_ENCODING_SEQUENCE 4
100
101 /* Internal types that alias enum types in the public API.
102 By using byte to represent these values internally,
103 we can guarantee minimal storage size and avoid compiler
104 warnings when using values of the type in switch statements
105 that don't have (or need) a default case. */
106 typedef byte Encoding;
107 typedef byte Error;
108 typedef byte TokenAttributes;
109
110 /******************** Default Memory Suite ********************/
111
DefaultReallocHandler(void * userData,void * ptr,size_t size)112 static void* JSON_CALL DefaultReallocHandler(void* userData, void* ptr, size_t size)
113 {
114 (void)userData; /* unused */
115 return realloc(ptr, size);
116 }
117
DefaultFreeHandler(void * userData,void * ptr)118 static void JSON_CALL DefaultFreeHandler(void* userData, void* ptr)
119 {
120 (void)userData; /* unused */
121 free(ptr);
122 }
123
/* Memory suite built from the two default handlers above, with a NULL
   first member (no user data). */
static const JSON_MemorySuite defaultMemorySuite = { NULL, &DefaultReallocHandler, &DefaultFreeHandler };
125
/* Obtains a buffer twice as large as the current one.

   pMemorySuite   - allocator callbacks used for the new storage.
   pDefaultBuffer - the fixed buffer the owner started out with; it is never
                    handed to the allocator.
   pBuffer        - the buffer currently in use (may be pDefaultBuffer).
   length         - current size of pBuffer, in bytes.

   Returns the bigger buffer, holding the first length bytes of the old
   contents, or NULL if doubling length would overflow size_t or the
   allocator returned NULL. The caller is responsible for recording the
   new size (length * 2). */
static byte* DoubleBuffer(const JSON_MemorySuite* pMemorySuite, byte* pDefaultBuffer, byte* pBuffer, size_t length)
{
    const size_t doubled = length * 2;
    byte* pGrown;

    /* Unsigned multiplication wrapped around: the request cannot be met. */
    if (doubled < length)
        return NULL;

    /* Already dynamically allocated: let the allocator grow it in place
       or move it. */
    if (pBuffer != pDefaultBuffer)
        return (byte*)pMemorySuite->realloc(pMemorySuite->userData, pBuffer, doubled);

    /* Still using the default buffer: allocate fresh storage and carry the
       existing contents over by hand. */
    pGrown = (byte*)pMemorySuite->realloc(pMemorySuite->userData, NULL, doubled);
    if (pGrown)
        memcpy(pGrown, pDefaultBuffer, length);
    return pGrown;
}
147
148 /******************** Unicode Decoder ********************/
149
150 /* Mutually-exclusive decoder states. */
/* The bits of DecoderState are laid out as follows:

   ---llldd

   - = unused (3 bits)
   l = expected total sequence length (3 bits)
   d = number of bytes decoded so far (2 bits)
*/
159
160 #define DECODER_RESET 0x00
161 #define DECODED_1_OF_2 0x09 /* 00001001 */
162 #define DECODED_1_OF_3 0x0D /* 00001101 */
163 #define DECODED_2_OF_3 0x0E /* 00001110 */
164 #define DECODED_1_OF_4 0x11 /* 00010001 */
165 #define DECODED_2_OF_4 0x12 /* 00010010 */
166 #define DECODED_3_OF_4 0x13 /* 00010011 */
167 typedef byte DecoderState;
168
169 #define DECODER_STATE_BYTES(s) (size_t)((s) & 0x3)
170
171 /* Decoder data. */
/* Decoder data: the state carried from one Decoder_ProcessByte() call to
   the next while a multi-byte sequence is being assembled. */
typedef struct tag_DecoderData
{
    DecoderState state; /* DECODER_RESET or one of the DECODED_x_OF_y values */
    uint32_t bits;      /* bits accumulated so far; their layout depends on
                           the input encoding (see Decoder_ProcessByte) */
} DecoderData;
typedef DecoderData* Decoder;
178
/* The bits of DecoderOutput are laid out as follows:
180
181 ------rrlllccccccccccccccccccccc
182
183 - = unused (6 bits)
184 r = result code (2 bits)
185 l = sequence length (3 bits)
186 c = codepoint (21 bits)
187 */
188 #define SEQUENCE_PENDING 0
189 #define SEQUENCE_COMPLETE 1
190 #define SEQUENCE_INVALID_INCLUSIVE 2
191 #define SEQUENCE_INVALID_EXCLUSIVE 3
192 typedef uint32_t DecoderResultCode;
193
194 #define DECODER_OUTPUT(r, l, c) (DecoderOutput)(((r) << 24) | ((l) << 21) | (c))
195 #define DECODER_RESULT_CODE(o) (DecoderResultCode)((DecoderOutput)(o) >> 24)
196 #define DECODER_SEQUENCE_LENGTH(o) (size_t)(((DecoderOutput)(o) >> 21) & 0x7)
197 #define DECODER_CODEPOINT(o) (Codepoint)((DecoderOutput)(o) & 0x001FFFFF)
198 typedef uint32_t DecoderOutput;
199
200 /* Decoder functions. */
201
/* Returns the decoder to its idle state: no sequence in progress and no
   accumulated bits. */
static void Decoder_Reset(Decoder decoder)
{
    decoder->bits = 0;
    decoder->state = DECODER_RESET;
}
207
/* Returns 1 if the decoder is part-way through a multi-byte sequence
   (its state is anything other than DECODER_RESET), otherwise 0. */
static int Decoder_SequencePending(Decoder decoder)
{
    if (decoder->state == DECODER_RESET)
        return 0;
    return 1;
}
212
/* Feeds one byte of input to the decoder and reports what that byte
   completed, if anything.

   decoder  - state carried between calls (current state plus the bits
              accumulated so far).
   encoding - encoding of the input. JSON_UTF8, JSON_UTF16LE, JSON_UTF16BE,
              JSON_UTF32LE and JSON_UTF32BE are handled; any other value
              (or a state that is not valid for the encoding) matches no
              case, so the decoder is reset and a SEQUENCE_PENDING output
              with length 0 is returned.
   b        - the next input byte.

   Returns a value built with DECODER_OUTPUT(result, length, codepoint):
   - SEQUENCE_PENDING: more bytes are needed (length and codepoint are 0).
   - SEQUENCE_COMPLETE: a codepoint was decoded from a sequence of the
     reported length.
   - SEQUENCE_INVALID_INCLUSIVE: an invalid sequence of the reported length
     was seen, and that length counts b itself.
   - SEQUENCE_INVALID_EXCLUSIVE: an invalid sequence of the reported length
     was seen, and that length does NOT count b. In the UTF-16 case b is
     kept as the first byte of the next sequence; in the UTF-8 cases the
     decoder is reset without consuming b.
     NOTE(review): presumably the caller re-submits b after an EXCLUSIVE
     result - confirm against the call site.

   Control flow: every path that falls out of the outer switch resets the
   decoder for the next sequence. Paths that must preserve partial state
   jump to the noreset label instead. */
static DecoderOutput Decoder_ProcessByte(Decoder decoder, Encoding encoding, byte b)
{
    DecoderOutput output = DECODER_OUTPUT(SEQUENCE_PENDING, 0, 0);
    switch (encoding)
    {
    case JSON_UTF8:
        /* When the input encoding is UTF-8, the decoded codepoint's bits are
           recorded in the bottom 3 bytes of bits as they are decoded.
           The top byte is not used. */
        switch (decoder->state)
        {
        case DECODER_RESET:
            if (IS_UTF8_SINGLE_BYTE(b))
            {
                output = DECODER_OUTPUT(SEQUENCE_COMPLETE, 1, b);
            }
            else if (IS_UTF8_FIRST_BYTE_OF_2(b))
            {
                /* UTF-8 2-byte sequences that are overlong encodings can be
                   detected from just the first byte (C0 or C1). */
                decoder->bits = (uint32_t)BOTTOM_5_BITS(b) << 6;
                if (decoder->bits < FIRST_2_BYTE_UTF8_CODEPOINT)
                {
                    output = DECODER_OUTPUT(SEQUENCE_INVALID_INCLUSIVE, 1, 0);
                }
                else
                {
                    decoder->state = DECODED_1_OF_2;
                    goto noreset;
                }
            }
            else if (IS_UTF8_FIRST_BYTE_OF_3(b))
            {
                decoder->bits = (uint32_t)BOTTOM_4_BITS(b) << 12;
                decoder->state = DECODED_1_OF_3;
                goto noreset;
            }
            else if (IS_UTF8_FIRST_BYTE_OF_4(b))
            {
                /* Some UTF-8 4-byte sequences that encode out-of-range
                   codepoints can be detected from the first byte (F5 - FF). */
                decoder->bits = (uint32_t)BOTTOM_3_BITS(b) << 18;
                if (decoder->bits > MAX_CODEPOINT)
                {
                    output = DECODER_OUTPUT(SEQUENCE_INVALID_INCLUSIVE, 1, 0);
                }
                else
                {
                    decoder->state = DECODED_1_OF_4;
                    goto noreset;
                }
            }
            else
            {
                /* The byte is of the form 11111xxx or 10xxxxxx, and is not
                   a valid first byte for a UTF-8 sequence. */
                output = DECODER_OUTPUT(SEQUENCE_INVALID_INCLUSIVE, 1, 0);
            }
            break;

        case DECODED_1_OF_2:
            if (IS_UTF8_CONTINUATION_BYTE(b))
            {
                output = DECODER_OUTPUT(SEQUENCE_COMPLETE, 2, decoder->bits | BOTTOM_6_BITS(b));
            }
            else
            {
                /* Only the lead byte is reported as invalid; b is not
                   counted. */
                output = DECODER_OUTPUT(SEQUENCE_INVALID_EXCLUSIVE, 1, 0);

            }
            break;

        case DECODED_1_OF_3:
            if (IS_UTF8_CONTINUATION_BYTE(b))
            {
                /* UTF-8 3-byte sequences that are overlong encodings or encode
                   surrogate codepoints can be detected after 2 bytes. */
                decoder->bits |= (uint32_t)BOTTOM_6_BITS(b) << 6;
                if ((decoder->bits < FIRST_3_BYTE_UTF8_CODEPOINT) || IS_SURROGATE(decoder->bits))
                {
                    output = DECODER_OUTPUT(SEQUENCE_INVALID_EXCLUSIVE, 1, 0);
                }
                else
                {
                    decoder->state = DECODED_2_OF_3;
                    goto noreset;
                }
            }
            else
            {
                output = DECODER_OUTPUT(SEQUENCE_INVALID_EXCLUSIVE, 1, 0);
            }
            break;

        case DECODED_2_OF_3:
            if (IS_UTF8_CONTINUATION_BYTE(b))
            {
                output = DECODER_OUTPUT(SEQUENCE_COMPLETE, 3, decoder->bits | BOTTOM_6_BITS(b));
            }
            else
            {
                output = DECODER_OUTPUT(SEQUENCE_INVALID_EXCLUSIVE, 2, 0);
            }
            break;

        case DECODED_1_OF_4:
            if (IS_UTF8_CONTINUATION_BYTE(b))
            {
                /* UTF-8 4-byte sequences that are overlong encodings or encode
                   out-of-range codepoints can be detected after 2 bytes. */
                decoder->bits |= (uint32_t)BOTTOM_6_BITS(b) << 12;
                if ((decoder->bits < FIRST_4_BYTE_UTF8_CODEPOINT) || (decoder->bits > MAX_CODEPOINT))
                {
                    output = DECODER_OUTPUT(SEQUENCE_INVALID_EXCLUSIVE, 1, 0);
                }
                else
                {
                    decoder->state = DECODED_2_OF_4;
                    goto noreset;
                }
            }
            else
            {
                output = DECODER_OUTPUT(SEQUENCE_INVALID_EXCLUSIVE, 1, 0);
            }
            break;

        case DECODED_2_OF_4:
            if (IS_UTF8_CONTINUATION_BYTE(b))
            {
                decoder->bits |= (uint32_t)BOTTOM_6_BITS(b) << 6;
                decoder->state = DECODED_3_OF_4;
                goto noreset;
            }
            else
            {
                output = DECODER_OUTPUT(SEQUENCE_INVALID_EXCLUSIVE, 2, 0);
            }
            break;

        case DECODED_3_OF_4:
            if (IS_UTF8_CONTINUATION_BYTE(b))
            {
                output = DECODER_OUTPUT(SEQUENCE_COMPLETE, 4, decoder->bits | BOTTOM_6_BITS(b));
            }
            else
            {
                output = DECODER_OUTPUT(SEQUENCE_INVALID_EXCLUSIVE, 3, 0);
            }
            break;
        }
        break;

    case JSON_UTF16LE:
        /* When the input encoding is UTF-16, the decoded codepoint's bits are
           recorded in the bottom 2 bytes of bits as they are decoded.
           If those 2 bytes form a leading surrogate, the decoder treats the
           surrogate pair as a single 4-byte sequence, shifts the leading
           surrogate into the high 2 bytes of bits, and decodes the
           trailing surrogate's bits in the bottom 2 bytes of bits. */
        switch (decoder->state)
        {
        case DECODER_RESET:
            /* Little-endian: the first byte is the low byte of the unit. */
            decoder->bits = b;
            decoder->state = DECODED_1_OF_2;
            goto noreset;

        case DECODED_1_OF_2:
            decoder->bits |= (uint32_t)b << 8;
            if (IS_TRAILING_SURROGATE(decoder->bits))
            {
                /* A trailing surrogate cannot appear on its own. */
                output = DECODER_OUTPUT(SEQUENCE_INVALID_INCLUSIVE, 2, 0);
            }
            else if (IS_LEADING_SURROGATE(decoder->bits))
            {
                /* A leading surrogate implies a 4-byte surrogate pair. */
                decoder->bits <<= 16;
                decoder->state = DECODED_2_OF_4;
                goto noreset;
            }
            else
            {
                output = DECODER_OUTPUT(SEQUENCE_COMPLETE, 2, decoder->bits);
            }
            break;

        case DECODED_2_OF_4:
            decoder->bits |= b;
            decoder->state = DECODED_3_OF_4;
            goto noreset;

        case DECODED_3_OF_4:
            decoder->bits |= (uint32_t)b << 8;
            if (!IS_TRAILING_SURROGATE(decoder->bits & 0xFFFF))
            {
                /* A leading surrogate must be followed by a trailing one.
                   Treat the previous 3 bytes as an invalid 2-byte sequence
                   followed by the first byte of a new sequence. */
                /* Keep only the low byte (the first byte of the new unit);
                   b itself is not retained in bits. */
                decoder->bits &= 0xFF;
                decoder->state = DECODED_1_OF_2;
                output = DECODER_OUTPUT(SEQUENCE_INVALID_EXCLUSIVE, 2, 0);
                goto noreset;
            }
            else
            {
                output = DECODER_OUTPUT(SEQUENCE_COMPLETE, 4, CODEPOINT_FROM_SURROGATES(decoder->bits));
            }
            break;
        }
        break;

    case JSON_UTF16BE:
        /* When the input encoding is UTF-16, the decoded codepoint's bits are
           recorded in the bottom 2 bytes of bits as they are decoded.
           If those 2 bytes form a leading surrogate, the decoder treats the
           surrogate pair as a single 4-byte sequence, shifts the leading
           surrogate into the high 2 bytes of bits, and decodes the
           trailing surrogate's bits in the bottom 2 bytes of bits. */
        switch (decoder->state)
        {
        case DECODER_RESET:
            /* Big-endian: the first byte is the high byte of the unit. */
            decoder->bits = (uint32_t)b << 8;
            decoder->state = DECODED_1_OF_2;
            goto noreset;

        case DECODED_1_OF_2:
            decoder->bits |= b;
            if (IS_TRAILING_SURROGATE(decoder->bits))
            {
                /* A trailing surrogate cannot appear on its own. */
                output = DECODER_OUTPUT(SEQUENCE_INVALID_INCLUSIVE, 2, 0);
            }
            else if (IS_LEADING_SURROGATE(decoder->bits))
            {
                /* A leading surrogate implies a 4-byte surrogate pair. */
                decoder->bits <<= 16;
                decoder->state = DECODED_2_OF_4;
                goto noreset;
            }
            else
            {
                output = DECODER_OUTPUT(SEQUENCE_COMPLETE, 2, decoder->bits);
            }
            break;

        case DECODED_2_OF_4:
            decoder->bits |= (uint32_t)b << 8;
            decoder->state = DECODED_3_OF_4;
            goto noreset;

        case DECODED_3_OF_4:
            decoder->bits |= b;
            if (!IS_TRAILING_SURROGATE(decoder->bits & 0xFFFF))
            {
                /* A leading surrogate must be followed by a trailing one.
                   Treat the previous 3 bytes as an invalid 2-byte sequence
                   followed by the first byte of a new sequence. */
                /* Keep only the high byte (the first byte of the new unit);
                   b itself is not retained in bits. */
                decoder->bits &= 0xFF00;
                decoder->state = DECODED_1_OF_2;
                output = DECODER_OUTPUT(SEQUENCE_INVALID_EXCLUSIVE, 2, 0);
                goto noreset;
            }
            else
            {
                output = DECODER_OUTPUT(SEQUENCE_COMPLETE, 4, CODEPOINT_FROM_SURROGATES(decoder->bits));
            }
            break;
        }
        break;

    case JSON_UTF32LE:
        /* When the input encoding is UTF-32, the decoded codepoint's bits are
           recorded in bits as they are decoded. */
        switch (decoder->state)
        {
        case DECODER_RESET:
            decoder->state = DECODED_1_OF_4;
            decoder->bits = (uint32_t)b;
            goto noreset;

        case DECODED_1_OF_4:
            decoder->state = DECODED_2_OF_4;
            decoder->bits |= (uint32_t)b << 8;
            goto noreset;

        case DECODED_2_OF_4:
            decoder->state = DECODED_3_OF_4;
            decoder->bits |= (uint32_t)b << 16;
            goto noreset;

        case DECODED_3_OF_4:
            decoder->bits |= (uint32_t)b << 24;
            /* Surrogates and values above U+10FFFF are not valid codepoints. */
            output = (IS_SURROGATE(decoder->bits) || (decoder->bits > MAX_CODEPOINT))
                ? DECODER_OUTPUT(SEQUENCE_INVALID_INCLUSIVE, 4, 0)
                : DECODER_OUTPUT(SEQUENCE_COMPLETE, 4, decoder->bits);
            break;
        }
        break;

    case JSON_UTF32BE:
        /* When the input encoding is UTF-32, the decoded codepoint's bits are
           recorded in bits as they are decoded. */
        switch (decoder->state)
        {
        case DECODER_RESET:
            decoder->state = DECODED_1_OF_4;
            decoder->bits = (uint32_t)b << 24;
            goto noreset;

        case DECODED_1_OF_4:
            decoder->state = DECODED_2_OF_4;
            decoder->bits |= (uint32_t)b << 16;
            goto noreset;

        case DECODED_2_OF_4:
            decoder->state = DECODED_3_OF_4;
            decoder->bits |= (uint32_t)b << 8;
            goto noreset;

        case DECODED_3_OF_4:
            decoder->bits |= b;
            /* Surrogates and values above U+10FFFF are not valid codepoints. */
            output = (IS_SURROGATE(decoder->bits) || (decoder->bits > MAX_CODEPOINT))
                ? DECODER_OUTPUT(SEQUENCE_INVALID_INCLUSIVE, 4, 0)
                : DECODER_OUTPUT(SEQUENCE_COMPLETE, 4, decoder->bits);
            break;
        }
        break;
    }

    /* Reset the decoder for the next sequence. */
    Decoder_Reset(decoder);

noreset:
    return output;
}
549
550 /******************** Unicode Encoder ********************/
551
552 /* This function makes the following assumptions about its input:
553
554 1. The c argument is a valid codepoint (U+0000 - U+10FFFF).
555 2. The encoding argument is not JSON_UnknownEncoding.
556 3. The pBytes argument points to an array of at least 4 bytes.
557 */
EncodeCodepoint(Codepoint c,Encoding encoding,byte * pBytes)558 static size_t EncodeCodepoint(Codepoint c, Encoding encoding, byte* pBytes)
559 {
560 size_t length = 0;
561 switch (encoding)
562 {
563 case JSON_UTF8:
564 if (c < FIRST_2_BYTE_UTF8_CODEPOINT)
565 {
566 pBytes[0] = (byte)c;
567 length = 1;
568 }
569 else if (c < FIRST_3_BYTE_UTF8_CODEPOINT)
570 {
571 pBytes[0] = (byte)(0xC0 | (c >> 6));
572 pBytes[1] = (byte)(0x80 | BOTTOM_6_BITS(c));
573 length = 2;
574 }
575 else if (c < FIRST_4_BYTE_UTF8_CODEPOINT)
576 {
577 pBytes[0] = (byte)(0xE0 | (c >> 12));
578 pBytes[1] = (byte)(0x80 | BOTTOM_6_BITS(c >> 6));
579 pBytes[2] = (byte)(0x80 | BOTTOM_6_BITS(c));
580 length = 3;
581 }
582 else
583 {
584 pBytes[0] = (byte)(0xF0 | (c >> 18));
585 pBytes[1] = (byte)(0x80 | BOTTOM_6_BITS(c >> 12));
586 pBytes[2] = (byte)(0x80 | BOTTOM_6_BITS(c >> 6));
587 pBytes[3] = (byte)(0x80 | BOTTOM_6_BITS(c));
588 length = 4;
589 }
590 break;
591
592 case JSON_UTF16LE:
593 if (c < FIRST_NON_BMP_CODEPOINT)
594 {
595 pBytes[0] = (byte)(c);
596 pBytes[1] = (byte)(c >> 8);
597 length = 2;
598 }
599 else
600 {
601 uint32_t surrogates = SURROGATES_FROM_CODEPOINT(c);
602
603 /* Leading surrogate. */
604 pBytes[0] = (byte)(surrogates >> 16);
605 pBytes[1] = (byte)(surrogates >> 24);
606
607 /* Trailing surrogate. */
608 pBytes[2] = (byte)(surrogates);
609 pBytes[3] = (byte)(surrogates >> 8);
610 length = 4;
611 }
612 break;
613
614 case JSON_UTF16BE:
615 if (c < FIRST_NON_BMP_CODEPOINT)
616 {
617 pBytes[1] = (byte)(c);
618 pBytes[0] = (byte)(c >> 8);
619 length = 2;
620 }
621 else
622 {
623 /* The codepoint requires a surrogate pair in UTF-16. */
624 uint32_t surrogates = SURROGATES_FROM_CODEPOINT(c);
625
626 /* Leading surrogate. */
627 pBytes[1] = (byte)(surrogates >> 16);
628 pBytes[0] = (byte)(surrogates >> 24);
629
630 /* Trailing surrogate. */
631 pBytes[3] = (byte)(surrogates);
632 pBytes[2] = (byte)(surrogates >> 8);
633 length = 4;
634 }
635 break;
636
637 case JSON_UTF32LE:
638 pBytes[0] = (byte)(c);
639 pBytes[1] = (byte)(c >> 8);
640 pBytes[2] = (byte)(c >> 16);
641 pBytes[3] = (byte)(c >> 24);
642 length = 4;
643 break;
644
645 case JSON_UTF32BE:
646 pBytes[3] = (byte)(c);
647 pBytes[2] = (byte)(c >> 8);
648 pBytes[1] = (byte)(c >> 16);
649 pBytes[0] = (byte)(c >> 24);
650 length = 4;
651 break;
652 }
653 return length;
654 }
655
656 /******************** JSON Lexer States ********************/
657
658 /* Mutually-exclusive lexer states. */
659 #define LEXING_WHITESPACE 0
660 #define LEXING_LITERAL 1
661 #define LEXING_STRING 2
662 #define LEXING_STRING_ESCAPE 3
663 #define LEXING_STRING_HEX_ESCAPE_BYTE_1 4
664 #define LEXING_STRING_HEX_ESCAPE_BYTE_2 5
665 #define LEXING_STRING_HEX_ESCAPE_BYTE_3 6
666 #define LEXING_STRING_HEX_ESCAPE_BYTE_4 7
667 #define LEXING_STRING_HEX_ESCAPE_BYTE_5 8
668 #define LEXING_STRING_HEX_ESCAPE_BYTE_6 9
669 #define LEXING_STRING_HEX_ESCAPE_BYTE_7 10
670 #define LEXING_STRING_HEX_ESCAPE_BYTE_8 11
671 #define LEXING_STRING_TRAILING_SURROGATE_HEX_ESCAPE_BACKSLASH 12
672 #define LEXING_STRING_TRAILING_SURROGATE_HEX_ESCAPE_U 13
673 #define LEXING_NUMBER_AFTER_MINUS 14
674 #define LEXING_NUMBER_AFTER_LEADING_ZERO 15
675 #define LEXING_NUMBER_AFTER_LEADING_NEGATIVE_ZERO 16
676 #define LEXING_NUMBER_AFTER_X 17
677 #define LEXING_NUMBER_HEX_DIGITS 18
678 #define LEXING_NUMBER_DECIMAL_DIGITS 19
679 #define LEXING_NUMBER_AFTER_DOT 20
680 #define LEXING_NUMBER_FRACTIONAL_DIGITS 21
681 #define LEXING_NUMBER_AFTER_E 22
682 #define LEXING_NUMBER_AFTER_EXPONENT_SIGN 23
683 #define LEXING_NUMBER_EXPONENT_DIGITS 24
684 #define LEXING_COMMENT_AFTER_SLASH 25
685 #define LEXING_SINGLE_LINE_COMMENT 26
686 #define LEXING_MULTI_LINE_COMMENT 27
687 #define LEXING_MULTI_LINE_COMMENT_AFTER_STAR 28
688 #define LEXER_ERROR 255
689 typedef byte LexerState;
690
691 /******************** JSON Grammarian ********************/
692
693 /* The JSON grammar comprises the following productions:
694
695 1. VALUE => null
696 2. VALUE => boolean
697 3. VALUE => string
698 4. VALUE => number
699 5. VALUE => specialnumber
700 6. VALUE => { MEMBERS }
701 7. VALUE => [ ITEMS ]
702 8. MEMBERS => MEMBER MORE_MEMBERS
703 9. MEMBERS => e
704 10. MEMBER => string : VALUE
705 11. MORE_MEMBERS => , MEMBER MORE_MEMBERS
706 12. MORE_MEMBERS => e
707 13. ITEMS => ITEM MORE_ITEMS
708 14. ITEMS => e
709 15. ITEM => VALUE
710 16. MORE_ITEMS => , ITEM MORE_ITEMS
711 17. MORE_ITEMS => e
712
713 We implement a simple LL(1) parser based on this grammar, with events
714 emitted when certain non-terminals are replaced.
715 */
716
717 /* Mutually-exclusive grammar tokens and non-terminals. The values are defined
718 so that the bottom 4 bits of a value can be used as an index into the
719 grammar production rule table. */
720 #define T_NONE 0x00 /* tokens are in the form 0x0X */
721 #define T_NULL 0x01
722 #define T_TRUE 0x02
723 #define T_FALSE 0x03
724 #define T_STRING 0x04
725 #define T_NUMBER 0x05
726 #define T_NAN 0x06
727 #define T_INFINITY 0x07
728 #define T_NEGATIVE_INFINITY 0x08
729 #define T_LEFT_CURLY 0x09
730 #define T_RIGHT_CURLY 0x0A
731 #define T_LEFT_SQUARE 0x0B
732 #define T_RIGHT_SQUARE 0x0C
733 #define T_COLON 0x0D
734 #define T_COMMA 0x0E
735 #define NT_VALUE 0x10 /* non-terminals are in the form 0x1X */
736 #define NT_MEMBERS 0x11
737 #define NT_MEMBER 0x12
738 #define NT_MORE_MEMBERS 0x13
739 #define NT_ITEMS 0x14
740 #define NT_ITEM 0x15
741 #define NT_MORE_ITEMS 0x16
742 typedef byte Symbol;
743
744 #define IS_NONTERMINAL(s) ((s) & 0x10)
745 #define IS_TOKEN(s) !IS_NONTERMINAL(s)
746
747 /* Grammarian data. */
/* Grammarian data: the LL(1) parser's symbol stack. The stack starts out in
   the embedded defaultStack and moves to allocated storage when it needs
   to grow (see Grammarian_ProcessToken). */
typedef struct tag_GrammarianData
{
    Symbol* pStack; /* initially set to defaultStack */
    size_t stackSize; /* capacity of pStack, in symbols */
    size_t stackUsed; /* symbols currently on the stack; the top is
                         pStack[stackUsed - 1] and 0 means the document
                         is finished */
    Symbol defaultStack[DEFAULT_SYMBOL_STACK_SIZE];
} GrammarianData;
typedef GrammarianData* Grammarian;
756
757 /* Mutually-exclusive result codes returned by the grammarian
758 after processing a token. */
759 #define ACCEPTED_TOKEN 0
760 #define REJECTED_TOKEN 1
761 #define SYMBOL_STACK_FULL 2
762 typedef uint32_t GrammarianResultCode;
763
764 /* Events emitted by the grammarian as a result of processing a
765 token. Note that EMIT_ARRAY_ITEM always appears bitwise OR-ed
766 with one of the other values. */
767 #define EMIT_NOTHING 0x00
768 #define EMIT_NULL 0x01
769 #define EMIT_BOOLEAN 0x02
770 #define EMIT_STRING 0x03
771 #define EMIT_NUMBER 0x04
772 #define EMIT_SPECIAL_NUMBER 0x05
773 #define EMIT_START_OBJECT 0x06
774 #define EMIT_END_OBJECT 0x07
775 #define EMIT_OBJECT_MEMBER 0x08
776 #define EMIT_START_ARRAY 0x09
777 #define EMIT_END_ARRAY 0x0A
778 #define EMIT_ARRAY_ITEM 0x10 /* may be combined with other values */
779 typedef byte GrammarEvent;
780
/* The bits of GrammarianOutput are laid out as follows:
782
783 -rreeeee
784
785 - = unused (1 bit)
786 r = result code (2 bits)
787 e = event (5 bits)
788 */
789 #define GRAMMARIAN_OUTPUT(r, e) (GrammarianOutput)(((GrammarianResultCode)(r) << 5) | (GrammarEvent)(e))
790 #define GRAMMARIAN_RESULT_CODE(o) (GrammarianResultCode)((GrammarianOutput)(o) >> 5)
791 #define GRAMMARIAN_EVENT(o) (GrammarEvent)((GrammarianOutput)(o) & 0x1F)
792 typedef byte GrammarianOutput;
793
794 /* Grammar rule used by the grammarian to process a token. */
/* Grammar rule used by the grammarian to process a token. Each rule says
   how the non-terminal on top of the symbol stack is replaced. */
typedef struct tag_GrammarRule
{
    Symbol symbolToPush1; /* replaces the top symbol; T_NONE means the top
                             symbol is simply popped */
    Symbol symbolToPush2; /* pushed on top of symbolToPush1; T_NONE means
                             nothing further is pushed */
    byte reprocess;       /* non-zero: match the same token again against
                             the new top of the stack */
    GrammarEvent emit;    /* event OR-ed into the grammarian's output */
} GrammarRule;
802
803 /* Grammarian functions. */
804
/* Prepares the grammarian to parse a new document.

   isInitialized - zero the first time a grammarian is set up, in which case
                   the symbol stack is pointed at the embedded default
                   stack; non-zero on later resets, in which case whatever
                   stack is already in place (default or allocated) is
                   reused. A client that wants allocated stack memory back
                   has to free the grammarian and create a new one. */
static void Grammarian_Reset(Grammarian grammarian, int isInitialized)
{
    if (!isInitialized)
    {
        grammarian->stackSize = sizeof(grammarian->defaultStack);
        grammarian->pStack = grammarian->defaultStack;
    }

    /* A document is a single VALUE, so that is the only symbol expected
       at the start. */
    grammarian->stackUsed = 1;
    grammarian->pStack[0] = NT_VALUE;
}
821
/* Releases the symbol stack through the memory suite's free callback if it
   was moved off the embedded default stack; otherwise does nothing. */
static void Grammarian_FreeAllocations(Grammarian grammarian,
    const JSON_MemorySuite* pMemorySuite)
{
    /* The default stack lives inside the grammarian itself and was never
       allocated. */
    if (grammarian->pStack == grammarian->defaultStack)
        return;

    pMemorySuite->free(pMemorySuite->userData, grammarian->pStack);
}
828
/* Returns 1 once the symbol stack is empty, i.e. no further tokens are
   expected, and 0 otherwise. */
static int Grammarian_FinishedDocument(Grammarian grammarian)
{
    return grammarian->stackUsed == 0;
}
833
Grammarian_ProcessToken(Grammarian grammarian,Symbol token,const JSON_MemorySuite * pMemorySuite)834 static GrammarianOutput Grammarian_ProcessToken(Grammarian grammarian,
835 Symbol token, const JSON_MemorySuite* pMemorySuite)
836 {
837 /* The order and number of the rows and columns in this table must
838 match the defined token and non-terminal symbol values.
839
840 The row index is the incoming token's Symbol value.
841
842 The column index is the bottom 4 bits of Symbol value of
843 the non-terminal at the top of the processing stack.
844 Since non-terminal Symbol values start at 0x10, taking
845 the bottom 4 bits yields a 0-based index. */
846 static const byte ruleLookup[15][7] =
847 {
848 /* V MS M MM IS I MI */
849 /* ---- */ { 0, 0, 0, 0, 0, 0, 0 },
850 /* null */ { 1, 0, 0, 0, 13, 15, 0 },
851 /* true */ { 2, 0, 0, 0, 13, 15, 0 },
852 /* false */ { 2, 0, 0, 0, 13, 15, 0 },
853 /* string */ { 3, 8, 10, 0, 13, 15, 0 },
854 /* number */ { 4, 0, 0, 0, 13, 15, 0 },
855 /* NaN */ { 5, 0, 0, 0, 13, 15, 0 },
856 /* Inf */ { 5, 0, 0, 0, 13, 15, 0 },
857 /* -Inf */ { 5, 0, 0, 0, 13, 15, 0 },
858 /* { */ { 6, 0, 0, 0, 13, 15, 0 },
859 /* } */ { 0, 9, 0, 12, 0, 0, 0 },
860 /* [ */ { 7, 0, 0, 0, 13, 15, 0 },
861 /* ] */ { 0, 0, 0, 0, 14, 0, 17 },
862 /* : */ { 0, 0, 0, 0, 0, 0, 0 },
863 /* , */ { 0, 0, 0, 11, 0, 0, 16 }
864 };
865
866 static const GrammarRule rules[17] =
867 {
868 /* 1. */ { T_NONE, T_NONE, 0, EMIT_NULL },
869 /* 2. */ { T_NONE, T_NONE, 0, EMIT_BOOLEAN },
870 /* 3. */ { T_NONE, T_NONE, 0, EMIT_STRING },
871 /* 4. */ { T_NONE, T_NONE, 0, EMIT_NUMBER },
872 /* 5. */ { T_NONE, T_NONE, 0, EMIT_SPECIAL_NUMBER },
873 /* 6. */ { T_RIGHT_CURLY, NT_MEMBERS, 0, EMIT_START_OBJECT },
874 /* 7. */ { T_RIGHT_SQUARE, NT_ITEMS, 0, EMIT_START_ARRAY },
875 /* 8. */ { NT_MORE_MEMBERS, NT_MEMBER, 1, EMIT_NOTHING },
876 /* 9. */ { T_NONE, T_NONE, 1, EMIT_END_OBJECT },
877 /* 10. */ { NT_VALUE, T_COLON, 0, EMIT_OBJECT_MEMBER },
878 /* 11. */ { NT_MORE_MEMBERS, NT_MEMBER, 0, EMIT_NOTHING },
879 /* 12. */ { T_NONE, T_NONE, 1, EMIT_END_OBJECT },
880 /* 13. */ { NT_MORE_ITEMS, NT_ITEM, 1, EMIT_NOTHING },
881 /* 14. */ { T_NONE, T_NONE, 1, EMIT_END_ARRAY },
882 /* 15. */ { NT_VALUE, T_NONE, 1, EMIT_ARRAY_ITEM },
883 /* 16. */ { NT_MORE_ITEMS, NT_ITEM, 0, EMIT_NOTHING },
884 /* 17. */ { T_NONE, T_NONE, 1, EMIT_END_ARRAY }
885 };
886
887 GrammarEvent emit = EMIT_NOTHING;
888
889 /* If the stack is empty, no more tokens were expected. */
890 if (Grammarian_FinishedDocument(grammarian))
891 return GRAMMARIAN_OUTPUT(REJECTED_TOKEN, EMIT_NOTHING);
892
893 for (;;)
894 {
895 Symbol topSymbol = grammarian->pStack[grammarian->stackUsed - 1];
896 if (IS_TOKEN(topSymbol))
897 {
898 if (topSymbol != token)
899 return GRAMMARIAN_OUTPUT(REJECTED_TOKEN, EMIT_NOTHING);
900 grammarian->stackUsed--;
901 break;
902 }
903 else
904 {
905 const GrammarRule* pRule = NULL;
906 byte ruleNumber = ruleLookup[token][BOTTOM_4_BITS(topSymbol)];
907
908 if (ruleNumber == 0)
909 return GRAMMARIAN_OUTPUT(REJECTED_TOKEN, EMIT_NOTHING);
910
911 pRule = &rules[ruleNumber - 1];
912
913 /* The rule removes the top symbol and does not replace it. */
914 if (pRule->symbolToPush1 == T_NONE)
915 grammarian->stackUsed--;
916 else
917 {
918 /* The rule replaces the top symbol with 1 or 2 symbols. */
919 grammarian->pStack[grammarian->stackUsed - 1] = pRule->symbolToPush1;
920 if (pRule->symbolToPush2 != T_NONE)
921 {
922 /* The rule replaces the top symbol with 2 symbols.
923 Make sure the stack has room for the second one. */
924 if (grammarian->stackUsed == grammarian->stackSize)
925 {
926 Symbol* pBiggerStack = DoubleBuffer(pMemorySuite,
927 grammarian->defaultStack, grammarian->pStack,
928 grammarian->stackSize);
929
930 if (!pBiggerStack)
931 return GRAMMARIAN_OUTPUT(SYMBOL_STACK_FULL, EMIT_NOTHING);
932
933 grammarian->pStack = pBiggerStack;
934 grammarian->stackSize *= 2;
935 }
936 grammarian->pStack[grammarian->stackUsed] = pRule->symbolToPush2;
937 grammarian->stackUsed++;
938 }
939 }
940 emit |= pRule->emit;
941 if (!pRule->reprocess)
942 break;
943 }
944 }
945
946 return GRAMMARIAN_OUTPUT(ACCEPTED_TOKEN, emit);
947 }
948
949 /******************** JSON Parser ********************/
950
951 #ifndef JSON_NO_PARSER
952
953 /* Combinable parser state flags. */
954 #define PARSER_RESET 0x00
955 #define PARSER_STARTED 0x01
956 #define PARSER_FINISHED 0x02
957 #define PARSER_IN_PROTECTED_API 0x04
958 #define PARSER_IN_TOKEN_HANDLER 0x08
959 #define PARSER_AFTER_CARRIAGE_RETURN 0x10
960 typedef byte ParserState;
961
962 /* Combinable parser settings flags. */
963 #define PARSER_DEFAULT_FLAGS 0x00
964 #define PARSER_ALLOW_BOM 0x01
965 #define PARSER_ALLOW_COMMENTS 0x02
966 #define PARSER_ALLOW_SPECIAL_NUMBERS 0x04
967 #define PARSER_ALLOW_HEX_NUMBERS 0x08
968 #define PARSER_REPLACE_INVALID 0x10
969 #define PARSER_TRACK_OBJECT_MEMBERS 0x20
970 #define PARSER_ALLOW_CONTROL_CHARS 0x40
971 #define PARSER_EMBEDDED_DOCUMENT 0x80
972 typedef byte ParserFlags;
973
974 /* Sentinel value for parser error location offset. */
975 #define ERROR_LOCATION_IS_TOKEN_START 0xFF
976
977 /* An object member name stored in an unordered, singly-linked-list, used for
978 detecting duplicate member names. Note that the name string is not null-
979 terminated. */
typedef struct tag_MemberName
{
    struct tag_MemberName* pNextName; /* next name in the singly-linked list */
    size_t length;                    /* presumably the number of bytes stored
                                         in pBytes - confirm at the allocation
                                         site */
    byte pBytes[1]; /* variable-size buffer */
                    /* NOTE(review): declared with one element; presumably
                       each node is over-allocated to hold the whole name -
                       confirm at the allocation site. */
} MemberName;
986
/* An object's list of member names, and a pointer to the object's
   nearest ancestor object, if any. This is used as a stack: the parser's
   pMemberNames field points at the entry for the innermost open object.
   Because arrays do not have named items, they do not need to be recorded
   in the stack. */
typedef struct tag_MemberNames
{
   struct tag_MemberNames* pAncestor;  /* entry below this one on the stack, or NULL */
   MemberName*             pFirstName; /* head of this object's name list, or NULL */
} MemberNames;
995
/* A parser instance. All fields other than memorySuite are put back to
   their defaults by JSON_Parser_ResetData(). */
struct JSON_Parser_Data
{
   /* Allocation callbacks (realloc/free) and the user data passed to them. */
   JSON_MemorySuite                    memorySuite;
   /* Opaque pointer; NULL after a reset. */
   void*                               userData;
   /* PARSER_* state flags and PARSER_* settings flags, respectively. */
   ParserState                         state;
   ParserFlags                         flags;
   /* Encoding of the input (JSON_UnknownEncoding after a reset) and the
      encodings used for string and number token bytes (JSON_UTF8 after a
      reset). */
   Encoding                            inputEncoding;
   Encoding                            stringEncoding;
   Encoding                            numberEncoding;
   /* Token currently being lexed (T_NONE between tokens) and the attribute
      flags accumulated for it. */
   Symbol                              token;
   TokenAttributes                     tokenAttributes;
   /* Recorded error, plus how many codepoints before the current codepoint
      it is located (or ERROR_LOCATION_IS_TOKEN_START). */
   Error                               error;
   byte                                errorOffset;
   /* Lexer state machine position and its state-dependent scratch bits
      (e.g. a literal index or partially-decoded hex escape). */
   LexerState                          lexerState;
   uint32_t                            lexerBits;
   /* Location of the current codepoint. */
   size_t                              codepointLocationByte;
   size_t                              codepointLocationLine;
   size_t                              codepointLocationColumn;
   /* Location of the first codepoint of the current token (or comment). */
   size_t                              tokenLocationByte;
   size_t                              tokenLocationLine;
   size_t                              tokenLocationColumn;
   /* Number of currently open containers (objects and arrays). */
   size_t                              depth;
   /* Token byte buffer: storage, capacity, and number of bytes in use.
      Initially points at defaultTokenBytes. */
   byte*                               pTokenBytes;
   size_t                              tokenBytesLength;
   size_t                              tokenBytesUsed;
   /* Length limits; SIZE_MAX after a reset. */
   size_t                              maxStringLength;
   size_t                              maxNumberLength;
   /* Top of the member-name stack, or NULL when no tracked object is open. */
   MemberNames*                        pMemberNames;
   /* State of the input decoder and of the grammar checker. */
   DecoderData                         decoderData;
   GrammarianData                      grammarianData;
   /* Client callbacks; a NULL handler is simply not called. */
   JSON_Parser_EncodingDetectedHandler encodingDetectedHandler;
   JSON_Parser_NullHandler             nullHandler;
   JSON_Parser_BooleanHandler          booleanHandler;
   JSON_Parser_StringHandler           stringHandler;
   JSON_Parser_NumberHandler           numberHandler;
   JSON_Parser_SpecialNumberHandler    specialNumberHandler;
   JSON_Parser_StartObjectHandler      startObjectHandler;
   JSON_Parser_EndObjectHandler        endObjectHandler;
   JSON_Parser_ObjectMemberHandler     objectMemberHandler;
   JSON_Parser_StartArrayHandler       startArrayHandler;
   JSON_Parser_EndArrayHandler         endArrayHandler;
   JSON_Parser_ArrayItemHandler        arrayItemHandler;
   /* Built-in token buffer used until a larger one is needed. */
   byte                                defaultTokenBytes[DEFAULT_TOKEN_BYTES_LENGTH];
};
1041
1042 /* Parser internal functions. */
1043
JSON_Parser_SetErrorAtCodepoint(JSON_Parser parser,Error error)1044 static void JSON_Parser_SetErrorAtCodepoint(JSON_Parser parser, Error error)
1045 {
1046 parser->error = error;
1047 }
1048
JSON_Parser_SetErrorAtStringEscapeSequenceStart(JSON_Parser parser,Error error,int codepointsAgo)1049 static void JSON_Parser_SetErrorAtStringEscapeSequenceStart(
1050 JSON_Parser parser, Error error, int codepointsAgo)
1051 {
1052 /* Note that backtracking from the current codepoint requires us to make
1053 three assumptions, which are always valid in the context of a string
1054 escape sequence:
1055
1056 1. The input encoding is not JSON_UnknownEncoding.
1057
1058 2 The codepoints we are backing up across are all in the range
1059 U+0000 - U+007F, aka ASCII, so we can assume the number of
1060 bytes comprising them based on the input encoding.
1061
1062 3. The codepoints we are backing up across do not include any
1063 line breaks, so we can assume that the line number stays the
1064 same and the column number can simply be decremented.
1065 */
1066 parser->error = error;
1067 parser->errorOffset = (byte)codepointsAgo;
1068 }
1069
JSON_Parser_SetErrorAtToken(JSON_Parser parser,Error error)1070 static void JSON_Parser_SetErrorAtToken(JSON_Parser parser, Error error)
1071 {
1072 parser->error = error;
1073 parser->errorOffset = ERROR_LOCATION_IS_TOKEN_START;
1074 }
1075
JSON_Parser_PushMemberNameList(JSON_Parser parser)1076 static JSON_Status JSON_Parser_PushMemberNameList(JSON_Parser parser)
1077 {
1078 MemberNames* pNames = (MemberNames*)parser->memorySuite.realloc(
1079 parser->memorySuite.userData, NULL, sizeof(MemberNames));
1080
1081 if (!pNames)
1082 {
1083 JSON_Parser_SetErrorAtCodepoint(parser, JSON_Error_OutOfMemory);
1084 return JSON_Failure;
1085 }
1086
1087 pNames->pAncestor = parser->pMemberNames;
1088 pNames->pFirstName = NULL;
1089 parser->pMemberNames = pNames;
1090 return JSON_Success;
1091 }
1092
/* Pops the top entry off the member-name stack, freeing every name it holds
   and then the entry itself. The stack must not be empty. */
static void JSON_Parser_PopMemberNameList(JSON_Parser parser)
{
   MemberNames* pList     = parser->pMemberNames;
   MemberNames* pAncestor = pList->pAncestor;
   MemberName*  pName;

   /* Release the names one at a time from the head of the list. */
   for (pName = pList->pFirstName; pName; pName = pList->pFirstName)
   {
      MemberName* pFollowing = pName->pNextName;
      parser->memorySuite.free(parser->memorySuite.userData, pName);
      pList->pFirstName = pFollowing;
   }

   parser->memorySuite.free(parser->memorySuite.userData, pList);
   parser->pMemberNames = pAncestor;
}
1105
JSON_Parser_StartContainer(JSON_Parser parser,int isObject)1106 static JSON_Status JSON_Parser_StartContainer(JSON_Parser parser, int isObject)
1107 {
1108 if (isObject && GET_FLAGS(parser->flags, PARSER_TRACK_OBJECT_MEMBERS) &&
1109 !JSON_Parser_PushMemberNameList(parser))
1110 {
1111 return JSON_Failure;
1112 }
1113 parser->depth++;
1114 return JSON_Success;
1115 }
1116
/* Leaves the innermost container. For objects with member tracking
   enabled, the object's member-name list is discarded as well. */
static void JSON_Parser_EndContainer(JSON_Parser parser, int isObject)
{
   parser->depth--;

   if (!isObject)
      return;
   if (!GET_FLAGS(parser->flags, PARSER_TRACK_OBJECT_MEMBERS))
      return;

   JSON_Parser_PopMemberNameList(parser);
}
1125
JSON_Parser_AddMemberNameToList(JSON_Parser parser)1126 static JSON_Status JSON_Parser_AddMemberNameToList(JSON_Parser parser)
1127 {
1128 if (GET_FLAGS(parser->flags, PARSER_TRACK_OBJECT_MEMBERS))
1129 {
1130 MemberName* pName;
1131 for (pName = parser->pMemberNames->pFirstName; pName; pName = pName->pNextName)
1132 {
1133 if (pName->length == parser->tokenBytesUsed && !memcmp(pName->pBytes, parser->pTokenBytes, pName->length))
1134 {
1135 JSON_Parser_SetErrorAtToken(parser, JSON_Error_DuplicateObjectMember);
1136 return JSON_Failure;
1137 }
1138 }
1139 pName = (MemberName*)parser->memorySuite.realloc(parser->memorySuite.userData, NULL, sizeof(MemberName) + parser->tokenBytesUsed - 1);
1140 if (!pName)
1141 {
1142 JSON_Parser_SetErrorAtCodepoint(parser, JSON_Error_OutOfMemory);
1143 return JSON_Failure;
1144 }
1145 pName->pNextName = parser->pMemberNames->pFirstName;
1146 pName->length = parser->tokenBytesUsed;
1147 memcpy(pName->pBytes, parser->pTokenBytes, parser->tokenBytesUsed);
1148 parser->pMemberNames->pFirstName = pName;
1149 }
1150 return JSON_Success;
1151 }
1152
/* Puts every field of the parser except its memory suite back to its
   default value.

   `isInitialized` is zero when the structure has never been set up: the
   token buffer is then pointed at the built-in default buffer and the
   member-name stack is simply cleared. When it is nonzero, the existing
   token buffer and symbol stack are kept (a client that wants that memory
   back must free the parser and create a new one), and any member-name
   lists still on the stack are freed. */
static void JSON_Parser_ResetData(JSON_Parser parser, int isInitialized)
{
   /* Client data, settings and limits. */
   parser->userData                = NULL;
   parser->flags                   = PARSER_DEFAULT_FLAGS;
   parser->inputEncoding           = JSON_UnknownEncoding;
   parser->stringEncoding          = JSON_UTF8;
   parser->numberEncoding          = JSON_UTF8;
   parser->maxStringLength         = SIZE_MAX;
   parser->maxNumberLength         = SIZE_MAX;

   /* Current token, error and lexer state. */
   parser->token                   = T_NONE;
   parser->tokenAttributes         = 0;
   parser->error                   = JSON_Error_None;
   parser->errorOffset             = 0;
   parser->lexerState              = LEXING_WHITESPACE;
   parser->lexerBits               = 0;
   parser->tokenBytesUsed          = 0;

   /* Locations and nesting depth. */
   parser->codepointLocationByte   = 0;
   parser->codepointLocationLine   = 0;
   parser->codepointLocationColumn = 0;
   parser->tokenLocationByte       = 0;
   parser->tokenLocationLine       = 0;
   parser->tokenLocationColumn     = 0;
   parser->depth                   = 0;

   if (isInitialized)
   {
      /* Keep the already-allocated buffers, but release every member-name
         list left over from the previous parse. */
      while (parser->pMemberNames)
         JSON_Parser_PopMemberNameList(parser);
   }
   else
   {
      parser->pTokenBytes      = parser->defaultTokenBytes;
      parser->tokenBytesLength = sizeof(parser->defaultTokenBytes);
      parser->pMemberNames     = NULL;
   }

   Decoder_Reset(&parser->decoderData);
   Grammarian_Reset(&parser->grammarianData, isInitialized);

   /* Client callbacks. */
   parser->encodingDetectedHandler = NULL;
   parser->nullHandler             = NULL;
   parser->booleanHandler          = NULL;
   parser->stringHandler           = NULL;
   parser->numberHandler           = NULL;
   parser->specialNumberHandler    = NULL;
   parser->startObjectHandler      = NULL;
   parser->endObjectHandler        = NULL;
   parser->objectMemberHandler     = NULL;
   parser->startArrayHandler       = NULL;
   parser->endArrayHandler         = NULL;
   parser->arrayItemHandler        = NULL;

   /* The state must be the very last field to be reset. */
   parser->state                   = PARSER_RESET;
}
1212
/* Writes a null terminator after the token bytes, sized for the token's
   output encoding (the number encoding for T_NUMBER, the string encoding
   otherwise). tokenBytesUsed is not changed.

   This needs no capacity check: whenever codepoints are recorded, at least
   LONGEST_ENCODING_SEQUENCE bytes are kept available at the end of the
   token buffer. */
static void JSON_Parser_NullTerminateToken(JSON_Parser parser)
{
   Encoding tokenEncoding;
   size_t   terminatorLength;

   if (parser->token == T_NUMBER)
      tokenEncoding = (Encoding)parser->numberEncoding;
   else
      tokenEncoding = (Encoding)parser->stringEncoding;

   terminatorLength = (size_t)SHORTEST_ENCODING_SEQUENCE(tokenEncoding);
   memset(parser->pTokenBytes + parser->tokenBytesUsed, 0, terminatorLength);
}
1222
JSON_Parser_FlushParser(JSON_Parser parser)1223 static JSON_Status JSON_Parser_FlushParser(JSON_Parser parser)
1224 {
1225 /* The symbol stack should be empty when parsing finishes. */
1226 if (!Grammarian_FinishedDocument(&parser->grammarianData))
1227 {
1228 JSON_Parser_SetErrorAtCodepoint(parser, JSON_Error_ExpectedMoreTokens);
1229 return JSON_Failure;
1230 }
1231 return JSON_Success;
1232 }
1233
1234 typedef JSON_Parser_HandlerResult (JSON_CALL * JSON_Parser_SimpleTokenHandler)(JSON_Parser parser);
JSON_Parser_CallSimpleTokenHandler(JSON_Parser parser,JSON_Parser_SimpleTokenHandler handler)1235 static JSON_Status JSON_Parser_CallSimpleTokenHandler(JSON_Parser parser, JSON_Parser_SimpleTokenHandler handler)
1236 {
1237 if (handler)
1238 {
1239 JSON_Parser_HandlerResult result;
1240 SET_FLAGS_ON(ParserState, parser->state, PARSER_IN_TOKEN_HANDLER);
1241 result = handler(parser);
1242 SET_FLAGS_OFF(ParserState, parser->state, PARSER_IN_TOKEN_HANDLER);
1243 if (result != JSON_Parser_Continue)
1244 {
1245 JSON_Parser_SetErrorAtToken(parser, JSON_Error_AbortedByHandler);
1246 return JSON_Failure;
1247 }
1248 }
1249 return JSON_Success;
1250 }
1251
JSON_Parser_CallBooleanHandler(JSON_Parser parser)1252 static JSON_Status JSON_Parser_CallBooleanHandler(JSON_Parser parser)
1253 {
1254 if (parser->booleanHandler)
1255 {
1256 JSON_Parser_HandlerResult result;
1257 SET_FLAGS_ON(ParserState, parser->state, PARSER_IN_TOKEN_HANDLER);
1258 result = parser->booleanHandler(parser, parser->token == T_TRUE ? JSON_True : JSON_False);
1259 SET_FLAGS_OFF(ParserState, parser->state, PARSER_IN_TOKEN_HANDLER);
1260 if (result != JSON_Parser_Continue)
1261 {
1262 JSON_Parser_SetErrorAtToken(parser, JSON_Error_AbortedByHandler);
1263 return JSON_Failure;
1264 }
1265 }
1266 return JSON_Success;
1267 }
1268
JSON_Parser_CallStringHandler(JSON_Parser parser,int isObjectMember)1269 static JSON_Status JSON_Parser_CallStringHandler(JSON_Parser parser, int isObjectMember)
1270 {
1271 JSON_Parser_StringHandler handler = isObjectMember ? parser->objectMemberHandler : parser->stringHandler;
1272 if (handler)
1273 {
1274 JSON_Parser_HandlerResult result;
1275 JSON_Parser_NullTerminateToken(parser);
1276 SET_FLAGS_ON(ParserState, parser->state, PARSER_IN_TOKEN_HANDLER);
1277 result = handler(parser, (char*)parser->pTokenBytes, parser->tokenBytesUsed, parser->tokenAttributes);
1278 SET_FLAGS_OFF(ParserState, parser->state, PARSER_IN_TOKEN_HANDLER);
1279
1280 if (result != JSON_Parser_Continue)
1281 {
1282 JSON_Parser_SetErrorAtToken(parser,
1283 (isObjectMember && result == JSON_Parser_TreatAsDuplicateObjectMember)
1284 ? JSON_Error_DuplicateObjectMember
1285 : JSON_Error_AbortedByHandler);
1286 return JSON_Failure;
1287 }
1288 }
1289 return JSON_Success;
1290 }
1291
JSON_Parser_CallNumberHandler(JSON_Parser parser)1292 static JSON_Status JSON_Parser_CallNumberHandler(JSON_Parser parser)
1293 {
1294 if (parser->numberHandler)
1295 {
1296 JSON_Parser_HandlerResult result;
1297 JSON_Parser_NullTerminateToken(parser);
1298 SET_FLAGS_ON(ParserState, parser->state, PARSER_IN_TOKEN_HANDLER);
1299 result = parser->numberHandler(parser, (char*)parser->pTokenBytes,
1300 parser->tokenBytesUsed, parser->tokenAttributes);
1301
1302 SET_FLAGS_OFF(ParserState, parser->state, PARSER_IN_TOKEN_HANDLER);
1303
1304 if (result != JSON_Parser_Continue)
1305 {
1306 JSON_Parser_SetErrorAtToken(parser, JSON_Error_AbortedByHandler);
1307 return JSON_Failure;
1308 }
1309 }
1310 return JSON_Success;
1311 }
1312
JSON_Parser_CallSpecialNumberHandler(JSON_Parser parser)1313 static JSON_Status JSON_Parser_CallSpecialNumberHandler(JSON_Parser parser)
1314 {
1315 if (parser->specialNumberHandler)
1316 {
1317 JSON_Parser_HandlerResult result;
1318 SET_FLAGS_ON(ParserState, parser->state, PARSER_IN_TOKEN_HANDLER);
1319 result = parser->specialNumberHandler(parser, parser->token == T_NAN ? JSON_NaN :
1320 (parser->token == T_INFINITY ? JSON_Infinity : JSON_NegativeInfinity));
1321 SET_FLAGS_OFF(ParserState, parser->state, PARSER_IN_TOKEN_HANDLER);
1322
1323 if (result != JSON_Parser_Continue)
1324 {
1325 JSON_Parser_SetErrorAtToken(parser, JSON_Error_AbortedByHandler);
1326 return JSON_Failure;
1327 }
1328 }
1329 return JSON_Success;
1330 }
1331
JSON_Parser_HandleGrammarEvents(JSON_Parser parser,byte emit)1332 static JSON_Status JSON_Parser_HandleGrammarEvents(JSON_Parser parser, byte emit)
1333 {
1334 if (GET_FLAGS(emit, EMIT_ARRAY_ITEM))
1335 {
1336 if (!JSON_Parser_CallSimpleTokenHandler(parser, parser->arrayItemHandler))
1337 {
1338 return JSON_Failure;
1339 }
1340 SET_FLAGS_OFF(byte, emit, EMIT_ARRAY_ITEM);
1341 }
1342 switch (emit)
1343 {
1344 case EMIT_NULL:
1345 if (!JSON_Parser_CallSimpleTokenHandler(parser, parser->nullHandler))
1346 return JSON_Failure;
1347 break;
1348
1349 case EMIT_BOOLEAN:
1350 if (!JSON_Parser_CallBooleanHandler(parser))
1351 return JSON_Failure;
1352 break;
1353
1354 case EMIT_STRING:
1355 if (!JSON_Parser_CallStringHandler(parser, 0/* isObjectMember */))
1356 return JSON_Failure;
1357 break;
1358
1359 case EMIT_NUMBER:
1360 if (!JSON_Parser_CallNumberHandler(parser))
1361 return JSON_Failure;
1362 break;
1363
1364 case EMIT_SPECIAL_NUMBER:
1365 if (!JSON_Parser_CallSpecialNumberHandler(parser))
1366 return JSON_Failure;
1367 break;
1368
1369 case EMIT_START_OBJECT:
1370 if (!JSON_Parser_CallSimpleTokenHandler(parser, parser->startObjectHandler) ||
1371 !JSON_Parser_StartContainer(parser, 1/*isObject*/))
1372 return JSON_Failure;
1373 break;
1374
1375 case EMIT_END_OBJECT:
1376 JSON_Parser_EndContainer(parser, 1/*isObject*/);
1377 if (!JSON_Parser_CallSimpleTokenHandler(parser, parser->endObjectHandler))
1378 return JSON_Failure;
1379 break;
1380 case EMIT_OBJECT_MEMBER:
1381 if (!JSON_Parser_AddMemberNameToList(parser) || /* will fail if member is duplicate */
1382 !JSON_Parser_CallStringHandler(parser, 1 /* isObjectMember */))
1383 return JSON_Failure;
1384 break;
1385
1386 case EMIT_START_ARRAY:
1387 if (!JSON_Parser_CallSimpleTokenHandler(parser, parser->startArrayHandler) ||
1388 !JSON_Parser_StartContainer(parser, 0/*isObject*/))
1389 return JSON_Failure;
1390 break;
1391
1392 case EMIT_END_ARRAY:
1393 JSON_Parser_EndContainer(parser, 0/*isObject*/);
1394 if (!JSON_Parser_CallSimpleTokenHandler(parser, parser->endArrayHandler))
1395 return JSON_Failure;
1396 break;
1397 }
1398
1399 if (!parser->depth && GET_FLAGS(parser->flags, PARSER_EMBEDDED_DOCUMENT))
1400 {
1401 JSON_Parser_SetErrorAtCodepoint(parser, JSON_Error_StoppedAfterEmbeddedDocument);
1402 return JSON_Failure;
1403 }
1404 return JSON_Success;
1405 }
1406
JSON_Parser_ProcessToken(JSON_Parser parser)1407 static JSON_Status JSON_Parser_ProcessToken(JSON_Parser parser)
1408 {
1409 GrammarianOutput output;
1410 output = Grammarian_ProcessToken(&parser->grammarianData, parser->token, &parser->memorySuite);
1411 switch (GRAMMARIAN_RESULT_CODE(output))
1412 {
1413 case ACCEPTED_TOKEN:
1414 if (!JSON_Parser_HandleGrammarEvents(parser, GRAMMARIAN_EVENT(output)))
1415 return JSON_Failure;
1416 break;
1417
1418 case REJECTED_TOKEN:
1419 JSON_Parser_SetErrorAtToken(parser, JSON_Error_UnexpectedToken);
1420 return JSON_Failure;
1421
1422 case SYMBOL_STACK_FULL:
1423 JSON_Parser_SetErrorAtCodepoint(parser, JSON_Error_OutOfMemory);
1424 return JSON_Failure;
1425 }
1426
1427 /* Reset the lexer to prepare for the next token. */
1428 parser->lexerState = LEXING_WHITESPACE;
1429 parser->lexerBits = 0;
1430 parser->token = T_NONE;
1431 parser->tokenAttributes = 0;
1432 parser->tokenBytesUsed = 0;
1433 return JSON_Success;
1434 }
1435
1436 /* Lexer functions. */
1437
/* The characters expected after the first character of each literal, stored
   back to back as zero-terminated runs: "ull" (null), "rue" (true), "alse"
   (false), "aN" (NaN) and "nfinity" (Infinity). The lexer consumes the
   first character itself and then walks the matching run using an index
   kept in lexerBits; reaching a 0 entry means the literal is complete. */
static const byte expectedLiteralChars[] = { 'u', 'l', 'l', 0, 'r', 'u', 'e', 0, 'a', 'l', 's', 'e', 0, 'a', 'N', 0, 'n', 'f', 'i', 'n', 'i', 't', 'y', 0 };

/* Index in expectedLiteralChars at which each literal's run begins. */
#define NULL_LITERAL_EXPECTED_CHARS_START_INDEX     0
#define TRUE_LITERAL_EXPECTED_CHARS_START_INDEX     4
#define FALSE_LITERAL_EXPECTED_CHARS_START_INDEX    8
#define NAN_LITERAL_EXPECTED_CHARS_START_INDEX      13
#define INFINITY_LITERAL_EXPECTED_CHARS_START_INDEX 16
1445
/* Forward declarations. */
1447 static JSON_Status JSON_Parser_FlushLexer(JSON_Parser parser);
1448 static JSON_Status JSON_Parser_ProcessCodepoint(
1449 JSON_Parser parser, Codepoint c, size_t encodedLength);
1450
JSON_Parser_HandleInvalidEncodingSequence(JSON_Parser parser,size_t encodedLength)1451 static JSON_Status JSON_Parser_HandleInvalidEncodingSequence(
1452 JSON_Parser parser, size_t encodedLength)
1453 {
1454 if (parser->token == T_STRING && GET_FLAGS(parser->flags, PARSER_REPLACE_INVALID))
1455 {
1456 /* Since we're inside a string token, replacing the invalid sequence
1457 with the Unicode replacement character as requested by the client
1458 is a viable way to avoid a parse failure. Outside a string token,
1459 such a replacement would simply trigger JSON_Error_UnknownToken
1460 when we tried to process the replacement character, so it's less
1461 confusing to stick with JSON_Error_InvalidEncodingSequence in that
1462 case. */
1463 SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_ContainsReplacedCharacter);
1464 return JSON_Parser_ProcessCodepoint(parser, REPLACEMENT_CHARACTER_CODEPOINT, encodedLength);
1465 }
1466 else if (!parser->depth && GET_FLAGS(parser->flags, PARSER_EMBEDDED_DOCUMENT))
1467 {
1468 /* Since we're parsing the top-level value of an embedded
1469 document, assume that the invalid encoding sequence we've
1470 encountered does not actually belong to the document, and
1471 finish parsing by pretending that we've encountered EOF
1472 instead of an invalid sequence. If the content is valid,
1473 this will fail with JSON_Error_StoppedAfterEmbeddedDocument;
1474 otherwise, it will fail with an appropriate error. */
1475 return (JSON_Status)(JSON_Parser_FlushLexer(parser) && JSON_Parser_FlushParser(parser));
1476 }
1477 JSON_Parser_SetErrorAtCodepoint(parser, JSON_Error_InvalidEncodingSequence);
1478 return JSON_Failure;
1479 }
1480
JSON_Parser_HandleInvalidNumber(JSON_Parser parser,Codepoint c,int codepointsSinceValidNumber,TokenAttributes attributesToRemove)1481 static JSON_Status JSON_Parser_HandleInvalidNumber(JSON_Parser parser,
1482 Codepoint c, int codepointsSinceValidNumber, TokenAttributes attributesToRemove)
1483 {
1484 SET_FLAGS_OFF(TokenAttributes, parser->tokenAttributes, attributesToRemove);
1485 if (!parser->depth && GET_FLAGS(parser->flags, PARSER_EMBEDDED_DOCUMENT))
1486 {
1487 /* The invalid number is the top-level value of an embedded document,
1488 and it has a prefix that can be interpreted as a valid number.
1489 We want to backtrack so that we are at the end of that prefix,
1490 and then process the valid token.
1491
1492 Note that backtracking requires us to make three assumptions, which
1493 are always valid in the context of a number token:
1494
1495 1. The input encoding is not JSON_UnknownEncoding.
1496
1497 2 The codepoints we are backing up across are all in the range
1498 U+0000 - U+007F, aka ASCII, so we can assume the number of
1499 bytes comprising them based on the input encoding.
1500
1501 3. The codepoints we are backing up across do not include any
1502 line breaks, so we can assume that the line number stays the
1503 same and the column number can simply be decremented.
1504
1505 For example:
1506
1507 "01" => "0"
1508 "123.!" => "123"
1509 "123e!" => "123"
1510 "123e+!" => "123"
1511 "123e-!" => "123"
1512 "1.2e!" => "1.2"
1513 "1.2e+!" => "1.2"
1514 "1.2e-!" => "1.2"
1515 */
1516 parser->codepointLocationByte -= (size_t)codepointsSinceValidNumber
1517 * (size_t)SHORTEST_ENCODING_SEQUENCE(parser->inputEncoding);
1518 parser->codepointLocationColumn -= (size_t)codepointsSinceValidNumber;
1519 parser->tokenBytesUsed -= (size_t)codepointsSinceValidNumber
1520 * (size_t)SHORTEST_ENCODING_SEQUENCE(parser->numberEncoding);
1521 return JSON_Parser_ProcessToken(parser); /* always fails */
1522 }
1523 /* Allow JSON_Parser_FlushLexer() to fail. */
1524 else if (c == EOF_CODEPOINT)
1525 return JSON_Success;
1526
1527 JSON_Parser_SetErrorAtToken(parser, JSON_Error_InvalidNumber);
1528 return JSON_Failure;
1529 }
1530
/* Begins a new token of kind `token`, remembering the current codepoint
   location as the location where the token starts. */
static void JSON_Parser_StartToken(JSON_Parser parser, Symbol token)
{
   parser->tokenLocationByte   = parser->codepointLocationByte;
   parser->tokenLocationLine   = parser->codepointLocationLine;
   parser->tokenLocationColumn = parser->codepointLocationColumn;
   parser->token               = token;
}
1538
JSON_Parser_ProcessCodepoint(JSON_Parser parser,Codepoint c,size_t encodedLength)1539 static JSON_Status JSON_Parser_ProcessCodepoint(JSON_Parser parser, Codepoint c, size_t encodedLength)
1540 {
1541 Encoding tokenEncoding;
1542 size_t maxTokenLength;
1543 int tokenFinished = 0;
1544 Codepoint codepointToRecord = EOF_CODEPOINT;
1545
1546 /* If the previous codepoint was U+000D (CARRIAGE RETURN), and the current
1547 codepoint is U+000A (LINE FEED), then treat the 2 codepoints as a single
1548 line break. */
1549 if (GET_FLAGS(parser->state, PARSER_AFTER_CARRIAGE_RETURN))
1550 {
1551 if (c == LINE_FEED_CODEPOINT)
1552 parser->codepointLocationLine--;
1553 SET_FLAGS_OFF(ParserState, parser->state, PARSER_AFTER_CARRIAGE_RETURN);
1554 }
1555
1556 reprocess:
1557
1558 switch (parser->lexerState)
1559 {
1560 case LEXING_WHITESPACE:
1561 if (c == '{')
1562 {
1563 JSON_Parser_StartToken(parser, T_LEFT_CURLY);
1564 tokenFinished = 1;
1565 }
1566 else if (c == '}')
1567 {
1568 JSON_Parser_StartToken(parser, T_RIGHT_CURLY);
1569 tokenFinished = 1;
1570 }
1571 else if (c == '[')
1572 {
1573 JSON_Parser_StartToken(parser, T_LEFT_SQUARE);
1574 tokenFinished = 1;
1575 }
1576 else if (c == ']')
1577 {
1578 JSON_Parser_StartToken(parser, T_RIGHT_SQUARE);
1579 tokenFinished = 1;
1580 }
1581 else if (c == ':')
1582 {
1583 JSON_Parser_StartToken(parser, T_COLON);
1584 tokenFinished = 1;
1585 }
1586 else if (c == ',')
1587 {
1588 JSON_Parser_StartToken(parser, T_COMMA);
1589 tokenFinished = 1;
1590 }
1591 else if (c == 'n')
1592 {
1593 JSON_Parser_StartToken(parser, T_NULL);
1594 parser->lexerBits = NULL_LITERAL_EXPECTED_CHARS_START_INDEX;
1595 parser->lexerState = LEXING_LITERAL;
1596 }
1597 else if (c == 't')
1598 {
1599 JSON_Parser_StartToken(parser, T_TRUE);
1600 parser->lexerBits = TRUE_LITERAL_EXPECTED_CHARS_START_INDEX;
1601 parser->lexerState = LEXING_LITERAL;
1602 }
1603 else if (c == 'f')
1604 {
1605 JSON_Parser_StartToken(parser, T_FALSE);
1606 parser->lexerBits = FALSE_LITERAL_EXPECTED_CHARS_START_INDEX;
1607 parser->lexerState = LEXING_LITERAL;
1608 }
1609 else if (c == '"')
1610 {
1611 JSON_Parser_StartToken(parser, T_STRING);
1612 parser->lexerState = LEXING_STRING;
1613 }
1614 else if (c == '-')
1615 {
1616 JSON_Parser_StartToken(parser, T_NUMBER);
1617 parser->tokenAttributes = JSON_IsNegative;
1618 codepointToRecord = '-';
1619 parser->lexerState = LEXING_NUMBER_AFTER_MINUS;
1620 goto recordNumberCodepointAndAdvance;
1621 }
1622 else if (c == '0')
1623 {
1624 JSON_Parser_StartToken(parser, T_NUMBER);
1625 codepointToRecord = '0';
1626 parser->lexerState = LEXING_NUMBER_AFTER_LEADING_ZERO;
1627 goto recordNumberCodepointAndAdvance;
1628 }
1629 else if (c >= '1' && c <= '9')
1630 {
1631 JSON_Parser_StartToken(parser, T_NUMBER);
1632 codepointToRecord = c;
1633 parser->lexerState = LEXING_NUMBER_DECIMAL_DIGITS;
1634 goto recordNumberCodepointAndAdvance;
1635 }
1636 else if (c == ' ' || c == TAB_CODEPOINT || c == LINE_FEED_CODEPOINT ||
1637 c == CARRIAGE_RETURN_CODEPOINT || c == EOF_CODEPOINT)
1638 {
1639 /* Ignore whitespace between tokens. */
1640 }
1641 else if (c == BOM_CODEPOINT && parser->codepointLocationByte == 0)
1642 {
1643 /* OK, we'll allow the BOM. */
1644 if (GET_FLAGS(parser->flags, PARSER_ALLOW_BOM)) { }
1645 else
1646 {
1647 JSON_Parser_SetErrorAtCodepoint(parser, JSON_Error_BOMNotAllowed);
1648 return JSON_Failure;
1649 }
1650 }
1651 else if (c == '/' && GET_FLAGS(parser->flags, PARSER_ALLOW_COMMENTS))
1652 {
1653 /* Comments are not real tokens, but we save the location
1654 of the comment as the token location in case of an error. */
1655 parser->tokenLocationByte = parser->codepointLocationByte;
1656 parser->tokenLocationLine = parser->codepointLocationLine;
1657 parser->tokenLocationColumn = parser->codepointLocationColumn;
1658 parser->lexerState = LEXING_COMMENT_AFTER_SLASH;
1659 }
1660 else if (c == 'N' && GET_FLAGS(parser->flags, PARSER_ALLOW_SPECIAL_NUMBERS))
1661 {
1662 JSON_Parser_StartToken(parser, T_NAN);
1663 parser->lexerBits = NAN_LITERAL_EXPECTED_CHARS_START_INDEX;
1664 parser->lexerState = LEXING_LITERAL;
1665 }
1666 else if (c == 'I' && GET_FLAGS(parser->flags, PARSER_ALLOW_SPECIAL_NUMBERS))
1667 {
1668 JSON_Parser_StartToken(parser, T_INFINITY);
1669 parser->lexerBits = INFINITY_LITERAL_EXPECTED_CHARS_START_INDEX;
1670 parser->lexerState = LEXING_LITERAL;
1671 }
1672 else
1673 {
1674 JSON_Parser_SetErrorAtCodepoint(parser, JSON_Error_UnknownToken);
1675 return JSON_Failure;
1676 }
1677 goto advance;
1678
1679 case LEXING_LITERAL:
1680 /* While lexing a literal we store an index into expectedLiteralChars
1681 in lexerBits. */
1682 if (expectedLiteralChars[parser->lexerBits])
1683 {
1684 /* The codepoint should match the next character in the literal. */
1685 if (c != expectedLiteralChars[parser->lexerBits])
1686 {
1687 JSON_Parser_SetErrorAtToken(parser, JSON_Error_UnknownToken);
1688 return JSON_Failure;
1689 }
1690 parser->lexerBits++;
1691
1692 /* If the literal is the top-level value of an embedded document,
1693 process it as soon as we consume its last expected codepoint.
1694 Normally we defer processing until the following codepoint
1695 has been examined, so that we can treat sequences like "nullx"
1696 as a single, unknown token rather than a null literal followed
1697 by an unknown token. */
1698 if (!parser->depth && GET_FLAGS(parser->flags, PARSER_EMBEDDED_DOCUMENT) &&
1699 !expectedLiteralChars[parser->lexerBits])
1700 tokenFinished = 1;
1701 }
1702 else
1703 {
1704 /* The literal should be finished, so the codepoint should not be
1705 a plausible JSON literal character, but rather EOF, whitespace,
1706 or the first character of the next token. */
1707 if ((c >= 'A' && c <= 'Z') ||
1708 (c >= 'a' && c <= 'z') ||
1709 (c >= '0' && c <= '9') ||
1710 (c == '_'))
1711 {
1712 JSON_Parser_SetErrorAtToken(parser, JSON_Error_UnknownToken);
1713 return JSON_Failure;
1714 }
1715 if (!JSON_Parser_ProcessToken(parser))
1716 return JSON_Failure;
1717 goto reprocess;
1718 }
1719 goto advance;
1720
1721 case LEXING_STRING:
1722 /* Allow JSON_Parser_FlushLexer() to fail. */
1723 if (c == EOF_CODEPOINT) { }
1724 else if (c == '"')
1725 tokenFinished = 1;
1726 else if (c == '\\')
1727 parser->lexerState = LEXING_STRING_ESCAPE;
1728 else if (c < 0x20 && !GET_FLAGS(parser->flags, PARSER_ALLOW_CONTROL_CHARS))
1729 {
1730 /* ASCII control characters (U+0000 - U+001F) are not allowed to
1731 appear unescaped in string values unless specifically allowed. */
1732 JSON_Parser_SetErrorAtCodepoint(parser, JSON_Error_UnescapedControlCharacter);
1733 return JSON_Failure;
1734 }
1735 else
1736 {
1737 codepointToRecord = c;
1738 goto recordStringCodepointAndAdvance;
1739 }
1740 goto advance;
1741
1742 case LEXING_STRING_ESCAPE:
1743 if (c == EOF_CODEPOINT)
1744 {
1745 /* Allow JSON_Parser_FlushLexer() to fail. */
1746 }
1747 else
1748 {
1749 if (c == 'u')
1750 parser->lexerState = LEXING_STRING_HEX_ESCAPE_BYTE_1;
1751 else
1752 {
1753 if (c == '"' || c == '\\' || c == '/')
1754 codepointToRecord = c;
1755 else if (c == 'b')
1756 codepointToRecord = BACKSPACE_CODEPOINT;
1757 else if (c == 't')
1758 codepointToRecord = TAB_CODEPOINT;
1759 else if (c == 'n')
1760 codepointToRecord = LINE_FEED_CODEPOINT;
1761 else if (c == 'f')
1762 codepointToRecord = FORM_FEED_CODEPOINT;
1763 else if (c == 'r')
1764 codepointToRecord = CARRIAGE_RETURN_CODEPOINT;
1765 else
1766 {
1767 /* The current codepoint location is the first character after
1768 the backslash that started the escape sequence. The error
1769 location should be the beginning of the escape sequence, 1
1770 character earlier. */
1771 JSON_Parser_SetErrorAtStringEscapeSequenceStart(parser, JSON_Error_InvalidEscapeSequence, 1);
1772 return JSON_Failure;
1773 }
1774 parser->lexerState = LEXING_STRING;
1775 goto recordStringCodepointAndAdvance;
1776 }
1777 }
1778 goto advance;
1779
1780 case LEXING_STRING_HEX_ESCAPE_BYTE_1:
1781 case LEXING_STRING_HEX_ESCAPE_BYTE_2:
1782 case LEXING_STRING_HEX_ESCAPE_BYTE_3:
1783 case LEXING_STRING_HEX_ESCAPE_BYTE_4:
1784 case LEXING_STRING_HEX_ESCAPE_BYTE_5:
1785 case LEXING_STRING_HEX_ESCAPE_BYTE_6:
1786 case LEXING_STRING_HEX_ESCAPE_BYTE_7:
1787 case LEXING_STRING_HEX_ESCAPE_BYTE_8:
1788 /* Allow JSON_Parser_FlushLexer() to fail. */
1789 if (c != EOF_CODEPOINT)
1790 {
1791 /* While lexing a string hex escape sequence we store the bytes
1792 of the escaped codepoint in the low 2 bytes of lexerBits. If
1793 the escape sequence represents a leading surrogate, we shift
1794 the leading surrogate into the high 2 bytes and lex a second
1795 hex escape sequence (which should be a trailing surrogate). */
1796 int byteNumber = (parser->lexerState - LEXING_STRING_HEX_ESCAPE_BYTE_1) & 0x3;
1797 uint32_t nibble;
1798 if (c >= '0' && c <= '9')
1799 nibble = c - '0';
1800 else if (c >= 'A' && c <= 'F')
1801 nibble = c - 'A' + 10;
1802 else if (c >= 'a' && c <= 'f')
1803 nibble = c - 'a' + 10;
1804 else
1805 {
1806 /* The current codepoint location is one of the 4 hex digit
1807 character slots in the hex escape sequence. The error
1808 location should be the beginning of the hex escape
1809 sequence, between 2 and 5 bytes earlier. */
1810 int codepointsAgo = 2 /* for "\u" */ + byteNumber;
1811 JSON_Parser_SetErrorAtStringEscapeSequenceStart(
1812 parser, JSON_Error_InvalidEscapeSequence, codepointsAgo);
1813 return JSON_Failure;
1814 }
1815 /* Store the hex digit's bits in the appropriate byte of lexerBits. */
1816 nibble <<= (3 - byteNumber) * 4 /* shift left by 12, 8, 4, 0 */ ;
1817 parser->lexerBits |= nibble;
1818 if (parser->lexerState == LEXING_STRING_HEX_ESCAPE_BYTE_4)
1819 {
1820 /* The escape sequence is complete. We need to check whether
1821 it represents a leading surrogate (which implies that it
1822 will be immediately followed by a hex-escaped trailing
1823 surrogate), a trailing surrogate (which is invalid), or a
1824 valid codepoint (which should simply be appended to the
1825 string token value). */
1826 if (IS_LEADING_SURROGATE(parser->lexerBits))
1827 {
1828 /* Shift the leading surrogate into the high 2 bytes of
1829 lexerBits so that the trailing surrogate can be stored
1830 in the low 2 bytes. */
1831 parser->lexerBits <<= 16;
1832 parser->lexerState = LEXING_STRING_TRAILING_SURROGATE_HEX_ESCAPE_BACKSLASH;
1833 }
1834 else if (IS_TRAILING_SURROGATE(parser->lexerBits))
1835 {
1836 /* The current codepoint location is the last hex digit
1837 of the hex escape sequence. The error location should
1838 be the beginning of the hex escape sequence, 5
1839 characters earlier. */
1840 JSON_Parser_SetErrorAtStringEscapeSequenceStart(
1841 parser, JSON_Error_UnpairedSurrogateEscapeSequence, 5);
1842 return JSON_Failure;
1843 }
1844 else
1845 {
1846 /* The escape sequence represents a BMP codepoint. */
1847 codepointToRecord = parser->lexerBits;
1848 parser->lexerBits = 0;
1849 parser->lexerState = LEXING_STRING;
1850 goto recordStringCodepointAndAdvance;
1851 }
1852 }
1853 else if (parser->lexerState == LEXING_STRING_HEX_ESCAPE_BYTE_8)
1854 {
1855 /* The second hex escape sequence is complete. We need to
1856 check whether it represents a trailing surrogate as
1857 expected. If so, the surrogate pair represents a single
1858 non-BMP codepoint. */
1859 if (!IS_TRAILING_SURROGATE(parser->lexerBits & 0xFFFF))
1860 {
1861 /* The current codepoint location is the last hex digit of
1862 the second hex escape sequence. The error location
1863 should be the beginning of the leading surrogate
1864 hex escape sequence, 11 characters earlier. */
1865 JSON_Parser_SetErrorAtStringEscapeSequenceStart(
1866 parser, JSON_Error_UnpairedSurrogateEscapeSequence, 11);
1867 return JSON_Failure;
1868 }
1869 /* The escape sequence represents a non-BMP codepoint. */
1870 codepointToRecord = CODEPOINT_FROM_SURROGATES(parser->lexerBits);
1871 parser->lexerBits = 0;
1872 parser->lexerState = LEXING_STRING;
1873 goto recordStringCodepointAndAdvance;
1874 }
1875 else
1876 parser->lexerState++;
1877 }
1878 goto advance;
1879
1880 case LEXING_STRING_TRAILING_SURROGATE_HEX_ESCAPE_BACKSLASH:
1881 if (c != EOF_CODEPOINT)
1882 {
1883 if (c != '\\')
1884 {
1885 /* The current codepoint location is the first character after
1886 the leading surrogate hex escape sequence. The error
1887 location should be the beginning of the leading surrogate
1888 hex escape sequence, 6 characters earlier. */
1889 JSON_Parser_SetErrorAtStringEscapeSequenceStart(
1890 parser, JSON_Error_UnpairedSurrogateEscapeSequence, 6);
1891 return JSON_Failure;
1892 }
1893 parser->lexerState = LEXING_STRING_TRAILING_SURROGATE_HEX_ESCAPE_U;
1894 }
1895 goto advance;
1896
1897 case LEXING_STRING_TRAILING_SURROGATE_HEX_ESCAPE_U:
1898 if (c != EOF_CODEPOINT)
1899 {
1900 if (c != 'u')
1901 {
1902 /* Distinguish between a totally bogus escape sequence
1903 and a valid one that just isn't the hex escape kind
1904 that we require for a trailing surrogate. The current
1905 codepoint location is the first character after the
1906 backslash that should have introduced the trailing
1907 surrogate hex escape sequence. */
1908 if (c == '"' || c == '\\' || c == '/' || c == 'b' ||
1909 c == 't' || c == 'n' || c == 'f' || c == 'r')
1910 {
1911 /* The error location should be at that beginning of the
1912 leading surrogate's hex escape sequence, 7 characters
1913 earlier. */
1914 JSON_Parser_SetErrorAtStringEscapeSequenceStart(
1915 parser, JSON_Error_UnpairedSurrogateEscapeSequence, 7);
1916 }
1917 else
1918 {
1919 /* The error location should be at that backslash, 1
1920 character earlier. */
1921 JSON_Parser_SetErrorAtStringEscapeSequenceStart(
1922 parser, JSON_Error_InvalidEscapeSequence, 1);
1923 }
1924 return JSON_Failure;
1925 }
1926 parser->lexerState = LEXING_STRING_HEX_ESCAPE_BYTE_5;
1927 }
1928 goto advance;
1929
1930 case LEXING_NUMBER_AFTER_MINUS:
1931 if (c == EOF_CODEPOINT)
1932 {
1933 /* Allow JSON_Parser_FlushLexer() to fail. */
1934 }
1935 else if (c == 'I' && GET_FLAGS(parser->flags, PARSER_ALLOW_SPECIAL_NUMBERS))
1936 {
1937 parser->token = T_NEGATIVE_INFINITY; /* changing horses mid-stream, so to speak */
1938 parser->lexerBits = INFINITY_LITERAL_EXPECTED_CHARS_START_INDEX;
1939 parser->lexerState = LEXING_LITERAL;
1940 }
1941 else
1942 {
1943 if (c == '0')
1944 {
1945 codepointToRecord = '0';
1946 parser->lexerState = LEXING_NUMBER_AFTER_LEADING_NEGATIVE_ZERO;
1947 goto recordNumberCodepointAndAdvance;
1948 }
1949 else if (c >= '1' && c <= '9')
1950 {
1951 codepointToRecord = c;
1952 parser->lexerState = LEXING_NUMBER_DECIMAL_DIGITS;
1953 goto recordNumberCodepointAndAdvance;
1954 }
1955 else
1956 {
1957 /* We trigger an unknown token error rather than an invalid number
1958 error so that "Foo" and "-Foo" trigger the same error. */
1959 JSON_Parser_SetErrorAtToken(parser, JSON_Error_UnknownToken);
1960 return JSON_Failure;
1961 }
1962 }
1963 goto advance;
1964
1965 case LEXING_NUMBER_AFTER_LEADING_ZERO:
1966 case LEXING_NUMBER_AFTER_LEADING_NEGATIVE_ZERO:
1967 if (c == '.')
1968 {
1969 codepointToRecord = '.';
1970 SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_ContainsDecimalPoint);
1971 parser->lexerState = LEXING_NUMBER_AFTER_DOT;
1972 goto recordNumberCodepointAndAdvance;
1973 }
1974 else if (c == 'e' || c == 'E')
1975 {
1976 codepointToRecord = c;
1977 SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_ContainsExponent);
1978 parser->lexerState = LEXING_NUMBER_AFTER_E;
1979 goto recordNumberCodepointAndAdvance;
1980 }
1981 else if (c >= '0' && c <= '9')
1982 {
1983 /* JSON does not allow the integer part of a number to have any
1984 digits after a leading zero. */
1985 if (!JSON_Parser_HandleInvalidNumber(parser, c, 0, 0))
1986 return JSON_Failure;
1987 }
1988 else if ((c == 'x' || c == 'X') &&
1989 parser->lexerState == LEXING_NUMBER_AFTER_LEADING_ZERO &&
1990 GET_FLAGS(parser->flags, PARSER_ALLOW_HEX_NUMBERS))
1991 {
1992 codepointToRecord = c;
1993 SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_IsHex);
1994 parser->lexerState = LEXING_NUMBER_AFTER_X;
1995 goto recordNumberCodepointAndAdvance;
1996 }
1997 else
1998 {
1999 /* The number is finished. */
2000 if (!JSON_Parser_ProcessToken(parser))
2001 return JSON_Failure;
2002 goto reprocess;
2003 }
2004 goto advance;
2005
2006 case LEXING_NUMBER_AFTER_X:
2007 if ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f'))
2008 {
2009 codepointToRecord = c;
2010 parser->lexerState = LEXING_NUMBER_HEX_DIGITS;
2011 goto recordNumberCodepointAndAdvance;
2012 }
2013 else if (!JSON_Parser_HandleInvalidNumber(parser, c, 1, JSON_IsHex))
2014 return JSON_Failure;
2015 goto advance;
2016
2017 case LEXING_NUMBER_HEX_DIGITS:
2018 if ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f'))
2019 {
2020 codepointToRecord = c;
2021 goto recordNumberCodepointAndAdvance;
2022 }
2023 /* The number is finished. */
2024 if (!JSON_Parser_ProcessToken(parser))
2025 return JSON_Failure;
2026 goto reprocess;
2027
2028 case LEXING_NUMBER_DECIMAL_DIGITS:
2029 if (c >= '0' && c <= '9')
2030 {
2031 codepointToRecord = c;
2032 goto recordNumberCodepointAndAdvance;
2033 }
2034 else if (c == '.')
2035 {
2036 codepointToRecord = '.';
2037 SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_ContainsDecimalPoint);
2038 parser->lexerState = LEXING_NUMBER_AFTER_DOT;
2039 goto recordNumberCodepointAndAdvance;
2040 }
2041 else if (c == 'e' || c == 'E')
2042 {
2043 codepointToRecord = c;
2044 SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_ContainsExponent);
2045 parser->lexerState = LEXING_NUMBER_AFTER_E;
2046 goto recordNumberCodepointAndAdvance;
2047 }
2048 /* The number is finished. */
2049 if (!JSON_Parser_ProcessToken(parser))
2050 return JSON_Failure;
2051 goto reprocess;
2052
2053 case LEXING_NUMBER_AFTER_DOT:
2054 if (c >= '0' && c <= '9')
2055 {
2056 codepointToRecord = c;
2057 parser->lexerState = LEXING_NUMBER_FRACTIONAL_DIGITS;
2058 goto recordNumberCodepointAndAdvance;
2059 }
2060 else if (!JSON_Parser_HandleInvalidNumber(parser, c, 1, JSON_ContainsDecimalPoint))
2061 return JSON_Failure;
2062 goto advance;
2063
2064 case LEXING_NUMBER_FRACTIONAL_DIGITS:
2065 if (c >= '0' && c <= '9')
2066 {
2067 codepointToRecord = c;
2068 goto recordNumberCodepointAndAdvance;
2069 }
2070 else if (c == 'e' || c == 'E')
2071 {
2072 codepointToRecord = c;
2073 SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_ContainsExponent);
2074 parser->lexerState = LEXING_NUMBER_AFTER_E;
2075 goto recordNumberCodepointAndAdvance;
2076 }
2077 /* The number is finished. */
2078 if (!JSON_Parser_ProcessToken(parser))
2079 return JSON_Failure;
2080 goto reprocess;
2081
2082 case LEXING_NUMBER_AFTER_E:
2083 if (c == '+')
2084 {
2085 codepointToRecord = c;
2086 parser->lexerState = LEXING_NUMBER_AFTER_EXPONENT_SIGN;
2087 goto recordNumberCodepointAndAdvance;
2088 }
2089 else if (c == '-')
2090 {
2091 codepointToRecord = c;
2092 SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_ContainsNegativeExponent);
2093 parser->lexerState = LEXING_NUMBER_AFTER_EXPONENT_SIGN;
2094 goto recordNumberCodepointAndAdvance;
2095 }
2096 else if (c >= '0' && c <= '9')
2097 {
2098 codepointToRecord = c;
2099 parser->lexerState = LEXING_NUMBER_EXPONENT_DIGITS;
2100 goto recordNumberCodepointAndAdvance;
2101 }
2102 else if (!JSON_Parser_HandleInvalidNumber(parser, c, 1, JSON_ContainsExponent))
2103 return JSON_Failure;
2104 goto advance;
2105
2106 case LEXING_NUMBER_AFTER_EXPONENT_SIGN:
2107 if (c >= '0' && c <= '9')
2108 {
2109 codepointToRecord = c;
2110 parser->lexerState = LEXING_NUMBER_EXPONENT_DIGITS;
2111 goto recordNumberCodepointAndAdvance;
2112 }
2113 else if (!JSON_Parser_HandleInvalidNumber(parser, c, 2, JSON_ContainsExponent | JSON_ContainsNegativeExponent))
2114 return JSON_Failure;
2115 goto advance;
2116
2117 case LEXING_NUMBER_EXPONENT_DIGITS:
2118 if (c >= '0' && c <= '9')
2119 {
2120 codepointToRecord = c;
2121 goto recordNumberCodepointAndAdvance;
2122 }
2123 /* The number is finished. */
2124 if (!JSON_Parser_ProcessToken(parser))
2125 return JSON_Failure;
2126 goto reprocess;
2127
2128 case LEXING_COMMENT_AFTER_SLASH:
2129 if (c == '/')
2130 parser->lexerState = LEXING_SINGLE_LINE_COMMENT;
2131 else if (c == '*')
2132 parser->lexerState = LEXING_MULTI_LINE_COMMENT;
2133 else
2134 {
2135 JSON_Parser_SetErrorAtToken(parser, JSON_Error_UnknownToken);
2136 return JSON_Failure;
2137 }
2138 goto advance;
2139
2140 case LEXING_SINGLE_LINE_COMMENT:
2141 if (c == CARRIAGE_RETURN_CODEPOINT || c == LINE_FEED_CODEPOINT || c == EOF_CODEPOINT)
2142 parser->lexerState = LEXING_WHITESPACE;
2143 goto advance;
2144
2145 case LEXING_MULTI_LINE_COMMENT:
2146 if (c == '*')
2147 parser->lexerState = LEXING_MULTI_LINE_COMMENT_AFTER_STAR;
2148 goto advance;
2149
2150 case LEXING_MULTI_LINE_COMMENT_AFTER_STAR:
2151 if (c == '/')
2152 parser->lexerState = LEXING_WHITESPACE;
2153 else if (c != '*')
2154 parser->lexerState = LEXING_MULTI_LINE_COMMENT;
2155 goto advance;
2156 }
2157
2158 recordStringCodepointAndAdvance:
2159
2160 tokenEncoding = parser->stringEncoding;
2161 maxTokenLength = parser->maxStringLength;
2162 if (!codepointToRecord)
2163 {
2164 SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_ContainsNullCharacter | JSON_ContainsControlCharacter);
2165 }
2166 else if (codepointToRecord < FIRST_NON_CONTROL_CODEPOINT)
2167 {
2168 SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_ContainsControlCharacter);
2169 }
2170 else if (codepointToRecord >= FIRST_NON_BMP_CODEPOINT)
2171 {
2172 SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_ContainsNonASCIICharacter | JSON_ContainsNonBMPCharacter);
2173 }
2174 else if (codepointToRecord >= FIRST_NON_ASCII_CODEPOINT)
2175 {
2176 SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_ContainsNonASCIICharacter);
2177 }
2178 goto recordCodepointAndAdvance;
2179
2180 recordNumberCodepointAndAdvance:
2181
2182 tokenEncoding = parser->numberEncoding;
2183 maxTokenLength = parser->maxNumberLength;
2184 goto recordCodepointAndAdvance;
2185
2186 recordCodepointAndAdvance:
2187
2188 /* We always ensure that there are LONGEST_ENCODING_SEQUENCE bytes
2189 available in the buffer for the next codepoint, so we don't have to
2190 check whether there is room when we decode a new codepoint, and if
2191 there isn't another codepoint, we have space already allocated for
2192 the encoded null terminator.*/
2193 parser->tokenBytesUsed += EncodeCodepoint(codepointToRecord, tokenEncoding, parser->pTokenBytes + parser->tokenBytesUsed);
2194 if (parser->tokenBytesUsed > maxTokenLength)
2195 {
2196 JSON_Parser_SetErrorAtToken(parser, parser->token == T_NUMBER ? JSON_Error_TooLongNumber : JSON_Error_TooLongString);
2197 return JSON_Failure;
2198 }
2199 if (parser->tokenBytesUsed > parser->tokenBytesLength - LONGEST_ENCODING_SEQUENCE)
2200 {
2201 byte* pBiggerBuffer = DoubleBuffer(&parser->memorySuite, parser->defaultTokenBytes, parser->pTokenBytes, parser->tokenBytesLength);
2202 if (!pBiggerBuffer)
2203 {
2204 JSON_Parser_SetErrorAtCodepoint(parser, JSON_Error_OutOfMemory);
2205 return JSON_Failure;
2206 }
2207 parser->pTokenBytes = pBiggerBuffer;
2208 parser->tokenBytesLength *= 2;
2209 }
2210 goto advance;
2211
2212 advance:
2213
2214 /* The current codepoint has been accepted, so advance the codepoint
2215 location counters accordingly. Note that the one time we don't
2216 do this is when the codepoint is EOF, which doesn't actually
2217 appear in the input stream. */
2218 if (c == CARRIAGE_RETURN_CODEPOINT)
2219 {
2220 SET_FLAGS_ON(ParserState, parser->state, PARSER_AFTER_CARRIAGE_RETURN);
2221 }
2222 if (c != EOF_CODEPOINT)
2223 {
2224 parser->codepointLocationByte += encodedLength;
2225 if (c == CARRIAGE_RETURN_CODEPOINT || c == LINE_FEED_CODEPOINT)
2226 {
2227 /* The next character will begin a new line. */
2228 parser->codepointLocationLine++;
2229 parser->codepointLocationColumn = 0;
2230 }
2231 else
2232 {
2233 /* The next character will be on the same line. */
2234 parser->codepointLocationColumn++;
2235 }
2236 }
2237
2238 if (tokenFinished && !JSON_Parser_ProcessToken(parser))
2239 return JSON_Failure;
2240
2241 return JSON_Success;
2242 }
2243
JSON_Parser_FlushLexer(JSON_Parser parser)2244 static JSON_Status JSON_Parser_FlushLexer(JSON_Parser parser)
2245 {
2246 /* Push the EOF codepoint to the lexer so that it can finish the pending
2247 token, if any. The EOF codepoint is never emitted by the decoder
2248 itself, since it is outside the Unicode range and therefore cannot
2249 be encoded in any of the possible input encodings. */
2250 if (!JSON_Parser_ProcessCodepoint(parser, EOF_CODEPOINT, 0))
2251 return JSON_Failure;
2252
2253 /* The lexer should be idle when parsing finishes. */
2254 if (parser->lexerState != LEXING_WHITESPACE)
2255 {
2256 JSON_Parser_SetErrorAtToken(parser, JSON_Error_IncompleteToken);
2257 return JSON_Failure;
2258 }
2259 return JSON_Success;
2260 }
2261
2262 /* Parser's decoder functions. */
2263
JSON_Parser_CallEncodingDetectedHandler(JSON_Parser parser)2264 static JSON_Status JSON_Parser_CallEncodingDetectedHandler(JSON_Parser parser)
2265 {
2266 if (parser->encodingDetectedHandler && parser->encodingDetectedHandler(parser) != JSON_Parser_Continue)
2267 {
2268 JSON_Parser_SetErrorAtCodepoint(parser, JSON_Error_AbortedByHandler);
2269 return JSON_Failure;
2270 }
2271 return JSON_Success;
2272 }
2273
2274 /* Forward declaration. */
2275 static JSON_Status JSON_Parser_ProcessInputBytes(JSON_Parser parser, const byte* pBytes, size_t length);
2276
JSON_Parser_ProcessUnknownByte(JSON_Parser parser,byte b)2277 static JSON_Status JSON_Parser_ProcessUnknownByte(JSON_Parser parser, byte b)
2278 {
2279 /* When the input encoding is unknown, the first 4 bytes of input are
2280 recorded in decoder.bits. */
2281 byte bytes[LONGEST_ENCODING_SEQUENCE];
2282
2283 switch (parser->decoderData.state)
2284 {
2285 case DECODER_RESET:
2286 parser->decoderData.state = DECODED_1_OF_4;
2287 parser->decoderData.bits = (uint32_t)b << 24;
2288 break;
2289
2290 case DECODED_1_OF_4:
2291 parser->decoderData.state = DECODED_2_OF_4;
2292 parser->decoderData.bits |= (uint32_t)b << 16;
2293 break;
2294
2295 case DECODED_2_OF_4:
2296 parser->decoderData.state = DECODED_3_OF_4;
2297 parser->decoderData.bits |= (uint32_t)b << 8;
2298 break;
2299
2300 case DECODED_3_OF_4:
2301 bytes[0] = (byte)(parser->decoderData.bits >> 24);
2302 bytes[1] = (byte)(parser->decoderData.bits >> 16);
2303 bytes[2] = (byte)(parser->decoderData.bits >> 8);
2304 bytes[3] = (byte)(b);
2305
2306 /* We try to match the following patterns in order, where .. is any
2307 byte value and nz is any non-zero byte value:
2308 EF BB BF .. => UTF-8 with BOM
2309 FF FE 00 00 => UTF-32LE with BOM
2310 FF FE nz 00 => UTF-16LE with BOM
2311 00 00 FE FF -> UTF-32BE with BOM
2312 FE FF .. .. => UTF-16BE with BOM
2313 nz nz .. .. => UTF-8
2314 nz 00 nz .. => UTF-16LE
2315 nz 00 00 00 => UTF-32LE
2316 00 nz .. .. => UTF-16BE
2317 00 00 00 nz => UTF-32BE
2318 .. .. .. .. => unknown encoding */
2319 if (bytes[0] == 0xEF && bytes[1] == 0xBB && bytes[2] == 0xBF)
2320 {
2321 /* EF BB BF .. */
2322 parser->inputEncoding = JSON_UTF8;
2323 }
2324 else if (bytes[0] == 0xFF && bytes[1] == 0xFE && bytes[3] == 0x00)
2325 {
2326 /* FF FE 00 00 or
2327 FF FE nz 00 */
2328 parser->inputEncoding = (bytes[2] == 0x00) ? JSON_UTF32LE : JSON_UTF16LE;
2329 }
2330 else if (bytes[0] == 0x00 && bytes[1] == 0x00 && bytes[2] == 0xFE && bytes[3] == 0xFF)
2331 {
2332 /* 00 00 FE FF */
2333 parser->inputEncoding = JSON_UTF32BE;
2334 }
2335 else if (bytes[0] == 0xFE && bytes[1] == 0xFF)
2336 {
2337 /* FE FF .. .. */
2338 parser->inputEncoding = JSON_UTF16BE;
2339 }
2340 else if (bytes[0] != 0x00)
2341 {
2342 /* nz .. .. .. */
2343 if (bytes[1] != 0x00)
2344 {
2345 /* nz nz .. .. */
2346 parser->inputEncoding = JSON_UTF8;
2347 }
2348 else if (bytes[2] != 0x00)
2349 {
2350 /* nz 00 nz .. */
2351 parser->inputEncoding = JSON_UTF16LE;
2352 }
2353 else if (bytes[3] == 0x00)
2354 {
2355 /* nz 00 00 00 */
2356 parser->inputEncoding = JSON_UTF32LE;
2357 }
2358 else
2359 {
2360 /* nz 00 00 nz => error */
2361 }
2362 }
2363 else if (bytes[1] != 0x00)
2364 {
2365 /* 00 nz .. .. */
2366 parser->inputEncoding = JSON_UTF16BE;
2367 }
2368 else if (bytes[2] == 0x00 && bytes[3] != 0x00)
2369 {
2370 /* 00 00 00 nz */
2371 parser->inputEncoding = JSON_UTF32BE;
2372 }
2373 else
2374 {
2375 /* 00 00 nz .. or
2376 00 00 00 00 => error */
2377 }
2378
2379 if (parser->inputEncoding == JSON_UnknownEncoding)
2380 return JSON_Parser_HandleInvalidEncodingSequence(parser, 4);
2381
2382 if (!JSON_Parser_CallEncodingDetectedHandler(parser))
2383 return JSON_Failure;
2384
2385 /* Reset the decoder before reprocessing the bytes. */
2386 Decoder_Reset(&parser->decoderData);
2387 return JSON_Parser_ProcessInputBytes(parser, bytes, 4);
2388 }
2389
2390 /* We don't have 4 bytes yet. */
2391 return JSON_Success;
2392 }
2393
JSON_Parser_ProcessInputBytes(JSON_Parser parser,const byte * pBytes,size_t length)2394 JSON_Status JSON_Parser_ProcessInputBytes(JSON_Parser parser, const byte* pBytes, size_t length)
2395 {
2396 /* Note that if length is 0, pBytes is allowed to be NULL. */
2397 size_t i = 0;
2398 while (parser->inputEncoding == JSON_UnknownEncoding && i < length)
2399 {
2400 if (!JSON_Parser_ProcessUnknownByte(parser, pBytes[i]))
2401 return JSON_Failure;
2402 i++;
2403 }
2404 while (i < length)
2405 {
2406 DecoderOutput output = Decoder_ProcessByte(
2407 &parser->decoderData, parser->inputEncoding, pBytes[i]);
2408 DecoderResultCode result = DECODER_RESULT_CODE(output);
2409 switch (result)
2410 {
2411 case SEQUENCE_PENDING:
2412 i++;
2413 break;
2414
2415 case SEQUENCE_COMPLETE:
2416 if (!JSON_Parser_ProcessCodepoint(
2417 parser, DECODER_CODEPOINT(output),
2418 DECODER_SEQUENCE_LENGTH(output)))
2419 return JSON_Failure;
2420 i++;
2421 break;
2422
2423 case SEQUENCE_INVALID_INCLUSIVE:
2424 i++;
2425 /* fallthrough */
2426 case SEQUENCE_INVALID_EXCLUSIVE:
2427 if (!JSON_Parser_HandleInvalidEncodingSequence(
2428 parser, DECODER_SEQUENCE_LENGTH(output)))
2429 return JSON_Failure;
2430 break;
2431 }
2432 }
2433 return JSON_Success;
2434 }
2435
JSON_Parser_FlushDecoder(JSON_Parser parser)2436 static JSON_Status JSON_Parser_FlushDecoder(JSON_Parser parser)
2437 {
2438 /* If the input was 1, 2, or 3 bytes long, and the input encoding was not
2439 explicitly specified by the client, we can sometimes make a reasonable
2440 guess. If the input was 1 or 3 bytes long, the only encoding that could
2441 possibly be valid JSON is UF-8. If the input was 2 bytes long, we try
2442 to match the following patterns in order, where .. is any byte value
2443 and nz is any non-zero byte value:
2444 FF FE => UTF-16LE with BOM
2445 FE FF => UTF-16BE with BOM
2446 nz nz => UTF-8
2447 nz 00 => UTF-16LE
2448 00 nz => UTF-16BE
2449 .. .. => unknown encoding
2450 */
2451 if (parser->inputEncoding == JSON_UnknownEncoding &&
2452 parser->decoderData.state != DECODER_RESET)
2453 {
2454 byte bytes[3];
2455 size_t length = 0;
2456 bytes[0] = (byte)(parser->decoderData.bits >> 24);
2457 bytes[1] = (byte)(parser->decoderData.bits >> 16);
2458 bytes[2] = (byte)(parser->decoderData.bits >> 8);
2459
2460 switch (parser->decoderData.state)
2461 {
2462 case DECODED_1_OF_4:
2463 parser->inputEncoding = JSON_UTF8;
2464 length = 1;
2465 break;
2466
2467 case DECODED_2_OF_4:
2468 /* FF FE */
2469 if (bytes[0] == 0xFF && bytes[1] == 0xFE)
2470 parser->inputEncoding = JSON_UTF16LE;
2471 /* FE FF */
2472 else if (bytes[0] == 0xFE && bytes[1] == 0xFF)
2473 parser->inputEncoding = JSON_UTF16BE;
2474 else if (bytes[0] != 0x00)
2475 {
2476 /* nz nz or
2477 nz 00 */
2478 parser->inputEncoding = bytes[1] ? JSON_UTF8 : JSON_UTF16LE;
2479 }
2480 /* 00 nz */
2481 else if (bytes[1] != 0x00)
2482 parser->inputEncoding = JSON_UTF16BE;
2483 /* 00 00 */
2484 else
2485 return JSON_Parser_HandleInvalidEncodingSequence(parser, 2);
2486 length = 2;
2487 break;
2488
2489 case DECODED_3_OF_4:
2490 parser->inputEncoding = JSON_UTF8;
2491 length = 3;
2492 break;
2493 }
2494
2495 if (!JSON_Parser_CallEncodingDetectedHandler(parser))
2496 return JSON_Failure;
2497
2498 /* Reset the decoder before reprocessing the bytes. */
2499 parser->decoderData.state = DECODER_RESET;
2500 parser->decoderData.bits = 0;
2501 if (!JSON_Parser_ProcessInputBytes(parser, bytes, length))
2502 return JSON_Failure;
2503 }
2504
2505 /* The decoder should be idle when parsing finishes. */
2506 if (Decoder_SequencePending(&parser->decoderData))
2507 return JSON_Parser_HandleInvalidEncodingSequence(
2508 parser, DECODER_STATE_BYTES(parser->decoderData.state));
2509 return JSON_Success;
2510 }
2511
2512 /* Parser API functions. */
2513
JSON_Parser_Create(const JSON_MemorySuite * pMemorySuite)2514 JSON_Parser JSON_CALL JSON_Parser_Create(const JSON_MemorySuite* pMemorySuite)
2515 {
2516 JSON_Parser parser;
2517 JSON_MemorySuite memorySuite;
2518
2519 if (pMemorySuite)
2520 {
2521 memorySuite = *pMemorySuite;
2522
2523 /* The full memory suite must be specified. */
2524 if (!memorySuite.realloc || !memorySuite.free)
2525 return NULL;
2526 }
2527 else
2528 memorySuite = defaultMemorySuite;
2529
2530 parser = (JSON_Parser)memorySuite.realloc(memorySuite.userData, NULL, sizeof(struct JSON_Parser_Data));
2531
2532 if (!parser)
2533 return NULL;
2534
2535 parser->memorySuite = memorySuite;
2536 JSON_Parser_ResetData(parser, 0/* isInitialized */);
2537 return parser;
2538 }
2539
JSON_Parser_Free(JSON_Parser parser)2540 JSON_Status JSON_CALL JSON_Parser_Free(JSON_Parser parser)
2541 {
2542 if (!parser || GET_FLAGS(parser->state, PARSER_IN_PROTECTED_API))
2543 return JSON_Failure;
2544
2545 SET_FLAGS_ON(ParserState, parser->state, PARSER_IN_PROTECTED_API);
2546
2547 if (parser->pTokenBytes != parser->defaultTokenBytes)
2548 parser->memorySuite.free(parser->memorySuite.userData, parser->pTokenBytes);
2549
2550 while (parser->pMemberNames)
2551 JSON_Parser_PopMemberNameList(parser);
2552
2553 Grammarian_FreeAllocations(&parser->grammarianData, &parser->memorySuite);
2554 parser->memorySuite.free(parser->memorySuite.userData, parser);
2555 return JSON_Success;
2556 }
2557
JSON_Parser_Reset(JSON_Parser parser)2558 JSON_Status JSON_CALL JSON_Parser_Reset(JSON_Parser parser)
2559 {
2560 if (!parser || GET_FLAGS(parser->state, PARSER_IN_PROTECTED_API))
2561 return JSON_Failure;
2562 SET_FLAGS_ON(ParserState, parser->state, PARSER_IN_PROTECTED_API);
2563 JSON_Parser_ResetData(parser, 1/* isInitialized */);
2564 /* Note that JSON_Parser_ResetData() unset PARSER_IN_PROTECTED_API for us. */
2565 return JSON_Success;
2566 }
2567
JSON_Parser_GetUserData(JSON_Parser parser)2568 void* JSON_CALL JSON_Parser_GetUserData(JSON_Parser parser)
2569 {
2570 return parser ? parser->userData : NULL;
2571 }
2572
JSON_Parser_SetUserData(JSON_Parser parser,void * userData)2573 JSON_Status JSON_CALL JSON_Parser_SetUserData(JSON_Parser parser, void* userData)
2574 {
2575 if (!parser)
2576 return JSON_Failure;
2577 parser->userData = userData;
2578 return JSON_Success;
2579 }
2580
JSON_Parser_GetInputEncoding(JSON_Parser parser)2581 JSON_Encoding JSON_CALL JSON_Parser_GetInputEncoding(JSON_Parser parser)
2582 {
2583 return parser ? (JSON_Encoding)parser->inputEncoding : JSON_UnknownEncoding;
2584 }
2585
JSON_Parser_SetInputEncoding(JSON_Parser parser,JSON_Encoding encoding)2586 JSON_Status JSON_CALL JSON_Parser_SetInputEncoding(JSON_Parser parser, JSON_Encoding encoding)
2587 {
2588 if ( !parser
2589 || encoding < JSON_UnknownEncoding
2590 || encoding > JSON_UTF32BE
2591 || GET_FLAGS(parser->state, PARSER_STARTED))
2592 return JSON_Failure;
2593 parser->inputEncoding = (Encoding)encoding;
2594 return JSON_Success;
2595 }
2596
JSON_Parser_GetStringEncoding(JSON_Parser parser)2597 JSON_Encoding JSON_CALL JSON_Parser_GetStringEncoding(JSON_Parser parser)
2598 {
2599 return parser ? (JSON_Encoding)parser->stringEncoding : JSON_UTF8;
2600 }
2601
JSON_Parser_SetStringEncoding(JSON_Parser parser,JSON_Encoding encoding)2602 JSON_Status JSON_CALL JSON_Parser_SetStringEncoding(JSON_Parser parser, JSON_Encoding encoding)
2603 {
2604 if (
2605 !parser
2606 || encoding <= JSON_UnknownEncoding
2607 || encoding > JSON_UTF32BE
2608 || GET_FLAGS(parser->state, PARSER_STARTED))
2609 return JSON_Failure;
2610 parser->stringEncoding = (Encoding)encoding;
2611 return JSON_Success;
2612 }
2613
JSON_Parser_GetMaxStringLength(JSON_Parser parser)2614 size_t JSON_CALL JSON_Parser_GetMaxStringLength(JSON_Parser parser)
2615 {
2616 return parser ? parser->maxStringLength : SIZE_MAX;
2617 }
2618
JSON_Parser_SetMaxStringLength(JSON_Parser parser,size_t maxLength)2619 JSON_Status JSON_CALL JSON_Parser_SetMaxStringLength(JSON_Parser parser, size_t maxLength)
2620 {
2621 if ( !parser
2622 || GET_FLAGS(parser->state, PARSER_STARTED))
2623 return JSON_Failure;
2624 parser->maxStringLength = maxLength;
2625 return JSON_Success;
2626 }
2627
JSON_Parser_GetNumberEncoding(JSON_Parser parser)2628 JSON_Encoding JSON_CALL JSON_Parser_GetNumberEncoding(JSON_Parser parser)
2629 {
2630 return parser ? (JSON_Encoding)parser->numberEncoding : JSON_UTF8;
2631 }
2632
JSON_Parser_SetNumberEncoding(JSON_Parser parser,JSON_Encoding encoding)2633 JSON_Status JSON_CALL JSON_Parser_SetNumberEncoding(JSON_Parser parser, JSON_Encoding encoding)
2634 {
2635 if (!parser || encoding <= JSON_UnknownEncoding || encoding > JSON_UTF32BE || GET_FLAGS(parser->state, PARSER_STARTED))
2636 return JSON_Failure;
2637 parser->numberEncoding = (Encoding)encoding;
2638 return JSON_Success;
2639 }
2640
JSON_Parser_GetMaxNumberLength(JSON_Parser parser)2641 size_t JSON_CALL JSON_Parser_GetMaxNumberLength(JSON_Parser parser)
2642 {
2643 return parser ? parser->maxNumberLength : SIZE_MAX;
2644 }
2645
JSON_Parser_SetMaxNumberLength(JSON_Parser parser,size_t maxLength)2646 JSON_Status JSON_CALL JSON_Parser_SetMaxNumberLength(JSON_Parser parser, size_t maxLength)
2647 {
2648 if ( !parser
2649 || GET_FLAGS(parser->state, PARSER_STARTED))
2650 return JSON_Failure;
2651 parser->maxNumberLength = maxLength;
2652 return JSON_Success;
2653 }
2654
JSON_Parser_GetAllowBOM(JSON_Parser parser)2655 JSON_Boolean JSON_CALL JSON_Parser_GetAllowBOM(JSON_Parser parser)
2656 {
2657 return (parser && GET_FLAGS(parser->flags, PARSER_ALLOW_BOM)) ? JSON_True : JSON_False;
2658 }
2659
JSON_Parser_SetAllowBOM(JSON_Parser parser,JSON_Boolean allowBOM)2660 JSON_Status JSON_CALL JSON_Parser_SetAllowBOM(JSON_Parser parser, JSON_Boolean allowBOM)
2661 {
2662 if (!parser || GET_FLAGS(parser->state, PARSER_STARTED))
2663 return JSON_Failure;
2664 SET_FLAGS(ParserFlags, parser->flags, PARSER_ALLOW_BOM, allowBOM);
2665 return JSON_Success;
2666 }
2667
JSON_Parser_GetAllowComments(JSON_Parser parser)2668 JSON_Boolean JSON_CALL JSON_Parser_GetAllowComments(JSON_Parser parser)
2669 {
2670 return (parser && GET_FLAGS(parser->flags, PARSER_ALLOW_COMMENTS)) ? JSON_True : JSON_False;
2671 }
2672
JSON_Parser_SetAllowComments(JSON_Parser parser,JSON_Boolean allowComments)2673 JSON_Status JSON_CALL JSON_Parser_SetAllowComments(JSON_Parser parser, JSON_Boolean allowComments)
2674 {
2675 if (!parser || GET_FLAGS(parser->state, PARSER_STARTED))
2676 return JSON_Failure;
2677 SET_FLAGS(ParserFlags, parser->flags, PARSER_ALLOW_COMMENTS, allowComments);
2678 return JSON_Success;
2679 }
2680
JSON_Parser_GetAllowSpecialNumbers(JSON_Parser parser)2681 JSON_Boolean JSON_CALL JSON_Parser_GetAllowSpecialNumbers(JSON_Parser parser)
2682 {
2683 return (parser && GET_FLAGS(parser->flags, PARSER_ALLOW_SPECIAL_NUMBERS)) ? JSON_True : JSON_False;
2684 }
2685
JSON_Parser_SetAllowSpecialNumbers(JSON_Parser parser,JSON_Boolean allowSpecialNumbers)2686 JSON_Status JSON_CALL JSON_Parser_SetAllowSpecialNumbers(JSON_Parser parser, JSON_Boolean allowSpecialNumbers)
2687 {
2688 if (!parser || GET_FLAGS(parser->state, PARSER_STARTED))
2689 return JSON_Failure;
2690 SET_FLAGS(ParserFlags, parser->flags, PARSER_ALLOW_SPECIAL_NUMBERS, allowSpecialNumbers);
2691 return JSON_Success;
2692 }
2693
JSON_Parser_GetAllowHexNumbers(JSON_Parser parser)2694 JSON_Boolean JSON_CALL JSON_Parser_GetAllowHexNumbers(JSON_Parser parser)
2695 {
2696 return (parser && GET_FLAGS(parser->flags, PARSER_ALLOW_HEX_NUMBERS)) ? JSON_True : JSON_False;
2697 }
2698
JSON_Parser_SetAllowHexNumbers(JSON_Parser parser,JSON_Boolean allowHexNumbers)2699 JSON_Status JSON_CALL JSON_Parser_SetAllowHexNumbers(JSON_Parser parser, JSON_Boolean allowHexNumbers)
2700 {
2701 if (!parser || GET_FLAGS(parser->state, PARSER_STARTED))
2702 return JSON_Failure;
2703 SET_FLAGS(ParserFlags, parser->flags, PARSER_ALLOW_HEX_NUMBERS, allowHexNumbers);
2704 return JSON_Success;
2705 }
2706
JSON_Parser_GetAllowUnescapedControlCharacters(JSON_Parser parser)2707 JSON_Boolean JSON_CALL JSON_Parser_GetAllowUnescapedControlCharacters(JSON_Parser parser)
2708 {
2709 return (parser && GET_FLAGS(parser->flags, PARSER_ALLOW_CONTROL_CHARS)) ? JSON_True : JSON_False;
2710 }
2711
JSON_Parser_SetAllowUnescapedControlCharacters(JSON_Parser parser,JSON_Boolean allowUnescapedControlCharacters)2712 JSON_Status JSON_CALL JSON_Parser_SetAllowUnescapedControlCharacters(JSON_Parser parser, JSON_Boolean allowUnescapedControlCharacters)
2713 {
2714 if (!parser || GET_FLAGS(parser->state, PARSER_STARTED))
2715 return JSON_Failure;
2716 SET_FLAGS(ParserFlags, parser->flags, PARSER_ALLOW_CONTROL_CHARS, allowUnescapedControlCharacters);
2717 return JSON_Success;
2718 }
2719
JSON_Parser_GetReplaceInvalidEncodingSequences(JSON_Parser parser)2720 JSON_Boolean JSON_CALL JSON_Parser_GetReplaceInvalidEncodingSequences(JSON_Parser parser)
2721 {
2722 return (parser && GET_FLAGS(parser->flags, PARSER_REPLACE_INVALID)) ? JSON_True : JSON_False;
2723 }
2724
JSON_Parser_SetReplaceInvalidEncodingSequences(JSON_Parser parser,JSON_Boolean replaceInvalidEncodingSequences)2725 JSON_Status JSON_CALL JSON_Parser_SetReplaceInvalidEncodingSequences(
2726 JSON_Parser parser, JSON_Boolean replaceInvalidEncodingSequences)
2727 {
2728 if (!parser || GET_FLAGS(parser->state, PARSER_STARTED))
2729 return JSON_Failure;
2730 SET_FLAGS(ParserFlags, parser->flags, PARSER_REPLACE_INVALID, replaceInvalidEncodingSequences);
2731 return JSON_Success;
2732 }
2733
JSON_Parser_GetTrackObjectMembers(JSON_Parser parser)2734 JSON_Boolean JSON_CALL JSON_Parser_GetTrackObjectMembers(JSON_Parser parser)
2735 {
2736 return (parser && GET_FLAGS(parser->flags, PARSER_TRACK_OBJECT_MEMBERS)) ? JSON_True : JSON_False;
2737 }
2738
JSON_Parser_SetTrackObjectMembers(JSON_Parser parser,JSON_Boolean trackObjectMembers)2739 JSON_Status JSON_CALL JSON_Parser_SetTrackObjectMembers(JSON_Parser parser, JSON_Boolean trackObjectMembers)
2740 {
2741 if (!parser || GET_FLAGS(parser->state, PARSER_STARTED))
2742 {
2743 return JSON_Failure;
2744 }
2745 SET_FLAGS(ParserFlags, parser->flags, PARSER_TRACK_OBJECT_MEMBERS, trackObjectMembers);
2746 return JSON_Success;
2747 }
2748
JSON_Parser_GetStopAfterEmbeddedDocument(JSON_Parser parser)2749 JSON_Boolean JSON_CALL JSON_Parser_GetStopAfterEmbeddedDocument(JSON_Parser parser)
2750 {
2751 return (parser && GET_FLAGS(parser->flags, PARSER_EMBEDDED_DOCUMENT)) ? JSON_True : JSON_False;
2752 }
2753
JSON_Parser_SetStopAfterEmbeddedDocument(JSON_Parser parser,JSON_Boolean stopAfterEmbeddedDocument)2754 JSON_Status JSON_CALL JSON_Parser_SetStopAfterEmbeddedDocument(
2755 JSON_Parser parser, JSON_Boolean stopAfterEmbeddedDocument)
2756 {
2757 if (!parser || GET_FLAGS(parser->state, PARSER_STARTED))
2758 {
2759 return JSON_Failure;
2760 }
2761 SET_FLAGS(ParserFlags, parser->flags, PARSER_EMBEDDED_DOCUMENT, stopAfterEmbeddedDocument);
2762 return JSON_Success;
2763 }
2764
JSON_Parser_GetError(JSON_Parser parser)2765 JSON_Error JSON_CALL JSON_Parser_GetError(JSON_Parser parser)
2766 {
2767 return parser ? (JSON_Error)parser->error : JSON_Error_None;
2768 }
2769
JSON_Parser_GetErrorLocation(JSON_Parser parser,JSON_Location * pLocation)2770 JSON_Status JSON_CALL JSON_Parser_GetErrorLocation(
2771 JSON_Parser parser, JSON_Location* pLocation)
2772 {
2773 if (!pLocation || !parser || parser->error == JSON_Error_None)
2774 return JSON_Failure;
2775
2776 if (parser->errorOffset == ERROR_LOCATION_IS_TOKEN_START)
2777 {
2778 pLocation->byte = parser->tokenLocationByte;
2779 pLocation->line = parser->tokenLocationLine;
2780 pLocation->column = parser->tokenLocationColumn;
2781 }
2782 else
2783 {
2784 pLocation->byte = parser->codepointLocationByte - (SHORTEST_ENCODING_SEQUENCE(parser->inputEncoding) * parser->errorOffset);
2785 pLocation->line = parser->codepointLocationLine;
2786 pLocation->column = parser->codepointLocationColumn - parser->errorOffset;
2787 }
2788 pLocation->depth = parser->depth;
2789 return JSON_Success;
2790 }
2791
JSON_Parser_GetTokenLocation(JSON_Parser parser,JSON_Location * pLocation)2792 JSON_Status JSON_CALL JSON_Parser_GetTokenLocation(
2793 JSON_Parser parser, JSON_Location* pLocation)
2794 {
2795 if (!parser || !pLocation || !GET_FLAGS(parser->state, PARSER_IN_TOKEN_HANDLER))
2796 return JSON_Failure;
2797
2798 pLocation->byte = parser->tokenLocationByte;
2799 pLocation->line = parser->tokenLocationLine;
2800 pLocation->column = parser->tokenLocationColumn;
2801 pLocation->depth = parser->depth;
2802 return JSON_Success;
2803 }
2804
JSON_Parser_GetAfterTokenLocation(JSON_Parser parser,JSON_Location * pLocation)2805 JSON_Status JSON_CALL JSON_Parser_GetAfterTokenLocation(
2806 JSON_Parser parser, JSON_Location* pLocation)
2807 {
2808 if (!parser || !pLocation || !GET_FLAGS(parser->state, PARSER_IN_TOKEN_HANDLER))
2809 return JSON_Failure;
2810
2811 pLocation->byte = parser->codepointLocationByte;
2812 pLocation->line = parser->codepointLocationLine;
2813 pLocation->column = parser->codepointLocationColumn;
2814 pLocation->depth = parser->depth;
2815 return JSON_Success;
2816 }
2817
JSON_Parser_GetEncodingDetectedHandler(JSON_Parser parser)2818 JSON_Parser_NullHandler JSON_CALL JSON_Parser_GetEncodingDetectedHandler(JSON_Parser parser)
2819 {
2820 return parser ? parser->encodingDetectedHandler : NULL;
2821 }
2822
JSON_Parser_SetEncodingDetectedHandler(JSON_Parser parser,JSON_Parser_EncodingDetectedHandler handler)2823 JSON_Status JSON_CALL JSON_Parser_SetEncodingDetectedHandler(
2824 JSON_Parser parser, JSON_Parser_EncodingDetectedHandler handler)
2825 {
2826 if (!parser)
2827 return JSON_Failure;
2828
2829 parser->encodingDetectedHandler = handler;
2830 return JSON_Success;
2831 }
2832
JSON_Parser_GetNullHandler(JSON_Parser parser)2833 JSON_Parser_NullHandler JSON_CALL JSON_Parser_GetNullHandler(JSON_Parser parser)
2834 {
2835 return parser ? parser->nullHandler : NULL;
2836 }
2837
JSON_Parser_SetNullHandler(JSON_Parser parser,JSON_Parser_NullHandler handler)2838 JSON_Status JSON_CALL JSON_Parser_SetNullHandler(
2839 JSON_Parser parser, JSON_Parser_NullHandler handler)
2840 {
2841 if (!parser)
2842 return JSON_Failure;
2843
2844 parser->nullHandler = handler;
2845 return JSON_Success;
2846 }
2847
JSON_Parser_GetBooleanHandler(JSON_Parser parser)2848 JSON_Parser_BooleanHandler JSON_CALL JSON_Parser_GetBooleanHandler(JSON_Parser parser)
2849 {
2850 return parser ? parser->booleanHandler : NULL;
2851 }
2852
JSON_Parser_SetBooleanHandler(JSON_Parser parser,JSON_Parser_BooleanHandler handler)2853 JSON_Status JSON_CALL JSON_Parser_SetBooleanHandler(
2854 JSON_Parser parser, JSON_Parser_BooleanHandler handler)
2855 {
2856 if (!parser)
2857 return JSON_Failure;
2858
2859 parser->booleanHandler = handler;
2860 return JSON_Success;
2861 }
2862
JSON_Parser_GetStringHandler(JSON_Parser parser)2863 JSON_Parser_StringHandler JSON_CALL JSON_Parser_GetStringHandler(JSON_Parser parser)
2864 {
2865 return parser ? parser->stringHandler : NULL;
2866 }
2867
JSON_Parser_SetStringHandler(JSON_Parser parser,JSON_Parser_StringHandler handler)2868 JSON_Status JSON_CALL JSON_Parser_SetStringHandler(
2869 JSON_Parser parser, JSON_Parser_StringHandler handler)
2870 {
2871 if (!parser)
2872 return JSON_Failure;
2873
2874 parser->stringHandler = handler;
2875 return JSON_Success;
2876 }
2877
JSON_Parser_GetNumberHandler(JSON_Parser parser)2878 JSON_Parser_NumberHandler JSON_CALL JSON_Parser_GetNumberHandler(JSON_Parser parser)
2879 {
2880 return parser ? parser->numberHandler : NULL;
2881 }
2882
JSON_Parser_SetNumberHandler(JSON_Parser parser,JSON_Parser_NumberHandler handler)2883 JSON_Status JSON_CALL JSON_Parser_SetNumberHandler(
2884 JSON_Parser parser, JSON_Parser_NumberHandler handler)
2885 {
2886 if (!parser)
2887 return JSON_Failure;
2888
2889 parser->numberHandler = handler;
2890 return JSON_Success;
2891 }
2892
JSON_Parser_GetSpecialNumberHandler(JSON_Parser parser)2893 JSON_Parser_SpecialNumberHandler JSON_CALL JSON_Parser_GetSpecialNumberHandler(JSON_Parser parser)
2894 {
2895 return parser ? parser->specialNumberHandler : NULL;
2896 }
2897
JSON_Parser_SetSpecialNumberHandler(JSON_Parser parser,JSON_Parser_SpecialNumberHandler handler)2898 JSON_Status JSON_CALL JSON_Parser_SetSpecialNumberHandler(
2899 JSON_Parser parser, JSON_Parser_SpecialNumberHandler handler)
2900 {
2901 if (!parser)
2902 return JSON_Failure;
2903 parser->specialNumberHandler = handler;
2904 return JSON_Success;
2905 }
2906
JSON_Parser_GetStartObjectHandler(JSON_Parser parser)2907 JSON_Parser_StartObjectHandler JSON_CALL JSON_Parser_GetStartObjectHandler(JSON_Parser parser)
2908 {
2909 return parser ? parser->startObjectHandler : NULL;
2910 }
2911
JSON_Parser_SetStartObjectHandler(JSON_Parser parser,JSON_Parser_StartObjectHandler handler)2912 JSON_Status JSON_CALL JSON_Parser_SetStartObjectHandler(
2913 JSON_Parser parser, JSON_Parser_StartObjectHandler handler)
2914 {
2915 if (!parser)
2916 return JSON_Failure;
2917
2918 parser->startObjectHandler = handler;
2919 return JSON_Success;
2920 }
2921
JSON_Parser_GetEndObjectHandler(JSON_Parser parser)2922 JSON_Parser_EndObjectHandler JSON_CALL JSON_Parser_GetEndObjectHandler(JSON_Parser parser)
2923 {
2924 return parser ? parser->endObjectHandler : NULL;
2925 }
2926
JSON_Parser_SetEndObjectHandler(JSON_Parser parser,JSON_Parser_EndObjectHandler handler)2927 JSON_Status JSON_CALL JSON_Parser_SetEndObjectHandler(
2928 JSON_Parser parser, JSON_Parser_EndObjectHandler handler)
2929 {
2930 if (!parser)
2931 return JSON_Failure;
2932
2933 parser->endObjectHandler = handler;
2934 return JSON_Success;
2935 }
2936
JSON_Parser_GetObjectMemberHandler(JSON_Parser parser)2937 JSON_Parser_ObjectMemberHandler JSON_CALL JSON_Parser_GetObjectMemberHandler(JSON_Parser parser)
2938 {
2939 return parser ? parser->objectMemberHandler : NULL;
2940 }
2941
JSON_Parser_SetObjectMemberHandler(JSON_Parser parser,JSON_Parser_ObjectMemberHandler handler)2942 JSON_Status JSON_CALL JSON_Parser_SetObjectMemberHandler(
2943 JSON_Parser parser, JSON_Parser_ObjectMemberHandler handler)
2944 {
2945 if (!parser)
2946 return JSON_Failure;
2947
2948 parser->objectMemberHandler = handler;
2949 return JSON_Success;
2950 }
2951
JSON_Parser_GetStartArrayHandler(JSON_Parser parser)2952 JSON_Parser_StartArrayHandler JSON_CALL JSON_Parser_GetStartArrayHandler(JSON_Parser parser)
2953 {
2954 return parser ? parser->startArrayHandler : NULL;
2955 }
2956
JSON_Parser_SetStartArrayHandler(JSON_Parser parser,JSON_Parser_StartArrayHandler handler)2957 JSON_Status JSON_CALL JSON_Parser_SetStartArrayHandler(
2958 JSON_Parser parser, JSON_Parser_StartArrayHandler handler)
2959 {
2960 if (!parser)
2961 return JSON_Failure;
2962
2963 parser->startArrayHandler = handler;
2964 return JSON_Success;
2965 }
2966
JSON_Parser_GetEndArrayHandler(JSON_Parser parser)2967 JSON_Parser_EndArrayHandler JSON_CALL JSON_Parser_GetEndArrayHandler(JSON_Parser parser)
2968 {
2969 return parser ? parser->endArrayHandler : NULL;
2970 }
2971
JSON_Parser_SetEndArrayHandler(JSON_Parser parser,JSON_Parser_EndArrayHandler handler)2972 JSON_Status JSON_CALL JSON_Parser_SetEndArrayHandler(
2973 JSON_Parser parser, JSON_Parser_EndArrayHandler handler)
2974 {
2975 if (!parser)
2976 return JSON_Failure;
2977
2978 parser->endArrayHandler = handler;
2979 return JSON_Success;
2980 }
2981
JSON_Parser_GetArrayItemHandler(JSON_Parser parser)2982 JSON_Parser_ArrayItemHandler JSON_CALL JSON_Parser_GetArrayItemHandler(JSON_Parser parser)
2983 {
2984 return parser ? parser->arrayItemHandler : NULL;
2985 }
2986
JSON_Parser_SetArrayItemHandler(JSON_Parser parser,JSON_Parser_ArrayItemHandler handler)2987 JSON_Status JSON_CALL JSON_Parser_SetArrayItemHandler(
2988 JSON_Parser parser, JSON_Parser_ArrayItemHandler handler)
2989 {
2990 if (!parser)
2991 return JSON_Failure;
2992
2993 parser->arrayItemHandler = handler;
2994 return JSON_Success;
2995 }
2996
JSON_Parser_Parse(JSON_Parser parser,const char * pBytes,size_t length,JSON_Boolean isFinal)2997 JSON_Status JSON_CALL JSON_Parser_Parse(JSON_Parser parser, const char* pBytes, size_t length, JSON_Boolean isFinal)
2998 {
2999 JSON_Status status = JSON_Failure;
3000 if (parser && (pBytes || !length) && !GET_FLAGS(parser->state, PARSER_FINISHED | PARSER_IN_PROTECTED_API))
3001 {
3002 int finishedParsing = 0;
3003 SET_FLAGS_ON(ParserState, parser->state, PARSER_STARTED | PARSER_IN_PROTECTED_API);
3004 if (JSON_Parser_ProcessInputBytes(parser, (const byte*)pBytes, length))
3005 {
3006 /* New input was parsed successfully. */
3007 if (isFinal)
3008 {
3009 /* Make sure there is nothing pending in the decoder, lexer,
3010 or parser. */
3011 if (JSON_Parser_FlushDecoder(parser) &&
3012 JSON_Parser_FlushLexer(parser) &&
3013 JSON_Parser_FlushParser(parser))
3014 status = JSON_Success;
3015
3016 finishedParsing = 1;
3017 }
3018 else
3019 status = JSON_Success;
3020 }
3021 else
3022 {
3023 /* New input failed to parse. */
3024 finishedParsing = 1;
3025 }
3026 if (finishedParsing)
3027 {
3028 SET_FLAGS_ON(ParserState, parser->state, PARSER_FINISHED);
3029 }
3030 SET_FLAGS_OFF(ParserState, parser->state, PARSER_IN_PROTECTED_API);
3031 }
3032 return status;
3033 }
3034
3035 #endif /* JSON_NO_PARSER */
3036
3037 /******************** JSON Writer ********************/
3038
3039 #ifndef JSON_NO_WRITER
3040
/* Combinable writer state flags. */
/* Value assigned to the state when the writer's data is (re)initialized. */
#define WRITER_RESET 0x0
/* Set once a token has been written; the writer's setting setters refuse
   to change settings while this flag is set. */
#define WRITER_STARTED 0x1
/* Set while a protected writer API call is executing; API functions check
   it and fail instead of running while it is set. */
#define WRITER_IN_PROTECTED_API 0x2
typedef byte WriterState;
3046
/* Combinable writer settings flags. */
/* Flags value assigned when the writer's data is (re)initialized. */
#define WRITER_DEFAULT_FLAGS 0x0
/* Toggled through JSON_Writer_SetUseCRLF.
   NOTE(review): presumably selects CR LF line breaks in output; the code
   that consumes this flag is not in this part of the file - confirm. */
#define WRITER_USE_CRLF 0x1
/* When set, invalid encoding sequences in string values are written as an
   escaped replacement character instead of failing the write. */
#define WRITER_REPLACE_INVALID 0x2
/* When set, codepoints outside the ASCII range are hex-escaped in string
   values (see JSON_Writer_GetCodepointEscapeCharacter). */
#define WRITER_ESCAPE_NON_ASCII 0x4
typedef byte WriterFlags;
3053
/* A writer instance. */
struct JSON_Writer_Data
{
   /* Allocation callbacks; used to allocate and free the writer itself and
      passed to the grammarian. */
   JSON_MemorySuite memorySuite;
   /* Opaque caller pointer, accessed via JSON_Writer_Get/SetUserData. */
   void* userData;
   /* Combination of WRITER_STARTED / WRITER_IN_PROTECTED_API. */
   WriterState state;
   /* Combination of the WRITER_* settings flags. */
   WriterFlags flags;
   /* Encoding of the bytes handed to the output handler (JSON_UTF8 after
      a reset). */
   Encoding outputEncoding;
   /* Last error recorded by JSON_Writer_SetError (JSON_Error_None after
      a reset). */
   Error error;
   /* Grammar-tracking state used to accept or reject each written token. */
   GrammarianData grammarianData;
   /* Callback that receives the encoded output bytes; may be NULL, in
      which case output is discarded. */
   JSON_Writer_OutputHandler outputHandler;
};
3066
3067 /* Writer internal functions. */
3068
/* Restores every resettable member of the writer to its default value.
 * The memory suite is left untouched. isInitialized is forwarded to
 * Grammarian_Reset(). The state is written last, which also clears
 * WRITER_IN_PROTECTED_API for callers that set it before resetting. */
static void JSON_Writer_ResetData(JSON_Writer writer, int isInitialized)
{
   /* Callback-related members. */
   writer->outputHandler  = NULL;
   writer->userData       = NULL;

   /* Settings and status. */
   writer->outputEncoding = JSON_UTF8;
   writer->flags          = WRITER_DEFAULT_FLAGS;
   writer->error          = JSON_Error_None;

   Grammarian_Reset(&writer->grammarianData, isInitialized);

   /* Must remain the final assignment. */
   writer->state          = WRITER_RESET;
}
3079
JSON_Writer_SetError(JSON_Writer writer,Error error)3080 static void JSON_Writer_SetError(JSON_Writer writer, Error error)
3081 {
3082 writer->error = error;
3083 }
3084
JSON_Writer_ProcessToken(JSON_Writer writer,Symbol token)3085 static JSON_Status JSON_Writer_ProcessToken(JSON_Writer writer, Symbol token)
3086 {
3087 GrammarianOutput output = Grammarian_ProcessToken(&writer->grammarianData, token, &writer->memorySuite);
3088 switch (GRAMMARIAN_RESULT_CODE(output))
3089 {
3090 case REJECTED_TOKEN:
3091 JSON_Writer_SetError(writer, JSON_Error_UnexpectedToken);
3092 return JSON_Failure;
3093
3094 case SYMBOL_STACK_FULL:
3095 JSON_Writer_SetError(writer, JSON_Error_OutOfMemory);
3096 return JSON_Failure;
3097 }
3098 return JSON_Success;
3099 }
3100
JSON_Writer_OutputBytes(JSON_Writer writer,const byte * pBytes,size_t length)3101 static JSON_Status JSON_Writer_OutputBytes(JSON_Writer writer, const byte* pBytes, size_t length)
3102 {
3103 if (writer->outputHandler && length)
3104 {
3105 if (writer->outputHandler(writer, (const char*)pBytes, length) != JSON_Writer_Continue)
3106 {
3107 JSON_Writer_SetError(writer, JSON_Error_AbortedByHandler);
3108 return JSON_Failure;
3109 }
3110 }
3111 return JSON_Success;
3112 }
3113
/* Decides how codepoint `c` must be written inside a JSON string.
 *
 * Returns:
 *   0     - write the codepoint as a normal encoding sequence;
 *   'u'   - write it as one or two \uXXXX hex escape sequences;
 *   other - write a backslash followed by the returned character
 *           (b, t, n, f, r, ", \).
 *
 * Hex escaping is chosen for DELETE, the line and paragraph separators,
 * codepoints below FIRST_NON_CONTROL_CODEPOINT without a short escape,
 * noncharacters, and - when WRITER_ESCAPE_NON_ASCII is set - every
 * codepoint from FIRST_NON_ASCII_CODEPOINT upwards. */
static Codepoint JSON_Writer_GetCodepointEscapeCharacter(JSON_Writer writer, Codepoint c)
{
   switch (c)
   {
      case BACKSPACE_CODEPOINT:
         return 'b';

      case TAB_CODEPOINT:
         return 't';

      case LINE_FEED_CODEPOINT:
         return 'n';

      case FORM_FEED_CODEPOINT:
         return 'f';

      case CARRIAGE_RETURN_CODEPOINT:
         return 'r';

      case '"':
         return '"';

      /* Forward slashes are deliberately not escaped. */

      case '\\':
         return '\\';

      case DELETE_CODEPOINT:
      case LINE_SEPARATOR_CODEPOINT:
      case PARAGRAPH_SEPARATOR_CODEPOINT:
         return 'u';

      default:
         /* The non-ASCII comparison is inclusive: the first non-ASCII
            codepoint itself must be escaped too, otherwise it would be
            emitted raw even though the caller asked for ASCII-only
            string output. */
         if (c < FIRST_NON_CONTROL_CODEPOINT || IS_NONCHARACTER(c) ||
               (GET_FLAGS(writer->flags, WRITER_ESCAPE_NON_ASCII) && c >= FIRST_NON_ASCII_CODEPOINT))
            return 'u';
         break;
   }
   return 0;
}
3155
/* Small staging buffer that accumulates output bytes so that they can be
   handed to the writer's output handler in larger pieces. */
typedef struct tag_WriteBufferData
{
   /* Number of bytes currently stored at the start of `bytes`. */
   size_t used;
   /* Storage for the pending bytes. */
   byte bytes[256];
} WriteBufferData;
typedef WriteBufferData* WriteBuffer;
3162
/* Marks the buffer as empty; the stored bytes are not cleared. */
static void WriteBuffer_Reset(WriteBuffer buffer)
{
   buffer->used = 0;
}
3167
WriteBuffer_Flush(WriteBuffer buffer,JSON_Writer writer)3168 static JSON_Status WriteBuffer_Flush(WriteBuffer buffer, JSON_Writer writer)
3169 {
3170 JSON_Status status = JSON_Writer_OutputBytes(writer, buffer->bytes, buffer->used);
3171 buffer->used = 0;
3172 return status;
3173 }
3174
WriteBuffer_WriteBytes(WriteBuffer buffer,JSON_Writer writer,const byte * pBytes,size_t length)3175 static JSON_Status WriteBuffer_WriteBytes(WriteBuffer buffer, JSON_Writer writer, const byte* pBytes, size_t length)
3176 {
3177 if (buffer->used + length > sizeof(buffer->bytes) &&
3178 !WriteBuffer_Flush(buffer, writer))
3179 return JSON_Failure;
3180
3181 memcpy(&buffer->bytes[buffer->used], pBytes, length);
3182 buffer->used += length;
3183 return JSON_Success;
3184 }
3185
WriteBuffer_WriteCodepoint(WriteBuffer buffer,JSON_Writer writer,Codepoint c)3186 static JSON_Status WriteBuffer_WriteCodepoint(WriteBuffer buffer, JSON_Writer writer, Codepoint c)
3187 {
3188 if (buffer->used + LONGEST_ENCODING_SEQUENCE > sizeof(buffer->bytes) &&
3189 !WriteBuffer_Flush(buffer, writer))
3190 return JSON_Failure;
3191
3192 buffer->used += EncodeCodepoint(c, writer->outputEncoding, &buffer->bytes[buffer->used]);
3193 return JSON_Success;
3194 }
3195
WriteBuffer_WriteHexEscapeSequence(WriteBuffer buffer,JSON_Writer writer,Codepoint c)3196 static JSON_Status WriteBuffer_WriteHexEscapeSequence(WriteBuffer buffer, JSON_Writer writer, Codepoint c)
3197 {
3198 if (c >= FIRST_NON_BMP_CODEPOINT)
3199 {
3200 /* Non-BMP codepoints must be hex-escaped by escaping the UTF-16
3201 surrogate pair for the codepoint. We put the leading surrogate
3202 in the low 16 bits of c so that it gets written first, then
3203 the second pass through the loop will write out the trailing
3204 surrogate. x*/
3205 c = SURROGATES_FROM_CODEPOINT(c);
3206 c = (c << 16) | (c >> 16);
3207 }
3208 do
3209 {
3210 static const byte hexDigits[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
3211 byte escapeSequence[6];
3212 int i;
3213 escapeSequence[0] = '\\';
3214 escapeSequence[1] = 'u';
3215 escapeSequence[2] = hexDigits[(c >> 12) & 0xF];
3216 escapeSequence[3] = hexDigits[(c >> 8) & 0xF];
3217 escapeSequence[4] = hexDigits[(c >> 4) & 0xF];
3218 escapeSequence[5] = hexDigits[c & 0xF];
3219 for (i = 0; i < sizeof(escapeSequence); i++)
3220 {
3221 if (!WriteBuffer_WriteCodepoint(buffer, writer, escapeSequence[i]))
3222 return JSON_Failure;
3223 }
3224 c >>= 16;
3225 } while (c);
3226 return JSON_Success;
3227 }
3228
JSON_Writer_OutputString(JSON_Writer writer,const byte * pBytes,size_t length,Encoding encoding)3229 static JSON_Status JSON_Writer_OutputString(JSON_Writer writer, const byte* pBytes, size_t length, Encoding encoding)
3230 {
3231 static const byte quoteUTF[] = { 0, 0, 0, '"', 0, 0, 0 };
3232 static const byte* const quoteEncodings[5] = { quoteUTF + 3, quoteUTF + 3, quoteUTF + 2, quoteUTF + 3, quoteUTF };
3233
3234 const byte* pQuoteEncoded = quoteEncodings[writer->outputEncoding - 1];
3235 size_t minSequenceLength = (size_t)SHORTEST_ENCODING_SEQUENCE(writer->outputEncoding);
3236 DecoderData decoderData;
3237 WriteBufferData bufferData;
3238 size_t i = 0;
3239
3240 WriteBuffer_Reset(&bufferData);
3241
3242 /* Start quote. */
3243 if (!WriteBuffer_WriteBytes(&bufferData, writer, pQuoteEncoded, minSequenceLength))
3244 return JSON_Failure;
3245
3246 /* String contents. */
3247 Decoder_Reset(&decoderData);
3248 while (i < length)
3249 {
3250 DecoderOutput output = Decoder_ProcessByte(&decoderData, encoding, pBytes[i]);
3251 DecoderResultCode result = DECODER_RESULT_CODE(output);
3252 Codepoint c;
3253 Codepoint escapeCharacter;
3254 switch (result)
3255 {
3256 case SEQUENCE_PENDING:
3257 i++;
3258 break;
3259
3260 case SEQUENCE_COMPLETE:
3261 c = DECODER_CODEPOINT(output);
3262 escapeCharacter = JSON_Writer_GetCodepointEscapeCharacter(writer, c);
3263 switch (escapeCharacter)
3264 {
3265 case 0:
3266 /* Output the codepoint as a normal encoding sequence. */
3267 if (!WriteBuffer_WriteCodepoint(&bufferData, writer, c))
3268 return JSON_Failure;
3269 break;
3270
3271 case 'u':
3272 /* Output the codepoint as 1 or 2 hex escape sequences. */
3273 if (!WriteBuffer_WriteHexEscapeSequence(&bufferData, writer, c))
3274 return JSON_Failure;
3275 break;
3276
3277 default:
3278 /* Output the codepoint as a simple escape sequence. */
3279 if (!WriteBuffer_WriteCodepoint(&bufferData, writer, '\\') ||
3280 !WriteBuffer_WriteCodepoint(&bufferData, writer, escapeCharacter))
3281 return JSON_Failure;
3282 break;
3283 }
3284 i++;
3285 break;
3286
3287 case SEQUENCE_INVALID_INCLUSIVE:
3288 i++;
3289 /* fallthrough */
3290 case SEQUENCE_INVALID_EXCLUSIVE:
3291 if (GET_FLAGS(writer->flags, WRITER_REPLACE_INVALID))
3292 {
3293 if (!WriteBuffer_WriteHexEscapeSequence(&bufferData, writer, REPLACEMENT_CHARACTER_CODEPOINT))
3294 return JSON_Failure;
3295 }
3296 else
3297 {
3298 /* Output whatever valid bytes we've accumulated before failing. */
3299 if (WriteBuffer_Flush(&bufferData, writer))
3300 JSON_Writer_SetError(writer, JSON_Error_InvalidEncodingSequence);
3301 return JSON_Failure;
3302 }
3303 break;
3304 }
3305 }
3306 if (Decoder_SequencePending(&decoderData))
3307 {
3308 if (GET_FLAGS(writer->flags, WRITER_REPLACE_INVALID))
3309 {
3310 if (!WriteBuffer_WriteHexEscapeSequence(&bufferData, writer, REPLACEMENT_CHARACTER_CODEPOINT))
3311 return JSON_Failure;
3312 }
3313 else
3314 {
3315 /* Output whatever valid bytes we've accumulated before failing. */
3316 if (WriteBuffer_Flush(&bufferData, writer))
3317 JSON_Writer_SetError(writer, JSON_Error_InvalidEncodingSequence);
3318 return JSON_Failure;
3319 }
3320 }
3321
3322 /* End quote. */
3323 if (!WriteBuffer_WriteBytes(&bufferData, writer, pQuoteEncoded, minSequenceLength) ||
3324 !WriteBuffer_Flush(&bufferData, writer))
3325 return JSON_Failure;
3326 return JSON_Success;
3327 }
3328
/* Advances the number-lexing state machine by one codepoint.
 *
 * `state` is the current state (LEXING_WHITESPACE before the first
 * character) and `c` the next codepoint, or EOF_CODEPOINT to signal the
 * end of the number. Returns the next state: LEXER_ERROR if `c` is not
 * allowed in `state`, LEXING_WHITESPACE if EOF_CODEPOINT arrives in a
 * state where the number may end. States that are not part of number
 * lexing are returned unchanged. */
static LexerState LexNumberCharacter(LexerState state, Codepoint c)
{
   const int isDigit        = (c >= '0' && c <= '9');
   const int isNonZeroDigit = (c >= '1' && c <= '9');
   const int isHexDigit     = isDigit || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f');
   const int isExponentMark = (c == 'e' || c == 'E');
   const int isEnd          = (c == EOF_CODEPOINT);
   /* Anything not explicitly accepted below is an error. */
   LexerState next          = LEXER_ERROR;

   switch (state)
   {
      case LEXING_WHITESPACE:
         /* First character of the number. */
         if (c == '-')
            next = LEXING_NUMBER_AFTER_MINUS;
         else if (c == '0')
            next = LEXING_NUMBER_AFTER_LEADING_ZERO;
         else if (isNonZeroDigit)
            next = LEXING_NUMBER_DECIMAL_DIGITS;
         break;

      case LEXING_NUMBER_AFTER_MINUS:
         if (c == '0')
            next = LEXING_NUMBER_AFTER_LEADING_NEGATIVE_ZERO;
         else if (isNonZeroDigit)
            next = LEXING_NUMBER_DECIMAL_DIGITS;
         break;

      case LEXING_NUMBER_AFTER_LEADING_ZERO:
      case LEXING_NUMBER_AFTER_LEADING_NEGATIVE_ZERO:
         if (c == '.')
            next = LEXING_NUMBER_AFTER_DOT;
         else if (isExponentMark)
            next = LEXING_NUMBER_AFTER_E;
         else if ((c == 'x' || c == 'X') && state == LEXING_NUMBER_AFTER_LEADING_ZERO)
            next = LEXING_NUMBER_AFTER_X; /* hex prefix; not after "-0" */
         else if (isEnd)
            next = LEXING_WHITESPACE;
         break;

      case LEXING_NUMBER_AFTER_X:
         /* At least one hex digit is required. */
         if (isHexDigit)
            next = LEXING_NUMBER_HEX_DIGITS;
         break;

      case LEXING_NUMBER_HEX_DIGITS:
         if (isHexDigit)
            next = state;
         else if (isEnd)
            next = LEXING_WHITESPACE;
         break;

      case LEXING_NUMBER_DECIMAL_DIGITS:
         if (isDigit)
            next = state;
         else if (c == '.')
            next = LEXING_NUMBER_AFTER_DOT;
         else if (isExponentMark)
            next = LEXING_NUMBER_AFTER_E;
         else if (isEnd)
            next = LEXING_WHITESPACE;
         break;

      case LEXING_NUMBER_AFTER_DOT:
         /* At least one fractional digit is required. */
         if (isDigit)
            next = LEXING_NUMBER_FRACTIONAL_DIGITS;
         break;

      case LEXING_NUMBER_FRACTIONAL_DIGITS:
         if (isDigit)
            next = state;
         else if (isExponentMark)
            next = LEXING_NUMBER_AFTER_E;
         else if (isEnd)
            next = LEXING_WHITESPACE;
         break;

      case LEXING_NUMBER_AFTER_E:
         if (c == '+' || c == '-')
            next = LEXING_NUMBER_AFTER_EXPONENT_SIGN;
         else if (isDigit)
            next = LEXING_NUMBER_EXPONENT_DIGITS;
         break;

      case LEXING_NUMBER_AFTER_EXPONENT_SIGN:
         /* At least one exponent digit is required. */
         if (isDigit)
            next = LEXING_NUMBER_EXPONENT_DIGITS;
         break;

      case LEXING_NUMBER_EXPONENT_DIGITS:
         if (isDigit)
            next = state;
         else if (isEnd)
            next = LEXING_WHITESPACE;
         break;

      default:
         /* Not a number-lexing state: leave it as it is. */
         next = state;
         break;
   }
   return next;
}
3449
JSON_Writer_OutputNumber(JSON_Writer writer,const byte * pBytes,size_t length,Encoding encoding)3450 static JSON_Status JSON_Writer_OutputNumber(JSON_Writer writer, const byte* pBytes, size_t length, Encoding encoding)
3451 {
3452 DecoderData decoderData;
3453 WriteBufferData bufferData;
3454 LexerState lexerState = LEXING_WHITESPACE;
3455 size_t i;
3456 Decoder_Reset(&decoderData);
3457 WriteBuffer_Reset(&bufferData);
3458 for (i = 0; i < length; i++)
3459 {
3460 DecoderOutput output = Decoder_ProcessByte(&decoderData, encoding, pBytes[i]);
3461 DecoderResultCode result = DECODER_RESULT_CODE(output);
3462 Codepoint c;
3463 switch (result)
3464 {
3465 case SEQUENCE_PENDING:
3466 break;
3467
3468 case SEQUENCE_COMPLETE:
3469 c = DECODER_CODEPOINT(output);
3470 lexerState = LexNumberCharacter(lexerState, c);
3471 if (lexerState == LEXER_ERROR)
3472 {
3473 /* Output whatever valid bytes we've accumulated before failing. */
3474 if (WriteBuffer_Flush(&bufferData, writer))
3475 JSON_Writer_SetError(writer, JSON_Error_InvalidNumber);
3476 return JSON_Failure;
3477 }
3478 if (!WriteBuffer_WriteCodepoint(&bufferData, writer, c))
3479 return JSON_Failure;
3480 break;
3481
3482 case SEQUENCE_INVALID_INCLUSIVE:
3483 case SEQUENCE_INVALID_EXCLUSIVE:
3484 /* Output whatever valid bytes we've accumulated before failing. */
3485 if (WriteBuffer_Flush(&bufferData, writer))
3486 JSON_Writer_SetError(writer, JSON_Error_InvalidEncodingSequence);
3487 return JSON_Failure;
3488 }
3489 }
3490 if (!WriteBuffer_Flush(&bufferData, writer))
3491 return JSON_Failure;
3492 if (Decoder_SequencePending(&decoderData))
3493 {
3494 JSON_Writer_SetError(writer, JSON_Error_InvalidEncodingSequence);
3495 return JSON_Failure;
3496 }
3497 if (LexNumberCharacter(lexerState, EOF_CODEPOINT) == LEXER_ERROR)
3498 {
3499 JSON_Writer_SetError(writer, JSON_Error_InvalidNumber);
3500 return JSON_Failure;
3501 }
3502 return JSON_Success;
3503 }
3504
3505 #define SPACES_PER_CHUNK 8
JSON_Writer_OutputSpaces(JSON_Writer writer,size_t numberOfSpaces)3506 static JSON_Status JSON_Writer_OutputSpaces(JSON_Writer writer, size_t numberOfSpaces)
3507 {
3508 static const byte spacesUTF8[SPACES_PER_CHUNK] = { ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ' };
3509 static const byte spacesUTF16[SPACES_PER_CHUNK * 2 + 1] = { 0, ' ', 0, ' ', 0, ' ', 0, ' ', 0, ' ', 0, ' ', 0, ' ', 0, ' ', 0 };
3510 static const byte spacesUTF32[SPACES_PER_CHUNK * 4 + 3] = { 0, 0, 0, ' ', 0, 0, 0, ' ', 0, 0, 0, ' ', 0, 0, 0, ' ', 0, 0, 0, ' ', 0, 0, 0, ' ', 0, 0, 0, ' ', 0, 0, 0, ' ', 0, 0, 0 };
3511 static const byte* const spacesEncodings[5] = { spacesUTF8, spacesUTF16 + 1, spacesUTF16, spacesUTF32 + 3, spacesUTF32 };
3512
3513 size_t encodedLength = (size_t)SHORTEST_ENCODING_SEQUENCE(writer->outputEncoding);
3514 const byte* encoded = spacesEncodings[writer->outputEncoding - 1];
3515 while (numberOfSpaces > SPACES_PER_CHUNK)
3516 {
3517 if (!JSON_Writer_OutputBytes(writer, encoded, SPACES_PER_CHUNK * encodedLength))
3518 return JSON_Failure;
3519 numberOfSpaces -= SPACES_PER_CHUNK;
3520 }
3521
3522 if (!JSON_Writer_OutputBytes(writer, encoded, numberOfSpaces * encodedLength))
3523 return JSON_Failure;
3524 return JSON_Success;
3525 }
3526
JSON_Writer_WriteSimpleToken(JSON_Writer writer,Symbol token,const byte * const * encodings,size_t length)3527 static JSON_Status JSON_Writer_WriteSimpleToken(JSON_Writer writer, Symbol token, const byte* const* encodings, size_t length)
3528 {
3529 JSON_Status status = JSON_Failure;
3530 if (writer && !GET_FLAGS(writer->state, WRITER_IN_PROTECTED_API) && writer->error == JSON_Error_None)
3531 {
3532 size_t encodedLength = length * (size_t)SHORTEST_ENCODING_SEQUENCE(writer->outputEncoding);
3533 SET_FLAGS_ON(WriterState, writer->state, WRITER_STARTED | WRITER_IN_PROTECTED_API);
3534 if (JSON_Writer_ProcessToken(writer, token) &&
3535 JSON_Writer_OutputBytes(writer, encodings[writer->outputEncoding - 1], encodedLength))
3536 status = JSON_Success;
3537 SET_FLAGS_OFF(WriterState, writer->state, WRITER_IN_PROTECTED_API);
3538 }
3539 return status;
3540 }
3541
3542 /* Writer API functions. */
3543
JSON_Writer_Create(const JSON_MemorySuite * pMemorySuite)3544 JSON_Writer JSON_CALL JSON_Writer_Create(const JSON_MemorySuite* pMemorySuite)
3545 {
3546 JSON_Writer writer;
3547 JSON_MemorySuite memorySuite;
3548 if (pMemorySuite)
3549 {
3550 memorySuite = *pMemorySuite;
3551 /* The full memory suite must be specified. */
3552 if (!memorySuite.realloc || !memorySuite.free)
3553 return NULL;
3554 }
3555 else
3556 memorySuite = defaultMemorySuite;
3557
3558 writer = (JSON_Writer)memorySuite.realloc(memorySuite.userData, NULL, sizeof(struct JSON_Writer_Data));
3559
3560 if (!writer)
3561 return NULL;
3562
3563 writer->memorySuite = memorySuite;
3564 JSON_Writer_ResetData(writer, 0/* isInitialized */);
3565 return writer;
3566 }
3567
JSON_Writer_Free(JSON_Writer writer)3568 JSON_Status JSON_CALL JSON_Writer_Free(JSON_Writer writer)
3569 {
3570 if (!writer || GET_FLAGS(writer->state, WRITER_IN_PROTECTED_API))
3571 return JSON_Failure;
3572
3573 SET_FLAGS_ON(WriterState, writer->state, WRITER_IN_PROTECTED_API);
3574 Grammarian_FreeAllocations(&writer->grammarianData, &writer->memorySuite);
3575 writer->memorySuite.free(writer->memorySuite.userData, writer);
3576 return JSON_Success;
3577 }
3578
JSON_Writer_Reset(JSON_Writer writer)3579 JSON_Status JSON_CALL JSON_Writer_Reset(JSON_Writer writer)
3580 {
3581 if (!writer || GET_FLAGS(writer->state, WRITER_IN_PROTECTED_API))
3582 return JSON_Failure;
3583
3584 SET_FLAGS_ON(WriterState, writer->state, WRITER_IN_PROTECTED_API);
3585 JSON_Writer_ResetData(writer, 1/* isInitialized */);
3586 /* Note that JSON_Writer_ResetData() unset WRITER_IN_PROTECTED_API for us. */
3587 return JSON_Success;
3588 }
3589
JSON_Writer_GetUserData(JSON_Writer writer)3590 void* JSON_CALL JSON_Writer_GetUserData(JSON_Writer writer)
3591 {
3592 return writer ? writer->userData : NULL;
3593 }
3594
JSON_Writer_SetUserData(JSON_Writer writer,void * userData)3595 JSON_Status JSON_CALL JSON_Writer_SetUserData(JSON_Writer writer, void* userData)
3596 {
3597 if (!writer)
3598 return JSON_Failure;
3599
3600 writer->userData = userData;
3601 return JSON_Success;
3602 }
3603
JSON_Writer_GetOutputEncoding(JSON_Writer writer)3604 JSON_Encoding JSON_CALL JSON_Writer_GetOutputEncoding(JSON_Writer writer)
3605 {
3606 return writer ? (JSON_Encoding)writer->outputEncoding : JSON_UTF8;
3607 }
3608
JSON_Writer_SetOutputEncoding(JSON_Writer writer,JSON_Encoding encoding)3609 JSON_Status JSON_CALL JSON_Writer_SetOutputEncoding(JSON_Writer writer, JSON_Encoding encoding)
3610 {
3611 if (!writer || GET_FLAGS(writer->state, WRITER_STARTED) || encoding <= JSON_UnknownEncoding || encoding > JSON_UTF32BE)
3612 return JSON_Failure;
3613
3614 writer->outputEncoding = (Encoding)encoding;
3615 return JSON_Success;
3616 }
3617
JSON_Writer_GetUseCRLF(JSON_Writer writer)3618 JSON_Boolean JSON_CALL JSON_Writer_GetUseCRLF(JSON_Writer writer)
3619 {
3620 return (writer && GET_FLAGS(writer->flags, WRITER_USE_CRLF)) ? JSON_True : JSON_False;
3621 }
3622
JSON_Writer_SetUseCRLF(JSON_Writer writer,JSON_Boolean useCRLF)3623 JSON_Status JSON_CALL JSON_Writer_SetUseCRLF(JSON_Writer writer, JSON_Boolean useCRLF)
3624 {
3625 if (!writer || GET_FLAGS(writer->state, WRITER_STARTED))
3626 return JSON_Failure;
3627
3628 SET_FLAGS(WriterFlags, writer->flags, WRITER_USE_CRLF, useCRLF);
3629 return JSON_Success;
3630 }
3631
JSON_Writer_GetReplaceInvalidEncodingSequences(JSON_Writer writer)3632 JSON_Boolean JSON_CALL JSON_Writer_GetReplaceInvalidEncodingSequences(JSON_Writer writer)
3633 {
3634 return (writer && GET_FLAGS(writer->flags, WRITER_REPLACE_INVALID)) ? JSON_True : JSON_False;
3635 }
3636
JSON_Writer_SetReplaceInvalidEncodingSequences(JSON_Writer writer,JSON_Boolean replaceInvalidEncodingSequences)3637 JSON_Status JSON_CALL JSON_Writer_SetReplaceInvalidEncodingSequences(JSON_Writer writer, JSON_Boolean replaceInvalidEncodingSequences)
3638 {
3639 if (!writer || GET_FLAGS(writer->state, WRITER_STARTED))
3640 return JSON_Failure;
3641
3642 SET_FLAGS(WriterFlags, writer->flags, WRITER_REPLACE_INVALID, replaceInvalidEncodingSequences);
3643 return JSON_Success;
3644 }
3645
JSON_Writer_GetEscapeAllNonASCIICharacters(JSON_Writer writer)3646 JSON_Boolean JSON_CALL JSON_Writer_GetEscapeAllNonASCIICharacters(JSON_Writer writer)
3647 {
3648 return (writer && GET_FLAGS(writer->flags, WRITER_ESCAPE_NON_ASCII)) ? JSON_True : JSON_False;
3649 }
3650
JSON_Writer_SetEscapeAllNonASCIICharacters(JSON_Writer writer,JSON_Boolean escapeAllNonASCIICharacters)3651 JSON_Status JSON_CALL JSON_Writer_SetEscapeAllNonASCIICharacters(JSON_Writer writer, JSON_Boolean escapeAllNonASCIICharacters)
3652 {
3653 if (!writer || GET_FLAGS(writer->state, WRITER_STARTED))
3654 return JSON_Failure;
3655
3656 SET_FLAGS(WriterFlags, writer->flags, WRITER_ESCAPE_NON_ASCII, escapeAllNonASCIICharacters);
3657 return JSON_Success;
3658 }
3659
JSON_Writer_GetError(JSON_Writer writer)3660 JSON_Error JSON_CALL JSON_Writer_GetError(JSON_Writer writer)
3661 {
3662 return writer ? (JSON_Error)writer->error : JSON_Error_None;
3663 }
3664
JSON_Writer_GetOutputHandler(JSON_Writer writer)3665 JSON_Writer_OutputHandler JSON_CALL JSON_Writer_GetOutputHandler(JSON_Writer writer)
3666 {
3667 return writer ? writer->outputHandler : NULL;
3668 }
3669
JSON_Writer_SetOutputHandler(JSON_Writer writer,JSON_Writer_OutputHandler handler)3670 JSON_Status JSON_CALL JSON_Writer_SetOutputHandler(JSON_Writer writer, JSON_Writer_OutputHandler handler)
3671 {
3672 if (!writer)
3673 return JSON_Failure;
3674
3675 writer->outputHandler = handler;
3676 return JSON_Success;
3677 }
3678
JSON_Writer_WriteNull(JSON_Writer writer)3679 JSON_Status JSON_CALL JSON_Writer_WriteNull(JSON_Writer writer)
3680 {
3681 static const byte nullUTF8[] = { 'n', 'u', 'l', 'l' };
3682 static const byte nullUTF16[] = { 0, 'n', 0, 'u', 0, 'l', 0, 'l', 0 };
3683 static const byte nullUTF32[] = { 0, 0, 0, 'n', 0, 0, 0, 'u', 0, 0, 0, 'l', 0, 0, 0, 'l', 0, 0, 0 };
3684 static const byte* const nullEncodings[5] = { nullUTF8, nullUTF16 + 1, nullUTF16, nullUTF32 + 3, nullUTF32 };
3685
3686 return JSON_Writer_WriteSimpleToken(writer, T_NULL, nullEncodings, sizeof(nullUTF8));
3687 }
3688
JSON_Writer_WriteBoolean(JSON_Writer writer,JSON_Boolean value)3689 JSON_Status JSON_CALL JSON_Writer_WriteBoolean(JSON_Writer writer, JSON_Boolean value)
3690 {
3691 static const byte trueUTF8[] = { 't', 'r', 'u', 'e' };
3692 static const byte trueUTF16[] = { 0, 't', 0, 'r', 0, 'u', 0, 'e', 0 };
3693 static const byte trueUTF32[] = { 0, 0, 0, 't', 0, 0, 0, 'r', 0, 0, 0, 'u', 0, 0, 0, 'e', 0, 0, 0 };
3694 static const byte* const trueEncodings[5] = { trueUTF8, trueUTF16 + 1, trueUTF16, trueUTF32 + 3, trueUTF32 };
3695
3696 static const byte falseUTF8[] = { 'f', 'a', 'l', 's', 'e' };
3697 static const byte falseUTF16[] = { 0, 'f', 0, 'a', 0, 'l', 0, 's', 0, 'e', 0 };
3698 static const byte falseUTF32[] = { 0, 0, 0, 'f', 0, 0, 0, 'a', 0, 0, 0, 'l', 0, 0, 0, 's', 0, 0, 0, 'e', 0, 0, 0 };
3699 static const byte* const falseEncodings[5] = { falseUTF8, falseUTF16 + 1, falseUTF16, falseUTF32 + 3, falseUTF32 };
3700
3701 Symbol token;
3702 const byte* const* encodings;
3703 size_t length;
3704 if (value)
3705 {
3706 token = T_TRUE;
3707 encodings = trueEncodings;
3708 length = sizeof(trueUTF8);
3709 }
3710 else
3711 {
3712 token = T_FALSE;
3713 encodings = falseEncodings;
3714 length = sizeof(falseUTF8);
3715 }
3716 return JSON_Writer_WriteSimpleToken(writer, token, encodings, length);
3717 }
3718
JSON_Writer_WriteString(JSON_Writer writer,const char * pValue,size_t length,JSON_Encoding encoding)3719 JSON_Status JSON_CALL JSON_Writer_WriteString(JSON_Writer writer, const char* pValue, size_t length, JSON_Encoding encoding)
3720 {
3721 JSON_Status status = JSON_Failure;
3722 if (writer && (pValue || !length) && encoding > JSON_UnknownEncoding && encoding <= JSON_UTF32BE &&
3723 !GET_FLAGS(writer->state, WRITER_IN_PROTECTED_API) && writer->error == JSON_Error_None)
3724 {
3725 SET_FLAGS_ON(WriterState, writer->state, WRITER_STARTED | WRITER_IN_PROTECTED_API);
3726 if (JSON_Writer_ProcessToken(writer, T_STRING))
3727 status = JSON_Writer_OutputString(writer, (const byte*)pValue, length, (Encoding)encoding);
3728
3729 SET_FLAGS_OFF(WriterState, writer->state, WRITER_IN_PROTECTED_API);
3730 }
3731 return status;
3732 }
3733
JSON_Writer_WriteNumber(JSON_Writer writer,const char * pValue,size_t length,JSON_Encoding encoding)3734 JSON_Status JSON_CALL JSON_Writer_WriteNumber(JSON_Writer writer, const char* pValue, size_t length, JSON_Encoding encoding)
3735 {
3736 JSON_Status status = JSON_Failure;
3737 if (writer && pValue && length && encoding > JSON_UnknownEncoding && encoding <= JSON_UTF32BE &&
3738 !GET_FLAGS(writer->state, WRITER_IN_PROTECTED_API) && writer->error == JSON_Error_None)
3739 {
3740 SET_FLAGS_ON(WriterState, writer->state, WRITER_STARTED | WRITER_IN_PROTECTED_API);
3741 if (JSON_Writer_ProcessToken(writer, T_NUMBER))
3742 status = JSON_Writer_OutputNumber(writer, (const byte*)pValue, length, (Encoding)encoding);
3743
3744 SET_FLAGS_OFF(WriterState, writer->state, WRITER_IN_PROTECTED_API);
3745 }
3746 return status;
3747 }
3748
JSON_Writer_WriteSpecialNumber(JSON_Writer writer,JSON_SpecialNumber value)3749 JSON_Status JSON_CALL JSON_Writer_WriteSpecialNumber(JSON_Writer writer, JSON_SpecialNumber value)
3750 {
3751 static const byte nanUTF8[] = { 'N', 'a', 'N' };
3752 static const byte nanUTF16[] = { 0, 'N', 0, 'a', 0, 'N', 0 };
3753 static const byte nanUTF32[] = { 0, 0, 0, 'N', 0, 0, 0, 'a', 0, 0, 0, 'N', 0, 0, 0 };
3754 static const byte* const nanEncodings[5] = { nanUTF8, nanUTF16 + 1, nanUTF16, nanUTF32 + 3, nanUTF32 };
3755
3756 static const byte ninfUTF8[] = { '-', 'I', 'n', 'f', 'i', 'n', 'i', 't', 'y' };
3757 static const byte ninfUTF16[] = { 0, '-', 0, 'I', 0, 'n', 0, 'f', 0, 'i', 0, 'n', 0, 'i', 0, 't', 0, 'y', 0 };
3758 static const byte ninfUTF32[] = { 0, 0, 0, '-', 0, 0, 0, 'I', 0, 0, 0, 'n', 0, 0, 0, 'f', 0, 0, 0, 'i', 0, 0, 0, 'n', 0, 0, 0, 'i', 0, 0, 0, 't', 0, 0, 0, 'y', 0, 0, 0 };
3759 static const byte* const infinityEncodings[5] = { ninfUTF8 + 1, ninfUTF16 + 3, ninfUTF16 + 2, ninfUTF32 + 7, ninfUTF32 + 4 };
3760 static const byte* const negativeInfinityEncodings[5] = { ninfUTF8, ninfUTF16 + 1, ninfUTF16, ninfUTF32 + 3, ninfUTF32 };
3761
3762 Symbol token;
3763 const byte* const* encodings;
3764 size_t length;
3765 if (value == JSON_Infinity)
3766 {
3767 token = T_INFINITY;
3768 encodings = infinityEncodings;
3769 length = sizeof(ninfUTF8) - 1/* - */;
3770 }
3771 else if (value == JSON_NegativeInfinity)
3772 {
3773 token = T_NEGATIVE_INFINITY;
3774 encodings = negativeInfinityEncodings;
3775 length = sizeof(ninfUTF8);
3776 }
3777 else
3778 {
3779 token = T_NAN;
3780 encodings = nanEncodings;
3781 length = sizeof(nanUTF8);
3782 }
3783 return JSON_Writer_WriteSimpleToken(writer, token, encodings, length);
3784 }
3785
JSON_Writer_WriteStartObject(JSON_Writer writer)3786 JSON_Status JSON_CALL JSON_Writer_WriteStartObject(JSON_Writer writer)
3787 {
3788 static const byte utf[] = { 0, 0, 0, '{', 0, 0, 0 };
3789 static const byte* const encodings[5] = { utf + 3, utf + 3, utf + 2, utf + 3, utf };
3790
3791 return JSON_Writer_WriteSimpleToken(writer, T_LEFT_CURLY, encodings, 1);
3792 }
3793
JSON_Writer_WriteEndObject(JSON_Writer writer)3794 JSON_Status JSON_CALL JSON_Writer_WriteEndObject(JSON_Writer writer)
3795 {
3796 static const byte utf[] = { 0, 0, 0, '}', 0, 0, 0 };
3797 static const byte* const encodings[5] = { utf + 3, utf + 3, utf + 2, utf + 3, utf };
3798
3799 return JSON_Writer_WriteSimpleToken(writer, T_RIGHT_CURLY, encodings, 1);
3800 }
3801
JSON_Writer_WriteStartArray(JSON_Writer writer)3802 JSON_Status JSON_CALL JSON_Writer_WriteStartArray(JSON_Writer writer)
3803 {
3804 static const byte utf[] = { 0, 0, 0, '[', 0, 0, 0 };
3805 static const byte* const encodings[5] = { utf + 3, utf + 3, utf + 2, utf + 3, utf };
3806
3807 return JSON_Writer_WriteSimpleToken(writer, T_LEFT_SQUARE, encodings, 1);
3808 }
3809
JSON_Writer_WriteEndArray(JSON_Writer writer)3810 JSON_Status JSON_CALL JSON_Writer_WriteEndArray(JSON_Writer writer)
3811 {
3812 static const byte utf[] = { 0, 0, 0, ']', 0, 0, 0 };
3813 static const byte* const encodings[5] = { utf + 3, utf + 3, utf + 2, utf + 3, utf };
3814
3815 return JSON_Writer_WriteSimpleToken(writer, T_RIGHT_SQUARE, encodings, 1);
3816 }
3817
JSON_Writer_WriteColon(JSON_Writer writer)3818 JSON_Status JSON_CALL JSON_Writer_WriteColon(JSON_Writer writer)
3819 {
3820 static const byte utf[] = { 0, 0, 0, ':', 0, 0, 0 };
3821 static const byte* const encodings[5] = { utf + 3, utf + 3, utf + 2, utf + 3, utf };
3822
3823 return JSON_Writer_WriteSimpleToken(writer, T_COLON, encodings, 1);
3824 }
3825
JSON_Writer_WriteComma(JSON_Writer writer)3826 JSON_Status JSON_CALL JSON_Writer_WriteComma(JSON_Writer writer)
3827 {
3828 static const byte utf[] = { 0, 0, 0, ',', 0, 0, 0 };
3829 static const byte* const encodings[5] = { utf + 3, utf + 3, utf + 2, utf + 3, utf };
3830
3831 return JSON_Writer_WriteSimpleToken(writer, T_COMMA, encodings, 1);
3832 }
3833
JSON_Writer_WriteSpace(JSON_Writer writer,size_t numberOfSpaces)3834 JSON_Status JSON_CALL JSON_Writer_WriteSpace(JSON_Writer writer, size_t numberOfSpaces)
3835 {
3836 JSON_Status status = JSON_Failure;
3837 if (writer && !GET_FLAGS(writer->state, WRITER_IN_PROTECTED_API) && writer->error == JSON_Error_None)
3838 {
3839 SET_FLAGS_ON(WriterState, writer->state, WRITER_STARTED | WRITER_IN_PROTECTED_API);
3840 status = JSON_Writer_OutputSpaces(writer, numberOfSpaces);
3841 SET_FLAGS_OFF(WriterState, writer->state, WRITER_IN_PROTECTED_API);
3842 }
3843 return status;
3844 }
3845
JSON_Writer_WriteNewLine(JSON_Writer writer)3846 JSON_Status JSON_CALL JSON_Writer_WriteNewLine(JSON_Writer writer)
3847 {
3848 static const byte lfUTF[] = { 0, 0, 0, LINE_FEED_CODEPOINT, 0, 0, 0 };
3849 static const byte* const lfEncodings[5] = { lfUTF + 3, lfUTF + 3, lfUTF + 2, lfUTF + 3, lfUTF };
3850
3851 static const byte crlfUTF8[] = { CARRIAGE_RETURN_CODEPOINT, LINE_FEED_CODEPOINT };
3852 static const byte crlfUTF16[] = { 0, CARRIAGE_RETURN_CODEPOINT, 0, LINE_FEED_CODEPOINT, 0 };
3853 static const byte crlfUTF32[] = { 0, 0, 0, CARRIAGE_RETURN_CODEPOINT, 0, 0, 0, LINE_FEED_CODEPOINT, 0, 0, 0 };
3854 static const byte* const crlfEncodings[5] = { crlfUTF8, crlfUTF16 + 1, crlfUTF16, crlfUTF32 + 3, crlfUTF32 };
3855
3856 JSON_Status status = JSON_Failure;
3857 if (writer && !GET_FLAGS(writer->state, WRITER_IN_PROTECTED_API) && writer->error == JSON_Error_None)
3858 {
3859 const byte* const* encodings;
3860 size_t length;
3861 size_t encodedLength;
3862 SET_FLAGS_ON(WriterState, writer->state, WRITER_STARTED | WRITER_IN_PROTECTED_API);
3863 if (GET_FLAGS(writer->flags, WRITER_USE_CRLF))
3864 {
3865 encodings = crlfEncodings;
3866 length = 2;
3867 }
3868 else
3869 {
3870 encodings = lfEncodings;
3871 length = 1;
3872 }
3873 encodedLength = length * (size_t)SHORTEST_ENCODING_SEQUENCE(writer->outputEncoding);
3874 if (JSON_Writer_OutputBytes(writer, encodings[writer->outputEncoding - 1], encodedLength))
3875 status = JSON_Success;
3876 SET_FLAGS_OFF(WriterState, writer->state, WRITER_IN_PROTECTED_API);
3877 }
3878 return status;
3879 }
3880
3881 #endif /* JSON_NO_WRITER */
3882
3883 /******************** Miscellaneous API ********************/
3884
JSON_LibraryVersion(void)3885 const JSON_Version* JSON_CALL JSON_LibraryVersion(void)
3886 {
3887 static JSON_Version version = { JSON_MAJOR_VERSION, JSON_MINOR_VERSION, JSON_MICRO_VERSION };
3888 return &version;
3889 }
3890
JSON_ErrorString(JSON_Error error)3891 const char* JSON_CALL JSON_ErrorString(JSON_Error error)
3892 {
3893 /* This array must match the order and number of the JSON_Error enum. */
3894 static const char* errorStrings[] =
3895 {
3896 /* JSON_Error_None */ "no error",
3897 /* JSON_Error_OutOfMemory */ "could not allocate enough memory",
3898 /* JSON_Error_AbortedByHandler */ "the operation was aborted by a handler",
3899 /* JSON_Error_BOMNotAllowed */ "the input begins with a byte-order mark (BOM), which is not allowed by RFC 4627",
3900 /* JSON_Error_InvalidEncodingSequence */ "the input contains a byte or sequence of bytes that is not valid for the input encoding",
3901 /* JSON_Error_UnknownToken */ "the input contains an unknown token",
3902 /* JSON_Error_UnexpectedToken */ "the input contains an unexpected token",
3903 /* JSON_Error_IncompleteToken */ "the input ends in the middle of a token",
3904 /* JSON_Error_MoreTokensExpected */ "the input ends when more tokens are expected",
3905 /* JSON_Error_UnescapedControlCharacter */ "the input contains a string containing an unescaped control character (U+0000 - U+001F)",
3906 /* JSON_Error_InvalidEscapeSequence */ "the input contains a string containing an invalid escape sequence",
3907 /* JSON_Error_UnpairedSurrogateEscapeSequence */ "the input contains a string containing an unmatched UTF-16 surrogate codepoint",
3908 /* JSON_Error_TooLongString */ "the input contains a string that is too long",
3909 /* JSON_Error_InvalidNumber */ "the input contains an invalid number",
3910 /* JSON_Error_TooLongNumber */ "the input contains a number that is too long",
3911 /* JSON_Error_DuplicateObjectMember */ "the input contains an object with duplicate members",
3912 /* JSON_Error_StoppedAfterEmbeddedDocument */ "the end of the embedded document was reached"
3913 };
3914 return ((unsigned int)error < (sizeof(errorStrings) / sizeof(errorStrings[0])))
3915 ? errorStrings[error]
3916 : "";
3917 }
3918
3919 static const uint32_t endianEncodings = (((uint32_t)JSON_UTF32BE) << 24) | (((uint32_t)JSON_UTF16BE) << 16) | (((uint32_t)JSON_UTF16LE) << 8) | ((uint32_t)JSON_UTF32LE);
3920
JSON_NativeUTF16Encoding(void)3921 JSON_Encoding JSON_CALL JSON_NativeUTF16Encoding(void)
3922 {
3923 return (JSON_Encoding)(((byte*)&endianEncodings)[1]);
3924 }
3925
JSON_NativeUTF32Encoding(void)3926 JSON_Encoding JSON_CALL JSON_NativeUTF32Encoding(void)
3927 {
3928 return (JSON_Encoding)(((byte*)&endianEncodings)[0]);
3929 }
3930