1 /*
2   Copyright (c) 2012 John-Anthony Owens
3 
4   Permission is hereby granted, free of charge, to any person obtaining a
5   copy of this software and associated documentation files (the "Software"),
6   to deal in the Software without restriction, including without limitation
7   the rights to use, copy, modify, merge, publish, distribute, sublicense,
8   and/or sell copies of the Software, and to permit persons to whom the
9   Software is furnished to do so, subject to the following conditions:
10 
11   The above copyright notice and this permission notice shall be included
12   in all copies or substantial portions of the Software.
13 
14   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19   FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
20   IN THE SOFTWARE.
21 */
22 
23 #include <stdlib.h>
24 #include <string.h>
25 
26 /* Ensure uint32_t type (compiler-dependent). */
27 #if defined(_MSC_VER)
28 typedef unsigned __int32 uint32_t;
29 #else
30 #include <stdint.h>
31 #endif
32 
33 /* Ensure SIZE_MAX defined. */
34 #ifndef SIZE_MAX
35 #define SIZE_MAX ((size_t)-1)
36 #endif
37 
38 /* Mark APIs for export (as opposed to import) when we build this file. */
39 #define JSON_BUILDING
40 #include <formats/jsonsax_full.h>
41 
42 /* Default allocation constants. */
/* Presumably the initial capacity, in bytes, of the token buffer; the code
   that uses it is outside this view - confirm against the lexer. */
43 #define DEFAULT_TOKEN_BYTES_LENGTH 64 /* MUST be a power of 2 */
/* Number of Symbol entries in GrammarianData.defaultStack, i.e. how deep the
   symbol stack can get before a heap allocation is needed. */
44 #define DEFAULT_SYMBOL_STACK_SIZE  32 /* MUST be a power of 2 */
45 
46 /* Types for readability. */
/* Unsigned byte, used for encoded input/output and for compact enum storage. */
47 typedef unsigned char byte;
/* A Unicode codepoint. 32 bits are needed so that the EOF_CODEPOINT sentinel
   (0xFFFFFFFF) can be represented alongside real codepoints (<= 0x10FFFF). */
48 typedef uint32_t Codepoint;
49 
50 /* Especially-relevant Unicode codepoints. */
/* U_(x) casts a literal to Codepoint so that every constant below has the
   same unsigned 32-bit type. */
51 #define U_(x) ((Codepoint)(x))
52 #define NULL_CODEPOINT                  U_(0x0000)
53 #define BACKSPACE_CODEPOINT             U_(0x0008)
54 #define TAB_CODEPOINT                   U_(0x0009)
55 #define LINE_FEED_CODEPOINT             U_(0x000A)
56 #define FORM_FEED_CODEPOINT             U_(0x000C)
57 #define CARRIAGE_RETURN_CODEPOINT       U_(0x000D)
58 #define FIRST_NON_CONTROL_CODEPOINT     U_(0x0020)
59 #define DELETE_CODEPOINT                U_(0x007F)
60 #define FIRST_NON_ASCII_CODEPOINT       U_(0x0080)
/* Smallest codepoints that need 2, 3 and 4 bytes in UTF-8; anything decoded
   from a longer sequence but below the matching threshold is overlong. */
61 #define FIRST_2_BYTE_UTF8_CODEPOINT     U_(0x0080)
62 #define FIRST_3_BYTE_UTF8_CODEPOINT     U_(0x0800)
63 #define LINE_SEPARATOR_CODEPOINT        U_(0x2028)
64 #define PARAGRAPH_SEPARATOR_CODEPOINT   U_(0x2029)
65 #define BOM_CODEPOINT                   U_(0xFEFF)
66 #define REPLACEMENT_CHARACTER_CODEPOINT U_(0xFFFD)
/* Codepoints from here up need a surrogate pair in UTF-16. */
67 #define FIRST_NON_BMP_CODEPOINT         U_(0x10000)
68 #define FIRST_4_BYTE_UTF8_CODEPOINT     U_(0x10000)
69 #define MAX_CODEPOINT                   U_(0x10FFFF)
/* Sentinel above MAX_CODEPOINT, so it can never collide with a real
   codepoint. */
70 #define EOF_CODEPOINT                   U_(0xFFFFFFFF)
71 
72 /* Bit-masking macros. */
/* Each macro yields the N least-significant bits of x. These are the payload
   widths used by the UTF-8 decoder and encoder in this file: 3, 4 and 5 bits
   from a leading byte, 6 bits from a continuation byte. */
73 #define BOTTOM_3_BITS(x) ((x) & 0x7)
74 #define BOTTOM_4_BITS(x) ((x) & 0xF)
75 #define BOTTOM_5_BITS(x) ((x) & 0x1F)
76 #define BOTTOM_6_BITS(x) ((x) & 0x3F)
77 
78 /* Bit-flag macros. */
/* GET_FLAGS yields the bits of f that are set in x (non-zero if any are). */
79 #define GET_FLAGS(x, f)                  ((x) & (f))
/* The setters cast the mask to flagstype, the type of the flags variable x,
   before combining it with x. Note that the cast is applied after the
   complement in the "off" forms, so the inverted mask is what gets narrowed. */
80 #define SET_FLAGS_ON(flagstype, x, f)    do { (x) |= (flagstype)(f); } while (0)
81 #define SET_FLAGS_OFF(flagstype, x, f)   do { (x) &= (flagstype)~(f); } while (0)
/* Turns the bits in f on when cond is true and off otherwise. */
82 #define SET_FLAGS(flagstype, x, f, cond) do { if (cond) (x) |= (flagstype)(f); else (x) &= (flagstype)~(f); } while (0)
83 
84 /* UTF-8 byte-related macros. */
/* Classify a byte by its high-order bit pattern:
     0xxxxxxx  single-byte sequence
     10xxxxxx  continuation byte
     110xxxxx  first byte of a 2-byte sequence
     1110xxxx  first byte of a 3-byte sequence
     11110xxx  first byte of a 4-byte sequence
   These tests look only at the bit pattern; they do not reject overlong or
   out-of-range encodings. */
85 #define IS_UTF8_SINGLE_BYTE(b)       (((b) & 0x80) == 0)
86 #define IS_UTF8_CONTINUATION_BYTE(b) (((b) & 0xC0) == 0x80)
87 #define IS_UTF8_FIRST_BYTE_OF_2(b)   (((b) & 0xE0) == 0xC0)
88 #define IS_UTF8_FIRST_BYTE_OF_3(b)   (((b) & 0xF0) == 0xE0)
89 #define IS_UTF8_FIRST_BYTE_OF_4(b)   (((b) & 0xF8) == 0xF0)
90 
91 /* Unicode codepoint-related macros. */
/* True when c is a Unicode noncharacter: either one of the last two
   codepoints of a plane (U+nFFFE or U+nFFFF, for every plane n) or a member
   of the contiguous block U+FDD0 - U+FDEF. The mask must cover the low 16
   bits; testing only the low byte would also match ordinary characters such
   as U+00FE and U+00FF. */
#define IS_NONCHARACTER(c)               ((((c) & 0xFFFE) == 0xFFFE) || (((c) >= 0xFDD0) && ((c) <= 0xFDEF)))
/* Surrogate tests: U+D800 - U+DFFF (any), U+D800 - U+DBFF (leading) and
   U+DC00 - U+DFFF (trailing). */
93 #define IS_SURROGATE(c)                  (((c) & 0xFFFFF800) == 0xD800)
94 #define IS_LEADING_SURROGATE(c)          (((c) & 0xFFFFFC00) == 0xD800)
95 #define IS_TRAILING_SURROGATE(c)         (((c) & 0xFFFFFC00) == 0xDC00)
/* hi_lo holds the leading surrogate in its high 16 bits and the trailing
   surrogate in its low 16 bits. The constant equals
   0x10000 - (0xD800 << 10) - 0xDC00 modulo 2^32, so the sum relies on
   32-bit unsigned wraparound to remove both surrogate biases at once. */
96 #define CODEPOINT_FROM_SURROGATES(hi_lo) ((((hi_lo) >> 16) << 10) + ((hi_lo) & 0xFFFF) + 0xFCA02400)
/* Inverse of the above: yields the leading surrogate in the high 16 bits and
   the trailing surrogate in the low 16 bits. 0xD7C0 is 0xD800 minus
   (0x10000 >> 10), which folds the 0x10000 offset into the leading half. */
97 #define SURROGATES_FROM_CODEPOINT(c)     ((((c) << 6) & 0x7FF0000) + ((c) & 0x3FF) + 0xD7C0DC00)
/* NOTE(review): assumes the encoding constants from the public header are
   numbered so that enc >> 1 is 0 for UTF-8, 1 for UTF-16 and 2 for UTF-32,
   giving 1, 2 and 4 bytes - confirm against the header. */
98 #define SHORTEST_ENCODING_SEQUENCE(enc)  (1U << ((enc) >> 1))
/* No supported encoding uses more than 4 bytes per codepoint. */
99 #define LONGEST_ENCODING_SEQUENCE        4
100 
101 /* Internal types that alias enum types in the public API.
102    By using byte to represent these values internally,
103    we can guarantee minimal storage size and avoid compiler
104    warnings when using values of the type in switch statements
105    that don't have (or need) a default case. */
/* Holds one of the JSON_UTF8 / JSON_UTF16LE / JSON_UTF16BE / JSON_UTF32LE /
   JSON_UTF32BE values switched on by the decoder and encoder below. */
106 typedef byte Encoding;
/* Presumably mirror the error-code and token-attribute enums of the public
   header; their values are not visible in this part of the file. */
107 typedef byte Error;
108 typedef byte TokenAttributes;
109 
110 /******************** Default Memory Suite ********************/
111 
DefaultReallocHandler(void * userData,void * ptr,size_t size)112 static void* JSON_CALL DefaultReallocHandler(void* userData, void* ptr, size_t size)
113 {
114    (void)userData; /* unused */
115    return realloc(ptr, size);
116 }
117 
DefaultFreeHandler(void * userData,void * ptr)118 static void JSON_CALL DefaultFreeHandler(void* userData, void* ptr)
119 {
120    (void)userData; /* unused */
121    free(ptr);
122 }
123 
/* Memory suite backed by the C runtime heap: no user data, realloc() and
   free() wrappers as callbacks. The initializer is positional;
   NOTE(review): it presumably matches a userData / realloc / free member
   order in JSON_MemorySuite, which is declared in the public header. */
124 static const JSON_MemorySuite defaultMemorySuite = { NULL, &DefaultReallocHandler, &DefaultFreeHandler };
125 
/* Grows a buffer of length bytes to twice that size using the suite's
   realloc callback.

   pDefaultBuffer is the caller's built-in (non-heap) buffer. When pBuffer is
   still that built-in buffer, a fresh heap block is allocated and the first
   length bytes are copied into it; otherwise the existing heap block is
   resized in place by the callback.

   Returns the new buffer, or NULL if doubling length overflows size_t or the
   allocation fails. The buffer passed in is not released here on failure. */
static byte* DoubleBuffer(const JSON_MemorySuite* pMemorySuite, byte* pDefaultBuffer, byte* pBuffer, size_t length)
{
   const size_t doubled = length * 2;
   byte*        pGrown;

   /* Unsigned multiplication wraps, so a smaller result means overflow. */
   if (doubled < length)
      return NULL;

   /* Already on the heap: let the allocator resize the block. */
   if (pBuffer != pDefaultBuffer)
      return (byte*)pMemorySuite->realloc(pMemorySuite->userData, pBuffer, doubled);

   /* First growth: the built-in buffer cannot be passed to realloc, so
      allocate a new block and move the contents over by hand. */
   pGrown = (byte*)pMemorySuite->realloc(pMemorySuite->userData, NULL, doubled);
   if (pGrown)
      memcpy(pGrown, pDefaultBuffer, length);
   return pGrown;
}
147 
148 /******************** Unicode Decoder ********************/
149 
150 /* Mutually-exclusive decoder states. */
151 /* The bits of DecoderState are laid out as follows:
152 
153    ---llldd
154 
155    - = unused (3 bits)
156    l = expected total sequence length (3 bits)
157    d = number of bytes decoded so far (2 bits)
158    */
159 
/* Each DECODED_n_OF_m value stores m in bits 2-4 and n in bits 0-1.
   DECODER_RESET (all zero) means no sequence is in progress. */
160 #define DECODER_RESET  0x00
161 #define DECODED_1_OF_2 0x09 /* 00001001 */
162 #define DECODED_1_OF_3 0x0D /* 00001101 */
163 #define DECODED_2_OF_3 0x0E /* 00001110 */
164 #define DECODED_1_OF_4 0x11 /* 00010001 */
165 #define DECODED_2_OF_4 0x12 /* 00010010 */
166 #define DECODED_3_OF_4 0x13 /* 00010011 */
167 typedef byte DecoderState;
168 
/* Number of bytes consumed so far in the pending sequence (low 2 bits). */
169 #define DECODER_STATE_BYTES(s) (size_t)((s) & 0x3)
170 
171 /* Decoder data. */
/* Incremental decoder state carried between calls to Decoder_ProcessByte. */
172 typedef struct tag_DecoderData
173 {
174    DecoderState state; /* progress through the current sequence, or DECODER_RESET */
175    uint32_t     bits;  /* partially decoded value; its layout depends on the input encoding */
176 } DecoderData;
/* Handle type: decoder functions take a pointer to the data. */
177 typedef DecoderData* Decoder;
178 
179 /* The bits of DecoderOutput are laid out as follows:
180 
181    ------rrlllccccccccccccccccccccc
182 
183    - = unused (6 bits)
184    r = result code (2 bits)
185    l = sequence length (3 bits)
186    c = codepoint (21 bits)
187    */
/* Result codes reported by Decoder_ProcessByte.
   PENDING: more bytes are needed to finish the sequence.
   COMPLETE: a codepoint was decoded.
   INVALID_INCLUSIVE / INVALID_EXCLUSIVE: the sequence is invalid. As used by
   Decoder_ProcessByte, the reported sequence length counts the byte just
   processed in the INCLUSIVE case and leaves it out in the EXCLUSIVE case.
   NOTE(review): the caller presumably reprocesses an excluded byte - confirm
   against the code that consumes these results. */
188 #define SEQUENCE_PENDING           0
189 #define SEQUENCE_COMPLETE          1
190 #define SEQUENCE_INVALID_INCLUSIVE 2
191 #define SEQUENCE_INVALID_EXCLUSIVE 3
192 typedef uint32_t DecoderResultCode;
193 
/* Pack a result code (bits 24-25), a sequence length (bits 21-23) and a
   codepoint (bits 0-20) into one value, and extract each field again. */
194 #define DECODER_OUTPUT(r, l, c)    (DecoderOutput)(((r) << 24) | ((l) << 21) | (c))
195 #define DECODER_RESULT_CODE(o)     (DecoderResultCode)((DecoderOutput)(o) >> 24)
196 #define DECODER_SEQUENCE_LENGTH(o) (size_t)(((DecoderOutput)(o) >> 21) & 0x7)
197 #define DECODER_CODEPOINT(o)       (Codepoint)((DecoderOutput)(o) & 0x001FFFFF)
/* Declared after the macros that name it; that is fine because macros are
   only expanded at their point of use. */
198 typedef uint32_t DecoderOutput;
199 
200 /* Decoder functions. */
201 
/* Discards any partially decoded sequence so that the next byte processed
   is treated as the start of a new one. */
static void Decoder_Reset(Decoder decoder)
{
   decoder->bits  = 0;
   decoder->state = DECODER_RESET;
}
207 
/* Returns 1 when the decoder is part-way through a multi-byte sequence and
   0 when it is idle (in the DECODER_RESET state). */
static int Decoder_SequencePending(Decoder decoder)
{
   if (decoder->state == DECODER_RESET)
      return 0;
   return 1;
}
212 
Decoder_ProcessByte(Decoder decoder,Encoding encoding,byte b)213 static DecoderOutput Decoder_ProcessByte(Decoder decoder, Encoding encoding, byte b)
214 {
215    DecoderOutput output = DECODER_OUTPUT(SEQUENCE_PENDING, 0, 0);
216    switch (encoding)
217    {
218       case JSON_UTF8:
219          /* When the input encoding is UTF-8, the decoded codepoint's bits are
220             recorded in the bottom 3 bytes of bits as they are decoded.
221             The top byte is not used. */
222          switch (decoder->state)
223          {
224             case DECODER_RESET:
225                if (IS_UTF8_SINGLE_BYTE(b))
226                {
227                   output = DECODER_OUTPUT(SEQUENCE_COMPLETE, 1, b);
228                }
229                else if (IS_UTF8_FIRST_BYTE_OF_2(b))
230                {
231                   /* UTF-8 2-byte sequences that are overlong encodings can be
232                      detected from just the first byte (C0 or C1). */
233                   decoder->bits = (uint32_t)BOTTOM_5_BITS(b) << 6;
234                   if (decoder->bits < FIRST_2_BYTE_UTF8_CODEPOINT)
235                   {
236                      output = DECODER_OUTPUT(SEQUENCE_INVALID_INCLUSIVE, 1, 0);
237                   }
238                   else
239                   {
240                      decoder->state = DECODED_1_OF_2;
241                      goto noreset;
242                   }
243                }
244                else if (IS_UTF8_FIRST_BYTE_OF_3(b))
245                {
246                   decoder->bits = (uint32_t)BOTTOM_4_BITS(b) << 12;
247                   decoder->state = DECODED_1_OF_3;
248                   goto noreset;
249                }
250                else if (IS_UTF8_FIRST_BYTE_OF_4(b))
251                {
252                   /* Some UTF-8 4-byte sequences that encode out-of-range
253                      codepoints can be detected from the first byte (F5 - FF). */
254                   decoder->bits = (uint32_t)BOTTOM_3_BITS(b) << 18;
255                   if (decoder->bits > MAX_CODEPOINT)
256                   {
257                      output = DECODER_OUTPUT(SEQUENCE_INVALID_INCLUSIVE, 1, 0);
258                   }
259                   else
260                   {
261                      decoder->state = DECODED_1_OF_4;
262                      goto noreset;
263                   }
264                }
265                else
266                {
267                   /* The byte is of the form 11111xxx or 10xxxxxx, and is not
268                      a valid first byte for a UTF-8 sequence. */
269                   output = DECODER_OUTPUT(SEQUENCE_INVALID_INCLUSIVE, 1, 0);
270                }
271                break;
272 
273             case DECODED_1_OF_2:
274                if (IS_UTF8_CONTINUATION_BYTE(b))
275                {
276                   output = DECODER_OUTPUT(SEQUENCE_COMPLETE, 2, decoder->bits | BOTTOM_6_BITS(b));
277                }
278                else
279                {
280                   output = DECODER_OUTPUT(SEQUENCE_INVALID_EXCLUSIVE, 1, 0);
281 
282                }
283                break;
284 
285             case DECODED_1_OF_3:
286                if (IS_UTF8_CONTINUATION_BYTE(b))
287                {
288                   /* UTF-8 3-byte sequences that are overlong encodings or encode
289                      surrogate codepoints can be detected after 2 bytes. */
290                   decoder->bits |= (uint32_t)BOTTOM_6_BITS(b) << 6;
291                   if ((decoder->bits < FIRST_3_BYTE_UTF8_CODEPOINT) || IS_SURROGATE(decoder->bits))
292                   {
293                      output = DECODER_OUTPUT(SEQUENCE_INVALID_EXCLUSIVE, 1, 0);
294                   }
295                   else
296                   {
297                      decoder->state = DECODED_2_OF_3;
298                      goto noreset;
299                   }
300                }
301                else
302                {
303                   output = DECODER_OUTPUT(SEQUENCE_INVALID_EXCLUSIVE, 1, 0);
304                }
305                break;
306 
307             case DECODED_2_OF_3:
308                if (IS_UTF8_CONTINUATION_BYTE(b))
309                {
310                   output = DECODER_OUTPUT(SEQUENCE_COMPLETE, 3, decoder->bits | BOTTOM_6_BITS(b));
311                }
312                else
313                {
314                   output = DECODER_OUTPUT(SEQUENCE_INVALID_EXCLUSIVE, 2, 0);
315                }
316                break;
317 
318             case DECODED_1_OF_4:
319                if (IS_UTF8_CONTINUATION_BYTE(b))
320                {
321                   /* UTF-8 4-byte sequences that are overlong encodings or encode
322                      out-of-range codepoints can be detected after 2 bytes. */
323                   decoder->bits |= (uint32_t)BOTTOM_6_BITS(b) << 12;
324                   if ((decoder->bits < FIRST_4_BYTE_UTF8_CODEPOINT) || (decoder->bits > MAX_CODEPOINT))
325                   {
326                      output = DECODER_OUTPUT(SEQUENCE_INVALID_EXCLUSIVE, 1, 0);
327                   }
328                   else
329                   {
330                      decoder->state = DECODED_2_OF_4;
331                      goto noreset;
332                   }
333                }
334                else
335                {
336                   output = DECODER_OUTPUT(SEQUENCE_INVALID_EXCLUSIVE, 1, 0);
337                }
338                break;
339 
340             case DECODED_2_OF_4:
341                if (IS_UTF8_CONTINUATION_BYTE(b))
342                {
343                   decoder->bits |= (uint32_t)BOTTOM_6_BITS(b) << 6;
344                   decoder->state = DECODED_3_OF_4;
345                   goto noreset;
346                }
347                else
348                {
349                   output = DECODER_OUTPUT(SEQUENCE_INVALID_EXCLUSIVE, 2, 0);
350                }
351                break;
352 
353             case DECODED_3_OF_4:
354                if (IS_UTF8_CONTINUATION_BYTE(b))
355                {
356                   output = DECODER_OUTPUT(SEQUENCE_COMPLETE, 4, decoder->bits | BOTTOM_6_BITS(b));
357                }
358                else
359                {
360                   output = DECODER_OUTPUT(SEQUENCE_INVALID_EXCLUSIVE, 3, 0);
361                }
362                break;
363          }
364          break;
365 
366       case JSON_UTF16LE:
367          /* When the input encoding is UTF-16, the decoded codepoint's bits are
368             recorded in the bottom 2 bytes of bits as they are decoded.
369             If those 2 bytes form a leading surrogate, the decoder treats the
370             surrogate pair as a single 4-byte sequence, shifts the leading
371             surrogate into the high 2 bytes of bits, and decodes the
372             trailing surrogate's bits in the bottom 2 bytes of bits. */
373          switch (decoder->state)
374          {
375             case DECODER_RESET:
376                decoder->bits = b;
377                decoder->state = DECODED_1_OF_2;
378                goto noreset;
379 
380             case DECODED_1_OF_2:
381                decoder->bits |= (uint32_t)b << 8;
382                if (IS_TRAILING_SURROGATE(decoder->bits))
383                {
384                   /* A trailing surrogate cannot appear on its own. */
385                   output = DECODER_OUTPUT(SEQUENCE_INVALID_INCLUSIVE, 2, 0);
386                }
387                else if (IS_LEADING_SURROGATE(decoder->bits))
388                {
389                   /* A leading surrogate implies a 4-byte surrogate pair. */
390                   decoder->bits <<= 16;
391                   decoder->state = DECODED_2_OF_4;
392                   goto noreset;
393                }
394                else
395                {
396                   output = DECODER_OUTPUT(SEQUENCE_COMPLETE, 2, decoder->bits);
397                }
398                break;
399 
400             case DECODED_2_OF_4:
401                decoder->bits |= b;
402                decoder->state = DECODED_3_OF_4;
403                goto noreset;
404 
405             case DECODED_3_OF_4:
406                decoder->bits |= (uint32_t)b << 8;
407                if (!IS_TRAILING_SURROGATE(decoder->bits & 0xFFFF))
408                {
409                   /* A leading surrogate must be followed by a trailing one.
410                      Treat the previous 3 bytes as an invalid 2-byte sequence
411                      followed by the first byte of a new sequence. */
412                   decoder->bits &= 0xFF;
413                   decoder->state = DECODED_1_OF_2;
414                   output = DECODER_OUTPUT(SEQUENCE_INVALID_EXCLUSIVE, 2, 0);
415                   goto noreset;
416                }
417                else
418                {
419                   output = DECODER_OUTPUT(SEQUENCE_COMPLETE, 4, CODEPOINT_FROM_SURROGATES(decoder->bits));
420                }
421                break;
422          }
423          break;
424 
425       case JSON_UTF16BE:
426          /* When the input encoding is UTF-16, the decoded codepoint's bits are
427             recorded in the bottom 2 bytes of bits as they are decoded.
428             If those 2 bytes form a leading surrogate, the decoder treats the
429             surrogate pair as a single 4-byte sequence, shifts the leading
430             surrogate into the high 2 bytes of bits, and decodes the
431             trailing surrogate's bits in the bottom 2 bytes of bits. */
432          switch (decoder->state)
433          {
434             case DECODER_RESET:
435                decoder->bits = (uint32_t)b << 8;
436                decoder->state = DECODED_1_OF_2;
437                goto noreset;
438 
439             case DECODED_1_OF_2:
440                decoder->bits |= b;
441                if (IS_TRAILING_SURROGATE(decoder->bits))
442                {
443                   /* A trailing surrogate cannot appear on its own. */
444                   output = DECODER_OUTPUT(SEQUENCE_INVALID_INCLUSIVE, 2, 0);
445                }
446                else if (IS_LEADING_SURROGATE(decoder->bits))
447                {
448                   /* A leading surrogate implies a 4-byte surrogate pair. */
449                   decoder->bits <<= 16;
450                   decoder->state = DECODED_2_OF_4;
451                   goto noreset;
452                }
453                else
454                {
455                   output = DECODER_OUTPUT(SEQUENCE_COMPLETE, 2, decoder->bits);
456                }
457                break;
458 
459             case DECODED_2_OF_4:
460                decoder->bits |= (uint32_t)b << 8;
461                decoder->state = DECODED_3_OF_4;
462                goto noreset;
463 
464             case DECODED_3_OF_4:
465                decoder->bits |= b;
466                if (!IS_TRAILING_SURROGATE(decoder->bits & 0xFFFF))
467                {
468                   /* A leading surrogate must be followed by a trailing one.
469                      Treat the previous 3 bytes as an invalid 2-byte sequence
470                      followed by the first byte of a new sequence. */
471                   decoder->bits &= 0xFF00;
472                   decoder->state = DECODED_1_OF_2;
473                   output = DECODER_OUTPUT(SEQUENCE_INVALID_EXCLUSIVE, 2, 0);
474                   goto noreset;
475                }
476                else
477                {
478                   output = DECODER_OUTPUT(SEQUENCE_COMPLETE, 4, CODEPOINT_FROM_SURROGATES(decoder->bits));
479                }
480                break;
481          }
482          break;
483 
484       case JSON_UTF32LE:
485          /* When the input encoding is UTF-32, the decoded codepoint's bits are
486             recorded in bits as they are decoded. */
487          switch (decoder->state)
488          {
489             case DECODER_RESET:
490                decoder->state = DECODED_1_OF_4;
491                decoder->bits = (uint32_t)b;
492                goto noreset;
493 
494             case DECODED_1_OF_4:
495                decoder->state = DECODED_2_OF_4;
496                decoder->bits |= (uint32_t)b << 8;
497                goto noreset;
498 
499             case DECODED_2_OF_4:
500                decoder->state = DECODED_3_OF_4;
501                decoder->bits |= (uint32_t)b << 16;
502                goto noreset;
503 
504             case DECODED_3_OF_4:
505                decoder->bits |= (uint32_t)b << 24;
506                output = (IS_SURROGATE(decoder->bits) || (decoder->bits > MAX_CODEPOINT))
507                   ? DECODER_OUTPUT(SEQUENCE_INVALID_INCLUSIVE, 4, 0)
508                   : DECODER_OUTPUT(SEQUENCE_COMPLETE, 4, decoder->bits);
509                break;
510          }
511          break;
512 
513       case JSON_UTF32BE:
514          /* When the input encoding is UTF-32, the decoded codepoint's bits are
515             recorded in bits as they are decoded. */
516          switch (decoder->state)
517          {
518             case DECODER_RESET:
519                decoder->state = DECODED_1_OF_4;
520                decoder->bits = (uint32_t)b << 24;
521                goto noreset;
522 
523             case DECODED_1_OF_4:
524                decoder->state = DECODED_2_OF_4;
525                decoder->bits |= (uint32_t)b << 16;
526                goto noreset;
527 
528             case DECODED_2_OF_4:
529                decoder->state = DECODED_3_OF_4;
530                decoder->bits |= (uint32_t)b << 8;
531                goto noreset;
532 
533             case DECODED_3_OF_4:
534                decoder->bits |= b;
535                output = (IS_SURROGATE(decoder->bits) || (decoder->bits > MAX_CODEPOINT))
536                   ? DECODER_OUTPUT(SEQUENCE_INVALID_INCLUSIVE, 4, 0)
537                   : DECODER_OUTPUT(SEQUENCE_COMPLETE, 4, decoder->bits);
538                break;
539          }
540          break;
541    }
542 
543    /* Reset the decoder for the next sequence. */
544    Decoder_Reset(decoder);
545 
546 noreset:
547    return output;
548 }
549 
550 /******************** Unicode Encoder ********************/
551 
552 /* This function makes the following assumptions about its input:
553 
554    1. The c argument is a valid codepoint (U+0000 - U+10FFFF).
555    2. The encoding argument is not JSON_UnknownEncoding.
556    3. The pBytes argument points to an array of at least 4 bytes.
557    */
EncodeCodepoint(Codepoint c,Encoding encoding,byte * pBytes)558 static size_t EncodeCodepoint(Codepoint c, Encoding encoding, byte* pBytes)
559 {
560    size_t length = 0;
561    switch (encoding)
562    {
563       case JSON_UTF8:
564          if (c < FIRST_2_BYTE_UTF8_CODEPOINT)
565          {
566             pBytes[0] = (byte)c;
567             length = 1;
568          }
569          else if (c < FIRST_3_BYTE_UTF8_CODEPOINT)
570          {
571             pBytes[0] = (byte)(0xC0 | (c >> 6));
572             pBytes[1] = (byte)(0x80 | BOTTOM_6_BITS(c));
573             length = 2;
574          }
575          else if (c < FIRST_4_BYTE_UTF8_CODEPOINT)
576          {
577             pBytes[0] = (byte)(0xE0 | (c >> 12));
578             pBytes[1] = (byte)(0x80 | BOTTOM_6_BITS(c >> 6));
579             pBytes[2] = (byte)(0x80 | BOTTOM_6_BITS(c));
580             length = 3;
581          }
582          else
583          {
584             pBytes[0] = (byte)(0xF0 | (c >> 18));
585             pBytes[1] = (byte)(0x80 | BOTTOM_6_BITS(c >> 12));
586             pBytes[2] = (byte)(0x80 | BOTTOM_6_BITS(c >> 6));
587             pBytes[3] = (byte)(0x80 | BOTTOM_6_BITS(c));
588             length = 4;
589          }
590          break;
591 
592       case JSON_UTF16LE:
593          if (c < FIRST_NON_BMP_CODEPOINT)
594          {
595             pBytes[0] = (byte)(c);
596             pBytes[1] = (byte)(c >> 8);
597             length = 2;
598          }
599          else
600          {
601             uint32_t surrogates = SURROGATES_FROM_CODEPOINT(c);
602 
603             /* Leading surrogate. */
604             pBytes[0] = (byte)(surrogates >> 16);
605             pBytes[1] = (byte)(surrogates >> 24);
606 
607             /* Trailing surrogate. */
608             pBytes[2] = (byte)(surrogates);
609             pBytes[3] = (byte)(surrogates >> 8);
610             length = 4;
611          }
612          break;
613 
614       case JSON_UTF16BE:
615          if (c < FIRST_NON_BMP_CODEPOINT)
616          {
617             pBytes[1] = (byte)(c);
618             pBytes[0] = (byte)(c >> 8);
619             length = 2;
620          }
621          else
622          {
623             /* The codepoint requires a surrogate pair in UTF-16. */
624             uint32_t surrogates = SURROGATES_FROM_CODEPOINT(c);
625 
626             /* Leading surrogate. */
627             pBytes[1] = (byte)(surrogates >> 16);
628             pBytes[0] = (byte)(surrogates >> 24);
629 
630             /* Trailing surrogate. */
631             pBytes[3] = (byte)(surrogates);
632             pBytes[2] = (byte)(surrogates >> 8);
633             length = 4;
634          }
635          break;
636 
637       case JSON_UTF32LE:
638          pBytes[0] = (byte)(c);
639          pBytes[1] = (byte)(c >> 8);
640          pBytes[2] = (byte)(c >> 16);
641          pBytes[3] = (byte)(c >> 24);
642          length = 4;
643          break;
644 
645       case JSON_UTF32BE:
646          pBytes[3] = (byte)(c);
647          pBytes[2] = (byte)(c >> 8);
648          pBytes[1] = (byte)(c >> 16);
649          pBytes[0] = (byte)(c >> 24);
650          length = 4;
651          break;
652    }
653    return length;
654 }
655 
656 /******************** JSON Lexer States ********************/
657 
658 /* Mutually-exclusive lexer states. */
/* States of the lexer's state machine; exactly one is current at a time.
   The code that moves between them is outside this part of the file.
   NOTE(review): the eight HEX_ESCAPE_BYTE states presumably cover the hex
   digits of a \uXXXX escape plus those of a following trailing-surrogate
   escape, with the two TRAILING_SURROGATE states consuming the "\u" in
   between - confirm against the lexer. */
659 #define LEXING_WHITESPACE                                     0
660 #define LEXING_LITERAL                                        1
661 #define LEXING_STRING                                         2
662 #define LEXING_STRING_ESCAPE                                  3
663 #define LEXING_STRING_HEX_ESCAPE_BYTE_1                       4
664 #define LEXING_STRING_HEX_ESCAPE_BYTE_2                       5
665 #define LEXING_STRING_HEX_ESCAPE_BYTE_3                       6
666 #define LEXING_STRING_HEX_ESCAPE_BYTE_4                       7
667 #define LEXING_STRING_HEX_ESCAPE_BYTE_5                       8
668 #define LEXING_STRING_HEX_ESCAPE_BYTE_6                       9
669 #define LEXING_STRING_HEX_ESCAPE_BYTE_7                       10
670 #define LEXING_STRING_HEX_ESCAPE_BYTE_8                       11
671 #define LEXING_STRING_TRAILING_SURROGATE_HEX_ESCAPE_BACKSLASH 12
672 #define LEXING_STRING_TRAILING_SURROGATE_HEX_ESCAPE_U         13
673 #define LEXING_NUMBER_AFTER_MINUS                             14
674 #define LEXING_NUMBER_AFTER_LEADING_ZERO                      15
675 #define LEXING_NUMBER_AFTER_LEADING_NEGATIVE_ZERO             16
676 #define LEXING_NUMBER_AFTER_X                                 17
677 #define LEXING_NUMBER_HEX_DIGITS                              18
678 #define LEXING_NUMBER_DECIMAL_DIGITS                          19
679 #define LEXING_NUMBER_AFTER_DOT                               20
680 #define LEXING_NUMBER_FRACTIONAL_DIGITS                       21
681 #define LEXING_NUMBER_AFTER_E                                 22
682 #define LEXING_NUMBER_AFTER_EXPONENT_SIGN                     23
683 #define LEXING_NUMBER_EXPONENT_DIGITS                         24
684 #define LEXING_COMMENT_AFTER_SLASH                            25
685 #define LEXING_SINGLE_LINE_COMMENT                            26
686 #define LEXING_MULTI_LINE_COMMENT                             27
687 #define LEXING_MULTI_LINE_COMMENT_AFTER_STAR                  28
/* Kept well apart from the consecutively numbered working states above. */
688 #define LEXER_ERROR                                           255
689 typedef byte LexerState;
690 
691 /******************** JSON Grammarian ********************/
692 
693 /* The JSON grammar comprises the following productions:
694 
695    1.  VALUE => null
696    2.  VALUE => boolean
697    3.  VALUE => string
698    4.  VALUE => number
699    5.  VALUE => specialnumber
700    6.  VALUE => { MEMBERS }
701    7.  VALUE => [ ITEMS ]
702    8.  MEMBERS => MEMBER MORE_MEMBERS
703    9.  MEMBERS => e
704    10. MEMBER => string : VALUE
705    11. MORE_MEMBERS => , MEMBER MORE_MEMBERS
706    12. MORE_MEMBERS => e
707    13. ITEMS => ITEM MORE_ITEMS
708    14. ITEMS => e
709    15. ITEM => VALUE
710    16. MORE_ITEMS => , ITEM MORE_ITEMS
711    17. MORE_ITEMS => e
712 
713    We implement a simple LL(1) parser based on this grammar, with events
714    emitted when certain non-terminals are replaced.
715    */
716 
717 /* Mutually-exclusive grammar tokens and non-terminals. The values are defined
718    so that the bottom 4 bits of a value can be used as an index into the
719    grammar production rule table. */
/* Terminal symbols (tokens): bit 4 is clear, values 0x00 - 0x0E. */
720 #define T_NONE              0x00 /* tokens are in the form 0x0X */
721 #define T_NULL              0x01
722 #define T_TRUE              0x02
723 #define T_FALSE             0x03
724 #define T_STRING            0x04
725 #define T_NUMBER            0x05
726 #define T_NAN               0x06
727 #define T_INFINITY          0x07
728 #define T_NEGATIVE_INFINITY 0x08
729 #define T_LEFT_CURLY        0x09
730 #define T_RIGHT_CURLY       0x0A
731 #define T_LEFT_SQUARE       0x0B
732 #define T_RIGHT_SQUARE      0x0C
733 #define T_COLON             0x0D
734 #define T_COMMA             0x0E
/* Non-terminal symbols: bit 4 is set, values 0x10 - 0x16. They correspond to
   the left-hand sides of the productions listed in the grammar comment. */
735 #define NT_VALUE            0x10 /* non-terminals are in the form 0x1X */
736 #define NT_MEMBERS          0x11
737 #define NT_MEMBER           0x12
738 #define NT_MORE_MEMBERS     0x13
739 #define NT_ITEMS            0x14
740 #define NT_ITEM             0x15
741 #define NT_MORE_ITEMS       0x16
742 typedef byte Symbol;
743 
/* Bit 4 alone distinguishes the two kinds of symbol. IS_NONTERMINAL yields
   0x10 (not 1) for a non-terminal, so use it only as a truth value. */
744 #define IS_NONTERMINAL(s) ((s) & 0x10)
745 #define IS_TOKEN(s)       !IS_NONTERMINAL(s)
746 
747 /* Grammarian data. */
/* State of the LL(1) parser: a stack of grammar symbols still to be matched.
   The stack starts out in the inline defaultStack array; pStack differs from
   defaultStack only once a separately allocated stack is in use, and only
   then is it released by Grammarian_FreeAllocations. */
748 typedef struct tag_GrammarianData
749 {
750    Symbol* pStack; /* initially set to defaultStack */
751    size_t  stackSize; /* capacity of pStack, in symbols */
752    size_t  stackUsed; /* symbols currently on the stack; 0 means the document is finished */
753    Symbol  defaultStack[DEFAULT_SYMBOL_STACK_SIZE]; /* inline storage used until more room is needed */
754 } GrammarianData;
/* Handle type: grammarian functions take a pointer to the data. */
755 typedef GrammarianData* Grammarian;
756 
757 /* Mutually-exclusive result codes returned by the grammarian
758    after processing a token. */
/* ACCEPTED_TOKEN / REJECTED_TOKEN say whether the token fits the grammar at
   the current position. NOTE(review): SYMBOL_STACK_FULL presumably reports
   that the symbol stack could not be grown - confirm in
   Grammarian_ProcessToken. The values must fit in the 2-bit result field of
   GrammarianOutput. */
759 #define ACCEPTED_TOKEN    0
760 #define REJECTED_TOKEN    1
761 #define SYMBOL_STACK_FULL 2
762 typedef uint32_t GrammarianResultCode;
763 
764 /* Events emitted by the grammarian as a result of processing a
765    token. Note that EMIT_ARRAY_ITEM always appears bitwise OR-ed
766    with one of the other values. */
/* The base events occupy the low 4 bits (0x00 - 0x0A) and are mutually
   exclusive. */
767 #define EMIT_NOTHING        0x00
768 #define EMIT_NULL           0x01
769 #define EMIT_BOOLEAN        0x02
770 #define EMIT_STRING         0x03
771 #define EMIT_NUMBER         0x04
772 #define EMIT_SPECIAL_NUMBER 0x05
773 #define EMIT_START_OBJECT   0x06
774 #define EMIT_END_OBJECT     0x07
775 #define EMIT_OBJECT_MEMBER  0x08
776 #define EMIT_START_ARRAY    0x09
777 #define EMIT_END_ARRAY      0x0A
/* A separate bit (bit 4), so it can be OR-ed onto any base event without
   disturbing it. Together they fill the 5-bit event field of
   GrammarianOutput. */
778 #define EMIT_ARRAY_ITEM     0x10 /* may be combined with other values */
779 typedef byte GrammarEvent;
780 
781 /* The bits of GrammarianOutput are laid out as follows:
782 
783    -rreeeee
784 
785    - = unused (1 bit)
786    r = result code (2 bits)
787    e = event (5 bits)
788    */
/* Pack a result code (bits 5-6) and an event (bits 0-4) into a single byte,
   and extract each field again. */
789 #define GRAMMARIAN_OUTPUT(r, e)   (GrammarianOutput)(((GrammarianResultCode)(r) << 5) | (GrammarEvent)(e))
790 #define GRAMMARIAN_RESULT_CODE(o) (GrammarianResultCode)((GrammarianOutput)(o) >> 5)
791 #define GRAMMARIAN_EVENT(o)       (GrammarEvent)((GrammarianOutput)(o) & 0x1F)
/* Declared after the macros that name it; that is fine because macros are
   only expanded at their point of use. */
792 typedef byte GrammarianOutput;
793 
/* Grammar rule used by the grammarian to process a token. Applying a rule
   always consumes the non-terminal at the top of the symbol stack. */
typedef struct tag_GrammarRule
{
   Symbol       symbolToPush1; /* replaces the top symbol; T_NONE means the top
                                  symbol is simply popped */
   Symbol       symbolToPush2; /* pushed above symbolToPush1; T_NONE means there
                                  is no second symbol */
   byte         reprocess;     /* non-zero: apply the same token again to the
                                  new top of the stack */
   GrammarEvent emit;          /* event OR-ed into the grammarian's output */
} GrammarRule;
802 
803 /* Grammarian functions. */
804 
/* Puts the grammarian into its start state, in which the symbol stack
   holds exactly one symbol, NT_VALUE.

   When isInitialized is zero, the stack is first pointed at the built-in
   default buffer. Otherwise whatever stack buffer is currently in use is
   kept, even if it was grown; a client that wants to reclaim the memory
   used by that buffer needs to free the grammarian and create a new
   one. */
static void Grammarian_Reset(Grammarian grammarian, int isInitialized)
{
   Symbol* pStack;

   if (isInitialized)
      pStack = grammarian->pStack;
   else
   {
      pStack                = grammarian->defaultStack;
      grammarian->pStack    = pStack;
      grammarian->stackSize = sizeof(grammarian->defaultStack);
   }

   /* Every document starts out expecting a single value. */
   pStack[0]             = NT_VALUE;
   grammarian->stackUsed = 1;
}
821 
/* Releases the symbol stack through the given memory suite, but only if
   the stack is no longer the built-in default buffer. */
static void Grammarian_FreeAllocations(Grammarian grammarian,
      const JSON_MemorySuite* pMemorySuite)
{
   Symbol* pStack = grammarian->pStack;

   /* The default stack is part of the grammarian itself; nothing to free. */
   if (pStack == grammarian->defaultStack)
      return;

   pMemorySuite->free(pMemorySuite->userData, pStack);
}
828 
/* Returns 1 if the symbol stack is empty, meaning that no more tokens are
   expected, and 0 otherwise. */
static int Grammarian_FinishedDocument(Grammarian grammarian)
{
   return grammarian->stackUsed == 0;
}
833 
Grammarian_ProcessToken(Grammarian grammarian,Symbol token,const JSON_MemorySuite * pMemorySuite)834 static GrammarianOutput Grammarian_ProcessToken(Grammarian grammarian,
835       Symbol token, const JSON_MemorySuite* pMemorySuite)
836 {
837    /* The order and number of the rows and columns in this table must
838       match the defined token and non-terminal symbol values.
839 
840       The row index is the incoming token's Symbol value.
841 
842       The column index is the bottom 4 bits of Symbol value of
843       the non-terminal at the top of the processing stack.
844       Since non-terminal Symbol values start at 0x10, taking
845       the bottom 4 bits yields a 0-based index. */
846    static const byte ruleLookup[15][7] =
847    {
848       /*             V     MS    M     MM    IS    I     MI  */
849       /*  ----  */ { 0,    0,    0,    0,    0,    0,    0  },
850       /*  null  */ { 1,    0,    0,    0,    13,   15,   0  },
851       /*  true  */ { 2,    0,    0,    0,    13,   15,   0  },
852       /* false  */ { 2,    0,    0,    0,    13,   15,   0  },
853       /* string */ { 3,    8,    10,   0,    13,   15,   0  },
854       /* number */ { 4,    0,    0,    0,    13,   15,   0  },
855       /*  NaN   */ { 5,    0,    0,    0,    13,   15,   0  },
856       /*  Inf   */ { 5,    0,    0,    0,    13,   15,   0  },
857       /* -Inf   */ { 5,    0,    0,    0,    13,   15,   0  },
858       /*   {    */ { 6,    0,    0,    0,    13,   15,   0  },
859       /*   }    */ { 0,    9,    0,    12,   0,    0,    0  },
860       /*   [    */ { 7,    0,    0,    0,    13,   15,   0  },
861       /*   ]    */ { 0,    0,    0,    0,    14,   0,    17 },
862       /*   :    */ { 0,    0,    0,    0,    0,    0,    0  },
863       /*   ,    */ { 0,    0,    0,    11,   0,    0,    16 }
864    };
865 
866    static const GrammarRule rules[17] =
867    {
868       /* 1.  */ { T_NONE,          T_NONE,      0, EMIT_NULL           },
869       /* 2.  */ { T_NONE,          T_NONE,      0, EMIT_BOOLEAN        },
870       /* 3.  */ { T_NONE,          T_NONE,      0, EMIT_STRING         },
871       /* 4.  */ { T_NONE,          T_NONE,      0, EMIT_NUMBER         },
872       /* 5.  */ { T_NONE,          T_NONE,      0, EMIT_SPECIAL_NUMBER },
873       /* 6.  */ { T_RIGHT_CURLY,   NT_MEMBERS,  0, EMIT_START_OBJECT   },
874       /* 7.  */ { T_RIGHT_SQUARE,  NT_ITEMS,    0, EMIT_START_ARRAY    },
875       /* 8.  */ { NT_MORE_MEMBERS, NT_MEMBER,   1, EMIT_NOTHING        },
876       /* 9.  */ { T_NONE,          T_NONE,      1, EMIT_END_OBJECT     },
877       /* 10. */ { NT_VALUE,        T_COLON,     0, EMIT_OBJECT_MEMBER  },
878       /* 11. */ { NT_MORE_MEMBERS, NT_MEMBER,   0, EMIT_NOTHING        },
879       /* 12. */ { T_NONE,          T_NONE,      1, EMIT_END_OBJECT     },
880       /* 13. */ { NT_MORE_ITEMS,   NT_ITEM,     1, EMIT_NOTHING        },
881       /* 14. */ { T_NONE,          T_NONE,      1, EMIT_END_ARRAY      },
882       /* 15. */ { NT_VALUE,        T_NONE,      1, EMIT_ARRAY_ITEM     },
883       /* 16. */ { NT_MORE_ITEMS,   NT_ITEM,     0, EMIT_NOTHING        },
884       /* 17. */ { T_NONE,          T_NONE,      1, EMIT_END_ARRAY      }
885    };
886 
887    GrammarEvent emit = EMIT_NOTHING;
888 
889    /* If the stack is empty, no more tokens were expected. */
890    if (Grammarian_FinishedDocument(grammarian))
891       return GRAMMARIAN_OUTPUT(REJECTED_TOKEN, EMIT_NOTHING);
892 
893    for (;;)
894    {
895       Symbol topSymbol = grammarian->pStack[grammarian->stackUsed - 1];
896       if (IS_TOKEN(topSymbol))
897       {
898          if (topSymbol != token)
899             return GRAMMARIAN_OUTPUT(REJECTED_TOKEN, EMIT_NOTHING);
900          grammarian->stackUsed--;
901          break;
902       }
903       else
904       {
905          const GrammarRule* pRule = NULL;
906          byte ruleNumber          = ruleLookup[token][BOTTOM_4_BITS(topSymbol)];
907 
908          if (ruleNumber == 0)
909             return GRAMMARIAN_OUTPUT(REJECTED_TOKEN, EMIT_NOTHING);
910 
911          pRule = &rules[ruleNumber - 1];
912 
913          /* The rule removes the top symbol and does not replace it. */
914          if (pRule->symbolToPush1 == T_NONE)
915             grammarian->stackUsed--;
916          else
917          {
918             /* The rule replaces the top symbol with 1 or 2 symbols. */
919             grammarian->pStack[grammarian->stackUsed - 1] = pRule->symbolToPush1;
920             if (pRule->symbolToPush2 != T_NONE)
921             {
922                /* The rule replaces the top symbol with 2 symbols.
923                   Make sure the stack has room for the second one. */
924                if (grammarian->stackUsed == grammarian->stackSize)
925                {
926                   Symbol* pBiggerStack = DoubleBuffer(pMemorySuite,
927                         grammarian->defaultStack, grammarian->pStack,
928                         grammarian->stackSize);
929 
930                   if (!pBiggerStack)
931                      return GRAMMARIAN_OUTPUT(SYMBOL_STACK_FULL, EMIT_NOTHING);
932 
933                   grammarian->pStack = pBiggerStack;
934                   grammarian->stackSize *= 2;
935                }
936                grammarian->pStack[grammarian->stackUsed] = pRule->symbolToPush2;
937                grammarian->stackUsed++;
938             }
939          }
940          emit |= pRule->emit;
941          if (!pRule->reprocess)
942             break;
943       }
944    }
945 
946    return GRAMMARIAN_OUTPUT(ACCEPTED_TOKEN, emit);
947 }
948 
949 /******************** JSON Parser ********************/
950 
951 #ifndef JSON_NO_PARSER
952 
/* Combinable parser state flags. PARSER_IN_TOKEN_HANDLER is switched on
   for the duration of every client handler call made by the
   JSON_Parser_Call*Handler() functions. */
#define PARSER_RESET                 0x00
#define PARSER_STARTED               0x01
#define PARSER_FINISHED              0x02
#define PARSER_IN_PROTECTED_API      0x04
#define PARSER_IN_TOKEN_HANDLER      0x08
#define PARSER_AFTER_CARRIAGE_RETURN 0x10
typedef byte ParserState;
961 
/* Combinable parser settings flags. All 8 bits of ParserFlags are in
   use. */
#define PARSER_DEFAULT_FLAGS         0x00
#define PARSER_ALLOW_BOM             0x01
#define PARSER_ALLOW_COMMENTS        0x02
#define PARSER_ALLOW_SPECIAL_NUMBERS 0x04
#define PARSER_ALLOW_HEX_NUMBERS     0x08
#define PARSER_REPLACE_INVALID       0x10 /* replace invalid encoding sequences
                                             inside strings instead of failing */
#define PARSER_TRACK_OBJECT_MEMBERS  0x20 /* record member names so that
                                             duplicates can be detected */
#define PARSER_ALLOW_CONTROL_CHARS   0x40
#define PARSER_EMBEDDED_DOCUMENT     0x80 /* stop as soon as the top-level value
                                             is complete */
typedef byte ParserFlags;
973 
/* Sentinel value for parser error location offset. It is stored in the
   parser's errorOffset field by JSON_Parser_SetErrorAtToken() to mark the
   error as located at the start of the current token rather than a number
   of codepoints back from the current codepoint. */
#define ERROR_LOCATION_IS_TOKEN_START 0xFF
976 
/* An object member name stored in an unordered, singly-linked-list, used for
   detecting duplicate member names. Note that the name string is not null-
   terminated. Each node is allocated with room for its name bytes directly
   after the fixed fields (sizeof(MemberName) + length - 1 bytes). */
typedef struct tag_MemberName
{
   struct tag_MemberName* pNextName; /* next name in the list, or NULL */
   size_t                 length;    /* number of bytes in pBytes */
   byte                   pBytes[1]; /* variable-size buffer */
} MemberName;
986 
/* An object's list of member names, and a pointer to the object's
   nearest ancestor object, if any. This is used as a stack. Because arrays
   do not have named items, they do not need to be recorded in the stack.
   The parser's pMemberNames field points at the top of the stack. */
typedef struct tag_MemberNames
{
   struct tag_MemberNames* pAncestor;  /* next entry down the stack, or NULL */
   MemberName*             pFirstName; /* head of this object's name list */
} MemberNames;
995 
/* A parser instance. All fields except memorySuite are (re)initialized by
   JSON_Parser_ResetData(). */
struct JSON_Parser_Data
{
   /* Allocation callbacks used for everything the parser allocates. */
   JSON_MemorySuite                    memorySuite;
   /* Reset to NULL; presumably an opaque pointer owned by the client
      (it is not used in this part of the file). */
   void*                               userData;
   /* Combination of PARSER_* state flags. */
   ParserState                         state;
   /* Combination of PARSER_* settings flags. */
   ParserFlags                         flags;
   /* inputEncoding starts out as JSON_UnknownEncoding. stringEncoding and
      numberEncoding are the encodings of the token bytes passed to the
      string/member and number handlers respectively; both default to
      JSON_UTF8. */
   Encoding                            inputEncoding;
   Encoding                            stringEncoding;
   Encoding                            numberEncoding;
   /* The token being lexed (T_NONE between tokens) and its attributes. */
   Symbol                              token;
   TokenAttributes                     tokenAttributes;
   /* The recorded error, and where it happened: either a number of
      codepoints before the current one, or ERROR_LOCATION_IS_TOKEN_START. */
   Error                               error;
   byte                                errorOffset;
   /* Lexer state; reset to LEXING_WHITESPACE / 0 after every token. */
   LexerState                          lexerState;
   uint32_t                            lexerBits;
   /* Location of the current codepoint. */
   size_t                              codepointLocationByte;
   size_t                              codepointLocationLine;
   size_t                              codepointLocationColumn;
   /* Location of the current token; copied from the codepoint location by
      JSON_Parser_StartToken(). */
   size_t                              tokenLocationByte;
   size_t                              tokenLocationLine;
   size_t                              tokenLocationColumn;
   /* Number of containers (objects and arrays) currently open. */
   size_t                              depth;
   /* Token byte buffer: initially defaultTokenBytes. tokenBytesLength is
      its capacity and tokenBytesUsed the number of bytes recorded so far. */
   byte*                               pTokenBytes;
   size_t                              tokenBytesLength;
   size_t                              tokenBytesUsed;
   /* Both default to SIZE_MAX; presumably upper bounds on token lengths
      (they are not enforced in this part of the file). */
   size_t                              maxStringLength;
   size_t                              maxNumberLength;
   /* Top of the member-name stack used for duplicate detection, or NULL. */
   MemberNames*                        pMemberNames;
   DecoderData                         decoderData;
   GrammarianData                      grammarianData;
   /* Client handlers; NULL means that the event is not reported. */
   JSON_Parser_EncodingDetectedHandler encodingDetectedHandler;
   JSON_Parser_NullHandler             nullHandler;
   JSON_Parser_BooleanHandler          booleanHandler;
   JSON_Parser_StringHandler           stringHandler;
   JSON_Parser_NumberHandler           numberHandler;
   JSON_Parser_SpecialNumberHandler    specialNumberHandler;
   JSON_Parser_StartObjectHandler      startObjectHandler;
   JSON_Parser_EndObjectHandler        endObjectHandler;
   JSON_Parser_ObjectMemberHandler     objectMemberHandler;
   JSON_Parser_StartArrayHandler       startArrayHandler;
   JSON_Parser_EndArrayHandler         endArrayHandler;
   JSON_Parser_ArrayItemHandler        arrayItemHandler;
   /* Built-in initial storage for pTokenBytes. */
   byte                                defaultTokenBytes[DEFAULT_TOKEN_BYTES_LENGTH];
};
1041 
1042 /* Parser internal functions. */
1043 
/* Records the given error on the parser. Only the error code is stored;
   errorOffset is left untouched (it is 0 after JSON_Parser_ResetData(),
   which presumably means "at the current codepoint" -- confirm against the
   code that reports the error location). */
static void JSON_Parser_SetErrorAtCodepoint(JSON_Parser parser, Error error)
{
   parser->error = error;
}
1048 
/* Records the given error together with an offset of codepointsAgo, i.e.
   the error is located that many codepoints before the current one. The
   offset is stored in a byte, so codepointsAgo must be small and must not
   collide with ERROR_LOCATION_IS_TOKEN_START. */
static void JSON_Parser_SetErrorAtStringEscapeSequenceStart(
      JSON_Parser parser, Error error, int codepointsAgo)
{
   /* Note that backtracking from the current codepoint requires us to make
      three assumptions, which are always valid in the context of a string
      escape sequence:

      1. The input encoding is not JSON_UnknownEncoding.

      2. The codepoints we are backing up across are all in the range
      U+0000 - U+007F, aka ASCII, so we can assume the number of
      bytes comprising them based on the input encoding.

      3. The codepoints we are backing up across do not include any
      line breaks, so we can assume that the line number stays the
      same and the column number can simply be decremented.
      */
   parser->error = error;
   parser->errorOffset = (byte)codepointsAgo;
}
1069 
/* Records the given error and marks its location as the start of the
   current token by storing the ERROR_LOCATION_IS_TOKEN_START sentinel in
   errorOffset. */
static void JSON_Parser_SetErrorAtToken(JSON_Parser parser, Error error)
{
   parser->error = error;
   parser->errorOffset = ERROR_LOCATION_IS_TOKEN_START;
}
1075 
JSON_Parser_PushMemberNameList(JSON_Parser parser)1076 static JSON_Status JSON_Parser_PushMemberNameList(JSON_Parser parser)
1077 {
1078    MemberNames* pNames = (MemberNames*)parser->memorySuite.realloc(
1079          parser->memorySuite.userData, NULL, sizeof(MemberNames));
1080 
1081    if (!pNames)
1082    {
1083       JSON_Parser_SetErrorAtCodepoint(parser, JSON_Error_OutOfMemory);
1084       return JSON_Failure;
1085    }
1086 
1087    pNames->pAncestor    = parser->pMemberNames;
1088    pNames->pFirstName   = NULL;
1089    parser->pMemberNames = pNames;
1090    return JSON_Success;
1091 }
1092 
/* Pops the top member-name list off the parser's member-name stack,
   freeing every name in it and then the list itself. The stack must not
   be empty. */
static void JSON_Parser_PopMemberNameList(JSON_Parser parser)
{
   MemberNames* pList     = parser->pMemberNames;
   MemberNames* pAncestor = pList->pAncestor;

   while (pList->pFirstName)
   {
      MemberName* pDoomed = pList->pFirstName;

      /* Unlink the name before releasing it. */
      pList->pFirstName = pDoomed->pNextName;
      parser->memorySuite.free(parser->memorySuite.userData, pDoomed);
   }

   parser->memorySuite.free(parser->memorySuite.userData, pList);
   parser->pMemberNames = pAncestor;
}
1105 
JSON_Parser_StartContainer(JSON_Parser parser,int isObject)1106 static JSON_Status JSON_Parser_StartContainer(JSON_Parser parser, int isObject)
1107 {
1108    if (isObject && GET_FLAGS(parser->flags, PARSER_TRACK_OBJECT_MEMBERS) &&
1109          !JSON_Parser_PushMemberNameList(parser))
1110    {
1111       return JSON_Failure;
1112    }
1113    parser->depth++;
1114    return JSON_Success;
1115 }
1116 
/* Leaves the current object (isObject != 0) or array (isObject == 0): the
   depth is decremented and, for an object with member tracking enabled,
   the object's member-name list is popped and freed. */
static void JSON_Parser_EndContainer(JSON_Parser parser, int isObject)
{
   parser->depth--;

   /* Arrays never have a member-name list. */
   if (!isObject)
      return;

   if (GET_FLAGS(parser->flags, PARSER_TRACK_OBJECT_MEMBERS))
      JSON_Parser_PopMemberNameList(parser);
}
1125 
JSON_Parser_AddMemberNameToList(JSON_Parser parser)1126 static JSON_Status JSON_Parser_AddMemberNameToList(JSON_Parser parser)
1127 {
1128    if (GET_FLAGS(parser->flags, PARSER_TRACK_OBJECT_MEMBERS))
1129    {
1130       MemberName* pName;
1131       for (pName = parser->pMemberNames->pFirstName; pName; pName = pName->pNextName)
1132       {
1133          if (pName->length == parser->tokenBytesUsed && !memcmp(pName->pBytes, parser->pTokenBytes, pName->length))
1134          {
1135             JSON_Parser_SetErrorAtToken(parser, JSON_Error_DuplicateObjectMember);
1136             return JSON_Failure;
1137          }
1138       }
1139       pName = (MemberName*)parser->memorySuite.realloc(parser->memorySuite.userData, NULL, sizeof(MemberName) + parser->tokenBytesUsed - 1);
1140       if (!pName)
1141       {
1142          JSON_Parser_SetErrorAtCodepoint(parser, JSON_Error_OutOfMemory);
1143          return JSON_Failure;
1144       }
1145       pName->pNextName = parser->pMemberNames->pFirstName;
1146       pName->length = parser->tokenBytesUsed;
1147       memcpy(pName->pBytes, parser->pTokenBytes, parser->tokenBytesUsed);
1148       parser->pMemberNames->pFirstName = pName;
1149    }
1150    return JSON_Success;
1151 }
1152 
/* Returns every parser field except the memory suite to its default
   value.

   isInitialized is zero when the parser's fields have never been set up,
   in which case the token buffer is pointed at the built-in default buffer
   and the member-name stack is simply cleared. When it is non-zero, any
   member-name lists still on the stack are freed, while the token buffer
   and the symbol stack that have already been allocated, if any, are kept.
   If the client wants to reclaim the memory used by those buffers, he
   needs to free the parser and create a new one. */
static void JSON_Parser_ResetData(JSON_Parser parser, int isInitialized)
{
   /* Settings and lexer/token state. */
   parser->userData                = NULL;
   parser->flags                   = PARSER_DEFAULT_FLAGS;
   parser->inputEncoding           = JSON_UnknownEncoding;
   parser->stringEncoding          = JSON_UTF8;
   parser->numberEncoding          = JSON_UTF8;
   parser->token                   = T_NONE;
   parser->tokenAttributes         = 0;
   parser->error                   = JSON_Error_None;
   parser->errorOffset             = 0;
   parser->lexerState              = LEXING_WHITESPACE;
   parser->lexerBits               = 0;

   /* Locations and nesting depth. */
   parser->codepointLocationByte   = 0;
   parser->codepointLocationLine   = 0;
   parser->codepointLocationColumn = 0;
   parser->tokenLocationByte       = 0;
   parser->tokenLocationLine       = 0;
   parser->tokenLocationColumn     = 0;
   parser->depth                   = 0;

   /* Token buffer usage and limits. */
   parser->tokenBytesUsed          = 0;
   parser->maxStringLength         = SIZE_MAX;
   parser->maxNumberLength         = SIZE_MAX;

   if (isInitialized)
   {
      /* Keep the token buffer, but drop any leftover member names. */
      while (parser->pMemberNames)
         JSON_Parser_PopMemberNameList(parser);
   }
   else
   {
      parser->pTokenBytes      = parser->defaultTokenBytes;
      parser->tokenBytesLength = sizeof(parser->defaultTokenBytes);
      parser->pMemberNames     = NULL;
   }

   Decoder_Reset(&parser->decoderData);
   Grammarian_Reset(&parser->grammarianData, isInitialized);

   /* No handlers are registered after a reset. */
   parser->encodingDetectedHandler = NULL;
   parser->nullHandler             = NULL;
   parser->booleanHandler          = NULL;
   parser->stringHandler           = NULL;
   parser->numberHandler           = NULL;
   parser->specialNumberHandler    = NULL;
   parser->startObjectHandler      = NULL;
   parser->endObjectHandler        = NULL;
   parser->objectMemberHandler     = NULL;
   parser->startArrayHandler       = NULL;
   parser->endArrayHandler         = NULL;
   parser->arrayItemHandler        = NULL;

   parser->state = PARSER_RESET; /* do this last! */
}
1212 
/* Writes a null terminator after the token bytes recorded so far, without
   changing tokenBytesUsed. The terminator is as wide as the shortest
   sequence of the token's output encoding: the number encoding for number
   tokens, the string encoding for everything else. */
static void JSON_Parser_NullTerminateToken(JSON_Parser parser)
{
   Encoding encoding = parser->stringEncoding;

   if (parser->token == T_NUMBER)
      encoding = parser->numberEncoding;

   /* Because we always ensure that there are LONGEST_ENCODING_SEQUENCE bytes
      available at the end of the token buffer when we record codepoints, we
      can write the null terminator to the buffer with impunity. */
   memset(parser->pTokenBytes + parser->tokenBytesUsed, 0,
         (size_t)SHORTEST_ENCODING_SEQUENCE(encoding));
}
1222 
JSON_Parser_FlushParser(JSON_Parser parser)1223 static JSON_Status JSON_Parser_FlushParser(JSON_Parser parser)
1224 {
1225    /* The symbol stack should be empty when parsing finishes. */
1226    if (!Grammarian_FinishedDocument(&parser->grammarianData))
1227    {
1228       JSON_Parser_SetErrorAtCodepoint(parser, JSON_Error_ExpectedMoreTokens);
1229       return JSON_Failure;
1230    }
1231    return JSON_Success;
1232 }
1233 
1234 typedef JSON_Parser_HandlerResult (JSON_CALL * JSON_Parser_SimpleTokenHandler)(JSON_Parser parser);
JSON_Parser_CallSimpleTokenHandler(JSON_Parser parser,JSON_Parser_SimpleTokenHandler handler)1235 static JSON_Status JSON_Parser_CallSimpleTokenHandler(JSON_Parser parser, JSON_Parser_SimpleTokenHandler handler)
1236 {
1237    if (handler)
1238    {
1239       JSON_Parser_HandlerResult result;
1240       SET_FLAGS_ON(ParserState, parser->state, PARSER_IN_TOKEN_HANDLER);
1241       result = handler(parser);
1242       SET_FLAGS_OFF(ParserState, parser->state, PARSER_IN_TOKEN_HANDLER);
1243       if (result != JSON_Parser_Continue)
1244       {
1245          JSON_Parser_SetErrorAtToken(parser, JSON_Error_AbortedByHandler);
1246          return JSON_Failure;
1247       }
1248    }
1249    return JSON_Success;
1250 }
1251 
JSON_Parser_CallBooleanHandler(JSON_Parser parser)1252 static JSON_Status JSON_Parser_CallBooleanHandler(JSON_Parser parser)
1253 {
1254    if (parser->booleanHandler)
1255    {
1256       JSON_Parser_HandlerResult result;
1257       SET_FLAGS_ON(ParserState, parser->state, PARSER_IN_TOKEN_HANDLER);
1258       result = parser->booleanHandler(parser, parser->token == T_TRUE ? JSON_True : JSON_False);
1259       SET_FLAGS_OFF(ParserState, parser->state, PARSER_IN_TOKEN_HANDLER);
1260       if (result != JSON_Parser_Continue)
1261       {
1262          JSON_Parser_SetErrorAtToken(parser, JSON_Error_AbortedByHandler);
1263          return JSON_Failure;
1264       }
1265    }
1266    return JSON_Success;
1267 }
1268 
JSON_Parser_CallStringHandler(JSON_Parser parser,int isObjectMember)1269 static JSON_Status JSON_Parser_CallStringHandler(JSON_Parser parser, int isObjectMember)
1270 {
1271    JSON_Parser_StringHandler handler = isObjectMember ? parser->objectMemberHandler : parser->stringHandler;
1272    if (handler)
1273    {
1274       JSON_Parser_HandlerResult result;
1275       JSON_Parser_NullTerminateToken(parser);
1276       SET_FLAGS_ON(ParserState, parser->state, PARSER_IN_TOKEN_HANDLER);
1277       result = handler(parser, (char*)parser->pTokenBytes, parser->tokenBytesUsed, parser->tokenAttributes);
1278       SET_FLAGS_OFF(ParserState, parser->state, PARSER_IN_TOKEN_HANDLER);
1279 
1280       if (result != JSON_Parser_Continue)
1281       {
1282          JSON_Parser_SetErrorAtToken(parser,
1283                (isObjectMember && result == JSON_Parser_TreatAsDuplicateObjectMember)
1284                ? JSON_Error_DuplicateObjectMember
1285                : JSON_Error_AbortedByHandler);
1286          return JSON_Failure;
1287       }
1288    }
1289    return JSON_Success;
1290 }
1291 
JSON_Parser_CallNumberHandler(JSON_Parser parser)1292 static JSON_Status JSON_Parser_CallNumberHandler(JSON_Parser parser)
1293 {
1294    if (parser->numberHandler)
1295    {
1296       JSON_Parser_HandlerResult result;
1297       JSON_Parser_NullTerminateToken(parser);
1298       SET_FLAGS_ON(ParserState, parser->state, PARSER_IN_TOKEN_HANDLER);
1299       result = parser->numberHandler(parser, (char*)parser->pTokenBytes,
1300             parser->tokenBytesUsed, parser->tokenAttributes);
1301 
1302       SET_FLAGS_OFF(ParserState, parser->state, PARSER_IN_TOKEN_HANDLER);
1303 
1304       if (result != JSON_Parser_Continue)
1305       {
1306          JSON_Parser_SetErrorAtToken(parser, JSON_Error_AbortedByHandler);
1307          return JSON_Failure;
1308       }
1309    }
1310    return JSON_Success;
1311 }
1312 
JSON_Parser_CallSpecialNumberHandler(JSON_Parser parser)1313 static JSON_Status JSON_Parser_CallSpecialNumberHandler(JSON_Parser parser)
1314 {
1315    if (parser->specialNumberHandler)
1316    {
1317       JSON_Parser_HandlerResult result;
1318       SET_FLAGS_ON(ParserState, parser->state, PARSER_IN_TOKEN_HANDLER);
1319       result = parser->specialNumberHandler(parser, parser->token == T_NAN ? JSON_NaN :
1320             (parser->token == T_INFINITY ? JSON_Infinity : JSON_NegativeInfinity));
1321       SET_FLAGS_OFF(ParserState, parser->state, PARSER_IN_TOKEN_HANDLER);
1322 
1323       if (result != JSON_Parser_Continue)
1324       {
1325          JSON_Parser_SetErrorAtToken(parser, JSON_Error_AbortedByHandler);
1326          return JSON_Failure;
1327       }
1328    }
1329    return JSON_Success;
1330 }
1331 
JSON_Parser_HandleGrammarEvents(JSON_Parser parser,byte emit)1332 static JSON_Status JSON_Parser_HandleGrammarEvents(JSON_Parser parser, byte emit)
1333 {
1334    if (GET_FLAGS(emit, EMIT_ARRAY_ITEM))
1335    {
1336       if (!JSON_Parser_CallSimpleTokenHandler(parser, parser->arrayItemHandler))
1337       {
1338          return JSON_Failure;
1339       }
1340       SET_FLAGS_OFF(byte, emit, EMIT_ARRAY_ITEM);
1341    }
1342    switch (emit)
1343    {
1344       case EMIT_NULL:
1345          if (!JSON_Parser_CallSimpleTokenHandler(parser, parser->nullHandler))
1346             return JSON_Failure;
1347          break;
1348 
1349       case EMIT_BOOLEAN:
1350          if (!JSON_Parser_CallBooleanHandler(parser))
1351             return JSON_Failure;
1352          break;
1353 
1354       case EMIT_STRING:
1355          if (!JSON_Parser_CallStringHandler(parser, 0/* isObjectMember */))
1356             return JSON_Failure;
1357          break;
1358 
1359       case EMIT_NUMBER:
1360          if (!JSON_Parser_CallNumberHandler(parser))
1361             return JSON_Failure;
1362          break;
1363 
1364       case EMIT_SPECIAL_NUMBER:
1365          if (!JSON_Parser_CallSpecialNumberHandler(parser))
1366             return JSON_Failure;
1367          break;
1368 
1369       case EMIT_START_OBJECT:
1370          if (!JSON_Parser_CallSimpleTokenHandler(parser, parser->startObjectHandler) ||
1371                !JSON_Parser_StartContainer(parser, 1/*isObject*/))
1372             return JSON_Failure;
1373          break;
1374 
1375       case EMIT_END_OBJECT:
1376          JSON_Parser_EndContainer(parser, 1/*isObject*/);
1377          if (!JSON_Parser_CallSimpleTokenHandler(parser, parser->endObjectHandler))
1378             return JSON_Failure;
1379          break;
1380       case EMIT_OBJECT_MEMBER:
1381          if (!JSON_Parser_AddMemberNameToList(parser) || /* will fail if member is duplicate */
1382                !JSON_Parser_CallStringHandler(parser, 1 /* isObjectMember */))
1383             return JSON_Failure;
1384          break;
1385 
1386       case EMIT_START_ARRAY:
1387          if (!JSON_Parser_CallSimpleTokenHandler(parser, parser->startArrayHandler) ||
1388                !JSON_Parser_StartContainer(parser, 0/*isObject*/))
1389             return JSON_Failure;
1390          break;
1391 
1392       case EMIT_END_ARRAY:
1393          JSON_Parser_EndContainer(parser, 0/*isObject*/);
1394          if (!JSON_Parser_CallSimpleTokenHandler(parser, parser->endArrayHandler))
1395             return JSON_Failure;
1396          break;
1397    }
1398 
1399    if (!parser->depth && GET_FLAGS(parser->flags, PARSER_EMBEDDED_DOCUMENT))
1400    {
1401       JSON_Parser_SetErrorAtCodepoint(parser, JSON_Error_StoppedAfterEmbeddedDocument);
1402       return JSON_Failure;
1403    }
1404    return JSON_Success;
1405 }
1406 
JSON_Parser_ProcessToken(JSON_Parser parser)1407 static JSON_Status JSON_Parser_ProcessToken(JSON_Parser parser)
1408 {
1409    GrammarianOutput output;
1410    output = Grammarian_ProcessToken(&parser->grammarianData, parser->token, &parser->memorySuite);
1411    switch (GRAMMARIAN_RESULT_CODE(output))
1412    {
1413       case ACCEPTED_TOKEN:
1414          if (!JSON_Parser_HandleGrammarEvents(parser, GRAMMARIAN_EVENT(output)))
1415             return JSON_Failure;
1416          break;
1417 
1418       case REJECTED_TOKEN:
1419          JSON_Parser_SetErrorAtToken(parser, JSON_Error_UnexpectedToken);
1420          return JSON_Failure;
1421 
1422       case SYMBOL_STACK_FULL:
1423          JSON_Parser_SetErrorAtCodepoint(parser, JSON_Error_OutOfMemory);
1424          return JSON_Failure;
1425    }
1426 
1427    /* Reset the lexer to prepare for the next token. */
1428    parser->lexerState = LEXING_WHITESPACE;
1429    parser->lexerBits = 0;
1430    parser->token = T_NONE;
1431    parser->tokenAttributes = 0;
1432    parser->tokenBytesUsed = 0;
1433    return JSON_Success;
1434 }
1435 
1436 /* Lexer functions. */
1437 
/* The characters that must follow the first character of each literal
   ("ull" for null, "rue" for true, "alse" for false, "aN" for NaN and
   "nfinity" for Infinity), stored back to back, each run terminated by a
   0 byte. The *_START_INDEX constants below give the index at which each
   literal's run begins. */
static const byte expectedLiteralChars[] = { 'u', 'l', 'l', 0, 'r', 'u', 'e', 0, 'a', 'l', 's', 'e', 0, 'a', 'N', 0, 'n', 'f', 'i', 'n', 'i', 't', 'y', 0  };

#define NULL_LITERAL_EXPECTED_CHARS_START_INDEX     0
#define TRUE_LITERAL_EXPECTED_CHARS_START_INDEX     4
#define FALSE_LITERAL_EXPECTED_CHARS_START_INDEX    8
#define NAN_LITERAL_EXPECTED_CHARS_START_INDEX      13
#define INFINITY_LITERAL_EXPECTED_CHARS_START_INDEX 16
1445 
1446 /* Forward declaration. */
1447 static JSON_Status JSON_Parser_FlushLexer(JSON_Parser parser);
1448 static JSON_Status JSON_Parser_ProcessCodepoint(
1449       JSON_Parser parser, Codepoint c, size_t encodedLength);
1450 
JSON_Parser_HandleInvalidEncodingSequence(JSON_Parser parser,size_t encodedLength)1451 static JSON_Status JSON_Parser_HandleInvalidEncodingSequence(
1452       JSON_Parser parser, size_t encodedLength)
1453 {
1454    if (parser->token == T_STRING && GET_FLAGS(parser->flags, PARSER_REPLACE_INVALID))
1455    {
1456       /* Since we're inside a string token, replacing the invalid sequence
1457          with the Unicode replacement character as requested by the client
1458          is a viable way to avoid a parse failure. Outside a string token,
1459          such a replacement would simply trigger JSON_Error_UnknownToken
1460          when we tried to process the replacement character, so it's less
1461          confusing to stick with JSON_Error_InvalidEncodingSequence in that
1462          case. */
1463       SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_ContainsReplacedCharacter);
1464       return JSON_Parser_ProcessCodepoint(parser, REPLACEMENT_CHARACTER_CODEPOINT, encodedLength);
1465    }
1466    else if (!parser->depth && GET_FLAGS(parser->flags, PARSER_EMBEDDED_DOCUMENT))
1467    {
1468       /* Since we're parsing the top-level value of an embedded
1469          document, assume that the invalid encoding sequence we've
1470          encountered does not actually belong to the document, and
1471          finish parsing by pretending that we've encountered EOF
1472          instead of an invalid sequence. If the content is valid,
1473          this will fail with JSON_Error_StoppedAfterEmbeddedDocument;
1474          otherwise, it will fail with an appropriate error. */
1475       return (JSON_Status)(JSON_Parser_FlushLexer(parser) && JSON_Parser_FlushParser(parser));
1476    }
1477    JSON_Parser_SetErrorAtCodepoint(parser, JSON_Error_InvalidEncodingSequence);
1478    return JSON_Failure;
1479 }
1480 
JSON_Parser_HandleInvalidNumber(JSON_Parser parser,Codepoint c,int codepointsSinceValidNumber,TokenAttributes attributesToRemove)1481 static JSON_Status JSON_Parser_HandleInvalidNumber(JSON_Parser parser,
1482       Codepoint c, int codepointsSinceValidNumber, TokenAttributes attributesToRemove)
1483 {
1484    SET_FLAGS_OFF(TokenAttributes, parser->tokenAttributes, attributesToRemove);
1485    if (!parser->depth && GET_FLAGS(parser->flags, PARSER_EMBEDDED_DOCUMENT))
1486    {
1487       /* The invalid number is the top-level value of an embedded document,
1488          and it has a prefix that can be interpreted as a valid number.
1489          We want to backtrack so that we are at the end of that prefix,
1490          and then process the valid token.
1491 
1492          Note that backtracking requires us to make three assumptions, which
1493          are always valid in the context of a number token:
1494 
1495          1. The input encoding is not JSON_UnknownEncoding.
1496 
1497          2 The codepoints we are backing up across are all in the range
1498          U+0000 - U+007F, aka ASCII, so we can assume the number of
1499          bytes comprising them based on the input encoding.
1500 
1501          3. The codepoints we are backing up across do not include any
1502          line breaks, so we can assume that the line number stays the
1503          same and the column number can simply be decremented.
1504 
1505          For example:
1506 
1507          "01"     => "0"
1508          "123.!"  => "123"
1509          "123e!"  => "123"
1510          "123e+!" => "123"
1511          "123e-!" => "123"
1512          "1.2e!"  => "1.2"
1513          "1.2e+!" => "1.2"
1514          "1.2e-!" => "1.2"
1515          */
1516       parser->codepointLocationByte -= (size_t)codepointsSinceValidNumber
1517          * (size_t)SHORTEST_ENCODING_SEQUENCE(parser->inputEncoding);
1518       parser->codepointLocationColumn -= (size_t)codepointsSinceValidNumber;
1519       parser->tokenBytesUsed -= (size_t)codepointsSinceValidNumber
1520          * (size_t)SHORTEST_ENCODING_SEQUENCE(parser->numberEncoding);
1521       return JSON_Parser_ProcessToken(parser); /* always fails */
1522    }
1523    /* Allow JSON_Parser_FlushLexer() to fail. */
1524    else if (c == EOF_CODEPOINT)
1525       return JSON_Success;
1526 
1527    JSON_Parser_SetErrorAtToken(parser, JSON_Error_InvalidNumber);
1528    return JSON_Failure;
1529 }
1530 
/* Marks the start of a new token: stores the token kind and copies the
   current codepoint location (byte, line, column) into the token location
   fields. */
static void JSON_Parser_StartToken(JSON_Parser parser, Symbol token)
{
   /* Snapshot the current codepoint position as the token position. */
   parser->tokenLocationColumn = parser->codepointLocationColumn;
   parser->tokenLocationLine   = parser->codepointLocationLine;
   parser->tokenLocationByte   = parser->codepointLocationByte;

   parser->token = token;
}
1538 
/* Feeds a single codepoint to the lexer state machine.

   parser        - parser whose lexer state, token buffer and location
                   counters are updated.
   c             - the codepoint to process. EOF_CODEPOINT is a sentinel
                   pushed by JSON_Parser_FlushLexer() to finish a pending
                   token; it never advances the location counters.
   encodedLength - byte count added to parser->codepointLocationByte when
                   c is consumed.

   Returns JSON_Success when the codepoint was consumed, or JSON_Failure
   when an error was set here or a helper (JSON_Parser_ProcessToken,
   JSON_Parser_HandleInvalidNumber) reported failure.

   The switch dispatches on parser->lexerState; each case leaves it in one
   of four ways:
     - goto advance: consume c without recording it;
     - goto record{String,Number}CodepointAndAdvance: append
       codepointToRecord to the token buffer, then consume c;
     - goto reprocess: the pending token has just been handed to
       JSON_Parser_ProcessToken(), and the same c is dispatched again
       (presumably against the lexer state that call resets -- the reset
       is not visible in this function);
     - return JSON_Failure. */
JSON_Parser_ProcessCodepoint(JSON_Parser parser,Codepoint c,size_t encodedLength)1539 static JSON_Status JSON_Parser_ProcessCodepoint(JSON_Parser parser, Codepoint c, size_t encodedLength)
1540 {
1541    Encoding tokenEncoding;
1542    size_t maxTokenLength;
   /* Set when c completes a token; the token is then processed at the very
      end, after the location counters have advanced past c. */
1543    int tokenFinished           = 0;
   /* Codepoint appended to the token buffer by the record* labels; it can
      differ from c (e.g. a decoded escape sequence). */
1544    Codepoint codepointToRecord = EOF_CODEPOINT;
1545 
1546    /* If the previous codepoint was U+000D (CARRIAGE RETURN), and the current
1547       codepoint is U+000A (LINE FEED), then treat the 2 codepoints as a single
1548       line break. */
1549    if (GET_FLAGS(parser->state, PARSER_AFTER_CARRIAGE_RETURN))
1550    {
      /* The line counter was already incremented for the CR and will be
         incremented again for this LF at "advance"; decrementing here makes
         the CR LF pair count as one line break. */
1551       if (c == LINE_FEED_CODEPOINT)
1552          parser->codepointLocationLine--;
1553       SET_FLAGS_OFF(ParserState, parser->state, PARSER_AFTER_CARRIAGE_RETURN);
1554    }
1555 
1556 reprocess:
1557 
1558    switch (parser->lexerState)
1559    {
1560       case LEXING_WHITESPACE:
1561          if (c == '{')
1562          {
1563             JSON_Parser_StartToken(parser, T_LEFT_CURLY);
1564             tokenFinished = 1;
1565          }
1566          else if (c == '}')
1567          {
1568             JSON_Parser_StartToken(parser, T_RIGHT_CURLY);
1569             tokenFinished = 1;
1570          }
1571          else if (c == '[')
1572          {
1573             JSON_Parser_StartToken(parser, T_LEFT_SQUARE);
1574             tokenFinished = 1;
1575          }
1576          else if (c == ']')
1577          {
1578             JSON_Parser_StartToken(parser, T_RIGHT_SQUARE);
1579             tokenFinished = 1;
1580          }
1581          else if (c == ':')
1582          {
1583             JSON_Parser_StartToken(parser, T_COLON);
1584             tokenFinished = 1;
1585          }
1586          else if (c == ',')
1587          {
1588             JSON_Parser_StartToken(parser, T_COMMA);
1589             tokenFinished = 1;
1590          }
1591          else if (c == 'n')
1592          {
1593             JSON_Parser_StartToken(parser, T_NULL);
1594             parser->lexerBits = NULL_LITERAL_EXPECTED_CHARS_START_INDEX;
1595             parser->lexerState = LEXING_LITERAL;
1596          }
1597          else if (c == 't')
1598          {
1599             JSON_Parser_StartToken(parser, T_TRUE);
1600             parser->lexerBits = TRUE_LITERAL_EXPECTED_CHARS_START_INDEX;
1601             parser->lexerState = LEXING_LITERAL;
1602          }
1603          else if (c == 'f')
1604          {
1605             JSON_Parser_StartToken(parser, T_FALSE);
1606             parser->lexerBits = FALSE_LITERAL_EXPECTED_CHARS_START_INDEX;
1607             parser->lexerState = LEXING_LITERAL;
1608          }
1609          else if (c == '"')
1610          {
1611             JSON_Parser_StartToken(parser, T_STRING);
1612             parser->lexerState = LEXING_STRING;
1613          }
1614          else if (c == '-')
1615          {
1616             JSON_Parser_StartToken(parser, T_NUMBER);
1617             parser->tokenAttributes = JSON_IsNegative;
1618             codepointToRecord = '-';
1619             parser->lexerState = LEXING_NUMBER_AFTER_MINUS;
1620             goto recordNumberCodepointAndAdvance;
1621          }
1622          else if (c == '0')
1623          {
1624             JSON_Parser_StartToken(parser, T_NUMBER);
1625             codepointToRecord = '0';
1626             parser->lexerState = LEXING_NUMBER_AFTER_LEADING_ZERO;
1627             goto recordNumberCodepointAndAdvance;
1628          }
1629          else if (c >= '1' && c <= '9')
1630          {
1631             JSON_Parser_StartToken(parser, T_NUMBER);
1632             codepointToRecord = c;
1633             parser->lexerState = LEXING_NUMBER_DECIMAL_DIGITS;
1634             goto recordNumberCodepointAndAdvance;
1635          }
1636          else if (c == ' ' || c == TAB_CODEPOINT || c == LINE_FEED_CODEPOINT ||
1637                c == CARRIAGE_RETURN_CODEPOINT || c == EOF_CODEPOINT)
1638          {
1639             /* Ignore whitespace between tokens. */
1640          }
1641          else if (c == BOM_CODEPOINT && parser->codepointLocationByte == 0)
1642          {
1643             /* OK, we'll allow the BOM. */
1644             if (GET_FLAGS(parser->flags, PARSER_ALLOW_BOM)) { }
1645             else
1646             {
1647                JSON_Parser_SetErrorAtCodepoint(parser, JSON_Error_BOMNotAllowed);
1648                return JSON_Failure;
1649             }
1650          }
1651          else if (c == '/' && GET_FLAGS(parser->flags, PARSER_ALLOW_COMMENTS))
1652          {
1653             /* Comments are not real tokens, but we save the location
1654                of the comment as the token location in case of an error. */
1655             parser->tokenLocationByte = parser->codepointLocationByte;
1656             parser->tokenLocationLine = parser->codepointLocationLine;
1657             parser->tokenLocationColumn = parser->codepointLocationColumn;
1658             parser->lexerState = LEXING_COMMENT_AFTER_SLASH;
1659          }
1660          else if (c == 'N' && GET_FLAGS(parser->flags, PARSER_ALLOW_SPECIAL_NUMBERS))
1661          {
1662             JSON_Parser_StartToken(parser, T_NAN);
1663             parser->lexerBits = NAN_LITERAL_EXPECTED_CHARS_START_INDEX;
1664             parser->lexerState = LEXING_LITERAL;
1665          }
1666          else if (c == 'I' && GET_FLAGS(parser->flags, PARSER_ALLOW_SPECIAL_NUMBERS))
1667          {
1668             JSON_Parser_StartToken(parser, T_INFINITY);
1669             parser->lexerBits = INFINITY_LITERAL_EXPECTED_CHARS_START_INDEX;
1670             parser->lexerState = LEXING_LITERAL;
1671          }
1672          else
1673          {
1674             JSON_Parser_SetErrorAtCodepoint(parser, JSON_Error_UnknownToken);
1675             return JSON_Failure;
1676          }
1677          goto advance;
1678 
1679       case LEXING_LITERAL:
1680          /* While lexing a literal we store an index into expectedLiteralChars
1681             in lexerBits. */
1682          if (expectedLiteralChars[parser->lexerBits])
1683          {
1684             /* The codepoint should match the next character in the literal. */
1685             if (c != expectedLiteralChars[parser->lexerBits])
1686             {
1687                JSON_Parser_SetErrorAtToken(parser, JSON_Error_UnknownToken);
1688                return JSON_Failure;
1689             }
1690             parser->lexerBits++;
1691 
1692             /* If the literal is the top-level value of an embedded document,
1693                process it as soon as we consume its last expected codepoint.
1694                Normally we defer processing until the following codepoint
1695                has been examined, so that we can treat sequences like "nullx"
1696                as a single, unknown token rather than a null literal followed
1697                by an unknown token. */
1698             if (!parser->depth && GET_FLAGS(parser->flags, PARSER_EMBEDDED_DOCUMENT) &&
1699                   !expectedLiteralChars[parser->lexerBits])
1700                tokenFinished = 1;
1701          }
1702          else
1703          {
1704             /* The literal should be finished, so the codepoint should not be
1705                a plausible JSON literal character, but rather EOF, whitespace,
1706                or the first character of the next token. */
1707             if ((c >= 'A' && c <= 'Z') ||
1708                   (c >= 'a' && c <= 'z') ||
1709                   (c >= '0' && c <= '9') ||
1710                   (c == '_'))
1711             {
1712                JSON_Parser_SetErrorAtToken(parser, JSON_Error_UnknownToken);
1713                return JSON_Failure;
1714             }
1715             if (!JSON_Parser_ProcessToken(parser))
1716                return JSON_Failure;
1717             goto reprocess;
1718          }
1719          goto advance;
1720 
1721       case LEXING_STRING:
1722          /* Allow JSON_Parser_FlushLexer() to fail. */
1723          if (c == EOF_CODEPOINT) { }
1724          else if (c == '"')
1725             tokenFinished = 1;
1726          else if (c == '\\')
1727             parser->lexerState = LEXING_STRING_ESCAPE;
1728          else if (c < 0x20 && !GET_FLAGS(parser->flags, PARSER_ALLOW_CONTROL_CHARS))
1729          {
1730             /* ASCII control characters (U+0000 - U+001F) are not allowed to
1731                appear unescaped in string values unless specifically allowed. */
1732             JSON_Parser_SetErrorAtCodepoint(parser, JSON_Error_UnescapedControlCharacter);
1733             return JSON_Failure;
1734          }
1735          else
1736          {
1737             codepointToRecord = c;
1738             goto recordStringCodepointAndAdvance;
1739          }
1740          goto advance;
1741 
1742       case LEXING_STRING_ESCAPE:
1743          if (c == EOF_CODEPOINT)
1744          {
1745             /* Allow JSON_Parser_FlushLexer() to fail. */
1746          }
1747          else
1748          {
1749             if (c == 'u')
1750                parser->lexerState = LEXING_STRING_HEX_ESCAPE_BYTE_1;
1751             else
1752             {
1753                if (c == '"' || c == '\\' || c == '/')
1754                   codepointToRecord = c;
1755                else if (c == 'b')
1756                   codepointToRecord = BACKSPACE_CODEPOINT;
1757                else if (c == 't')
1758                   codepointToRecord = TAB_CODEPOINT;
1759                else if (c == 'n')
1760                   codepointToRecord = LINE_FEED_CODEPOINT;
1761                else if (c == 'f')
1762                   codepointToRecord = FORM_FEED_CODEPOINT;
1763                else if (c == 'r')
1764                   codepointToRecord = CARRIAGE_RETURN_CODEPOINT;
1765                else
1766                {
1767                   /* The current codepoint location is the first character after
1768                      the backslash that started the escape sequence. The error
1769                      location should be the beginning of the escape sequence, 1
1770                      character earlier. */
1771                   JSON_Parser_SetErrorAtStringEscapeSequenceStart(parser, JSON_Error_InvalidEscapeSequence, 1);
1772                   return JSON_Failure;
1773                }
1774                parser->lexerState = LEXING_STRING;
1775                goto recordStringCodepointAndAdvance;
1776             }
1777          }
1778          goto advance;
1779 
1780       case LEXING_STRING_HEX_ESCAPE_BYTE_1:
1781       case LEXING_STRING_HEX_ESCAPE_BYTE_2:
1782       case LEXING_STRING_HEX_ESCAPE_BYTE_3:
1783       case LEXING_STRING_HEX_ESCAPE_BYTE_4:
1784       case LEXING_STRING_HEX_ESCAPE_BYTE_5:
1785       case LEXING_STRING_HEX_ESCAPE_BYTE_6:
1786       case LEXING_STRING_HEX_ESCAPE_BYTE_7:
1787       case LEXING_STRING_HEX_ESCAPE_BYTE_8:
1788          /* Allow JSON_Parser_FlushLexer() to fail. */
1789          if (c != EOF_CODEPOINT)
1790          {
1791             /* While lexing a string hex escape sequence we store the bytes
1792                of the escaped codepoint in the low 2 bytes of lexerBits. If
1793                the escape sequence represents a leading surrogate, we shift
1794                the leading surrogate into the high 2 bytes and lex a second
1795                hex escape sequence (which should be a trailing surrogate). */
            /* Index (0-3) of this hex digit within its 4-digit escape. The
               mask folds BYTE_5..BYTE_8 onto the same range; this (and the
               lexerState++ below) assumes the eight BYTE_n states are
               numbered consecutively -- confirm against the enum. */
1796             int byteNumber = (parser->lexerState - LEXING_STRING_HEX_ESCAPE_BYTE_1) & 0x3;
1797             uint32_t nibble;
1798             if (c >= '0' && c <= '9')
1799                nibble = c - '0';
1800             else if (c >= 'A' && c <= 'F')
1801                nibble = c - 'A' + 10;
1802             else if (c >= 'a' && c <= 'f')
1803                nibble = c - 'a' + 10;
1804             else
1805             {
1806                /* The current codepoint location is one of the 4 hex digit
1807                   character slots in the hex escape sequence. The error
1808                   location should be the beginning of the hex escape
1809                   sequence, between 2 and 5 characters earlier. */
1810                int codepointsAgo = 2 /* for "\u" */ + byteNumber;
1811                JSON_Parser_SetErrorAtStringEscapeSequenceStart(
1812                      parser, JSON_Error_InvalidEscapeSequence, codepointsAgo);
1813                return JSON_Failure;
1814             }
1815             /* Store the hex digit's bits in the appropriate byte of lexerBits. */
1816             nibble <<= (3 - byteNumber) * 4 /* shift left by 12, 8, 4, 0 */ ;
1817             parser->lexerBits |= nibble;
1818             if (parser->lexerState == LEXING_STRING_HEX_ESCAPE_BYTE_4)
1819             {
1820                /* The escape sequence is complete. We need to check whether
1821                   it represents a leading surrogate (which implies that it
1822                   will be immediately followed by a hex-escaped trailing
1823                   surrogate), a trailing surrogate (which is invalid), or a
1824                   valid codepoint (which should simply be appended to the
1825                   string token value). */
1826                if (IS_LEADING_SURROGATE(parser->lexerBits))
1827                {
1828                   /* Shift the leading surrogate into the high 2 bytes of
1829                      lexerBits so that the trailing surrogate can be stored
1830                      in the low 2 bytes. */
1831                   parser->lexerBits <<= 16;
1832                   parser->lexerState = LEXING_STRING_TRAILING_SURROGATE_HEX_ESCAPE_BACKSLASH;
1833                }
1834                else if (IS_TRAILING_SURROGATE(parser->lexerBits))
1835                {
1836                   /* The current codepoint location is the last hex digit
1837                      of the hex escape sequence. The error location should
1838                      be the beginning of the hex escape sequence, 5
1839                      characters earlier. */
1840                   JSON_Parser_SetErrorAtStringEscapeSequenceStart(
1841                         parser, JSON_Error_UnpairedSurrogateEscapeSequence, 5);
1842                   return JSON_Failure;
1843                }
1844                else
1845                {
1846                   /* The escape sequence represents a BMP codepoint. */
1847                   codepointToRecord = parser->lexerBits;
1848                   parser->lexerBits = 0;
1849                   parser->lexerState = LEXING_STRING;
1850                   goto recordStringCodepointAndAdvance;
1851                }
1852             }
1853             else if (parser->lexerState == LEXING_STRING_HEX_ESCAPE_BYTE_8)
1854             {
1855                /* The second hex escape sequence is complete. We need to
1856                   check whether it represents a trailing surrogate as
1857                   expected. If so, the surrogate pair represents a single
1858                   non-BMP codepoint. */
1859                if (!IS_TRAILING_SURROGATE(parser->lexerBits & 0xFFFF))
1860                {
1861                   /* The current codepoint location is the last hex digit of
1862                      the second hex escape sequence. The error location
1863                      should be the beginning of the leading surrogate
1864                      hex escape sequence, 11 characters earlier. */
1865                   JSON_Parser_SetErrorAtStringEscapeSequenceStart(
1866                         parser, JSON_Error_UnpairedSurrogateEscapeSequence, 11);
1867                   return JSON_Failure;
1868                }
1869                /* The escape sequence represents a non-BMP codepoint. */
1870                codepointToRecord = CODEPOINT_FROM_SURROGATES(parser->lexerBits);
1871                parser->lexerBits = 0;
1872                parser->lexerState = LEXING_STRING;
1873                goto recordStringCodepointAndAdvance;
1874             }
1875             else
1876                parser->lexerState++;
1877          }
1878          goto advance;
1879 
1880       case LEXING_STRING_TRAILING_SURROGATE_HEX_ESCAPE_BACKSLASH:
1881          if (c != EOF_CODEPOINT)
1882          {
1883             if (c != '\\')
1884             {
1885                /* The current codepoint location is the first character after
1886                   the leading surrogate hex escape sequence. The error
1887                   location should be the beginning of the leading surrogate
1888                   hex escape sequence, 6 characters earlier. */
1889                JSON_Parser_SetErrorAtStringEscapeSequenceStart(
1890                      parser, JSON_Error_UnpairedSurrogateEscapeSequence, 6);
1891                return JSON_Failure;
1892             }
1893             parser->lexerState = LEXING_STRING_TRAILING_SURROGATE_HEX_ESCAPE_U;
1894          }
1895          goto advance;
1896 
1897       case LEXING_STRING_TRAILING_SURROGATE_HEX_ESCAPE_U:
1898          if (c != EOF_CODEPOINT)
1899          {
1900             if (c != 'u')
1901             {
1902                /* Distinguish between a totally bogus escape sequence
1903                   and a valid one that just isn't the hex escape kind
1904                   that we require for a trailing surrogate. The current
1905                   codepoint location is the first character after the
1906                   backslash that should have introduced the trailing
1907                   surrogate hex escape sequence. */
1908                if (c == '"' || c == '\\' || c == '/' || c == 'b' ||
1909                      c == 't' || c == 'n' || c == 'f' || c == 'r')
1910                {
1911                   /* The error location should be at that beginning of the
1912                      leading surrogate's hex escape sequence, 7 characters
1913                      earlier. */
1914                   JSON_Parser_SetErrorAtStringEscapeSequenceStart(
1915                         parser, JSON_Error_UnpairedSurrogateEscapeSequence, 7);
1916                }
1917                else
1918                {
1919                   /* The error location should be at that backslash, 1
1920                      character earlier. */
1921                   JSON_Parser_SetErrorAtStringEscapeSequenceStart(
1922                         parser, JSON_Error_InvalidEscapeSequence, 1);
1923                }
1924                return JSON_Failure;
1925             }
1926             parser->lexerState = LEXING_STRING_HEX_ESCAPE_BYTE_5;
1927          }
1928          goto advance;
1929 
1930       case LEXING_NUMBER_AFTER_MINUS:
1931          if (c == EOF_CODEPOINT)
1932          {
1933             /* Allow JSON_Parser_FlushLexer() to fail. */
1934          }
1935          else if (c == 'I' && GET_FLAGS(parser->flags, PARSER_ALLOW_SPECIAL_NUMBERS))
1936          {
1937             parser->token      = T_NEGATIVE_INFINITY; /* changing horses mid-stream, so to speak */
1938             parser->lexerBits  = INFINITY_LITERAL_EXPECTED_CHARS_START_INDEX;
1939             parser->lexerState = LEXING_LITERAL;
1940          }
1941          else
1942          {
1943             if (c == '0')
1944             {
1945                codepointToRecord  = '0';
1946                parser->lexerState = LEXING_NUMBER_AFTER_LEADING_NEGATIVE_ZERO;
1947                goto recordNumberCodepointAndAdvance;
1948             }
1949             else if (c >= '1' && c <= '9')
1950             {
1951                codepointToRecord  = c;
1952                parser->lexerState = LEXING_NUMBER_DECIMAL_DIGITS;
1953                goto recordNumberCodepointAndAdvance;
1954             }
1955             else
1956             {
1957                /* We trigger an unknown token error rather than an invalid number
1958                   error so that "Foo" and "-Foo" trigger the same error. */
1959                JSON_Parser_SetErrorAtToken(parser, JSON_Error_UnknownToken);
1960                return JSON_Failure;
1961             }
1962          }
1963          goto advance;
1964 
1965       case LEXING_NUMBER_AFTER_LEADING_ZERO:
1966       case LEXING_NUMBER_AFTER_LEADING_NEGATIVE_ZERO:
1967          if (c == '.')
1968          {
1969             codepointToRecord = '.';
1970             SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_ContainsDecimalPoint);
1971             parser->lexerState = LEXING_NUMBER_AFTER_DOT;
1972             goto recordNumberCodepointAndAdvance;
1973          }
1974          else if (c == 'e' || c == 'E')
1975          {
1976             codepointToRecord = c;
1977             SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_ContainsExponent);
1978             parser->lexerState = LEXING_NUMBER_AFTER_E;
1979             goto recordNumberCodepointAndAdvance;
1980          }
1981          else if (c >= '0' && c <= '9')
1982          {
1983             /* JSON does not allow the integer part of a number to have any
1984                digits after a leading zero. */
1985             if (!JSON_Parser_HandleInvalidNumber(parser, c, 0, 0))
1986                return JSON_Failure;
1987          }
1988          else if ((c == 'x' || c == 'X') &&
1989                parser->lexerState == LEXING_NUMBER_AFTER_LEADING_ZERO &&
1990                GET_FLAGS(parser->flags, PARSER_ALLOW_HEX_NUMBERS))
1991          {
            /* The hex prefix is only recognized after an unsigned leading
               zero; in the LEXING_NUMBER_AFTER_LEADING_NEGATIVE_ZERO state
               an 'x' ends the number instead. */
1992             codepointToRecord = c;
1993             SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_IsHex);
1994             parser->lexerState = LEXING_NUMBER_AFTER_X;
1995             goto recordNumberCodepointAndAdvance;
1996          }
1997          else
1998          {
1999             /* The number is finished. */
2000             if (!JSON_Parser_ProcessToken(parser))
2001                return JSON_Failure;
2002             goto reprocess;
2003          }
2004          goto advance;
2005 
2006       case LEXING_NUMBER_AFTER_X:
2007          if ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f'))
2008          {
2009             codepointToRecord = c;
2010             parser->lexerState = LEXING_NUMBER_HEX_DIGITS;
2011             goto recordNumberCodepointAndAdvance;
2012          }
2013          else if (!JSON_Parser_HandleInvalidNumber(parser, c, 1, JSON_IsHex))
2014             return JSON_Failure;
2015          goto advance;
2016 
2017       case LEXING_NUMBER_HEX_DIGITS:
2018          if ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f'))
2019          {
2020             codepointToRecord = c;
2021             goto recordNumberCodepointAndAdvance;
2022          }
2023          /* The number is finished. */
2024          if (!JSON_Parser_ProcessToken(parser))
2025             return JSON_Failure;
2026          goto reprocess;
2027 
2028       case LEXING_NUMBER_DECIMAL_DIGITS:
2029          if (c >= '0' && c <= '9')
2030          {
2031             codepointToRecord = c;
2032             goto recordNumberCodepointAndAdvance;
2033          }
2034          else if (c == '.')
2035          {
2036             codepointToRecord = '.';
2037             SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_ContainsDecimalPoint);
2038             parser->lexerState = LEXING_NUMBER_AFTER_DOT;
2039             goto recordNumberCodepointAndAdvance;
2040          }
2041          else if (c == 'e' || c == 'E')
2042          {
2043             codepointToRecord = c;
2044             SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_ContainsExponent);
2045             parser->lexerState = LEXING_NUMBER_AFTER_E;
2046             goto recordNumberCodepointAndAdvance;
2047          }
2048          /* The number is finished. */
2049          if (!JSON_Parser_ProcessToken(parser))
2050             return JSON_Failure;
2051          goto reprocess;
2052 
2053       case LEXING_NUMBER_AFTER_DOT:
2054          if (c >= '0' && c <= '9')
2055          {
2056             codepointToRecord = c;
2057             parser->lexerState = LEXING_NUMBER_FRACTIONAL_DIGITS;
2058             goto recordNumberCodepointAndAdvance;
2059          }
2060          else if (!JSON_Parser_HandleInvalidNumber(parser, c, 1, JSON_ContainsDecimalPoint))
2061             return JSON_Failure;
2062          goto advance;
2063 
2064       case LEXING_NUMBER_FRACTIONAL_DIGITS:
2065          if (c >= '0' && c <= '9')
2066          {
2067             codepointToRecord = c;
2068             goto recordNumberCodepointAndAdvance;
2069          }
2070          else if (c == 'e' || c == 'E')
2071          {
2072             codepointToRecord = c;
2073             SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_ContainsExponent);
2074             parser->lexerState = LEXING_NUMBER_AFTER_E;
2075             goto recordNumberCodepointAndAdvance;
2076          }
2077          /* The number is finished. */
2078          if (!JSON_Parser_ProcessToken(parser))
2079             return JSON_Failure;
2080          goto reprocess;
2081 
2082       case LEXING_NUMBER_AFTER_E:
2083          if (c == '+')
2084          {
2085             codepointToRecord = c;
2086             parser->lexerState = LEXING_NUMBER_AFTER_EXPONENT_SIGN;
2087             goto recordNumberCodepointAndAdvance;
2088          }
2089          else if (c == '-')
2090          {
2091             codepointToRecord = c;
2092             SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_ContainsNegativeExponent);
2093             parser->lexerState = LEXING_NUMBER_AFTER_EXPONENT_SIGN;
2094             goto recordNumberCodepointAndAdvance;
2095          }
2096          else if (c >= '0' && c <= '9')
2097          {
2098             codepointToRecord = c;
2099             parser->lexerState = LEXING_NUMBER_EXPONENT_DIGITS;
2100             goto recordNumberCodepointAndAdvance;
2101          }
2102          else if (!JSON_Parser_HandleInvalidNumber(parser, c, 1, JSON_ContainsExponent))
2103             return JSON_Failure;
2104          goto advance;
2105 
2106       case LEXING_NUMBER_AFTER_EXPONENT_SIGN:
2107          if (c >= '0' && c <= '9')
2108          {
2109             codepointToRecord = c;
2110             parser->lexerState = LEXING_NUMBER_EXPONENT_DIGITS;
2111             goto recordNumberCodepointAndAdvance;
2112          }
2113          else if (!JSON_Parser_HandleInvalidNumber(parser, c, 2, JSON_ContainsExponent | JSON_ContainsNegativeExponent))
2114             return JSON_Failure;
2115          goto advance;
2116 
2117       case LEXING_NUMBER_EXPONENT_DIGITS:
2118          if (c >= '0' && c <= '9')
2119          {
2120             codepointToRecord = c;
2121             goto recordNumberCodepointAndAdvance;
2122          }
2123          /* The number is finished. */
2124          if (!JSON_Parser_ProcessToken(parser))
2125             return JSON_Failure;
2126          goto reprocess;
2127 
2128       case LEXING_COMMENT_AFTER_SLASH:
2129          if (c == '/')
2130             parser->lexerState = LEXING_SINGLE_LINE_COMMENT;
2131          else if (c == '*')
2132             parser->lexerState = LEXING_MULTI_LINE_COMMENT;
2133          else
2134          {
2135             JSON_Parser_SetErrorAtToken(parser, JSON_Error_UnknownToken);
2136             return JSON_Failure;
2137          }
2138          goto advance;
2139 
2140       case LEXING_SINGLE_LINE_COMMENT:
2141          if (c == CARRIAGE_RETURN_CODEPOINT || c == LINE_FEED_CODEPOINT || c == EOF_CODEPOINT)
2142             parser->lexerState = LEXING_WHITESPACE;
2143          goto advance;
2144 
2145       case LEXING_MULTI_LINE_COMMENT:
2146          if (c == '*')
2147             parser->lexerState = LEXING_MULTI_LINE_COMMENT_AFTER_STAR;
2148          goto advance;
2149 
2150       case LEXING_MULTI_LINE_COMMENT_AFTER_STAR:
2151          if (c == '/')
2152             parser->lexerState = LEXING_WHITESPACE;
2153          else if (c != '*')
2154             parser->lexerState = LEXING_MULTI_LINE_COMMENT;
2155          goto advance;
2156    }
2157 
   /* Every case above leaves the switch through a goto or a return, and the
      switch has no default, so control falls into the label below only if
      lexerState matched none of the cases. */
2158 recordStringCodepointAndAdvance:
2159 
2160    tokenEncoding  = parser->stringEncoding;
2161    maxTokenLength = parser->maxStringLength;
2162    if (!codepointToRecord)
2163    {
2164       SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_ContainsNullCharacter | JSON_ContainsControlCharacter);
2165    }
2166    else if (codepointToRecord < FIRST_NON_CONTROL_CODEPOINT)
2167    {
2168       SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_ContainsControlCharacter);
2169    }
2170    else if (codepointToRecord >= FIRST_NON_BMP_CODEPOINT)
2171    {
2172       SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_ContainsNonASCIICharacter | JSON_ContainsNonBMPCharacter);
2173    }
2174    else if (codepointToRecord >= FIRST_NON_ASCII_CODEPOINT)
2175    {
2176       SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_ContainsNonASCIICharacter);
2177    }
2178    goto recordCodepointAndAdvance;
2179 
2180 recordNumberCodepointAndAdvance:
2181 
2182    tokenEncoding = parser->numberEncoding;
2183    maxTokenLength = parser->maxNumberLength;
2184    goto recordCodepointAndAdvance;
2185 
2186 recordCodepointAndAdvance:
2187 
2188    /* We always ensure that there are LONGEST_ENCODING_SEQUENCE bytes
2189       available in the buffer for the next codepoint, so we don't have to
2190       check whether there is room when we decode a new codepoint, and if
2191       there isn't another codepoint, we have space already allocated for
2192       the encoded null terminator.*/
2193    parser->tokenBytesUsed += EncodeCodepoint(codepointToRecord, tokenEncoding, parser->pTokenBytes + parser->tokenBytesUsed);
2194    if (parser->tokenBytesUsed > maxTokenLength)
2195    {
2196       JSON_Parser_SetErrorAtToken(parser, parser->token == T_NUMBER ? JSON_Error_TooLongNumber : JSON_Error_TooLongString);
2197       return JSON_Failure;
2198    }
2199    if (parser->tokenBytesUsed > parser->tokenBytesLength - LONGEST_ENCODING_SEQUENCE)
2200    {
2201       byte* pBiggerBuffer = DoubleBuffer(&parser->memorySuite, parser->defaultTokenBytes, parser->pTokenBytes, parser->tokenBytesLength);
2202       if (!pBiggerBuffer)
2203       {
2204          JSON_Parser_SetErrorAtCodepoint(parser, JSON_Error_OutOfMemory);
2205          return JSON_Failure;
2206       }
2207       parser->pTokenBytes = pBiggerBuffer;
2208       parser->tokenBytesLength *= 2;
2209    }
2210    goto advance;
2211 
2212 advance:
2213 
2214    /* The current codepoint has been accepted, so advance the codepoint
2215       location counters accordingly. Note that the one time we don't
2216       do this is when the codepoint is EOF, which doesn't actually
2217       appear in the input stream. */
2218    if (c == CARRIAGE_RETURN_CODEPOINT)
2219    {
2220       SET_FLAGS_ON(ParserState, parser->state, PARSER_AFTER_CARRIAGE_RETURN);
2221    }
2222    if (c != EOF_CODEPOINT)
2223    {
2224       parser->codepointLocationByte += encodedLength;
2225       if (c == CARRIAGE_RETURN_CODEPOINT || c == LINE_FEED_CODEPOINT)
2226       {
2227          /* The next character will begin a new line. */
2228          parser->codepointLocationLine++;
2229          parser->codepointLocationColumn = 0;
2230       }
2231       else
2232       {
2233          /* The next character will be on the same line. */
2234          parser->codepointLocationColumn++;
2235       }
2236    }
2237 
   /* A token completed by this codepoint is processed only now, after the
      location counters have moved past it. */
2238    if (tokenFinished && !JSON_Parser_ProcessToken(parser))
2239       return JSON_Failure;
2240 
2241    return JSON_Success;
2242 }
2243 
JSON_Parser_FlushLexer(JSON_Parser parser)2244 static JSON_Status JSON_Parser_FlushLexer(JSON_Parser parser)
2245 {
2246    /* Push the EOF codepoint to the lexer so that it can finish the pending
2247       token, if any. The EOF codepoint is never emitted by the decoder
2248       itself, since it is outside the Unicode range and therefore cannot
2249       be encoded in any of the possible input encodings. */
2250    if (!JSON_Parser_ProcessCodepoint(parser, EOF_CODEPOINT, 0))
2251       return JSON_Failure;
2252 
2253    /* The lexer should be idle when parsing finishes. */
2254    if (parser->lexerState != LEXING_WHITESPACE)
2255    {
2256       JSON_Parser_SetErrorAtToken(parser, JSON_Error_IncompleteToken);
2257       return JSON_Failure;
2258    }
2259    return JSON_Success;
2260 }
2261 
2262 /* Parser's decoder functions. */
2263 
JSON_Parser_CallEncodingDetectedHandler(JSON_Parser parser)2264 static JSON_Status JSON_Parser_CallEncodingDetectedHandler(JSON_Parser parser)
2265 {
2266    if (parser->encodingDetectedHandler && parser->encodingDetectedHandler(parser) != JSON_Parser_Continue)
2267    {
2268       JSON_Parser_SetErrorAtCodepoint(parser, JSON_Error_AbortedByHandler);
2269       return JSON_Failure;
2270    }
2271    return JSON_Success;
2272 }
2273 
2274 /* Forward declaration. */
2275 static JSON_Status JSON_Parser_ProcessInputBytes(JSON_Parser parser, const byte* pBytes, size_t length);
2276 
JSON_Parser_ProcessUnknownByte(JSON_Parser parser,byte b)2277 static JSON_Status JSON_Parser_ProcessUnknownByte(JSON_Parser parser, byte b)
2278 {
2279    /* When the input encoding is unknown, the first 4 bytes of input are
2280       recorded in decoder.bits. */
2281    byte bytes[LONGEST_ENCODING_SEQUENCE];
2282 
2283    switch (parser->decoderData.state)
2284    {
2285       case DECODER_RESET:
2286          parser->decoderData.state = DECODED_1_OF_4;
2287          parser->decoderData.bits = (uint32_t)b << 24;
2288          break;
2289 
2290       case DECODED_1_OF_4:
2291          parser->decoderData.state = DECODED_2_OF_4;
2292          parser->decoderData.bits |= (uint32_t)b << 16;
2293          break;
2294 
2295       case DECODED_2_OF_4:
2296          parser->decoderData.state = DECODED_3_OF_4;
2297          parser->decoderData.bits |= (uint32_t)b << 8;
2298          break;
2299 
2300       case DECODED_3_OF_4:
2301          bytes[0] = (byte)(parser->decoderData.bits >> 24);
2302          bytes[1] = (byte)(parser->decoderData.bits >> 16);
2303          bytes[2] = (byte)(parser->decoderData.bits >> 8);
2304          bytes[3] = (byte)(b);
2305 
2306          /* We try to match the following patterns in order, where .. is any
2307             byte value and nz is any non-zero byte value:
2308             EF BB BF .. => UTF-8 with BOM
2309             FF FE 00 00 => UTF-32LE with BOM
2310             FF FE nz 00 => UTF-16LE with BOM
2311             00 00 FE FF -> UTF-32BE with BOM
2312             FE FF .. .. => UTF-16BE with BOM
2313             nz nz .. .. => UTF-8
2314             nz 00 nz .. => UTF-16LE
2315             nz 00 00 00 => UTF-32LE
2316             00 nz .. .. => UTF-16BE
2317             00 00 00 nz => UTF-32BE
2318             .. .. .. .. => unknown encoding */
2319          if (bytes[0] == 0xEF && bytes[1] == 0xBB && bytes[2] == 0xBF)
2320          {
2321             /* EF BB BF .. */
2322             parser->inputEncoding = JSON_UTF8;
2323          }
2324          else if (bytes[0] == 0xFF && bytes[1] == 0xFE && bytes[3] == 0x00)
2325          {
2326             /* FF FE 00 00 or
2327                FF FE nz 00 */
2328             parser->inputEncoding = (bytes[2] == 0x00) ? JSON_UTF32LE : JSON_UTF16LE;
2329          }
2330          else if (bytes[0] == 0x00 && bytes[1] == 0x00 && bytes[2] == 0xFE && bytes[3] == 0xFF)
2331          {
2332             /* 00 00 FE FF */
2333             parser->inputEncoding = JSON_UTF32BE;
2334          }
2335          else if (bytes[0] == 0xFE && bytes[1] == 0xFF)
2336          {
2337             /* FE FF .. .. */
2338             parser->inputEncoding = JSON_UTF16BE;
2339          }
2340          else if (bytes[0] != 0x00)
2341          {
2342             /* nz .. .. .. */
2343             if (bytes[1] != 0x00)
2344             {
2345                /* nz nz .. .. */
2346                parser->inputEncoding = JSON_UTF8;
2347             }
2348             else if (bytes[2] != 0x00)
2349             {
2350                /* nz 00 nz .. */
2351                parser->inputEncoding = JSON_UTF16LE;
2352             }
2353             else if (bytes[3] == 0x00)
2354             {
2355                /* nz 00 00 00 */
2356                parser->inputEncoding = JSON_UTF32LE;
2357             }
2358             else
2359             {
2360                /* nz 00 00 nz => error */
2361             }
2362          }
2363          else if (bytes[1] != 0x00)
2364          {
2365             /* 00 nz .. .. */
2366             parser->inputEncoding = JSON_UTF16BE;
2367          }
2368          else if (bytes[2] == 0x00 && bytes[3] != 0x00)
2369          {
2370             /* 00 00 00 nz */
2371             parser->inputEncoding = JSON_UTF32BE;
2372          }
2373          else
2374          {
2375             /* 00 00 nz .. or
2376                00 00 00 00 => error */
2377          }
2378 
2379          if (parser->inputEncoding == JSON_UnknownEncoding)
2380             return JSON_Parser_HandleInvalidEncodingSequence(parser, 4);
2381 
2382          if (!JSON_Parser_CallEncodingDetectedHandler(parser))
2383             return JSON_Failure;
2384 
2385          /* Reset the decoder before reprocessing the bytes. */
2386          Decoder_Reset(&parser->decoderData);
2387          return JSON_Parser_ProcessInputBytes(parser, bytes, 4);
2388    }
2389 
2390    /* We don't have 4 bytes yet. */
2391    return JSON_Success;
2392 }
2393 
JSON_Parser_ProcessInputBytes(JSON_Parser parser,const byte * pBytes,size_t length)2394 JSON_Status JSON_Parser_ProcessInputBytes(JSON_Parser parser, const byte* pBytes, size_t length)
2395 {
2396    /* Note that if length is 0, pBytes is allowed to be NULL. */
2397    size_t i = 0;
2398    while (parser->inputEncoding == JSON_UnknownEncoding && i < length)
2399    {
2400       if (!JSON_Parser_ProcessUnknownByte(parser, pBytes[i]))
2401          return JSON_Failure;
2402       i++;
2403    }
2404    while (i < length)
2405    {
2406       DecoderOutput output     = Decoder_ProcessByte(
2407             &parser->decoderData, parser->inputEncoding, pBytes[i]);
2408       DecoderResultCode result = DECODER_RESULT_CODE(output);
2409       switch (result)
2410       {
2411          case SEQUENCE_PENDING:
2412             i++;
2413             break;
2414 
2415          case SEQUENCE_COMPLETE:
2416             if (!JSON_Parser_ProcessCodepoint(
2417                      parser, DECODER_CODEPOINT(output),
2418                      DECODER_SEQUENCE_LENGTH(output)))
2419                return JSON_Failure;
2420             i++;
2421             break;
2422 
2423          case SEQUENCE_INVALID_INCLUSIVE:
2424             i++;
2425             /* fallthrough */
2426          case SEQUENCE_INVALID_EXCLUSIVE:
2427             if (!JSON_Parser_HandleInvalidEncodingSequence(
2428                      parser, DECODER_SEQUENCE_LENGTH(output)))
2429                return JSON_Failure;
2430             break;
2431       }
2432    }
2433    return JSON_Success;
2434 }
2435 
JSON_Parser_FlushDecoder(JSON_Parser parser)2436 static JSON_Status JSON_Parser_FlushDecoder(JSON_Parser parser)
2437 {
2438    /* If the input was 1, 2, or 3 bytes long, and the input encoding was not
2439       explicitly specified by the client, we can sometimes make a reasonable
2440       guess. If the input was 1 or 3 bytes long, the only encoding that could
2441       possibly be valid JSON is UF-8. If the input was 2 bytes long, we try
2442       to match the following patterns in order, where .. is any byte value
2443       and nz is any non-zero byte value:
2444       FF FE => UTF-16LE with BOM
2445       FE FF => UTF-16BE with BOM
2446       nz nz => UTF-8
2447       nz 00 => UTF-16LE
2448       00 nz => UTF-16BE
2449       .. .. => unknown encoding
2450       */
2451    if (parser->inputEncoding == JSON_UnknownEncoding &&
2452          parser->decoderData.state != DECODER_RESET)
2453    {
2454       byte bytes[3];
2455       size_t length = 0;
2456       bytes[0] = (byte)(parser->decoderData.bits >> 24);
2457       bytes[1] = (byte)(parser->decoderData.bits >> 16);
2458       bytes[2] = (byte)(parser->decoderData.bits >> 8);
2459 
2460       switch (parser->decoderData.state)
2461       {
2462          case DECODED_1_OF_4:
2463             parser->inputEncoding = JSON_UTF8;
2464             length = 1;
2465             break;
2466 
2467          case DECODED_2_OF_4:
2468             /* FF FE */
2469             if (bytes[0] == 0xFF && bytes[1] == 0xFE)
2470                parser->inputEncoding = JSON_UTF16LE;
2471             /* FE FF */
2472             else if (bytes[0] == 0xFE && bytes[1] == 0xFF)
2473                parser->inputEncoding = JSON_UTF16BE;
2474             else if (bytes[0] != 0x00)
2475             {
2476                /* nz nz or
2477                   nz 00 */
2478                parser->inputEncoding = bytes[1] ? JSON_UTF8 : JSON_UTF16LE;
2479             }
2480             /* 00 nz */
2481             else if (bytes[1] != 0x00)
2482                parser->inputEncoding = JSON_UTF16BE;
2483             /* 00 00 */
2484             else
2485                return JSON_Parser_HandleInvalidEncodingSequence(parser, 2);
2486             length = 2;
2487             break;
2488 
2489          case DECODED_3_OF_4:
2490             parser->inputEncoding = JSON_UTF8;
2491             length = 3;
2492             break;
2493       }
2494 
2495       if (!JSON_Parser_CallEncodingDetectedHandler(parser))
2496          return JSON_Failure;
2497 
2498       /* Reset the decoder before reprocessing the bytes. */
2499       parser->decoderData.state = DECODER_RESET;
2500       parser->decoderData.bits = 0;
2501       if (!JSON_Parser_ProcessInputBytes(parser, bytes, length))
2502          return JSON_Failure;
2503    }
2504 
2505    /* The decoder should be idle when parsing finishes. */
2506    if (Decoder_SequencePending(&parser->decoderData))
2507       return JSON_Parser_HandleInvalidEncodingSequence(
2508             parser, DECODER_STATE_BYTES(parser->decoderData.state));
2509    return JSON_Success;
2510 }
2511 
2512 /* Parser API functions. */
2513 
JSON_Parser_Create(const JSON_MemorySuite * pMemorySuite)2514 JSON_Parser JSON_CALL JSON_Parser_Create(const JSON_MemorySuite* pMemorySuite)
2515 {
2516    JSON_Parser parser;
2517    JSON_MemorySuite memorySuite;
2518 
2519    if (pMemorySuite)
2520    {
2521       memorySuite = *pMemorySuite;
2522 
2523       /* The full memory suite must be specified. */
2524       if (!memorySuite.realloc || !memorySuite.free)
2525          return NULL;
2526    }
2527    else
2528       memorySuite = defaultMemorySuite;
2529 
2530    parser = (JSON_Parser)memorySuite.realloc(memorySuite.userData, NULL, sizeof(struct JSON_Parser_Data));
2531 
2532    if (!parser)
2533       return NULL;
2534 
2535    parser->memorySuite = memorySuite;
2536    JSON_Parser_ResetData(parser, 0/* isInitialized */);
2537    return parser;
2538 }
2539 
JSON_Parser_Free(JSON_Parser parser)2540 JSON_Status JSON_CALL JSON_Parser_Free(JSON_Parser parser)
2541 {
2542    if (!parser || GET_FLAGS(parser->state, PARSER_IN_PROTECTED_API))
2543       return JSON_Failure;
2544 
2545    SET_FLAGS_ON(ParserState, parser->state, PARSER_IN_PROTECTED_API);
2546 
2547    if (parser->pTokenBytes != parser->defaultTokenBytes)
2548       parser->memorySuite.free(parser->memorySuite.userData, parser->pTokenBytes);
2549 
2550    while (parser->pMemberNames)
2551       JSON_Parser_PopMemberNameList(parser);
2552 
2553    Grammarian_FreeAllocations(&parser->grammarianData, &parser->memorySuite);
2554    parser->memorySuite.free(parser->memorySuite.userData, parser);
2555    return JSON_Success;
2556 }
2557 
JSON_Parser_Reset(JSON_Parser parser)2558 JSON_Status JSON_CALL JSON_Parser_Reset(JSON_Parser parser)
2559 {
2560    if (!parser || GET_FLAGS(parser->state, PARSER_IN_PROTECTED_API))
2561       return JSON_Failure;
2562    SET_FLAGS_ON(ParserState, parser->state, PARSER_IN_PROTECTED_API);
2563    JSON_Parser_ResetData(parser, 1/* isInitialized */);
2564    /* Note that JSON_Parser_ResetData() unset PARSER_IN_PROTECTED_API for us. */
2565    return JSON_Success;
2566 }
2567 
JSON_Parser_GetUserData(JSON_Parser parser)2568 void* JSON_CALL JSON_Parser_GetUserData(JSON_Parser parser)
2569 {
2570    return parser ? parser->userData : NULL;
2571 }
2572 
JSON_Parser_SetUserData(JSON_Parser parser,void * userData)2573 JSON_Status JSON_CALL JSON_Parser_SetUserData(JSON_Parser parser, void* userData)
2574 {
2575    if (!parser)
2576       return JSON_Failure;
2577    parser->userData = userData;
2578    return JSON_Success;
2579 }
2580 
JSON_Parser_GetInputEncoding(JSON_Parser parser)2581 JSON_Encoding JSON_CALL JSON_Parser_GetInputEncoding(JSON_Parser parser)
2582 {
2583    return parser ? (JSON_Encoding)parser->inputEncoding : JSON_UnknownEncoding;
2584 }
2585 
JSON_Parser_SetInputEncoding(JSON_Parser parser,JSON_Encoding encoding)2586 JSON_Status JSON_CALL JSON_Parser_SetInputEncoding(JSON_Parser parser, JSON_Encoding encoding)
2587 {
2588    if (     !parser
2589          || encoding < JSON_UnknownEncoding
2590          || encoding > JSON_UTF32BE
2591          || GET_FLAGS(parser->state, PARSER_STARTED))
2592       return JSON_Failure;
2593    parser->inputEncoding = (Encoding)encoding;
2594    return JSON_Success;
2595 }
2596 
JSON_Parser_GetStringEncoding(JSON_Parser parser)2597 JSON_Encoding JSON_CALL JSON_Parser_GetStringEncoding(JSON_Parser parser)
2598 {
2599    return parser ? (JSON_Encoding)parser->stringEncoding : JSON_UTF8;
2600 }
2601 
JSON_Parser_SetStringEncoding(JSON_Parser parser,JSON_Encoding encoding)2602 JSON_Status JSON_CALL JSON_Parser_SetStringEncoding(JSON_Parser parser, JSON_Encoding encoding)
2603 {
2604    if (
2605             !parser
2606          || encoding <= JSON_UnknownEncoding
2607          || encoding > JSON_UTF32BE
2608          || GET_FLAGS(parser->state, PARSER_STARTED))
2609       return JSON_Failure;
2610    parser->stringEncoding = (Encoding)encoding;
2611    return JSON_Success;
2612 }
2613 
JSON_Parser_GetMaxStringLength(JSON_Parser parser)2614 size_t JSON_CALL JSON_Parser_GetMaxStringLength(JSON_Parser parser)
2615 {
2616    return parser ? parser->maxStringLength : SIZE_MAX;
2617 }
2618 
JSON_Parser_SetMaxStringLength(JSON_Parser parser,size_t maxLength)2619 JSON_Status JSON_CALL JSON_Parser_SetMaxStringLength(JSON_Parser parser, size_t maxLength)
2620 {
2621    if (     !parser
2622          || GET_FLAGS(parser->state, PARSER_STARTED))
2623       return JSON_Failure;
2624    parser->maxStringLength = maxLength;
2625    return JSON_Success;
2626 }
2627 
JSON_Parser_GetNumberEncoding(JSON_Parser parser)2628 JSON_Encoding JSON_CALL JSON_Parser_GetNumberEncoding(JSON_Parser parser)
2629 {
2630    return parser ? (JSON_Encoding)parser->numberEncoding : JSON_UTF8;
2631 }
2632 
JSON_Parser_SetNumberEncoding(JSON_Parser parser,JSON_Encoding encoding)2633 JSON_Status JSON_CALL JSON_Parser_SetNumberEncoding(JSON_Parser parser, JSON_Encoding encoding)
2634 {
2635    if (!parser || encoding <= JSON_UnknownEncoding || encoding > JSON_UTF32BE || GET_FLAGS(parser->state, PARSER_STARTED))
2636       return JSON_Failure;
2637    parser->numberEncoding = (Encoding)encoding;
2638    return JSON_Success;
2639 }
2640 
JSON_Parser_GetMaxNumberLength(JSON_Parser parser)2641 size_t JSON_CALL JSON_Parser_GetMaxNumberLength(JSON_Parser parser)
2642 {
2643    return parser ? parser->maxNumberLength : SIZE_MAX;
2644 }
2645 
JSON_Parser_SetMaxNumberLength(JSON_Parser parser,size_t maxLength)2646 JSON_Status JSON_CALL JSON_Parser_SetMaxNumberLength(JSON_Parser parser, size_t maxLength)
2647 {
2648    if (     !parser
2649          || GET_FLAGS(parser->state, PARSER_STARTED))
2650       return JSON_Failure;
2651    parser->maxNumberLength = maxLength;
2652    return JSON_Success;
2653 }
2654 
JSON_Parser_GetAllowBOM(JSON_Parser parser)2655 JSON_Boolean JSON_CALL JSON_Parser_GetAllowBOM(JSON_Parser parser)
2656 {
2657    return (parser && GET_FLAGS(parser->flags, PARSER_ALLOW_BOM)) ? JSON_True : JSON_False;
2658 }
2659 
JSON_Parser_SetAllowBOM(JSON_Parser parser,JSON_Boolean allowBOM)2660 JSON_Status JSON_CALL JSON_Parser_SetAllowBOM(JSON_Parser parser, JSON_Boolean allowBOM)
2661 {
2662    if (!parser || GET_FLAGS(parser->state, PARSER_STARTED))
2663       return JSON_Failure;
2664    SET_FLAGS(ParserFlags, parser->flags, PARSER_ALLOW_BOM, allowBOM);
2665    return JSON_Success;
2666 }
2667 
JSON_Parser_GetAllowComments(JSON_Parser parser)2668 JSON_Boolean JSON_CALL JSON_Parser_GetAllowComments(JSON_Parser parser)
2669 {
2670    return (parser && GET_FLAGS(parser->flags, PARSER_ALLOW_COMMENTS)) ? JSON_True : JSON_False;
2671 }
2672 
JSON_Parser_SetAllowComments(JSON_Parser parser,JSON_Boolean allowComments)2673 JSON_Status JSON_CALL JSON_Parser_SetAllowComments(JSON_Parser parser, JSON_Boolean allowComments)
2674 {
2675    if (!parser || GET_FLAGS(parser->state, PARSER_STARTED))
2676       return JSON_Failure;
2677    SET_FLAGS(ParserFlags, parser->flags, PARSER_ALLOW_COMMENTS, allowComments);
2678    return JSON_Success;
2679 }
2680 
JSON_Parser_GetAllowSpecialNumbers(JSON_Parser parser)2681 JSON_Boolean JSON_CALL JSON_Parser_GetAllowSpecialNumbers(JSON_Parser parser)
2682 {
2683    return (parser && GET_FLAGS(parser->flags, PARSER_ALLOW_SPECIAL_NUMBERS)) ? JSON_True : JSON_False;
2684 }
2685 
JSON_Parser_SetAllowSpecialNumbers(JSON_Parser parser,JSON_Boolean allowSpecialNumbers)2686 JSON_Status JSON_CALL JSON_Parser_SetAllowSpecialNumbers(JSON_Parser parser, JSON_Boolean allowSpecialNumbers)
2687 {
2688    if (!parser || GET_FLAGS(parser->state, PARSER_STARTED))
2689       return JSON_Failure;
2690    SET_FLAGS(ParserFlags, parser->flags, PARSER_ALLOW_SPECIAL_NUMBERS, allowSpecialNumbers);
2691    return JSON_Success;
2692 }
2693 
JSON_Parser_GetAllowHexNumbers(JSON_Parser parser)2694 JSON_Boolean JSON_CALL JSON_Parser_GetAllowHexNumbers(JSON_Parser parser)
2695 {
2696    return (parser && GET_FLAGS(parser->flags, PARSER_ALLOW_HEX_NUMBERS)) ? JSON_True : JSON_False;
2697 }
2698 
JSON_Parser_SetAllowHexNumbers(JSON_Parser parser,JSON_Boolean allowHexNumbers)2699 JSON_Status JSON_CALL JSON_Parser_SetAllowHexNumbers(JSON_Parser parser, JSON_Boolean allowHexNumbers)
2700 {
2701    if (!parser || GET_FLAGS(parser->state, PARSER_STARTED))
2702       return JSON_Failure;
2703    SET_FLAGS(ParserFlags, parser->flags, PARSER_ALLOW_HEX_NUMBERS, allowHexNumbers);
2704    return JSON_Success;
2705 }
2706 
JSON_Parser_GetAllowUnescapedControlCharacters(JSON_Parser parser)2707 JSON_Boolean JSON_CALL JSON_Parser_GetAllowUnescapedControlCharacters(JSON_Parser parser)
2708 {
2709    return (parser && GET_FLAGS(parser->flags, PARSER_ALLOW_CONTROL_CHARS)) ? JSON_True : JSON_False;
2710 }
2711 
JSON_Parser_SetAllowUnescapedControlCharacters(JSON_Parser parser,JSON_Boolean allowUnescapedControlCharacters)2712 JSON_Status JSON_CALL JSON_Parser_SetAllowUnescapedControlCharacters(JSON_Parser parser, JSON_Boolean allowUnescapedControlCharacters)
2713 {
2714    if (!parser || GET_FLAGS(parser->state, PARSER_STARTED))
2715       return JSON_Failure;
2716    SET_FLAGS(ParserFlags, parser->flags, PARSER_ALLOW_CONTROL_CHARS, allowUnescapedControlCharacters);
2717    return JSON_Success;
2718 }
2719 
JSON_Parser_GetReplaceInvalidEncodingSequences(JSON_Parser parser)2720 JSON_Boolean JSON_CALL JSON_Parser_GetReplaceInvalidEncodingSequences(JSON_Parser parser)
2721 {
2722    return (parser && GET_FLAGS(parser->flags, PARSER_REPLACE_INVALID)) ? JSON_True : JSON_False;
2723 }
2724 
JSON_Parser_SetReplaceInvalidEncodingSequences(JSON_Parser parser,JSON_Boolean replaceInvalidEncodingSequences)2725 JSON_Status JSON_CALL JSON_Parser_SetReplaceInvalidEncodingSequences(
2726       JSON_Parser parser, JSON_Boolean replaceInvalidEncodingSequences)
2727 {
2728    if (!parser || GET_FLAGS(parser->state, PARSER_STARTED))
2729       return JSON_Failure;
2730    SET_FLAGS(ParserFlags, parser->flags, PARSER_REPLACE_INVALID, replaceInvalidEncodingSequences);
2731    return JSON_Success;
2732 }
2733 
JSON_Parser_GetTrackObjectMembers(JSON_Parser parser)2734 JSON_Boolean JSON_CALL JSON_Parser_GetTrackObjectMembers(JSON_Parser parser)
2735 {
2736    return (parser && GET_FLAGS(parser->flags, PARSER_TRACK_OBJECT_MEMBERS)) ? JSON_True : JSON_False;
2737 }
2738 
JSON_Parser_SetTrackObjectMembers(JSON_Parser parser,JSON_Boolean trackObjectMembers)2739 JSON_Status JSON_CALL JSON_Parser_SetTrackObjectMembers(JSON_Parser parser, JSON_Boolean trackObjectMembers)
2740 {
2741    if (!parser || GET_FLAGS(parser->state, PARSER_STARTED))
2742    {
2743       return JSON_Failure;
2744    }
2745    SET_FLAGS(ParserFlags, parser->flags, PARSER_TRACK_OBJECT_MEMBERS, trackObjectMembers);
2746    return JSON_Success;
2747 }
2748 
JSON_Parser_GetStopAfterEmbeddedDocument(JSON_Parser parser)2749 JSON_Boolean JSON_CALL JSON_Parser_GetStopAfterEmbeddedDocument(JSON_Parser parser)
2750 {
2751    return (parser && GET_FLAGS(parser->flags, PARSER_EMBEDDED_DOCUMENT)) ? JSON_True : JSON_False;
2752 }
2753 
JSON_Parser_SetStopAfterEmbeddedDocument(JSON_Parser parser,JSON_Boolean stopAfterEmbeddedDocument)2754 JSON_Status JSON_CALL JSON_Parser_SetStopAfterEmbeddedDocument(
2755       JSON_Parser parser, JSON_Boolean stopAfterEmbeddedDocument)
2756 {
2757    if (!parser || GET_FLAGS(parser->state, PARSER_STARTED))
2758    {
2759       return JSON_Failure;
2760    }
2761    SET_FLAGS(ParserFlags, parser->flags, PARSER_EMBEDDED_DOCUMENT, stopAfterEmbeddedDocument);
2762    return JSON_Success;
2763 }
2764 
JSON_Parser_GetError(JSON_Parser parser)2765 JSON_Error JSON_CALL JSON_Parser_GetError(JSON_Parser parser)
2766 {
2767    return parser ? (JSON_Error)parser->error : JSON_Error_None;
2768 }
2769 
JSON_Parser_GetErrorLocation(JSON_Parser parser,JSON_Location * pLocation)2770 JSON_Status JSON_CALL JSON_Parser_GetErrorLocation(
2771       JSON_Parser parser, JSON_Location* pLocation)
2772 {
2773    if (!pLocation || !parser || parser->error == JSON_Error_None)
2774       return JSON_Failure;
2775 
2776    if (parser->errorOffset == ERROR_LOCATION_IS_TOKEN_START)
2777    {
2778       pLocation->byte = parser->tokenLocationByte;
2779       pLocation->line = parser->tokenLocationLine;
2780       pLocation->column = parser->tokenLocationColumn;
2781    }
2782    else
2783    {
2784       pLocation->byte = parser->codepointLocationByte - (SHORTEST_ENCODING_SEQUENCE(parser->inputEncoding) * parser->errorOffset);
2785       pLocation->line = parser->codepointLocationLine;
2786       pLocation->column = parser->codepointLocationColumn - parser->errorOffset;
2787    }
2788    pLocation->depth = parser->depth;
2789    return JSON_Success;
2790 }
2791 
JSON_Parser_GetTokenLocation(JSON_Parser parser,JSON_Location * pLocation)2792 JSON_Status JSON_CALL JSON_Parser_GetTokenLocation(
2793       JSON_Parser parser, JSON_Location* pLocation)
2794 {
2795    if (!parser || !pLocation || !GET_FLAGS(parser->state, PARSER_IN_TOKEN_HANDLER))
2796       return JSON_Failure;
2797 
2798    pLocation->byte = parser->tokenLocationByte;
2799    pLocation->line = parser->tokenLocationLine;
2800    pLocation->column = parser->tokenLocationColumn;
2801    pLocation->depth = parser->depth;
2802    return JSON_Success;
2803 }
2804 
JSON_Parser_GetAfterTokenLocation(JSON_Parser parser,JSON_Location * pLocation)2805 JSON_Status JSON_CALL JSON_Parser_GetAfterTokenLocation(
2806       JSON_Parser parser, JSON_Location* pLocation)
2807 {
2808    if (!parser || !pLocation || !GET_FLAGS(parser->state, PARSER_IN_TOKEN_HANDLER))
2809       return JSON_Failure;
2810 
2811    pLocation->byte = parser->codepointLocationByte;
2812    pLocation->line = parser->codepointLocationLine;
2813    pLocation->column = parser->codepointLocationColumn;
2814    pLocation->depth = parser->depth;
2815    return JSON_Success;
2816 }
2817 
JSON_Parser_GetEncodingDetectedHandler(JSON_Parser parser)2818 JSON_Parser_NullHandler JSON_CALL JSON_Parser_GetEncodingDetectedHandler(JSON_Parser parser)
2819 {
2820    return parser ? parser->encodingDetectedHandler : NULL;
2821 }
2822 
JSON_Parser_SetEncodingDetectedHandler(JSON_Parser parser,JSON_Parser_EncodingDetectedHandler handler)2823 JSON_Status JSON_CALL JSON_Parser_SetEncodingDetectedHandler(
2824       JSON_Parser parser, JSON_Parser_EncodingDetectedHandler handler)
2825 {
2826    if (!parser)
2827       return JSON_Failure;
2828 
2829    parser->encodingDetectedHandler = handler;
2830    return JSON_Success;
2831 }
2832 
JSON_Parser_GetNullHandler(JSON_Parser parser)2833 JSON_Parser_NullHandler JSON_CALL JSON_Parser_GetNullHandler(JSON_Parser parser)
2834 {
2835    return parser ? parser->nullHandler : NULL;
2836 }
2837 
JSON_Parser_SetNullHandler(JSON_Parser parser,JSON_Parser_NullHandler handler)2838 JSON_Status JSON_CALL JSON_Parser_SetNullHandler(
2839       JSON_Parser parser, JSON_Parser_NullHandler handler)
2840 {
2841    if (!parser)
2842       return JSON_Failure;
2843 
2844    parser->nullHandler = handler;
2845    return JSON_Success;
2846 }
2847 
JSON_Parser_GetBooleanHandler(JSON_Parser parser)2848 JSON_Parser_BooleanHandler JSON_CALL JSON_Parser_GetBooleanHandler(JSON_Parser parser)
2849 {
2850    return parser ? parser->booleanHandler : NULL;
2851 }
2852 
JSON_Parser_SetBooleanHandler(JSON_Parser parser,JSON_Parser_BooleanHandler handler)2853 JSON_Status JSON_CALL JSON_Parser_SetBooleanHandler(
2854       JSON_Parser parser, JSON_Parser_BooleanHandler handler)
2855 {
2856    if (!parser)
2857       return JSON_Failure;
2858 
2859    parser->booleanHandler = handler;
2860    return JSON_Success;
2861 }
2862 
JSON_Parser_GetStringHandler(JSON_Parser parser)2863 JSON_Parser_StringHandler JSON_CALL JSON_Parser_GetStringHandler(JSON_Parser parser)
2864 {
2865    return parser ? parser->stringHandler : NULL;
2866 }
2867 
JSON_Parser_SetStringHandler(JSON_Parser parser,JSON_Parser_StringHandler handler)2868 JSON_Status JSON_CALL JSON_Parser_SetStringHandler(
2869       JSON_Parser parser, JSON_Parser_StringHandler handler)
2870 {
2871    if (!parser)
2872       return JSON_Failure;
2873 
2874    parser->stringHandler = handler;
2875    return JSON_Success;
2876 }
2877 
JSON_Parser_GetNumberHandler(JSON_Parser parser)2878 JSON_Parser_NumberHandler JSON_CALL JSON_Parser_GetNumberHandler(JSON_Parser parser)
2879 {
2880    return parser ? parser->numberHandler : NULL;
2881 }
2882 
JSON_Parser_SetNumberHandler(JSON_Parser parser,JSON_Parser_NumberHandler handler)2883 JSON_Status JSON_CALL JSON_Parser_SetNumberHandler(
2884       JSON_Parser parser, JSON_Parser_NumberHandler handler)
2885 {
2886    if (!parser)
2887       return JSON_Failure;
2888 
2889    parser->numberHandler = handler;
2890    return JSON_Success;
2891 }
2892 
JSON_Parser_GetSpecialNumberHandler(JSON_Parser parser)2893 JSON_Parser_SpecialNumberHandler JSON_CALL JSON_Parser_GetSpecialNumberHandler(JSON_Parser parser)
2894 {
2895    return parser ? parser->specialNumberHandler : NULL;
2896 }
2897 
JSON_Parser_SetSpecialNumberHandler(JSON_Parser parser,JSON_Parser_SpecialNumberHandler handler)2898 JSON_Status JSON_CALL JSON_Parser_SetSpecialNumberHandler(
2899       JSON_Parser parser, JSON_Parser_SpecialNumberHandler handler)
2900 {
2901    if (!parser)
2902       return JSON_Failure;
2903    parser->specialNumberHandler = handler;
2904    return JSON_Success;
2905 }
2906 
JSON_Parser_GetStartObjectHandler(JSON_Parser parser)2907 JSON_Parser_StartObjectHandler JSON_CALL JSON_Parser_GetStartObjectHandler(JSON_Parser parser)
2908 {
2909    return parser ? parser->startObjectHandler : NULL;
2910 }
2911 
JSON_Parser_SetStartObjectHandler(JSON_Parser parser,JSON_Parser_StartObjectHandler handler)2912 JSON_Status JSON_CALL JSON_Parser_SetStartObjectHandler(
2913       JSON_Parser parser, JSON_Parser_StartObjectHandler handler)
2914 {
2915    if (!parser)
2916       return JSON_Failure;
2917 
2918    parser->startObjectHandler = handler;
2919    return JSON_Success;
2920 }
2921 
JSON_Parser_GetEndObjectHandler(JSON_Parser parser)2922 JSON_Parser_EndObjectHandler JSON_CALL JSON_Parser_GetEndObjectHandler(JSON_Parser parser)
2923 {
2924    return parser ? parser->endObjectHandler : NULL;
2925 }
2926 
JSON_Parser_SetEndObjectHandler(JSON_Parser parser,JSON_Parser_EndObjectHandler handler)2927 JSON_Status JSON_CALL JSON_Parser_SetEndObjectHandler(
2928       JSON_Parser parser, JSON_Parser_EndObjectHandler handler)
2929 {
2930    if (!parser)
2931       return JSON_Failure;
2932 
2933    parser->endObjectHandler = handler;
2934    return JSON_Success;
2935 }
2936 
JSON_Parser_GetObjectMemberHandler(JSON_Parser parser)2937 JSON_Parser_ObjectMemberHandler JSON_CALL JSON_Parser_GetObjectMemberHandler(JSON_Parser parser)
2938 {
2939    return parser ? parser->objectMemberHandler : NULL;
2940 }
2941 
JSON_Parser_SetObjectMemberHandler(JSON_Parser parser,JSON_Parser_ObjectMemberHandler handler)2942 JSON_Status JSON_CALL JSON_Parser_SetObjectMemberHandler(
2943       JSON_Parser parser, JSON_Parser_ObjectMemberHandler handler)
2944 {
2945    if (!parser)
2946       return JSON_Failure;
2947 
2948    parser->objectMemberHandler = handler;
2949    return JSON_Success;
2950 }
2951 
JSON_Parser_GetStartArrayHandler(JSON_Parser parser)2952 JSON_Parser_StartArrayHandler JSON_CALL JSON_Parser_GetStartArrayHandler(JSON_Parser parser)
2953 {
2954    return parser ? parser->startArrayHandler : NULL;
2955 }
2956 
JSON_Parser_SetStartArrayHandler(JSON_Parser parser,JSON_Parser_StartArrayHandler handler)2957 JSON_Status JSON_CALL JSON_Parser_SetStartArrayHandler(
2958       JSON_Parser parser, JSON_Parser_StartArrayHandler handler)
2959 {
2960    if (!parser)
2961       return JSON_Failure;
2962 
2963    parser->startArrayHandler = handler;
2964    return JSON_Success;
2965 }
2966 
JSON_Parser_GetEndArrayHandler(JSON_Parser parser)2967 JSON_Parser_EndArrayHandler JSON_CALL JSON_Parser_GetEndArrayHandler(JSON_Parser parser)
2968 {
2969    return parser ? parser->endArrayHandler : NULL;
2970 }
2971 
JSON_Parser_SetEndArrayHandler(JSON_Parser parser,JSON_Parser_EndArrayHandler handler)2972 JSON_Status JSON_CALL JSON_Parser_SetEndArrayHandler(
2973       JSON_Parser parser, JSON_Parser_EndArrayHandler handler)
2974 {
2975    if (!parser)
2976       return JSON_Failure;
2977 
2978    parser->endArrayHandler = handler;
2979    return JSON_Success;
2980 }
2981 
JSON_Parser_GetArrayItemHandler(JSON_Parser parser)2982 JSON_Parser_ArrayItemHandler JSON_CALL JSON_Parser_GetArrayItemHandler(JSON_Parser parser)
2983 {
2984    return parser ? parser->arrayItemHandler : NULL;
2985 }
2986 
JSON_Parser_SetArrayItemHandler(JSON_Parser parser,JSON_Parser_ArrayItemHandler handler)2987 JSON_Status JSON_CALL JSON_Parser_SetArrayItemHandler(
2988       JSON_Parser parser, JSON_Parser_ArrayItemHandler handler)
2989 {
2990    if (!parser)
2991       return JSON_Failure;
2992 
2993    parser->arrayItemHandler = handler;
2994    return JSON_Success;
2995 }
2996 
JSON_Parser_Parse(JSON_Parser parser,const char * pBytes,size_t length,JSON_Boolean isFinal)2997 JSON_Status JSON_CALL JSON_Parser_Parse(JSON_Parser parser, const char* pBytes, size_t length, JSON_Boolean isFinal)
2998 {
2999    JSON_Status status = JSON_Failure;
3000    if (parser && (pBytes || !length) && !GET_FLAGS(parser->state, PARSER_FINISHED | PARSER_IN_PROTECTED_API))
3001    {
3002       int finishedParsing = 0;
3003       SET_FLAGS_ON(ParserState, parser->state, PARSER_STARTED | PARSER_IN_PROTECTED_API);
3004       if (JSON_Parser_ProcessInputBytes(parser, (const byte*)pBytes, length))
3005       {
3006          /* New input was parsed successfully. */
3007          if (isFinal)
3008          {
3009             /* Make sure there is nothing pending in the decoder, lexer,
3010                or parser. */
3011             if (JSON_Parser_FlushDecoder(parser) &&
3012                   JSON_Parser_FlushLexer(parser) &&
3013                   JSON_Parser_FlushParser(parser))
3014                status = JSON_Success;
3015 
3016             finishedParsing = 1;
3017          }
3018          else
3019             status = JSON_Success;
3020       }
3021       else
3022       {
3023          /* New input failed to parse. */
3024          finishedParsing = 1;
3025       }
3026       if (finishedParsing)
3027       {
3028          SET_FLAGS_ON(ParserState, parser->state, PARSER_FINISHED);
3029       }
3030       SET_FLAGS_OFF(ParserState, parser->state, PARSER_IN_PROTECTED_API);
3031    }
3032    return status;
3033 }
3034 
3035 #endif /* JSON_NO_PARSER */
3036 
3037 /******************** JSON Writer ********************/
3038 
3039 #ifndef JSON_NO_WRITER
3040 
3041 /* Combinable writer state flags. */
3042 #define WRITER_RESET            0x0
3043 #define WRITER_STARTED          0x1
3044 #define WRITER_IN_PROTECTED_API 0x2
3045 typedef byte WriterState;
3046 
3047 /* Combinable writer settings flags. */
3048 #define WRITER_DEFAULT_FLAGS    0x0
3049 #define WRITER_USE_CRLF         0x1
3050 #define WRITER_REPLACE_INVALID  0x2
3051 #define WRITER_ESCAPE_NON_ASCII 0x4
3052 typedef byte WriterFlags;
3053 
3054 /* A writer instance. */
3055 struct JSON_Writer_Data
3056 {
3057    JSON_MemorySuite          memorySuite;
3058    void*                     userData;
3059    WriterState               state;
3060    WriterFlags               flags;
3061    Encoding                  outputEncoding;
3062    Error                     error;
3063    GrammarianData            grammarianData;
3064    JSON_Writer_OutputHandler outputHandler;
3065 };
3066 
3067 /* Writer internal functions. */
3068 
/* Returns every resettable field of writer to its default value. The memory
   suite is left alone. isInitialized is forwarded to Grammarian_Reset(). */
static void JSON_Writer_ResetData(JSON_Writer writer, int isInitialized)
{
   Grammarian_Reset(&writer->grammarianData, isInitialized);

   writer->outputHandler  = NULL;
   writer->userData       = NULL;
   writer->error          = JSON_Error_None;
   writer->outputEncoding = JSON_UTF8;
   writer->flags          = WRITER_DEFAULT_FLAGS;

   /* Must remain the final store: clearing the state also drops
      WRITER_IN_PROTECTED_API, which JSON_Writer_Reset() relies on. */
   writer->state = WRITER_RESET;
}
3079 
JSON_Writer_SetError(JSON_Writer writer,Error error)3080 static void JSON_Writer_SetError(JSON_Writer writer, Error error)
3081 {
3082    writer->error = error;
3083 }
3084 
JSON_Writer_ProcessToken(JSON_Writer writer,Symbol token)3085 static JSON_Status JSON_Writer_ProcessToken(JSON_Writer writer, Symbol token)
3086 {
3087    GrammarianOutput output = Grammarian_ProcessToken(&writer->grammarianData, token, &writer->memorySuite);
3088    switch (GRAMMARIAN_RESULT_CODE(output))
3089    {
3090       case REJECTED_TOKEN:
3091          JSON_Writer_SetError(writer, JSON_Error_UnexpectedToken);
3092          return JSON_Failure;
3093 
3094       case SYMBOL_STACK_FULL:
3095          JSON_Writer_SetError(writer, JSON_Error_OutOfMemory);
3096          return JSON_Failure;
3097    }
3098    return JSON_Success;
3099 }
3100 
JSON_Writer_OutputBytes(JSON_Writer writer,const byte * pBytes,size_t length)3101 static JSON_Status JSON_Writer_OutputBytes(JSON_Writer writer, const byte* pBytes, size_t length)
3102 {
3103    if (writer->outputHandler && length)
3104    {
3105       if (writer->outputHandler(writer, (const char*)pBytes, length) != JSON_Writer_Continue)
3106       {
3107          JSON_Writer_SetError(writer, JSON_Error_AbortedByHandler);
3108          return JSON_Failure;
3109       }
3110    }
3111    return JSON_Success;
3112 }
3113 
/* Decides how codepoint c must appear inside a JSON string literal.

   Returns:
     0     - c can be written as-is in the output encoding;
     'u'   - c must be written as one or two \uXXXX hex escapes;
     other - c must be written as a backslash followed by the returned
             character (e.g. 'n' for a line feed).

   Forward slashes are deliberately left unescaped. */
static Codepoint JSON_Writer_GetCodepointEscapeCharacter(JSON_Writer writer, Codepoint c)
{
   switch (c)
   {
      case BACKSPACE_CODEPOINT:
         return 'b';

      case TAB_CODEPOINT:
         return 't';

      case LINE_FEED_CODEPOINT:
         return 'n';

      case FORM_FEED_CODEPOINT:
         return 'f';

      case CARRIAGE_RETURN_CODEPOINT:
         return 'r';

      case '"':
         return '"';

      case '\\':
         return '\\';

      case DELETE_CODEPOINT:
      case LINE_SEPARATOR_CODEPOINT:
      case PARAGRAPH_SEPARATOR_CODEPOINT:
         return 'u';

      default:
         /* Remaining control characters and noncharacters are always
            hex-escaped. With WRITER_ESCAPE_NON_ASCII set, so is every
            non-ASCII codepoint. The comparison is inclusive because
            FIRST_NON_ASCII_CODEPOINT is, going by its name, itself the
            first codepoint outside ASCII; a strict '>' would let that one
            codepoint through unescaped. */
         if (c < FIRST_NON_CONTROL_CODEPOINT || IS_NONCHARACTER(c) ||
               (GET_FLAGS(writer->flags, WRITER_ESCAPE_NON_ASCII) && c >= FIRST_NON_ASCII_CODEPOINT))
            return 'u';
         break;
   }
   return 0;
}
3155 
3156 typedef struct tag_WriteBufferData
3157 {
3158    size_t used;
3159    byte   bytes[256];
3160 } WriteBufferData;
3161 typedef WriteBufferData* WriteBuffer;
3162 
/* Marks the buffer as empty. The byte storage itself is not cleared. */
static void WriteBuffer_Reset(WriteBuffer buffer)
{
   buffer->used = 0;
}
3167 
WriteBuffer_Flush(WriteBuffer buffer,JSON_Writer writer)3168 static JSON_Status WriteBuffer_Flush(WriteBuffer buffer, JSON_Writer writer)
3169 {
3170    JSON_Status status = JSON_Writer_OutputBytes(writer, buffer->bytes, buffer->used);
3171    buffer->used = 0;
3172    return status;
3173 }
3174 
WriteBuffer_WriteBytes(WriteBuffer buffer,JSON_Writer writer,const byte * pBytes,size_t length)3175 static JSON_Status WriteBuffer_WriteBytes(WriteBuffer buffer, JSON_Writer writer, const byte* pBytes, size_t length)
3176 {
3177    if (buffer->used + length > sizeof(buffer->bytes) &&
3178          !WriteBuffer_Flush(buffer, writer))
3179       return JSON_Failure;
3180 
3181    memcpy(&buffer->bytes[buffer->used], pBytes, length);
3182    buffer->used += length;
3183    return JSON_Success;
3184 }
3185 
WriteBuffer_WriteCodepoint(WriteBuffer buffer,JSON_Writer writer,Codepoint c)3186 static JSON_Status WriteBuffer_WriteCodepoint(WriteBuffer buffer, JSON_Writer writer, Codepoint c)
3187 {
3188    if (buffer->used + LONGEST_ENCODING_SEQUENCE > sizeof(buffer->bytes) &&
3189          !WriteBuffer_Flush(buffer, writer))
3190       return JSON_Failure;
3191 
3192    buffer->used += EncodeCodepoint(c, writer->outputEncoding, &buffer->bytes[buffer->used]);
3193    return JSON_Success;
3194 }
3195 
WriteBuffer_WriteHexEscapeSequence(WriteBuffer buffer,JSON_Writer writer,Codepoint c)3196 static JSON_Status WriteBuffer_WriteHexEscapeSequence(WriteBuffer buffer, JSON_Writer writer, Codepoint c)
3197 {
3198    if (c >= FIRST_NON_BMP_CODEPOINT)
3199    {
3200       /* Non-BMP codepoints must be hex-escaped by escaping the UTF-16
3201          surrogate pair for the codepoint. We put the leading surrogate
3202          in the low 16 bits of c so that it gets written first, then
3203          the second pass through the loop will write out the trailing
3204          surrogate. x*/
3205       c = SURROGATES_FROM_CODEPOINT(c);
3206       c = (c << 16) | (c >> 16);
3207    }
3208    do
3209    {
3210       static const byte hexDigits[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
3211       byte escapeSequence[6];
3212       int i;
3213       escapeSequence[0] = '\\';
3214       escapeSequence[1] = 'u';
3215       escapeSequence[2] = hexDigits[(c >> 12) & 0xF];
3216       escapeSequence[3] = hexDigits[(c >> 8) & 0xF];
3217       escapeSequence[4] = hexDigits[(c >> 4) & 0xF];
3218       escapeSequence[5] = hexDigits[c & 0xF];
3219       for (i = 0; i < sizeof(escapeSequence); i++)
3220       {
3221          if (!WriteBuffer_WriteCodepoint(buffer, writer, escapeSequence[i]))
3222             return JSON_Failure;
3223       }
3224       c >>= 16;
3225    } while (c);
3226    return JSON_Success;
3227 }
3228 
JSON_Writer_OutputString(JSON_Writer writer,const byte * pBytes,size_t length,Encoding encoding)3229 static JSON_Status JSON_Writer_OutputString(JSON_Writer writer, const byte* pBytes, size_t length, Encoding encoding)
3230 {
3231    static const byte quoteUTF[] = { 0, 0, 0, '"', 0, 0, 0 };
3232    static const byte* const quoteEncodings[5] = { quoteUTF + 3, quoteUTF + 3, quoteUTF + 2, quoteUTF + 3, quoteUTF };
3233 
3234    const byte* pQuoteEncoded = quoteEncodings[writer->outputEncoding - 1];
3235    size_t minSequenceLength = (size_t)SHORTEST_ENCODING_SEQUENCE(writer->outputEncoding);
3236    DecoderData decoderData;
3237    WriteBufferData bufferData;
3238    size_t i = 0;
3239 
3240    WriteBuffer_Reset(&bufferData);
3241 
3242    /* Start quote. */
3243    if (!WriteBuffer_WriteBytes(&bufferData, writer, pQuoteEncoded, minSequenceLength))
3244       return JSON_Failure;
3245 
3246    /* String contents. */
3247    Decoder_Reset(&decoderData);
3248    while (i < length)
3249    {
3250       DecoderOutput output = Decoder_ProcessByte(&decoderData, encoding, pBytes[i]);
3251       DecoderResultCode result = DECODER_RESULT_CODE(output);
3252       Codepoint c;
3253       Codepoint escapeCharacter;
3254       switch (result)
3255       {
3256          case SEQUENCE_PENDING:
3257             i++;
3258             break;
3259 
3260          case SEQUENCE_COMPLETE:
3261             c = DECODER_CODEPOINT(output);
3262             escapeCharacter = JSON_Writer_GetCodepointEscapeCharacter(writer, c);
3263             switch (escapeCharacter)
3264             {
3265                case 0:
3266                   /* Output the codepoint as a normal encoding sequence. */
3267                   if (!WriteBuffer_WriteCodepoint(&bufferData, writer, c))
3268                      return JSON_Failure;
3269                   break;
3270 
3271                case 'u':
3272                   /* Output the codepoint as 1 or 2 hex escape sequences. */
3273                   if (!WriteBuffer_WriteHexEscapeSequence(&bufferData, writer, c))
3274                      return JSON_Failure;
3275                   break;
3276 
3277                default:
3278                   /* Output the codepoint as a simple escape sequence. */
3279                   if (!WriteBuffer_WriteCodepoint(&bufferData, writer, '\\') ||
3280                         !WriteBuffer_WriteCodepoint(&bufferData, writer, escapeCharacter))
3281                      return JSON_Failure;
3282                   break;
3283             }
3284             i++;
3285             break;
3286 
3287          case SEQUENCE_INVALID_INCLUSIVE:
3288             i++;
3289             /* fallthrough */
3290          case SEQUENCE_INVALID_EXCLUSIVE:
3291             if (GET_FLAGS(writer->flags, WRITER_REPLACE_INVALID))
3292             {
3293                if (!WriteBuffer_WriteHexEscapeSequence(&bufferData, writer, REPLACEMENT_CHARACTER_CODEPOINT))
3294                   return JSON_Failure;
3295             }
3296             else
3297             {
3298                /* Output whatever valid bytes we've accumulated before failing. */
3299                if (WriteBuffer_Flush(&bufferData, writer))
3300                   JSON_Writer_SetError(writer, JSON_Error_InvalidEncodingSequence);
3301                return JSON_Failure;
3302             }
3303             break;
3304       }
3305    }
3306    if (Decoder_SequencePending(&decoderData))
3307    {
3308       if (GET_FLAGS(writer->flags, WRITER_REPLACE_INVALID))
3309       {
3310          if (!WriteBuffer_WriteHexEscapeSequence(&bufferData, writer, REPLACEMENT_CHARACTER_CODEPOINT))
3311             return JSON_Failure;
3312       }
3313       else
3314       {
3315          /* Output whatever valid bytes we've accumulated before failing. */
3316          if (WriteBuffer_Flush(&bufferData, writer))
3317             JSON_Writer_SetError(writer, JSON_Error_InvalidEncodingSequence);
3318          return JSON_Failure;
3319       }
3320    }
3321 
3322    /* End quote. */
3323    if (!WriteBuffer_WriteBytes(&bufferData, writer, pQuoteEncoded, minSequenceLength) ||
3324          !WriteBuffer_Flush(&bufferData, writer))
3325       return JSON_Failure;
3326    return JSON_Success;
3327 }
3328 
/* Advances the number-lexing state machine by one codepoint and returns the
   new state. Lexing starts in LEXING_WHITESPACE. Feeding EOF_CODEPOINT in a
   state where the number may legally end returns LEXING_WHITESPACE. Any
   codepoint that is not acceptable in the current state returns LEXER_ERROR.
   A state that this function does not handle is returned unchanged.

   A hex prefix ('x' or 'X') is accepted after a leading zero, but not after
   a leading negative zero. */
static LexerState LexNumberCharacter(LexerState state, Codepoint c)
{
   const int isDigit        = (c >= '0' && c <= '9');
   const int isNonZeroDigit = (c >= '1' && c <= '9');
   const int isHexDigit     = isDigit || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f');
   const int isExponentMark = (c == 'e' || c == 'E');
   const int isEnd          = (c == EOF_CODEPOINT);

   switch (state)
   {
      case LEXING_WHITESPACE:
         if (c == '-')
            return LEXING_NUMBER_AFTER_MINUS;
         if (c == '0')
            return LEXING_NUMBER_AFTER_LEADING_ZERO;
         return isNonZeroDigit ? LEXING_NUMBER_DECIMAL_DIGITS : LEXER_ERROR;

      case LEXING_NUMBER_AFTER_MINUS:
         if (c == '0')
            return LEXING_NUMBER_AFTER_LEADING_NEGATIVE_ZERO;
         return isNonZeroDigit ? LEXING_NUMBER_DECIMAL_DIGITS : LEXER_ERROR;

      case LEXING_NUMBER_AFTER_LEADING_ZERO:
      case LEXING_NUMBER_AFTER_LEADING_NEGATIVE_ZERO:
         if (c == '.')
            return LEXING_NUMBER_AFTER_DOT;
         if (isExponentMark)
            return LEXING_NUMBER_AFTER_E;
         if ((c == 'x' || c == 'X') && state == LEXING_NUMBER_AFTER_LEADING_ZERO)
            return LEXING_NUMBER_AFTER_X;
         return isEnd ? LEXING_WHITESPACE : LEXER_ERROR;

      case LEXING_NUMBER_AFTER_X:
         return isHexDigit ? LEXING_NUMBER_HEX_DIGITS : LEXER_ERROR;

      case LEXING_NUMBER_HEX_DIGITS:
         if (isHexDigit)
            return state;
         return isEnd ? LEXING_WHITESPACE : LEXER_ERROR;

      case LEXING_NUMBER_DECIMAL_DIGITS:
         if (isDigit)
            return state;
         if (c == '.')
            return LEXING_NUMBER_AFTER_DOT;
         if (isExponentMark)
            return LEXING_NUMBER_AFTER_E;
         return isEnd ? LEXING_WHITESPACE : LEXER_ERROR;

      case LEXING_NUMBER_AFTER_DOT:
         return isDigit ? LEXING_NUMBER_FRACTIONAL_DIGITS : LEXER_ERROR;

      case LEXING_NUMBER_FRACTIONAL_DIGITS:
         if (isDigit)
            return state;
         if (isExponentMark)
            return LEXING_NUMBER_AFTER_E;
         return isEnd ? LEXING_WHITESPACE : LEXER_ERROR;

      case LEXING_NUMBER_AFTER_E:
         if (c == '+' || c == '-')
            return LEXING_NUMBER_AFTER_EXPONENT_SIGN;
         return isDigit ? LEXING_NUMBER_EXPONENT_DIGITS : LEXER_ERROR;

      case LEXING_NUMBER_AFTER_EXPONENT_SIGN:
         return isDigit ? LEXING_NUMBER_EXPONENT_DIGITS : LEXER_ERROR;

      case LEXING_NUMBER_EXPONENT_DIGITS:
         if (isDigit)
            return state;
         return isEnd ? LEXING_WHITESPACE : LEXER_ERROR;

      default:
         break;
   }
   return state;
}
3449 
JSON_Writer_OutputNumber(JSON_Writer writer,const byte * pBytes,size_t length,Encoding encoding)3450 static JSON_Status JSON_Writer_OutputNumber(JSON_Writer writer, const byte* pBytes, size_t length, Encoding encoding)
3451 {
3452    DecoderData decoderData;
3453    WriteBufferData bufferData;
3454    LexerState lexerState = LEXING_WHITESPACE;
3455    size_t i;
3456    Decoder_Reset(&decoderData);
3457    WriteBuffer_Reset(&bufferData);
3458    for (i = 0; i < length; i++)
3459    {
3460       DecoderOutput output = Decoder_ProcessByte(&decoderData, encoding, pBytes[i]);
3461       DecoderResultCode result = DECODER_RESULT_CODE(output);
3462       Codepoint c;
3463       switch (result)
3464       {
3465          case SEQUENCE_PENDING:
3466             break;
3467 
3468          case SEQUENCE_COMPLETE:
3469             c = DECODER_CODEPOINT(output);
3470             lexerState = LexNumberCharacter(lexerState, c);
3471             if (lexerState == LEXER_ERROR)
3472             {
3473                /* Output whatever valid bytes we've accumulated before failing. */
3474                if (WriteBuffer_Flush(&bufferData, writer))
3475                   JSON_Writer_SetError(writer, JSON_Error_InvalidNumber);
3476                return JSON_Failure;
3477             }
3478             if (!WriteBuffer_WriteCodepoint(&bufferData, writer, c))
3479                return JSON_Failure;
3480             break;
3481 
3482          case SEQUENCE_INVALID_INCLUSIVE:
3483          case SEQUENCE_INVALID_EXCLUSIVE:
3484             /* Output whatever valid bytes we've accumulated before failing. */
3485             if (WriteBuffer_Flush(&bufferData, writer))
3486                JSON_Writer_SetError(writer, JSON_Error_InvalidEncodingSequence);
3487             return JSON_Failure;
3488       }
3489    }
3490    if (!WriteBuffer_Flush(&bufferData, writer))
3491       return JSON_Failure;
3492    if (Decoder_SequencePending(&decoderData))
3493    {
3494       JSON_Writer_SetError(writer, JSON_Error_InvalidEncodingSequence);
3495       return JSON_Failure;
3496    }
3497    if (LexNumberCharacter(lexerState, EOF_CODEPOINT) == LEXER_ERROR)
3498    {
3499       JSON_Writer_SetError(writer, JSON_Error_InvalidNumber);
3500       return JSON_Failure;
3501    }
3502    return JSON_Success;
3503 }
3504 
3505 #define SPACES_PER_CHUNK 8
JSON_Writer_OutputSpaces(JSON_Writer writer,size_t numberOfSpaces)3506 static JSON_Status JSON_Writer_OutputSpaces(JSON_Writer writer, size_t numberOfSpaces)
3507 {
3508    static const byte spacesUTF8[SPACES_PER_CHUNK] = { ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ' };
3509    static const byte spacesUTF16[SPACES_PER_CHUNK * 2 + 1] = { 0, ' ', 0, ' ', 0, ' ', 0, ' ', 0, ' ', 0, ' ', 0, ' ', 0, ' ', 0 };
3510    static const byte spacesUTF32[SPACES_PER_CHUNK * 4 + 3] = { 0, 0, 0, ' ', 0, 0, 0, ' ', 0, 0, 0, ' ', 0, 0, 0, ' ', 0, 0, 0, ' ', 0, 0, 0, ' ', 0, 0, 0, ' ', 0, 0, 0, ' ', 0, 0, 0 };
3511    static const byte* const spacesEncodings[5] = { spacesUTF8, spacesUTF16 + 1, spacesUTF16, spacesUTF32 + 3, spacesUTF32 };
3512 
3513    size_t encodedLength = (size_t)SHORTEST_ENCODING_SEQUENCE(writer->outputEncoding);
3514    const byte* encoded = spacesEncodings[writer->outputEncoding - 1];
3515    while (numberOfSpaces > SPACES_PER_CHUNK)
3516    {
3517       if (!JSON_Writer_OutputBytes(writer, encoded, SPACES_PER_CHUNK * encodedLength))
3518          return JSON_Failure;
3519       numberOfSpaces -= SPACES_PER_CHUNK;
3520    }
3521 
3522    if (!JSON_Writer_OutputBytes(writer, encoded, numberOfSpaces * encodedLength))
3523       return JSON_Failure;
3524    return JSON_Success;
3525 }
3526 
JSON_Writer_WriteSimpleToken(JSON_Writer writer,Symbol token,const byte * const * encodings,size_t length)3527 static JSON_Status JSON_Writer_WriteSimpleToken(JSON_Writer writer, Symbol token, const byte* const* encodings, size_t length)
3528 {
3529    JSON_Status status = JSON_Failure;
3530    if (writer && !GET_FLAGS(writer->state, WRITER_IN_PROTECTED_API) && writer->error == JSON_Error_None)
3531    {
3532       size_t encodedLength = length * (size_t)SHORTEST_ENCODING_SEQUENCE(writer->outputEncoding);
3533       SET_FLAGS_ON(WriterState, writer->state, WRITER_STARTED | WRITER_IN_PROTECTED_API);
3534       if (JSON_Writer_ProcessToken(writer, token) &&
3535             JSON_Writer_OutputBytes(writer, encodings[writer->outputEncoding - 1], encodedLength))
3536          status = JSON_Success;
3537       SET_FLAGS_OFF(WriterState, writer->state, WRITER_IN_PROTECTED_API);
3538    }
3539    return status;
3540 }
3541 
3542 /* Writer API functions. */
3543 
JSON_Writer_Create(const JSON_MemorySuite * pMemorySuite)3544 JSON_Writer JSON_CALL JSON_Writer_Create(const JSON_MemorySuite* pMemorySuite)
3545 {
3546    JSON_Writer writer;
3547    JSON_MemorySuite memorySuite;
3548    if (pMemorySuite)
3549    {
3550       memorySuite = *pMemorySuite;
3551       /* The full memory suite must be specified. */
3552       if (!memorySuite.realloc || !memorySuite.free)
3553          return NULL;
3554    }
3555    else
3556       memorySuite = defaultMemorySuite;
3557 
3558    writer = (JSON_Writer)memorySuite.realloc(memorySuite.userData, NULL, sizeof(struct JSON_Writer_Data));
3559 
3560    if (!writer)
3561       return NULL;
3562 
3563    writer->memorySuite = memorySuite;
3564    JSON_Writer_ResetData(writer, 0/* isInitialized */);
3565    return writer;
3566 }
3567 
JSON_Writer_Free(JSON_Writer writer)3568 JSON_Status JSON_CALL JSON_Writer_Free(JSON_Writer writer)
3569 {
3570    if (!writer || GET_FLAGS(writer->state, WRITER_IN_PROTECTED_API))
3571       return JSON_Failure;
3572 
3573    SET_FLAGS_ON(WriterState, writer->state, WRITER_IN_PROTECTED_API);
3574    Grammarian_FreeAllocations(&writer->grammarianData, &writer->memorySuite);
3575    writer->memorySuite.free(writer->memorySuite.userData, writer);
3576    return JSON_Success;
3577 }
3578 
JSON_Writer_Reset(JSON_Writer writer)3579 JSON_Status JSON_CALL JSON_Writer_Reset(JSON_Writer writer)
3580 {
3581    if (!writer || GET_FLAGS(writer->state, WRITER_IN_PROTECTED_API))
3582       return JSON_Failure;
3583 
3584    SET_FLAGS_ON(WriterState, writer->state, WRITER_IN_PROTECTED_API);
3585    JSON_Writer_ResetData(writer, 1/* isInitialized */);
3586    /* Note that JSON_Writer_ResetData() unset WRITER_IN_PROTECTED_API for us. */
3587    return JSON_Success;
3588 }
3589 
JSON_Writer_GetUserData(JSON_Writer writer)3590 void* JSON_CALL JSON_Writer_GetUserData(JSON_Writer writer)
3591 {
3592    return writer ? writer->userData : NULL;
3593 }
3594 
JSON_Writer_SetUserData(JSON_Writer writer,void * userData)3595 JSON_Status JSON_CALL JSON_Writer_SetUserData(JSON_Writer writer, void* userData)
3596 {
3597    if (!writer)
3598       return JSON_Failure;
3599 
3600    writer->userData = userData;
3601    return JSON_Success;
3602 }
3603 
JSON_Writer_GetOutputEncoding(JSON_Writer writer)3604 JSON_Encoding JSON_CALL JSON_Writer_GetOutputEncoding(JSON_Writer writer)
3605 {
3606    return writer ? (JSON_Encoding)writer->outputEncoding : JSON_UTF8;
3607 }
3608 
JSON_Writer_SetOutputEncoding(JSON_Writer writer,JSON_Encoding encoding)3609 JSON_Status JSON_CALL JSON_Writer_SetOutputEncoding(JSON_Writer writer, JSON_Encoding encoding)
3610 {
3611    if (!writer || GET_FLAGS(writer->state, WRITER_STARTED) || encoding <= JSON_UnknownEncoding || encoding > JSON_UTF32BE)
3612       return JSON_Failure;
3613 
3614    writer->outputEncoding = (Encoding)encoding;
3615    return JSON_Success;
3616 }
3617 
JSON_Writer_GetUseCRLF(JSON_Writer writer)3618 JSON_Boolean JSON_CALL JSON_Writer_GetUseCRLF(JSON_Writer writer)
3619 {
3620    return (writer && GET_FLAGS(writer->flags, WRITER_USE_CRLF)) ? JSON_True : JSON_False;
3621 }
3622 
JSON_Writer_SetUseCRLF(JSON_Writer writer,JSON_Boolean useCRLF)3623 JSON_Status JSON_CALL JSON_Writer_SetUseCRLF(JSON_Writer writer, JSON_Boolean useCRLF)
3624 {
3625    if (!writer || GET_FLAGS(writer->state, WRITER_STARTED))
3626       return JSON_Failure;
3627 
3628    SET_FLAGS(WriterFlags, writer->flags, WRITER_USE_CRLF, useCRLF);
3629    return JSON_Success;
3630 }
3631 
JSON_Writer_GetReplaceInvalidEncodingSequences(JSON_Writer writer)3632 JSON_Boolean JSON_CALL JSON_Writer_GetReplaceInvalidEncodingSequences(JSON_Writer writer)
3633 {
3634    return (writer && GET_FLAGS(writer->flags, WRITER_REPLACE_INVALID)) ? JSON_True : JSON_False;
3635 }
3636 
JSON_Writer_SetReplaceInvalidEncodingSequences(JSON_Writer writer,JSON_Boolean replaceInvalidEncodingSequences)3637 JSON_Status JSON_CALL JSON_Writer_SetReplaceInvalidEncodingSequences(JSON_Writer writer, JSON_Boolean replaceInvalidEncodingSequences)
3638 {
3639    if (!writer || GET_FLAGS(writer->state, WRITER_STARTED))
3640       return JSON_Failure;
3641 
3642    SET_FLAGS(WriterFlags, writer->flags, WRITER_REPLACE_INVALID, replaceInvalidEncodingSequences);
3643    return JSON_Success;
3644 }
3645 
JSON_Writer_GetEscapeAllNonASCIICharacters(JSON_Writer writer)3646 JSON_Boolean JSON_CALL JSON_Writer_GetEscapeAllNonASCIICharacters(JSON_Writer writer)
3647 {
3648    return (writer && GET_FLAGS(writer->flags, WRITER_ESCAPE_NON_ASCII)) ? JSON_True : JSON_False;
3649 }
3650 
JSON_Writer_SetEscapeAllNonASCIICharacters(JSON_Writer writer,JSON_Boolean escapeAllNonASCIICharacters)3651 JSON_Status JSON_CALL JSON_Writer_SetEscapeAllNonASCIICharacters(JSON_Writer writer, JSON_Boolean escapeAllNonASCIICharacters)
3652 {
3653    if (!writer || GET_FLAGS(writer->state, WRITER_STARTED))
3654       return JSON_Failure;
3655 
3656    SET_FLAGS(WriterFlags, writer->flags, WRITER_ESCAPE_NON_ASCII, escapeAllNonASCIICharacters);
3657    return JSON_Success;
3658 }
3659 
JSON_Writer_GetError(JSON_Writer writer)3660 JSON_Error JSON_CALL JSON_Writer_GetError(JSON_Writer writer)
3661 {
3662    return writer ? (JSON_Error)writer->error : JSON_Error_None;
3663 }
3664 
JSON_Writer_GetOutputHandler(JSON_Writer writer)3665 JSON_Writer_OutputHandler JSON_CALL JSON_Writer_GetOutputHandler(JSON_Writer writer)
3666 {
3667    return writer ? writer->outputHandler : NULL;
3668 }
3669 
JSON_Writer_SetOutputHandler(JSON_Writer writer,JSON_Writer_OutputHandler handler)3670 JSON_Status JSON_CALL JSON_Writer_SetOutputHandler(JSON_Writer writer, JSON_Writer_OutputHandler handler)
3671 {
3672    if (!writer)
3673       return JSON_Failure;
3674 
3675    writer->outputHandler = handler;
3676    return JSON_Success;
3677 }
3678 
JSON_Writer_WriteNull(JSON_Writer writer)3679 JSON_Status JSON_CALL JSON_Writer_WriteNull(JSON_Writer writer)
3680 {
3681    static const byte nullUTF8[] = { 'n', 'u', 'l', 'l' };
3682    static const byte nullUTF16[] = { 0, 'n', 0, 'u', 0, 'l', 0, 'l', 0 };
3683    static const byte nullUTF32[] = { 0, 0, 0, 'n', 0, 0, 0, 'u', 0, 0, 0, 'l', 0, 0, 0, 'l', 0, 0, 0 };
3684    static const byte* const nullEncodings[5] = { nullUTF8, nullUTF16 + 1, nullUTF16, nullUTF32 + 3, nullUTF32 };
3685 
3686    return JSON_Writer_WriteSimpleToken(writer, T_NULL, nullEncodings, sizeof(nullUTF8));
3687 }
3688 
JSON_Writer_WriteBoolean(JSON_Writer writer,JSON_Boolean value)3689 JSON_Status JSON_CALL JSON_Writer_WriteBoolean(JSON_Writer writer, JSON_Boolean value)
3690 {
3691    static const byte trueUTF8[] = { 't', 'r', 'u', 'e' };
3692    static const byte trueUTF16[] = { 0, 't', 0, 'r', 0, 'u', 0, 'e', 0 };
3693    static const byte trueUTF32[] = { 0, 0, 0, 't', 0, 0, 0, 'r', 0, 0, 0, 'u', 0, 0, 0, 'e', 0, 0, 0 };
3694    static const byte* const trueEncodings[5] = { trueUTF8, trueUTF16 + 1, trueUTF16, trueUTF32 + 3, trueUTF32 };
3695 
3696    static const byte falseUTF8[] = { 'f', 'a', 'l', 's', 'e' };
3697    static const byte falseUTF16[] = { 0, 'f', 0, 'a', 0, 'l', 0, 's', 0, 'e', 0 };
3698    static const byte falseUTF32[] = { 0, 0, 0, 'f', 0, 0, 0, 'a', 0, 0, 0, 'l', 0, 0, 0, 's', 0, 0, 0, 'e', 0, 0, 0 };
3699    static const byte* const falseEncodings[5] = { falseUTF8, falseUTF16 + 1, falseUTF16, falseUTF32 + 3, falseUTF32 };
3700 
3701    Symbol token;
3702    const byte* const* encodings;
3703    size_t length;
3704    if (value)
3705    {
3706       token = T_TRUE;
3707       encodings = trueEncodings;
3708       length = sizeof(trueUTF8);
3709    }
3710    else
3711    {
3712       token = T_FALSE;
3713       encodings = falseEncodings;
3714       length = sizeof(falseUTF8);
3715    }
3716    return JSON_Writer_WriteSimpleToken(writer, token, encodings, length);
3717 }
3718 
JSON_Writer_WriteString(JSON_Writer writer,const char * pValue,size_t length,JSON_Encoding encoding)3719 JSON_Status JSON_CALL JSON_Writer_WriteString(JSON_Writer writer, const char* pValue, size_t length, JSON_Encoding encoding)
3720 {
3721    JSON_Status status = JSON_Failure;
3722    if (writer && (pValue || !length) && encoding > JSON_UnknownEncoding && encoding <= JSON_UTF32BE &&
3723          !GET_FLAGS(writer->state, WRITER_IN_PROTECTED_API) && writer->error == JSON_Error_None)
3724    {
3725       SET_FLAGS_ON(WriterState, writer->state, WRITER_STARTED | WRITER_IN_PROTECTED_API);
3726       if (JSON_Writer_ProcessToken(writer, T_STRING))
3727          status = JSON_Writer_OutputString(writer, (const byte*)pValue, length, (Encoding)encoding);
3728 
3729       SET_FLAGS_OFF(WriterState, writer->state, WRITER_IN_PROTECTED_API);
3730    }
3731    return status;
3732 }
3733 
JSON_Writer_WriteNumber(JSON_Writer writer,const char * pValue,size_t length,JSON_Encoding encoding)3734 JSON_Status JSON_CALL JSON_Writer_WriteNumber(JSON_Writer writer, const char* pValue, size_t length, JSON_Encoding encoding)
3735 {
3736    JSON_Status status = JSON_Failure;
3737    if (writer && pValue && length && encoding > JSON_UnknownEncoding && encoding <= JSON_UTF32BE &&
3738          !GET_FLAGS(writer->state, WRITER_IN_PROTECTED_API) && writer->error == JSON_Error_None)
3739    {
3740       SET_FLAGS_ON(WriterState, writer->state, WRITER_STARTED | WRITER_IN_PROTECTED_API);
3741       if (JSON_Writer_ProcessToken(writer, T_NUMBER))
3742          status = JSON_Writer_OutputNumber(writer, (const byte*)pValue, length, (Encoding)encoding);
3743 
3744       SET_FLAGS_OFF(WriterState, writer->state, WRITER_IN_PROTECTED_API);
3745    }
3746    return status;
3747 }
3748 
JSON_Writer_WriteSpecialNumber(JSON_Writer writer,JSON_SpecialNumber value)3749 JSON_Status JSON_CALL JSON_Writer_WriteSpecialNumber(JSON_Writer writer, JSON_SpecialNumber value)
3750 {
3751    static const byte nanUTF8[] = { 'N', 'a', 'N' };
3752    static const byte nanUTF16[] = { 0, 'N', 0, 'a', 0, 'N', 0 };
3753    static const byte nanUTF32[] = { 0, 0, 0, 'N', 0, 0, 0, 'a', 0, 0, 0, 'N', 0, 0, 0 };
3754    static const byte* const nanEncodings[5] = { nanUTF8, nanUTF16 + 1, nanUTF16, nanUTF32 + 3, nanUTF32 };
3755 
3756    static const byte ninfUTF8[] = { '-', 'I', 'n', 'f', 'i', 'n', 'i', 't', 'y' };
3757    static const byte ninfUTF16[] = { 0, '-', 0, 'I', 0, 'n', 0, 'f', 0, 'i', 0, 'n', 0, 'i', 0, 't', 0, 'y', 0 };
3758    static const byte ninfUTF32[] = { 0, 0, 0, '-', 0, 0, 0, 'I', 0, 0, 0, 'n', 0, 0, 0, 'f', 0, 0, 0, 'i', 0, 0, 0, 'n', 0, 0, 0, 'i', 0, 0, 0, 't', 0, 0, 0, 'y', 0, 0, 0 };
3759    static const byte* const infinityEncodings[5] = { ninfUTF8 + 1, ninfUTF16 + 3, ninfUTF16 + 2, ninfUTF32 + 7, ninfUTF32 + 4 };
3760    static const byte* const negativeInfinityEncodings[5] = { ninfUTF8, ninfUTF16 + 1, ninfUTF16, ninfUTF32 + 3, ninfUTF32 };
3761 
3762    Symbol token;
3763    const byte* const* encodings;
3764    size_t length;
3765    if (value == JSON_Infinity)
3766    {
3767       token = T_INFINITY;
3768       encodings = infinityEncodings;
3769       length = sizeof(ninfUTF8) - 1/* - */;
3770    }
3771    else if (value == JSON_NegativeInfinity)
3772    {
3773       token = T_NEGATIVE_INFINITY;
3774       encodings = negativeInfinityEncodings;
3775       length = sizeof(ninfUTF8);
3776    }
3777    else
3778    {
3779       token = T_NAN;
3780       encodings = nanEncodings;
3781       length = sizeof(nanUTF8);
3782    }
3783    return JSON_Writer_WriteSimpleToken(writer, token, encodings, length);
3784 }
3785 
JSON_Writer_WriteStartObject(JSON_Writer writer)3786 JSON_Status JSON_CALL JSON_Writer_WriteStartObject(JSON_Writer writer)
3787 {
3788    static const byte utf[] = { 0, 0, 0, '{', 0, 0, 0 };
3789    static const byte* const encodings[5] = { utf + 3, utf + 3, utf + 2, utf + 3, utf };
3790 
3791    return JSON_Writer_WriteSimpleToken(writer, T_LEFT_CURLY, encodings, 1);
3792 }
3793 
JSON_Writer_WriteEndObject(JSON_Writer writer)3794 JSON_Status JSON_CALL JSON_Writer_WriteEndObject(JSON_Writer writer)
3795 {
3796    static const byte utf[] = { 0, 0, 0, '}', 0, 0, 0 };
3797    static const byte* const encodings[5] = { utf + 3, utf + 3, utf + 2, utf + 3, utf };
3798 
3799    return JSON_Writer_WriteSimpleToken(writer, T_RIGHT_CURLY, encodings, 1);
3800 }
3801 
JSON_Writer_WriteStartArray(JSON_Writer writer)3802 JSON_Status JSON_CALL JSON_Writer_WriteStartArray(JSON_Writer writer)
3803 {
3804    static const byte utf[] = { 0, 0, 0, '[', 0, 0, 0 };
3805    static const byte* const encodings[5] = { utf + 3, utf + 3, utf + 2, utf + 3, utf };
3806 
3807    return JSON_Writer_WriteSimpleToken(writer, T_LEFT_SQUARE, encodings, 1);
3808 }
3809 
JSON_Writer_WriteEndArray(JSON_Writer writer)3810 JSON_Status JSON_CALL JSON_Writer_WriteEndArray(JSON_Writer writer)
3811 {
3812    static const byte utf[] = { 0, 0, 0, ']', 0, 0, 0 };
3813    static const byte* const encodings[5] = { utf + 3, utf + 3, utf + 2, utf + 3, utf };
3814 
3815    return JSON_Writer_WriteSimpleToken(writer, T_RIGHT_SQUARE, encodings, 1);
3816 }
3817 
JSON_Writer_WriteColon(JSON_Writer writer)3818 JSON_Status JSON_CALL JSON_Writer_WriteColon(JSON_Writer writer)
3819 {
3820    static const byte utf[] = { 0, 0, 0, ':', 0, 0, 0 };
3821    static const byte* const encodings[5] = { utf + 3, utf + 3, utf + 2, utf + 3, utf };
3822 
3823    return JSON_Writer_WriteSimpleToken(writer, T_COLON, encodings, 1);
3824 }
3825 
JSON_Writer_WriteComma(JSON_Writer writer)3826 JSON_Status JSON_CALL JSON_Writer_WriteComma(JSON_Writer writer)
3827 {
3828    static const byte utf[] = { 0, 0, 0, ',', 0, 0, 0 };
3829    static const byte* const encodings[5] = { utf + 3, utf + 3, utf + 2, utf + 3, utf };
3830 
3831    return JSON_Writer_WriteSimpleToken(writer, T_COMMA, encodings, 1);
3832 }
3833 
JSON_Writer_WriteSpace(JSON_Writer writer,size_t numberOfSpaces)3834 JSON_Status JSON_CALL JSON_Writer_WriteSpace(JSON_Writer writer, size_t numberOfSpaces)
3835 {
3836    JSON_Status status = JSON_Failure;
3837    if (writer && !GET_FLAGS(writer->state, WRITER_IN_PROTECTED_API) && writer->error == JSON_Error_None)
3838    {
3839       SET_FLAGS_ON(WriterState, writer->state, WRITER_STARTED | WRITER_IN_PROTECTED_API);
3840       status = JSON_Writer_OutputSpaces(writer, numberOfSpaces);
3841       SET_FLAGS_OFF(WriterState, writer->state, WRITER_IN_PROTECTED_API);
3842    }
3843    return status;
3844 }
3845 
JSON_Writer_WriteNewLine(JSON_Writer writer)3846 JSON_Status JSON_CALL JSON_Writer_WriteNewLine(JSON_Writer writer)
3847 {
3848    static const byte lfUTF[] = { 0, 0, 0, LINE_FEED_CODEPOINT, 0, 0, 0 };
3849    static const byte* const lfEncodings[5] = { lfUTF + 3, lfUTF + 3, lfUTF + 2, lfUTF + 3, lfUTF };
3850 
3851    static const byte crlfUTF8[] = { CARRIAGE_RETURN_CODEPOINT, LINE_FEED_CODEPOINT };
3852    static const byte crlfUTF16[] = { 0, CARRIAGE_RETURN_CODEPOINT, 0, LINE_FEED_CODEPOINT, 0 };
3853    static const byte crlfUTF32[] = { 0, 0, 0, CARRIAGE_RETURN_CODEPOINT, 0, 0, 0, LINE_FEED_CODEPOINT, 0, 0, 0 };
3854    static const byte* const crlfEncodings[5] = { crlfUTF8, crlfUTF16 + 1, crlfUTF16, crlfUTF32 + 3, crlfUTF32 };
3855 
3856    JSON_Status status = JSON_Failure;
3857    if (writer && !GET_FLAGS(writer->state, WRITER_IN_PROTECTED_API) && writer->error == JSON_Error_None)
3858    {
3859       const byte* const* encodings;
3860       size_t length;
3861       size_t encodedLength;
3862       SET_FLAGS_ON(WriterState, writer->state, WRITER_STARTED | WRITER_IN_PROTECTED_API);
3863       if (GET_FLAGS(writer->flags, WRITER_USE_CRLF))
3864       {
3865          encodings = crlfEncodings;
3866          length = 2;
3867       }
3868       else
3869       {
3870          encodings = lfEncodings;
3871          length = 1;
3872       }
3873       encodedLength = length * (size_t)SHORTEST_ENCODING_SEQUENCE(writer->outputEncoding);
3874       if (JSON_Writer_OutputBytes(writer, encodings[writer->outputEncoding - 1], encodedLength))
3875          status = JSON_Success;
3876       SET_FLAGS_OFF(WriterState, writer->state, WRITER_IN_PROTECTED_API);
3877    }
3878    return status;
3879 }
3880 
3881 #endif /* JSON_NO_WRITER */
3882 
3883 /******************** Miscellaneous API ********************/
3884 
JSON_LibraryVersion(void)3885 const JSON_Version* JSON_CALL JSON_LibraryVersion(void)
3886 {
3887    static JSON_Version version = { JSON_MAJOR_VERSION, JSON_MINOR_VERSION, JSON_MICRO_VERSION };
3888    return &version;
3889 }
3890 
JSON_ErrorString(JSON_Error error)3891 const char* JSON_CALL JSON_ErrorString(JSON_Error error)
3892 {
3893    /* This array must match the order and number of the JSON_Error enum. */
3894    static const char* errorStrings[] =
3895    {
3896       /* JSON_Error_None */                            "no error",
3897       /* JSON_Error_OutOfMemory */                     "could not allocate enough memory",
3898       /* JSON_Error_AbortedByHandler */                "the operation was aborted by a handler",
3899       /* JSON_Error_BOMNotAllowed */                   "the input begins with a byte-order mark (BOM), which is not allowed by RFC 4627",
3900       /* JSON_Error_InvalidEncodingSequence */         "the input contains a byte or sequence of bytes that is not valid for the input encoding",
3901       /* JSON_Error_UnknownToken */                    "the input contains an unknown token",
3902       /* JSON_Error_UnexpectedToken */                 "the input contains an unexpected token",
3903       /* JSON_Error_IncompleteToken */                 "the input ends in the middle of a token",
3904       /* JSON_Error_MoreTokensExpected */              "the input ends when more tokens are expected",
3905       /* JSON_Error_UnescapedControlCharacter */       "the input contains a string containing an unescaped control character (U+0000 - U+001F)",
3906       /* JSON_Error_InvalidEscapeSequence */           "the input contains a string containing an invalid escape sequence",
3907       /* JSON_Error_UnpairedSurrogateEscapeSequence */ "the input contains a string containing an unmatched UTF-16 surrogate codepoint",
3908       /* JSON_Error_TooLongString */                   "the input contains a string that is too long",
3909       /* JSON_Error_InvalidNumber */                   "the input contains an invalid number",
3910       /* JSON_Error_TooLongNumber */                   "the input contains a number that is too long",
3911       /* JSON_Error_DuplicateObjectMember */           "the input contains an object with duplicate members",
3912       /* JSON_Error_StoppedAfterEmbeddedDocument */    "the end of the embedded document was reached"
3913    };
3914    return ((unsigned int)error < (sizeof(errorStrings) / sizeof(errorStrings[0])))
3915       ? errorStrings[error]
3916       : "";
3917 }
3918 
3919 static const uint32_t endianEncodings = (((uint32_t)JSON_UTF32BE) << 24) | (((uint32_t)JSON_UTF16BE) << 16) | (((uint32_t)JSON_UTF16LE) << 8) | ((uint32_t)JSON_UTF32LE);
3920 
JSON_NativeUTF16Encoding(void)3921 JSON_Encoding JSON_CALL JSON_NativeUTF16Encoding(void)
3922 {
3923    return (JSON_Encoding)(((byte*)&endianEncodings)[1]);
3924 }
3925 
JSON_NativeUTF32Encoding(void)3926 JSON_Encoding JSON_CALL JSON_NativeUTF32Encoding(void)
3927 {
3928    return (JSON_Encoding)(((byte*)&endianEncodings)[0]);
3929 }
3930