1 /*
2   Copyright (c) 2012 John-Anthony Owens
3 
4   Permission is hereby granted, free of charge, to any person obtaining a
5   copy of this software and associated documentation files (the "Software"),
6   to deal in the Software without restriction, including without limitation
7   the rights to use, copy, modify, merge, publish, distribute, sublicense,
8   and/or sell copies of the Software, and to permit persons to whom the
9   Software is furnished to do so, subject to the following conditions:
10 
11   The above copyright notice and this permission notice shall be included
12   in all copies or substantial portions of the Software.
13 
14   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19   FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
20   IN THE SOFTWARE.
21 */
22 
23 #include <stdlib.h>
24 #include <string.h>
25 
26 /* Ensure uint32_t type (compiler-dependent). */
27 #if defined(_MSC_VER)
28 typedef unsigned __int32 uint32_t;
29 #else
30 #include <stdint.h>
31 #endif
32 
33 /* Ensure SIZE_MAX defined. */
34 #ifndef SIZE_MAX
35 #define SIZE_MAX ((size_t)-1)
36 #endif
37 
38 /* Mark APIs for export (as opposed to import) when we build this file. */
39 #define JSON_BUILDING
40 #include <formats/jsonsax_full.h>
41 
42 /* Default allocation constants. */
43 #define DEFAULT_TOKEN_BYTES_LENGTH 64 /* MUST be a power of 2 */
44 #define DEFAULT_SYMBOL_STACK_SIZE  32 /* MUST be a power of 2 */
45 
46 /* Types for readability. */
47 typedef unsigned char byte;
48 typedef uint32_t Codepoint;
49 
50 /* Especially-relevant Unicode codepoints. */
51 #define U_(x) ((Codepoint)(x))
52 #define NULL_CODEPOINT                  U_(0x0000)
53 #define BACKSPACE_CODEPOINT             U_(0x0008)
54 #define TAB_CODEPOINT                   U_(0x0009)
55 #define LINE_FEED_CODEPOINT             U_(0x000A)
56 #define FORM_FEED_CODEPOINT             U_(0x000C)
57 #define CARRIAGE_RETURN_CODEPOINT       U_(0x000D)
58 #define FIRST_NON_CONTROL_CODEPOINT     U_(0x0020)
59 #define DELETE_CODEPOINT                U_(0x007F)
60 #define FIRST_NON_ASCII_CODEPOINT       U_(0x0080)
61 #define FIRST_2_BYTE_UTF8_CODEPOINT     U_(0x0080)
62 #define FIRST_3_BYTE_UTF8_CODEPOINT     U_(0x0800)
63 #define LINE_SEPARATOR_CODEPOINT        U_(0x2028)
64 #define PARAGRAPH_SEPARATOR_CODEPOINT   U_(0x2029)
65 #define BOM_CODEPOINT                   U_(0xFEFF)
66 #define REPLACEMENT_CHARACTER_CODEPOINT U_(0xFFFD)
67 #define FIRST_NON_BMP_CODEPOINT         U_(0x10000)
68 #define FIRST_4_BYTE_UTF8_CODEPOINT     U_(0x10000)
69 #define MAX_CODEPOINT                   U_(0x10FFFF)
70 #define EOF_CODEPOINT                   U_(0xFFFFFFFF)
71 
72 /* Bit-masking macros. */
73 #define BOTTOM_3_BITS(x) ((x) & 0x7)
74 #define BOTTOM_4_BITS(x) ((x) & 0xF)
75 #define BOTTOM_5_BITS(x) ((x) & 0x1F)
76 #define BOTTOM_6_BITS(x) ((x) & 0x3F)
77 
78 /* Bit-flag macros. */
79 #define GET_FLAGS(x, f)                  ((x) & (f))
80 #define SET_FLAGS_ON(flagstype, x, f)    do { (x) |= (flagstype)(f); } while (0)
81 #define SET_FLAGS_OFF(flagstype, x, f)   do { (x) &= (flagstype)~(f); } while (0)
82 #define SET_FLAGS(flagstype, x, f, cond) do { if (cond) (x) |= (flagstype)(f); else (x) &= (flagstype)~(f); } while (0)
83 
84 /* UTF-8 byte-related macros. */
85 #define IS_UTF8_SINGLE_BYTE(b)       (((b) & 0x80) == 0)
86 #define IS_UTF8_CONTINUATION_BYTE(b) (((b) & 0xC0) == 0x80)
87 #define IS_UTF8_FIRST_BYTE_OF_2(b)   (((b) & 0xE0) == 0xC0)
88 #define IS_UTF8_FIRST_BYTE_OF_3(b)   (((b) & 0xF0) == 0xE0)
89 #define IS_UTF8_FIRST_BYTE_OF_4(b)   (((b) & 0xF8) == 0xF0)
90 
91 /* Unicode codepoint-related macros. */
/* Unicode noncharacters are U+FDD0..U+FDEF plus the last two codepoints of
   every plane (U+xxFFFE and U+xxFFFF). The low 16 bits must be tested, not
   just the low 8, or ordinary codepoints such as U+00FE and U+00FF would be
   misclassified. */
#define IS_NONCHARACTER(c)               ((((c) & 0xFFFE) == 0xFFFE) || (((c) >= 0xFDD0) && ((c) <= 0xFDEF)))
93 #define IS_SURROGATE(c)                  (((c) & 0xFFFFF800) == 0xD800)
94 #define IS_LEADING_SURROGATE(c)          (((c) & 0xFFFFFC00) == 0xD800)
95 #define IS_TRAILING_SURROGATE(c)         (((c) & 0xFFFFFC00) == 0xDC00)
96 #define CODEPOINT_FROM_SURROGATES(hi_lo) ((((hi_lo) >> 16) << 10) + ((hi_lo) & 0xFFFF) + 0xFCA02400)
97 #define SURROGATES_FROM_CODEPOINT(c)     ((((c) << 6) & 0x7FF0000) + ((c) & 0x3FF) + 0xD7C0DC00)
98 #define SHORTEST_ENCODING_SEQUENCE(enc)  (UINT32_C(1) << ((enc) >> 1))
99 #define LONGEST_ENCODING_SEQUENCE        4
100 
101 /* Internal types that alias enum types in the public API.
102    By using byte to represent these values internally,
103    we can guarantee minimal storage size and avoid compiler
104    warnings when using values of the type in switch statements
105    that don't have (or need) a default case. */
106 typedef byte Encoding;
107 typedef byte Error;
108 typedef byte TokenAttributes;
109 
110 /******************** Default Memory Suite ********************/
111 
DefaultReallocHandler(void * userData,void * ptr,size_t size)112 static void* JSON_CALL DefaultReallocHandler(void* userData, void* ptr, size_t size)
113 {
114    (void)userData; /* unused */
115    return realloc(ptr, size);
116 }
117 
DefaultFreeHandler(void * userData,void * ptr)118 static void JSON_CALL DefaultFreeHandler(void* userData, void* ptr)
119 {
120    (void)userData; /* unused */
121    free(ptr);
122 }
123 
/* Default memory suite: no user data, with (re)allocation and release
   routed to realloc()/free() through the two default handlers. */
static const JSON_MemorySuite defaultMemorySuite = { NULL, &DefaultReallocHandler, &DefaultFreeHandler };
125 
/* Grows a buffer to twice its current length.
 *
 * pMemorySuite   - allocator used for the new storage.
 * pDefaultBuffer - statically-owned buffer that must never be passed to the
 *                  allocator; if pBuffer still equals it, fresh storage is
 *                  allocated and the first `length` bytes are copied over.
 * pBuffer        - buffer currently in use.
 * length         - current length of pBuffer, in bytes.
 *
 * Returns the grown buffer, or NULL if doubling would overflow size_t or
 * the allocator fails. On failure the buffer passed in is left untouched.
 */
static byte* DoubleBuffer(const JSON_MemorySuite* pMemorySuite, byte* pDefaultBuffer, byte* pBuffer, size_t length)
{
   const size_t doubled = length * 2;
   byte*        pGrown;

   /* The multiplication wrapped around: the request cannot be satisfied. */
   if (doubled < length)
      return NULL;

   /* Already heap-allocated: let the allocator resize it in place. */
   if (pBuffer != pDefaultBuffer)
      return (byte*)pMemorySuite->realloc(pMemorySuite->userData, pBuffer, doubled);

   /* Still on the default buffer: move its contents into new storage. */
   pGrown = (byte*)pMemorySuite->realloc(pMemorySuite->userData, NULL, doubled);
   if (pGrown)
      memcpy(pGrown, pDefaultBuffer, length);
   return pGrown;
}
147 
148 /******************** Unicode Decoder ********************/
149 
150 /* Mutually-exclusive decoder states. */
/* The bits of DecoderState are laid out as follows:

   ---lllnn

   - = unused (3 bits)
   l = expected total sequence length (3 bits)
   n = number of bytes decoded so far (2 bits)
   */
159 
160 #define DECODER_RESET  0x00
161 #define DECODED_1_OF_2 0x09 /* 00001001 */
162 #define DECODED_1_OF_3 0x0D /* 00001101 */
163 #define DECODED_2_OF_3 0x0E /* 00001110 */
164 #define DECODED_1_OF_4 0x11 /* 00010001 */
165 #define DECODED_2_OF_4 0x12 /* 00010010 */
166 #define DECODED_3_OF_4 0x13 /* 00010011 */
167 typedef byte DecoderState;
168 
169 #define DECODER_STATE_BYTES(s) (size_t)((s) & 0x3)
170 
171 /* Decoder data. */
/* State carried by the Unicode decoder between calls to
   Decoder_ProcessByte(). */
typedef struct tag_DecoderData
{
   uint32_t     bits;   /* bits of the sequence decoded so far */
   DecoderState state;  /* byte alignment */
} DecoderData;
/* Handle type taken by the Decoder_* functions. */
typedef DecoderData* Decoder;
178 
/* The bits of DecoderOutput are laid out as follows:
180 
181    ------rrlllccccccccccccccccccccc
182 
183    - = unused (6 bits)
184    r = result code (2 bits)
185    l = sequence length (3 bits)
186    c = codepoint (21 bits)
187    */
188 #define SEQUENCE_PENDING           0
189 #define SEQUENCE_COMPLETE          1
190 #define SEQUENCE_INVALID_INCLUSIVE 2
191 #define SEQUENCE_INVALID_EXCLUSIVE 3
192 typedef uint32_t DecoderResultCode;
193 
194 #define DECODER_OUTPUT(r, l, c)    (DecoderOutput)(((r) << 24) | ((l) << 21) | (c))
195 #define DECODER_RESULT_CODE(o)     (DecoderResultCode)((DecoderOutput)(o) >> 24)
196 #define DECODER_SEQUENCE_LENGTH(o) (size_t)(((DecoderOutput)(o) >> 21) & 0x7)
197 #define DECODER_CODEPOINT(o)       (Codepoint)((DecoderOutput)(o) & 0x001FFFFF)
198 typedef uint32_t DecoderOutput;
199 
200 /* Decoder functions. */
201 
/* Returns the decoder to its initial state: no accumulated bits and no
   sequence in progress. */
static void Decoder_Reset(Decoder decoder)
{
   decoder->bits  = 0;
   decoder->state = DECODER_RESET;
}
207 
/* Returns 1 if the decoder is part-way through a multi-byte sequence,
   0 if it is in the reset state. */
static int Decoder_SequencePending(Decoder decoder)
{
   return (decoder->state == DECODER_RESET) ? 0 : 1;
}
212 
Decoder_ProcessByte(Decoder decoder,Encoding encoding,byte b)213 static DecoderOutput Decoder_ProcessByte(Decoder decoder, Encoding encoding, byte b)
214 {
215    DecoderOutput output = DECODER_OUTPUT(SEQUENCE_PENDING, 0, 0);
216    switch (encoding)
217    {
218       case JSON_UTF8:
219          /* When the input encoding is UTF-8, the decoded codepoint's bits are
220             recorded in the bottom 3 bytes of bits as they are decoded.
221             The top byte is not used. */
222          switch (decoder->state)
223          {
224             case DECODER_RESET:
225                if (IS_UTF8_SINGLE_BYTE(b))
226                   output = DECODER_OUTPUT(SEQUENCE_COMPLETE, 1, b);
227                else if (IS_UTF8_FIRST_BYTE_OF_2(b))
228                {
229                   /* UTF-8 2-byte sequences that are overlong encodings can be
230                      detected from just the first byte (C0 or C1). */
231                   decoder->bits = (uint32_t)BOTTOM_5_BITS(b) << 6;
232                   if (decoder->bits < FIRST_2_BYTE_UTF8_CODEPOINT)
233                      output = DECODER_OUTPUT(SEQUENCE_INVALID_INCLUSIVE, 1, 0);
234                   else
235                   {
236                      decoder->state = DECODED_1_OF_2;
237                      goto noreset;
238                   }
239                }
240                else if (IS_UTF8_FIRST_BYTE_OF_3(b))
241                {
242                   decoder->bits = (uint32_t)BOTTOM_4_BITS(b) << 12;
243                   decoder->state = DECODED_1_OF_3;
244                   goto noreset;
245                }
246                else if (IS_UTF8_FIRST_BYTE_OF_4(b))
247                {
248                   /* Some UTF-8 4-byte sequences that encode out-of-range
249                      codepoints can be detected from the first byte (F5 - FF). */
250                   decoder->bits = (uint32_t)BOTTOM_3_BITS(b) << 18;
251                   if (decoder->bits > MAX_CODEPOINT)
252                      output = DECODER_OUTPUT(SEQUENCE_INVALID_INCLUSIVE, 1, 0);
253                   else
254                   {
255                      decoder->state = DECODED_1_OF_4;
256                      goto noreset;
257                   }
258                }
259                else
260                   /* The byte is of the form 11111xxx or 10xxxxxx, and is not
261                      a valid first byte for a UTF-8 sequence. */
262                   output = DECODER_OUTPUT(SEQUENCE_INVALID_INCLUSIVE, 1, 0);
263                break;
264 
265             case DECODED_1_OF_2:
266                if (IS_UTF8_CONTINUATION_BYTE(b))
267                   output = DECODER_OUTPUT(SEQUENCE_COMPLETE, 2, decoder->bits | BOTTOM_6_BITS(b));
268                else
269                   output = DECODER_OUTPUT(SEQUENCE_INVALID_EXCLUSIVE, 1, 0);
270                break;
271 
272             case DECODED_1_OF_3:
273                if (IS_UTF8_CONTINUATION_BYTE(b))
274                {
275                   /* UTF-8 3-byte sequences that are overlong
276                    * encodings or encode surrogate codepoints
277                    * can be detected after 2 bytes. */
278                   decoder->bits |= (uint32_t)BOTTOM_6_BITS(b) << 6;
279                   if ((decoder->bits < FIRST_3_BYTE_UTF8_CODEPOINT) ||
280                         IS_SURROGATE(decoder->bits))
281                      output = DECODER_OUTPUT(SEQUENCE_INVALID_EXCLUSIVE, 1, 0);
282                   else
283                   {
284                      decoder->state = DECODED_2_OF_3;
285                      goto noreset;
286                   }
287                }
288                else
289                   output = DECODER_OUTPUT(SEQUENCE_INVALID_EXCLUSIVE, 1, 0);
290                break;
291 
292             case DECODED_2_OF_3:
293                if (IS_UTF8_CONTINUATION_BYTE(b))
294                   output = DECODER_OUTPUT(SEQUENCE_COMPLETE, 3, decoder->bits | BOTTOM_6_BITS(b));
295                else
296                   output = DECODER_OUTPUT(SEQUENCE_INVALID_EXCLUSIVE, 2, 0);
297                break;
298 
299             case DECODED_1_OF_4:
300                if (IS_UTF8_CONTINUATION_BYTE(b))
301                {
302                   /* UTF-8 4-byte sequences that are overlong encodings or encode
303                      out-of-range codepoints can be detected after 2 bytes. */
304                   decoder->bits |= (uint32_t)BOTTOM_6_BITS(b) << 12;
305                   if (  (decoder->bits < FIRST_4_BYTE_UTF8_CODEPOINT) ||
306                         (decoder->bits > MAX_CODEPOINT))
307                      output = DECODER_OUTPUT(SEQUENCE_INVALID_EXCLUSIVE, 1, 0);
308                   else
309                   {
310                      decoder->state = DECODED_2_OF_4;
311                      goto noreset;
312                   }
313                }
314                else
315                   output = DECODER_OUTPUT(SEQUENCE_INVALID_EXCLUSIVE, 1, 0);
316                break;
317 
318             case DECODED_2_OF_4:
319                if (IS_UTF8_CONTINUATION_BYTE(b))
320                {
321                   decoder->bits |= (uint32_t)BOTTOM_6_BITS(b) << 6;
322                   decoder->state = DECODED_3_OF_4;
323                   goto noreset;
324                }
325 
326                output = DECODER_OUTPUT(SEQUENCE_INVALID_EXCLUSIVE, 2, 0);
327                break;
328 
329             case DECODED_3_OF_4:
330                if (IS_UTF8_CONTINUATION_BYTE(b))
331                   output = DECODER_OUTPUT(SEQUENCE_COMPLETE, 4, decoder->bits | BOTTOM_6_BITS(b));
332                else
333                   output = DECODER_OUTPUT(SEQUENCE_INVALID_EXCLUSIVE, 3, 0);
334                break;
335          }
336          break;
337 
338       case JSON_UTF16LE:
339          /* When the input encoding is UTF-16, the decoded codepoint's bits are
340             recorded in the bottom 2 bytes of bits as they are decoded.
341             If those 2 bytes form a leading surrogate, the decoder treats the
342             surrogate pair as a single 4-byte sequence, shifts the leading
343             surrogate into the high 2 bytes of bits, and decodes the
344             trailing surrogate's bits in the bottom 2 bytes of bits. */
345          switch (decoder->state)
346          {
347             case DECODER_RESET:
348                decoder->bits = b;
349                decoder->state = DECODED_1_OF_2;
350                goto noreset;
351 
352             case DECODED_1_OF_2:
353                decoder->bits |= (uint32_t)b << 8;
354                /* A trailing surrogate cannot appear on its own. */
355                if (IS_TRAILING_SURROGATE(decoder->bits))
356                   output = DECODER_OUTPUT(SEQUENCE_INVALID_INCLUSIVE, 2, 0);
357                else if (IS_LEADING_SURROGATE(decoder->bits))
358                {
359                   /* A leading surrogate implies a 4-byte surrogate pair. */
360                   decoder->bits <<= 16;
361                   decoder->state  = DECODED_2_OF_4;
362                   goto noreset;
363                }
364                else
365                   output = DECODER_OUTPUT(SEQUENCE_COMPLETE, 2, decoder->bits);
366                break;
367 
368             case DECODED_2_OF_4:
369                decoder->bits |= b;
370                decoder->state = DECODED_3_OF_4;
371                goto noreset;
372 
373             case DECODED_3_OF_4:
374                decoder->bits |= (uint32_t)b << 8;
375                if (!IS_TRAILING_SURROGATE(decoder->bits & 0xFFFF))
376                {
377                   /* A leading surrogate must be followed by a trailing one.
378                      Treat the previous 3 bytes as an invalid 2-byte sequence
379                      followed by the first byte of a new sequence. */
380                   decoder->bits &= 0xFF;
381                   decoder->state = DECODED_1_OF_2;
382                   output = DECODER_OUTPUT(SEQUENCE_INVALID_EXCLUSIVE, 2, 0);
383                   goto noreset;
384                }
385 
386                output = DECODER_OUTPUT(SEQUENCE_COMPLETE, 4, CODEPOINT_FROM_SURROGATES(decoder->bits));
387                break;
388          }
389          break;
390 
391       case JSON_UTF16BE:
392          /* When the input encoding is UTF-16, the decoded codepoint's bits are
393             recorded in the bottom 2 bytes of bits as they are decoded.
394             If those 2 bytes form a leading surrogate, the decoder treats the
395             surrogate pair as a single 4-byte sequence, shifts the leading
396             surrogate into the high 2 bytes of bits, and decodes the
397             trailing surrogate's bits in the bottom 2 bytes of bits. */
398          switch (decoder->state)
399          {
400             case DECODER_RESET:
401                decoder->bits = (uint32_t)b << 8;
402                decoder->state = DECODED_1_OF_2;
403                goto noreset;
404 
405             case DECODED_1_OF_2:
406                decoder->bits |= b;
407                /* A trailing surrogate cannot appear on its own. */
408                if (IS_TRAILING_SURROGATE(decoder->bits))
409                   output = DECODER_OUTPUT(SEQUENCE_INVALID_INCLUSIVE, 2, 0);
410                else if (IS_LEADING_SURROGATE(decoder->bits))
411                {
412                   /* A leading surrogate implies a 4-byte surrogate pair. */
413                   decoder->bits <<= 16;
414                   decoder->state = DECODED_2_OF_4;
415                   goto noreset;
416                }
417                else
418                   output = DECODER_OUTPUT(SEQUENCE_COMPLETE, 2, decoder->bits);
419                break;
420 
421             case DECODED_2_OF_4:
422                decoder->bits |= (uint32_t)b << 8;
423                decoder->state = DECODED_3_OF_4;
424                goto noreset;
425 
426             case DECODED_3_OF_4:
427                decoder->bits |= b;
428                if (!IS_TRAILING_SURROGATE(decoder->bits & 0xFFFF))
429                {
430                   /* A leading surrogate must be followed by a trailing one.
431                      Treat the previous 3 bytes as an invalid 2-byte sequence
432                      followed by the first byte of a new sequence. */
433                   decoder->bits &= 0xFF00;
434                   decoder->state = DECODED_1_OF_2;
435                   output = DECODER_OUTPUT(SEQUENCE_INVALID_EXCLUSIVE, 2, 0);
436                   goto noreset;
437                }
438 
439                output = DECODER_OUTPUT(SEQUENCE_COMPLETE, 4,
440                      CODEPOINT_FROM_SURROGATES(decoder->bits));
441                break;
442          }
443          break;
444 
445       case JSON_UTF32LE:
446          /* When the input encoding is UTF-32, the decoded codepoint's bits are
447             recorded in bits as they are decoded. */
448          switch (decoder->state)
449          {
450             case DECODER_RESET:
451                decoder->state = DECODED_1_OF_4;
452                decoder->bits = (uint32_t)b;
453                goto noreset;
454 
455             case DECODED_1_OF_4:
456                decoder->state = DECODED_2_OF_4;
457                decoder->bits |= (uint32_t)b << 8;
458                goto noreset;
459 
460             case DECODED_2_OF_4:
461                decoder->state = DECODED_3_OF_4;
462                decoder->bits |= (uint32_t)b << 16;
463                goto noreset;
464 
465             case DECODED_3_OF_4:
466                decoder->bits |= (uint32_t)b << 24;
467                output = (
468                      IS_SURROGATE(decoder->bits) ||
469                      (decoder->bits > MAX_CODEPOINT))
470                   ? DECODER_OUTPUT(SEQUENCE_INVALID_INCLUSIVE, 4, 0)
471                   : DECODER_OUTPUT(SEQUENCE_COMPLETE, 4, decoder->bits);
472                break;
473          }
474          break;
475 
476       case JSON_UTF32BE:
477          /* When the input encoding is UTF-32, the decoded codepoint's bits are
478             recorded in bits as they are decoded. */
479          switch (decoder->state)
480          {
481             case DECODER_RESET:
482                decoder->state = DECODED_1_OF_4;
483                decoder->bits  = (uint32_t)b << 24;
484                goto noreset;
485 
486             case DECODED_1_OF_4:
487                decoder->state = DECODED_2_OF_4;
488                decoder->bits |= (uint32_t)b << 16;
489                goto noreset;
490 
491             case DECODED_2_OF_4:
492                decoder->state = DECODED_3_OF_4;
493                decoder->bits |= (uint32_t)b << 8;
494                goto noreset;
495 
496             case DECODED_3_OF_4:
497                decoder->bits |= b;
498                output = (IS_SURROGATE(decoder->bits) ||
499                      (decoder->bits > MAX_CODEPOINT))
500                   ? DECODER_OUTPUT(SEQUENCE_INVALID_INCLUSIVE, 4, 0)
501                   : DECODER_OUTPUT(SEQUENCE_COMPLETE, 4, decoder->bits);
502                break;
503          }
504          break;
505    }
506 
507    /* Reset the decoder for the next sequence. */
508    Decoder_Reset(decoder);
509 
510 noreset:
511    return output;
512 }
513 
514 /******************** Unicode Encoder ********************/
515 
516 /* This function makes the following assumptions about its input:
517 
518    1. The c argument is a valid codepoint (U+0000 - U+10FFFF).
519    2. The encoding argument is not JSON_UnknownEncoding.
520    3. The pBytes argument points to an array of at least 4 bytes.
521    */
EncodeCodepoint(Codepoint c,Encoding encoding,byte * pBytes)522 static size_t EncodeCodepoint(Codepoint c, Encoding encoding, byte* pBytes)
523 {
524    size_t length = 0;
525    switch (encoding)
526    {
527       case JSON_UTF8:
528          if (c < FIRST_2_BYTE_UTF8_CODEPOINT)
529          {
530             pBytes[0] = (byte)c;
531             length    = 1;
532          }
533          else if (c < FIRST_3_BYTE_UTF8_CODEPOINT)
534          {
535             pBytes[0] = (byte)(0xC0 | (c >> 6));
536             pBytes[1] = (byte)(0x80 | BOTTOM_6_BITS(c));
537             length    = 2;
538          }
539          else if (c < FIRST_4_BYTE_UTF8_CODEPOINT)
540          {
541             pBytes[0] = (byte)(0xE0 | (c >> 12));
542             pBytes[1] = (byte)(0x80 | BOTTOM_6_BITS(c >> 6));
543             pBytes[2] = (byte)(0x80 | BOTTOM_6_BITS(c));
544             length    = 3;
545          }
546          else
547          {
548             pBytes[0] = (byte)(0xF0 | (c >> 18));
549             pBytes[1] = (byte)(0x80 | BOTTOM_6_BITS(c >> 12));
550             pBytes[2] = (byte)(0x80 | BOTTOM_6_BITS(c >> 6));
551             pBytes[3] = (byte)(0x80 | BOTTOM_6_BITS(c));
552             length    = 4;
553          }
554          break;
555 
556       case JSON_UTF16LE:
557          if (c < FIRST_NON_BMP_CODEPOINT)
558          {
559             pBytes[0] = (byte)(c);
560             pBytes[1] = (byte)(c >> 8);
561             length    = 2;
562          }
563          else
564          {
565             uint32_t surrogates = SURROGATES_FROM_CODEPOINT(c);
566 
567             /* Leading surrogate. */
568             pBytes[0] = (byte)(surrogates >> 16);
569             pBytes[1] = (byte)(surrogates >> 24);
570 
571             /* Trailing surrogate. */
572             pBytes[2] = (byte)(surrogates);
573             pBytes[3] = (byte)(surrogates >> 8);
574             length    = 4;
575          }
576          break;
577 
578       case JSON_UTF16BE:
579          if (c < FIRST_NON_BMP_CODEPOINT)
580          {
581             pBytes[1] = (byte)(c);
582             pBytes[0] = (byte)(c >> 8);
583             length    = 2;
584          }
585          else
586          {
587             /* The codepoint requires a surrogate pair in UTF-16. */
588             uint32_t surrogates = SURROGATES_FROM_CODEPOINT(c);
589 
590             /* Leading surrogate. */
591             pBytes[1] = (byte)(surrogates >> 16);
592             pBytes[0] = (byte)(surrogates >> 24);
593 
594             /* Trailing surrogate. */
595             pBytes[3] = (byte)(surrogates);
596             pBytes[2] = (byte)(surrogates >> 8);
597             length    = 4;
598          }
599          break;
600 
601       case JSON_UTF32LE:
602          pBytes[0] = (byte)(c);
603          pBytes[1] = (byte)(c >> 8);
604          pBytes[2] = (byte)(c >> 16);
605          pBytes[3] = (byte)(c >> 24);
606          length    = 4;
607          break;
608 
609       case JSON_UTF32BE:
610          pBytes[3] = (byte)(c);
611          pBytes[2] = (byte)(c >> 8);
612          pBytes[1] = (byte)(c >> 16);
613          pBytes[0] = (byte)(c >> 24);
614          length    = 4;
615          break;
616    }
617    return length;
618 }
619 
620 /******************** JSON Lexer States ********************/
621 
622 /* Mutually-exclusive lexer states. */
623 #define LEXING_WHITESPACE                                     0
624 #define LEXING_LITERAL                                        1
625 #define LEXING_STRING                                         2
626 #define LEXING_STRING_ESCAPE                                  3
627 #define LEXING_STRING_HEX_ESCAPE_BYTE_1                       4
628 #define LEXING_STRING_HEX_ESCAPE_BYTE_2                       5
629 #define LEXING_STRING_HEX_ESCAPE_BYTE_3                       6
630 #define LEXING_STRING_HEX_ESCAPE_BYTE_4                       7
631 #define LEXING_STRING_HEX_ESCAPE_BYTE_5                       8
632 #define LEXING_STRING_HEX_ESCAPE_BYTE_6                       9
633 #define LEXING_STRING_HEX_ESCAPE_BYTE_7                       10
634 #define LEXING_STRING_HEX_ESCAPE_BYTE_8                       11
635 #define LEXING_STRING_TRAILING_SURROGATE_HEX_ESCAPE_BACKSLASH 12
636 #define LEXING_STRING_TRAILING_SURROGATE_HEX_ESCAPE_U         13
637 #define LEXING_NUMBER_AFTER_MINUS                             14
638 #define LEXING_NUMBER_AFTER_LEADING_ZERO                      15
639 #define LEXING_NUMBER_AFTER_LEADING_NEGATIVE_ZERO             16
640 #define LEXING_NUMBER_AFTER_X                                 17
641 #define LEXING_NUMBER_HEX_DIGITS                              18
642 #define LEXING_NUMBER_DECIMAL_DIGITS                          19
643 #define LEXING_NUMBER_AFTER_DOT                               20
644 #define LEXING_NUMBER_FRACTIONAL_DIGITS                       21
645 #define LEXING_NUMBER_AFTER_E                                 22
646 #define LEXING_NUMBER_AFTER_EXPONENT_SIGN                     23
647 #define LEXING_NUMBER_EXPONENT_DIGITS                         24
648 #define LEXING_COMMENT_AFTER_SLASH                            25
649 #define LEXING_SINGLE_LINE_COMMENT                            26
650 #define LEXING_MULTI_LINE_COMMENT                             27
651 #define LEXING_MULTI_LINE_COMMENT_AFTER_STAR                  28
652 #define LEXER_ERROR                                           255
653 typedef byte LexerState;
654 
655 /******************** JSON Grammarian ********************/
656 
657 /* The JSON grammar comprises the following productions:
658 
659    1.  VALUE => null
660    2.  VALUE => boolean
661    3.  VALUE => string
662    4.  VALUE => number
663    5.  VALUE => specialnumber
664    6.  VALUE => { MEMBERS }
665    7.  VALUE => [ ITEMS ]
666    8.  MEMBERS => MEMBER MORE_MEMBERS
667    9.  MEMBERS => e
668    10. MEMBER => string : VALUE
669    11. MORE_MEMBERS => , MEMBER MORE_MEMBERS
670    12. MORE_MEMBERS => e
671    13. ITEMS => ITEM MORE_ITEMS
672    14. ITEMS => e
673    15. ITEM => VALUE
674    16. MORE_ITEMS => , ITEM MORE_ITEMS
675    17. MORE_ITEMS => e
676 
677    We implement a simple LL(1) parser based on this grammar, with events
678    emitted when certain non-terminals are replaced.
679    */
680 
681 /* Mutually-exclusive grammar tokens and non-terminals. The values are defined
682    so that the bottom 4 bits of a value can be used as an index into the
683    grammar production rule table. */
684 #define T_NONE              0x00 /* tokens are in the form 0x0X */
685 #define T_NULL              0x01
686 #define T_TRUE              0x02
687 #define T_FALSE             0x03
688 #define T_STRING            0x04
689 #define T_NUMBER            0x05
690 #define T_NAN               0x06
691 #define T_INFINITY          0x07
692 #define T_NEGATIVE_INFINITY 0x08
693 #define T_LEFT_CURLY        0x09
694 #define T_RIGHT_CURLY       0x0A
695 #define T_LEFT_SQUARE       0x0B
696 #define T_RIGHT_SQUARE      0x0C
697 #define T_COLON             0x0D
698 #define T_COMMA             0x0E
699 #define NT_VALUE            0x10 /* non-terminals are in the form 0x1X */
700 #define NT_MEMBERS          0x11
701 #define NT_MEMBER           0x12
702 #define NT_MORE_MEMBERS     0x13
703 #define NT_ITEMS            0x14
704 #define NT_ITEM             0x15
705 #define NT_MORE_ITEMS       0x16
706 typedef byte Symbol;
707 
708 #define IS_NONTERMINAL(s) ((s) & 0x10)
709 #define IS_TOKEN(s)       !IS_NONTERMINAL(s)
710 
711 /* Grammarian data. */
/* State of the grammarian: a stack of grammar symbols, plus built-in
   storage that pStack points at after a first-time reset. */
typedef struct tag_GrammarianData
{
   Symbol* pStack; /* initially set to defaultStack */
   size_t  stackSize; /* capacity of pStack (sizeof defaultStack after a first-time reset) */
   size_t  stackUsed; /* number of symbols on the stack; 0 means the document is finished */
   Symbol  defaultStack[DEFAULT_SYMBOL_STACK_SIZE]; /* built-in storage; never passed to the memory suite's free */
} GrammarianData;
/* Handle type taken by the Grammarian_* functions. */
typedef GrammarianData* Grammarian;
720 
721 /* Mutually-exclusive result codes returned by the grammarian
722    after processing a token. */
723 #define ACCEPTED_TOKEN    0
724 #define REJECTED_TOKEN    1
725 #define SYMBOL_STACK_FULL 2
726 typedef uint32_t GrammarianResultCode;
727 
728 /* Events emitted by the grammarian as a result of processing a
729    token. Note that EMIT_ARRAY_ITEM always appears bitwise OR-ed
730    with one of the other values. */
731 #define EMIT_NOTHING        0x00
732 #define EMIT_NULL           0x01
733 #define EMIT_BOOLEAN        0x02
734 #define EMIT_STRING         0x03
735 #define EMIT_NUMBER         0x04
736 #define EMIT_SPECIAL_NUMBER 0x05
737 #define EMIT_START_OBJECT   0x06
738 #define EMIT_END_OBJECT     0x07
739 #define EMIT_OBJECT_MEMBER  0x08
740 #define EMIT_START_ARRAY    0x09
741 #define EMIT_END_ARRAY      0x0A
742 #define EMIT_ARRAY_ITEM     0x10 /* may be combined with other values */
743 typedef byte GrammarEvent;
744 
/* The bits of GrammarianOutput are laid out as follows:
746 
747    -rreeeee
748 
749    - = unused (1 bit)
750    r = result code (2 bits)
751    e = event (5 bits)
752    */
753 #define GRAMMARIAN_OUTPUT(r, e)   (GrammarianOutput)(((GrammarianResultCode)(r) << 5) | (GrammarEvent)(e))
754 #define GRAMMARIAN_RESULT_CODE(o) (GrammarianResultCode)((GrammarianOutput)(o) >> 5)
755 #define GRAMMARIAN_EVENT(o)       (GrammarEvent)((GrammarianOutput)(o) & 0x1F)
756 typedef byte GrammarianOutput;
757 
758 /* Grammar rule used by the grammarian to process a token. */
/* One production-rule entry.
   NOTE(review): the code that consumes these entries is not fully visible
   here; the per-field notes below are inferred from the field names and
   should be confirmed against Grammarian_ProcessToken(). */
typedef struct tag_GrammarRule
{
   Symbol       symbolToPush1; /* byte alignment */
   Symbol       symbolToPush2; /* byte alignment */
   byte         reprocess;     /* presumably nonzero when the token must be processed again - TODO confirm */
   GrammarEvent emit;          /* byte alignment */
} GrammarRule;
766 
767 /* Grammarian functions. */
768 
/* Puts the grammarian back at the start of a document.
 *
 * isInitialized - zero when the structure has never been set up, in which
 *                 case the symbol stack is pointed at the built-in default
 *                 storage; nonzero to keep whatever stack is already in use.
 */
static void Grammarian_Reset(Grammarian grammarian, int isInitialized)
{
   /* A reset keeps any symbol stack that has already been allocated.
      A client that wants to reclaim the memory used by that buffer
      needs to free the grammarian and create a new one. */
   if (isInitialized == 0)
   {
      grammarian->stackSize = sizeof(grammarian->defaultStack);
      grammarian->pStack    = grammarian->defaultStack;
   }

   /* Parsing always begins with a lone NT_VALUE on the symbol stack. */
   grammarian->stackUsed = 1;
   grammarian->pStack[0] = NT_VALUE;
}
785 
/* Releases the symbol stack through the memory suite's free function,
   unless the grammarian is still using its built-in default stack, in
   which case there is nothing to release. */
static void Grammarian_FreeAllocations(Grammarian grammarian,
      const JSON_MemorySuite* pMemorySuite)
{
   if (grammarian->pStack == grammarian->defaultStack)
      return;

   pMemorySuite->free(pMemorySuite->userData, grammarian->pStack);
}
792 
/* Returns non-zero when the symbol stack is empty, i.e. when no
   further tokens are expected. */
static int Grammarian_FinishedDocument(Grammarian grammarian)
{
   return grammarian->stackUsed == 0;
}
797 
Grammarian_ProcessToken(Grammarian grammarian,Symbol token,const JSON_MemorySuite * pMemorySuite)798 static GrammarianOutput Grammarian_ProcessToken(Grammarian grammarian,
799       Symbol token, const JSON_MemorySuite* pMemorySuite)
800 {
801    /* The order and number of the rows and columns in this table must
802       match the defined token and non-terminal symbol values.
803 
804       The row index is the incoming token's Symbol value.
805 
806       The column index is the bottom 4 bits of Symbol value of
807       the non-terminal at the top of the processing stack.
808       Since non-terminal Symbol values start at 0x10, taking
809       the bottom 4 bits yields a 0-based index. */
810    static const byte ruleLookup[15][7] =
811    {
812       /*             V     MS    M     MM    IS    I     MI  */
813       /*  ----  */ { 0,    0,    0,    0,    0,    0,    0  },
814       /*  null  */ { 1,    0,    0,    0,    13,   15,   0  },
815       /*  true  */ { 2,    0,    0,    0,    13,   15,   0  },
816       /* false  */ { 2,    0,    0,    0,    13,   15,   0  },
817       /* string */ { 3,    8,    10,   0,    13,   15,   0  },
818       /* number */ { 4,    0,    0,    0,    13,   15,   0  },
819       /*  NaN   */ { 5,    0,    0,    0,    13,   15,   0  },
820       /*  Inf   */ { 5,    0,    0,    0,    13,   15,   0  },
821       /* -Inf   */ { 5,    0,    0,    0,    13,   15,   0  },
822       /*   {    */ { 6,    0,    0,    0,    13,   15,   0  },
823       /*   }    */ { 0,    9,    0,    12,   0,    0,    0  },
824       /*   [    */ { 7,    0,    0,    0,    13,   15,   0  },
825       /*   ]    */ { 0,    0,    0,    0,    14,   0,    17 },
826       /*   :    */ { 0,    0,    0,    0,    0,    0,    0  },
827       /*   ,    */ { 0,    0,    0,    11,   0,    0,    16 }
828    };
829 
830    static const GrammarRule rules[17] =
831    {
832       /* 1.  */ { T_NONE,          T_NONE,      0, EMIT_NULL           },
833       /* 2.  */ { T_NONE,          T_NONE,      0, EMIT_BOOLEAN        },
834       /* 3.  */ { T_NONE,          T_NONE,      0, EMIT_STRING         },
835       /* 4.  */ { T_NONE,          T_NONE,      0, EMIT_NUMBER         },
836       /* 5.  */ { T_NONE,          T_NONE,      0, EMIT_SPECIAL_NUMBER },
837       /* 6.  */ { T_RIGHT_CURLY,   NT_MEMBERS,  0, EMIT_START_OBJECT   },
838       /* 7.  */ { T_RIGHT_SQUARE,  NT_ITEMS,    0, EMIT_START_ARRAY    },
839       /* 8.  */ { NT_MORE_MEMBERS, NT_MEMBER,   1, EMIT_NOTHING        },
840       /* 9.  */ { T_NONE,          T_NONE,      1, EMIT_END_OBJECT     },
841       /* 10. */ { NT_VALUE,        T_COLON,     0, EMIT_OBJECT_MEMBER  },
842       /* 11. */ { NT_MORE_MEMBERS, NT_MEMBER,   0, EMIT_NOTHING        },
843       /* 12. */ { T_NONE,          T_NONE,      1, EMIT_END_OBJECT     },
844       /* 13. */ { NT_MORE_ITEMS,   NT_ITEM,     1, EMIT_NOTHING        },
845       /* 14. */ { T_NONE,          T_NONE,      1, EMIT_END_ARRAY      },
846       /* 15. */ { NT_VALUE,        T_NONE,      1, EMIT_ARRAY_ITEM     },
847       /* 16. */ { NT_MORE_ITEMS,   NT_ITEM,     0, EMIT_NOTHING        },
848       /* 17. */ { T_NONE,          T_NONE,      1, EMIT_END_ARRAY      }
849    };
850 
851    GrammarEvent emit = EMIT_NOTHING;
852 
853    /* If the stack is empty, no more tokens were expected. */
854    if (Grammarian_FinishedDocument(grammarian))
855       return GRAMMARIAN_OUTPUT(REJECTED_TOKEN, EMIT_NOTHING);
856 
857    for (;;)
858    {
859       Symbol topSymbol = grammarian->pStack[grammarian->stackUsed - 1];
860       if (IS_TOKEN(topSymbol))
861       {
862          if (topSymbol != token)
863             return GRAMMARIAN_OUTPUT(REJECTED_TOKEN, EMIT_NOTHING);
864          grammarian->stackUsed--;
865          break;
866       }
867       else
868       {
869          const GrammarRule* pRule = NULL;
870          byte ruleNumber          = ruleLookup[token][BOTTOM_4_BITS(topSymbol)];
871 
872          if (ruleNumber == 0)
873             return GRAMMARIAN_OUTPUT(REJECTED_TOKEN, EMIT_NOTHING);
874 
875          pRule = &rules[ruleNumber - 1];
876 
877          /* The rule removes the top symbol and does not replace it. */
878          if (pRule->symbolToPush1 == T_NONE)
879             grammarian->stackUsed--;
880          else
881          {
882             /* The rule replaces the top symbol with 1 or 2 symbols. */
883             grammarian->pStack[grammarian->stackUsed - 1] = pRule->symbolToPush1;
884             if (pRule->symbolToPush2 != T_NONE)
885             {
886                /* The rule replaces the top symbol with 2 symbols.
887                   Make sure the stack has room for the second one. */
888                if (grammarian->stackUsed == grammarian->stackSize)
889                {
890                   Symbol* pBiggerStack = DoubleBuffer(pMemorySuite,
891                         grammarian->defaultStack, grammarian->pStack,
892                         grammarian->stackSize);
893 
894                   if (!pBiggerStack)
895                      return GRAMMARIAN_OUTPUT(SYMBOL_STACK_FULL, EMIT_NOTHING);
896 
897                   grammarian->pStack     = pBiggerStack;
898                   grammarian->stackSize *= 2;
899                }
900                grammarian->pStack[grammarian->stackUsed] = pRule->symbolToPush2;
901                grammarian->stackUsed++;
902             }
903          }
904          emit |= pRule->emit;
905          if (!pRule->reprocess)
906             break;
907       }
908    }
909 
910    return GRAMMARIAN_OUTPUT(ACCEPTED_TOKEN, emit);
911 }
912 
913 /******************** JSON Parser ********************/
914 
915 #ifndef JSON_NO_PARSER
916 
/* Combinable parser state flags, stored in the parser's 'state' field.
   PARSER_IN_TOKEN_HANDLER is switched on for the duration of every call
   into a client-supplied token handler and switched off again when the
   handler returns. */
#define PARSER_RESET                 0x00
#define PARSER_STARTED               0x01
#define PARSER_FINISHED              0x02
#define PARSER_IN_PROTECTED_API      0x04
#define PARSER_IN_TOKEN_HANDLER      0x08
#define PARSER_AFTER_CARRIAGE_RETURN 0x10
typedef byte ParserState;
925 
/* Combinable parser settings flags, stored in the parser's 'flags' field.
   All eight bits of the byte are in use. Among them:

   PARSER_REPLACE_INVALID       lets an invalid encoding sequence inside a
                                string token be replaced instead of failing.
   PARSER_TRACK_OBJECT_MEMBERS  enables the per-object member name lists
                                used to detect duplicate member names.
   PARSER_EMBEDDED_DOCUMENT     makes parsing stop (with an error code)
                                once the top-level value is complete. */
#define PARSER_DEFAULT_FLAGS         0x00
#define PARSER_ALLOW_BOM             0x01
#define PARSER_ALLOW_COMMENTS        0x02
#define PARSER_ALLOW_SPECIAL_NUMBERS 0x04
#define PARSER_ALLOW_HEX_NUMBERS     0x08
#define PARSER_REPLACE_INVALID       0x10
#define PARSER_TRACK_OBJECT_MEMBERS  0x20
#define PARSER_ALLOW_CONTROL_CHARS   0x40
#define PARSER_EMBEDDED_DOCUMENT     0x80
typedef byte ParserFlags;
937 
/* Sentinel value for parser error location offset. When the parser's
   errorOffset field holds this value, the error is attributed to the
   start of the current token rather than to a codepoint. */
#define ERROR_LOCATION_IS_TOKEN_START 0xFF
940 
/* An object member name stored in an unordered, singly-linked list, used
   for detecting duplicate member names. Note that the name string is not
   null-terminated; 'length' gives its size in bytes. The struct is
   over-allocated so that pBytes can hold 'length' bytes. */
typedef struct tag_MemberName
{
   struct tag_MemberName* pNextName;
   size_t                 length;
   byte                   pBytes[1]; /* variable-size buffer */
} MemberName;
950 
/* An object's list of member names, and a pointer to the object's
   nearest ancestor object, if any. This is used as a stack: a new entry
   is pushed when an object starts and popped when it ends. Because arrays
   do not have named items, they do not need to be recorded in the stack. */
typedef struct tag_MemberNames
{
   struct tag_MemberNames* pAncestor;
   MemberName*             pFirstName;
} MemberNames;
959 
/* A parser instance.

   Fields are grouped by alignment requirement (pointers first, then
   32-bit values, then size_t counters, then single bytes), as noted in
   the trailing comments. The struct ends with the default token buffer
   that pTokenBytes points to when the parser is first initialized. */
struct JSON_Parser_Data
{
   JSON_MemorySuite                    memorySuite;      /* ptr alignment */
   void*                               userData;
   byte*                               pTokenBytes;
   MemberNames*                        pMemberNames;
   GrammarianData                      grammarianData;   /* ptr  alignment */
   JSON_Parser_EncodingDetectedHandler encodingDetectedHandler; /* ptr alignment */
   JSON_Parser_NullHandler             nullHandler;
   JSON_Parser_BooleanHandler          booleanHandler;
   JSON_Parser_StringHandler           stringHandler;
   JSON_Parser_NumberHandler           numberHandler;
   JSON_Parser_SpecialNumberHandler    specialNumberHandler;
   JSON_Parser_StartObjectHandler      startObjectHandler;
   JSON_Parser_EndObjectHandler        endObjectHandler;
   JSON_Parser_ObjectMemberHandler     objectMemberHandler;
   JSON_Parser_StartArrayHandler       startArrayHandler;
   JSON_Parser_EndArrayHandler         endArrayHandler;
   JSON_Parser_ArrayItemHandler        arrayItemHandler;
   uint32_t                            lexerBits;
   DecoderData                         decoderData;
                                                         /* uint32 alignment */
   size_t                              codepointLocationByte;
   size_t                              codepointLocationLine;
   size_t                              codepointLocationColumn;
   size_t                              tokenLocationByte;
   size_t                              tokenLocationLine;
   size_t                              tokenLocationColumn;
   size_t                              depth;
   size_t                              tokenBytesLength;
   size_t                              tokenBytesUsed;
   size_t                              maxStringLength;
   size_t                              maxNumberLength;
   ParserState                         state;            /* byte alignment */
   ParserFlags                         flags;            /* byte alignment */
   Encoding                            inputEncoding;    /* byte alignment */
   Encoding                            stringEncoding;   /* byte alignment */
   Encoding                            numberEncoding;   /* byte alignment */
   Symbol                              token;            /* byte alignment */
   TokenAttributes                     tokenAttributes;  /* byte alignment */
   Error                               error;            /* byte alignment */
   byte                                errorOffset;
   LexerState                          lexerState;       /* byte alignment */
   byte                                defaultTokenBytes[DEFAULT_TOKEN_BYTES_LENGTH];
};
1006 
1007 /* Parser internal functions. */
1008 
/* Records 'error' as the parser's error. The error offset is left
   untouched by this function. */
static void JSON_Parser_SetErrorAtCodepoint(JSON_Parser parser, Error error)
{
   parser->error = error;
}
1013 
/* Records 'error' and remembers that it occurred 'codepointsAgo'
   codepoints before the current one, i.e. at the start of a string
   escape sequence. The offset is stored truncated to a byte. */
static void JSON_Parser_SetErrorAtStringEscapeSequenceStart(
      JSON_Parser parser, Error error, int codepointsAgo)
{
   /* Backtracking from the current codepoint relies on three
      assumptions, all of which always hold inside a string escape
      sequence:

      1. The input encoding is not JSON_UnknownEncoding.

      2. Every codepoint being backed up across is in the range
      U+0000 - U+007F, aka ASCII, so its encoded size follows
      directly from the input encoding.

      3. None of the codepoints being backed up across is a line
      break, so the line number is unchanged and the column number
      can simply be decremented.
      */
   parser->errorOffset = (byte)codepointsAgo;
   parser->error       = error;
}
1034 
JSON_Parser_SetErrorAtToken(JSON_Parser parser,Error error)1035 static void JSON_Parser_SetErrorAtToken(JSON_Parser parser, Error error)
1036 {
1037    parser->error = error;
1038    parser->errorOffset = ERROR_LOCATION_IS_TOKEN_START;
1039 }
1040 
JSON_Parser_PushMemberNameList(JSON_Parser parser)1041 static JSON_Status JSON_Parser_PushMemberNameList(JSON_Parser parser)
1042 {
1043    MemberNames* pNames = (MemberNames*)parser->memorySuite.realloc(
1044          parser->memorySuite.userData, NULL, sizeof(MemberNames));
1045 
1046    if (!pNames)
1047    {
1048       JSON_Parser_SetErrorAtCodepoint(parser, JSON_Error_OutOfMemory);
1049       return JSON_Failure;
1050    }
1051 
1052    pNames->pAncestor    = parser->pMemberNames;
1053    pNames->pFirstName   = NULL;
1054    parser->pMemberNames = pNames;
1055    return JSON_Success;
1056 }
1057 
/* Pops the top member name list off the parser's stack, freeing every
   name in it and then the list itself. The ancestor list, if any,
   becomes the new top. The stack must not be empty when this is called. */
static void JSON_Parser_PopMemberNameList(JSON_Parser parser)
{
   MemberNames* pTop    = parser->pMemberNames;
   MemberNames* pParent = pTop->pAncestor;
   MemberName*  pName;

   /* Unlink and free the names one at a time from the head. */
   for (pName = pTop->pFirstName; pName; pName = pTop->pFirstName)
   {
      MemberName* pRest = pName->pNextName;
      parser->memorySuite.free(parser->memorySuite.userData, pName);
      pTop->pFirstName = pRest;
   }

   parser->memorySuite.free(parser->memorySuite.userData, pTop);
   parser->pMemberNames = pParent;
}
1070 
JSON_Parser_StartContainer(JSON_Parser parser,int isObject)1071 static JSON_Status JSON_Parser_StartContainer(JSON_Parser parser, int isObject)
1072 {
1073    if (isObject && GET_FLAGS(parser->flags, PARSER_TRACK_OBJECT_MEMBERS) &&
1074          !JSON_Parser_PushMemberNameList(parser))
1075    {
1076       return JSON_Failure;
1077    }
1078    parser->depth++;
1079    return JSON_Success;
1080 }
1081 
/* Leaves an object (isObject != 0) or an array (isObject == 0): the
   nesting depth is decremented and, for an object with member tracking
   enabled, the object's member name list is popped and freed. */
static void JSON_Parser_EndContainer(JSON_Parser parser, int isObject)
{
   parser->depth--;

   if (!isObject || !GET_FLAGS(parser->flags, PARSER_TRACK_OBJECT_MEMBERS))
      return;

   JSON_Parser_PopMemberNameList(parser);
}
1090 
JSON_Parser_AddMemberNameToList(JSON_Parser parser)1091 static JSON_Status JSON_Parser_AddMemberNameToList(JSON_Parser parser)
1092 {
1093    if (GET_FLAGS(parser->flags, PARSER_TRACK_OBJECT_MEMBERS))
1094    {
1095       MemberName* pName;
1096       for (pName = parser->pMemberNames->pFirstName; pName; pName = pName->pNextName)
1097       {
1098          if (pName->length == parser->tokenBytesUsed && !memcmp(pName->pBytes, parser->pTokenBytes, pName->length))
1099          {
1100             JSON_Parser_SetErrorAtToken(parser, JSON_Error_DuplicateObjectMember);
1101             return JSON_Failure;
1102          }
1103       }
1104       pName = (MemberName*)parser->memorySuite.realloc(parser->memorySuite.userData, NULL, sizeof(MemberName) + parser->tokenBytesUsed - 1);
1105       if (!pName)
1106       {
1107          JSON_Parser_SetErrorAtCodepoint(parser, JSON_Error_OutOfMemory);
1108          return JSON_Failure;
1109       }
1110       pName->pNextName = parser->pMemberNames->pFirstName;
1111       pName->length = parser->tokenBytesUsed;
1112       memcpy(pName->pBytes, parser->pTokenBytes, parser->tokenBytesUsed);
1113       parser->pMemberNames->pFirstName = pName;
1114    }
1115    return JSON_Success;
1116 }
1117 
/* Restores every parser field to its default value.

   isInitialized is zero when the parser has just been created: the token
   buffer is then pointed at the built-in default buffer and the member
   name stack is simply cleared. When it is non-zero, any member name
   lists still on the stack are freed, while the token buffer and symbol
   stack that have already been allocated, if any, are kept; a client who
   wants to reclaim that memory must free the parser and create a new one. */
static void JSON_Parser_ResetData(JSON_Parser parser, int isInitialized)
{
   /* Client data and settings. */
   parser->userData        = NULL;
   parser->flags           = PARSER_DEFAULT_FLAGS;
   parser->inputEncoding   = JSON_UnknownEncoding;
   parser->stringEncoding  = JSON_UTF8;
   parser->numberEncoding  = JSON_UTF8;
   parser->maxStringLength = SIZE_MAX;
   parser->maxNumberLength = SIZE_MAX;

   /* Current token, error and lexer state. */
   parser->token           = T_NONE;
   parser->tokenAttributes = 0;
   parser->tokenBytesUsed  = 0;
   parser->error           = JSON_Error_None;
   parser->errorOffset     = 0;
   parser->lexerState      = LEXING_WHITESPACE;
   parser->lexerBits       = 0;

   /* Locations and nesting depth. */
   parser->codepointLocationByte   = 0;
   parser->codepointLocationLine   = 0;
   parser->codepointLocationColumn = 0;
   parser->tokenLocationByte       = 0;
   parser->tokenLocationLine       = 0;
   parser->tokenLocationColumn     = 0;
   parser->depth                   = 0;

   if (!isInitialized)
   {
      /* Brand-new parser: use the embedded token buffer, and there
         are no member name lists to release. */
      parser->pTokenBytes      = parser->defaultTokenBytes;
      parser->tokenBytesLength = sizeof(parser->defaultTokenBytes);
      parser->pMemberNames     = NULL;
   }
   else
   {
      /* Existing parser: keep the current token buffer, but release
         any member name lists left over from the previous parse. */
      while (parser->pMemberNames)
         JSON_Parser_PopMemberNameList(parser);
   }

   Decoder_Reset(&parser->decoderData);
   Grammarian_Reset(&parser->grammarianData, isInitialized);

   /* No handlers are registered after a reset. */
   parser->encodingDetectedHandler = NULL;
   parser->nullHandler             = NULL;
   parser->booleanHandler          = NULL;
   parser->stringHandler           = NULL;
   parser->numberHandler           = NULL;
   parser->specialNumberHandler    = NULL;
   parser->startObjectHandler      = NULL;
   parser->endObjectHandler        = NULL;
   parser->objectMemberHandler     = NULL;
   parser->startArrayHandler       = NULL;
   parser->endArrayHandler         = NULL;
   parser->arrayItemHandler        = NULL;

   parser->state = PARSER_RESET; /* do this last! */
}
1177 
/* Appends a null terminator, sized for the token's output encoding,
   directly after the token bytes. tokenBytesUsed is not changed, so
   the terminator is not counted as part of the token. */
static void JSON_Parser_NullTerminateToken(JSON_Parser parser)
{
   /* Recording a codepoint always leaves LONGEST_ENCODING_SEQUENCE bytes
      free at the end of the token buffer, so the terminator can be
      written without any bounds check. */
   static const byte zeroBytes[LONGEST_ENCODING_SEQUENCE] = { 0 };
   Encoding encoding;

   /* Numbers and strings can use different output encodings. */
   if (parser->token == T_NUMBER)
      encoding = (Encoding)parser->numberEncoding;
   else
      encoding = (Encoding)parser->stringEncoding;

   memcpy(parser->pTokenBytes + parser->tokenBytesUsed, zeroBytes,
         (size_t)SHORTEST_ENCODING_SEQUENCE(encoding));
}
1187 
JSON_Parser_FlushParser(JSON_Parser parser)1188 static JSON_Status JSON_Parser_FlushParser(JSON_Parser parser)
1189 {
1190    /* The symbol stack should be empty when parsing finishes. */
1191    if (!Grammarian_FinishedDocument(&parser->grammarianData))
1192    {
1193       JSON_Parser_SetErrorAtCodepoint(parser, JSON_Error_ExpectedMoreTokens);
1194       return JSON_Failure;
1195    }
1196    return JSON_Success;
1197 }
1198 
1199 typedef JSON_Parser_HandlerResult (JSON_CALL * JSON_Parser_SimpleTokenHandler)(JSON_Parser parser);
JSON_Parser_CallSimpleTokenHandler(JSON_Parser parser,JSON_Parser_SimpleTokenHandler handler)1200 static JSON_Status JSON_Parser_CallSimpleTokenHandler(JSON_Parser parser, JSON_Parser_SimpleTokenHandler handler)
1201 {
1202    if (handler)
1203    {
1204       JSON_Parser_HandlerResult result;
1205       SET_FLAGS_ON(ParserState, parser->state, PARSER_IN_TOKEN_HANDLER);
1206       result = handler(parser);
1207       SET_FLAGS_OFF(ParserState, parser->state, PARSER_IN_TOKEN_HANDLER);
1208       if (result != JSON_Parser_Continue)
1209       {
1210          JSON_Parser_SetErrorAtToken(parser, JSON_Error_AbortedByHandler);
1211          return JSON_Failure;
1212       }
1213    }
1214    return JSON_Success;
1215 }
1216 
JSON_Parser_CallBooleanHandler(JSON_Parser parser)1217 static JSON_Status JSON_Parser_CallBooleanHandler(JSON_Parser parser)
1218 {
1219    if (parser->booleanHandler)
1220    {
1221       JSON_Parser_HandlerResult result;
1222       SET_FLAGS_ON(ParserState, parser->state, PARSER_IN_TOKEN_HANDLER);
1223       result = parser->booleanHandler(parser, parser->token == T_TRUE ? JSON_True : JSON_False);
1224       SET_FLAGS_OFF(ParserState, parser->state, PARSER_IN_TOKEN_HANDLER);
1225       if (result != JSON_Parser_Continue)
1226       {
1227          JSON_Parser_SetErrorAtToken(parser, JSON_Error_AbortedByHandler);
1228          return JSON_Failure;
1229       }
1230    }
1231    return JSON_Success;
1232 }
1233 
JSON_Parser_CallStringHandler(JSON_Parser parser,int isObjectMember)1234 static JSON_Status JSON_Parser_CallStringHandler(JSON_Parser parser, int isObjectMember)
1235 {
1236    JSON_Parser_StringHandler handler = isObjectMember ? parser->objectMemberHandler : parser->stringHandler;
1237    if (handler)
1238    {
1239       JSON_Parser_HandlerResult result;
1240       JSON_Parser_NullTerminateToken(parser);
1241       SET_FLAGS_ON(ParserState, parser->state, PARSER_IN_TOKEN_HANDLER);
1242       result = handler(parser, (char*)parser->pTokenBytes, parser->tokenBytesUsed, parser->tokenAttributes);
1243       SET_FLAGS_OFF(ParserState, parser->state, PARSER_IN_TOKEN_HANDLER);
1244 
1245       if (result != JSON_Parser_Continue)
1246       {
1247          JSON_Parser_SetErrorAtToken(parser,
1248                (isObjectMember && result == JSON_Parser_TreatAsDuplicateObjectMember)
1249                ? JSON_Error_DuplicateObjectMember
1250                : JSON_Error_AbortedByHandler);
1251          return JSON_Failure;
1252       }
1253    }
1254    return JSON_Success;
1255 }
1256 
JSON_Parser_CallNumberHandler(JSON_Parser parser)1257 static JSON_Status JSON_Parser_CallNumberHandler(JSON_Parser parser)
1258 {
1259    if (parser->numberHandler)
1260    {
1261       JSON_Parser_HandlerResult result;
1262       JSON_Parser_NullTerminateToken(parser);
1263       SET_FLAGS_ON(ParserState, parser->state, PARSER_IN_TOKEN_HANDLER);
1264       result = parser->numberHandler(parser, (char*)parser->pTokenBytes,
1265             parser->tokenBytesUsed, parser->tokenAttributes);
1266 
1267       SET_FLAGS_OFF(ParserState, parser->state, PARSER_IN_TOKEN_HANDLER);
1268 
1269       if (result != JSON_Parser_Continue)
1270       {
1271          JSON_Parser_SetErrorAtToken(parser, JSON_Error_AbortedByHandler);
1272          return JSON_Failure;
1273       }
1274    }
1275    return JSON_Success;
1276 }
1277 
JSON_Parser_CallSpecialNumberHandler(JSON_Parser parser)1278 static JSON_Status JSON_Parser_CallSpecialNumberHandler(JSON_Parser parser)
1279 {
1280    if (parser->specialNumberHandler)
1281    {
1282       JSON_Parser_HandlerResult result;
1283       SET_FLAGS_ON(ParserState, parser->state, PARSER_IN_TOKEN_HANDLER);
1284       result = parser->specialNumberHandler(parser, parser->token == T_NAN ? JSON_NaN :
1285             (parser->token == T_INFINITY ? JSON_Infinity : JSON_NegativeInfinity));
1286       SET_FLAGS_OFF(ParserState, parser->state, PARSER_IN_TOKEN_HANDLER);
1287 
1288       if (result != JSON_Parser_Continue)
1289       {
1290          JSON_Parser_SetErrorAtToken(parser, JSON_Error_AbortedByHandler);
1291          return JSON_Failure;
1292       }
1293    }
1294    return JSON_Success;
1295 }
1296 
JSON_Parser_HandleGrammarEvents(JSON_Parser parser,byte emit)1297 static JSON_Status JSON_Parser_HandleGrammarEvents(JSON_Parser parser, byte emit)
1298 {
1299    if (GET_FLAGS(emit, EMIT_ARRAY_ITEM))
1300    {
1301       if (!JSON_Parser_CallSimpleTokenHandler(parser, parser->arrayItemHandler))
1302       {
1303          return JSON_Failure;
1304       }
1305       SET_FLAGS_OFF(byte, emit, EMIT_ARRAY_ITEM);
1306    }
1307    switch (emit)
1308    {
1309       case EMIT_NULL:
1310          if (!JSON_Parser_CallSimpleTokenHandler(parser, parser->nullHandler))
1311             return JSON_Failure;
1312          break;
1313 
1314       case EMIT_BOOLEAN:
1315          if (!JSON_Parser_CallBooleanHandler(parser))
1316             return JSON_Failure;
1317          break;
1318 
1319       case EMIT_STRING:
1320          if (!JSON_Parser_CallStringHandler(parser, 0/* isObjectMember */))
1321             return JSON_Failure;
1322          break;
1323 
1324       case EMIT_NUMBER:
1325          if (!JSON_Parser_CallNumberHandler(parser))
1326             return JSON_Failure;
1327          break;
1328 
1329       case EMIT_SPECIAL_NUMBER:
1330          if (!JSON_Parser_CallSpecialNumberHandler(parser))
1331             return JSON_Failure;
1332          break;
1333 
1334       case EMIT_START_OBJECT:
1335          if (!JSON_Parser_CallSimpleTokenHandler(parser, parser->startObjectHandler) ||
1336                !JSON_Parser_StartContainer(parser, 1/*isObject*/))
1337             return JSON_Failure;
1338          break;
1339 
1340       case EMIT_END_OBJECT:
1341          JSON_Parser_EndContainer(parser, 1/*isObject*/);
1342          if (!JSON_Parser_CallSimpleTokenHandler(parser, parser->endObjectHandler))
1343             return JSON_Failure;
1344          break;
1345       case EMIT_OBJECT_MEMBER:
1346          if (!JSON_Parser_AddMemberNameToList(parser) || /* will fail if member is duplicate */
1347                !JSON_Parser_CallStringHandler(parser, 1 /* isObjectMember */))
1348             return JSON_Failure;
1349          break;
1350 
1351       case EMIT_START_ARRAY:
1352          if (!JSON_Parser_CallSimpleTokenHandler(parser, parser->startArrayHandler) ||
1353                !JSON_Parser_StartContainer(parser, 0/*isObject*/))
1354             return JSON_Failure;
1355          break;
1356 
1357       case EMIT_END_ARRAY:
1358          JSON_Parser_EndContainer(parser, 0/*isObject*/);
1359          if (!JSON_Parser_CallSimpleTokenHandler(parser, parser->endArrayHandler))
1360             return JSON_Failure;
1361          break;
1362    }
1363 
1364    if (!parser->depth && GET_FLAGS(parser->flags, PARSER_EMBEDDED_DOCUMENT))
1365    {
1366       JSON_Parser_SetErrorAtCodepoint(parser, JSON_Error_StoppedAfterEmbeddedDocument);
1367       return JSON_Failure;
1368    }
1369    return JSON_Success;
1370 }
1371 
JSON_Parser_ProcessToken(JSON_Parser parser)1372 static JSON_Status JSON_Parser_ProcessToken(JSON_Parser parser)
1373 {
1374    GrammarianOutput output;
1375    output = Grammarian_ProcessToken(&parser->grammarianData, parser->token, &parser->memorySuite);
1376    switch (GRAMMARIAN_RESULT_CODE(output))
1377    {
1378       case ACCEPTED_TOKEN:
1379          if (!JSON_Parser_HandleGrammarEvents(parser, GRAMMARIAN_EVENT(output)))
1380             return JSON_Failure;
1381          break;
1382 
1383       case REJECTED_TOKEN:
1384          JSON_Parser_SetErrorAtToken(parser, JSON_Error_UnexpectedToken);
1385          return JSON_Failure;
1386 
1387       case SYMBOL_STACK_FULL:
1388          JSON_Parser_SetErrorAtCodepoint(parser, JSON_Error_OutOfMemory);
1389          return JSON_Failure;
1390    }
1391 
1392    /* Reset the lexer to prepare for the next token. */
1393    parser->lexerState = LEXING_WHITESPACE;
1394    parser->lexerBits = 0;
1395    parser->token = T_NONE;
1396    parser->tokenAttributes = 0;
1397    parser->tokenBytesUsed = 0;
1398    return JSON_Success;
1399 }
1400 
1401 /* Lexer functions. */
1402 
/* The characters expected after the first character of each literal
   ("null", "true", "false", "NaN", "Infinity"), stored back to back with a
   0 byte ending each run. The *_START_INDEX constants below give the
   offset at which each literal's run begins. */
static const byte expectedLiteralChars[] = { 'u', 'l', 'l', 0, 'r', 'u', 'e', 0, 'a', 'l', 's', 'e', 0, 'a', 'N', 0, 'n', 'f', 'i', 'n', 'i', 't', 'y', 0  };

#define NULL_LITERAL_EXPECTED_CHARS_START_INDEX     0
#define TRUE_LITERAL_EXPECTED_CHARS_START_INDEX     4
#define FALSE_LITERAL_EXPECTED_CHARS_START_INDEX    8
#define NAN_LITERAL_EXPECTED_CHARS_START_INDEX      13
#define INFINITY_LITERAL_EXPECTED_CHARS_START_INDEX 16
1410 
1411 /* Forward declaration. */
1412 static JSON_Status JSON_Parser_FlushLexer(JSON_Parser parser);
1413 static JSON_Status JSON_Parser_ProcessCodepoint(
1414       JSON_Parser parser, Codepoint c, size_t encodedLength);
1415 
JSON_Parser_HandleInvalidEncodingSequence(JSON_Parser parser,size_t encodedLength)1416 static JSON_Status JSON_Parser_HandleInvalidEncodingSequence(
1417       JSON_Parser parser, size_t encodedLength)
1418 {
1419    if (parser->token == T_STRING && GET_FLAGS(parser->flags, PARSER_REPLACE_INVALID))
1420    {
1421       /* Since we're inside a string token, replacing the invalid sequence
1422          with the Unicode replacement character as requested by the client
1423          is a viable way to avoid a parse failure. Outside a string token,
1424          such a replacement would simply trigger JSON_Error_UnknownToken
1425          when we tried to process the replacement character, so it's less
1426          confusing to stick with JSON_Error_InvalidEncodingSequence in that
1427          case. */
1428       SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_ContainsReplacedCharacter);
1429       return JSON_Parser_ProcessCodepoint(parser, REPLACEMENT_CHARACTER_CODEPOINT, encodedLength);
1430    }
1431    else if (!parser->depth && GET_FLAGS(parser->flags, PARSER_EMBEDDED_DOCUMENT))
1432    {
1433       /* Since we're parsing the top-level value of an embedded
1434          document, assume that the invalid encoding sequence we've
1435          encountered does not actually belong to the document, and
1436          finish parsing by pretending that we've encountered EOF
1437          instead of an invalid sequence. If the content is valid,
1438          this will fail with JSON_Error_StoppedAfterEmbeddedDocument;
1439          otherwise, it will fail with an appropriate error. */
1440       return (JSON_Status)(JSON_Parser_FlushLexer(parser) && JSON_Parser_FlushParser(parser));
1441    }
1442    JSON_Parser_SetErrorAtCodepoint(parser, JSON_Error_InvalidEncodingSequence);
1443    return JSON_Failure;
1444 }
1445 
JSON_Parser_HandleInvalidNumber(JSON_Parser parser,Codepoint c,int codepointsSinceValidNumber,TokenAttributes attributesToRemove)1446 static JSON_Status JSON_Parser_HandleInvalidNumber(JSON_Parser parser,
1447       Codepoint c, int codepointsSinceValidNumber, TokenAttributes attributesToRemove)
1448 {
1449    SET_FLAGS_OFF(TokenAttributes, parser->tokenAttributes, attributesToRemove);
1450    if (!parser->depth && GET_FLAGS(parser->flags, PARSER_EMBEDDED_DOCUMENT))
1451    {
1452       /* The invalid number is the top-level value of an embedded document,
1453          and it has a prefix that can be interpreted as a valid number.
1454          We want to backtrack so that we are at the end of that prefix,
1455          and then process the valid token.
1456 
1457          Note that backtracking requires us to make three assumptions, which
1458          are always valid in the context of a number token:
1459 
1460          1. The input encoding is not JSON_UnknownEncoding.
1461 
1462          2 The codepoints we are backing up across are all in the range
1463          U+0000 - U+007F, aka ASCII, so we can assume the number of
1464          bytes comprising them based on the input encoding.
1465 
1466          3. The codepoints we are backing up across do not include any
1467          line breaks, so we can assume that the line number stays the
1468          same and the column number can simply be decremented.
1469 
1470          For example:
1471 
1472          "01"     => "0"
1473          "123.!"  => "123"
1474          "123e!"  => "123"
1475          "123e+!" => "123"
1476          "123e-!" => "123"
1477          "1.2e!"  => "1.2"
1478          "1.2e+!" => "1.2"
1479          "1.2e-!" => "1.2"
1480          */
1481       parser->codepointLocationByte -= (size_t)codepointsSinceValidNumber
1482          * (size_t)SHORTEST_ENCODING_SEQUENCE(parser->inputEncoding);
1483       parser->codepointLocationColumn -= (size_t)codepointsSinceValidNumber;
1484       parser->tokenBytesUsed -= (size_t)codepointsSinceValidNumber
1485          * (size_t)SHORTEST_ENCODING_SEQUENCE(parser->numberEncoding);
1486       return JSON_Parser_ProcessToken(parser); /* always fails */
1487    }
1488    /* Allow JSON_Parser_FlushLexer() to fail. */
1489    else if (c == EOF_CODEPOINT)
1490       return JSON_Success;
1491 
1492    JSON_Parser_SetErrorAtToken(parser, JSON_Error_InvalidNumber);
1493    return JSON_Failure;
1494 }
1495 
/* Begin lexing a new token of kind `token`, stamping it with the byte,
   line and column of the codepoint currently being examined. */
static void JSON_Parser_StartToken(JSON_Parser parser, Symbol token)
{
   /* Snapshot the current codepoint position as the token's start. */
   parser->tokenLocationColumn = parser->codepointLocationColumn;
   parser->tokenLocationLine   = parser->codepointLocationLine;
   parser->tokenLocationByte   = parser->codepointLocationByte;

   /* Remember what kind of token is now in progress. */
   parser->token               = token;
}
1503 
/* Lexer entry point: feed one decoded codepoint to the lexer state machine.

   parser         the parser whose lexer state is advanced.
   c              the codepoint to lex; JSON_Parser_FlushLexer() passes
                  EOF_CODEPOINT here to finish any pending token.
   encodedLength  number of input bytes the codepoint occupied; added to
                  codepointLocationByte once the codepoint is accepted.

   The function dispatches on parser->lexerState and leaves the switch
   through one of several labels:

     reprocess                        lex the same codepoint again after a
                                      pending token has been processed.
     recordStringCodepointAndAdvance  append codepointToRecord to the token
     recordNumberCodepointAndAdvance  buffer (string or number flavour),
     recordCodepointAndAdvance        then advance.
     advance                          update location counters and, if
                                      tokenFinished is set, process the token.

   Returns JSON_Success when the codepoint was accepted. Returns
   JSON_Failure after recording an error via the JSON_Parser_SetError*
   helpers, or when JSON_Parser_ProcessToken() or
   JSON_Parser_HandleInvalidNumber() reports failure. */
static JSON_Status JSON_Parser_ProcessCodepoint(JSON_Parser parser, Codepoint c, size_t encodedLength)
{
   Encoding tokenEncoding;
   size_t maxTokenLength;
   int tokenFinished           = 0;
   Codepoint codepointToRecord = EOF_CODEPOINT;

   /* If the previous codepoint was U+000D (CARRIAGE RETURN), and the current
      codepoint is U+000A (LINE FEED), then treat the 2 codepoints as a single
      line break. The line counter was already incremented for the CR, and
      will be incremented again for the LF at the "advance" label, so it is
      decremented here to compensate. */
   if (GET_FLAGS(parser->state, PARSER_AFTER_CARRIAGE_RETURN))
   {
      if (c == LINE_FEED_CODEPOINT)
         parser->codepointLocationLine--;
      SET_FLAGS_OFF(ParserState, parser->state, PARSER_AFTER_CARRIAGE_RETURN);
   }

   /* Jumped to after a pending token has been handed to
      JSON_Parser_ProcessToken(), so that the same codepoint is lexed again.
      NOTE(review): this presumably relies on JSON_Parser_ProcessToken()
      resetting lexerState; that function is not visible here - confirm. */
reprocess:

   /* NOTE(review): every case below leaves via goto or return and there is
      no default case, so an unexpected lexerState value would fall through
      to recordStringCodepointAndAdvance. */
   switch (parser->lexerState)
   {
      case LEXING_WHITESPACE:
         /* Between tokens: the codepoint either is a complete
            single-character token, starts a longer token, is ignorable
            whitespace, or is an error. */
         if (c == '{')
         {
            JSON_Parser_StartToken(parser, T_LEFT_CURLY);
            tokenFinished = 1;
         }
         else if (c == '}')
         {
            JSON_Parser_StartToken(parser, T_RIGHT_CURLY);
            tokenFinished = 1;
         }
         else if (c == '[')
         {
            JSON_Parser_StartToken(parser, T_LEFT_SQUARE);
            tokenFinished = 1;
         }
         else if (c == ']')
         {
            JSON_Parser_StartToken(parser, T_RIGHT_SQUARE);
            tokenFinished = 1;
         }
         else if (c == ':')
         {
            JSON_Parser_StartToken(parser, T_COLON);
            tokenFinished = 1;
         }
         else if (c == ',')
         {
            JSON_Parser_StartToken(parser, T_COMMA);
            tokenFinished = 1;
         }
         else if (c == 'n')
         {
            JSON_Parser_StartToken(parser, T_NULL);
            parser->lexerBits = NULL_LITERAL_EXPECTED_CHARS_START_INDEX;
            parser->lexerState = LEXING_LITERAL;
         }
         else if (c == 't')
         {
            JSON_Parser_StartToken(parser, T_TRUE);
            parser->lexerBits = TRUE_LITERAL_EXPECTED_CHARS_START_INDEX;
            parser->lexerState = LEXING_LITERAL;
         }
         else if (c == 'f')
         {
            JSON_Parser_StartToken(parser, T_FALSE);
            parser->lexerBits = FALSE_LITERAL_EXPECTED_CHARS_START_INDEX;
            parser->lexerState = LEXING_LITERAL;
         }
         else if (c == '"')
         {
            JSON_Parser_StartToken(parser, T_STRING);
            parser->lexerState = LEXING_STRING;
         }
         else if (c == '-')
         {
            JSON_Parser_StartToken(parser, T_NUMBER);
            parser->tokenAttributes = JSON_IsNegative;
            codepointToRecord = '-';
            parser->lexerState = LEXING_NUMBER_AFTER_MINUS;
            goto recordNumberCodepointAndAdvance;
         }
         else if (c == '0')
         {
            JSON_Parser_StartToken(parser, T_NUMBER);
            codepointToRecord = '0';
            parser->lexerState = LEXING_NUMBER_AFTER_LEADING_ZERO;
            goto recordNumberCodepointAndAdvance;
         }
         else if (c >= '1' && c <= '9')
         {
            JSON_Parser_StartToken(parser, T_NUMBER);
            codepointToRecord = c;
            parser->lexerState = LEXING_NUMBER_DECIMAL_DIGITS;
            goto recordNumberCodepointAndAdvance;
         }
         else if (c == ' ' || c == TAB_CODEPOINT || c == LINE_FEED_CODEPOINT ||
               c == CARRIAGE_RETURN_CODEPOINT || c == EOF_CODEPOINT)
         {
            /* Ignore whitespace between tokens. */
         }
         else if (c == BOM_CODEPOINT && parser->codepointLocationByte == 0)
         {
            /* A BOM at byte offset 0 is accepted only when the
               PARSER_ALLOW_BOM flag is set; otherwise it is an error. */
            if (GET_FLAGS(parser->flags, PARSER_ALLOW_BOM)) { }
            else
            {
               JSON_Parser_SetErrorAtCodepoint(parser, JSON_Error_BOMNotAllowed);
               return JSON_Failure;
            }
         }
         else if (c == '/' && GET_FLAGS(parser->flags, PARSER_ALLOW_COMMENTS))
         {
            /* Comments are not real tokens, but we save the location
               of the comment as the token location in case of an error. */
            parser->tokenLocationByte = parser->codepointLocationByte;
            parser->tokenLocationLine = parser->codepointLocationLine;
            parser->tokenLocationColumn = parser->codepointLocationColumn;
            parser->lexerState = LEXING_COMMENT_AFTER_SLASH;
         }
         else if (c == 'N' && GET_FLAGS(parser->flags, PARSER_ALLOW_SPECIAL_NUMBERS))
         {
            JSON_Parser_StartToken(parser, T_NAN);
            parser->lexerBits = NAN_LITERAL_EXPECTED_CHARS_START_INDEX;
            parser->lexerState = LEXING_LITERAL;
         }
         else if (c == 'I' && GET_FLAGS(parser->flags, PARSER_ALLOW_SPECIAL_NUMBERS))
         {
            JSON_Parser_StartToken(parser, T_INFINITY);
            parser->lexerBits = INFINITY_LITERAL_EXPECTED_CHARS_START_INDEX;
            parser->lexerState = LEXING_LITERAL;
         }
         else
         {
            JSON_Parser_SetErrorAtCodepoint(parser, JSON_Error_UnknownToken);
            return JSON_Failure;
         }
         goto advance;

      case LEXING_LITERAL:
         /* While lexing a literal we store an index into expectedLiteralChars
            in lexerBits. A zero entry at that index means every expected
            character of the literal has been consumed. */
         if (expectedLiteralChars[parser->lexerBits])
         {
            /* The codepoint should match the next character in the literal. */
            if (c != expectedLiteralChars[parser->lexerBits])
            {
               JSON_Parser_SetErrorAtToken(parser, JSON_Error_UnknownToken);
               return JSON_Failure;
            }
            parser->lexerBits++;

            /* If the literal is the top-level value of an embedded document,
               process it as soon as we consume its last expected codepoint.
               Normally we defer processing until the following codepoint
               has been examined, so that we can treat sequences like "nullx"
               as a single, unknown token rather than a null literal followed
               by an unknown token. */
            if (!parser->depth && GET_FLAGS(parser->flags, PARSER_EMBEDDED_DOCUMENT) &&
                  !expectedLiteralChars[parser->lexerBits])
               tokenFinished = 1;
         }
         else
         {
            /* The literal should be finished, so the codepoint should not be
               a plausible JSON literal character, but rather EOF, whitespace,
               or the first character of the next token. */
            if ((c >= 'A' && c <= 'Z') ||
                  (c >= 'a' && c <= 'z') ||
                  (c >= '0' && c <= '9') ||
                  (c == '_'))
            {
               JSON_Parser_SetErrorAtToken(parser, JSON_Error_UnknownToken);
               return JSON_Failure;
            }
            if (!JSON_Parser_ProcessToken(parser))
               return JSON_Failure;
            goto reprocess;
         }
         goto advance;

      case LEXING_STRING:
         /* Allow JSON_Parser_FlushLexer() to fail. */
         if (c == EOF_CODEPOINT) { }
         else if (c == '"')
            tokenFinished = 1;
         else if (c == '\\')
            parser->lexerState = LEXING_STRING_ESCAPE;
         else if (c < 0x20 && !GET_FLAGS(parser->flags, PARSER_ALLOW_CONTROL_CHARS))
         {
            /* ASCII control characters (U+0000 - U+001F) are not allowed to
               appear unescaped in string values unless specifically allowed. */
            JSON_Parser_SetErrorAtCodepoint(parser, JSON_Error_UnescapedControlCharacter);
            return JSON_Failure;
         }
         else
         {
            codepointToRecord = c;
            goto recordStringCodepointAndAdvance;
         }
         goto advance;

      case LEXING_STRING_ESCAPE:
         /* The previous codepoint was a backslash inside a string. */
         if (c == EOF_CODEPOINT)
         {
            /* Allow JSON_Parser_FlushLexer() to fail. */
         }
         else
         {
            if (c == 'u')
               parser->lexerState = LEXING_STRING_HEX_ESCAPE_BYTE_1;
            else
            {
               /* Single-character escape: map it to the codepoint it
                  stands for and record that in the string value. */
               if (c == '"' || c == '\\' || c == '/')
                  codepointToRecord = c;
               else if (c == 'b')
                  codepointToRecord = BACKSPACE_CODEPOINT;
               else if (c == 't')
                  codepointToRecord = TAB_CODEPOINT;
               else if (c == 'n')
                  codepointToRecord = LINE_FEED_CODEPOINT;
               else if (c == 'f')
                  codepointToRecord = FORM_FEED_CODEPOINT;
               else if (c == 'r')
                  codepointToRecord = CARRIAGE_RETURN_CODEPOINT;
               else
               {
                  /* The current codepoint location is the first character after
                     the backslash that started the escape sequence. The error
                     location should be the beginning of the escape sequence, 1
                     character earlier. */
                  JSON_Parser_SetErrorAtStringEscapeSequenceStart(parser, JSON_Error_InvalidEscapeSequence, 1);
                  return JSON_Failure;
               }
               parser->lexerState = LEXING_STRING;
               goto recordStringCodepointAndAdvance;
            }
         }
         goto advance;

      case LEXING_STRING_HEX_ESCAPE_BYTE_1:
      case LEXING_STRING_HEX_ESCAPE_BYTE_2:
      case LEXING_STRING_HEX_ESCAPE_BYTE_3:
      case LEXING_STRING_HEX_ESCAPE_BYTE_4:
      case LEXING_STRING_HEX_ESCAPE_BYTE_5:
      case LEXING_STRING_HEX_ESCAPE_BYTE_6:
      case LEXING_STRING_HEX_ESCAPE_BYTE_7:
      case LEXING_STRING_HEX_ESCAPE_BYTE_8:
         /* Allow JSON_Parser_FlushLexer() to fail. */
         if (c != EOF_CODEPOINT)
         {
            /* While lexing a string hex escape sequence we store the bytes
               of the escaped codepoint in the low 2 bytes of lexerBits. If
               the escape sequence represents a leading surrogate, we shift
               the leading surrogate into the high 2 bytes and lex a second
               hex escape sequence (which should be a trailing surrogate).

               byteNumber is the 0-based position (0..3) of the current hex
               digit within its 4-digit sequence; states BYTE_5..BYTE_8 map
               onto the same 0..3 range via the "& 0x3" mask. */
            int byteNumber = (parser->lexerState - LEXING_STRING_HEX_ESCAPE_BYTE_1) & 0x3;
            uint32_t nibble;
            if (c >= '0' && c <= '9')
               nibble = c - '0';
            else if (c >= 'A' && c <= 'F')
               nibble = c - 'A' + 10;
            else if (c >= 'a' && c <= 'f')
               nibble = c - 'a' + 10;
            else
            {
               /* The current codepoint location is one of the 4 hex digit
                  character slots in the hex escape sequence. The error
                  location should be the beginning of the hex escape
                  sequence, between 2 and 5 codepoints earlier. */
               int codepointsAgo = 2 /* for "\u" */ + byteNumber;
               JSON_Parser_SetErrorAtStringEscapeSequenceStart(
                     parser, JSON_Error_InvalidEscapeSequence, codepointsAgo);
               return JSON_Failure;
            }
            /* Store the hex digit's bits in the appropriate byte of lexerBits. */
            nibble <<= (3 - byteNumber) * 4 /* shift left by 12, 8, 4, 0 */ ;
            parser->lexerBits |= nibble;
            if (parser->lexerState == LEXING_STRING_HEX_ESCAPE_BYTE_4)
            {
               /* The escape sequence is complete. We need to check whether
                  it represents a leading surrogate (which implies that it
                  will be immediately followed by a hex-escaped trailing
                  surrogate), a trailing surrogate (which is invalid), or a
                  valid codepoint (which should simply be appended to the
                  string token value). */
               if (IS_LEADING_SURROGATE(parser->lexerBits))
               {
                  /* Shift the leading surrogate into the high 2 bytes of
                     lexerBits so that the trailing surrogate can be stored
                     in the low 2 bytes. */
                  parser->lexerBits <<= 16;
                  parser->lexerState = LEXING_STRING_TRAILING_SURROGATE_HEX_ESCAPE_BACKSLASH;
               }
               else if (IS_TRAILING_SURROGATE(parser->lexerBits))
               {
                  /* The current codepoint location is the last hex digit
                     of the hex escape sequence. The error location should
                     be the beginning of the hex escape sequence, 5
                     characters earlier. */
                  JSON_Parser_SetErrorAtStringEscapeSequenceStart(
                        parser, JSON_Error_UnpairedSurrogateEscapeSequence, 5);
                  return JSON_Failure;
               }
               else
               {
                  /* The escape sequence represents a BMP codepoint. */
                  codepointToRecord = parser->lexerBits;
                  parser->lexerBits = 0;
                  parser->lexerState = LEXING_STRING;
                  goto recordStringCodepointAndAdvance;
               }
            }
            else if (parser->lexerState == LEXING_STRING_HEX_ESCAPE_BYTE_8)
            {
               /* The second hex escape sequence is complete. We need to
                  check whether it represents a trailing surrogate as
                  expected. If so, the surrogate pair represents a single
                  non-BMP codepoint. */
               if (!IS_TRAILING_SURROGATE(parser->lexerBits & 0xFFFF))
               {
                  /* The current codepoint location is the last hex digit of
                     the second hex escape sequence. The error location
                     should be the beginning of the leading surrogate
                     hex escape sequence, 11 characters earlier. */
                  JSON_Parser_SetErrorAtStringEscapeSequenceStart(
                        parser, JSON_Error_UnpairedSurrogateEscapeSequence, 11);
                  return JSON_Failure;
               }
               /* The escape sequence represents a non-BMP codepoint. */
               codepointToRecord = CODEPOINT_FROM_SURROGATES(parser->lexerBits);
               parser->lexerBits = 0;
               parser->lexerState = LEXING_STRING;
               goto recordStringCodepointAndAdvance;
            }
            else
               /* More hex digits are expected: step to the next
                  LEXING_STRING_HEX_ESCAPE_BYTE_n state. */
               parser->lexerState++;
         }
         goto advance;

      case LEXING_STRING_TRAILING_SURROGATE_HEX_ESCAPE_BACKSLASH:
         /* A leading surrogate escape was just lexed; the next codepoint
            must be the backslash that begins the trailing surrogate. */
         if (c != EOF_CODEPOINT)
         {
            if (c != '\\')
            {
               /* The current codepoint location is the first character after
                  the leading surrogate hex escape sequence. The error
                  location should be the beginning of the leading surrogate
                  hex escape sequence, 6 characters earlier. */
               JSON_Parser_SetErrorAtStringEscapeSequenceStart(
                     parser, JSON_Error_UnpairedSurrogateEscapeSequence, 6);
               return JSON_Failure;
            }
            parser->lexerState = LEXING_STRING_TRAILING_SURROGATE_HEX_ESCAPE_U;
         }
         goto advance;

      case LEXING_STRING_TRAILING_SURROGATE_HEX_ESCAPE_U:
         if (c != EOF_CODEPOINT)
         {
            if (c != 'u')
            {
               /* Distinguish between a totally bogus escape sequence
                  and a valid one that just isn't the hex escape kind
                  that we require for a trailing surrogate. The current
                  codepoint location is the first character after the
                  backslash that should have introduced the trailing
                  surrogate hex escape sequence. */
               if (c == '"' || c == '\\' || c == '/' || c == 'b' ||
                     c == 't' || c == 'n' || c == 'f' || c == 'r')
               {
                  /* The error location should be at that beginning of the
                     leading surrogate's hex escape sequence, 7 characters
                     earlier. */
                  JSON_Parser_SetErrorAtStringEscapeSequenceStart(
                        parser, JSON_Error_UnpairedSurrogateEscapeSequence, 7);
               }
               else
               {
                  /* The error location should be at that backslash, 1
                     character earlier. */
                  JSON_Parser_SetErrorAtStringEscapeSequenceStart(
                        parser, JSON_Error_InvalidEscapeSequence, 1);
               }
               return JSON_Failure;
            }
            parser->lexerState = LEXING_STRING_HEX_ESCAPE_BYTE_5;
         }
         goto advance;

      case LEXING_NUMBER_AFTER_MINUS:
         if (c == EOF_CODEPOINT)
         {
            /* Allow JSON_Parser_FlushLexer() to fail. */
         }
         else if (c == 'I' && GET_FLAGS(parser->flags, PARSER_ALLOW_SPECIAL_NUMBERS))
         {
            parser->token      = T_NEGATIVE_INFINITY; /* changing horses mid-stream, so to speak */
            parser->lexerBits  = INFINITY_LITERAL_EXPECTED_CHARS_START_INDEX;
            parser->lexerState = LEXING_LITERAL;
         }
         else
         {
            if (c == '0')
            {
               codepointToRecord  = '0';
               parser->lexerState = LEXING_NUMBER_AFTER_LEADING_NEGATIVE_ZERO;
               goto recordNumberCodepointAndAdvance;
            }
            else if (c >= '1' && c <= '9')
            {
               codepointToRecord  = c;
               parser->lexerState = LEXING_NUMBER_DECIMAL_DIGITS;
               goto recordNumberCodepointAndAdvance;
            }
            else
            {
               /* We trigger an unknown token error rather than an invalid number
                  error so that "Foo" and "-Foo" trigger the same error. */
               JSON_Parser_SetErrorAtToken(parser, JSON_Error_UnknownToken);
               return JSON_Failure;
            }
         }
         goto advance;

      case LEXING_NUMBER_AFTER_LEADING_ZERO:
      case LEXING_NUMBER_AFTER_LEADING_NEGATIVE_ZERO:
         if (c == '.')
         {
            codepointToRecord = '.';
            SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_ContainsDecimalPoint);
            parser->lexerState = LEXING_NUMBER_AFTER_DOT;
            goto recordNumberCodepointAndAdvance;
         }
         else if (c == 'e' || c == 'E')
         {
            codepointToRecord = c;
            SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_ContainsExponent);
            parser->lexerState = LEXING_NUMBER_AFTER_E;
            goto recordNumberCodepointAndAdvance;
         }
         else if (c >= '0' && c <= '9')
         {
            /* JSON does not allow the integer part of a number to have any
               digits after a leading zero. The offending digit has not been
               recorded, so there is nothing to back up over (0 codepoints)
               and no attributes to remove. */
            if (!JSON_Parser_HandleInvalidNumber(parser, c, 0, 0))
               return JSON_Failure;
         }
         else if ((c == 'x' || c == 'X') &&
               parser->lexerState == LEXING_NUMBER_AFTER_LEADING_ZERO &&
               GET_FLAGS(parser->flags, PARSER_ALLOW_HEX_NUMBERS))
         {
            /* Hex prefix: only accepted after a non-negative leading zero
               and only when PARSER_ALLOW_HEX_NUMBERS is set. */
            codepointToRecord = c;
            SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_IsHex);
            parser->lexerState = LEXING_NUMBER_AFTER_X;
            goto recordNumberCodepointAndAdvance;
         }
         else
         {
            /* The number is finished. */
            if (!JSON_Parser_ProcessToken(parser))
               return JSON_Failure;
            goto reprocess;
         }
         goto advance;

      case LEXING_NUMBER_AFTER_X:
         if ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f'))
         {
            codepointToRecord = c;
            parser->lexerState = LEXING_NUMBER_HEX_DIGITS;
            goto recordNumberCodepointAndAdvance;
         }
         /* No hex digit after the "x": 1 recorded codepoint (the "x")
            follows the valid prefix. */
         else if (!JSON_Parser_HandleInvalidNumber(parser, c, 1, JSON_IsHex))
            return JSON_Failure;
         goto advance;

      case LEXING_NUMBER_HEX_DIGITS:
         if ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f'))
         {
            codepointToRecord = c;
            goto recordNumberCodepointAndAdvance;
         }
         /* The number is finished. */
         if (!JSON_Parser_ProcessToken(parser))
            return JSON_Failure;
         goto reprocess;

      case LEXING_NUMBER_DECIMAL_DIGITS:
         if (c >= '0' && c <= '9')
         {
            codepointToRecord = c;
            goto recordNumberCodepointAndAdvance;
         }
         else if (c == '.')
         {
            codepointToRecord = '.';
            SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_ContainsDecimalPoint);
            parser->lexerState = LEXING_NUMBER_AFTER_DOT;
            goto recordNumberCodepointAndAdvance;
         }
         else if (c == 'e' || c == 'E')
         {
            codepointToRecord = c;
            SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_ContainsExponent);
            parser->lexerState = LEXING_NUMBER_AFTER_E;
            goto recordNumberCodepointAndAdvance;
         }
         /* The number is finished. */
         if (!JSON_Parser_ProcessToken(parser))
            return JSON_Failure;
         goto reprocess;

      case LEXING_NUMBER_AFTER_DOT:
         if (c >= '0' && c <= '9')
         {
            codepointToRecord = c;
            parser->lexerState = LEXING_NUMBER_FRACTIONAL_DIGITS;
            goto recordNumberCodepointAndAdvance;
         }
         /* No digit after the ".": 1 recorded codepoint (the ".") follows
            the valid prefix. */
         else if (!JSON_Parser_HandleInvalidNumber(parser, c, 1, JSON_ContainsDecimalPoint))
            return JSON_Failure;
         goto advance;

      case LEXING_NUMBER_FRACTIONAL_DIGITS:
         if (c >= '0' && c <= '9')
         {
            codepointToRecord = c;
            goto recordNumberCodepointAndAdvance;
         }
         else if (c == 'e' || c == 'E')
         {
            codepointToRecord = c;
            SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_ContainsExponent);
            parser->lexerState = LEXING_NUMBER_AFTER_E;
            goto recordNumberCodepointAndAdvance;
         }
         /* The number is finished. */
         if (!JSON_Parser_ProcessToken(parser))
            return JSON_Failure;
         goto reprocess;

      case LEXING_NUMBER_AFTER_E:
         if (c == '+')
         {
            codepointToRecord = c;
            parser->lexerState = LEXING_NUMBER_AFTER_EXPONENT_SIGN;
            goto recordNumberCodepointAndAdvance;
         }
         else if (c == '-')
         {
            codepointToRecord = c;
            SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_ContainsNegativeExponent);
            parser->lexerState = LEXING_NUMBER_AFTER_EXPONENT_SIGN;
            goto recordNumberCodepointAndAdvance;
         }
         else if (c >= '0' && c <= '9')
         {
            codepointToRecord = c;
            parser->lexerState = LEXING_NUMBER_EXPONENT_DIGITS;
            goto recordNumberCodepointAndAdvance;
         }
         /* Nothing valid after the "e": 1 recorded codepoint (the "e")
            follows the valid prefix. */
         else if (!JSON_Parser_HandleInvalidNumber(parser, c, 1, JSON_ContainsExponent))
            return JSON_Failure;
         goto advance;

      case LEXING_NUMBER_AFTER_EXPONENT_SIGN:
         if (c >= '0' && c <= '9')
         {
            codepointToRecord = c;
            parser->lexerState = LEXING_NUMBER_EXPONENT_DIGITS;
            goto recordNumberCodepointAndAdvance;
         }
         /* No digit after the exponent sign: 2 recorded codepoints (the "e"
            and the sign) follow the valid prefix. */
         else if (!JSON_Parser_HandleInvalidNumber(parser, c, 2, JSON_ContainsExponent | JSON_ContainsNegativeExponent))
            return JSON_Failure;
         goto advance;

      case LEXING_NUMBER_EXPONENT_DIGITS:
         if (c >= '0' && c <= '9')
         {
            codepointToRecord = c;
            goto recordNumberCodepointAndAdvance;
         }
         /* The number is finished. */
         if (!JSON_Parser_ProcessToken(parser))
            return JSON_Failure;
         goto reprocess;

      case LEXING_COMMENT_AFTER_SLASH:
         /* A "/" was seen between tokens; only "//" and slash-star
            introduce a comment. */
         if (c == '/')
            parser->lexerState = LEXING_SINGLE_LINE_COMMENT;
         else if (c == '*')
            parser->lexerState = LEXING_MULTI_LINE_COMMENT;
         else
         {
            JSON_Parser_SetErrorAtToken(parser, JSON_Error_UnknownToken);
            return JSON_Failure;
         }
         goto advance;

      case LEXING_SINGLE_LINE_COMMENT:
         /* A single-line comment ends at a line break or at EOF. */
         if (c == CARRIAGE_RETURN_CODEPOINT || c == LINE_FEED_CODEPOINT || c == EOF_CODEPOINT)
            parser->lexerState = LEXING_WHITESPACE;
         goto advance;

      case LEXING_MULTI_LINE_COMMENT:
         /* EOF does not end a multi-line comment, so the lexer stays in a
            non-idle state for JSON_Parser_FlushLexer() to reject. */
         if (c == '*')
            parser->lexerState = LEXING_MULTI_LINE_COMMENT_AFTER_STAR;
         goto advance;

      case LEXING_MULTI_LINE_COMMENT_AFTER_STAR:
         /* "/" closes the comment; another "*" keeps us in this state;
            anything else returns to the comment body. */
         if (c == '/')
            parser->lexerState = LEXING_WHITESPACE;
         else if (c != '*')
            parser->lexerState = LEXING_MULTI_LINE_COMMENT;
         goto advance;
   }

   /* Append codepointToRecord to a string token: select the string
      encoding and length limit, and update the token attribute flags
      according to the range the codepoint falls in. */
recordStringCodepointAndAdvance:

   tokenEncoding  = parser->stringEncoding;
   maxTokenLength = parser->maxStringLength;
   if (!codepointToRecord)
   {
      SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_ContainsNullCharacter | JSON_ContainsControlCharacter);
   }
   else if (codepointToRecord < FIRST_NON_CONTROL_CODEPOINT)
   {
      SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_ContainsControlCharacter);
   }
   else if (codepointToRecord >= FIRST_NON_BMP_CODEPOINT)
   {
      SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_ContainsNonASCIICharacter | JSON_ContainsNonBMPCharacter);
   }
   else if (codepointToRecord >= FIRST_NON_ASCII_CODEPOINT)
   {
      SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_ContainsNonASCIICharacter);
   }
   goto recordCodepointAndAdvance;

   /* Append codepointToRecord to a number token: select the number
      encoding and length limit. */
recordNumberCodepointAndAdvance:

   tokenEncoding = parser->numberEncoding;
   maxTokenLength = parser->maxNumberLength;
   goto recordCodepointAndAdvance;

   /* Shared tail: encode the codepoint into the token buffer, enforce the
      length limit, and grow the buffer if it is getting full. */
recordCodepointAndAdvance:

   /* We always ensure that there are LONGEST_ENCODING_SEQUENCE bytes
      available in the buffer for the next codepoint, so we don't have to
      check whether there is room when we decode a new codepoint, and if
      there isn't another codepoint, we have space already allocated for
      the encoded null terminator. */
   parser->tokenBytesUsed += EncodeCodepoint(codepointToRecord, tokenEncoding, parser->pTokenBytes + parser->tokenBytesUsed);
   if (parser->tokenBytesUsed > maxTokenLength)
   {
      JSON_Parser_SetErrorAtToken(parser, parser->token == T_NUMBER ? JSON_Error_TooLongNumber : JSON_Error_TooLongString);
      return JSON_Failure;
   }
   if (parser->tokenBytesUsed > parser->tokenBytesLength - LONGEST_ENCODING_SEQUENCE)
   {
      /* Fewer than LONGEST_ENCODING_SEQUENCE bytes remain: double the
         token buffer now so the next codepoint is guaranteed to fit. */
      byte* pBiggerBuffer = DoubleBuffer(&parser->memorySuite, parser->defaultTokenBytes, parser->pTokenBytes, parser->tokenBytesLength);
      if (!pBiggerBuffer)
      {
         JSON_Parser_SetErrorAtCodepoint(parser, JSON_Error_OutOfMemory);
         return JSON_Failure;
      }
      parser->pTokenBytes = pBiggerBuffer;
      parser->tokenBytesLength *= 2;
   }
   goto advance;

advance:

   /* The current codepoint has been accepted, so advance the codepoint
      location counters accordingly. Note that the one time we don't
      do this is when the codepoint is EOF, which doesn't actually
      appear in the input stream. */
   if (c == CARRIAGE_RETURN_CODEPOINT)
   {
      /* Remember the CR so that an immediately following LF is not
         counted as a second line break (see the top of this function). */
      SET_FLAGS_ON(ParserState, parser->state, PARSER_AFTER_CARRIAGE_RETURN);
   }
   if (c != EOF_CODEPOINT)
   {
      parser->codepointLocationByte += encodedLength;
      if (c == CARRIAGE_RETURN_CODEPOINT || c == LINE_FEED_CODEPOINT)
      {
         /* The next character will begin a new line. */
         parser->codepointLocationLine++;
         parser->codepointLocationColumn = 0;
      }
      else
      {
         /* The next character will be on the same line. */
         parser->codepointLocationColumn++;
      }
   }

   /* Tokens that end on the current codepoint (punctuation, a closing
      quote, an embedded top-level literal) are processed only after the
      location counters have moved past that codepoint. */
   if (tokenFinished && !JSON_Parser_ProcessToken(parser))
      return JSON_Failure;

   return JSON_Success;
}
2208 
JSON_Parser_FlushLexer(JSON_Parser parser)2209 static JSON_Status JSON_Parser_FlushLexer(JSON_Parser parser)
2210 {
2211    /* Push the EOF codepoint to the lexer so that it can finish the pending
2212       token, if any. The EOF codepoint is never emitted by the decoder
2213       itself, since it is outside the Unicode range and therefore cannot
2214       be encoded in any of the possible input encodings. */
2215    if (!JSON_Parser_ProcessCodepoint(parser, EOF_CODEPOINT, 0))
2216       return JSON_Failure;
2217 
2218    /* The lexer should be idle when parsing finishes. */
2219    if (parser->lexerState != LEXING_WHITESPACE)
2220    {
2221       JSON_Parser_SetErrorAtToken(parser, JSON_Error_IncompleteToken);
2222       return JSON_Failure;
2223    }
2224    return JSON_Success;
2225 }
2226 
2227 /* Parser's decoder functions. */
2228 
JSON_Parser_CallEncodingDetectedHandler(JSON_Parser parser)2229 static JSON_Status JSON_Parser_CallEncodingDetectedHandler(JSON_Parser parser)
2230 {
2231    if (parser->encodingDetectedHandler && parser->encodingDetectedHandler(parser) != JSON_Parser_Continue)
2232    {
2233       JSON_Parser_SetErrorAtCodepoint(parser, JSON_Error_AbortedByHandler);
2234       return JSON_Failure;
2235    }
2236    return JSON_Success;
2237 }
2238 
2239 /* Forward declaration. */
2240 static JSON_Status JSON_Parser_ProcessInputBytes(JSON_Parser parser, const byte* pBytes, size_t length);
2241 
JSON_Parser_ProcessUnknownByte(JSON_Parser parser,byte b)2242 static JSON_Status JSON_Parser_ProcessUnknownByte(JSON_Parser parser, byte b)
2243 {
2244    /* When the input encoding is unknown, the first 4 bytes of input are
2245       recorded in decoder.bits. */
2246    byte bytes[LONGEST_ENCODING_SEQUENCE];
2247 
2248    switch (parser->decoderData.state)
2249    {
2250       case DECODER_RESET:
2251          parser->decoderData.state = DECODED_1_OF_4;
2252          parser->decoderData.bits = (uint32_t)b << 24;
2253          break;
2254 
2255       case DECODED_1_OF_4:
2256          parser->decoderData.state = DECODED_2_OF_4;
2257          parser->decoderData.bits |= (uint32_t)b << 16;
2258          break;
2259 
2260       case DECODED_2_OF_4:
2261          parser->decoderData.state = DECODED_3_OF_4;
2262          parser->decoderData.bits |= (uint32_t)b << 8;
2263          break;
2264 
2265       case DECODED_3_OF_4:
2266          bytes[0] = (byte)(parser->decoderData.bits >> 24);
2267          bytes[1] = (byte)(parser->decoderData.bits >> 16);
2268          bytes[2] = (byte)(parser->decoderData.bits >> 8);
2269          bytes[3] = (byte)(b);
2270 
2271          /* We try to match the following patterns in order, where .. is any
2272             byte value and nz is any non-zero byte value:
2273             EF BB BF .. => UTF-8 with BOM
2274             FF FE 00 00 => UTF-32LE with BOM
2275             FF FE nz 00 => UTF-16LE with BOM
2276             00 00 FE FF -> UTF-32BE with BOM
2277             FE FF .. .. => UTF-16BE with BOM
2278             nz nz .. .. => UTF-8
2279             nz 00 nz .. => UTF-16LE
2280             nz 00 00 00 => UTF-32LE
2281             00 nz .. .. => UTF-16BE
2282             00 00 00 nz => UTF-32BE
2283             .. .. .. .. => unknown encoding */
2284          if (bytes[0] == 0xEF && bytes[1] == 0xBB && bytes[2] == 0xBF)
2285          {
2286             /* EF BB BF .. */
2287             parser->inputEncoding = JSON_UTF8;
2288          }
2289          else if (bytes[0] == 0xFF && bytes[1] == 0xFE && bytes[3] == 0x00)
2290          {
2291             /* FF FE 00 00 or
2292                FF FE nz 00 */
2293             parser->inputEncoding = (bytes[2] == 0x00) ? JSON_UTF32LE : JSON_UTF16LE;
2294          }
2295          else if (bytes[0] == 0x00 && bytes[1] == 0x00 && bytes[2] == 0xFE && bytes[3] == 0xFF)
2296          {
2297             /* 00 00 FE FF */
2298             parser->inputEncoding = JSON_UTF32BE;
2299          }
2300          else if (bytes[0] == 0xFE && bytes[1] == 0xFF)
2301          {
2302             /* FE FF .. .. */
2303             parser->inputEncoding = JSON_UTF16BE;
2304          }
2305          else if (bytes[0] != 0x00)
2306          {
2307             /* nz .. .. .. */
2308             if (bytes[1] != 0x00)
2309             {
2310                /* nz nz .. .. */
2311                parser->inputEncoding = JSON_UTF8;
2312             }
2313             else if (bytes[2] != 0x00)
2314             {
2315                /* nz 00 nz .. */
2316                parser->inputEncoding = JSON_UTF16LE;
2317             }
2318             else if (bytes[3] == 0x00)
2319             {
2320                /* nz 00 00 00 */
2321                parser->inputEncoding = JSON_UTF32LE;
2322             }
2323             else
2324             {
2325                /* nz 00 00 nz => error */
2326             }
2327          }
2328          else if (bytes[1] != 0x00)
2329          {
2330             /* 00 nz .. .. */
2331             parser->inputEncoding = JSON_UTF16BE;
2332          }
2333          else if (bytes[2] == 0x00 && bytes[3] != 0x00)
2334          {
2335             /* 00 00 00 nz */
2336             parser->inputEncoding = JSON_UTF32BE;
2337          }
2338          else
2339          {
2340             /* 00 00 nz .. or
2341                00 00 00 00 => error */
2342          }
2343 
2344          if (parser->inputEncoding == JSON_UnknownEncoding)
2345             return JSON_Parser_HandleInvalidEncodingSequence(parser, 4);
2346 
2347          if (!JSON_Parser_CallEncodingDetectedHandler(parser))
2348             return JSON_Failure;
2349 
2350          /* Reset the decoder before reprocessing the bytes. */
2351          Decoder_Reset(&parser->decoderData);
2352          return JSON_Parser_ProcessInputBytes(parser, bytes, 4);
2353    }
2354 
2355    /* We don't have 4 bytes yet. */
2356    return JSON_Success;
2357 }
2358 
JSON_Parser_ProcessInputBytes(JSON_Parser parser,const byte * pBytes,size_t length)2359 JSON_Status JSON_Parser_ProcessInputBytes(JSON_Parser parser, const byte* pBytes, size_t length)
2360 {
2361    /* Note that if length is 0, pBytes is allowed to be NULL. */
2362    size_t i = 0;
2363    while (parser->inputEncoding == JSON_UnknownEncoding && i < length)
2364    {
2365       if (!JSON_Parser_ProcessUnknownByte(parser, pBytes[i]))
2366          return JSON_Failure;
2367       i++;
2368    }
2369    while (i < length)
2370    {
2371       DecoderOutput output     = Decoder_ProcessByte(
2372             &parser->decoderData, parser->inputEncoding, pBytes[i]);
2373       DecoderResultCode result = DECODER_RESULT_CODE(output);
2374       switch (result)
2375       {
2376          case SEQUENCE_PENDING:
2377             i++;
2378             break;
2379 
2380          case SEQUENCE_COMPLETE:
2381             if (!JSON_Parser_ProcessCodepoint(
2382                      parser, DECODER_CODEPOINT(output),
2383                      DECODER_SEQUENCE_LENGTH(output)))
2384                return JSON_Failure;
2385             i++;
2386             break;
2387 
2388          case SEQUENCE_INVALID_INCLUSIVE:
2389             i++;
2390             /* fallthrough */
2391          case SEQUENCE_INVALID_EXCLUSIVE:
2392             if (!JSON_Parser_HandleInvalidEncodingSequence(
2393                      parser, DECODER_SEQUENCE_LENGTH(output)))
2394                return JSON_Failure;
2395             break;
2396       }
2397    }
2398    return JSON_Success;
2399 }
2400 
JSON_Parser_FlushDecoder(JSON_Parser parser)2401 static JSON_Status JSON_Parser_FlushDecoder(JSON_Parser parser)
2402 {
2403    /* If the input was 1, 2, or 3 bytes long, and the input encoding was not
2404       explicitly specified by the client, we can sometimes make a reasonable
2405       guess. If the input was 1 or 3 bytes long, the only encoding that could
2406       possibly be valid JSON is UF-8. If the input was 2 bytes long, we try
2407       to match the following patterns in order, where .. is any byte value
2408       and nz is any non-zero byte value:
2409       FF FE => UTF-16LE with BOM
2410       FE FF => UTF-16BE with BOM
2411       nz nz => UTF-8
2412       nz 00 => UTF-16LE
2413       00 nz => UTF-16BE
2414       .. .. => unknown encoding
2415       */
2416    if (parser->inputEncoding == JSON_UnknownEncoding &&
2417          parser->decoderData.state != DECODER_RESET)
2418    {
2419       byte bytes[3];
2420       size_t length = 0;
2421       bytes[0] = (byte)(parser->decoderData.bits >> 24);
2422       bytes[1] = (byte)(parser->decoderData.bits >> 16);
2423       bytes[2] = (byte)(parser->decoderData.bits >> 8);
2424 
2425       switch (parser->decoderData.state)
2426       {
2427          case DECODED_1_OF_4:
2428             parser->inputEncoding = JSON_UTF8;
2429             length = 1;
2430             break;
2431 
2432          case DECODED_2_OF_4:
2433             /* FF FE */
2434             if (bytes[0] == 0xFF && bytes[1] == 0xFE)
2435                parser->inputEncoding = JSON_UTF16LE;
2436             /* FE FF */
2437             else if (bytes[0] == 0xFE && bytes[1] == 0xFF)
2438                parser->inputEncoding = JSON_UTF16BE;
2439             else if (bytes[0] != 0x00)
2440             {
2441                /* nz nz or
2442                   nz 00 */
2443                parser->inputEncoding = bytes[1] ? JSON_UTF8 : JSON_UTF16LE;
2444             }
2445             /* 00 nz */
2446             else if (bytes[1] != 0x00)
2447                parser->inputEncoding = JSON_UTF16BE;
2448             /* 00 00 */
2449             else
2450                return JSON_Parser_HandleInvalidEncodingSequence(parser, 2);
2451             length = 2;
2452             break;
2453 
2454          case DECODED_3_OF_4:
2455             parser->inputEncoding = JSON_UTF8;
2456             length = 3;
2457             break;
2458       }
2459 
2460       if (!JSON_Parser_CallEncodingDetectedHandler(parser))
2461          return JSON_Failure;
2462 
2463       /* Reset the decoder before reprocessing the bytes. */
2464       parser->decoderData.state = DECODER_RESET;
2465       parser->decoderData.bits = 0;
2466       if (!JSON_Parser_ProcessInputBytes(parser, bytes, length))
2467          return JSON_Failure;
2468    }
2469 
2470    /* The decoder should be idle when parsing finishes. */
2471    if (Decoder_SequencePending(&parser->decoderData))
2472       return JSON_Parser_HandleInvalidEncodingSequence(
2473             parser, DECODER_STATE_BYTES(parser->decoderData.state));
2474    return JSON_Success;
2475 }
2476 
2477 /* Parser API functions. */
2478 
JSON_Parser_Create(const JSON_MemorySuite * pMemorySuite)2479 JSON_Parser JSON_CALL JSON_Parser_Create(const JSON_MemorySuite* pMemorySuite)
2480 {
2481    JSON_Parser parser;
2482    JSON_MemorySuite memorySuite;
2483 
2484    if (pMemorySuite)
2485    {
2486       memorySuite = *pMemorySuite;
2487 
2488       /* The full memory suite must be specified. */
2489       if (!memorySuite.realloc || !memorySuite.free)
2490          return NULL;
2491    }
2492    else
2493       memorySuite = defaultMemorySuite;
2494 
2495    parser = (JSON_Parser)memorySuite.realloc(memorySuite.userData, NULL, sizeof(struct JSON_Parser_Data));
2496 
2497    if (!parser)
2498       return NULL;
2499 
2500    parser->memorySuite = memorySuite;
2501    JSON_Parser_ResetData(parser, 0/* isInitialized */);
2502    return parser;
2503 }
2504 
JSON_Parser_Free(JSON_Parser parser)2505 JSON_Status JSON_CALL JSON_Parser_Free(JSON_Parser parser)
2506 {
2507    if (!parser || GET_FLAGS(parser->state, PARSER_IN_PROTECTED_API))
2508       return JSON_Failure;
2509 
2510    SET_FLAGS_ON(ParserState, parser->state, PARSER_IN_PROTECTED_API);
2511 
2512    if (parser->pTokenBytes != parser->defaultTokenBytes)
2513       parser->memorySuite.free(parser->memorySuite.userData, parser->pTokenBytes);
2514 
2515    while (parser->pMemberNames)
2516       JSON_Parser_PopMemberNameList(parser);
2517 
2518    Grammarian_FreeAllocations(&parser->grammarianData, &parser->memorySuite);
2519    parser->memorySuite.free(parser->memorySuite.userData, parser);
2520    return JSON_Success;
2521 }
2522 
JSON_Parser_Reset(JSON_Parser parser)2523 JSON_Status JSON_CALL JSON_Parser_Reset(JSON_Parser parser)
2524 {
2525    if (!parser || GET_FLAGS(parser->state, PARSER_IN_PROTECTED_API))
2526       return JSON_Failure;
2527    SET_FLAGS_ON(ParserState, parser->state, PARSER_IN_PROTECTED_API);
2528    JSON_Parser_ResetData(parser, 1/* isInitialized */);
2529    /* Note that JSON_Parser_ResetData() unset PARSER_IN_PROTECTED_API for us. */
2530    return JSON_Success;
2531 }
2532 
JSON_Parser_GetUserData(JSON_Parser parser)2533 void* JSON_CALL JSON_Parser_GetUserData(JSON_Parser parser)
2534 {
2535    return parser ? parser->userData : NULL;
2536 }
2537 
JSON_Parser_SetUserData(JSON_Parser parser,void * userData)2538 JSON_Status JSON_CALL JSON_Parser_SetUserData(JSON_Parser parser, void* userData)
2539 {
2540    if (!parser)
2541       return JSON_Failure;
2542    parser->userData = userData;
2543    return JSON_Success;
2544 }
2545 
JSON_Parser_GetInputEncoding(JSON_Parser parser)2546 JSON_Encoding JSON_CALL JSON_Parser_GetInputEncoding(JSON_Parser parser)
2547 {
2548    return parser ? (JSON_Encoding)parser->inputEncoding : JSON_UnknownEncoding;
2549 }
2550 
JSON_Parser_SetInputEncoding(JSON_Parser parser,JSON_Encoding encoding)2551 JSON_Status JSON_CALL JSON_Parser_SetInputEncoding(JSON_Parser parser, JSON_Encoding encoding)
2552 {
2553    if (     !parser
2554          || encoding < JSON_UnknownEncoding
2555          || encoding > JSON_UTF32BE
2556          || GET_FLAGS(parser->state, PARSER_STARTED))
2557       return JSON_Failure;
2558    parser->inputEncoding = (Encoding)encoding;
2559    return JSON_Success;
2560 }
2561 
JSON_Parser_GetStringEncoding(JSON_Parser parser)2562 JSON_Encoding JSON_CALL JSON_Parser_GetStringEncoding(JSON_Parser parser)
2563 {
2564    return parser ? (JSON_Encoding)parser->stringEncoding : JSON_UTF8;
2565 }
2566 
JSON_Parser_SetStringEncoding(JSON_Parser parser,JSON_Encoding encoding)2567 JSON_Status JSON_CALL JSON_Parser_SetStringEncoding(JSON_Parser parser, JSON_Encoding encoding)
2568 {
2569    if (
2570             !parser
2571          || encoding <= JSON_UnknownEncoding
2572          || encoding > JSON_UTF32BE
2573          || GET_FLAGS(parser->state, PARSER_STARTED))
2574       return JSON_Failure;
2575    parser->stringEncoding = (Encoding)encoding;
2576    return JSON_Success;
2577 }
2578 
JSON_Parser_GetMaxStringLength(JSON_Parser parser)2579 size_t JSON_CALL JSON_Parser_GetMaxStringLength(JSON_Parser parser)
2580 {
2581    return parser ? parser->maxStringLength : SIZE_MAX;
2582 }
2583 
JSON_Parser_SetMaxStringLength(JSON_Parser parser,size_t maxLength)2584 JSON_Status JSON_CALL JSON_Parser_SetMaxStringLength(JSON_Parser parser, size_t maxLength)
2585 {
2586    if (     !parser
2587          || GET_FLAGS(parser->state, PARSER_STARTED))
2588       return JSON_Failure;
2589    parser->maxStringLength = maxLength;
2590    return JSON_Success;
2591 }
2592 
JSON_Parser_GetNumberEncoding(JSON_Parser parser)2593 JSON_Encoding JSON_CALL JSON_Parser_GetNumberEncoding(JSON_Parser parser)
2594 {
2595    return parser ? (JSON_Encoding)parser->numberEncoding : JSON_UTF8;
2596 }
2597 
JSON_Parser_SetNumberEncoding(JSON_Parser parser,JSON_Encoding encoding)2598 JSON_Status JSON_CALL JSON_Parser_SetNumberEncoding(JSON_Parser parser, JSON_Encoding encoding)
2599 {
2600    if (!parser || encoding <= JSON_UnknownEncoding || encoding > JSON_UTF32BE || GET_FLAGS(parser->state, PARSER_STARTED))
2601       return JSON_Failure;
2602    parser->numberEncoding = (Encoding)encoding;
2603    return JSON_Success;
2604 }
2605 
JSON_Parser_GetMaxNumberLength(JSON_Parser parser)2606 size_t JSON_CALL JSON_Parser_GetMaxNumberLength(JSON_Parser parser)
2607 {
2608    return parser ? parser->maxNumberLength : SIZE_MAX;
2609 }
2610 
JSON_Parser_SetMaxNumberLength(JSON_Parser parser,size_t maxLength)2611 JSON_Status JSON_CALL JSON_Parser_SetMaxNumberLength(JSON_Parser parser, size_t maxLength)
2612 {
2613    if (     !parser
2614          || GET_FLAGS(parser->state, PARSER_STARTED))
2615       return JSON_Failure;
2616    parser->maxNumberLength = maxLength;
2617    return JSON_Success;
2618 }
2619 
JSON_Parser_GetAllowBOM(JSON_Parser parser)2620 JSON_Boolean JSON_CALL JSON_Parser_GetAllowBOM(JSON_Parser parser)
2621 {
2622    return (parser && GET_FLAGS(parser->flags, PARSER_ALLOW_BOM)) ? JSON_True : JSON_False;
2623 }
2624 
JSON_Parser_SetAllowBOM(JSON_Parser parser,JSON_Boolean allowBOM)2625 JSON_Status JSON_CALL JSON_Parser_SetAllowBOM(JSON_Parser parser, JSON_Boolean allowBOM)
2626 {
2627    if (!parser || GET_FLAGS(parser->state, PARSER_STARTED))
2628       return JSON_Failure;
2629    SET_FLAGS(ParserFlags, parser->flags, PARSER_ALLOW_BOM, allowBOM);
2630    return JSON_Success;
2631 }
2632 
JSON_Parser_GetAllowComments(JSON_Parser parser)2633 JSON_Boolean JSON_CALL JSON_Parser_GetAllowComments(JSON_Parser parser)
2634 {
2635    return (parser && GET_FLAGS(parser->flags, PARSER_ALLOW_COMMENTS)) ? JSON_True : JSON_False;
2636 }
2637 
JSON_Parser_SetAllowComments(JSON_Parser parser,JSON_Boolean allowComments)2638 JSON_Status JSON_CALL JSON_Parser_SetAllowComments(JSON_Parser parser, JSON_Boolean allowComments)
2639 {
2640    if (!parser || GET_FLAGS(parser->state, PARSER_STARTED))
2641       return JSON_Failure;
2642    SET_FLAGS(ParserFlags, parser->flags, PARSER_ALLOW_COMMENTS, allowComments);
2643    return JSON_Success;
2644 }
2645 
JSON_Parser_GetAllowSpecialNumbers(JSON_Parser parser)2646 JSON_Boolean JSON_CALL JSON_Parser_GetAllowSpecialNumbers(JSON_Parser parser)
2647 {
2648    return (parser && GET_FLAGS(parser->flags, PARSER_ALLOW_SPECIAL_NUMBERS)) ? JSON_True : JSON_False;
2649 }
2650 
JSON_Parser_SetAllowSpecialNumbers(JSON_Parser parser,JSON_Boolean allowSpecialNumbers)2651 JSON_Status JSON_CALL JSON_Parser_SetAllowSpecialNumbers(JSON_Parser parser, JSON_Boolean allowSpecialNumbers)
2652 {
2653    if (!parser || GET_FLAGS(parser->state, PARSER_STARTED))
2654       return JSON_Failure;
2655    SET_FLAGS(ParserFlags, parser->flags, PARSER_ALLOW_SPECIAL_NUMBERS, allowSpecialNumbers);
2656    return JSON_Success;
2657 }
2658 
JSON_Parser_GetAllowHexNumbers(JSON_Parser parser)2659 JSON_Boolean JSON_CALL JSON_Parser_GetAllowHexNumbers(JSON_Parser parser)
2660 {
2661    return (parser && GET_FLAGS(parser->flags, PARSER_ALLOW_HEX_NUMBERS)) ? JSON_True : JSON_False;
2662 }
2663 
JSON_Parser_SetAllowHexNumbers(JSON_Parser parser,JSON_Boolean allowHexNumbers)2664 JSON_Status JSON_CALL JSON_Parser_SetAllowHexNumbers(JSON_Parser parser, JSON_Boolean allowHexNumbers)
2665 {
2666    if (!parser || GET_FLAGS(parser->state, PARSER_STARTED))
2667       return JSON_Failure;
2668    SET_FLAGS(ParserFlags, parser->flags, PARSER_ALLOW_HEX_NUMBERS, allowHexNumbers);
2669    return JSON_Success;
2670 }
2671 
JSON_Parser_GetAllowUnescapedControlCharacters(JSON_Parser parser)2672 JSON_Boolean JSON_CALL JSON_Parser_GetAllowUnescapedControlCharacters(JSON_Parser parser)
2673 {
2674    return (parser && GET_FLAGS(parser->flags, PARSER_ALLOW_CONTROL_CHARS)) ? JSON_True : JSON_False;
2675 }
2676 
JSON_Parser_SetAllowUnescapedControlCharacters(JSON_Parser parser,JSON_Boolean allowUnescapedControlCharacters)2677 JSON_Status JSON_CALL JSON_Parser_SetAllowUnescapedControlCharacters(JSON_Parser parser, JSON_Boolean allowUnescapedControlCharacters)
2678 {
2679    if (!parser || GET_FLAGS(parser->state, PARSER_STARTED))
2680       return JSON_Failure;
2681    SET_FLAGS(ParserFlags, parser->flags, PARSER_ALLOW_CONTROL_CHARS, allowUnescapedControlCharacters);
2682    return JSON_Success;
2683 }
2684 
JSON_Parser_GetReplaceInvalidEncodingSequences(JSON_Parser parser)2685 JSON_Boolean JSON_CALL JSON_Parser_GetReplaceInvalidEncodingSequences(JSON_Parser parser)
2686 {
2687    return (parser && GET_FLAGS(parser->flags, PARSER_REPLACE_INVALID)) ? JSON_True : JSON_False;
2688 }
2689 
JSON_Parser_SetReplaceInvalidEncodingSequences(JSON_Parser parser,JSON_Boolean replaceInvalidEncodingSequences)2690 JSON_Status JSON_CALL JSON_Parser_SetReplaceInvalidEncodingSequences(
2691       JSON_Parser parser, JSON_Boolean replaceInvalidEncodingSequences)
2692 {
2693    if (!parser || GET_FLAGS(parser->state, PARSER_STARTED))
2694       return JSON_Failure;
2695    SET_FLAGS(ParserFlags, parser->flags, PARSER_REPLACE_INVALID, replaceInvalidEncodingSequences);
2696    return JSON_Success;
2697 }
2698 
JSON_Parser_GetTrackObjectMembers(JSON_Parser parser)2699 JSON_Boolean JSON_CALL JSON_Parser_GetTrackObjectMembers(JSON_Parser parser)
2700 {
2701    return (parser && GET_FLAGS(parser->flags, PARSER_TRACK_OBJECT_MEMBERS)) ? JSON_True : JSON_False;
2702 }
2703 
JSON_Parser_SetTrackObjectMembers(JSON_Parser parser,JSON_Boolean trackObjectMembers)2704 JSON_Status JSON_CALL JSON_Parser_SetTrackObjectMembers(JSON_Parser parser, JSON_Boolean trackObjectMembers)
2705 {
2706    if (!parser || GET_FLAGS(parser->state, PARSER_STARTED))
2707    {
2708       return JSON_Failure;
2709    }
2710    SET_FLAGS(ParserFlags, parser->flags, PARSER_TRACK_OBJECT_MEMBERS, trackObjectMembers);
2711    return JSON_Success;
2712 }
2713 
JSON_Parser_GetStopAfterEmbeddedDocument(JSON_Parser parser)2714 JSON_Boolean JSON_CALL JSON_Parser_GetStopAfterEmbeddedDocument(JSON_Parser parser)
2715 {
2716    return (parser && GET_FLAGS(parser->flags, PARSER_EMBEDDED_DOCUMENT)) ? JSON_True : JSON_False;
2717 }
2718 
JSON_Parser_SetStopAfterEmbeddedDocument(JSON_Parser parser,JSON_Boolean stopAfterEmbeddedDocument)2719 JSON_Status JSON_CALL JSON_Parser_SetStopAfterEmbeddedDocument(
2720       JSON_Parser parser, JSON_Boolean stopAfterEmbeddedDocument)
2721 {
2722    if (!parser || GET_FLAGS(parser->state, PARSER_STARTED))
2723    {
2724       return JSON_Failure;
2725    }
2726    SET_FLAGS(ParserFlags, parser->flags, PARSER_EMBEDDED_DOCUMENT, stopAfterEmbeddedDocument);
2727    return JSON_Success;
2728 }
2729 
JSON_Parser_GetError(JSON_Parser parser)2730 JSON_Error JSON_CALL JSON_Parser_GetError(JSON_Parser parser)
2731 {
2732    return parser ? (JSON_Error)parser->error : JSON_Error_None;
2733 }
2734 
JSON_Parser_GetErrorLocation(JSON_Parser parser,JSON_Location * pLocation)2735 JSON_Status JSON_CALL JSON_Parser_GetErrorLocation(
2736       JSON_Parser parser, JSON_Location* pLocation)
2737 {
2738    if (!pLocation || !parser || parser->error == JSON_Error_None)
2739       return JSON_Failure;
2740 
2741    if (parser->errorOffset == ERROR_LOCATION_IS_TOKEN_START)
2742    {
2743       pLocation->byte = parser->tokenLocationByte;
2744       pLocation->line = parser->tokenLocationLine;
2745       pLocation->column = parser->tokenLocationColumn;
2746    }
2747    else
2748    {
2749       pLocation->byte = parser->codepointLocationByte - (SHORTEST_ENCODING_SEQUENCE(parser->inputEncoding) * parser->errorOffset);
2750       pLocation->line = parser->codepointLocationLine;
2751       pLocation->column = parser->codepointLocationColumn - parser->errorOffset;
2752    }
2753    pLocation->depth = parser->depth;
2754    return JSON_Success;
2755 }
2756 
JSON_Parser_GetTokenLocation(JSON_Parser parser,JSON_Location * pLocation)2757 JSON_Status JSON_CALL JSON_Parser_GetTokenLocation(
2758       JSON_Parser parser, JSON_Location* pLocation)
2759 {
2760    if (!parser || !pLocation || !GET_FLAGS(parser->state, PARSER_IN_TOKEN_HANDLER))
2761       return JSON_Failure;
2762 
2763    pLocation->byte = parser->tokenLocationByte;
2764    pLocation->line = parser->tokenLocationLine;
2765    pLocation->column = parser->tokenLocationColumn;
2766    pLocation->depth = parser->depth;
2767    return JSON_Success;
2768 }
2769 
JSON_Parser_GetAfterTokenLocation(JSON_Parser parser,JSON_Location * pLocation)2770 JSON_Status JSON_CALL JSON_Parser_GetAfterTokenLocation(
2771       JSON_Parser parser, JSON_Location* pLocation)
2772 {
2773    if (!parser || !pLocation || !GET_FLAGS(parser->state, PARSER_IN_TOKEN_HANDLER))
2774       return JSON_Failure;
2775 
2776    pLocation->byte = parser->codepointLocationByte;
2777    pLocation->line = parser->codepointLocationLine;
2778    pLocation->column = parser->codepointLocationColumn;
2779    pLocation->depth = parser->depth;
2780    return JSON_Success;
2781 }
2782 
JSON_Parser_GetEncodingDetectedHandler(JSON_Parser parser)2783 JSON_Parser_NullHandler JSON_CALL JSON_Parser_GetEncodingDetectedHandler(JSON_Parser parser)
2784 {
2785    return parser ? parser->encodingDetectedHandler : NULL;
2786 }
2787 
JSON_Parser_SetEncodingDetectedHandler(JSON_Parser parser,JSON_Parser_EncodingDetectedHandler handler)2788 JSON_Status JSON_CALL JSON_Parser_SetEncodingDetectedHandler(
2789       JSON_Parser parser, JSON_Parser_EncodingDetectedHandler handler)
2790 {
2791    if (!parser)
2792       return JSON_Failure;
2793 
2794    parser->encodingDetectedHandler = handler;
2795    return JSON_Success;
2796 }
2797 
JSON_Parser_GetNullHandler(JSON_Parser parser)2798 JSON_Parser_NullHandler JSON_CALL JSON_Parser_GetNullHandler(JSON_Parser parser)
2799 {
2800    return parser ? parser->nullHandler : NULL;
2801 }
2802 
JSON_Parser_SetNullHandler(JSON_Parser parser,JSON_Parser_NullHandler handler)2803 JSON_Status JSON_CALL JSON_Parser_SetNullHandler(
2804       JSON_Parser parser, JSON_Parser_NullHandler handler)
2805 {
2806    if (!parser)
2807       return JSON_Failure;
2808 
2809    parser->nullHandler = handler;
2810    return JSON_Success;
2811 }
2812 
JSON_Parser_GetBooleanHandler(JSON_Parser parser)2813 JSON_Parser_BooleanHandler JSON_CALL JSON_Parser_GetBooleanHandler(JSON_Parser parser)
2814 {
2815    return parser ? parser->booleanHandler : NULL;
2816 }
2817 
JSON_Parser_SetBooleanHandler(JSON_Parser parser,JSON_Parser_BooleanHandler handler)2818 JSON_Status JSON_CALL JSON_Parser_SetBooleanHandler(
2819       JSON_Parser parser, JSON_Parser_BooleanHandler handler)
2820 {
2821    if (!parser)
2822       return JSON_Failure;
2823 
2824    parser->booleanHandler = handler;
2825    return JSON_Success;
2826 }
2827 
JSON_Parser_GetStringHandler(JSON_Parser parser)2828 JSON_Parser_StringHandler JSON_CALL JSON_Parser_GetStringHandler(JSON_Parser parser)
2829 {
2830    return parser ? parser->stringHandler : NULL;
2831 }
2832 
JSON_Parser_SetStringHandler(JSON_Parser parser,JSON_Parser_StringHandler handler)2833 JSON_Status JSON_CALL JSON_Parser_SetStringHandler(
2834       JSON_Parser parser, JSON_Parser_StringHandler handler)
2835 {
2836    if (!parser)
2837       return JSON_Failure;
2838 
2839    parser->stringHandler = handler;
2840    return JSON_Success;
2841 }
2842 
JSON_Parser_GetNumberHandler(JSON_Parser parser)2843 JSON_Parser_NumberHandler JSON_CALL JSON_Parser_GetNumberHandler(JSON_Parser parser)
2844 {
2845    return parser ? parser->numberHandler : NULL;
2846 }
2847 
JSON_Parser_SetNumberHandler(JSON_Parser parser,JSON_Parser_NumberHandler handler)2848 JSON_Status JSON_CALL JSON_Parser_SetNumberHandler(
2849       JSON_Parser parser, JSON_Parser_NumberHandler handler)
2850 {
2851    if (!parser)
2852       return JSON_Failure;
2853 
2854    parser->numberHandler = handler;
2855    return JSON_Success;
2856 }
2857 
JSON_Parser_GetSpecialNumberHandler(JSON_Parser parser)2858 JSON_Parser_SpecialNumberHandler JSON_CALL JSON_Parser_GetSpecialNumberHandler(JSON_Parser parser)
2859 {
2860    return parser ? parser->specialNumberHandler : NULL;
2861 }
2862 
JSON_Parser_SetSpecialNumberHandler(JSON_Parser parser,JSON_Parser_SpecialNumberHandler handler)2863 JSON_Status JSON_CALL JSON_Parser_SetSpecialNumberHandler(
2864       JSON_Parser parser, JSON_Parser_SpecialNumberHandler handler)
2865 {
2866    if (!parser)
2867       return JSON_Failure;
2868    parser->specialNumberHandler = handler;
2869    return JSON_Success;
2870 }
2871 
JSON_Parser_GetStartObjectHandler(JSON_Parser parser)2872 JSON_Parser_StartObjectHandler JSON_CALL JSON_Parser_GetStartObjectHandler(JSON_Parser parser)
2873 {
2874    return parser ? parser->startObjectHandler : NULL;
2875 }
2876 
JSON_Parser_SetStartObjectHandler(JSON_Parser parser,JSON_Parser_StartObjectHandler handler)2877 JSON_Status JSON_CALL JSON_Parser_SetStartObjectHandler(
2878       JSON_Parser parser, JSON_Parser_StartObjectHandler handler)
2879 {
2880    if (!parser)
2881       return JSON_Failure;
2882 
2883    parser->startObjectHandler = handler;
2884    return JSON_Success;
2885 }
2886 
JSON_Parser_GetEndObjectHandler(JSON_Parser parser)2887 JSON_Parser_EndObjectHandler JSON_CALL JSON_Parser_GetEndObjectHandler(JSON_Parser parser)
2888 {
2889    return parser ? parser->endObjectHandler : NULL;
2890 }
2891 
JSON_Parser_SetEndObjectHandler(JSON_Parser parser,JSON_Parser_EndObjectHandler handler)2892 JSON_Status JSON_CALL JSON_Parser_SetEndObjectHandler(
2893       JSON_Parser parser, JSON_Parser_EndObjectHandler handler)
2894 {
2895    if (!parser)
2896       return JSON_Failure;
2897 
2898    parser->endObjectHandler = handler;
2899    return JSON_Success;
2900 }
2901 
JSON_Parser_GetObjectMemberHandler(JSON_Parser parser)2902 JSON_Parser_ObjectMemberHandler JSON_CALL JSON_Parser_GetObjectMemberHandler(JSON_Parser parser)
2903 {
2904    return parser ? parser->objectMemberHandler : NULL;
2905 }
2906 
JSON_Parser_SetObjectMemberHandler(JSON_Parser parser,JSON_Parser_ObjectMemberHandler handler)2907 JSON_Status JSON_CALL JSON_Parser_SetObjectMemberHandler(
2908       JSON_Parser parser, JSON_Parser_ObjectMemberHandler handler)
2909 {
2910    if (!parser)
2911       return JSON_Failure;
2912 
2913    parser->objectMemberHandler = handler;
2914    return JSON_Success;
2915 }
2916 
JSON_Parser_GetStartArrayHandler(JSON_Parser parser)2917 JSON_Parser_StartArrayHandler JSON_CALL JSON_Parser_GetStartArrayHandler(JSON_Parser parser)
2918 {
2919    return parser ? parser->startArrayHandler : NULL;
2920 }
2921 
JSON_Parser_SetStartArrayHandler(JSON_Parser parser,JSON_Parser_StartArrayHandler handler)2922 JSON_Status JSON_CALL JSON_Parser_SetStartArrayHandler(
2923       JSON_Parser parser, JSON_Parser_StartArrayHandler handler)
2924 {
2925    if (!parser)
2926       return JSON_Failure;
2927 
2928    parser->startArrayHandler = handler;
2929    return JSON_Success;
2930 }
2931 
JSON_Parser_GetEndArrayHandler(JSON_Parser parser)2932 JSON_Parser_EndArrayHandler JSON_CALL JSON_Parser_GetEndArrayHandler(JSON_Parser parser)
2933 {
2934    return parser ? parser->endArrayHandler : NULL;
2935 }
2936 
JSON_Parser_SetEndArrayHandler(JSON_Parser parser,JSON_Parser_EndArrayHandler handler)2937 JSON_Status JSON_CALL JSON_Parser_SetEndArrayHandler(
2938       JSON_Parser parser, JSON_Parser_EndArrayHandler handler)
2939 {
2940    if (!parser)
2941       return JSON_Failure;
2942 
2943    parser->endArrayHandler = handler;
2944    return JSON_Success;
2945 }
2946 
JSON_Parser_GetArrayItemHandler(JSON_Parser parser)2947 JSON_Parser_ArrayItemHandler JSON_CALL JSON_Parser_GetArrayItemHandler(JSON_Parser parser)
2948 {
2949    return parser ? parser->arrayItemHandler : NULL;
2950 }
2951 
JSON_Parser_SetArrayItemHandler(JSON_Parser parser,JSON_Parser_ArrayItemHandler handler)2952 JSON_Status JSON_CALL JSON_Parser_SetArrayItemHandler(
2953       JSON_Parser parser, JSON_Parser_ArrayItemHandler handler)
2954 {
2955    if (!parser)
2956       return JSON_Failure;
2957 
2958    parser->arrayItemHandler = handler;
2959    return JSON_Success;
2960 }
2961 
JSON_Parser_Parse(JSON_Parser parser,const char * pBytes,size_t length,JSON_Boolean isFinal)2962 JSON_Status JSON_CALL JSON_Parser_Parse(JSON_Parser parser, const char* pBytes, size_t length, JSON_Boolean isFinal)
2963 {
2964    JSON_Status status = JSON_Failure;
2965    if (parser && (pBytes || !length) && !GET_FLAGS(parser->state, PARSER_FINISHED | PARSER_IN_PROTECTED_API))
2966    {
2967       int finishedParsing = 0;
2968       SET_FLAGS_ON(ParserState, parser->state, PARSER_STARTED | PARSER_IN_PROTECTED_API);
2969       if (JSON_Parser_ProcessInputBytes(parser, (const byte*)pBytes, length))
2970       {
2971          /* New input was parsed successfully. */
2972          if (isFinal)
2973          {
2974             /* Make sure there is nothing pending in the decoder, lexer,
2975                or parser. */
2976             if (JSON_Parser_FlushDecoder(parser) &&
2977                   JSON_Parser_FlushLexer(parser) &&
2978                   JSON_Parser_FlushParser(parser))
2979                status = JSON_Success;
2980 
2981             finishedParsing = 1;
2982          }
2983          else
2984             status = JSON_Success;
2985       }
2986       else
2987       {
2988          /* New input failed to parse. */
2989          finishedParsing = 1;
2990       }
2991       if (finishedParsing)
2992       {
2993          SET_FLAGS_ON(ParserState, parser->state, PARSER_FINISHED);
2994       }
2995       SET_FLAGS_OFF(ParserState, parser->state, PARSER_IN_PROTECTED_API);
2996    }
2997    return status;
2998 }
2999 
3000 #endif /* JSON_NO_PARSER */
3001 
3002 /******************** JSON Writer ********************/
3003 
3004 #ifndef JSON_NO_WRITER
3005 
3006 /* Combinable writer state flags. */
3007 #define WRITER_RESET            0x0
3008 #define WRITER_STARTED          0x1
3009 #define WRITER_IN_PROTECTED_API 0x2
3010 typedef byte WriterState;
3011 
3012 /* Combinable writer settings flags. */
3013 #define WRITER_DEFAULT_FLAGS    0x0
3014 #define WRITER_USE_CRLF         0x1
3015 #define WRITER_REPLACE_INVALID  0x2
3016 #define WRITER_ESCAPE_NON_ASCII 0x4
3017 typedef byte WriterFlags;
3018 
3019 /* A writer instance. */
3020 struct JSON_Writer_Data
3021 {
3022    JSON_MemorySuite          memorySuite;
3023    void*                     userData;
3024    WriterState               state;
3025    WriterFlags               flags;
3026    Encoding                  outputEncoding;
3027    Error                     error;
3028    GrammarianData            grammarianData;
3029    JSON_Writer_OutputHandler outputHandler;
3030 };
3031 
3032 /* Writer internal functions. */
3033 
/* Puts every field except memorySuite back to its default value.
   isInitialized is forwarded unchanged to Grammarian_Reset(). */
static void JSON_Writer_ResetData(JSON_Writer writer, int isInitialized)
{
   Grammarian_Reset(&writer->grammarianData, isInitialized);
   writer->outputHandler = NULL;
   writer->userData = NULL;
   writer->error = JSON_Error_None;
   writer->outputEncoding = JSON_UTF8;
   writer->flags = WRITER_DEFAULT_FLAGS;

   /* The state is cleared last: this is what drops
      WRITER_IN_PROTECTED_API for JSON_Writer_Reset(). */
   writer->state = WRITER_RESET;
}
3044 
JSON_Writer_SetError(JSON_Writer writer,Error error)3045 static void JSON_Writer_SetError(JSON_Writer writer, Error error)
3046 {
3047    writer->error = error;
3048 }
3049 
JSON_Writer_ProcessToken(JSON_Writer writer,Symbol token)3050 static JSON_Status JSON_Writer_ProcessToken(JSON_Writer writer, Symbol token)
3051 {
3052    GrammarianOutput output = Grammarian_ProcessToken(&writer->grammarianData, token, &writer->memorySuite);
3053    switch (GRAMMARIAN_RESULT_CODE(output))
3054    {
3055       case REJECTED_TOKEN:
3056          JSON_Writer_SetError(writer, JSON_Error_UnexpectedToken);
3057          return JSON_Failure;
3058 
3059       case SYMBOL_STACK_FULL:
3060          JSON_Writer_SetError(writer, JSON_Error_OutOfMemory);
3061          return JSON_Failure;
3062    }
3063    return JSON_Success;
3064 }
3065 
JSON_Writer_OutputBytes(JSON_Writer writer,const byte * pBytes,size_t length)3066 static JSON_Status JSON_Writer_OutputBytes(JSON_Writer writer, const byte* pBytes, size_t length)
3067 {
3068    if (writer->outputHandler && length)
3069    {
3070       if (writer->outputHandler(writer, (const char*)pBytes, length) != JSON_Writer_Continue)
3071       {
3072          JSON_Writer_SetError(writer, JSON_Error_AbortedByHandler);
3073          return JSON_Failure;
3074       }
3075    }
3076    return JSON_Success;
3077 }
3078 
/* Decides how codepoint c must be written inside a JSON string.

   Returns:
     0     - c needs no escaping and is written as a plain encoding sequence;
     'u'   - c must be written as one or two \uXXXX hex escape sequences;
     other - the character that follows the backslash in a simple escape
             (for example 'n' for a line feed).

   Fix: with WRITER_ESCAPE_NON_ASCII set, the comparison used to be
   "c > FIRST_NON_ASCII_CODEPOINT", which left the first non-ASCII codepoint
   itself unescaped. It is now ">=" so that every non-ASCII codepoint is
   hex-escaped. */
static Codepoint JSON_Writer_GetCodepointEscapeCharacter(JSON_Writer writer, Codepoint c)
{
   switch (c)
   {
      case BACKSPACE_CODEPOINT:
         return 'b';

      case TAB_CODEPOINT:
         return 't';

      case LINE_FEED_CODEPOINT:
         return 'n';

      case FORM_FEED_CODEPOINT:
         return 'f';

      case CARRIAGE_RETURN_CODEPOINT:
         return 'r';

      case '"':
         return '"';

      /* Forward slashes are deliberately left unescaped. */

      case '\\':
         return '\\';

      case DELETE_CODEPOINT:
      case LINE_SEPARATOR_CODEPOINT:
      case PARAGRAPH_SEPARATOR_CODEPOINT:
         return 'u';

      default:
         /* Control characters and noncharacters are always hex-escaped;
            non-ASCII codepoints only when the client asked for it. */
         if (c < FIRST_NON_CONTROL_CODEPOINT || IS_NONCHARACTER(c) ||
               (GET_FLAGS(writer->flags, WRITER_ESCAPE_NON_ASCII) && c >= FIRST_NON_ASCII_CODEPOINT))
            return 'u';
         break;
   }
   return 0;
}
3120 
3121 typedef struct tag_WriteBufferData
3122 {
3123    size_t used;
3124    byte   bytes[256];
3125 } WriteBufferData;
3126 typedef WriteBufferData* WriteBuffer;
3127 
/* Marks the buffer as empty; the byte storage itself is left untouched. */
static void WriteBuffer_Reset(WriteBuffer buffer)
{
   buffer->used = 0;
}
3132 
WriteBuffer_Flush(WriteBuffer buffer,JSON_Writer writer)3133 static JSON_Status WriteBuffer_Flush(WriteBuffer buffer, JSON_Writer writer)
3134 {
3135    JSON_Status status = JSON_Writer_OutputBytes(writer, buffer->bytes, buffer->used);
3136    buffer->used = 0;
3137    return status;
3138 }
3139 
WriteBuffer_WriteBytes(WriteBuffer buffer,JSON_Writer writer,const byte * pBytes,size_t length)3140 static JSON_Status WriteBuffer_WriteBytes(WriteBuffer buffer, JSON_Writer writer, const byte* pBytes, size_t length)
3141 {
3142    if (buffer->used + length > sizeof(buffer->bytes) &&
3143          !WriteBuffer_Flush(buffer, writer))
3144       return JSON_Failure;
3145 
3146    memcpy(&buffer->bytes[buffer->used], pBytes, length);
3147    buffer->used += length;
3148    return JSON_Success;
3149 }
3150 
WriteBuffer_WriteCodepoint(WriteBuffer buffer,JSON_Writer writer,Codepoint c)3151 static JSON_Status WriteBuffer_WriteCodepoint(WriteBuffer buffer, JSON_Writer writer, Codepoint c)
3152 {
3153    if (buffer->used + LONGEST_ENCODING_SEQUENCE > sizeof(buffer->bytes) &&
3154          !WriteBuffer_Flush(buffer, writer))
3155       return JSON_Failure;
3156 
3157    buffer->used += EncodeCodepoint(c, writer->outputEncoding, &buffer->bytes[buffer->used]);
3158    return JSON_Success;
3159 }
3160 
WriteBuffer_WriteHexEscapeSequence(WriteBuffer buffer,JSON_Writer writer,Codepoint c)3161 static JSON_Status WriteBuffer_WriteHexEscapeSequence(WriteBuffer buffer, JSON_Writer writer, Codepoint c)
3162 {
3163    if (c >= FIRST_NON_BMP_CODEPOINT)
3164    {
3165       /* Non-BMP codepoints must be hex-escaped by escaping the UTF-16
3166          surrogate pair for the codepoint. We put the leading surrogate
3167          in the low 16 bits of c so that it gets written first, then
3168          the second pass through the loop will write out the trailing
3169          surrogate. x*/
3170       c = SURROGATES_FROM_CODEPOINT(c);
3171       c = (c << 16) | (c >> 16);
3172    }
3173    do
3174    {
3175       static const byte hexDigits[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
3176       byte escapeSequence[6];
3177       int i;
3178       escapeSequence[0] = '\\';
3179       escapeSequence[1] = 'u';
3180       escapeSequence[2] = hexDigits[(c >> 12) & 0xF];
3181       escapeSequence[3] = hexDigits[(c >> 8) & 0xF];
3182       escapeSequence[4] = hexDigits[(c >> 4) & 0xF];
3183       escapeSequence[5] = hexDigits[c & 0xF];
3184       for (i = 0; i < sizeof(escapeSequence); i++)
3185       {
3186          if (!WriteBuffer_WriteCodepoint(buffer, writer, escapeSequence[i]))
3187             return JSON_Failure;
3188       }
3189       c >>= 16;
3190    } while (c);
3191    return JSON_Success;
3192 }
3193 
JSON_Writer_OutputString(JSON_Writer writer,const byte * pBytes,size_t length,Encoding encoding)3194 static JSON_Status JSON_Writer_OutputString(JSON_Writer writer, const byte* pBytes, size_t length, Encoding encoding)
3195 {
3196    static const byte quoteUTF[] = { 0, 0, 0, '"', 0, 0, 0 };
3197    static const byte* const quoteEncodings[5] = { quoteUTF + 3, quoteUTF + 3, quoteUTF + 2, quoteUTF + 3, quoteUTF };
3198 
3199    const byte* pQuoteEncoded = quoteEncodings[writer->outputEncoding - 1];
3200    size_t minSequenceLength = (size_t)SHORTEST_ENCODING_SEQUENCE(writer->outputEncoding);
3201    DecoderData decoderData;
3202    WriteBufferData bufferData;
3203    size_t i = 0;
3204 
3205    WriteBuffer_Reset(&bufferData);
3206 
3207    /* Start quote. */
3208    if (!WriteBuffer_WriteBytes(&bufferData, writer, pQuoteEncoded, minSequenceLength))
3209       return JSON_Failure;
3210 
3211    /* String contents. */
3212    Decoder_Reset(&decoderData);
3213    while (i < length)
3214    {
3215       DecoderOutput output = Decoder_ProcessByte(&decoderData, encoding, pBytes[i]);
3216       DecoderResultCode result = DECODER_RESULT_CODE(output);
3217       Codepoint c;
3218       Codepoint escapeCharacter;
3219       switch (result)
3220       {
3221          case SEQUENCE_PENDING:
3222             i++;
3223             break;
3224 
3225          case SEQUENCE_COMPLETE:
3226             c = DECODER_CODEPOINT(output);
3227             escapeCharacter = JSON_Writer_GetCodepointEscapeCharacter(writer, c);
3228             switch (escapeCharacter)
3229             {
3230                case 0:
3231                   /* Output the codepoint as a normal encoding sequence. */
3232                   if (!WriteBuffer_WriteCodepoint(&bufferData, writer, c))
3233                      return JSON_Failure;
3234                   break;
3235 
3236                case 'u':
3237                   /* Output the codepoint as 1 or 2 hex escape sequences. */
3238                   if (!WriteBuffer_WriteHexEscapeSequence(&bufferData, writer, c))
3239                      return JSON_Failure;
3240                   break;
3241 
3242                default:
3243                   /* Output the codepoint as a simple escape sequence. */
3244                   if (!WriteBuffer_WriteCodepoint(&bufferData, writer, '\\') ||
3245                         !WriteBuffer_WriteCodepoint(&bufferData, writer, escapeCharacter))
3246                      return JSON_Failure;
3247                   break;
3248             }
3249             i++;
3250             break;
3251 
3252          case SEQUENCE_INVALID_INCLUSIVE:
3253             i++;
3254             /* fallthrough */
3255          case SEQUENCE_INVALID_EXCLUSIVE:
3256             if (GET_FLAGS(writer->flags, WRITER_REPLACE_INVALID))
3257             {
3258                if (!WriteBuffer_WriteHexEscapeSequence(&bufferData, writer, REPLACEMENT_CHARACTER_CODEPOINT))
3259                   return JSON_Failure;
3260             }
3261             else
3262             {
3263                /* Output whatever valid bytes we've accumulated before failing. */
3264                if (WriteBuffer_Flush(&bufferData, writer))
3265                   JSON_Writer_SetError(writer, JSON_Error_InvalidEncodingSequence);
3266                return JSON_Failure;
3267             }
3268             break;
3269       }
3270    }
3271    if (Decoder_SequencePending(&decoderData))
3272    {
3273       if (GET_FLAGS(writer->flags, WRITER_REPLACE_INVALID))
3274       {
3275          if (!WriteBuffer_WriteHexEscapeSequence(&bufferData, writer, REPLACEMENT_CHARACTER_CODEPOINT))
3276             return JSON_Failure;
3277       }
3278       else
3279       {
3280          /* Output whatever valid bytes we've accumulated before failing. */
3281          if (WriteBuffer_Flush(&bufferData, writer))
3282             JSON_Writer_SetError(writer, JSON_Error_InvalidEncodingSequence);
3283          return JSON_Failure;
3284       }
3285    }
3286 
3287    /* End quote. */
3288    if (!WriteBuffer_WriteBytes(&bufferData, writer, pQuoteEncoded, minSequenceLength) ||
3289          !WriteBuffer_Flush(&bufferData, writer))
3290       return JSON_Failure;
3291    return JSON_Success;
3292 }
3293 
/* Single step of the number-validation state machine used by the writer.

   Given the current lexer state and the next codepoint c, returns the
   following state. LEXER_ERROR means c is not allowed at this position.
   Passing EOF_CODEPOINT asks whether the number may end here: the answer is
   LEXING_WHITESPACE if it may, LEXER_ERROR if it may not. States that are
   not part of number lexing are returned unchanged. */
static LexerState LexNumberCharacter(LexerState state, Codepoint c)
{
   const int isDigit = (c >= '0' && c <= '9');
   const int isNonZeroDigit = (c >= '1' && c <= '9');
   const int isHexDigit = isDigit || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f');
   const int isExponentMarker = (c == 'e' || c == 'E');
   const int isEnd = (c == EOF_CODEPOINT);

   switch (state)
   {
      case LEXING_WHITESPACE:
         /* First character of the number. */
         if (c == '-')
            return LEXING_NUMBER_AFTER_MINUS;
         if (c == '0')
            return LEXING_NUMBER_AFTER_LEADING_ZERO;
         return isNonZeroDigit ? LEXING_NUMBER_DECIMAL_DIGITS : LEXER_ERROR;

      case LEXING_NUMBER_AFTER_MINUS:
         if (c == '0')
            return LEXING_NUMBER_AFTER_LEADING_NEGATIVE_ZERO;
         return isNonZeroDigit ? LEXING_NUMBER_DECIMAL_DIGITS : LEXER_ERROR;

      case LEXING_NUMBER_AFTER_LEADING_ZERO:
      case LEXING_NUMBER_AFTER_LEADING_NEGATIVE_ZERO:
         if (c == '.')
            return LEXING_NUMBER_AFTER_DOT;
         if (isExponentMarker)
            return LEXING_NUMBER_AFTER_E;
         /* A hex prefix is accepted after "0" but not after "-0". */
         if ((c == 'x' || c == 'X') && state == LEXING_NUMBER_AFTER_LEADING_ZERO)
            return LEXING_NUMBER_AFTER_X;
         return isEnd ? LEXING_WHITESPACE : LEXER_ERROR;

      case LEXING_NUMBER_AFTER_X:
         /* At least one hex digit must follow the prefix. */
         return isHexDigit ? LEXING_NUMBER_HEX_DIGITS : LEXER_ERROR;

      case LEXING_NUMBER_HEX_DIGITS:
         if (isHexDigit)
            return state;
         return isEnd ? LEXING_WHITESPACE : LEXER_ERROR;

      case LEXING_NUMBER_DECIMAL_DIGITS:
         if (isDigit)
            return state;
         if (c == '.')
            return LEXING_NUMBER_AFTER_DOT;
         if (isExponentMarker)
            return LEXING_NUMBER_AFTER_E;
         return isEnd ? LEXING_WHITESPACE : LEXER_ERROR;

      case LEXING_NUMBER_AFTER_DOT:
         /* At least one digit must follow the decimal point. */
         return isDigit ? LEXING_NUMBER_FRACTIONAL_DIGITS : LEXER_ERROR;

      case LEXING_NUMBER_FRACTIONAL_DIGITS:
         if (isDigit)
            return state;
         if (isExponentMarker)
            return LEXING_NUMBER_AFTER_E;
         return isEnd ? LEXING_WHITESPACE : LEXER_ERROR;

      case LEXING_NUMBER_AFTER_E:
         if (c == '+' || c == '-')
            return LEXING_NUMBER_AFTER_EXPONENT_SIGN;
         return isDigit ? LEXING_NUMBER_EXPONENT_DIGITS : LEXER_ERROR;

      case LEXING_NUMBER_AFTER_EXPONENT_SIGN:
         return isDigit ? LEXING_NUMBER_EXPONENT_DIGITS : LEXER_ERROR;

      case LEXING_NUMBER_EXPONENT_DIGITS:
         if (isDigit)
            return state;
         return isEnd ? LEXING_WHITESPACE : LEXER_ERROR;

      default:
         break;
   }
   return state;
}
3414 
JSON_Writer_OutputNumber(JSON_Writer writer,const byte * pBytes,size_t length,Encoding encoding)3415 static JSON_Status JSON_Writer_OutputNumber(JSON_Writer writer, const byte* pBytes, size_t length, Encoding encoding)
3416 {
3417    DecoderData decoderData;
3418    WriteBufferData bufferData;
3419    LexerState lexerState = LEXING_WHITESPACE;
3420    size_t i;
3421    Decoder_Reset(&decoderData);
3422    WriteBuffer_Reset(&bufferData);
3423    for (i = 0; i < length; i++)
3424    {
3425       DecoderOutput output = Decoder_ProcessByte(&decoderData, encoding, pBytes[i]);
3426       DecoderResultCode result = DECODER_RESULT_CODE(output);
3427       Codepoint c;
3428       switch (result)
3429       {
3430          case SEQUENCE_PENDING:
3431             break;
3432 
3433          case SEQUENCE_COMPLETE:
3434             c = DECODER_CODEPOINT(output);
3435             lexerState = LexNumberCharacter(lexerState, c);
3436             if (lexerState == LEXER_ERROR)
3437             {
3438                /* Output whatever valid bytes we've accumulated before failing. */
3439                if (WriteBuffer_Flush(&bufferData, writer))
3440                   JSON_Writer_SetError(writer, JSON_Error_InvalidNumber);
3441                return JSON_Failure;
3442             }
3443             if (!WriteBuffer_WriteCodepoint(&bufferData, writer, c))
3444                return JSON_Failure;
3445             break;
3446 
3447          case SEQUENCE_INVALID_INCLUSIVE:
3448          case SEQUENCE_INVALID_EXCLUSIVE:
3449             /* Output whatever valid bytes we've accumulated before failing. */
3450             if (WriteBuffer_Flush(&bufferData, writer))
3451                JSON_Writer_SetError(writer, JSON_Error_InvalidEncodingSequence);
3452             return JSON_Failure;
3453       }
3454    }
3455    if (!WriteBuffer_Flush(&bufferData, writer))
3456       return JSON_Failure;
3457    if (Decoder_SequencePending(&decoderData))
3458    {
3459       JSON_Writer_SetError(writer, JSON_Error_InvalidEncodingSequence);
3460       return JSON_Failure;
3461    }
3462    if (LexNumberCharacter(lexerState, EOF_CODEPOINT) == LEXER_ERROR)
3463    {
3464       JSON_Writer_SetError(writer, JSON_Error_InvalidNumber);
3465       return JSON_Failure;
3466    }
3467    return JSON_Success;
3468 }
3469 
3470 #define SPACES_PER_CHUNK 8
JSON_Writer_OutputSpaces(JSON_Writer writer,size_t numberOfSpaces)3471 static JSON_Status JSON_Writer_OutputSpaces(JSON_Writer writer, size_t numberOfSpaces)
3472 {
3473    static const byte spacesUTF8[SPACES_PER_CHUNK] = { ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ' };
3474    static const byte spacesUTF16[SPACES_PER_CHUNK * 2 + 1] = { 0, ' ', 0, ' ', 0, ' ', 0, ' ', 0, ' ', 0, ' ', 0, ' ', 0, ' ', 0 };
3475    static const byte spacesUTF32[SPACES_PER_CHUNK * 4 + 3] = { 0, 0, 0, ' ', 0, 0, 0, ' ', 0, 0, 0, ' ', 0, 0, 0, ' ', 0, 0, 0, ' ', 0, 0, 0, ' ', 0, 0, 0, ' ', 0, 0, 0, ' ', 0, 0, 0 };
3476    static const byte* const spacesEncodings[5] = { spacesUTF8, spacesUTF16 + 1, spacesUTF16, spacesUTF32 + 3, spacesUTF32 };
3477 
3478    size_t encodedLength = (size_t)SHORTEST_ENCODING_SEQUENCE(writer->outputEncoding);
3479    const byte* encoded = spacesEncodings[writer->outputEncoding - 1];
3480    while (numberOfSpaces > SPACES_PER_CHUNK)
3481    {
3482       if (!JSON_Writer_OutputBytes(writer, encoded, SPACES_PER_CHUNK * encodedLength))
3483          return JSON_Failure;
3484       numberOfSpaces -= SPACES_PER_CHUNK;
3485    }
3486 
3487    if (!JSON_Writer_OutputBytes(writer, encoded, numberOfSpaces * encodedLength))
3488       return JSON_Failure;
3489    return JSON_Success;
3490 }
3491 
JSON_Writer_WriteSimpleToken(JSON_Writer writer,Symbol token,const byte * const * encodings,size_t length)3492 static JSON_Status JSON_Writer_WriteSimpleToken(JSON_Writer writer, Symbol token, const byte* const* encodings, size_t length)
3493 {
3494    JSON_Status status = JSON_Failure;
3495    if (writer && !GET_FLAGS(writer->state, WRITER_IN_PROTECTED_API) && writer->error == JSON_Error_None)
3496    {
3497       size_t encodedLength = length * (size_t)SHORTEST_ENCODING_SEQUENCE(writer->outputEncoding);
3498       SET_FLAGS_ON(WriterState, writer->state, WRITER_STARTED | WRITER_IN_PROTECTED_API);
3499       if (JSON_Writer_ProcessToken(writer, token) &&
3500             JSON_Writer_OutputBytes(writer, encodings[writer->outputEncoding - 1], encodedLength))
3501          status = JSON_Success;
3502       SET_FLAGS_OFF(WriterState, writer->state, WRITER_IN_PROTECTED_API);
3503    }
3504    return status;
3505 }
3506 
3507 /* Writer API functions. */
3508 
JSON_Writer_Create(const JSON_MemorySuite * pMemorySuite)3509 JSON_Writer JSON_CALL JSON_Writer_Create(const JSON_MemorySuite* pMemorySuite)
3510 {
3511    JSON_Writer writer;
3512    JSON_MemorySuite memorySuite;
3513    if (pMemorySuite)
3514    {
3515       memorySuite = *pMemorySuite;
3516       /* The full memory suite must be specified. */
3517       if (!memorySuite.realloc || !memorySuite.free)
3518          return NULL;
3519    }
3520    else
3521       memorySuite = defaultMemorySuite;
3522 
3523    writer = (JSON_Writer)memorySuite.realloc(memorySuite.userData, NULL, sizeof(struct JSON_Writer_Data));
3524 
3525    if (!writer)
3526       return NULL;
3527 
3528    writer->memorySuite = memorySuite;
3529    JSON_Writer_ResetData(writer, 0/* isInitialized */);
3530    return writer;
3531 }
3532 
JSON_Writer_Free(JSON_Writer writer)3533 JSON_Status JSON_CALL JSON_Writer_Free(JSON_Writer writer)
3534 {
3535    if (!writer || GET_FLAGS(writer->state, WRITER_IN_PROTECTED_API))
3536       return JSON_Failure;
3537 
3538    SET_FLAGS_ON(WriterState, writer->state, WRITER_IN_PROTECTED_API);
3539    Grammarian_FreeAllocations(&writer->grammarianData, &writer->memorySuite);
3540    writer->memorySuite.free(writer->memorySuite.userData, writer);
3541    return JSON_Success;
3542 }
3543 
JSON_Writer_Reset(JSON_Writer writer)3544 JSON_Status JSON_CALL JSON_Writer_Reset(JSON_Writer writer)
3545 {
3546    if (!writer || GET_FLAGS(writer->state, WRITER_IN_PROTECTED_API))
3547       return JSON_Failure;
3548 
3549    SET_FLAGS_ON(WriterState, writer->state, WRITER_IN_PROTECTED_API);
3550    JSON_Writer_ResetData(writer, 1/* isInitialized */);
3551    /* Note that JSON_Writer_ResetData() unset WRITER_IN_PROTECTED_API for us. */
3552    return JSON_Success;
3553 }
3554 
JSON_Writer_GetUserData(JSON_Writer writer)3555 void* JSON_CALL JSON_Writer_GetUserData(JSON_Writer writer)
3556 {
3557    return writer ? writer->userData : NULL;
3558 }
3559 
JSON_Writer_SetUserData(JSON_Writer writer,void * userData)3560 JSON_Status JSON_CALL JSON_Writer_SetUserData(JSON_Writer writer, void* userData)
3561 {
3562    if (!writer)
3563       return JSON_Failure;
3564 
3565    writer->userData = userData;
3566    return JSON_Success;
3567 }
3568 
JSON_Writer_GetOutputEncoding(JSON_Writer writer)3569 JSON_Encoding JSON_CALL JSON_Writer_GetOutputEncoding(JSON_Writer writer)
3570 {
3571    return writer ? (JSON_Encoding)writer->outputEncoding : JSON_UTF8;
3572 }
3573 
JSON_Writer_SetOutputEncoding(JSON_Writer writer,JSON_Encoding encoding)3574 JSON_Status JSON_CALL JSON_Writer_SetOutputEncoding(JSON_Writer writer, JSON_Encoding encoding)
3575 {
3576    if (!writer || GET_FLAGS(writer->state, WRITER_STARTED) || encoding <= JSON_UnknownEncoding || encoding > JSON_UTF32BE)
3577       return JSON_Failure;
3578 
3579    writer->outputEncoding = (Encoding)encoding;
3580    return JSON_Success;
3581 }
3582 
JSON_Writer_GetUseCRLF(JSON_Writer writer)3583 JSON_Boolean JSON_CALL JSON_Writer_GetUseCRLF(JSON_Writer writer)
3584 {
3585    return (writer && GET_FLAGS(writer->flags, WRITER_USE_CRLF)) ? JSON_True : JSON_False;
3586 }
3587 
JSON_Writer_SetUseCRLF(JSON_Writer writer,JSON_Boolean useCRLF)3588 JSON_Status JSON_CALL JSON_Writer_SetUseCRLF(JSON_Writer writer, JSON_Boolean useCRLF)
3589 {
3590    if (!writer || GET_FLAGS(writer->state, WRITER_STARTED))
3591       return JSON_Failure;
3592 
3593    SET_FLAGS(WriterFlags, writer->flags, WRITER_USE_CRLF, useCRLF);
3594    return JSON_Success;
3595 }
3596 
JSON_Writer_GetReplaceInvalidEncodingSequences(JSON_Writer writer)3597 JSON_Boolean JSON_CALL JSON_Writer_GetReplaceInvalidEncodingSequences(JSON_Writer writer)
3598 {
3599    return (writer && GET_FLAGS(writer->flags, WRITER_REPLACE_INVALID)) ? JSON_True : JSON_False;
3600 }
3601 
JSON_Writer_SetReplaceInvalidEncodingSequences(JSON_Writer writer,JSON_Boolean replaceInvalidEncodingSequences)3602 JSON_Status JSON_CALL JSON_Writer_SetReplaceInvalidEncodingSequences(JSON_Writer writer, JSON_Boolean replaceInvalidEncodingSequences)
3603 {
3604    if (!writer || GET_FLAGS(writer->state, WRITER_STARTED))
3605       return JSON_Failure;
3606 
3607    SET_FLAGS(WriterFlags, writer->flags, WRITER_REPLACE_INVALID, replaceInvalidEncodingSequences);
3608    return JSON_Success;
3609 }
3610 
JSON_Writer_GetEscapeAllNonASCIICharacters(JSON_Writer writer)3611 JSON_Boolean JSON_CALL JSON_Writer_GetEscapeAllNonASCIICharacters(JSON_Writer writer)
3612 {
3613    return (writer && GET_FLAGS(writer->flags, WRITER_ESCAPE_NON_ASCII)) ? JSON_True : JSON_False;
3614 }
3615 
JSON_Writer_SetEscapeAllNonASCIICharacters(JSON_Writer writer,JSON_Boolean escapeAllNonASCIICharacters)3616 JSON_Status JSON_CALL JSON_Writer_SetEscapeAllNonASCIICharacters(JSON_Writer writer, JSON_Boolean escapeAllNonASCIICharacters)
3617 {
3618    if (!writer || GET_FLAGS(writer->state, WRITER_STARTED))
3619       return JSON_Failure;
3620 
3621    SET_FLAGS(WriterFlags, writer->flags, WRITER_ESCAPE_NON_ASCII, escapeAllNonASCIICharacters);
3622    return JSON_Success;
3623 }
3624 
JSON_Writer_GetError(JSON_Writer writer)3625 JSON_Error JSON_CALL JSON_Writer_GetError(JSON_Writer writer)
3626 {
3627    return writer ? (JSON_Error)writer->error : JSON_Error_None;
3628 }
3629 
JSON_Writer_GetOutputHandler(JSON_Writer writer)3630 JSON_Writer_OutputHandler JSON_CALL JSON_Writer_GetOutputHandler(JSON_Writer writer)
3631 {
3632    return writer ? writer->outputHandler : NULL;
3633 }
3634 
JSON_Writer_SetOutputHandler(JSON_Writer writer,JSON_Writer_OutputHandler handler)3635 JSON_Status JSON_CALL JSON_Writer_SetOutputHandler(JSON_Writer writer, JSON_Writer_OutputHandler handler)
3636 {
3637    if (!writer)
3638       return JSON_Failure;
3639 
3640    writer->outputHandler = handler;
3641    return JSON_Success;
3642 }
3643 
JSON_Writer_WriteNull(JSON_Writer writer)3644 JSON_Status JSON_CALL JSON_Writer_WriteNull(JSON_Writer writer)
3645 {
3646    static const byte nullUTF8[] = { 'n', 'u', 'l', 'l' };
3647    static const byte nullUTF16[] = { 0, 'n', 0, 'u', 0, 'l', 0, 'l', 0 };
3648    static const byte nullUTF32[] = { 0, 0, 0, 'n', 0, 0, 0, 'u', 0, 0, 0, 'l', 0, 0, 0, 'l', 0, 0, 0 };
3649    static const byte* const nullEncodings[5] = { nullUTF8, nullUTF16 + 1, nullUTF16, nullUTF32 + 3, nullUTF32 };
3650 
3651    return JSON_Writer_WriteSimpleToken(writer, T_NULL, nullEncodings, sizeof(nullUTF8));
3652 }
3653 
JSON_Writer_WriteBoolean(JSON_Writer writer,JSON_Boolean value)3654 JSON_Status JSON_CALL JSON_Writer_WriteBoolean(JSON_Writer writer, JSON_Boolean value)
3655 {
3656    static const byte trueUTF8[] = { 't', 'r', 'u', 'e' };
3657    static const byte trueUTF16[] = { 0, 't', 0, 'r', 0, 'u', 0, 'e', 0 };
3658    static const byte trueUTF32[] = { 0, 0, 0, 't', 0, 0, 0, 'r', 0, 0, 0, 'u', 0, 0, 0, 'e', 0, 0, 0 };
3659    static const byte* const trueEncodings[5] = { trueUTF8, trueUTF16 + 1, trueUTF16, trueUTF32 + 3, trueUTF32 };
3660 
3661    static const byte falseUTF8[] = { 'f', 'a', 'l', 's', 'e' };
3662    static const byte falseUTF16[] = { 0, 'f', 0, 'a', 0, 'l', 0, 's', 0, 'e', 0 };
3663    static const byte falseUTF32[] = { 0, 0, 0, 'f', 0, 0, 0, 'a', 0, 0, 0, 'l', 0, 0, 0, 's', 0, 0, 0, 'e', 0, 0, 0 };
3664    static const byte* const falseEncodings[5] = { falseUTF8, falseUTF16 + 1, falseUTF16, falseUTF32 + 3, falseUTF32 };
3665 
3666    Symbol token;
3667    const byte* const* encodings;
3668    size_t length;
3669    if (value)
3670    {
3671       token = T_TRUE;
3672       encodings = trueEncodings;
3673       length = sizeof(trueUTF8);
3674    }
3675    else
3676    {
3677       token = T_FALSE;
3678       encodings = falseEncodings;
3679       length = sizeof(falseUTF8);
3680    }
3681    return JSON_Writer_WriteSimpleToken(writer, token, encodings, length);
3682 }
3683 
JSON_Writer_WriteString(JSON_Writer writer,const char * pValue,size_t length,JSON_Encoding encoding)3684 JSON_Status JSON_CALL JSON_Writer_WriteString(JSON_Writer writer, const char* pValue, size_t length, JSON_Encoding encoding)
3685 {
3686    JSON_Status status = JSON_Failure;
3687    if (writer && (pValue || !length) && encoding > JSON_UnknownEncoding && encoding <= JSON_UTF32BE &&
3688          !GET_FLAGS(writer->state, WRITER_IN_PROTECTED_API) && writer->error == JSON_Error_None)
3689    {
3690       SET_FLAGS_ON(WriterState, writer->state, WRITER_STARTED | WRITER_IN_PROTECTED_API);
3691       if (JSON_Writer_ProcessToken(writer, T_STRING))
3692          status = JSON_Writer_OutputString(writer, (const byte*)pValue, length, (Encoding)encoding);
3693 
3694       SET_FLAGS_OFF(WriterState, writer->state, WRITER_IN_PROTECTED_API);
3695    }
3696    return status;
3697 }
3698 
JSON_Writer_WriteNumber(JSON_Writer writer,const char * pValue,size_t length,JSON_Encoding encoding)3699 JSON_Status JSON_CALL JSON_Writer_WriteNumber(JSON_Writer writer, const char* pValue, size_t length, JSON_Encoding encoding)
3700 {
3701    JSON_Status status = JSON_Failure;
3702    if (writer && pValue && length && encoding > JSON_UnknownEncoding && encoding <= JSON_UTF32BE &&
3703          !GET_FLAGS(writer->state, WRITER_IN_PROTECTED_API) && writer->error == JSON_Error_None)
3704    {
3705       SET_FLAGS_ON(WriterState, writer->state, WRITER_STARTED | WRITER_IN_PROTECTED_API);
3706       if (JSON_Writer_ProcessToken(writer, T_NUMBER))
3707          status = JSON_Writer_OutputNumber(writer, (const byte*)pValue, length, (Encoding)encoding);
3708 
3709       SET_FLAGS_OFF(WriterState, writer->state, WRITER_IN_PROTECTED_API);
3710    }
3711    return status;
3712 }
3713 
JSON_Writer_WriteSpecialNumber(JSON_Writer writer,JSON_SpecialNumber value)3714 JSON_Status JSON_CALL JSON_Writer_WriteSpecialNumber(JSON_Writer writer, JSON_SpecialNumber value)
3715 {
3716    static const byte nanUTF8[] = { 'N', 'a', 'N' };
3717    static const byte nanUTF16[] = { 0, 'N', 0, 'a', 0, 'N', 0 };
3718    static const byte nanUTF32[] = { 0, 0, 0, 'N', 0, 0, 0, 'a', 0, 0, 0, 'N', 0, 0, 0 };
3719    static const byte* const nanEncodings[5] = { nanUTF8, nanUTF16 + 1, nanUTF16, nanUTF32 + 3, nanUTF32 };
3720 
3721    static const byte ninfUTF8[] = { '-', 'I', 'n', 'f', 'i', 'n', 'i', 't', 'y' };
3722    static const byte ninfUTF16[] = { 0, '-', 0, 'I', 0, 'n', 0, 'f', 0, 'i', 0, 'n', 0, 'i', 0, 't', 0, 'y', 0 };
3723    static const byte ninfUTF32[] = { 0, 0, 0, '-', 0, 0, 0, 'I', 0, 0, 0, 'n', 0, 0, 0, 'f', 0, 0, 0, 'i', 0, 0, 0, 'n', 0, 0, 0, 'i', 0, 0, 0, 't', 0, 0, 0, 'y', 0, 0, 0 };
3724    static const byte* const infinityEncodings[5] = { ninfUTF8 + 1, ninfUTF16 + 3, ninfUTF16 + 2, ninfUTF32 + 7, ninfUTF32 + 4 };
3725    static const byte* const negativeInfinityEncodings[5] = { ninfUTF8, ninfUTF16 + 1, ninfUTF16, ninfUTF32 + 3, ninfUTF32 };
3726 
3727    Symbol token;
3728    const byte* const* encodings;
3729    size_t length;
3730    if (value == JSON_Infinity)
3731    {
3732       token = T_INFINITY;
3733       encodings = infinityEncodings;
3734       length = sizeof(ninfUTF8) - 1/* - */;
3735    }
3736    else if (value == JSON_NegativeInfinity)
3737    {
3738       token = T_NEGATIVE_INFINITY;
3739       encodings = negativeInfinityEncodings;
3740       length = sizeof(ninfUTF8);
3741    }
3742    else
3743    {
3744       token = T_NAN;
3745       encodings = nanEncodings;
3746       length = sizeof(nanUTF8);
3747    }
3748    return JSON_Writer_WriteSimpleToken(writer, token, encodings, length);
3749 }
3750 
JSON_Writer_WriteStartObject(JSON_Writer writer)3751 JSON_Status JSON_CALL JSON_Writer_WriteStartObject(JSON_Writer writer)
3752 {
3753    static const byte utf[] = { 0, 0, 0, '{', 0, 0, 0 };
3754    static const byte* const encodings[5] = { utf + 3, utf + 3, utf + 2, utf + 3, utf };
3755 
3756    return JSON_Writer_WriteSimpleToken(writer, T_LEFT_CURLY, encodings, 1);
3757 }
3758 
JSON_Writer_WriteEndObject(JSON_Writer writer)3759 JSON_Status JSON_CALL JSON_Writer_WriteEndObject(JSON_Writer writer)
3760 {
3761    static const byte utf[] = { 0, 0, 0, '}', 0, 0, 0 };
3762    static const byte* const encodings[5] = { utf + 3, utf + 3, utf + 2, utf + 3, utf };
3763 
3764    return JSON_Writer_WriteSimpleToken(writer, T_RIGHT_CURLY, encodings, 1);
3765 }
3766 
JSON_Writer_WriteStartArray(JSON_Writer writer)3767 JSON_Status JSON_CALL JSON_Writer_WriteStartArray(JSON_Writer writer)
3768 {
3769    static const byte utf[] = { 0, 0, 0, '[', 0, 0, 0 };
3770    static const byte* const encodings[5] = { utf + 3, utf + 3, utf + 2, utf + 3, utf };
3771 
3772    return JSON_Writer_WriteSimpleToken(writer, T_LEFT_SQUARE, encodings, 1);
3773 }
3774 
JSON_Writer_WriteEndArray(JSON_Writer writer)3775 JSON_Status JSON_CALL JSON_Writer_WriteEndArray(JSON_Writer writer)
3776 {
3777    static const byte utf[] = { 0, 0, 0, ']', 0, 0, 0 };
3778    static const byte* const encodings[5] = { utf + 3, utf + 3, utf + 2, utf + 3, utf };
3779 
3780    return JSON_Writer_WriteSimpleToken(writer, T_RIGHT_SQUARE, encodings, 1);
3781 }
3782 
JSON_Writer_WriteColon(JSON_Writer writer)3783 JSON_Status JSON_CALL JSON_Writer_WriteColon(JSON_Writer writer)
3784 {
3785    static const byte utf[] = { 0, 0, 0, ':', 0, 0, 0 };
3786    static const byte* const encodings[5] = { utf + 3, utf + 3, utf + 2, utf + 3, utf };
3787 
3788    return JSON_Writer_WriteSimpleToken(writer, T_COLON, encodings, 1);
3789 }
3790 
JSON_Writer_WriteComma(JSON_Writer writer)3791 JSON_Status JSON_CALL JSON_Writer_WriteComma(JSON_Writer writer)
3792 {
3793    static const byte utf[] = { 0, 0, 0, ',', 0, 0, 0 };
3794    static const byte* const encodings[5] = { utf + 3, utf + 3, utf + 2, utf + 3, utf };
3795 
3796    return JSON_Writer_WriteSimpleToken(writer, T_COMMA, encodings, 1);
3797 }
3798 
JSON_Writer_WriteSpace(JSON_Writer writer,size_t numberOfSpaces)3799 JSON_Status JSON_CALL JSON_Writer_WriteSpace(JSON_Writer writer, size_t numberOfSpaces)
3800 {
3801    JSON_Status status = JSON_Failure;
3802    if (writer && !GET_FLAGS(writer->state, WRITER_IN_PROTECTED_API) && writer->error == JSON_Error_None)
3803    {
3804       SET_FLAGS_ON(WriterState, writer->state, WRITER_STARTED | WRITER_IN_PROTECTED_API);
3805       status = JSON_Writer_OutputSpaces(writer, numberOfSpaces);
3806       SET_FLAGS_OFF(WriterState, writer->state, WRITER_IN_PROTECTED_API);
3807    }
3808    return status;
3809 }
3810 
JSON_Writer_WriteNewLine(JSON_Writer writer)3811 JSON_Status JSON_CALL JSON_Writer_WriteNewLine(JSON_Writer writer)
3812 {
3813    static const byte lfUTF[] = { 0, 0, 0, LINE_FEED_CODEPOINT, 0, 0, 0 };
3814    static const byte* const lfEncodings[5] = { lfUTF + 3, lfUTF + 3, lfUTF + 2, lfUTF + 3, lfUTF };
3815 
3816    static const byte crlfUTF8[] = { CARRIAGE_RETURN_CODEPOINT, LINE_FEED_CODEPOINT };
3817    static const byte crlfUTF16[] = { 0, CARRIAGE_RETURN_CODEPOINT, 0, LINE_FEED_CODEPOINT, 0 };
3818    static const byte crlfUTF32[] = { 0, 0, 0, CARRIAGE_RETURN_CODEPOINT, 0, 0, 0, LINE_FEED_CODEPOINT, 0, 0, 0 };
3819    static const byte* const crlfEncodings[5] = { crlfUTF8, crlfUTF16 + 1, crlfUTF16, crlfUTF32 + 3, crlfUTF32 };
3820 
3821    JSON_Status status = JSON_Failure;
3822    if (writer && !GET_FLAGS(writer->state, WRITER_IN_PROTECTED_API) && writer->error == JSON_Error_None)
3823    {
3824       const byte* const* encodings;
3825       size_t length;
3826       size_t encodedLength;
3827       SET_FLAGS_ON(WriterState, writer->state, WRITER_STARTED | WRITER_IN_PROTECTED_API);
3828       if (GET_FLAGS(writer->flags, WRITER_USE_CRLF))
3829       {
3830          encodings = crlfEncodings;
3831          length = 2;
3832       }
3833       else
3834       {
3835          encodings = lfEncodings;
3836          length = 1;
3837       }
3838       encodedLength = length * (size_t)SHORTEST_ENCODING_SEQUENCE(writer->outputEncoding);
3839       if (JSON_Writer_OutputBytes(writer, encodings[writer->outputEncoding - 1], encodedLength))
3840          status = JSON_Success;
3841       SET_FLAGS_OFF(WriterState, writer->state, WRITER_IN_PROTECTED_API);
3842    }
3843    return status;
3844 }
3845 
3846 #endif /* JSON_NO_WRITER */
3847 
3848 /******************** Miscellaneous API ********************/
3849 
JSON_LibraryVersion(void)3850 const JSON_Version* JSON_CALL JSON_LibraryVersion(void)
3851 {
3852    static JSON_Version version = { JSON_MAJOR_VERSION, JSON_MINOR_VERSION, JSON_MICRO_VERSION };
3853    return &version;
3854 }
3855 
JSON_ErrorString(JSON_Error error)3856 const char* JSON_CALL JSON_ErrorString(JSON_Error error)
3857 {
3858    /* This array must match the order and number of the JSON_Error enum. */
3859    static const char* errorStrings[] =
3860    {
3861       /* JSON_Error_None */                            "no error",
3862       /* JSON_Error_OutOfMemory */                     "could not allocate enough memory",
3863       /* JSON_Error_AbortedByHandler */                "the operation was aborted by a handler",
3864       /* JSON_Error_BOMNotAllowed */                   "the input begins with a byte-order mark (BOM), which is not allowed by RFC 4627",
3865       /* JSON_Error_InvalidEncodingSequence */         "the input contains a byte or sequence of bytes that is not valid for the input encoding",
3866       /* JSON_Error_UnknownToken */                    "the input contains an unknown token",
3867       /* JSON_Error_UnexpectedToken */                 "the input contains an unexpected token",
3868       /* JSON_Error_IncompleteToken */                 "the input ends in the middle of a token",
3869       /* JSON_Error_MoreTokensExpected */              "the input ends when more tokens are expected",
3870       /* JSON_Error_UnescapedControlCharacter */       "the input contains a string containing an unescaped control character (U+0000 - U+001F)",
3871       /* JSON_Error_InvalidEscapeSequence */           "the input contains a string containing an invalid escape sequence",
3872       /* JSON_Error_UnpairedSurrogateEscapeSequence */ "the input contains a string containing an unmatched UTF-16 surrogate codepoint",
3873       /* JSON_Error_TooLongString */                   "the input contains a string that is too long",
3874       /* JSON_Error_InvalidNumber */                   "the input contains an invalid number",
3875       /* JSON_Error_TooLongNumber */                   "the input contains a number that is too long",
3876       /* JSON_Error_DuplicateObjectMember */           "the input contains an object with duplicate members",
3877       /* JSON_Error_StoppedAfterEmbeddedDocument */    "the end of the embedded document was reached"
3878    };
3879    return ((unsigned int)error < (sizeof(errorStrings) / sizeof(errorStrings[0])))
3880       ? errorStrings[error]
3881       : "";
3882 }
3883 
3884 static const uint32_t endianEncodings = (((uint32_t)JSON_UTF32BE) << 24) | (((uint32_t)JSON_UTF16BE) << 16) | (((uint32_t)JSON_UTF16LE) << 8) | ((uint32_t)JSON_UTF32LE);
3885 
JSON_NativeUTF16Encoding(void)3886 JSON_Encoding JSON_CALL JSON_NativeUTF16Encoding(void)
3887 {
3888    return (JSON_Encoding)(((byte*)&endianEncodings)[1]);
3889 }
3890 
JSON_NativeUTF32Encoding(void)3891 JSON_Encoding JSON_CALL JSON_NativeUTF32Encoding(void)
3892 {
3893    return (JSON_Encoding)(((byte*)&endianEncodings)[0]);
3894 }
3895