1 // Copyright 2014 BitPay Inc.
2 // Distributed under the MIT software license, see the accompanying
3 // file COPYING or http://www.opensource.org/licenses/mit-license.php.
4 
5 #include <string.h>
6 #include <vector>
7 #include <stdio.h>
8 #include "univalue.h"
9 #include "univalue_utffilter.h"
10 
/*
 * Maximum number of simultaneously open arrays/objects that
 * UniValue::read() accepts; input nested more deeply is rejected.
 *
 * According to stackexchange, the original JSON test suite wanted
 * to limit depth to 22.  Widely-deployed PHP bails at depth 512,
 * so we will follow PHP's lead, which should be more than sufficient
 * (further stackexchange comments indicate depth > 32 rarely occurs).
 */
static const size_t MAX_JSON_DEPTH = 512;
18 
// Returns true when ch is one of the ASCII decimal digits '0'..'9'.
// The comparison is done on the raw character code, so it does not depend
// on the current locale.
static bool json_isdigit(int ch)
{
    if (ch < '0')
        return false;

    return ch <= '9';
}
23 
24 // convert hexadecimal string to unsigned integer
hatoui(const char * first,const char * last,unsigned int & out)25 static const char *hatoui(const char *first, const char *last,
26                           unsigned int& out)
27 {
28     unsigned int result = 0;
29     for (; first != last; ++first)
30     {
31         int digit;
32         if (json_isdigit(*first))
33             digit = *first - '0';
34 
35         else if (*first >= 'a' && *first <= 'f')
36             digit = *first - 'a' + 10;
37 
38         else if (*first >= 'A' && *first <= 'F')
39             digit = *first - 'A' + 10;
40 
41         else
42             break;
43 
44         result = 16 * result + digit;
45     }
46     out = result;
47 
48     return first;
49 }
50 
getJsonToken(std::string & tokenVal,unsigned int & consumed,const char * raw,const char * end)51 enum jtokentype getJsonToken(std::string& tokenVal, unsigned int& consumed,
52                             const char *raw, const char *end)
53 {
54     tokenVal.clear();
55     consumed = 0;
56 
57     const char *rawStart = raw;
58 
59     while (raw < end && (json_isspace(*raw)))          // skip whitespace
60         raw++;
61 
62     if (raw >= end)
63         return JTOK_NONE;
64 
65     switch (*raw) {
66 
67     case '{':
68         raw++;
69         consumed = (raw - rawStart);
70         return JTOK_OBJ_OPEN;
71     case '}':
72         raw++;
73         consumed = (raw - rawStart);
74         return JTOK_OBJ_CLOSE;
75     case '[':
76         raw++;
77         consumed = (raw - rawStart);
78         return JTOK_ARR_OPEN;
79     case ']':
80         raw++;
81         consumed = (raw - rawStart);
82         return JTOK_ARR_CLOSE;
83 
84     case ':':
85         raw++;
86         consumed = (raw - rawStart);
87         return JTOK_COLON;
88     case ',':
89         raw++;
90         consumed = (raw - rawStart);
91         return JTOK_COMMA;
92 
93     case 'n':
94     case 't':
95     case 'f':
96         if (!strncmp(raw, "null", 4)) {
97             raw += 4;
98             consumed = (raw - rawStart);
99             return JTOK_KW_NULL;
100         } else if (!strncmp(raw, "true", 4)) {
101             raw += 4;
102             consumed = (raw - rawStart);
103             return JTOK_KW_TRUE;
104         } else if (!strncmp(raw, "false", 5)) {
105             raw += 5;
106             consumed = (raw - rawStart);
107             return JTOK_KW_FALSE;
108         } else
109             return JTOK_ERR;
110 
111     case '-':
112     case '0':
113     case '1':
114     case '2':
115     case '3':
116     case '4':
117     case '5':
118     case '6':
119     case '7':
120     case '8':
121     case '9': {
122         // part 1: int
123         std::string numStr;
124 
125         const char *first = raw;
126 
127         const char *firstDigit = first;
128         if (!json_isdigit(*firstDigit))
129             firstDigit++;
130         if ((*firstDigit == '0') && json_isdigit(firstDigit[1]))
131             return JTOK_ERR;
132 
133         numStr += *raw;                       // copy first char
134         raw++;
135 
136         if ((*first == '-') && (raw < end) && (!json_isdigit(*raw)))
137             return JTOK_ERR;
138 
139         while (raw < end && json_isdigit(*raw)) {  // copy digits
140             numStr += *raw;
141             raw++;
142         }
143 
144         // part 2: frac
145         if (raw < end && *raw == '.') {
146             numStr += *raw;                   // copy .
147             raw++;
148 
149             if (raw >= end || !json_isdigit(*raw))
150                 return JTOK_ERR;
151             while (raw < end && json_isdigit(*raw)) { // copy digits
152                 numStr += *raw;
153                 raw++;
154             }
155         }
156 
157         // part 3: exp
158         if (raw < end && (*raw == 'e' || *raw == 'E')) {
159             numStr += *raw;                   // copy E
160             raw++;
161 
162             if (raw < end && (*raw == '-' || *raw == '+')) { // copy +/-
163                 numStr += *raw;
164                 raw++;
165             }
166 
167             if (raw >= end || !json_isdigit(*raw))
168                 return JTOK_ERR;
169             while (raw < end && json_isdigit(*raw)) { // copy digits
170                 numStr += *raw;
171                 raw++;
172             }
173         }
174 
175         tokenVal = numStr;
176         consumed = (raw - rawStart);
177         return JTOK_NUMBER;
178         }
179 
180     case '"': {
181         raw++;                                // skip "
182 
183         std::string valStr;
184         JSONUTF8StringFilter writer(valStr);
185 
186         while (true) {
187             if (raw >= end || (unsigned char)*raw < 0x20)
188                 return JTOK_ERR;
189 
190             else if (*raw == '\\') {
191                 raw++;                        // skip backslash
192 
193                 if (raw >= end)
194                     return JTOK_ERR;
195 
196                 switch (*raw) {
197                 case '"':  writer.push_back('\"'); break;
198                 case '\\': writer.push_back('\\'); break;
199                 case '/':  writer.push_back('/'); break;
200                 case 'b':  writer.push_back('\b'); break;
201                 case 'f':  writer.push_back('\f'); break;
202                 case 'n':  writer.push_back('\n'); break;
203                 case 'r':  writer.push_back('\r'); break;
204                 case 't':  writer.push_back('\t'); break;
205 
206                 case 'u': {
207                     unsigned int codepoint;
208                     if (raw + 1 + 4 >= end ||
209                         hatoui(raw + 1, raw + 1 + 4, codepoint) !=
210                                raw + 1 + 4)
211                         return JTOK_ERR;
212                     writer.push_back_u(codepoint);
213                     raw += 4;
214                     break;
215                     }
216                 default:
217                     return JTOK_ERR;
218 
219                 }
220 
221                 raw++;                        // skip esc'd char
222             }
223 
224             else if (*raw == '"') {
225                 raw++;                        // skip "
226                 break;                        // stop scanning
227             }
228 
229             else {
230                 writer.push_back(*raw);
231                 raw++;
232             }
233         }
234 
235         if (!writer.finalize())
236             return JTOK_ERR;
237         tokenVal = valStr;
238         consumed = (raw - rawStart);
239         return JTOK_STRING;
240         }
241 
242     default:
243         return JTOK_ERR;
244     }
245 }
246 
// Flags kept in the parser's expectMask describing what the next token is
// allowed (or not allowed) to be.  Each flag occupies its own bit so several
// can be pending at once.
enum expect_bits {
    EXP_OBJ_NAME  = 0x01U,   // next token must be an object key or '}'
    EXP_COLON     = 0x02U,   // next token must be ':'
    EXP_ARR_VALUE = 0x04U,   // next token must start a value or be ']'
    EXP_VALUE     = 0x08U,   // next token must start a value
    EXP_NOT_VALUE = 0x10U,   // next token must NOT start a value
};
254 
// Test, set and clear a single EXP_* flag in the local variable named
// `expectMask`.  The argument is the flag name without its EXP_ prefix,
// e.g. setExpect(VALUE) operates on EXP_VALUE.
#define expect(bit)      (expectMask & (EXP_##bit))
#define setExpect(bit)   (expectMask |= (EXP_##bit))
#define clearExpect(bit) (expectMask &= ~(EXP_##bit))
258 
read(const char * raw,size_t size)259 bool UniValue::read(const char *raw, size_t size)
260 {
261     clear();
262 
263     uint32_t expectMask = 0;
264     std::vector<UniValue*> stack;
265 
266     std::string tokenVal;
267     unsigned int consumed;
268     enum jtokentype tok = JTOK_NONE;
269     enum jtokentype last_tok = JTOK_NONE;
270     const char* end = raw + size;
271     do {
272         last_tok = tok;
273 
274         tok = getJsonToken(tokenVal, consumed, raw, end);
275         if (tok == JTOK_NONE || tok == JTOK_ERR)
276             return false;
277         raw += consumed;
278 
279         bool isValueOpen = jsonTokenIsValue(tok) ||
280             tok == JTOK_OBJ_OPEN || tok == JTOK_ARR_OPEN;
281 
282         if (expect(VALUE)) {
283             if (!isValueOpen)
284                 return false;
285             clearExpect(VALUE);
286 
287         } else if (expect(ARR_VALUE)) {
288             bool isArrValue = isValueOpen || (tok == JTOK_ARR_CLOSE);
289             if (!isArrValue)
290                 return false;
291 
292             clearExpect(ARR_VALUE);
293 
294         } else if (expect(OBJ_NAME)) {
295             bool isObjName = (tok == JTOK_OBJ_CLOSE || tok == JTOK_STRING);
296             if (!isObjName)
297                 return false;
298 
299         } else if (expect(COLON)) {
300             if (tok != JTOK_COLON)
301                 return false;
302             clearExpect(COLON);
303 
304         } else if (!expect(COLON) && (tok == JTOK_COLON)) {
305             return false;
306         }
307 
308         if (expect(NOT_VALUE)) {
309             if (isValueOpen)
310                 return false;
311             clearExpect(NOT_VALUE);
312         }
313 
314         switch (tok) {
315 
316         case JTOK_OBJ_OPEN:
317         case JTOK_ARR_OPEN: {
318             VType utyp = (tok == JTOK_OBJ_OPEN ? VOBJ : VARR);
319             if (!stack.size()) {
320                 if (utyp == VOBJ)
321                     setObject();
322                 else
323                     setArray();
324                 stack.push_back(this);
325             } else {
326                 UniValue tmpVal(utyp);
327                 UniValue *top = stack.back();
328                 top->values.push_back(tmpVal);
329 
330                 UniValue *newTop = &(top->values.back());
331                 stack.push_back(newTop);
332             }
333 
334             if (stack.size() > MAX_JSON_DEPTH)
335                 return false;
336 
337             if (utyp == VOBJ)
338                 setExpect(OBJ_NAME);
339             else
340                 setExpect(ARR_VALUE);
341             break;
342             }
343 
344         case JTOK_OBJ_CLOSE:
345         case JTOK_ARR_CLOSE: {
346             if (!stack.size() || (last_tok == JTOK_COMMA))
347                 return false;
348 
349             VType utyp = (tok == JTOK_OBJ_CLOSE ? VOBJ : VARR);
350             UniValue *top = stack.back();
351             if (utyp != top->getType())
352                 return false;
353 
354             stack.pop_back();
355             clearExpect(OBJ_NAME);
356             setExpect(NOT_VALUE);
357             break;
358             }
359 
360         case JTOK_COLON: {
361             if (!stack.size())
362                 return false;
363 
364             UniValue *top = stack.back();
365             if (top->getType() != VOBJ)
366                 return false;
367 
368             setExpect(VALUE);
369             break;
370             }
371 
372         case JTOK_COMMA: {
373             if (!stack.size() ||
374                 (last_tok == JTOK_COMMA) || (last_tok == JTOK_ARR_OPEN))
375                 return false;
376 
377             UniValue *top = stack.back();
378             if (top->getType() == VOBJ)
379                 setExpect(OBJ_NAME);
380             else
381                 setExpect(ARR_VALUE);
382             break;
383             }
384 
385         case JTOK_KW_NULL:
386         case JTOK_KW_TRUE:
387         case JTOK_KW_FALSE: {
388             UniValue tmpVal;
389             switch (tok) {
390             case JTOK_KW_NULL:
391                 // do nothing more
392                 break;
393             case JTOK_KW_TRUE:
394                 tmpVal.setBool(true);
395                 break;
396             case JTOK_KW_FALSE:
397                 tmpVal.setBool(false);
398                 break;
399             default: /* impossible */ break;
400             }
401 
402             if (!stack.size()) {
403                 *this = tmpVal;
404                 break;
405             }
406 
407             UniValue *top = stack.back();
408             top->values.push_back(tmpVal);
409 
410             setExpect(NOT_VALUE);
411             break;
412             }
413 
414         case JTOK_NUMBER: {
415             UniValue tmpVal(VNUM, tokenVal);
416             if (!stack.size()) {
417                 *this = tmpVal;
418                 break;
419             }
420 
421             UniValue *top = stack.back();
422             top->values.push_back(tmpVal);
423 
424             setExpect(NOT_VALUE);
425             break;
426             }
427 
428         case JTOK_STRING: {
429             if (expect(OBJ_NAME)) {
430                 UniValue *top = stack.back();
431                 top->keys.push_back(tokenVal);
432                 clearExpect(OBJ_NAME);
433                 setExpect(COLON);
434             } else {
435                 UniValue tmpVal(VSTR, tokenVal);
436                 if (!stack.size()) {
437                     *this = tmpVal;
438                     break;
439                 }
440                 UniValue *top = stack.back();
441                 top->values.push_back(tmpVal);
442             }
443 
444             setExpect(NOT_VALUE);
445             break;
446             }
447 
448         default:
449             return false;
450         }
451     } while (!stack.empty ());
452 
453     /* Check that nothing follows the initial construct (parsed above).  */
454     tok = getJsonToken(tokenVal, consumed, raw, end);
455     if (tok != JTOK_NONE)
456         return false;
457 
458     return true;
459 }
460 
461