1 // Copyright 2014 BitPay Inc.
2 // Distributed under the MIT software license, see the accompanying
3 // file COPYING or http://www.opensource.org/licenses/mit-license.php.
4
5 #include <string.h>
6 #include <vector>
7 #include <stdio.h>
8 #include "univalue.h"
9 #include "univalue_utffilter.h"
10
/*
 * Maximum container nesting depth accepted by the parser.
 *
 * The original JSON test suite reportedly wanted to cap depth at 22, while
 * the widely deployed PHP implementation gives up at depth 512. We adopt
 * PHP's limit, which should be far more than real documents need (depth
 * greater than 32 is reported to be rare).
 */
static const size_t MAX_JSON_DEPTH = 512;
18
// Return true when ch is one of the ASCII decimal digits '0' through '9'.
// The check is a plain range comparison and does not consult the locale.
static bool json_isdigit(int ch)
{
    if (ch < '0')
        return false;
    return ch <= '9';
}
23
// Convert a run of hexadecimal digits to an unsigned integer.
//
// Scans [first, last) and stops at the first character that is not a
// hexadecimal digit (0-9, a-f, A-F) or when `last` is reached. The value
// accumulated so far (0 if nothing was consumed) is stored in `out`.
// Returns a pointer to the position where scanning stopped, so a caller can
// verify that the expected number of digits was consumed.
static const char *hatoui(const char *first, const char *last,
                          unsigned int& out)
{
    unsigned int acc = 0;
    const char *cur = first;

    while (cur != last) {
        const char c = *cur;
        unsigned int nibble;

        if (c >= '0' && c <= '9')
            nibble = c - '0';
        else if (c >= 'a' && c <= 'f')
            nibble = c - 'a' + 10;
        else if (c >= 'A' && c <= 'F')
            nibble = c - 'A' + 10;
        else
            break;              // not a hex digit: stop scanning

        acc = acc * 16 + nibble;
        ++cur;
    }

    out = acc;
    return cur;
}
50
getJsonToken(std::string & tokenVal,unsigned int & consumed,const char * raw,const char * end)51 enum jtokentype getJsonToken(std::string& tokenVal, unsigned int& consumed,
52 const char *raw, const char *end)
53 {
54 tokenVal.clear();
55 consumed = 0;
56
57 const char *rawStart = raw;
58
59 while (raw < end && (json_isspace(*raw))) // skip whitespace
60 raw++;
61
62 if (raw >= end)
63 return JTOK_NONE;
64
65 switch (*raw) {
66
67 case '{':
68 raw++;
69 consumed = (raw - rawStart);
70 return JTOK_OBJ_OPEN;
71 case '}':
72 raw++;
73 consumed = (raw - rawStart);
74 return JTOK_OBJ_CLOSE;
75 case '[':
76 raw++;
77 consumed = (raw - rawStart);
78 return JTOK_ARR_OPEN;
79 case ']':
80 raw++;
81 consumed = (raw - rawStart);
82 return JTOK_ARR_CLOSE;
83
84 case ':':
85 raw++;
86 consumed = (raw - rawStart);
87 return JTOK_COLON;
88 case ',':
89 raw++;
90 consumed = (raw - rawStart);
91 return JTOK_COMMA;
92
93 case 'n':
94 case 't':
95 case 'f':
96 if (!strncmp(raw, "null", 4)) {
97 raw += 4;
98 consumed = (raw - rawStart);
99 return JTOK_KW_NULL;
100 } else if (!strncmp(raw, "true", 4)) {
101 raw += 4;
102 consumed = (raw - rawStart);
103 return JTOK_KW_TRUE;
104 } else if (!strncmp(raw, "false", 5)) {
105 raw += 5;
106 consumed = (raw - rawStart);
107 return JTOK_KW_FALSE;
108 } else
109 return JTOK_ERR;
110
111 case '-':
112 case '0':
113 case '1':
114 case '2':
115 case '3':
116 case '4':
117 case '5':
118 case '6':
119 case '7':
120 case '8':
121 case '9': {
122 // part 1: int
123 std::string numStr;
124
125 const char *first = raw;
126
127 const char *firstDigit = first;
128 if (!json_isdigit(*firstDigit))
129 firstDigit++;
130 if ((*firstDigit == '0') && json_isdigit(firstDigit[1]))
131 return JTOK_ERR;
132
133 numStr += *raw; // copy first char
134 raw++;
135
136 if ((*first == '-') && (raw < end) && (!json_isdigit(*raw)))
137 return JTOK_ERR;
138
139 while (raw < end && json_isdigit(*raw)) { // copy digits
140 numStr += *raw;
141 raw++;
142 }
143
144 // part 2: frac
145 if (raw < end && *raw == '.') {
146 numStr += *raw; // copy .
147 raw++;
148
149 if (raw >= end || !json_isdigit(*raw))
150 return JTOK_ERR;
151 while (raw < end && json_isdigit(*raw)) { // copy digits
152 numStr += *raw;
153 raw++;
154 }
155 }
156
157 // part 3: exp
158 if (raw < end && (*raw == 'e' || *raw == 'E')) {
159 numStr += *raw; // copy E
160 raw++;
161
162 if (raw < end && (*raw == '-' || *raw == '+')) { // copy +/-
163 numStr += *raw;
164 raw++;
165 }
166
167 if (raw >= end || !json_isdigit(*raw))
168 return JTOK_ERR;
169 while (raw < end && json_isdigit(*raw)) { // copy digits
170 numStr += *raw;
171 raw++;
172 }
173 }
174
175 tokenVal = numStr;
176 consumed = (raw - rawStart);
177 return JTOK_NUMBER;
178 }
179
180 case '"': {
181 raw++; // skip "
182
183 std::string valStr;
184 JSONUTF8StringFilter writer(valStr);
185
186 while (true) {
187 if (raw >= end || (unsigned char)*raw < 0x20)
188 return JTOK_ERR;
189
190 else if (*raw == '\\') {
191 raw++; // skip backslash
192
193 if (raw >= end)
194 return JTOK_ERR;
195
196 switch (*raw) {
197 case '"': writer.push_back('\"'); break;
198 case '\\': writer.push_back('\\'); break;
199 case '/': writer.push_back('/'); break;
200 case 'b': writer.push_back('\b'); break;
201 case 'f': writer.push_back('\f'); break;
202 case 'n': writer.push_back('\n'); break;
203 case 'r': writer.push_back('\r'); break;
204 case 't': writer.push_back('\t'); break;
205
206 case 'u': {
207 unsigned int codepoint;
208 if (raw + 1 + 4 >= end ||
209 hatoui(raw + 1, raw + 1 + 4, codepoint) !=
210 raw + 1 + 4)
211 return JTOK_ERR;
212 writer.push_back_u(codepoint);
213 raw += 4;
214 break;
215 }
216 default:
217 return JTOK_ERR;
218
219 }
220
221 raw++; // skip esc'd char
222 }
223
224 else if (*raw == '"') {
225 raw++; // skip "
226 break; // stop scanning
227 }
228
229 else {
230 writer.push_back(*raw);
231 raw++;
232 }
233 }
234
235 if (!writer.finalize())
236 return JTOK_ERR;
237 tokenVal = valStr;
238 consumed = (raw - rawStart);
239 return JTOK_STRING;
240 }
241
242 default:
243 return JTOK_ERR;
244 }
245 }
246
/*
 * Bit flags recording what UniValue::read() requires of the next token.
 */
enum expect_bits {
    EXP_OBJ_NAME  = 0x01U,  // next token must be a string key or '}'
    EXP_COLON     = 0x02U,  // next token must be ':'
    EXP_ARR_VALUE = 0x04U,  // next token must begin a value or be ']'
    EXP_VALUE     = 0x08U,  // next token must begin a value
    EXP_NOT_VALUE = 0x10U   // next token must not begin a value
};
254
/*
 * Shorthand for testing, setting and clearing EXP_* bits. All three operate
 * on a variable named `expectMask` that must be in scope at the point of use;
 * `bit` is the flag name without its EXP_ prefix.
 */
#define expect(bit)      ((expectMask) & (EXP_##bit))
#define setExpect(bit)   ((expectMask) |= (EXP_##bit))
#define clearExpect(bit) ((expectMask) &= ~(EXP_##bit))
258
read(const char * raw,size_t size)259 bool UniValue::read(const char *raw, size_t size)
260 {
261 clear();
262
263 uint32_t expectMask = 0;
264 std::vector<UniValue*> stack;
265
266 std::string tokenVal;
267 unsigned int consumed;
268 enum jtokentype tok = JTOK_NONE;
269 enum jtokentype last_tok = JTOK_NONE;
270 const char* end = raw + size;
271 do {
272 last_tok = tok;
273
274 tok = getJsonToken(tokenVal, consumed, raw, end);
275 if (tok == JTOK_NONE || tok == JTOK_ERR)
276 return false;
277 raw += consumed;
278
279 bool isValueOpen = jsonTokenIsValue(tok) ||
280 tok == JTOK_OBJ_OPEN || tok == JTOK_ARR_OPEN;
281
282 if (expect(VALUE)) {
283 if (!isValueOpen)
284 return false;
285 clearExpect(VALUE);
286
287 } else if (expect(ARR_VALUE)) {
288 bool isArrValue = isValueOpen || (tok == JTOK_ARR_CLOSE);
289 if (!isArrValue)
290 return false;
291
292 clearExpect(ARR_VALUE);
293
294 } else if (expect(OBJ_NAME)) {
295 bool isObjName = (tok == JTOK_OBJ_CLOSE || tok == JTOK_STRING);
296 if (!isObjName)
297 return false;
298
299 } else if (expect(COLON)) {
300 if (tok != JTOK_COLON)
301 return false;
302 clearExpect(COLON);
303
304 } else if (!expect(COLON) && (tok == JTOK_COLON)) {
305 return false;
306 }
307
308 if (expect(NOT_VALUE)) {
309 if (isValueOpen)
310 return false;
311 clearExpect(NOT_VALUE);
312 }
313
314 switch (tok) {
315
316 case JTOK_OBJ_OPEN:
317 case JTOK_ARR_OPEN: {
318 VType utyp = (tok == JTOK_OBJ_OPEN ? VOBJ : VARR);
319 if (!stack.size()) {
320 if (utyp == VOBJ)
321 setObject();
322 else
323 setArray();
324 stack.push_back(this);
325 } else {
326 UniValue tmpVal(utyp);
327 UniValue *top = stack.back();
328 top->values.push_back(tmpVal);
329
330 UniValue *newTop = &(top->values.back());
331 stack.push_back(newTop);
332 }
333
334 if (stack.size() > MAX_JSON_DEPTH)
335 return false;
336
337 if (utyp == VOBJ)
338 setExpect(OBJ_NAME);
339 else
340 setExpect(ARR_VALUE);
341 break;
342 }
343
344 case JTOK_OBJ_CLOSE:
345 case JTOK_ARR_CLOSE: {
346 if (!stack.size() || (last_tok == JTOK_COMMA))
347 return false;
348
349 VType utyp = (tok == JTOK_OBJ_CLOSE ? VOBJ : VARR);
350 UniValue *top = stack.back();
351 if (utyp != top->getType())
352 return false;
353
354 stack.pop_back();
355 clearExpect(OBJ_NAME);
356 setExpect(NOT_VALUE);
357 break;
358 }
359
360 case JTOK_COLON: {
361 if (!stack.size())
362 return false;
363
364 UniValue *top = stack.back();
365 if (top->getType() != VOBJ)
366 return false;
367
368 setExpect(VALUE);
369 break;
370 }
371
372 case JTOK_COMMA: {
373 if (!stack.size() ||
374 (last_tok == JTOK_COMMA) || (last_tok == JTOK_ARR_OPEN))
375 return false;
376
377 UniValue *top = stack.back();
378 if (top->getType() == VOBJ)
379 setExpect(OBJ_NAME);
380 else
381 setExpect(ARR_VALUE);
382 break;
383 }
384
385 case JTOK_KW_NULL:
386 case JTOK_KW_TRUE:
387 case JTOK_KW_FALSE: {
388 UniValue tmpVal;
389 switch (tok) {
390 case JTOK_KW_NULL:
391 // do nothing more
392 break;
393 case JTOK_KW_TRUE:
394 tmpVal.setBool(true);
395 break;
396 case JTOK_KW_FALSE:
397 tmpVal.setBool(false);
398 break;
399 default: /* impossible */ break;
400 }
401
402 if (!stack.size()) {
403 *this = tmpVal;
404 break;
405 }
406
407 UniValue *top = stack.back();
408 top->values.push_back(tmpVal);
409
410 setExpect(NOT_VALUE);
411 break;
412 }
413
414 case JTOK_NUMBER: {
415 UniValue tmpVal(VNUM, tokenVal);
416 if (!stack.size()) {
417 *this = tmpVal;
418 break;
419 }
420
421 UniValue *top = stack.back();
422 top->values.push_back(tmpVal);
423
424 setExpect(NOT_VALUE);
425 break;
426 }
427
428 case JTOK_STRING: {
429 if (expect(OBJ_NAME)) {
430 UniValue *top = stack.back();
431 top->keys.push_back(tokenVal);
432 clearExpect(OBJ_NAME);
433 setExpect(COLON);
434 } else {
435 UniValue tmpVal(VSTR, tokenVal);
436 if (!stack.size()) {
437 *this = tmpVal;
438 break;
439 }
440 UniValue *top = stack.back();
441 top->values.push_back(tmpVal);
442 }
443
444 setExpect(NOT_VALUE);
445 break;
446 }
447
448 default:
449 return false;
450 }
451 } while (!stack.empty ());
452
453 /* Check that nothing follows the initial construct (parsed above). */
454 tok = getJsonToken(tokenVal, consumed, raw, end);
455 if (tok != JTOK_NONE)
456 return false;
457
458 return true;
459 }
460
461