1 /*
2 * nxjson
3 * Copyright 2018 Yaroslav Stavnichiy <yarosla@gmail.com>
4 * SPDX-License-Identifier: MIT
5 * Explicit permission to use this project
6 * under the MIT license has been given by Yaroslav Stavnichiy
7 * on Sun, May 20, 2018 at 1:29 PM (CET). Original license is LGPL v3
8 */
9 // this file can be #included in your code
10 #ifndef NXJSON_C
11 #define NXJSON_C
12
13 #ifdef __cplusplus
14 extern "C" {
15 #endif
16
17
18 #include <stdlib.h>
19 #include <stdio.h>
20 #include <string.h>
21 #include <assert.h>
22 #include <errno.h>
23
24 #include "nxjson.h"
25
/*
 * Allocator hooks. Define NX_JSON_CALLOC before this point to plug in a custom
 * allocator. NX_JSON_FREE only receives a default together with NX_JSON_CALLOC,
 * so a custom NX_JSON_CALLOC has to be accompanied by a matching NX_JSON_FREE.
 */
#if !defined(NX_JSON_CALLOC)
#  define NX_JSON_CALLOC()   calloc(1, sizeof(nx_json))
#  define NX_JSON_FREE(json) free((void*)(json))
#endif

/*
 * Error-reporting hook. Define NX_JSON_REPORT_ERROR before this point to route
 * parse errors elsewhere. msg must be a string literal (it is pasted into the
 * format string); p is the text position printed after the message.
 */
#if !defined(NX_JSON_REPORT_ERROR)
#  define NX_JSON_REPORT_ERROR(msg, p) \
     fprintf(stderr, "NXJSON PARSE ERROR (%d): " msg " at %s\n", __LINE__, p)
#endif

/* Every byte value up to and including ' ' counts as whitespace (NUL included). */
#define IS_WHITESPACE(c) ((unsigned char)(c) <= (unsigned char)' ')
38
// Shared read-only placeholder node, initialised with NX_JSON_NULL (presumably
// its type field -- the struct layout lives in nxjson.h, confirm there).
// nx_json_get() and nx_json_item() return its address instead of a null pointer
// when a lookup finds nothing.
static const nx_json dummy={ NX_JSON_NULL };
40
/*
 * Allocates a node through NX_JSON_CALLOC, records its type, key and parent,
 * and appends it to the end of parent's child list.
 *
 * key is stored as given (not copied). parent must not be NULL; its child,
 * last_child and length fields are updated. Allocation failure is only caught
 * by assert().
 *
 * Returns the newly linked node.
 */
static nx_json* create_json(nx_json_type type, const char* key, nx_json* parent) {
  nx_json* node=NX_JSON_CALLOC();
  assert(node);
  node->parent=parent;
  node->key=key;
  node->type=type;

  nx_json* tail=parent->last_child;
  if (tail) {
    tail->next=node;      // append after the current last child
  }
  else {
    parent->child=node;   // first child of this parent
  }
  parent->last_child=node;
  parent->length++;
  return node;
}
57
nx_json_free(const nx_json * js)58 void nx_json_free(const nx_json* js) {
59 nx_json* p=js->child;
60 nx_json* p1;
61 while (p) {
62 p1=p->next;
63 nx_json_free(p);
64 p=p1;
65 }
66 NX_JSON_FREE(js);
67 }
68
/*
 * Encodes one Unicode code point as UTF-8.
 *
 * codepoint: value to encode; surrogates (0xD800..0xDFFF) and values of
 *            0x110000 and above are rejected.
 * p:         destination; up to 4 bytes are written, no terminator is added.
 * endp:      on success receives the position just past the last byte written;
 *            left untouched on failure.
 *
 * Returns 1 on success, 0 if the code point cannot be encoded.
 */
static int unicode_to_utf8(unsigned int codepoint, char* p, char** endp) {
  char* out=p;

  // surrogate halves must have been combined by the caller; beyond 0x10FFFF is invalid
  if (codepoint>=0x110000 || (codepoint>=0xd800 && codepoint<=0xdfff)) return 0;

  if (codepoint<0x80) {
    *out++=(char)codepoint;
  }
  else if (codepoint<0x800) {
    *out++=(char)(0xc0|(codepoint>>6));
    *out++=(char)(0x80|(codepoint&0x3f));
  }
  else if (codepoint<0x10000) {
    *out++=(char)(0xe0|(codepoint>>12));
    *out++=(char)(0x80|((codepoint>>6)&0x3f));
    *out++=(char)(0x80|(codepoint&0x3f));
  }
  else {
    *out++=(char)(0xf0|(codepoint>>18));
    *out++=(char)(0x80|((codepoint>>12)&0x3f));
    *out++=(char)(0x80|((codepoint>>6)&0x3f));
    *out++=(char)(0x80|(codepoint&0x3f));
  }
  *endp=out;
  return 1;
}
80
// Non-static encoder handle initialised to the built-in UTF-8 encoder above;
// it has the callback type that nx_json_parse() takes as its encoder argument.
nx_json_unicode_encoder nx_json_unicode_to_utf8=unicode_to_utf8;
82
/*
 * Converts one hexadecimal digit character to its numeric value.
 * Returns 0..15 for '0'-'9', 'a'-'f' and 'A'-'F', and -1 for anything else
 * (including the NUL terminator).
 */
static inline int hex_val(char c) {
  int digit=-1;
  if ('0'<=c && c<='9') {
    digit=c-'0';
  }
  else if ('a'<=c && c<='f') {
    digit=10+(c-'a');
  }
  else if ('A'<=c && c<='F') {
    digit=10+(c-'A');
  }
  return digit;
}
89
/*
 * Reads exactly four hex digits starting at q.
 * Returns their value (0..0xFFFF) or -1 at the first non-hex character; no
 * character after that one is read, so the scan never runs past a terminator.
 */
static int hex4_val(const char* q) {
  int value=0;
  for (int i=0; i<4; i++) {
    int h=hex_val(q[i]);
    if (h<0) return -1;
    value=value<<4|h;
  }
  return value;
}

/*
 * Decodes a JSON string body in place.
 *
 * s:       points just past the opening quote; the buffer is overwritten with
 *          the decoded text and NUL-terminated at the closing quote.
 * end:     on success receives the position just past the closing quote.
 * encoder: converts \uXXXX code points into bytes; when NULL, \u sequences are
 *          copied through unchanged.
 *
 * Unrecognised escapes keep their backslash. Returns s on success, or 0 after
 * reporting an error (bad \u escape, bad surrogate pair, code point rejected
 * by the encoder, or missing closing quote).
 */
static char* unescape_string(char* s, char** end, nx_json_unicode_encoder encoder) {
  char* src=s; // read cursor
  char* dst=s; // write cursor
  for (;;) {
    char ch=*src++;
    if (ch=='\0') break;
    if (ch=='"') {
      *dst='\0';
      *end=src;
      return s;
    }
    if (ch!='\\') {
      *dst++=ch;
      continue;
    }

    // ch is a backslash; *src selects the escape
    char mapped;
    switch (*src) {
      case '\\': case '/': case '"': mapped=*src; break;
      case 'b': mapped='\b'; break;
      case 'f': mapped='\f'; break;
      case 'n': mapped='\n'; break;
      case 'r': mapped='\r'; break;
      case 't': mapped='\t'; break;
      default:  mapped='\0'; break;
    }
    if (mapped) {
      *dst++=mapped;
      src++;
      continue;
    }
    if (*src!='u' || !encoder) {
      // unknown escape, or \u without an encoder: keep the backslash and let
      // the next iteration copy the character that follows it
      *dst++=ch;
      continue;
    }

    // \uXXXX, optionally followed by a second \uXXXX forming a surrogate pair
    char* esc=src-1; // the backslash, used for error reporting
    int first=hex4_val(src+1);
    if (first<0) {
      NX_JSON_REPORT_ERROR("invalid unicode escape", esc);
      return 0;
    }
    unsigned int codepoint=(unsigned int)first;
    if ((codepoint & 0xfc00)==0xd800) { // high surrogate: a low surrogate must follow
      src+=6; // now src[-1] should be '\\' and src[0] should be 'u'
      int second=(src[-1]=='\\' && *src=='u') ? hex4_val(src+1) : -1;
      if (second<0) {
        NX_JSON_REPORT_ERROR("invalid unicode surrogate", esc);
        return 0;
      }
      unsigned int codepoint2=(unsigned int)second;
      if ((codepoint2 & 0xfc00)!=0xdc00) {
        NX_JSON_REPORT_ERROR("invalid unicode surrogate", esc);
        return 0;
      }
      codepoint=0x10000+((codepoint-0xd800)<<10)+(codepoint2-0xdc00);
    }
    if (!encoder(codepoint, dst, &dst)) {
      NX_JSON_REPORT_ERROR("invalid codepoint", esc);
      return 0;
    }
    src+=5; // step over 'u' and its four hex digits
  }
  NX_JSON_REPORT_ERROR("no closing quote for string", s);
  return 0;
}
167
/*
 * Skips the remainder of a block comment.
 *
 * p must point just past the opening "/" "*" pair (p[-2]=='/' && p[-1]=='*').
 * The terminator is the first "*" "/" pair starting at or after p, so the '*'
 * of the opener is never reused as part of the closer.
 *
 * Returns the position just past the closing pair, or 0 after reporting
 * "endless comment" when the text ends first.
 */
static char* skip_block_comment(char* p) {
  char* opener=p-2; // start of the comment, for error reporting
  char* closer=strstr(p, "*/");
  if (!closer) {
    NX_JSON_REPORT_ERROR("endless comment", opener);
    return 0;
  }
  return closer+2;
}
184
/*
 * Scans for the next object key.
 *
 * Whitespace, commas and comments before the key are skipped. On finding a
 * quoted key it is unescaped in place, stored in *key, and the position just
 * past the following ':' is returned. If '}' is met first, the position of
 * that '}' is returned (so the caller sees *p=='}') and *key is not set.
 *
 * Returns 0 after an error has been reported (or propagated from a helper).
 */
static char* parse_key(const char** key, char* p, nx_json_unicode_encoder encoder) {
  for (;;) {
    char* cur=p++; // character under examination; p already points past it
    char c=*cur;

    // NUL satisfies IS_WHITESPACE, so it has to be excluded explicitly
    if (c && (IS_WHITESPACE(c) || c==',')) continue;

    if (c=='}') return cur;

    if (c=='"') {
      *key=unescape_string(p, &p, encoder);
      if (!*key) return 0; // propagate error
      for (; *p && IS_WHITESPACE(*p); p++) {
        // skip blanks between the key and the colon
      }
      if (*p!=':') {
        NX_JSON_REPORT_ERROR("unexpected chars", p);
        return 0;
      }
      return p+1;
    }

    if (c=='/') {
      if (*p=='/') { // line comment: runs up to and including the newline
        p=strchr(p+1, '\n');
        if (!p) {
          NX_JSON_REPORT_ERROR("endless comment", cur);
          return 0;
        }
        p++;
        continue;
      }
      if (*p=='*') { // block comment
        p=skip_block_comment(p+1);
        if (!p) return 0;
        continue;
      }
    }

    // end of text, a lone '/', or any other character
    NX_JSON_REPORT_ERROR("unexpected chars", cur);
    return 0;
  }
}
230
/*
 * Parses one JSON value starting at p and attaches it to parent.
 *
 * parent:  node that receives the new child (via create_json).
 * key:     key stored on the new node; 0 for array items and the root.
 * p:       current position in the mutable, NUL-terminated text.
 * encoder: passed through to string unescaping.
 *
 * Leading whitespace, commas and comments are skipped. Objects and arrays
 * recurse for their members. When ']' is met instead of a value, no node is
 * created and the position of that ']' is returned so the enclosing array
 * loop can terminate.
 *
 * Numbers are first probed with strtoll (base 0, so C-style prefixes such as
 * 0x are accepted); if the integer part is followed by '.', 'e' or 'E' the
 * text is re-read with strtod and the node becomes NX_JSON_DOUBLE. errno is
 * cleared before each conversion, because the conversion functions only set
 * it on failure and a stale ERANGE would otherwise be misread as a range
 * error. Integer-range overflow only rejects values that are actually
 * integers.
 *
 * Returns the position just past the parsed value, or 0 after an error has
 * been reported. Nodes created before the error stay linked under parent.
 */
static char* parse_value(nx_json* parent, const char* key, char* p, nx_json_unicode_encoder encoder) {
  nx_json* js;
  while (1) {
    switch (*p) {
      case '\0':
        NX_JSON_REPORT_ERROR("unexpected end of text", p);
        return 0; // error
      case ' ': case '\t': case '\n': case '\r':
      case ',':
        // skip
        p++;
        break;
      case '{':
        js=create_json(NX_JSON_OBJECT, key, parent);
        p++;
        while (1) {
          const char* new_key;
          p=parse_key(&new_key, p, encoder);
          if (!p) return 0; // error
          if (*p=='}') return p+1; // end of object
          p=parse_value(js, new_key, p, encoder);
          if (!p) return 0; // error
        }
      case '[':
        js=create_json(NX_JSON_ARRAY, key, parent);
        p++;
        while (1) {
          p=parse_value(js, 0, p, encoder);
          if (!p) return 0; // error
          if (*p==']') return p+1; // end of array
        }
      case ']':
        return p; // let the enclosing array loop consume it
      case '"':
        p++;
        js=create_json(NX_JSON_STRING, key, parent);
        js->text_value=unescape_string(p, &p, encoder);
        if (!js->text_value) return 0; // propagate error
        return p;
      case '-': case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
        {
          js=create_json(NX_JSON_INTEGER, key, parent);
          char* pe;
          errno=0; // a stale ERANGE from earlier code must not look like our failure
          js->int_value=strtoll(p, &pe, 0);
          if (pe==p) {
            NX_JSON_REPORT_ERROR("invalid number", p);
            return 0; // error
          }
          if (*pe=='.' || *pe=='e' || *pe=='E') { // double value
            // integer-range overflow of the probe above is irrelevant here
            js->type=NX_JSON_DOUBLE;
            errno=0;
            js->dbl_value=strtod(p, &pe);
            if (pe==p || errno==ERANGE) {
              NX_JSON_REPORT_ERROR("invalid number", p);
              return 0; // error
            }
          }
          else {
            if (errno==ERANGE) {
              NX_JSON_REPORT_ERROR("invalid number", p);
              return 0; // error
            }
            js->dbl_value=js->int_value;
          }
          return pe;
        }
      case 't':
        if (!strncmp(p, "true", 4)) {
          js=create_json(NX_JSON_BOOL, key, parent);
          js->int_value=1;
          return p+4;
        }
        NX_JSON_REPORT_ERROR("unexpected chars", p);
        return 0; // error
      case 'f':
        if (!strncmp(p, "false", 5)) {
          js=create_json(NX_JSON_BOOL, key, parent);
          js->int_value=0;
          return p+5;
        }
        NX_JSON_REPORT_ERROR("unexpected chars", p);
        return 0; // error
      case 'n':
        if (!strncmp(p, "null", 4)) {
          create_json(NX_JSON_NULL, key, parent);
          return p+4;
        }
        NX_JSON_REPORT_ERROR("unexpected chars", p);
        return 0; // error
      case '/': // comment
        if (p[1]=='/') { // line comment
          char* ps=p;
          p=strchr(p+2, '\n');
          if (!p) {
            NX_JSON_REPORT_ERROR("endless comment", ps);
            return 0; // error
          }
          p++;
        }
        else if (p[1]=='*') { // block comment
          p=skip_block_comment(p+2);
          if (!p) return 0;
        }
        else {
          NX_JSON_REPORT_ERROR("unexpected chars", p);
          return 0; // error
        }
        break;
      default:
        NX_JSON_REPORT_ERROR("unexpected chars", p);
        return 0; // error
    }
  }
}
340
nx_json_parse_utf8(char * text)341 const nx_json* nx_json_parse_utf8(char* text) {
342 return nx_json_parse(text, unicode_to_utf8);
343 }
344
nx_json_parse(char * text,nx_json_unicode_encoder encoder)345 const nx_json* nx_json_parse(char* text, nx_json_unicode_encoder encoder) {
346 nx_json js={0};
347 if (!parse_value(&js, 0, text, encoder)) {
348 if (js.child) nx_json_free(js.child);
349 return 0;
350 }
351 return js.child;
352 }
353
nx_json_get(const nx_json * json,const char * key)354 const nx_json* nx_json_get(const nx_json* json, const char* key) {
355 if (!json || !key) return &dummy; // never return null
356 nx_json* js;
357 for (js=json->child; js; js=js->next) {
358 if (js->key && !strcmp(js->key, key)) return js;
359 }
360 return &dummy; // never return null
361 }
362
nx_json_item(const nx_json * json,int idx)363 const nx_json* nx_json_item(const nx_json* json, int idx) {
364 if (!json) return &dummy; // never return null
365 nx_json* js;
366 for (js=json->child; js; js=js->next) {
367 if (!idx--) return js;
368 }
369 return &dummy; // never return null
370 }
371
372
373 #ifdef __cplusplus
374 }
375 #endif
376
377 #endif /* NXJSON_C */
378