1 /*
2  * Copyright (c) 2004, 2005 Metaparadigm Pte. Ltd.
3  * Michael Clark <michael@metaparadigm.com>
4  *
5  * This library is free software; you can redistribute it and/or modify
6  * it under the terms of the MIT license. See COPYING for details.
7  *
8  */
9 
10 #ifndef _fj_json_tokener_h_
11 #define _fj_json_tokener_h_
12 
13 #include <stddef.h>
14 #include "json_object.h"
15 
16 #ifdef __cplusplus
17 extern "C" {
18 #endif
19 
/**
 * Result codes reported by the tokener.
 *
 * fjson_tokener_get_error() returns one of these values and
 * fjson_tokener_error_desc() maps a value to a human readable message.
 * The fjson_tokener_error_* names indicate which construct could not be
 * parsed; the exact conditions are decided by the implementation.
 *
 * The numeric values are spelled out because they are visible to already
 * compiled callers: append new codes, never renumber existing ones.
 */
enum fjson_tokener_error {
	fjson_tokener_success                      = 0,  /* a complete value was parsed */
	fjson_tokener_continue                     = 1,  /* input ended mid-value; feed more bytes */
	fjson_tokener_error_depth                  = 2,
	fjson_tokener_error_parse_eof              = 3,
	fjson_tokener_error_parse_unexpected       = 4,
	fjson_tokener_error_parse_null             = 5,
	fjson_tokener_error_parse_boolean          = 6,
	fjson_tokener_error_parse_number           = 7,
	fjson_tokener_error_parse_array            = 8,
	fjson_tokener_error_parse_object_key_name  = 9,
	fjson_tokener_error_parse_object_key_sep   = 10,
	fjson_tokener_error_parse_object_value_sep = 11,
	fjson_tokener_error_parse_string           = 12,
	fjson_tokener_error_parse_comment          = 13,
	fjson_tokener_error_size                   = 14
};
37 
/**
 * States of the tokener's parsing state machine.
 *
 * Each stack record (struct fjson_tokener_srec) holds two of these values,
 * in its state and saved_state members.  The names indicate the JSON
 * construct being processed; the transitions between states live in the
 * implementation, not in this header.
 *
 * The numeric values are spelled out so that they stay stable: append new
 * states, never renumber existing ones.
 */
enum fjson_tokener_state {
	fjson_tokener_state_eatws                        = 0,
	fjson_tokener_state_start                        = 1,
	fjson_tokener_state_finish                       = 2,
	fjson_tokener_state_null                         = 3,
	fjson_tokener_state_comment_start                = 4,
	fjson_tokener_state_comment                      = 5,
	fjson_tokener_state_comment_eol                  = 6,
	fjson_tokener_state_comment_end                  = 7,
	fjson_tokener_state_string                       = 8,
	fjson_tokener_state_string_escape                = 9,
	fjson_tokener_state_escape_unicode               = 10,
	fjson_tokener_state_boolean                      = 11,
	fjson_tokener_state_number                       = 12,
	fjson_tokener_state_array                        = 13,
	fjson_tokener_state_array_add                    = 14,
	fjson_tokener_state_array_sep                    = 15,
	fjson_tokener_state_object_field_start           = 16,
	fjson_tokener_state_object_field                 = 17,
	fjson_tokener_state_object_field_end             = 18,
	fjson_tokener_state_object_value                 = 19,
	fjson_tokener_state_object_value_add             = 20,
	fjson_tokener_state_object_sep                   = 21,
	fjson_tokener_state_array_after_sep              = 22,
	fjson_tokener_state_object_field_start_after_sep = 23,
	fjson_tokener_state_inf                          = 24
};
65 
66 struct fjson_tokener_srec
67 {
68 	enum fjson_tokener_state state, saved_state;
69 	struct fjson_object *obj;
70 	struct fjson_object *current;
71 	char *obj_field_name;
72 };
73 
74 #define FJSON_TOKENER_DEFAULT_DEPTH 32
75 
76 struct fjson_tokener
77 {
78 	char *str;
79 	struct printbuf *pb;
80 	int max_depth, depth, is_double, st_pos, char_offset;
81 	enum fjson_tokener_error err;
82 	unsigned int ucs_char;
83 	char quote_char;
84 	struct fjson_tokener_srec *stack;
85 	int flags;
86 };
87 
/**
 * Flag: parse JSON input strictly.
 *
 * Use with caution: the set of inputs accepted in strict mode may shrink
 * from one release to the next, so input that parses today can be rejected
 * after an upgrade.
 *
 * The flag is not set by default.
 *
 * @see fjson_tokener_set_flags()
 */
#define FJSON_TOKENER_STRICT 0x01
99 
/**
 * Translate a tokener error code into a human readable description.
 *
 * @param jerr an error code, typically one obtained from
 *             fjson_tokener_get_error()
 * @return the description of jerr; if jerr is not a valid error value a
 *         generic error message is returned instead.
 */
const char *fjson_tokener_error_desc(enum fjson_tokener_error jerr);
107 
/**
 * Report the outcome of the most recent fjson_tokener_parse_ex() call.
 *
 * @param tok the tokener that was passed to fjson_tokener_parse_ex()
 * @return fjson_tokener_success if there was no error;
 *         fjson_tokener_continue if a JSON string is being parsed in pieces
 *         and the tokener is in the middle of a value;
 *         otherwise the error caused by the last call.
 *
 * @see fjson_tokener_error_desc()
 */
enum fjson_tokener_error fjson_tokener_get_error(struct fjson_tokener *tok);
118 
/**
 * Allocate a tokener for use with fjson_tokener_parse_ex().
 */
extern struct fjson_tokener *fjson_tokener_new(void);

/**
 * Allocate a tokener with a caller-chosen depth.
 *
 * NOTE(review): depth is presumably the maximum nesting depth the tokener
 * accepts (compare FJSON_TOKENER_DEFAULT_DEPTH) -- confirm.
 */
extern struct fjson_tokener *fjson_tokener_new_ex(int depth);

/**
 * NOTE(review): presumably releases a tokener obtained from
 * fjson_tokener_new() or fjson_tokener_new_ex() -- confirm.
 */
extern void fjson_tokener_free(struct fjson_tokener *tok);

/**
 * Reset a tokener.  This must be called before tok is re-used after
 * fjson_tokener_parse_ex() has reported a fatal error.
 */
extern void fjson_tokener_reset(struct fjson_tokener *tok);

/**
 * NOTE(review): presumably a one-shot parse of the NUL-terminated string
 * str -- confirm the failure return value and who owns the result.
 */
extern struct fjson_object *fjson_tokener_parse(const char *str);

/**
 * NOTE(review): presumably like fjson_tokener_parse(), additionally
 * reporting the outcome through *error -- confirm.
 */
extern struct fjson_object *fjson_tokener_parse_verbose(const char *str,
							enum fjson_tokener_error *error);
125 
/**
 * Set flags that control how parsing will be done.
 *
 * @param tok   the tokener to configure
 * @param flags flag bits such as FJSON_TOKENER_STRICT
 */
extern void fjson_tokener_set_flags(struct fjson_tokener *tok, int flags);
130 
/**
 * Parse a string and return a non-NULL fjson_object if a valid JSON value
 * is found.  The string does not need to be a JSON object or array;
 * it can also be a string, number or boolean value.
 *
 * Incremental input:
 * A partial JSON string can be parsed.  If the parsing is incomplete,
 * NULL is returned and fjson_tokener_get_error() returns
 * fjson_tokener_continue.  fjson_tokener_parse_ex() can then be called
 * again with the additional bytes in str to continue the parsing.
 *
 * Fatal errors:
 * If fjson_tokener_parse_ex() returns NULL and the error is anything other
 * than fjson_tokener_continue, a fatal error has occurred and parsing must
 * be halted.  The tok object must not be re-used until
 * fjson_tokener_reset() is called.
 *
 * Success:
 * When a valid JSON value is parsed, a non-NULL fjson_object is returned
 * and fjson_tokener_get_error() returns fjson_tokener_success.
 * Be sure to check the type with fjson_object_is_type() or
 * fjson_object_get_type() before using the object.
 *
 * Trailing characters:
 * Trailing characters after the parsed value do not automatically cause an
 * error.  It is up to the caller to decide whether to treat this as an
 * error or to handle the additional characters, perhaps by parsing another
 * JSON value starting from that point.
 *
 * Extra characters can be detected by comparing tok->char_offset against
 * the len parameter passed to the last call.
 * @b XXX this shouldn't require access to internal fields.
 *
 * The tokener does \b not maintain an internal buffer, so the caller is
 * responsible for calling fjson_tokener_parse_ex() with an appropriate str
 * parameter starting with the extra characters.
 *
 * Size limit:
 * This interface is presently not 64-bit clean due to the int len argument,
 * so the function limits the maximum string size to INT32_MAX (2GB).
 * If the function is called with len == -1 then strlen is called to check
 * that the string length is less than INT32_MAX (2GB).
 *
 * Example:
 * @code
fjson_object *jobj = NULL;
const char *mystring = NULL;
int stringlen = 0;
enum fjson_tokener_error jerr;
do {
	mystring = ...  // get JSON string, e.g. read from file, etc...
	stringlen = strlen(mystring);
	jobj = fjson_tokener_parse_ex(tok, mystring, stringlen);
} while ((jerr = fjson_tokener_get_error(tok)) == fjson_tokener_continue);
if (jerr != fjson_tokener_success)
{
	fprintf(stderr, "Error: %s\n", fjson_tokener_error_desc(jerr));
	// Handle errors, as appropriate for your application.
}
if (tok->char_offset < stringlen) // XXX shouldn't access internal fields
{
	// Handle extra characters after parsed object as desired.
	// e.g. issue an error, parse another object from that point, etc...
}
// Success, use jobj here.

@endcode
 *
 * @param tok a fjson_tokener previously allocated with fjson_tokener_new()
 * @param str a string with any valid JSON expression, or a portion of one.
 *            This does not need to be null terminated.
 * @param len the length of str
 * @return a non-NULL fjson_object when a complete valid JSON value has been
 *         parsed; NULL when more input is needed or a fatal error occurred
 *         (use fjson_tokener_get_error() to tell the two apart).
 */
extern struct fjson_object *fjson_tokener_parse_ex(struct fjson_tokener *tok,
						   const char *str, int len);
201 
#ifndef FJSON_NATIVE_API_ONLY
/*
 * Compatibility aliases: map the unprefixed json_tokener_* spellings onto
 * the fjson_tokener_* API declared in this header.  Define
 * FJSON_NATIVE_API_ONLY before including this header to keep these names
 * out of the macro namespace.
 *
 * json_tokener_success and json_tokener_get_error are provided alongside
 * json_tokener_continue so that the incremental parse loop documented for
 * fjson_tokener_parse_ex() can be written entirely with unprefixed names.
 */
#define json_tokener fjson_tokener
#define json_tokener_error fjson_tokener_error
/* 15 entries, the same count as enum fjson_tokener_error has enumerators.
 * NOTE(review): presumably indexed by those enumerators -- confirm. */
extern const char* fjson_tokener_errors[15];
#define json_tokener_errors fjson_tokener_errors
#define json_tokener_success fjson_tokener_success
#define json_tokener_continue fjson_tokener_continue
#define json_tokener_reset fjson_tokener_reset
#define json_tokener_get_error fjson_tokener_get_error

#define json_tokener_new() fjson_tokener_new()
#define json_tokener_parse fjson_tokener_parse
#define json_tokener_parse_ex(a, b, c) fjson_tokener_parse_ex((a), (b), (c))
#define json_tokener_free(a) fjson_tokener_free((a))
#define json_tokener_error_desc(a) fjson_tokener_error_desc((a))
#endif
216 
217 #ifdef __cplusplus
218 }
219 #endif
220 
221 #endif
222