1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This Source Code Form is subject to the terms of the Mozilla Public
4  * License, v. 2.0. If a copy of the MPL was not distributed with this
5  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
6  */
7 
8 #ifndef INCLUDED_ORCUS_JSON_PARSER_HPP
9 #define INCLUDED_ORCUS_JSON_PARSER_HPP
10 
11 #include "orcus/json_parser_base.hpp"
12 
13 #include <cassert>
14 #include <cmath>
15 
16 namespace orcus {
17 
/**
 * Handler whose callbacks all do nothing.  It lists every callback that
 * json_parser invokes on its handler, together with the expected
 * signatures.
 */
class json_handler
{
public:
    /**
     * Invoked once, before any part of the stream is examined.
     */
    void begin_parse() {}

    /**
     * Invoked once, after the whole stream has been parsed successfully.
     */
    void end_parse() {}

    /**
     * Invoked when the opening bracket '[' of an array is encountered.
     */
    void begin_array() {}

    /**
     * Invoked when the closing bracket ']' of an array is encountered.
     */
    void end_array() {}

    /**
     * Invoked when the opening curly brace '{' of an object is encountered.
     */
    void begin_object() {}

    /**
     * Invoked for each key string of an object.
     *
     * @param p pointer to the first character of the key string.
     * @param len number of characters in the key string.
     * @param transient when true, the characters live in a temporary buffer
     *                  that is not guaranteed to keep them once this
     *                  callback returns.  When false, the pointer refers to
     *                  a position inside the JSON stream being parsed.
     */
    void object_key(const char* p, size_t len, bool transient)
    {
        static_cast<void>(p);
        static_cast<void>(len);
        static_cast<void>(transient);
    }

    /**
     * Invoked when the closing curly brace '}' of an object is encountered.
     */
    void end_object() {}

    /**
     * Invoked when the keyword 'true' is encountered.
     */
    void boolean_true() {}

    /**
     * Invoked when the keyword 'false' is encountered.
     */
    void boolean_false() {}

    /**
     * Invoked when the keyword 'null' is encountered.
     */
    void null() {}

    /**
     * Invoked for each string value.
     *
     * @param p pointer to the first character of the string value.
     * @param len number of characters in the string value.
     * @param transient when true, the characters live in a temporary buffer
     *                  that is not guaranteed to keep them once this
     *                  callback returns.  When false, the pointer refers to
     *                  a position inside the JSON stream being parsed.
     */
    void string(const char* p, size_t len, bool transient)
    {
        static_cast<void>(p);
        static_cast<void>(len);
        static_cast<void>(transient);
    }

    /**
     * Invoked for each numeric value.
     *
     * @param val the parsed numeric value.
     */
    void number(double val)
    {
        static_cast<void>(val);
    }
};
108 
109 /**
110  * Low-level JSON parser.  The caller must provide a handler class to
111  * receive callbacks.
112  */
113 template<typename _Handler>
114 class json_parser : public json::parser_base
115 {
116 public:
117     typedef _Handler handler_type;
118 
119     /**
120      * Constructor.
121      *
122      * @param p pointer to a string stream containing JSON string.
123      * @param n size of the stream.
124      * @param hdl handler class instance.
125      */
126     json_parser(const char* p, size_t n, handler_type& hdl);
127 
128     /**
129      * Call this method to start parsing.
130      */
131     void parse();
132 
133 private:
134     void root_value();
135     void value();
136     void array();
137     void end_array();
138     void object();
139     void number();
140     void string();
141 
142 private:
143     handler_type& m_handler;
144 };
145 
146 template<typename _Handler>
json_parser(const char * p,size_t n,handler_type & hdl)147 json_parser<_Handler>::json_parser(
148     const char* p, size_t n, handler_type& hdl) :
149     json::parser_base(p, n), m_handler(hdl) {}
150 
151 template<typename _Handler>
parse()152 void json_parser<_Handler>::parse()
153 {
154     m_handler.begin_parse();
155 
156     skip_ws();
157     if (has_char())
158         root_value();
159     else
160         throw json::parse_error("parse: no json content could be found in file", offset());
161 
162     if (has_char())
163         throw json::parse_error("parse: unexpected trailing string segment.", offset());
164 
165     m_handler.end_parse();
166 }
167 
168 template<typename _Handler>
root_value()169 void json_parser<_Handler>::root_value()
170 {
171     char c = cur_char();
172 
173     switch (c)
174     {
175         case '[':
176             array();
177         break;
178         case '{':
179             object();
180         break;
181         default:
182             json::parse_error::throw_with(
183                 "root_value: either '[' or '{' was expected, but '", cur_char(), "' was found.", offset());
184     }
185 }
186 
187 template<typename _Handler>
value()188 void json_parser<_Handler>::value()
189 {
190     char c = cur_char();
191     if (is_numeric(c))
192     {
193         number();
194         return;
195     }
196 
197     switch (c)
198     {
199         case '-':
200             number();
201         break;
202         case '[':
203             array();
204         break;
205         case '{':
206             object();
207         break;
208         case 't':
209             parse_true();
210             m_handler.boolean_true();
211         break;
212         case 'f':
213             parse_false();
214             m_handler.boolean_false();
215         break;
216         case 'n':
217             parse_null();
218             m_handler.null();
219         break;
220         case '"':
221             string();
222         break;
223         default:
224             json::parse_error::throw_with("value: failed to parse '", cur_char(), "'.", offset());
225     }
226 }
227 
228 template<typename _Handler>
array()229 void json_parser<_Handler>::array()
230 {
231     assert(cur_char() == '[');
232 
233     m_handler.begin_array();
234     for (next(); has_char(); next())
235     {
236         skip_ws();
237 
238         if (cur_char() == ']')
239         {
240             end_array();
241             return;
242         }
243 
244         value();
245         skip_ws();
246 
247         if (has_char())
248         {
249             switch (cur_char())
250             {
251                 case ']':
252                     end_array();
253                     return;
254                 case ',':
255                     if (next_char() == ']')
256                     {
257                         json::parse_error::throw_with(
258                             "array: ']' expected but '", cur_char(), "' found.", offset() );
259                     }
260                     continue;
261                 default:
262                     json::parse_error::throw_with(
263                         "array: either ']' or ',' expected, but '", cur_char(), "' found.", offset());
264             }
265         }
266         else
267         {
268             // needs to be handled here,
269             // we would call next() before checking again with has_char() which
270             // is already past the end
271             break;
272         }
273     }
274 
275     throw json::parse_error("array: failed to parse array.", offset());
276 }
277 
278 template<typename _Handler>
end_array()279 void json_parser<_Handler>::end_array()
280 {
281     m_handler.end_array();
282     next();
283     skip_ws();
284 }
285 
286 template<typename _Handler>
object()287 void json_parser<_Handler>::object()
288 {
289     assert(cur_char() == '{');
290 
291     bool require_new_key = false;
292     m_handler.begin_object();
293     for (next(); has_char(); next())
294     {
295         skip_ws();
296         if (!has_char())
297             throw json::parse_error("object: stream ended prematurely before reaching a key.", offset());
298 
299         switch (cur_char())
300         {
301             case '}':
302                 if (require_new_key)
303                 {
304                     json::parse_error::throw_with(
305                         "object: new key expected, but '", cur_char(), "' found.", offset());
306                 }
307                 m_handler.end_object();
308                 next();
309                 skip_ws();
310                 return;
311             case '"':
312                 break;
313             default:
314                 json::parse_error::throw_with(
315                     "object: '\"' was expected, but '", cur_char(), "' found.", offset());
316         }
317         require_new_key = false;
318 
319         parse_quoted_string_state res = parse_string();
320         if (!res.str)
321         {
322             // Parsing was unsuccessful.
323             if (res.length == parse_quoted_string_state::error_no_closing_quote)
324                 throw json::parse_error("object: stream ended prematurely before reaching the closing quote of a key.", offset());
325             else if (res.length == parse_quoted_string_state::error_illegal_escape_char)
326                 json::parse_error::throw_with(
327                     "object: illegal escape character '", cur_char(), "' in key value.", offset());
328             else
329                 throw json::parse_error("object: unknown error while parsing a key value.", offset());
330         }
331 
332         m_handler.object_key(res.str, res.length, res.transient);
333 
334         skip_ws();
335         if (cur_char() != ':')
336             json::parse_error::throw_with(
337                 "object: ':' was expected, but '", cur_char(), "' found.", offset());
338 
339         next();
340         skip_ws();
341 
342         if (!has_char())
343             throw json::parse_error("object: stream ended prematurely before reaching a value.", offset());
344 
345         value();
346 
347         skip_ws();
348         if (!has_char())
349             throw json::parse_error("object: stream ended prematurely before reaching either '}' or ','.", offset());
350 
351         switch (cur_char())
352         {
353             case '}':
354                 m_handler.end_object();
355                 next();
356                 skip_ws();
357                 return;
358             case ',':
359                 require_new_key = true;
360                 continue;
361             default:
362                 json::parse_error::throw_with(
363                     "object: either '}' or ',' expected, but '", cur_char(), "' found.", offset());
364         }
365     }
366 
367     throw json::parse_error("object: closing '}' was never reached.", offset());
368 }
369 
370 template<typename _Handler>
number()371 void json_parser<_Handler>::number()
372 {
373     assert(is_numeric(cur_char()) || cur_char() == '-');
374 
375     double val = parse_double_or_throw();
376     m_handler.number(val);
377     skip_ws();
378 }
379 
380 template<typename _Handler>
string()381 void json_parser<_Handler>::string()
382 {
383     parse_quoted_string_state res = parse_string();
384     if (res.str)
385     {
386         m_handler.string(res.str, res.length, res.transient);
387         return;
388     }
389 
390     // Parsing was unsuccessful.
391     if (res.length == parse_quoted_string_state::error_no_closing_quote)
392         throw json::parse_error("string: stream ended prematurely before reaching the closing quote.", offset());
393     else if (res.length == parse_quoted_string_state::error_illegal_escape_char)
394         json::parse_error::throw_with("string: illegal escape character '", cur_char(), "'.", offset());
395     else
396         throw json::parse_error("string: unknown error.", offset());
397 }
398 
399 }
400 
401 #endif
402 
403 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
404