1 #pragma once
2 
3 #include <cassert> // assert
4 #include <cmath> // isfinite
5 #include <cstdint> // uint8_t
6 #include <functional> // function
7 #include <string> // string
8 #include <utility> // move
9 #include <vector> // vector
10 
11 #include <nlohmann/detail/exceptions.hpp>
12 #include <nlohmann/detail/input/input_adapters.hpp>
13 #include <nlohmann/detail/input/json_sax.hpp>
14 #include <nlohmann/detail/input/lexer.hpp>
15 #include <nlohmann/detail/macro_scope.hpp>
16 #include <nlohmann/detail/meta/is_sax.hpp>
17 #include <nlohmann/detail/value_t.hpp>
18 
19 namespace nlohmann
20 {
21 namespace detail
22 {
23 ////////////
24 // parser //
25 ////////////
26 
27 /*!
28 @brief syntax analysis
29 
This class implements a non-recursive descent parser: nested arrays and
objects are tracked on an explicit stack instead of the call stack.
31 */
32 template<typename BasicJsonType>
33 class parser
34 {
35     using number_integer_t = typename BasicJsonType::number_integer_t;
36     using number_unsigned_t = typename BasicJsonType::number_unsigned_t;
37     using number_float_t = typename BasicJsonType::number_float_t;
38     using string_t = typename BasicJsonType::string_t;
39     using lexer_t = lexer<BasicJsonType>;
40     using token_type = typename lexer_t::token_type;
41 
42   public:
43     enum class parse_event_t : uint8_t
44     {
45         /// the parser read `{` and started to process a JSON object
46         object_start,
47         /// the parser read `}` and finished processing a JSON object
48         object_end,
49         /// the parser read `[` and started to process a JSON array
50         array_start,
51         /// the parser read `]` and finished processing a JSON array
52         array_end,
53         /// the parser read a key of a value in an object
54         key,
55         /// the parser finished reading a JSON value
56         value
57     };
58 
59     using parser_callback_t =
60         std::function<bool(int depth, parse_event_t event, BasicJsonType& parsed)>;
61 
62     /// a parser reading from an input adapter
parser(detail::input_adapter_t && adapter,const parser_callback_t cb=nullptr,const bool allow_exceptions_=true)63     explicit parser(detail::input_adapter_t&& adapter,
64                     const parser_callback_t cb = nullptr,
65                     const bool allow_exceptions_ = true)
66         : callback(cb), m_lexer(std::move(adapter)), allow_exceptions(allow_exceptions_)
67     {
68         // read first token
69         get_token();
70     }
71 
72     /*!
73     @brief public parser interface
74 
75     @param[in] strict      whether to expect the last token to be EOF
76     @param[in,out] result  parsed JSON value
77 
78     @throw parse_error.101 in case of an unexpected token
79     @throw parse_error.102 if to_unicode fails or surrogate error
80     @throw parse_error.103 if to_unicode fails
81     */
parse(const bool strict,BasicJsonType & result)82     void parse(const bool strict, BasicJsonType& result)
83     {
84         if (callback)
85         {
86             json_sax_dom_callback_parser<BasicJsonType> sdp(result, callback, allow_exceptions);
87             sax_parse_internal(&sdp);
88             result.assert_invariant();
89 
90             // in strict mode, input must be completely read
91             if (strict and (get_token() != token_type::end_of_input))
92             {
93                 sdp.parse_error(m_lexer.get_position(),
94                                 m_lexer.get_token_string(),
95                                 parse_error::create(101, m_lexer.get_position(),
96                                                     exception_message(token_type::end_of_input, "value")));
97             }
98 
99             // in case of an error, return discarded value
100             if (sdp.is_errored())
101             {
102                 result = value_t::discarded;
103                 return;
104             }
105 
106             // set top-level value to null if it was discarded by the callback
107             // function
108             if (result.is_discarded())
109             {
110                 result = nullptr;
111             }
112         }
113         else
114         {
115             json_sax_dom_parser<BasicJsonType> sdp(result, allow_exceptions);
116             sax_parse_internal(&sdp);
117             result.assert_invariant();
118 
119             // in strict mode, input must be completely read
120             if (strict and (get_token() != token_type::end_of_input))
121             {
122                 sdp.parse_error(m_lexer.get_position(),
123                                 m_lexer.get_token_string(),
124                                 parse_error::create(101, m_lexer.get_position(),
125                                                     exception_message(token_type::end_of_input, "value")));
126             }
127 
128             // in case of an error, return discarded value
129             if (sdp.is_errored())
130             {
131                 result = value_t::discarded;
132                 return;
133             }
134         }
135     }
136 
137     /*!
138     @brief public accept interface
139 
140     @param[in] strict  whether to expect the last token to be EOF
141     @return whether the input is a proper JSON text
142     */
accept(const bool strict=true)143     bool accept(const bool strict = true)
144     {
145         json_sax_acceptor<BasicJsonType> sax_acceptor;
146         return sax_parse(&sax_acceptor, strict);
147     }
148 
149     template <typename SAX>
sax_parse(SAX * sax,const bool strict=true)150     bool sax_parse(SAX* sax, const bool strict = true)
151     {
152         (void)detail::is_sax_static_asserts<SAX, BasicJsonType> {};
153         const bool result = sax_parse_internal(sax);
154 
155         // strict mode: next byte must be EOF
156         if (result and strict and (get_token() != token_type::end_of_input))
157         {
158             return sax->parse_error(m_lexer.get_position(),
159                                     m_lexer.get_token_string(),
160                                     parse_error::create(101, m_lexer.get_position(),
161                                             exception_message(token_type::end_of_input, "value")));
162         }
163 
164         return result;
165     }
166 
167   private:
168     template <typename SAX>
sax_parse_internal(SAX * sax)169     bool sax_parse_internal(SAX* sax)
170     {
171         // stack to remember the hierarchy of structured values we are parsing
172         // true = array; false = object
173         std::vector<bool> states;
174         // value to avoid a goto (see comment where set to true)
175         bool skip_to_state_evaluation = false;
176 
177         while (true)
178         {
179             if (not skip_to_state_evaluation)
180             {
181                 // invariant: get_token() was called before each iteration
182                 switch (last_token)
183                 {
184                     case token_type::begin_object:
185                     {
186                         if (JSON_UNLIKELY(not sax->start_object(std::size_t(-1))))
187                         {
188                             return false;
189                         }
190 
191                         // closing } -> we are done
192                         if (get_token() == token_type::end_object)
193                         {
194                             if (JSON_UNLIKELY(not sax->end_object()))
195                             {
196                                 return false;
197                             }
198                             break;
199                         }
200 
201                         // parse key
202                         if (JSON_UNLIKELY(last_token != token_type::value_string))
203                         {
204                             return sax->parse_error(m_lexer.get_position(),
205                                                     m_lexer.get_token_string(),
206                                                     parse_error::create(101, m_lexer.get_position(),
207                                                             exception_message(token_type::value_string, "object key")));
208                         }
209                         if (JSON_UNLIKELY(not sax->key(m_lexer.get_string())))
210                         {
211                             return false;
212                         }
213 
214                         // parse separator (:)
215                         if (JSON_UNLIKELY(get_token() != token_type::name_separator))
216                         {
217                             return sax->parse_error(m_lexer.get_position(),
218                                                     m_lexer.get_token_string(),
219                                                     parse_error::create(101, m_lexer.get_position(),
220                                                             exception_message(token_type::name_separator, "object separator")));
221                         }
222 
223                         // remember we are now inside an object
224                         states.push_back(false);
225 
226                         // parse values
227                         get_token();
228                         continue;
229                     }
230 
231                     case token_type::begin_array:
232                     {
233                         if (JSON_UNLIKELY(not sax->start_array(std::size_t(-1))))
234                         {
235                             return false;
236                         }
237 
238                         // closing ] -> we are done
239                         if (get_token() == token_type::end_array)
240                         {
241                             if (JSON_UNLIKELY(not sax->end_array()))
242                             {
243                                 return false;
244                             }
245                             break;
246                         }
247 
248                         // remember we are now inside an array
249                         states.push_back(true);
250 
251                         // parse values (no need to call get_token)
252                         continue;
253                     }
254 
255                     case token_type::value_float:
256                     {
257                         const auto res = m_lexer.get_number_float();
258 
259                         if (JSON_UNLIKELY(not std::isfinite(res)))
260                         {
261                             return sax->parse_error(m_lexer.get_position(),
262                                                     m_lexer.get_token_string(),
263                                                     out_of_range::create(406, "number overflow parsing '" + m_lexer.get_token_string() + "'"));
264                         }
265 
266                         if (JSON_UNLIKELY(not sax->number_float(res, m_lexer.get_string())))
267                         {
268                             return false;
269                         }
270 
271                         break;
272                     }
273 
274                     case token_type::literal_false:
275                     {
276                         if (JSON_UNLIKELY(not sax->boolean(false)))
277                         {
278                             return false;
279                         }
280                         break;
281                     }
282 
283                     case token_type::literal_null:
284                     {
285                         if (JSON_UNLIKELY(not sax->null()))
286                         {
287                             return false;
288                         }
289                         break;
290                     }
291 
292                     case token_type::literal_true:
293                     {
294                         if (JSON_UNLIKELY(not sax->boolean(true)))
295                         {
296                             return false;
297                         }
298                         break;
299                     }
300 
301                     case token_type::value_integer:
302                     {
303                         if (JSON_UNLIKELY(not sax->number_integer(m_lexer.get_number_integer())))
304                         {
305                             return false;
306                         }
307                         break;
308                     }
309 
310                     case token_type::value_string:
311                     {
312                         if (JSON_UNLIKELY(not sax->string(m_lexer.get_string())))
313                         {
314                             return false;
315                         }
316                         break;
317                     }
318 
319                     case token_type::value_unsigned:
320                     {
321                         if (JSON_UNLIKELY(not sax->number_unsigned(m_lexer.get_number_unsigned())))
322                         {
323                             return false;
324                         }
325                         break;
326                     }
327 
328                     case token_type::parse_error:
329                     {
330                         // using "uninitialized" to avoid "expected" message
331                         return sax->parse_error(m_lexer.get_position(),
332                                                 m_lexer.get_token_string(),
333                                                 parse_error::create(101, m_lexer.get_position(),
334                                                         exception_message(token_type::uninitialized, "value")));
335                     }
336 
337                     default: // the last token was unexpected
338                     {
339                         return sax->parse_error(m_lexer.get_position(),
340                                                 m_lexer.get_token_string(),
341                                                 parse_error::create(101, m_lexer.get_position(),
342                                                         exception_message(token_type::literal_or_value, "value")));
343                     }
344                 }
345             }
346             else
347             {
348                 skip_to_state_evaluation = false;
349             }
350 
351             // we reached this line after we successfully parsed a value
352             if (states.empty())
353             {
354                 // empty stack: we reached the end of the hierarchy: done
355                 return true;
356             }
357 
358             if (states.back())  // array
359             {
360                 // comma -> next value
361                 if (get_token() == token_type::value_separator)
362                 {
363                     // parse a new value
364                     get_token();
365                     continue;
366                 }
367 
368                 // closing ]
369                 if (JSON_LIKELY(last_token == token_type::end_array))
370                 {
371                     if (JSON_UNLIKELY(not sax->end_array()))
372                     {
373                         return false;
374                     }
375 
376                     // We are done with this array. Before we can parse a
377                     // new value, we need to evaluate the new state first.
378                     // By setting skip_to_state_evaluation to false, we
379                     // are effectively jumping to the beginning of this if.
380                     assert(not states.empty());
381                     states.pop_back();
382                     skip_to_state_evaluation = true;
383                     continue;
384                 }
385 
386                 return sax->parse_error(m_lexer.get_position(),
387                                         m_lexer.get_token_string(),
388                                         parse_error::create(101, m_lexer.get_position(),
389                                                 exception_message(token_type::end_array, "array")));
390             }
391             else  // object
392             {
393                 // comma -> next value
394                 if (get_token() == token_type::value_separator)
395                 {
396                     // parse key
397                     if (JSON_UNLIKELY(get_token() != token_type::value_string))
398                     {
399                         return sax->parse_error(m_lexer.get_position(),
400                                                 m_lexer.get_token_string(),
401                                                 parse_error::create(101, m_lexer.get_position(),
402                                                         exception_message(token_type::value_string, "object key")));
403                     }
404 
405                     if (JSON_UNLIKELY(not sax->key(m_lexer.get_string())))
406                     {
407                         return false;
408                     }
409 
410                     // parse separator (:)
411                     if (JSON_UNLIKELY(get_token() != token_type::name_separator))
412                     {
413                         return sax->parse_error(m_lexer.get_position(),
414                                                 m_lexer.get_token_string(),
415                                                 parse_error::create(101, m_lexer.get_position(),
416                                                         exception_message(token_type::name_separator, "object separator")));
417                     }
418 
419                     // parse values
420                     get_token();
421                     continue;
422                 }
423 
424                 // closing }
425                 if (JSON_LIKELY(last_token == token_type::end_object))
426                 {
427                     if (JSON_UNLIKELY(not sax->end_object()))
428                     {
429                         return false;
430                     }
431 
432                     // We are done with this object. Before we can parse a
433                     // new value, we need to evaluate the new state first.
434                     // By setting skip_to_state_evaluation to false, we
435                     // are effectively jumping to the beginning of this if.
436                     assert(not states.empty());
437                     states.pop_back();
438                     skip_to_state_evaluation = true;
439                     continue;
440                 }
441 
442                 return sax->parse_error(m_lexer.get_position(),
443                                         m_lexer.get_token_string(),
444                                         parse_error::create(101, m_lexer.get_position(),
445                                                 exception_message(token_type::end_object, "object")));
446             }
447         }
448     }
449 
450     /// get next token from lexer
get_token()451     token_type get_token()
452     {
453         return last_token = m_lexer.scan();
454     }
455 
exception_message(const token_type expected,const std::string & context)456     std::string exception_message(const token_type expected, const std::string& context)
457     {
458         std::string error_msg = "syntax error ";
459 
460         if (not context.empty())
461         {
462             error_msg += "while parsing " + context + " ";
463         }
464 
465         error_msg += "- ";
466 
467         if (last_token == token_type::parse_error)
468         {
469             error_msg += std::string(m_lexer.get_error_message()) + "; last read: '" +
470                          m_lexer.get_token_string() + "'";
471         }
472         else
473         {
474             error_msg += "unexpected " + std::string(lexer_t::token_type_name(last_token));
475         }
476 
477         if (expected != token_type::uninitialized)
478         {
479             error_msg += "; expected " + std::string(lexer_t::token_type_name(expected));
480         }
481 
482         return error_msg;
483     }
484 
485   private:
486     /// callback function
487     const parser_callback_t callback = nullptr;
488     /// the type of the last read token
489     token_type last_token = token_type::uninitialized;
490     /// the lexer
491     lexer_t m_lexer;
492     /// whether to throw exceptions in case of errors
493     const bool allow_exceptions = true;
494 };
495 }  // namespace detail
496 }  // namespace nlohmann
497