1 #pragma once
2 
3 #include <cassert> // assert
4 #include <cmath> // isfinite
5 #include <cstdint> // uint8_t
6 #include <functional> // function
7 #include <string> // string
8 #include <utility> // move
9 #include <vector> // vector
10 
11 #include <nlohmann/detail/exceptions.hpp>
12 #include <nlohmann/detail/input/input_adapters.hpp>
13 #include <nlohmann/detail/input/json_sax.hpp>
14 #include <nlohmann/detail/input/lexer.hpp>
15 #include <nlohmann/detail/macro_scope.hpp>
16 #include <nlohmann/detail/meta/is_sax.hpp>
17 #include <nlohmann/detail/value_t.hpp>
18 
19 namespace nlohmann
20 {
21 namespace detail
22 {
23 ////////////
24 // parser //
25 ////////////
26 
27 /*!
28 @brief syntax analysis
29 
30 This class implements a recursive decent parser.
31 */
32 template<typename BasicJsonType>
33 class parser
34 {
35     using number_integer_t = typename BasicJsonType::number_integer_t;
36     using number_unsigned_t = typename BasicJsonType::number_unsigned_t;
37     using number_float_t = typename BasicJsonType::number_float_t;
38     using string_t = typename BasicJsonType::string_t;
39     using lexer_t = lexer<BasicJsonType>;
40     using token_type = typename lexer_t::token_type;
41 
42   public:
43     enum class parse_event_t : uint8_t
44     {
45         /// the parser read `{` and started to process a JSON object
46         object_start,
47         /// the parser read `}` and finished processing a JSON object
48         object_end,
49         /// the parser read `[` and started to process a JSON array
50         array_start,
51         /// the parser read `]` and finished processing a JSON array
52         array_end,
53         /// the parser read a key of a value in an object
54         key,
55         /// the parser finished reading a JSON value
56         value
57     };
58 
59     using parser_callback_t =
60         std::function<bool(int depth, parse_event_t event, BasicJsonType& parsed)>;
61 
62     /// a parser reading from an input adapter
parser(detail::input_adapter_t && adapter,const parser_callback_t cb=nullptr,const bool allow_exceptions_=true)63     explicit parser(detail::input_adapter_t&& adapter,
64                     const parser_callback_t cb = nullptr,
65                     const bool allow_exceptions_ = true)
66         : callback(cb), m_lexer(std::move(adapter)), allow_exceptions(allow_exceptions_)
67     {
68         // read first token
69         get_token();
70     }
71 
72     /*!
73     @brief public parser interface
74 
75     @param[in] strict      whether to expect the last token to be EOF
76     @param[in,out] result  parsed JSON value
77 
78     @throw parse_error.101 in case of an unexpected token
79     @throw parse_error.102 if to_unicode fails or surrogate error
80     @throw parse_error.103 if to_unicode fails
81     */
parse(const bool strict,BasicJsonType & result)82     void parse(const bool strict, BasicJsonType& result)
83     {
84         if (callback)
85         {
86             json_sax_dom_callback_parser<BasicJsonType> sdp(result, callback, allow_exceptions);
87             sax_parse_internal(&sdp);
88             result.assert_invariant();
89 
90             // in strict mode, input must be completely read
91             if (strict and (get_token() != token_type::end_of_input))
92             {
93                 sdp.parse_error(m_lexer.get_position(),
94                                 m_lexer.get_token_string(),
95                                 parse_error::create(101, m_lexer.get_position(),
96                                                     exception_message(token_type::end_of_input, "value")));
97             }
98 
99             // in case of an error, return discarded value
100             if (sdp.is_errored())
101             {
102                 result = value_t::discarded;
103                 return;
104             }
105 
106             // set top-level value to null if it was discarded by the callback
107             // function
108             if (result.is_discarded())
109             {
110                 result = nullptr;
111             }
112         }
113         else
114         {
115             json_sax_dom_parser<BasicJsonType> sdp(result, allow_exceptions);
116             sax_parse_internal(&sdp);
117             result.assert_invariant();
118 
119             // in strict mode, input must be completely read
120             if (strict and (get_token() != token_type::end_of_input))
121             {
122                 sdp.parse_error(m_lexer.get_position(),
123                                 m_lexer.get_token_string(),
124                                 parse_error::create(101, m_lexer.get_position(),
125                                                     exception_message(token_type::end_of_input, "value")));
126             }
127 
128             // in case of an error, return discarded value
129             if (sdp.is_errored())
130             {
131                 result = value_t::discarded;
132                 return;
133             }
134         }
135     }
136 
137     /*!
138     @brief public accept interface
139 
140     @param[in] strict  whether to expect the last token to be EOF
141     @return whether the input is a proper JSON text
142     */
accept(const bool strict=true)143     bool accept(const bool strict = true)
144     {
145         json_sax_acceptor<BasicJsonType> sax_acceptor;
146         return sax_parse(&sax_acceptor, strict);
147     }
148 
149     template <typename SAX>
150     JSON_HEDLEY_NON_NULL(2)
sax_parse(SAX * sax,const bool strict=true)151     bool sax_parse(SAX* sax, const bool strict = true)
152     {
153         (void)detail::is_sax_static_asserts<SAX, BasicJsonType> {};
154         const bool result = sax_parse_internal(sax);
155 
156         // strict mode: next byte must be EOF
157         if (result and strict and (get_token() != token_type::end_of_input))
158         {
159             return sax->parse_error(m_lexer.get_position(),
160                                     m_lexer.get_token_string(),
161                                     parse_error::create(101, m_lexer.get_position(),
162                                             exception_message(token_type::end_of_input, "value")));
163         }
164 
165         return result;
166     }
167 
168   private:
169     template <typename SAX>
170     JSON_HEDLEY_NON_NULL(2)
sax_parse_internal(SAX * sax)171     bool sax_parse_internal(SAX* sax)
172     {
173         // stack to remember the hierarchy of structured values we are parsing
174         // true = array; false = object
175         std::vector<bool> states;
176         // value to avoid a goto (see comment where set to true)
177         bool skip_to_state_evaluation = false;
178 
179         while (true)
180         {
181             if (not skip_to_state_evaluation)
182             {
183                 // invariant: get_token() was called before each iteration
184                 switch (last_token)
185                 {
186                     case token_type::begin_object:
187                     {
188                         if (JSON_HEDLEY_UNLIKELY(not sax->start_object(std::size_t(-1))))
189                         {
190                             return false;
191                         }
192 
193                         // closing } -> we are done
194                         if (get_token() == token_type::end_object)
195                         {
196                             if (JSON_HEDLEY_UNLIKELY(not sax->end_object()))
197                             {
198                                 return false;
199                             }
200                             break;
201                         }
202 
203                         // parse key
204                         if (JSON_HEDLEY_UNLIKELY(last_token != token_type::value_string))
205                         {
206                             return sax->parse_error(m_lexer.get_position(),
207                                                     m_lexer.get_token_string(),
208                                                     parse_error::create(101, m_lexer.get_position(),
209                                                             exception_message(token_type::value_string, "object key")));
210                         }
211                         if (JSON_HEDLEY_UNLIKELY(not sax->key(m_lexer.get_string())))
212                         {
213                             return false;
214                         }
215 
216                         // parse separator (:)
217                         if (JSON_HEDLEY_UNLIKELY(get_token() != token_type::name_separator))
218                         {
219                             return sax->parse_error(m_lexer.get_position(),
220                                                     m_lexer.get_token_string(),
221                                                     parse_error::create(101, m_lexer.get_position(),
222                                                             exception_message(token_type::name_separator, "object separator")));
223                         }
224 
225                         // remember we are now inside an object
226                         states.push_back(false);
227 
228                         // parse values
229                         get_token();
230                         continue;
231                     }
232 
233                     case token_type::begin_array:
234                     {
235                         if (JSON_HEDLEY_UNLIKELY(not sax->start_array(std::size_t(-1))))
236                         {
237                             return false;
238                         }
239 
240                         // closing ] -> we are done
241                         if (get_token() == token_type::end_array)
242                         {
243                             if (JSON_HEDLEY_UNLIKELY(not sax->end_array()))
244                             {
245                                 return false;
246                             }
247                             break;
248                         }
249 
250                         // remember we are now inside an array
251                         states.push_back(true);
252 
253                         // parse values (no need to call get_token)
254                         continue;
255                     }
256 
257                     case token_type::value_float:
258                     {
259                         const auto res = m_lexer.get_number_float();
260 
261                         if (JSON_HEDLEY_UNLIKELY(not std::isfinite(res)))
262                         {
263                             return sax->parse_error(m_lexer.get_position(),
264                                                     m_lexer.get_token_string(),
265                                                     out_of_range::create(406, "number overflow parsing '" + m_lexer.get_token_string() + "'"));
266                         }
267 
268                         if (JSON_HEDLEY_UNLIKELY(not sax->number_float(res, m_lexer.get_string())))
269                         {
270                             return false;
271                         }
272 
273                         break;
274                     }
275 
276                     case token_type::literal_false:
277                     {
278                         if (JSON_HEDLEY_UNLIKELY(not sax->boolean(false)))
279                         {
280                             return false;
281                         }
282                         break;
283                     }
284 
285                     case token_type::literal_null:
286                     {
287                         if (JSON_HEDLEY_UNLIKELY(not sax->null()))
288                         {
289                             return false;
290                         }
291                         break;
292                     }
293 
294                     case token_type::literal_true:
295                     {
296                         if (JSON_HEDLEY_UNLIKELY(not sax->boolean(true)))
297                         {
298                             return false;
299                         }
300                         break;
301                     }
302 
303                     case token_type::value_integer:
304                     {
305                         if (JSON_HEDLEY_UNLIKELY(not sax->number_integer(m_lexer.get_number_integer())))
306                         {
307                             return false;
308                         }
309                         break;
310                     }
311 
312                     case token_type::value_string:
313                     {
314                         if (JSON_HEDLEY_UNLIKELY(not sax->string(m_lexer.get_string())))
315                         {
316                             return false;
317                         }
318                         break;
319                     }
320 
321                     case token_type::value_unsigned:
322                     {
323                         if (JSON_HEDLEY_UNLIKELY(not sax->number_unsigned(m_lexer.get_number_unsigned())))
324                         {
325                             return false;
326                         }
327                         break;
328                     }
329 
330                     case token_type::parse_error:
331                     {
332                         // using "uninitialized" to avoid "expected" message
333                         return sax->parse_error(m_lexer.get_position(),
334                                                 m_lexer.get_token_string(),
335                                                 parse_error::create(101, m_lexer.get_position(),
336                                                         exception_message(token_type::uninitialized, "value")));
337                     }
338 
339                     default: // the last token was unexpected
340                     {
341                         return sax->parse_error(m_lexer.get_position(),
342                                                 m_lexer.get_token_string(),
343                                                 parse_error::create(101, m_lexer.get_position(),
344                                                         exception_message(token_type::literal_or_value, "value")));
345                     }
346                 }
347             }
348             else
349             {
350                 skip_to_state_evaluation = false;
351             }
352 
353             // we reached this line after we successfully parsed a value
354             if (states.empty())
355             {
356                 // empty stack: we reached the end of the hierarchy: done
357                 return true;
358             }
359 
360             if (states.back())  // array
361             {
362                 // comma -> next value
363                 if (get_token() == token_type::value_separator)
364                 {
365                     // parse a new value
366                     get_token();
367                     continue;
368                 }
369 
370                 // closing ]
371                 if (JSON_HEDLEY_LIKELY(last_token == token_type::end_array))
372                 {
373                     if (JSON_HEDLEY_UNLIKELY(not sax->end_array()))
374                     {
375                         return false;
376                     }
377 
378                     // We are done with this array. Before we can parse a
379                     // new value, we need to evaluate the new state first.
380                     // By setting skip_to_state_evaluation to false, we
381                     // are effectively jumping to the beginning of this if.
382                     assert(not states.empty());
383                     states.pop_back();
384                     skip_to_state_evaluation = true;
385                     continue;
386                 }
387 
388                 return sax->parse_error(m_lexer.get_position(),
389                                         m_lexer.get_token_string(),
390                                         parse_error::create(101, m_lexer.get_position(),
391                                                 exception_message(token_type::end_array, "array")));
392             }
393             else  // object
394             {
395                 // comma -> next value
396                 if (get_token() == token_type::value_separator)
397                 {
398                     // parse key
399                     if (JSON_HEDLEY_UNLIKELY(get_token() != token_type::value_string))
400                     {
401                         return sax->parse_error(m_lexer.get_position(),
402                                                 m_lexer.get_token_string(),
403                                                 parse_error::create(101, m_lexer.get_position(),
404                                                         exception_message(token_type::value_string, "object key")));
405                     }
406 
407                     if (JSON_HEDLEY_UNLIKELY(not sax->key(m_lexer.get_string())))
408                     {
409                         return false;
410                     }
411 
412                     // parse separator (:)
413                     if (JSON_HEDLEY_UNLIKELY(get_token() != token_type::name_separator))
414                     {
415                         return sax->parse_error(m_lexer.get_position(),
416                                                 m_lexer.get_token_string(),
417                                                 parse_error::create(101, m_lexer.get_position(),
418                                                         exception_message(token_type::name_separator, "object separator")));
419                     }
420 
421                     // parse values
422                     get_token();
423                     continue;
424                 }
425 
426                 // closing }
427                 if (JSON_HEDLEY_LIKELY(last_token == token_type::end_object))
428                 {
429                     if (JSON_HEDLEY_UNLIKELY(not sax->end_object()))
430                     {
431                         return false;
432                     }
433 
434                     // We are done with this object. Before we can parse a
435                     // new value, we need to evaluate the new state first.
436                     // By setting skip_to_state_evaluation to false, we
437                     // are effectively jumping to the beginning of this if.
438                     assert(not states.empty());
439                     states.pop_back();
440                     skip_to_state_evaluation = true;
441                     continue;
442                 }
443 
444                 return sax->parse_error(m_lexer.get_position(),
445                                         m_lexer.get_token_string(),
446                                         parse_error::create(101, m_lexer.get_position(),
447                                                 exception_message(token_type::end_object, "object")));
448             }
449         }
450     }
451 
452     /// get next token from lexer
get_token()453     token_type get_token()
454     {
455         return last_token = m_lexer.scan();
456     }
457 
exception_message(const token_type expected,const std::string & context)458     std::string exception_message(const token_type expected, const std::string& context)
459     {
460         std::string error_msg = "syntax error ";
461 
462         if (not context.empty())
463         {
464             error_msg += "while parsing " + context + " ";
465         }
466 
467         error_msg += "- ";
468 
469         if (last_token == token_type::parse_error)
470         {
471             error_msg += std::string(m_lexer.get_error_message()) + "; last read: '" +
472                          m_lexer.get_token_string() + "'";
473         }
474         else
475         {
476             error_msg += "unexpected " + std::string(lexer_t::token_type_name(last_token));
477         }
478 
479         if (expected != token_type::uninitialized)
480         {
481             error_msg += "; expected " + std::string(lexer_t::token_type_name(expected));
482         }
483 
484         return error_msg;
485     }
486 
487   private:
488     /// callback function
489     const parser_callback_t callback = nullptr;
490     /// the type of the last read token
491     token_type last_token = token_type::uninitialized;
492     /// the lexer
493     lexer_t m_lexer;
494     /// whether to throw exceptions in case of errors
495     const bool allow_exceptions = true;
496 };
497 }  // namespace detail
498 }  // namespace nlohmann
499