1 #pragma once
2 
3 #include <cmath> // isfinite
4 #include <cstdint> // uint8_t
5 #include <functional> // function
6 #include <string> // string
7 #include <utility> // move
8 #include <vector> // vector
9 
10 #include <nlohmann/detail/exceptions.hpp>
11 #include <nlohmann/detail/input/input_adapters.hpp>
12 #include <nlohmann/detail/input/json_sax.hpp>
13 #include <nlohmann/detail/input/lexer.hpp>
14 #include <nlohmann/detail/macro_scope.hpp>
15 #include <nlohmann/detail/meta/is_sax.hpp>
16 #include <nlohmann/detail/value_t.hpp>
17 
18 namespace nlohmann
19 {
20 namespace detail
21 {
22 ////////////
23 // parser //
24 ////////////
25 
/*!
@brief events a parser callback can be notified about

The enumerators are used as the `event` argument of a parser_callback_t.
The underlying type is spelled std::uint8_t because <cstdint> only
guarantees the name inside namespace std.
*/
enum class parse_event_t : std::uint8_t
{
    /// the parser read `{` and started to process a JSON object
    object_start,
    /// the parser read `}` and finished processing a JSON object
    object_end,
    /// the parser read `[` and started to process a JSON array
    array_start,
    /// the parser read `]` and finished processing a JSON array
    array_end,
    /// the parser read a key of a value in an object
    key,
    /// the parser finished reading a JSON value
    value
};
41 
42 template<typename BasicJsonType>
43 using parser_callback_t =
44     std::function<bool(int depth, parse_event_t event, BasicJsonType& parsed)>;
45 
/*!
@brief syntax analysis

This class implements a non-recursive descent parser: nested arrays and
objects are tracked on an explicit stack rather than on the call stack.
*/
51 template<typename BasicJsonType, typename InputAdapterType>
52 class parser
53 {
54     using number_integer_t = typename BasicJsonType::number_integer_t;
55     using number_unsigned_t = typename BasicJsonType::number_unsigned_t;
56     using number_float_t = typename BasicJsonType::number_float_t;
57     using string_t = typename BasicJsonType::string_t;
58     using lexer_t = lexer<BasicJsonType, InputAdapterType>;
59     using token_type = typename lexer_t::token_type;
60 
61   public:
62     /// a parser reading from an input adapter
parser(InputAdapterType && adapter,const parser_callback_t<BasicJsonType> cb=nullptr,const bool allow_exceptions_=true,const bool skip_comments=false)63     explicit parser(InputAdapterType&& adapter,
64                     const parser_callback_t<BasicJsonType> cb = nullptr,
65                     const bool allow_exceptions_ = true,
66                     const bool skip_comments = false)
67         : callback(cb)
68         , m_lexer(std::move(adapter), skip_comments)
69         , allow_exceptions(allow_exceptions_)
70     {
71         // read first token
72         get_token();
73     }
74 
75     /*!
76     @brief public parser interface
77 
78     @param[in] strict      whether to expect the last token to be EOF
79     @param[in,out] result  parsed JSON value
80 
81     @throw parse_error.101 in case of an unexpected token
82     @throw parse_error.102 if to_unicode fails or surrogate error
83     @throw parse_error.103 if to_unicode fails
84     */
parse(const bool strict,BasicJsonType & result)85     void parse(const bool strict, BasicJsonType& result)
86     {
87         if (callback)
88         {
89             json_sax_dom_callback_parser<BasicJsonType> sdp(result, callback, allow_exceptions);
90             sax_parse_internal(&sdp);
91             result.assert_invariant();
92 
93             // in strict mode, input must be completely read
94             if (strict && (get_token() != token_type::end_of_input))
95             {
96                 sdp.parse_error(m_lexer.get_position(),
97                                 m_lexer.get_token_string(),
98                                 parse_error::create(101, m_lexer.get_position(),
99                                                     exception_message(token_type::end_of_input, "value")));
100             }
101 
102             // in case of an error, return discarded value
103             if (sdp.is_errored())
104             {
105                 result = value_t::discarded;
106                 return;
107             }
108 
109             // set top-level value to null if it was discarded by the callback
110             // function
111             if (result.is_discarded())
112             {
113                 result = nullptr;
114             }
115         }
116         else
117         {
118             json_sax_dom_parser<BasicJsonType> sdp(result, allow_exceptions);
119             sax_parse_internal(&sdp);
120             result.assert_invariant();
121 
122             // in strict mode, input must be completely read
123             if (strict && (get_token() != token_type::end_of_input))
124             {
125                 sdp.parse_error(m_lexer.get_position(),
126                                 m_lexer.get_token_string(),
127                                 parse_error::create(101, m_lexer.get_position(),
128                                                     exception_message(token_type::end_of_input, "value")));
129             }
130 
131             // in case of an error, return discarded value
132             if (sdp.is_errored())
133             {
134                 result = value_t::discarded;
135                 return;
136             }
137         }
138     }
139 
140     /*!
141     @brief public accept interface
142 
143     @param[in] strict  whether to expect the last token to be EOF
144     @return whether the input is a proper JSON text
145     */
accept(const bool strict=true)146     bool accept(const bool strict = true)
147     {
148         json_sax_acceptor<BasicJsonType> sax_acceptor;
149         return sax_parse(&sax_acceptor, strict);
150     }
151 
152     template<typename SAX>
153     JSON_HEDLEY_NON_NULL(2)
sax_parse(SAX * sax,const bool strict=true)154     bool sax_parse(SAX* sax, const bool strict = true)
155     {
156         (void)detail::is_sax_static_asserts<SAX, BasicJsonType> {};
157         const bool result = sax_parse_internal(sax);
158 
159         // strict mode: next byte must be EOF
160         if (result && strict && (get_token() != token_type::end_of_input))
161         {
162             return sax->parse_error(m_lexer.get_position(),
163                                     m_lexer.get_token_string(),
164                                     parse_error::create(101, m_lexer.get_position(),
165                                             exception_message(token_type::end_of_input, "value")));
166         }
167 
168         return result;
169     }
170 
171   private:
172     template<typename SAX>
173     JSON_HEDLEY_NON_NULL(2)
sax_parse_internal(SAX * sax)174     bool sax_parse_internal(SAX* sax)
175     {
176         // stack to remember the hierarchy of structured values we are parsing
177         // true = array; false = object
178         std::vector<bool> states;
179         // value to avoid a goto (see comment where set to true)
180         bool skip_to_state_evaluation = false;
181 
182         while (true)
183         {
184             if (!skip_to_state_evaluation)
185             {
186                 // invariant: get_token() was called before each iteration
187                 switch (last_token)
188                 {
189                     case token_type::begin_object:
190                     {
191                         if (JSON_HEDLEY_UNLIKELY(!sax->start_object(std::size_t(-1))))
192                         {
193                             return false;
194                         }
195 
196                         // closing } -> we are done
197                         if (get_token() == token_type::end_object)
198                         {
199                             if (JSON_HEDLEY_UNLIKELY(!sax->end_object()))
200                             {
201                                 return false;
202                             }
203                             break;
204                         }
205 
206                         // parse key
207                         if (JSON_HEDLEY_UNLIKELY(last_token != token_type::value_string))
208                         {
209                             return sax->parse_error(m_lexer.get_position(),
210                                                     m_lexer.get_token_string(),
211                                                     parse_error::create(101, m_lexer.get_position(),
212                                                             exception_message(token_type::value_string, "object key")));
213                         }
214                         if (JSON_HEDLEY_UNLIKELY(!sax->key(m_lexer.get_string())))
215                         {
216                             return false;
217                         }
218 
219                         // parse separator (:)
220                         if (JSON_HEDLEY_UNLIKELY(get_token() != token_type::name_separator))
221                         {
222                             return sax->parse_error(m_lexer.get_position(),
223                                                     m_lexer.get_token_string(),
224                                                     parse_error::create(101, m_lexer.get_position(),
225                                                             exception_message(token_type::name_separator, "object separator")));
226                         }
227 
228                         // remember we are now inside an object
229                         states.push_back(false);
230 
231                         // parse values
232                         get_token();
233                         continue;
234                     }
235 
236                     case token_type::begin_array:
237                     {
238                         if (JSON_HEDLEY_UNLIKELY(!sax->start_array(std::size_t(-1))))
239                         {
240                             return false;
241                         }
242 
243                         // closing ] -> we are done
244                         if (get_token() == token_type::end_array)
245                         {
246                             if (JSON_HEDLEY_UNLIKELY(!sax->end_array()))
247                             {
248                                 return false;
249                             }
250                             break;
251                         }
252 
253                         // remember we are now inside an array
254                         states.push_back(true);
255 
256                         // parse values (no need to call get_token)
257                         continue;
258                     }
259 
260                     case token_type::value_float:
261                     {
262                         const auto res = m_lexer.get_number_float();
263 
264                         if (JSON_HEDLEY_UNLIKELY(!std::isfinite(res)))
265                         {
266                             return sax->parse_error(m_lexer.get_position(),
267                                                     m_lexer.get_token_string(),
268                                                     out_of_range::create(406, "number overflow parsing '" + m_lexer.get_token_string() + "'"));
269                         }
270 
271                         if (JSON_HEDLEY_UNLIKELY(!sax->number_float(res, m_lexer.get_string())))
272                         {
273                             return false;
274                         }
275 
276                         break;
277                     }
278 
279                     case token_type::literal_false:
280                     {
281                         if (JSON_HEDLEY_UNLIKELY(!sax->boolean(false)))
282                         {
283                             return false;
284                         }
285                         break;
286                     }
287 
288                     case token_type::literal_null:
289                     {
290                         if (JSON_HEDLEY_UNLIKELY(!sax->null()))
291                         {
292                             return false;
293                         }
294                         break;
295                     }
296 
297                     case token_type::literal_true:
298                     {
299                         if (JSON_HEDLEY_UNLIKELY(!sax->boolean(true)))
300                         {
301                             return false;
302                         }
303                         break;
304                     }
305 
306                     case token_type::value_integer:
307                     {
308                         if (JSON_HEDLEY_UNLIKELY(!sax->number_integer(m_lexer.get_number_integer())))
309                         {
310                             return false;
311                         }
312                         break;
313                     }
314 
315                     case token_type::value_string:
316                     {
317                         if (JSON_HEDLEY_UNLIKELY(!sax->string(m_lexer.get_string())))
318                         {
319                             return false;
320                         }
321                         break;
322                     }
323 
324                     case token_type::value_unsigned:
325                     {
326                         if (JSON_HEDLEY_UNLIKELY(!sax->number_unsigned(m_lexer.get_number_unsigned())))
327                         {
328                             return false;
329                         }
330                         break;
331                     }
332 
333                     case token_type::parse_error:
334                     {
335                         // using "uninitialized" to avoid "expected" message
336                         return sax->parse_error(m_lexer.get_position(),
337                                                 m_lexer.get_token_string(),
338                                                 parse_error::create(101, m_lexer.get_position(),
339                                                         exception_message(token_type::uninitialized, "value")));
340                     }
341 
342                     default: // the last token was unexpected
343                     {
344                         return sax->parse_error(m_lexer.get_position(),
345                                                 m_lexer.get_token_string(),
346                                                 parse_error::create(101, m_lexer.get_position(),
347                                                         exception_message(token_type::literal_or_value, "value")));
348                     }
349                 }
350             }
351             else
352             {
353                 skip_to_state_evaluation = false;
354             }
355 
356             // we reached this line after we successfully parsed a value
357             if (states.empty())
358             {
359                 // empty stack: we reached the end of the hierarchy: done
360                 return true;
361             }
362 
363             if (states.back())  // array
364             {
365                 // comma -> next value
366                 if (get_token() == token_type::value_separator)
367                 {
368                     // parse a new value
369                     get_token();
370                     continue;
371                 }
372 
373                 // closing ]
374                 if (JSON_HEDLEY_LIKELY(last_token == token_type::end_array))
375                 {
376                     if (JSON_HEDLEY_UNLIKELY(!sax->end_array()))
377                     {
378                         return false;
379                     }
380 
381                     // We are done with this array. Before we can parse a
382                     // new value, we need to evaluate the new state first.
383                     // By setting skip_to_state_evaluation to false, we
384                     // are effectively jumping to the beginning of this if.
385                     JSON_ASSERT(!states.empty());
386                     states.pop_back();
387                     skip_to_state_evaluation = true;
388                     continue;
389                 }
390 
391                 return sax->parse_error(m_lexer.get_position(),
392                                         m_lexer.get_token_string(),
393                                         parse_error::create(101, m_lexer.get_position(),
394                                                 exception_message(token_type::end_array, "array")));
395             }
396             else  // object
397             {
398                 // comma -> next value
399                 if (get_token() == token_type::value_separator)
400                 {
401                     // parse key
402                     if (JSON_HEDLEY_UNLIKELY(get_token() != token_type::value_string))
403                     {
404                         return sax->parse_error(m_lexer.get_position(),
405                                                 m_lexer.get_token_string(),
406                                                 parse_error::create(101, m_lexer.get_position(),
407                                                         exception_message(token_type::value_string, "object key")));
408                     }
409 
410                     if (JSON_HEDLEY_UNLIKELY(!sax->key(m_lexer.get_string())))
411                     {
412                         return false;
413                     }
414 
415                     // parse separator (:)
416                     if (JSON_HEDLEY_UNLIKELY(get_token() != token_type::name_separator))
417                     {
418                         return sax->parse_error(m_lexer.get_position(),
419                                                 m_lexer.get_token_string(),
420                                                 parse_error::create(101, m_lexer.get_position(),
421                                                         exception_message(token_type::name_separator, "object separator")));
422                     }
423 
424                     // parse values
425                     get_token();
426                     continue;
427                 }
428 
429                 // closing }
430                 if (JSON_HEDLEY_LIKELY(last_token == token_type::end_object))
431                 {
432                     if (JSON_HEDLEY_UNLIKELY(!sax->end_object()))
433                     {
434                         return false;
435                     }
436 
437                     // We are done with this object. Before we can parse a
438                     // new value, we need to evaluate the new state first.
439                     // By setting skip_to_state_evaluation to false, we
440                     // are effectively jumping to the beginning of this if.
441                     JSON_ASSERT(!states.empty());
442                     states.pop_back();
443                     skip_to_state_evaluation = true;
444                     continue;
445                 }
446 
447                 return sax->parse_error(m_lexer.get_position(),
448                                         m_lexer.get_token_string(),
449                                         parse_error::create(101, m_lexer.get_position(),
450                                                 exception_message(token_type::end_object, "object")));
451             }
452         }
453     }
454 
455     /// get next token from lexer
get_token()456     token_type get_token()
457     {
458         return last_token = m_lexer.scan();
459     }
460 
exception_message(const token_type expected,const std::string & context)461     std::string exception_message(const token_type expected, const std::string& context)
462     {
463         std::string error_msg = "syntax error ";
464 
465         if (!context.empty())
466         {
467             error_msg += "while parsing " + context + " ";
468         }
469 
470         error_msg += "- ";
471 
472         if (last_token == token_type::parse_error)
473         {
474             error_msg += std::string(m_lexer.get_error_message()) + "; last read: '" +
475                          m_lexer.get_token_string() + "'";
476         }
477         else
478         {
479             error_msg += "unexpected " + std::string(lexer_t::token_type_name(last_token));
480         }
481 
482         if (expected != token_type::uninitialized)
483         {
484             error_msg += "; expected " + std::string(lexer_t::token_type_name(expected));
485         }
486 
487         return error_msg;
488     }
489 
490   private:
491     /// callback function
492     const parser_callback_t<BasicJsonType> callback = nullptr;
493     /// the type of the last read token
494     token_type last_token = token_type::uninitialized;
495     /// the lexer
496     lexer_t m_lexer;
497     /// whether to throw exceptions in case of errors
498     const bool allow_exceptions = true;
499 };
500 }  // namespace detail
501 }  // namespace nlohmann
502