1 #pragma once
2 
3 #include <cassert> // assert
4 #include <cmath> // isfinite
5 #include <cstdint> // uint8_t
6 #include <functional> // function
7 #include <string> // string
8 #include <utility> // move
9 
10 #include <nlohmann/detail/exceptions.hpp>
11 #include <nlohmann/detail/macro_scope.hpp>
12 #include <nlohmann/detail/meta/is_sax.hpp>
13 #include <nlohmann/detail/input/input_adapters.hpp>
14 #include <nlohmann/detail/input/json_sax.hpp>
15 #include <nlohmann/detail/input/lexer.hpp>
16 #include <nlohmann/detail/value_t.hpp>
17 
18 namespace nlohmann
19 {
20 namespace detail
21 {
22 ////////////
23 // parser //
24 ////////////
25 
26 /*!
27 @brief syntax analysis
28 
This class implements a recursive descent parser.
30 */
31 template<typename BasicJsonType>
32 class parser
33 {
    // shorthands for the value types configured in BasicJsonType
    // NOTE(review): these four aliases are not referenced anywhere else in
    // this class body as shown here
    using number_integer_t = typename BasicJsonType::number_integer_t;
    using number_unsigned_t = typename BasicJsonType::number_unsigned_t;
    using number_float_t = typename BasicJsonType::number_float_t;
    using string_t = typename BasicJsonType::string_t;
    // the lexer instantiated for the same JSON type, and its token enumeration
    using lexer_t = lexer<BasicJsonType>;
    using token_type = typename lexer_t::token_type;
40 
41   public:
42     enum class parse_event_t : uint8_t
43     {
44         /// the parser read `{` and started to process a JSON object
45         object_start,
46         /// the parser read `}` and finished processing a JSON object
47         object_end,
48         /// the parser read `[` and started to process a JSON array
49         array_start,
50         /// the parser read `]` and finished processing a JSON array
51         array_end,
52         /// the parser read a key of a value in an object
53         key,
54         /// the parser finished reading a JSON value
55         value
56     };
57 
    /// signature of the optional user callback handed to the constructor:
    /// it receives a nesting depth, the kind of event, and a reference to a
    /// JSON value, and returns a bool.
    /// NOTE(review): the meaning of the return value is decided by
    /// json_sax_dom_callback_parser (presumably "keep this value") - confirm
    /// there before relying on it
    using parser_callback_t =
        std::function<bool(int depth, parse_event_t event, BasicJsonType& parsed)>;
60 
61     /// a parser reading from an input adapter
parser(detail::input_adapter_t && adapter,const parser_callback_t cb=nullptr,const bool allow_exceptions_=true)62     explicit parser(detail::input_adapter_t&& adapter,
63                     const parser_callback_t cb = nullptr,
64                     const bool allow_exceptions_ = true)
65         : callback(cb), m_lexer(std::move(adapter)), allow_exceptions(allow_exceptions_)
66     {
67         // read first token
68         get_token();
69     }
70 
71     /*!
72     @brief public parser interface
73 
74     @param[in] strict      whether to expect the last token to be EOF
75     @param[in,out] result  parsed JSON value
76 
77     @throw parse_error.101 in case of an unexpected token
78     @throw parse_error.102 if to_unicode fails or surrogate error
79     @throw parse_error.103 if to_unicode fails
80     */
parse(const bool strict,BasicJsonType & result)81     void parse(const bool strict, BasicJsonType& result)
82     {
83         if (callback)
84         {
85             json_sax_dom_callback_parser<BasicJsonType> sdp(result, callback, allow_exceptions);
86             sax_parse_internal(&sdp);
87             result.assert_invariant();
88 
89             // in strict mode, input must be completely read
90             if (strict and (get_token() != token_type::end_of_input))
91             {
92                 sdp.parse_error(m_lexer.get_position(),
93                                 m_lexer.get_token_string(),
94                                 parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_of_input)));
95             }
96 
97             // in case of an error, return discarded value
98             if (sdp.is_errored())
99             {
100                 result = value_t::discarded;
101                 return;
102             }
103 
104             // set top-level value to null if it was discarded by the callback
105             // function
106             if (result.is_discarded())
107             {
108                 result = nullptr;
109             }
110         }
111         else
112         {
113             json_sax_dom_parser<BasicJsonType> sdp(result, allow_exceptions);
114             sax_parse_internal(&sdp);
115             result.assert_invariant();
116 
117             // in strict mode, input must be completely read
118             if (strict and (get_token() != token_type::end_of_input))
119             {
120                 sdp.parse_error(m_lexer.get_position(),
121                                 m_lexer.get_token_string(),
122                                 parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_of_input)));
123             }
124 
125             // in case of an error, return discarded value
126             if (sdp.is_errored())
127             {
128                 result = value_t::discarded;
129                 return;
130             }
131         }
132     }
133 
134     /*!
135     @brief public accept interface
136 
137     @param[in] strict  whether to expect the last token to be EOF
138     @return whether the input is a proper JSON text
139     */
accept(const bool strict=true)140     bool accept(const bool strict = true)
141     {
142         json_sax_acceptor<BasicJsonType> sax_acceptor;
143         return sax_parse(&sax_acceptor, strict);
144     }
145 
146     template <typename SAX>
sax_parse(SAX * sax,const bool strict=true)147     bool sax_parse(SAX* sax, const bool strict = true)
148     {
149         (void)detail::is_sax_static_asserts<SAX, BasicJsonType> {};
150         const bool result = sax_parse_internal(sax);
151 
152         // strict mode: next byte must be EOF
153         if (result and strict and (get_token() != token_type::end_of_input))
154         {
155             return sax->parse_error(m_lexer.get_position(),
156                                     m_lexer.get_token_string(),
157                                     parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_of_input)));
158         }
159 
160         return result;
161     }
162 
163   private:
164     template <typename SAX>
sax_parse_internal(SAX * sax)165     bool sax_parse_internal(SAX* sax)
166     {
167         // stack to remember the hieararchy of structured values we are parsing
168         // true = array; false = object
169         std::vector<bool> states;
170         // value to avoid a goto (see comment where set to true)
171         bool skip_to_state_evaluation = false;
172 
173         while (true)
174         {
175             if (not skip_to_state_evaluation)
176             {
177                 // invariant: get_token() was called before each iteration
178                 switch (last_token)
179                 {
180                     case token_type::begin_object:
181                     {
182                         if (JSON_UNLIKELY(not sax->start_object(std::size_t(-1))))
183                         {
184                             return false;
185                         }
186 
187                         // closing } -> we are done
188                         if (get_token() == token_type::end_object)
189                         {
190                             if (JSON_UNLIKELY(not sax->end_object()))
191                             {
192                                 return false;
193                             }
194                             break;
195                         }
196 
197                         // parse key
198                         if (JSON_UNLIKELY(last_token != token_type::value_string))
199                         {
200                             return sax->parse_error(m_lexer.get_position(),
201                                                     m_lexer.get_token_string(),
202                                                     parse_error::create(101, m_lexer.get_position(), exception_message(token_type::value_string)));
203                         }
204                         else
205                         {
206                             if (JSON_UNLIKELY(not sax->key(m_lexer.get_string())))
207                             {
208                                 return false;
209                             }
210                         }
211 
212                         // parse separator (:)
213                         if (JSON_UNLIKELY(get_token() != token_type::name_separator))
214                         {
215                             return sax->parse_error(m_lexer.get_position(),
216                                                     m_lexer.get_token_string(),
217                                                     parse_error::create(101, m_lexer.get_position(), exception_message(token_type::name_separator)));
218                         }
219 
220                         // remember we are now inside an object
221                         states.push_back(false);
222 
223                         // parse values
224                         get_token();
225                         continue;
226                     }
227 
228                     case token_type::begin_array:
229                     {
230                         if (JSON_UNLIKELY(not sax->start_array(std::size_t(-1))))
231                         {
232                             return false;
233                         }
234 
235                         // closing ] -> we are done
236                         if (get_token() == token_type::end_array)
237                         {
238                             if (JSON_UNLIKELY(not sax->end_array()))
239                             {
240                                 return false;
241                             }
242                             break;
243                         }
244 
245                         // remember we are now inside an array
246                         states.push_back(true);
247 
248                         // parse values (no need to call get_token)
249                         continue;
250                     }
251 
252                     case token_type::value_float:
253                     {
254                         const auto res = m_lexer.get_number_float();
255 
256                         if (JSON_UNLIKELY(not std::isfinite(res)))
257                         {
258                             return sax->parse_error(m_lexer.get_position(),
259                                                     m_lexer.get_token_string(),
260                                                     out_of_range::create(406, "number overflow parsing '" + m_lexer.get_token_string() + "'"));
261                         }
262                         else
263                         {
264                             if (JSON_UNLIKELY(not sax->number_float(res, m_lexer.get_string())))
265                             {
266                                 return false;
267                             }
268                             break;
269                         }
270                     }
271 
272                     case token_type::literal_false:
273                     {
274                         if (JSON_UNLIKELY(not sax->boolean(false)))
275                         {
276                             return false;
277                         }
278                         break;
279                     }
280 
281                     case token_type::literal_null:
282                     {
283                         if (JSON_UNLIKELY(not sax->null()))
284                         {
285                             return false;
286                         }
287                         break;
288                     }
289 
290                     case token_type::literal_true:
291                     {
292                         if (JSON_UNLIKELY(not sax->boolean(true)))
293                         {
294                             return false;
295                         }
296                         break;
297                     }
298 
299                     case token_type::value_integer:
300                     {
301                         if (JSON_UNLIKELY(not sax->number_integer(m_lexer.get_number_integer())))
302                         {
303                             return false;
304                         }
305                         break;
306                     }
307 
308                     case token_type::value_string:
309                     {
310                         if (JSON_UNLIKELY(not sax->string(m_lexer.get_string())))
311                         {
312                             return false;
313                         }
314                         break;
315                     }
316 
317                     case token_type::value_unsigned:
318                     {
319                         if (JSON_UNLIKELY(not sax->number_unsigned(m_lexer.get_number_unsigned())))
320                         {
321                             return false;
322                         }
323                         break;
324                     }
325 
326                     case token_type::parse_error:
327                     {
328                         // using "uninitialized" to avoid "expected" message
329                         return sax->parse_error(m_lexer.get_position(),
330                                                 m_lexer.get_token_string(),
331                                                 parse_error::create(101, m_lexer.get_position(), exception_message(token_type::uninitialized)));
332                     }
333 
334                     default: // the last token was unexpected
335                     {
336                         return sax->parse_error(m_lexer.get_position(),
337                                                 m_lexer.get_token_string(),
338                                                 parse_error::create(101, m_lexer.get_position(), exception_message(token_type::literal_or_value)));
339                     }
340                 }
341             }
342             else
343             {
344                 skip_to_state_evaluation = false;
345             }
346 
347             // we reached this line after we successfully parsed a value
348             if (states.empty())
349             {
350                 // empty stack: we reached the end of the hieararchy: done
351                 return true;
352             }
353             else
354             {
355                 if (states.back())  // array
356                 {
357                     // comma -> next value
358                     if (get_token() == token_type::value_separator)
359                     {
360                         // parse a new value
361                         get_token();
362                         continue;
363                     }
364 
365                     // closing ]
366                     if (JSON_LIKELY(last_token == token_type::end_array))
367                     {
368                         if (JSON_UNLIKELY(not sax->end_array()))
369                         {
370                             return false;
371                         }
372 
373                         // We are done with this array. Before we can parse a
374                         // new value, we need to evaluate the new state first.
375                         // By setting skip_to_state_evaluation to false, we
376                         // are effectively jumping to the beginning of this if.
377                         assert(not states.empty());
378                         states.pop_back();
379                         skip_to_state_evaluation = true;
380                         continue;
381                     }
382                     else
383                     {
384                         return sax->parse_error(m_lexer.get_position(),
385                                                 m_lexer.get_token_string(),
386                                                 parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_array)));
387                     }
388                 }
389                 else  // object
390                 {
391                     // comma -> next value
392                     if (get_token() == token_type::value_separator)
393                     {
394                         // parse key
395                         if (JSON_UNLIKELY(get_token() != token_type::value_string))
396                         {
397                             return sax->parse_error(m_lexer.get_position(),
398                                                     m_lexer.get_token_string(),
399                                                     parse_error::create(101, m_lexer.get_position(), exception_message(token_type::value_string)));
400                         }
401                         else
402                         {
403                             if (JSON_UNLIKELY(not sax->key(m_lexer.get_string())))
404                             {
405                                 return false;
406                             }
407                         }
408 
409                         // parse separator (:)
410                         if (JSON_UNLIKELY(get_token() != token_type::name_separator))
411                         {
412                             return sax->parse_error(m_lexer.get_position(),
413                                                     m_lexer.get_token_string(),
414                                                     parse_error::create(101, m_lexer.get_position(), exception_message(token_type::name_separator)));
415                         }
416 
417                         // parse values
418                         get_token();
419                         continue;
420                     }
421 
422                     // closing }
423                     if (JSON_LIKELY(last_token == token_type::end_object))
424                     {
425                         if (JSON_UNLIKELY(not sax->end_object()))
426                         {
427                             return false;
428                         }
429 
430                         // We are done with this object. Before we can parse a
431                         // new value, we need to evaluate the new state first.
432                         // By setting skip_to_state_evaluation to false, we
433                         // are effectively jumping to the beginning of this if.
434                         assert(not states.empty());
435                         states.pop_back();
436                         skip_to_state_evaluation = true;
437                         continue;
438                     }
439                     else
440                     {
441                         return sax->parse_error(m_lexer.get_position(),
442                                                 m_lexer.get_token_string(),
443                                                 parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_object)));
444                     }
445                 }
446             }
447         }
448     }
449 
450     /// get next token from lexer
get_token()451     token_type get_token()
452     {
453         return (last_token = m_lexer.scan());
454     }
455 
exception_message(const token_type expected)456     std::string exception_message(const token_type expected)
457     {
458         std::string error_msg = "syntax error - ";
459         if (last_token == token_type::parse_error)
460         {
461             error_msg += std::string(m_lexer.get_error_message()) + "; last read: '" +
462                          m_lexer.get_token_string() + "'";
463         }
464         else
465         {
466             error_msg += "unexpected " + std::string(lexer_t::token_type_name(last_token));
467         }
468 
469         if (expected != token_type::uninitialized)
470         {
471             error_msg += "; expected " + std::string(lexer_t::token_type_name(expected));
472         }
473 
474         return error_msg;
475     }
476 
  private:
    /// callback function; may be empty, in which case parse() builds the
    /// result with json_sax_dom_parser instead of
    /// json_sax_dom_callback_parser
    const parser_callback_t callback = nullptr;
    /// the type of the last read token; overwritten by every call to
    /// get_token()
    token_type last_token = token_type::uninitialized;
    /// the lexer; constructed from the input adapter given to the
    /// constructor
    lexer_t m_lexer;
    /// whether to throw exceptions in case of errors; handed to the DOM SAX
    /// handlers created in parse()
    const bool allow_exceptions = true;
486 };
487 }
488 }
489