1 #pragma once 2 3 #include <cassert> // assert 4 #include <cmath> // isfinite 5 #include <cstdint> // uint8_t 6 #include <functional> // function 7 #include <string> // string 8 #include <utility> // move 9 #include <vector> // vector 10 11 #include <nlohmann/detail/exceptions.hpp> 12 #include <nlohmann/detail/input/input_adapters.hpp> 13 #include <nlohmann/detail/input/json_sax.hpp> 14 #include <nlohmann/detail/input/lexer.hpp> 15 #include <nlohmann/detail/macro_scope.hpp> 16 #include <nlohmann/detail/meta/is_sax.hpp> 17 #include <nlohmann/detail/value_t.hpp> 18 19 namespace nlohmann 20 { 21 namespace detail 22 { 23 //////////// 24 // parser // 25 //////////// 26 27 /*! 28 @brief syntax analysis 29 30 This class implements a recursive decent parser. 31 */ 32 template<typename BasicJsonType> 33 class parser 34 { 35 using number_integer_t = typename BasicJsonType::number_integer_t; 36 using number_unsigned_t = typename BasicJsonType::number_unsigned_t; 37 using number_float_t = typename BasicJsonType::number_float_t; 38 using string_t = typename BasicJsonType::string_t; 39 using lexer_t = lexer<BasicJsonType>; 40 using token_type = typename lexer_t::token_type; 41 42 public: 43 enum class parse_event_t : uint8_t 44 { 45 /// the parser read `{` and started to process a JSON object 46 object_start, 47 /// the parser read `}` and finished processing a JSON object 48 object_end, 49 /// the parser read `[` and started to process a JSON array 50 array_start, 51 /// the parser read `]` and finished processing a JSON array 52 array_end, 53 /// the parser read a key of a value in an object 54 key, 55 /// the parser finished reading a JSON value 56 value 57 }; 58 59 using parser_callback_t = 60 std::function<bool(int depth, parse_event_t event, BasicJsonType& parsed)>; 61 62 /// a parser reading from an input adapter parser(detail::input_adapter_t && adapter,const parser_callback_t cb=nullptr,const bool allow_exceptions_=true)63 explicit parser(detail::input_adapter_t&& adapter, 64 const parser_callback_t cb = nullptr, 65 const bool allow_exceptions_ = true) 66 : callback(cb), m_lexer(std::move(adapter)), allow_exceptions(allow_exceptions_) 67 { 68 // read first token 69 get_token(); 70 } 71 72 /*! 73 @brief public parser interface 74 75 @param[in] strict whether to expect the last token to be EOF 76 @param[in,out] result parsed JSON value 77 78 @throw parse_error.101 in case of an unexpected token 79 @throw parse_error.102 if to_unicode fails or surrogate error 80 @throw parse_error.103 if to_unicode fails 81 */ parse(const bool strict,BasicJsonType & result)82 void parse(const bool strict, BasicJsonType& result) 83 { 84 if (callback) 85 { 86 json_sax_dom_callback_parser<BasicJsonType> sdp(result, callback, allow_exceptions); 87 sax_parse_internal(&sdp); 88 result.assert_invariant(); 89 90 // in strict mode, input must be completely read 91 if (strict and (get_token() != token_type::end_of_input)) 92 { 93 sdp.parse_error(m_lexer.get_position(), 94 m_lexer.get_token_string(), 95 parse_error::create(101, m_lexer.get_position(), 96 exception_message(token_type::end_of_input, "value"))); 97 } 98 99 // in case of an error, return discarded value 100 if (sdp.is_errored()) 101 { 102 result = value_t::discarded; 103 return; 104 } 105 106 // set top-level value to null if it was discarded by the callback 107 // function 108 if (result.is_discarded()) 109 { 110 result = nullptr; 111 } 112 } 113 else 114 { 115 json_sax_dom_parser<BasicJsonType> sdp(result, allow_exceptions); 116 sax_parse_internal(&sdp); 117 result.assert_invariant(); 118 119 // in strict mode, input must be completely read 120 if (strict and (get_token() != token_type::end_of_input)) 121 { 122 sdp.parse_error(m_lexer.get_position(), 123 m_lexer.get_token_string(), 124 parse_error::create(101, m_lexer.get_position(), 125 exception_message(token_type::end_of_input, "value"))); 126 } 127 128 // in case of an error, return discarded value 129 if (sdp.is_errored()) 130 { 131 result = value_t::discarded; 132 return; 133 } 134 } 135 } 136 137 /*! 138 @brief public accept interface 139 140 @param[in] strict whether to expect the last token to be EOF 141 @return whether the input is a proper JSON text 142 */ accept(const bool strict=true)143 bool accept(const bool strict = true) 144 { 145 json_sax_acceptor<BasicJsonType> sax_acceptor; 146 return sax_parse(&sax_acceptor, strict); 147 } 148 149 template <typename SAX> 150 JSON_HEDLEY_NON_NULL(2) sax_parse(SAX * sax,const bool strict=true)151 bool sax_parse(SAX* sax, const bool strict = true) 152 { 153 (void)detail::is_sax_static_asserts<SAX, BasicJsonType> {}; 154 const bool result = sax_parse_internal(sax); 155 156 // strict mode: next byte must be EOF 157 if (result and strict and (get_token() != token_type::end_of_input)) 158 { 159 return sax->parse_error(m_lexer.get_position(), 160 m_lexer.get_token_string(), 161 parse_error::create(101, m_lexer.get_position(), 162 exception_message(token_type::end_of_input, "value"))); 163 } 164 165 return result; 166 } 167 168 private: 169 template <typename SAX> 170 JSON_HEDLEY_NON_NULL(2) sax_parse_internal(SAX * sax)171 bool sax_parse_internal(SAX* sax) 172 { 173 // stack to remember the hierarchy of structured values we are parsing 174 // true = array; false = object 175 std::vector<bool> states; 176 // value to avoid a goto (see comment where set to true) 177 bool skip_to_state_evaluation = false; 178 179 while (true) 180 { 181 if (not skip_to_state_evaluation) 182 { 183 // invariant: get_token() was called before each iteration 184 switch (last_token) 185 { 186 case token_type::begin_object: 187 { 188 if (JSON_HEDLEY_UNLIKELY(not sax->start_object(std::size_t(-1)))) 189 { 190 return false; 191 } 192 193 // closing } -> we are done 194 if (get_token() == token_type::end_object) 195 { 196 if (JSON_HEDLEY_UNLIKELY(not sax->end_object())) 197 { 198 return false; 199 } 200 break; 201 } 202 203 // parse key 204 if (JSON_HEDLEY_UNLIKELY(last_token != token_type::value_string)) 205 { 206 return sax->parse_error(m_lexer.get_position(), 207 m_lexer.get_token_string(), 208 parse_error::create(101, m_lexer.get_position(), 209 exception_message(token_type::value_string, "object key"))); 210 } 211 if (JSON_HEDLEY_UNLIKELY(not sax->key(m_lexer.get_string()))) 212 { 213 return false; 214 } 215 216 // parse separator (:) 217 if (JSON_HEDLEY_UNLIKELY(get_token() != token_type::name_separator)) 218 { 219 return sax->parse_error(m_lexer.get_position(), 220 m_lexer.get_token_string(), 221 parse_error::create(101, m_lexer.get_position(), 222 exception_message(token_type::name_separator, "object separator"))); 223 } 224 225 // remember we are now inside an object 226 states.push_back(false); 227 228 // parse values 229 get_token(); 230 continue; 231 } 232 233 case token_type::begin_array: 234 { 235 if (JSON_HEDLEY_UNLIKELY(not sax->start_array(std::size_t(-1)))) 236 { 237 return false; 238 } 239 240 // closing ] -> we are done 241 if (get_token() == token_type::end_array) 242 { 243 if (JSON_HEDLEY_UNLIKELY(not sax->end_array())) 244 { 245 return false; 246 } 247 break; 248 } 249 250 // remember we are now inside an array 251 states.push_back(true); 252 253 // parse values (no need to call get_token) 254 continue; 255 } 256 257 case token_type::value_float: 258 { 259 const auto res = m_lexer.get_number_float(); 260 261 if (JSON_HEDLEY_UNLIKELY(not std::isfinite(res))) 262 { 263 return sax->parse_error(m_lexer.get_position(), 264 m_lexer.get_token_string(), 265 out_of_range::create(406, "number overflow parsing '" + m_lexer.get_token_string() + "'")); 266 } 267 268 if (JSON_HEDLEY_UNLIKELY(not sax->number_float(res, m_lexer.get_string()))) 269 { 270 return false; 271 } 272 273 break; 274 } 275 276 case token_type::literal_false: 277 { 278 if (JSON_HEDLEY_UNLIKELY(not sax->boolean(false))) 279 { 280 return false; 281 } 282 break; 283 } 284 285 case token_type::literal_null: 286 { 287 if (JSON_HEDLEY_UNLIKELY(not sax->null())) 288 { 289 return false; 290 } 291 break; 292 } 293 294 case token_type::literal_true: 295 { 296 if (JSON_HEDLEY_UNLIKELY(not sax->boolean(true))) 297 { 298 return false; 299 } 300 break; 301 } 302 303 case token_type::value_integer: 304 { 305 if (JSON_HEDLEY_UNLIKELY(not sax->number_integer(m_lexer.get_number_integer()))) 306 { 307 return false; 308 } 309 break; 310 } 311 312 case token_type::value_string: 313 { 314 if (JSON_HEDLEY_UNLIKELY(not sax->string(m_lexer.get_string()))) 315 { 316 return false; 317 } 318 break; 319 } 320 321 case token_type::value_unsigned: 322 { 323 if (JSON_HEDLEY_UNLIKELY(not sax->number_unsigned(m_lexer.get_number_unsigned()))) 324 { 325 return false; 326 } 327 break; 328 } 329 330 case token_type::parse_error: 331 { 332 // using "uninitialized" to avoid "expected" message 333 return sax->parse_error(m_lexer.get_position(), 334 m_lexer.get_token_string(), 335 parse_error::create(101, m_lexer.get_position(), 336 exception_message(token_type::uninitialized, "value"))); 337 } 338 339 default: // the last token was unexpected 340 { 341 return sax->parse_error(m_lexer.get_position(), 342 m_lexer.get_token_string(), 343 parse_error::create(101, m_lexer.get_position(), 344 exception_message(token_type::literal_or_value, "value"))); 345 } 346 } 347 } 348 else 349 { 350 skip_to_state_evaluation = false; 351 } 352 353 // we reached this line after we successfully parsed a value 354 if (states.empty()) 355 { 356 // empty stack: we reached the end of the hierarchy: done 357 return true; 358 } 359 360 if (states.back()) // array 361 { 362 // comma -> next value 363 if (get_token() == token_type::value_separator) 364 { 365 // parse a new value 366 get_token(); 367 continue; 368 } 369 370 // closing ] 371 if (JSON_HEDLEY_LIKELY(last_token == token_type::end_array)) 372 { 373 if (JSON_HEDLEY_UNLIKELY(not sax->end_array())) 374 { 375 return false; 376 } 377 378 // We are done with this array. Before we can parse a 379 // new value, we need to evaluate the new state first. 380 // By setting skip_to_state_evaluation to false, we 381 // are effectively jumping to the beginning of this if. 382 assert(not states.empty()); 383 states.pop_back(); 384 skip_to_state_evaluation = true; 385 continue; 386 } 387 388 return sax->parse_error(m_lexer.get_position(), 389 m_lexer.get_token_string(), 390 parse_error::create(101, m_lexer.get_position(), 391 exception_message(token_type::end_array, "array"))); 392 } 393 else // object 394 { 395 // comma -> next value 396 if (get_token() == token_type::value_separator) 397 { 398 // parse key 399 if (JSON_HEDLEY_UNLIKELY(get_token() != token_type::value_string)) 400 { 401 return sax->parse_error(m_lexer.get_position(), 402 m_lexer.get_token_string(), 403 parse_error::create(101, m_lexer.get_position(), 404 exception_message(token_type::value_string, "object key"))); 405 } 406 407 if (JSON_HEDLEY_UNLIKELY(not sax->key(m_lexer.get_string()))) 408 { 409 return false; 410 } 411 412 // parse separator (:) 413 if (JSON_HEDLEY_UNLIKELY(get_token() != token_type::name_separator)) 414 { 415 return sax->parse_error(m_lexer.get_position(), 416 m_lexer.get_token_string(), 417 parse_error::create(101, m_lexer.get_position(), 418 exception_message(token_type::name_separator, "object separator"))); 419 } 420 421 // parse values 422 get_token(); 423 continue; 424 } 425 426 // closing } 427 if (JSON_HEDLEY_LIKELY(last_token == token_type::end_object)) 428 { 429 if (JSON_HEDLEY_UNLIKELY(not sax->end_object())) 430 { 431 return false; 432 } 433 434 // We are done with this object. Before we can parse a 435 // new value, we need to evaluate the new state first. 436 // By setting skip_to_state_evaluation to false, we 437 // are effectively jumping to the beginning of this if. 438 assert(not states.empty()); 439 states.pop_back(); 440 skip_to_state_evaluation = true; 441 continue; 442 } 443 444 return sax->parse_error(m_lexer.get_position(), 445 m_lexer.get_token_string(), 446 parse_error::create(101, m_lexer.get_position(), 447 exception_message(token_type::end_object, "object"))); 448 } 449 } 450 } 451 452 /// get next token from lexer get_token()453 token_type get_token() 454 { 455 return last_token = m_lexer.scan(); 456 } 457 exception_message(const token_type expected,const std::string & context)458 std::string exception_message(const token_type expected, const std::string& context) 459 { 460 std::string error_msg = "syntax error "; 461 462 if (not context.empty()) 463 { 464 error_msg += "while parsing " + context + " "; 465 } 466 467 error_msg += "- "; 468 469 if (last_token == token_type::parse_error) 470 { 471 error_msg += std::string(m_lexer.get_error_message()) + "; last read: '" + 472 m_lexer.get_token_string() + "'"; 473 } 474 else 475 { 476 error_msg += "unexpected " + std::string(lexer_t::token_type_name(last_token)); 477 } 478 479 if (expected != token_type::uninitialized) 480 { 481 error_msg += "; expected " + std::string(lexer_t::token_type_name(expected)); 482 } 483 484 return error_msg; 485 } 486 487 private: 488 /// callback function 489 const parser_callback_t callback = nullptr; 490 /// the type of the last read token 491 token_type last_token = token_type::uninitialized; 492 /// the lexer 493 lexer_t m_lexer; 494 /// whether to throw exceptions in case of errors 495 const bool allow_exceptions = true; 496 }; 497 } // namespace detail 498 } // namespace nlohmann 499