1 #pragma once 2 3 #include <cassert> // assert 4 #include <cmath> // isfinite 5 #include <cstdint> // uint8_t 6 #include <functional> // function 7 #include <string> // string 8 #include <utility> // move 9 #include <vector> // vector 10 11 #include <nlohmann/detail/exceptions.hpp> 12 #include <nlohmann/detail/input/input_adapters.hpp> 13 #include <nlohmann/detail/input/json_sax.hpp> 14 #include <nlohmann/detail/input/lexer.hpp> 15 #include <nlohmann/detail/macro_scope.hpp> 16 #include <nlohmann/detail/meta/is_sax.hpp> 17 #include <nlohmann/detail/value_t.hpp> 18 19 namespace nlohmann 20 { 21 namespace detail 22 { 23 //////////// 24 // parser // 25 //////////// 26 27 /*! 28 @brief syntax analysis 29 30 This class implements a recursive decent parser. 31 */ 32 template<typename BasicJsonType> 33 class parser 34 { 35 using number_integer_t = typename BasicJsonType::number_integer_t; 36 using number_unsigned_t = typename BasicJsonType::number_unsigned_t; 37 using number_float_t = typename BasicJsonType::number_float_t; 38 using string_t = typename BasicJsonType::string_t; 39 using lexer_t = lexer<BasicJsonType>; 40 using token_type = typename lexer_t::token_type; 41 42 public: 43 enum class parse_event_t : uint8_t 44 { 45 /// the parser read `{` and started to process a JSON object 46 object_start, 47 /// the parser read `}` and finished processing a JSON object 48 object_end, 49 /// the parser read `[` and started to process a JSON array 50 array_start, 51 /// the parser read `]` and finished processing a JSON array 52 array_end, 53 /// the parser read a key of a value in an object 54 key, 55 /// the parser finished reading a JSON value 56 value 57 }; 58 59 using parser_callback_t = 60 std::function<bool(int depth, parse_event_t event, BasicJsonType& parsed)>; 61 62 /// a parser reading from an input adapter parser(detail::input_adapter_t && adapter,const parser_callback_t cb=nullptr,const bool allow_exceptions_=true)63 explicit parser(detail::input_adapter_t&& adapter, 64 const parser_callback_t cb = nullptr, 65 const bool allow_exceptions_ = true) 66 : callback(cb), m_lexer(std::move(adapter)), allow_exceptions(allow_exceptions_) 67 { 68 // read first token 69 get_token(); 70 } 71 72 /*! 73 @brief public parser interface 74 75 @param[in] strict whether to expect the last token to be EOF 76 @param[in,out] result parsed JSON value 77 78 @throw parse_error.101 in case of an unexpected token 79 @throw parse_error.102 if to_unicode fails or surrogate error 80 @throw parse_error.103 if to_unicode fails 81 */ parse(const bool strict,BasicJsonType & result)82 void parse(const bool strict, BasicJsonType& result) 83 { 84 if (callback) 85 { 86 json_sax_dom_callback_parser<BasicJsonType> sdp(result, callback, allow_exceptions); 87 sax_parse_internal(&sdp); 88 result.assert_invariant(); 89 90 // in strict mode, input must be completely read 91 if (strict and (get_token() != token_type::end_of_input)) 92 { 93 sdp.parse_error(m_lexer.get_position(), 94 m_lexer.get_token_string(), 95 parse_error::create(101, m_lexer.get_position(), 96 exception_message(token_type::end_of_input, "value"))); 97 } 98 99 // in case of an error, return discarded value 100 if (sdp.is_errored()) 101 { 102 result = value_t::discarded; 103 return; 104 } 105 106 // set top-level value to null if it was discarded by the callback 107 // function 108 if (result.is_discarded()) 109 { 110 result = nullptr; 111 } 112 } 113 else 114 { 115 json_sax_dom_parser<BasicJsonType> sdp(result, allow_exceptions); 116 sax_parse_internal(&sdp); 117 result.assert_invariant(); 118 119 // in strict mode, input must be completely read 120 if (strict and (get_token() != token_type::end_of_input)) 121 { 122 sdp.parse_error(m_lexer.get_position(), 123 m_lexer.get_token_string(), 124 parse_error::create(101, m_lexer.get_position(), 125 exception_message(token_type::end_of_input, "value"))); 126 } 127 128 // in case of an error, return discarded value 129 if (sdp.is_errored()) 130 { 131 result = value_t::discarded; 132 return; 133 } 134 } 135 } 136 137 /*! 138 @brief public accept interface 139 140 @param[in] strict whether to expect the last token to be EOF 141 @return whether the input is a proper JSON text 142 */ accept(const bool strict=true)143 bool accept(const bool strict = true) 144 { 145 json_sax_acceptor<BasicJsonType> sax_acceptor; 146 return sax_parse(&sax_acceptor, strict); 147 } 148 149 template <typename SAX> sax_parse(SAX * sax,const bool strict=true)150 bool sax_parse(SAX* sax, const bool strict = true) 151 { 152 (void)detail::is_sax_static_asserts<SAX, BasicJsonType> {}; 153 const bool result = sax_parse_internal(sax); 154 155 // strict mode: next byte must be EOF 156 if (result and strict and (get_token() != token_type::end_of_input)) 157 { 158 return sax->parse_error(m_lexer.get_position(), 159 m_lexer.get_token_string(), 160 parse_error::create(101, m_lexer.get_position(), 161 exception_message(token_type::end_of_input, "value"))); 162 } 163 164 return result; 165 } 166 167 private: 168 template <typename SAX> sax_parse_internal(SAX * sax)169 bool sax_parse_internal(SAX* sax) 170 { 171 // stack to remember the hierarchy of structured values we are parsing 172 // true = array; false = object 173 std::vector<bool> states; 174 // value to avoid a goto (see comment where set to true) 175 bool skip_to_state_evaluation = false; 176 177 while (true) 178 { 179 if (not skip_to_state_evaluation) 180 { 181 // invariant: get_token() was called before each iteration 182 switch (last_token) 183 { 184 case token_type::begin_object: 185 { 186 if (JSON_UNLIKELY(not sax->start_object(std::size_t(-1)))) 187 { 188 return false; 189 } 190 191 // closing } -> we are done 192 if (get_token() == token_type::end_object) 193 { 194 if (JSON_UNLIKELY(not sax->end_object())) 195 { 196 return false; 197 } 198 break; 199 } 200 201 // parse key 202 if (JSON_UNLIKELY(last_token != token_type::value_string)) 203 { 204 return sax->parse_error(m_lexer.get_position(), 205 m_lexer.get_token_string(), 206 parse_error::create(101, m_lexer.get_position(), 207 exception_message(token_type::value_string, "object key"))); 208 } 209 if (JSON_UNLIKELY(not sax->key(m_lexer.get_string()))) 210 { 211 return false; 212 } 213 214 // parse separator (:) 215 if (JSON_UNLIKELY(get_token() != token_type::name_separator)) 216 { 217 return sax->parse_error(m_lexer.get_position(), 218 m_lexer.get_token_string(), 219 parse_error::create(101, m_lexer.get_position(), 220 exception_message(token_type::name_separator, "object separator"))); 221 } 222 223 // remember we are now inside an object 224 states.push_back(false); 225 226 // parse values 227 get_token(); 228 continue; 229 } 230 231 case token_type::begin_array: 232 { 233 if (JSON_UNLIKELY(not sax->start_array(std::size_t(-1)))) 234 { 235 return false; 236 } 237 238 // closing ] -> we are done 239 if (get_token() == token_type::end_array) 240 { 241 if (JSON_UNLIKELY(not sax->end_array())) 242 { 243 return false; 244 } 245 break; 246 } 247 248 // remember we are now inside an array 249 states.push_back(true); 250 251 // parse values (no need to call get_token) 252 continue; 253 } 254 255 case token_type::value_float: 256 { 257 const auto res = m_lexer.get_number_float(); 258 259 if (JSON_UNLIKELY(not std::isfinite(res))) 260 { 261 return sax->parse_error(m_lexer.get_position(), 262 m_lexer.get_token_string(), 263 out_of_range::create(406, "number overflow parsing '" + m_lexer.get_token_string() + "'")); 264 } 265 266 if (JSON_UNLIKELY(not sax->number_float(res, m_lexer.get_string()))) 267 { 268 return false; 269 } 270 271 break; 272 } 273 274 case token_type::literal_false: 275 { 276 if (JSON_UNLIKELY(not sax->boolean(false))) 277 { 278 return false; 279 } 280 break; 281 } 282 283 case token_type::literal_null: 284 { 285 if (JSON_UNLIKELY(not sax->null())) 286 { 287 return false; 288 } 289 break; 290 } 291 292 case token_type::literal_true: 293 { 294 if (JSON_UNLIKELY(not sax->boolean(true))) 295 { 296 return false; 297 } 298 break; 299 } 300 301 case token_type::value_integer: 302 { 303 if (JSON_UNLIKELY(not sax->number_integer(m_lexer.get_number_integer()))) 304 { 305 return false; 306 } 307 break; 308 } 309 310 case token_type::value_string: 311 { 312 if (JSON_UNLIKELY(not sax->string(m_lexer.get_string()))) 313 { 314 return false; 315 } 316 break; 317 } 318 319 case token_type::value_unsigned: 320 { 321 if (JSON_UNLIKELY(not sax->number_unsigned(m_lexer.get_number_unsigned()))) 322 { 323 return false; 324 } 325 break; 326 } 327 328 case token_type::parse_error: 329 { 330 // using "uninitialized" to avoid "expected" message 331 return sax->parse_error(m_lexer.get_position(), 332 m_lexer.get_token_string(), 333 parse_error::create(101, m_lexer.get_position(), 334 exception_message(token_type::uninitialized, "value"))); 335 } 336 337 default: // the last token was unexpected 338 { 339 return sax->parse_error(m_lexer.get_position(), 340 m_lexer.get_token_string(), 341 parse_error::create(101, m_lexer.get_position(), 342 exception_message(token_type::literal_or_value, "value"))); 343 } 344 } 345 } 346 else 347 { 348 skip_to_state_evaluation = false; 349 } 350 351 // we reached this line after we successfully parsed a value 352 if (states.empty()) 353 { 354 // empty stack: we reached the end of the hierarchy: done 355 return true; 356 } 357 358 if (states.back()) // array 359 { 360 // comma -> next value 361 if (get_token() == token_type::value_separator) 362 { 363 // parse a new value 364 get_token(); 365 continue; 366 } 367 368 // closing ] 369 if (JSON_LIKELY(last_token == token_type::end_array)) 370 { 371 if (JSON_UNLIKELY(not sax->end_array())) 372 { 373 return false; 374 } 375 376 // We are done with this array. Before we can parse a 377 // new value, we need to evaluate the new state first. 378 // By setting skip_to_state_evaluation to false, we 379 // are effectively jumping to the beginning of this if. 380 assert(not states.empty()); 381 states.pop_back(); 382 skip_to_state_evaluation = true; 383 continue; 384 } 385 386 return sax->parse_error(m_lexer.get_position(), 387 m_lexer.get_token_string(), 388 parse_error::create(101, m_lexer.get_position(), 389 exception_message(token_type::end_array, "array"))); 390 } 391 else // object 392 { 393 // comma -> next value 394 if (get_token() == token_type::value_separator) 395 { 396 // parse key 397 if (JSON_UNLIKELY(get_token() != token_type::value_string)) 398 { 399 return sax->parse_error(m_lexer.get_position(), 400 m_lexer.get_token_string(), 401 parse_error::create(101, m_lexer.get_position(), 402 exception_message(token_type::value_string, "object key"))); 403 } 404 405 if (JSON_UNLIKELY(not sax->key(m_lexer.get_string()))) 406 { 407 return false; 408 } 409 410 // parse separator (:) 411 if (JSON_UNLIKELY(get_token() != token_type::name_separator)) 412 { 413 return sax->parse_error(m_lexer.get_position(), 414 m_lexer.get_token_string(), 415 parse_error::create(101, m_lexer.get_position(), 416 exception_message(token_type::name_separator, "object separator"))); 417 } 418 419 // parse values 420 get_token(); 421 continue; 422 } 423 424 // closing } 425 if (JSON_LIKELY(last_token == token_type::end_object)) 426 { 427 if (JSON_UNLIKELY(not sax->end_object())) 428 { 429 return false; 430 } 431 432 // We are done with this object. Before we can parse a 433 // new value, we need to evaluate the new state first. 434 // By setting skip_to_state_evaluation to false, we 435 // are effectively jumping to the beginning of this if. 436 assert(not states.empty()); 437 states.pop_back(); 438 skip_to_state_evaluation = true; 439 continue; 440 } 441 442 return sax->parse_error(m_lexer.get_position(), 443 m_lexer.get_token_string(), 444 parse_error::create(101, m_lexer.get_position(), 445 exception_message(token_type::end_object, "object"))); 446 } 447 } 448 } 449 450 /// get next token from lexer get_token()451 token_type get_token() 452 { 453 return last_token = m_lexer.scan(); 454 } 455 exception_message(const token_type expected,const std::string & context)456 std::string exception_message(const token_type expected, const std::string& context) 457 { 458 std::string error_msg = "syntax error "; 459 460 if (not context.empty()) 461 { 462 error_msg += "while parsing " + context + " "; 463 } 464 465 error_msg += "- "; 466 467 if (last_token == token_type::parse_error) 468 { 469 error_msg += std::string(m_lexer.get_error_message()) + "; last read: '" + 470 m_lexer.get_token_string() + "'"; 471 } 472 else 473 { 474 error_msg += "unexpected " + std::string(lexer_t::token_type_name(last_token)); 475 } 476 477 if (expected != token_type::uninitialized) 478 { 479 error_msg += "; expected " + std::string(lexer_t::token_type_name(expected)); 480 } 481 482 return error_msg; 483 } 484 485 private: 486 /// callback function 487 const parser_callback_t callback = nullptr; 488 /// the type of the last read token 489 token_type last_token = token_type::uninitialized; 490 /// the lexer 491 lexer_t m_lexer; 492 /// whether to throw exceptions in case of errors 493 const bool allow_exceptions = true; 494 }; 495 } // namespace detail 496 } // namespace nlohmann 497