1 #pragma once 2 3 #include <cmath> // isfinite 4 #include <cstdint> // uint8_t 5 #include <functional> // function 6 #include <string> // string 7 #include <utility> // move 8 #include <vector> // vector 9 10 #include <nlohmann/detail/exceptions.hpp> 11 #include <nlohmann/detail/input/input_adapters.hpp> 12 #include <nlohmann/detail/input/json_sax.hpp> 13 #include <nlohmann/detail/input/lexer.hpp> 14 #include <nlohmann/detail/macro_scope.hpp> 15 #include <nlohmann/detail/meta/is_sax.hpp> 16 #include <nlohmann/detail/value_t.hpp> 17 18 namespace nlohmann 19 { 20 namespace detail 21 { 22 //////////// 23 // parser // 24 //////////// 25 26 enum class parse_event_t : uint8_t 27 { 28 /// the parser read `{` and started to process a JSON object 29 object_start, 30 /// the parser read `}` and finished processing a JSON object 31 object_end, 32 /// the parser read `[` and started to process a JSON array 33 array_start, 34 /// the parser read `]` and finished processing a JSON array 35 array_end, 36 /// the parser read a key of a value in an object 37 key, 38 /// the parser finished reading a JSON value 39 value 40 }; 41 42 template<typename BasicJsonType> 43 using parser_callback_t = 44 std::function<bool(int depth, parse_event_t event, BasicJsonType& parsed)>; 45 46 /*! 47 @brief syntax analysis 48 49 This class implements a recursive descent parser. 50 */ 51 template<typename BasicJsonType, typename InputAdapterType> 52 class parser 53 { 54 using number_integer_t = typename BasicJsonType::number_integer_t; 55 using number_unsigned_t = typename BasicJsonType::number_unsigned_t; 56 using number_float_t = typename BasicJsonType::number_float_t; 57 using string_t = typename BasicJsonType::string_t; 58 using lexer_t = lexer<BasicJsonType, InputAdapterType>; 59 using token_type = typename lexer_t::token_type; 60 61 public: 62 /// a parser reading from an input adapter parser(InputAdapterType && adapter,const parser_callback_t<BasicJsonType> cb=nullptr,const bool allow_exceptions_=true,const bool skip_comments=false)63 explicit parser(InputAdapterType&& adapter, 64 const parser_callback_t<BasicJsonType> cb = nullptr, 65 const bool allow_exceptions_ = true, 66 const bool skip_comments = false) 67 : callback(cb) 68 , m_lexer(std::move(adapter), skip_comments) 69 , allow_exceptions(allow_exceptions_) 70 { 71 // read first token 72 get_token(); 73 } 74 75 /*! 76 @brief public parser interface 77 78 @param[in] strict whether to expect the last token to be EOF 79 @param[in,out] result parsed JSON value 80 81 @throw parse_error.101 in case of an unexpected token 82 @throw parse_error.102 if to_unicode fails or surrogate error 83 @throw parse_error.103 if to_unicode fails 84 */ parse(const bool strict,BasicJsonType & result)85 void parse(const bool strict, BasicJsonType& result) 86 { 87 if (callback) 88 { 89 json_sax_dom_callback_parser<BasicJsonType> sdp(result, callback, allow_exceptions); 90 sax_parse_internal(&sdp); 91 result.assert_invariant(); 92 93 // in strict mode, input must be completely read 94 if (strict && (get_token() != token_type::end_of_input)) 95 { 96 sdp.parse_error(m_lexer.get_position(), 97 m_lexer.get_token_string(), 98 parse_error::create(101, m_lexer.get_position(), 99 exception_message(token_type::end_of_input, "value"))); 100 } 101 102 // in case of an error, return discarded value 103 if (sdp.is_errored()) 104 { 105 result = value_t::discarded; 106 return; 107 } 108 109 // set top-level value to null if it was discarded by the callback 110 // function 111 if (result.is_discarded()) 112 { 113 result = nullptr; 114 } 115 } 116 else 117 { 118 json_sax_dom_parser<BasicJsonType> sdp(result, allow_exceptions); 119 sax_parse_internal(&sdp); 120 result.assert_invariant(); 121 122 // in strict mode, input must be completely read 123 if (strict && (get_token() != token_type::end_of_input)) 124 { 125 sdp.parse_error(m_lexer.get_position(), 126 m_lexer.get_token_string(), 127 parse_error::create(101, m_lexer.get_position(), 128 exception_message(token_type::end_of_input, "value"))); 129 } 130 131 // in case of an error, return discarded value 132 if (sdp.is_errored()) 133 { 134 result = value_t::discarded; 135 return; 136 } 137 } 138 } 139 140 /*! 141 @brief public accept interface 142 143 @param[in] strict whether to expect the last token to be EOF 144 @return whether the input is a proper JSON text 145 */ accept(const bool strict=true)146 bool accept(const bool strict = true) 147 { 148 json_sax_acceptor<BasicJsonType> sax_acceptor; 149 return sax_parse(&sax_acceptor, strict); 150 } 151 152 template<typename SAX> 153 JSON_HEDLEY_NON_NULL(2) sax_parse(SAX * sax,const bool strict=true)154 bool sax_parse(SAX* sax, const bool strict = true) 155 { 156 (void)detail::is_sax_static_asserts<SAX, BasicJsonType> {}; 157 const bool result = sax_parse_internal(sax); 158 159 // strict mode: next byte must be EOF 160 if (result && strict && (get_token() != token_type::end_of_input)) 161 { 162 return sax->parse_error(m_lexer.get_position(), 163 m_lexer.get_token_string(), 164 parse_error::create(101, m_lexer.get_position(), 165 exception_message(token_type::end_of_input, "value"))); 166 } 167 168 return result; 169 } 170 171 private: 172 template<typename SAX> 173 JSON_HEDLEY_NON_NULL(2) sax_parse_internal(SAX * sax)174 bool sax_parse_internal(SAX* sax) 175 { 176 // stack to remember the hierarchy of structured values we are parsing 177 // true = array; false = object 178 std::vector<bool> states; 179 // value to avoid a goto (see comment where set to true) 180 bool skip_to_state_evaluation = false; 181 182 while (true) 183 { 184 if (!skip_to_state_evaluation) 185 { 186 // invariant: get_token() was called before each iteration 187 switch (last_token) 188 { 189 case token_type::begin_object: 190 { 191 if (JSON_HEDLEY_UNLIKELY(!sax->start_object(std::size_t(-1)))) 192 { 193 return false; 194 } 195 196 // closing } -> we are done 197 if (get_token() == token_type::end_object) 198 { 199 if (JSON_HEDLEY_UNLIKELY(!sax->end_object())) 200 { 201 return false; 202 } 203 break; 204 } 205 206 // parse key 207 if (JSON_HEDLEY_UNLIKELY(last_token != token_type::value_string)) 208 { 209 return sax->parse_error(m_lexer.get_position(), 210 m_lexer.get_token_string(), 211 parse_error::create(101, m_lexer.get_position(), 212 exception_message(token_type::value_string, "object key"))); 213 } 214 if (JSON_HEDLEY_UNLIKELY(!sax->key(m_lexer.get_string()))) 215 { 216 return false; 217 } 218 219 // parse separator (:) 220 if (JSON_HEDLEY_UNLIKELY(get_token() != token_type::name_separator)) 221 { 222 return sax->parse_error(m_lexer.get_position(), 223 m_lexer.get_token_string(), 224 parse_error::create(101, m_lexer.get_position(), 225 exception_message(token_type::name_separator, "object separator"))); 226 } 227 228 // remember we are now inside an object 229 states.push_back(false); 230 231 // parse values 232 get_token(); 233 continue; 234 } 235 236 case token_type::begin_array: 237 { 238 if (JSON_HEDLEY_UNLIKELY(!sax->start_array(std::size_t(-1)))) 239 { 240 return false; 241 } 242 243 // closing ] -> we are done 244 if (get_token() == token_type::end_array) 245 { 246 if (JSON_HEDLEY_UNLIKELY(!sax->end_array())) 247 { 248 return false; 249 } 250 break; 251 } 252 253 // remember we are now inside an array 254 states.push_back(true); 255 256 // parse values (no need to call get_token) 257 continue; 258 } 259 260 case token_type::value_float: 261 { 262 const auto res = m_lexer.get_number_float(); 263 264 if (JSON_HEDLEY_UNLIKELY(!std::isfinite(res))) 265 { 266 return sax->parse_error(m_lexer.get_position(), 267 m_lexer.get_token_string(), 268 out_of_range::create(406, "number overflow parsing '" + m_lexer.get_token_string() + "'")); 269 } 270 271 if (JSON_HEDLEY_UNLIKELY(!sax->number_float(res, m_lexer.get_string()))) 272 { 273 return false; 274 } 275 276 break; 277 } 278 279 case token_type::literal_false: 280 { 281 if (JSON_HEDLEY_UNLIKELY(!sax->boolean(false))) 282 { 283 return false; 284 } 285 break; 286 } 287 288 case token_type::literal_null: 289 { 290 if (JSON_HEDLEY_UNLIKELY(!sax->null())) 291 { 292 return false; 293 } 294 break; 295 } 296 297 case token_type::literal_true: 298 { 299 if (JSON_HEDLEY_UNLIKELY(!sax->boolean(true))) 300 { 301 return false; 302 } 303 break; 304 } 305 306 case token_type::value_integer: 307 { 308 if (JSON_HEDLEY_UNLIKELY(!sax->number_integer(m_lexer.get_number_integer()))) 309 { 310 return false; 311 } 312 break; 313 } 314 315 case token_type::value_string: 316 { 317 if (JSON_HEDLEY_UNLIKELY(!sax->string(m_lexer.get_string()))) 318 { 319 return false; 320 } 321 break; 322 } 323 324 case token_type::value_unsigned: 325 { 326 if (JSON_HEDLEY_UNLIKELY(!sax->number_unsigned(m_lexer.get_number_unsigned()))) 327 { 328 return false; 329 } 330 break; 331 } 332 333 case token_type::parse_error: 334 { 335 // using "uninitialized" to avoid "expected" message 336 return sax->parse_error(m_lexer.get_position(), 337 m_lexer.get_token_string(), 338 parse_error::create(101, m_lexer.get_position(), 339 exception_message(token_type::uninitialized, "value"))); 340 } 341 342 default: // the last token was unexpected 343 { 344 return sax->parse_error(m_lexer.get_position(), 345 m_lexer.get_token_string(), 346 parse_error::create(101, m_lexer.get_position(), 347 exception_message(token_type::literal_or_value, "value"))); 348 } 349 } 350 } 351 else 352 { 353 skip_to_state_evaluation = false; 354 } 355 356 // we reached this line after we successfully parsed a value 357 if (states.empty()) 358 { 359 // empty stack: we reached the end of the hierarchy: done 360 return true; 361 } 362 363 if (states.back()) // array 364 { 365 // comma -> next value 366 if (get_token() == token_type::value_separator) 367 { 368 // parse a new value 369 get_token(); 370 continue; 371 } 372 373 // closing ] 374 if (JSON_HEDLEY_LIKELY(last_token == token_type::end_array)) 375 { 376 if (JSON_HEDLEY_UNLIKELY(!sax->end_array())) 377 { 378 return false; 379 } 380 381 // We are done with this array. Before we can parse a 382 // new value, we need to evaluate the new state first. 383 // By setting skip_to_state_evaluation to false, we 384 // are effectively jumping to the beginning of this if. 385 JSON_ASSERT(!states.empty()); 386 states.pop_back(); 387 skip_to_state_evaluation = true; 388 continue; 389 } 390 391 return sax->parse_error(m_lexer.get_position(), 392 m_lexer.get_token_string(), 393 parse_error::create(101, m_lexer.get_position(), 394 exception_message(token_type::end_array, "array"))); 395 } 396 else // object 397 { 398 // comma -> next value 399 if (get_token() == token_type::value_separator) 400 { 401 // parse key 402 if (JSON_HEDLEY_UNLIKELY(get_token() != token_type::value_string)) 403 { 404 return sax->parse_error(m_lexer.get_position(), 405 m_lexer.get_token_string(), 406 parse_error::create(101, m_lexer.get_position(), 407 exception_message(token_type::value_string, "object key"))); 408 } 409 410 if (JSON_HEDLEY_UNLIKELY(!sax->key(m_lexer.get_string()))) 411 { 412 return false; 413 } 414 415 // parse separator (:) 416 if (JSON_HEDLEY_UNLIKELY(get_token() != token_type::name_separator)) 417 { 418 return sax->parse_error(m_lexer.get_position(), 419 m_lexer.get_token_string(), 420 parse_error::create(101, m_lexer.get_position(), 421 exception_message(token_type::name_separator, "object separator"))); 422 } 423 424 // parse values 425 get_token(); 426 continue; 427 } 428 429 // closing } 430 if (JSON_HEDLEY_LIKELY(last_token == token_type::end_object)) 431 { 432 if (JSON_HEDLEY_UNLIKELY(!sax->end_object())) 433 { 434 return false; 435 } 436 437 // We are done with this object. Before we can parse a 438 // new value, we need to evaluate the new state first. 439 // By setting skip_to_state_evaluation to false, we 440 // are effectively jumping to the beginning of this if. 441 JSON_ASSERT(!states.empty()); 442 states.pop_back(); 443 skip_to_state_evaluation = true; 444 continue; 445 } 446 447 return sax->parse_error(m_lexer.get_position(), 448 m_lexer.get_token_string(), 449 parse_error::create(101, m_lexer.get_position(), 450 exception_message(token_type::end_object, "object"))); 451 } 452 } 453 } 454 455 /// get next token from lexer get_token()456 token_type get_token() 457 { 458 return last_token = m_lexer.scan(); 459 } 460 exception_message(const token_type expected,const std::string & context)461 std::string exception_message(const token_type expected, const std::string& context) 462 { 463 std::string error_msg = "syntax error "; 464 465 if (!context.empty()) 466 { 467 error_msg += "while parsing " + context + " "; 468 } 469 470 error_msg += "- "; 471 472 if (last_token == token_type::parse_error) 473 { 474 error_msg += std::string(m_lexer.get_error_message()) + "; last read: '" + 475 m_lexer.get_token_string() + "'"; 476 } 477 else 478 { 479 error_msg += "unexpected " + std::string(lexer_t::token_type_name(last_token)); 480 } 481 482 if (expected != token_type::uninitialized) 483 { 484 error_msg += "; expected " + std::string(lexer_t::token_type_name(expected)); 485 } 486 487 return error_msg; 488 } 489 490 private: 491 /// callback function 492 const parser_callback_t<BasicJsonType> callback = nullptr; 493 /// the type of the last read token 494 token_type last_token = token_type::uninitialized; 495 /// the lexer 496 lexer_t m_lexer; 497 /// whether to throw exceptions in case of errors 498 const bool allow_exceptions = true; 499 }; 500 } // namespace detail 501 } // namespace nlohmann 502