1 #pragma once 2 3 #include <cassert> // assert 4 #include <cmath> // isfinite 5 #include <cstdint> // uint8_t 6 #include <functional> // function 7 #include <string> // string 8 #include <utility> // move 9 10 #include <nlohmann/detail/exceptions.hpp> 11 #include <nlohmann/detail/macro_scope.hpp> 12 #include <nlohmann/detail/meta/is_sax.hpp> 13 #include <nlohmann/detail/input/input_adapters.hpp> 14 #include <nlohmann/detail/input/json_sax.hpp> 15 #include <nlohmann/detail/input/lexer.hpp> 16 #include <nlohmann/detail/value_t.hpp> 17 18 namespace nlohmann 19 { 20 namespace detail 21 { 22 //////////// 23 // parser // 24 //////////// 25 26 /*! 27 @brief syntax analysis 28 29 This class implements a recursive decent parser. 30 */ 31 template<typename BasicJsonType> 32 class parser 33 { 34 using number_integer_t = typename BasicJsonType::number_integer_t; 35 using number_unsigned_t = typename BasicJsonType::number_unsigned_t; 36 using number_float_t = typename BasicJsonType::number_float_t; 37 using string_t = typename BasicJsonType::string_t; 38 using lexer_t = lexer<BasicJsonType>; 39 using token_type = typename lexer_t::token_type; 40 41 public: 42 enum class parse_event_t : uint8_t 43 { 44 /// the parser read `{` and started to process a JSON object 45 object_start, 46 /// the parser read `}` and finished processing a JSON object 47 object_end, 48 /// the parser read `[` and started to process a JSON array 49 array_start, 50 /// the parser read `]` and finished processing a JSON array 51 array_end, 52 /// the parser read a key of a value in an object 53 key, 54 /// the parser finished reading a JSON value 55 value 56 }; 57 58 using parser_callback_t = 59 std::function<bool(int depth, parse_event_t event, BasicJsonType& parsed)>; 60 61 /// a parser reading from an input adapter parser(detail::input_adapter_t && adapter,const parser_callback_t cb=nullptr,const bool allow_exceptions_=true)62 explicit parser(detail::input_adapter_t&& adapter, 63 const parser_callback_t cb = nullptr, 64 const bool allow_exceptions_ = true) 65 : callback(cb), m_lexer(std::move(adapter)), allow_exceptions(allow_exceptions_) 66 { 67 // read first token 68 get_token(); 69 } 70 71 /*! 72 @brief public parser interface 73 74 @param[in] strict whether to expect the last token to be EOF 75 @param[in,out] result parsed JSON value 76 77 @throw parse_error.101 in case of an unexpected token 78 @throw parse_error.102 if to_unicode fails or surrogate error 79 @throw parse_error.103 if to_unicode fails 80 */ parse(const bool strict,BasicJsonType & result)81 void parse(const bool strict, BasicJsonType& result) 82 { 83 if (callback) 84 { 85 json_sax_dom_callback_parser<BasicJsonType> sdp(result, callback, allow_exceptions); 86 sax_parse_internal(&sdp); 87 result.assert_invariant(); 88 89 // in strict mode, input must be completely read 90 if (strict and (get_token() != token_type::end_of_input)) 91 { 92 sdp.parse_error(m_lexer.get_position(), 93 m_lexer.get_token_string(), 94 parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_of_input))); 95 } 96 97 // in case of an error, return discarded value 98 if (sdp.is_errored()) 99 { 100 result = value_t::discarded; 101 return; 102 } 103 104 // set top-level value to null if it was discarded by the callback 105 // function 106 if (result.is_discarded()) 107 { 108 result = nullptr; 109 } 110 } 111 else 112 { 113 json_sax_dom_parser<BasicJsonType> sdp(result, allow_exceptions); 114 sax_parse_internal(&sdp); 115 result.assert_invariant(); 116 117 // in strict mode, input must be completely read 118 if (strict and (get_token() != token_type::end_of_input)) 119 { 120 sdp.parse_error(m_lexer.get_position(), 121 m_lexer.get_token_string(), 122 parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_of_input))); 123 } 124 125 // in case of an error, return discarded value 126 if (sdp.is_errored()) 127 { 128 result = value_t::discarded; 129 return; 130 } 131 } 132 } 133 134 /*! 135 @brief public accept interface 136 137 @param[in] strict whether to expect the last token to be EOF 138 @return whether the input is a proper JSON text 139 */ accept(const bool strict=true)140 bool accept(const bool strict = true) 141 { 142 json_sax_acceptor<BasicJsonType> sax_acceptor; 143 return sax_parse(&sax_acceptor, strict); 144 } 145 146 template <typename SAX> sax_parse(SAX * sax,const bool strict=true)147 bool sax_parse(SAX* sax, const bool strict = true) 148 { 149 (void)detail::is_sax_static_asserts<SAX, BasicJsonType> {}; 150 const bool result = sax_parse_internal(sax); 151 152 // strict mode: next byte must be EOF 153 if (result and strict and (get_token() != token_type::end_of_input)) 154 { 155 return sax->parse_error(m_lexer.get_position(), 156 m_lexer.get_token_string(), 157 parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_of_input))); 158 } 159 160 return result; 161 } 162 163 private: 164 template <typename SAX> sax_parse_internal(SAX * sax)165 bool sax_parse_internal(SAX* sax) 166 { 167 // stack to remember the hieararchy of structured values we are parsing 168 // true = array; false = object 169 std::vector<bool> states; 170 // value to avoid a goto (see comment where set to true) 171 bool skip_to_state_evaluation = false; 172 173 while (true) 174 { 175 if (not skip_to_state_evaluation) 176 { 177 // invariant: get_token() was called before each iteration 178 switch (last_token) 179 { 180 case token_type::begin_object: 181 { 182 if (JSON_UNLIKELY(not sax->start_object(std::size_t(-1)))) 183 { 184 return false; 185 } 186 187 // closing } -> we are done 188 if (get_token() == token_type::end_object) 189 { 190 if (JSON_UNLIKELY(not sax->end_object())) 191 { 192 return false; 193 } 194 break; 195 } 196 197 // parse key 198 if (JSON_UNLIKELY(last_token != token_type::value_string)) 199 { 200 return sax->parse_error(m_lexer.get_position(), 201 m_lexer.get_token_string(), 202 parse_error::create(101, m_lexer.get_position(), exception_message(token_type::value_string))); 203 } 204 else 205 { 206 if (JSON_UNLIKELY(not sax->key(m_lexer.get_string()))) 207 { 208 return false; 209 } 210 } 211 212 // parse separator (:) 213 if (JSON_UNLIKELY(get_token() != token_type::name_separator)) 214 { 215 return sax->parse_error(m_lexer.get_position(), 216 m_lexer.get_token_string(), 217 parse_error::create(101, m_lexer.get_position(), exception_message(token_type::name_separator))); 218 } 219 220 // remember we are now inside an object 221 states.push_back(false); 222 223 // parse values 224 get_token(); 225 continue; 226 } 227 228 case token_type::begin_array: 229 { 230 if (JSON_UNLIKELY(not sax->start_array(std::size_t(-1)))) 231 { 232 return false; 233 } 234 235 // closing ] -> we are done 236 if (get_token() == token_type::end_array) 237 { 238 if (JSON_UNLIKELY(not sax->end_array())) 239 { 240 return false; 241 } 242 break; 243 } 244 245 // remember we are now inside an array 246 states.push_back(true); 247 248 // parse values (no need to call get_token) 249 continue; 250 } 251 252 case token_type::value_float: 253 { 254 const auto res = m_lexer.get_number_float(); 255 256 if (JSON_UNLIKELY(not std::isfinite(res))) 257 { 258 return sax->parse_error(m_lexer.get_position(), 259 m_lexer.get_token_string(), 260 out_of_range::create(406, "number overflow parsing '" + m_lexer.get_token_string() + "'")); 261 } 262 else 263 { 264 if (JSON_UNLIKELY(not sax->number_float(res, m_lexer.get_string()))) 265 { 266 return false; 267 } 268 break; 269 } 270 } 271 272 case token_type::literal_false: 273 { 274 if (JSON_UNLIKELY(not sax->boolean(false))) 275 { 276 return false; 277 } 278 break; 279 } 280 281 case token_type::literal_null: 282 { 283 if (JSON_UNLIKELY(not sax->null())) 284 { 285 return false; 286 } 287 break; 288 } 289 290 case token_type::literal_true: 291 { 292 if (JSON_UNLIKELY(not sax->boolean(true))) 293 { 294 return false; 295 } 296 break; 297 } 298 299 case token_type::value_integer: 300 { 301 if (JSON_UNLIKELY(not sax->number_integer(m_lexer.get_number_integer()))) 302 { 303 return false; 304 } 305 break; 306 } 307 308 case token_type::value_string: 309 { 310 if (JSON_UNLIKELY(not sax->string(m_lexer.get_string()))) 311 { 312 return false; 313 } 314 break; 315 } 316 317 case token_type::value_unsigned: 318 { 319 if (JSON_UNLIKELY(not sax->number_unsigned(m_lexer.get_number_unsigned()))) 320 { 321 return false; 322 } 323 break; 324 } 325 326 case token_type::parse_error: 327 { 328 // using "uninitialized" to avoid "expected" message 329 return sax->parse_error(m_lexer.get_position(), 330 m_lexer.get_token_string(), 331 parse_error::create(101, m_lexer.get_position(), exception_message(token_type::uninitialized))); 332 } 333 334 default: // the last token was unexpected 335 { 336 return sax->parse_error(m_lexer.get_position(), 337 m_lexer.get_token_string(), 338 parse_error::create(101, m_lexer.get_position(), exception_message(token_type::literal_or_value))); 339 } 340 } 341 } 342 else 343 { 344 skip_to_state_evaluation = false; 345 } 346 347 // we reached this line after we successfully parsed a value 348 if (states.empty()) 349 { 350 // empty stack: we reached the end of the hieararchy: done 351 return true; 352 } 353 else 354 { 355 if (states.back()) // array 356 { 357 // comma -> next value 358 if (get_token() == token_type::value_separator) 359 { 360 // parse a new value 361 get_token(); 362 continue; 363 } 364 365 // closing ] 366 if (JSON_LIKELY(last_token == token_type::end_array)) 367 { 368 if (JSON_UNLIKELY(not sax->end_array())) 369 { 370 return false; 371 } 372 373 // We are done with this array. Before we can parse a 374 // new value, we need to evaluate the new state first. 375 // By setting skip_to_state_evaluation to false, we 376 // are effectively jumping to the beginning of this if. 377 assert(not states.empty()); 378 states.pop_back(); 379 skip_to_state_evaluation = true; 380 continue; 381 } 382 else 383 { 384 return sax->parse_error(m_lexer.get_position(), 385 m_lexer.get_token_string(), 386 parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_array))); 387 } 388 } 389 else // object 390 { 391 // comma -> next value 392 if (get_token() == token_type::value_separator) 393 { 394 // parse key 395 if (JSON_UNLIKELY(get_token() != token_type::value_string)) 396 { 397 return sax->parse_error(m_lexer.get_position(), 398 m_lexer.get_token_string(), 399 parse_error::create(101, m_lexer.get_position(), exception_message(token_type::value_string))); 400 } 401 else 402 { 403 if (JSON_UNLIKELY(not sax->key(m_lexer.get_string()))) 404 { 405 return false; 406 } 407 } 408 409 // parse separator (:) 410 if (JSON_UNLIKELY(get_token() != token_type::name_separator)) 411 { 412 return sax->parse_error(m_lexer.get_position(), 413 m_lexer.get_token_string(), 414 parse_error::create(101, m_lexer.get_position(), exception_message(token_type::name_separator))); 415 } 416 417 // parse values 418 get_token(); 419 continue; 420 } 421 422 // closing } 423 if (JSON_LIKELY(last_token == token_type::end_object)) 424 { 425 if (JSON_UNLIKELY(not sax->end_object())) 426 { 427 return false; 428 } 429 430 // We are done with this object. Before we can parse a 431 // new value, we need to evaluate the new state first. 432 // By setting skip_to_state_evaluation to false, we 433 // are effectively jumping to the beginning of this if. 434 assert(not states.empty()); 435 states.pop_back(); 436 skip_to_state_evaluation = true; 437 continue; 438 } 439 else 440 { 441 return sax->parse_error(m_lexer.get_position(), 442 m_lexer.get_token_string(), 443 parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_object))); 444 } 445 } 446 } 447 } 448 } 449 450 /// get next token from lexer get_token()451 token_type get_token() 452 { 453 return (last_token = m_lexer.scan()); 454 } 455 exception_message(const token_type expected)456 std::string exception_message(const token_type expected) 457 { 458 std::string error_msg = "syntax error - "; 459 if (last_token == token_type::parse_error) 460 { 461 error_msg += std::string(m_lexer.get_error_message()) + "; last read: '" + 462 m_lexer.get_token_string() + "'"; 463 } 464 else 465 { 466 error_msg += "unexpected " + std::string(lexer_t::token_type_name(last_token)); 467 } 468 469 if (expected != token_type::uninitialized) 470 { 471 error_msg += "; expected " + std::string(lexer_t::token_type_name(expected)); 472 } 473 474 return error_msg; 475 } 476 477 private: 478 /// callback function 479 const parser_callback_t callback = nullptr; 480 /// the type of the last read token 481 token_type last_token = token_type::uninitialized; 482 /// the lexer 483 lexer_t m_lexer; 484 /// whether to throw exceptions in case of errors 485 const bool allow_exceptions = true; 486 }; 487 } 488 } 489