1 #ifndef SQL_JSON_PATH_INCLUDED 2 #define SQL_JSON_PATH_INCLUDED 3 4 /* Copyright (c) 2015, 2020, Oracle and/or its affiliates. All rights reserved. 5 6 This program is free software; you can redistribute it and/or modify 7 it under the terms of the GNU General Public License, version 2.0, 8 as published by the Free Software Foundation. 9 10 This program is also distributed with certain software (including 11 but not limited to OpenSSL) that is licensed under separate terms, 12 as designated in a particular file or component or in included license 13 documentation. The authors of MySQL hereby grant you an additional 14 permission to link the program and your derivative works with the 15 separately licensed software that they have included with MySQL. 16 17 This program is distributed in the hope that it will be useful, 18 but WITHOUT ANY WARRANTY; without even the implied warranty of 19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 20 GNU General Public License, version 2.0, for more details. 21 22 You should have received a copy of the GNU General Public License 23 along with this program; if not, write to the Free Software 24 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ 25 26 /** 27 @file json_path.h 28 29 This file contains interface support for the JSON path abstraction. 30 The path abstraction is described by the functional spec 31 attached to WL#7909. 32 */ 33 34 #include <stddef.h> 35 #include <algorithm> 36 #include <new> 37 #include <string> 38 #include <utility> 39 40 #include "my_alloc.h" // MEM_ROOT 41 #include "my_dbug.h" // DBUG_ASSERT 42 #include "my_inttypes.h" 43 #include "my_sys.h" 44 #include "prealloced_array.h" // Prealloced_array 45 46 class String; 47 48 /** The type of a Json_path_leg. */ 49 enum enum_json_path_leg_type { 50 /** 51 A path leg that represents a JSON object member (such as `.name`). 52 This path leg matches a single member in a JSON object. 53 */ 54 jpl_member, 55 56 /** 57 A path leg that represents a JSON array cell (such as `[10]`). 58 This path leg matches a single element in a JSON object. 59 */ 60 jpl_array_cell, 61 62 /** 63 A path leg that represents a range in a JSON array 64 (such as `[2 to 7]`). 65 */ 66 jpl_array_range, 67 68 /** 69 A path leg that represents the member wildcard (`.*`), which 70 matches all the members of a JSON object. 71 */ 72 jpl_member_wildcard, 73 74 /** 75 A path leg that represents the array wildcard (`[*]`), which 76 matches all the elements of a JSON array. 77 */ 78 jpl_array_cell_wildcard, 79 80 /** 81 A path leg that represents the ellipsis (`**`), which matches any 82 JSON value and recursively all the JSON values nested within it if 83 it is an object or an array. 84 */ 85 jpl_ellipsis 86 }; 87 88 /** 89 A class that represents the index of an element in a JSON array. The 90 index is 0-based and relative to the beginning of the array. 91 */ 92 class Json_array_index final { 93 /** 94 The array index. It is 0 if the specified index was before the 95 first element of the array, or equal to the array length if the 96 specified index was after the last element of the array. 97 */ 98 size_t m_index; 99 100 /** True if the array index is within the bounds of the array. */ 101 bool m_within_bounds; 102 103 public: 104 /** 105 Construct a new Json_array_index object representing the specified 106 position in an array of the given length. 107 108 @param index the array index 109 @param from_end true if @a index is relative to the end of the array 110 @param array_length the length of the array 111 */ Json_array_index(size_t index,bool from_end,size_t array_length)112 Json_array_index(size_t index, bool from_end, size_t array_length) 113 : m_index(from_end ? (index < array_length ? array_length - index - 1 : 0) 114 : std::min(index, array_length)), 115 m_within_bounds(index < array_length) {} 116 117 /** 118 Is the array index within the bounds of the array? 119 120 @retval true if the array index is within bounds 121 @retval false otherwise 122 */ within_bounds()123 bool within_bounds() const { return m_within_bounds; } 124 125 /** 126 Get the position in the array pointed to by this array index. 127 128 If the index is out of bounds, 0 will be returned if the array 129 index is before the first element in the array, or a value equal 130 to the length of the array if the index is after the last element. 131 132 @return the position in the array (0-based index relative to the 133 start of the array) 134 */ position()135 size_t position() const { return m_index; } 136 }; 137 138 /** 139 One path leg in a JSON path expression. 140 141 A path leg describes either a key/value pair in an object 142 or a 0-based index into an array. 143 */ 144 class Json_path_leg final { 145 /// The type of this path leg. 146 enum_json_path_leg_type m_leg_type; 147 148 /// The index of an array cell, or the start of an array range. 149 size_t m_first_array_index = 0; 150 151 /// Is #m_first_array_index relative to the end of the array? 152 bool m_first_array_index_from_end = false; 153 154 /// The end (inclusive) of an array range. 155 size_t m_last_array_index = 0; 156 157 /// Is #m_last_array_index relative to the end of the array? 158 bool m_last_array_index_from_end = false; 159 160 /// The member name of a member path leg. 161 std::string m_member_name; 162 163 public: 164 /** 165 Construct a wildcard or ellipsis path leg. 166 167 @param leg_type the type of wildcard (#jpl_ellipsis, 168 #jpl_member_wildcard or #jpl_array_cell_wildcard) 169 */ Json_path_leg(enum_json_path_leg_type leg_type)170 explicit Json_path_leg(enum_json_path_leg_type leg_type) 171 : m_leg_type(leg_type) { 172 DBUG_ASSERT(leg_type == jpl_ellipsis || leg_type == jpl_member_wildcard || 173 leg_type == jpl_array_cell_wildcard); 174 } 175 176 /** 177 Construct an array cell path leg. 178 179 @param index the 0-based index in the array, 180 relative to the beginning of the array 181 */ Json_path_leg(size_t index)182 explicit Json_path_leg(size_t index) : Json_path_leg(index, false) {} 183 184 /** 185 Construct an array cell path leg. 186 187 @param index the 0-based index in the array 188 @param from_end true if @a index is relative to the end of the array 189 */ Json_path_leg(size_t index,bool from_end)190 Json_path_leg(size_t index, bool from_end) 191 : m_leg_type(jpl_array_cell), 192 m_first_array_index(index), 193 m_first_array_index_from_end(from_end) {} 194 195 /** 196 Construct an array range path leg. 197 198 @param idx1 the start index of the range, inclusive 199 @param idx1_from_end true if the start index is relative 200 to the end of the array 201 @param idx2 the last index of the range, inclusive 202 @param idx2_from_end true if the last index is relative 203 to the end of the array 204 */ Json_path_leg(size_t idx1,bool idx1_from_end,size_t idx2,bool idx2_from_end)205 Json_path_leg(size_t idx1, bool idx1_from_end, size_t idx2, 206 bool idx2_from_end) 207 : m_leg_type(jpl_array_range), 208 m_first_array_index(idx1), 209 m_first_array_index_from_end(idx1_from_end), 210 m_last_array_index(idx2), 211 m_last_array_index_from_end(idx2_from_end) {} 212 213 /** 214 Construct an object member path leg. 215 216 @param member_name the name of the object member 217 @param length the length of the member name 218 */ Json_path_leg(const char * member_name,size_t length)219 Json_path_leg(const char *member_name, size_t length) 220 : m_leg_type(jpl_member), m_member_name(member_name, length) {} 221 222 /** Construct an object member path leg. */ Json_path_leg(const std::string & member_name)223 Json_path_leg(const std::string &member_name) 224 : Json_path_leg(member_name.c_str(), member_name.length()) {} 225 226 /** Get the type of the path leg. */ get_type()227 enum_json_path_leg_type get_type() const { return m_leg_type; } 228 229 /** Get the member name of a ::jpl_member path leg. */ get_member_name()230 const std::string &get_member_name() const { return m_member_name; } 231 232 /** Turn into a human-readable string. */ 233 bool to_string(String *buf) const; 234 235 /** 236 Is this path leg an auto-wrapping array accessor? 237 238 An auto-wrapping array accessor is an array accessor that matches 239 non-arrays by auto-wrapping them in a single-element array before doing 240 the matching. 241 242 This function returns true for any ::jpl_array_cell or ::jpl_array_range 243 path leg that would match the element contained in a single-element 244 array, and which therefore would also match non-arrays that have been 245 auto-wrapped in single-element arrays. 246 */ 247 bool is_autowrap() const; 248 249 /** 250 Get the first array cell pointed to by an array range, or the 251 array cell pointed to by an array cell index. 252 253 @param array_length the length of the array 254 */ first_array_index(size_t array_length)255 Json_array_index first_array_index(size_t array_length) const { 256 DBUG_ASSERT(m_leg_type == jpl_array_cell || m_leg_type == jpl_array_range); 257 return Json_array_index(m_first_array_index, m_first_array_index_from_end, 258 array_length); 259 } 260 261 /** 262 Get the last array cell pointed to by an array range. The range 263 includes this cell. 264 265 @param array_length the length of the array 266 */ last_array_index(size_t array_length)267 Json_array_index last_array_index(size_t array_length) const { 268 DBUG_ASSERT(m_leg_type == jpl_array_range); 269 return Json_array_index(m_last_array_index, m_last_array_index_from_end, 270 array_length); 271 } 272 273 /** 274 A structure that represents an array range. 275 */ 276 struct Array_range { 277 size_t m_begin; ///< Beginning of the range, inclusive. 278 size_t m_end; ///< End of the range, exclusive. 279 }; 280 281 /** 282 Get the array range pointed to by a path leg of type 283 ::jpl_array_range or ::jpl_array_cell_wildcard. 284 @param array_length the length of the array 285 */ 286 Array_range get_array_range(size_t array_length) const; 287 }; 288 289 using Json_path_leg_pointers = Prealloced_array<const Json_path_leg *, 8>; 290 using Json_path_iterator = Json_path_leg_pointers::const_iterator; 291 292 /** 293 A path expression which can be used to seek to 294 a position inside a JSON value. 295 */ 296 class Json_seekable_path { 297 protected: 298 /** An array of pointers to the legs of the JSON path. */ 299 Json_path_leg_pointers m_path_legs; 300 301 Json_seekable_path(); 302 303 public: 304 /** Return the number of legs in this searchable path */ leg_count()305 size_t leg_count() const { return m_path_legs.size(); } 306 307 /** Get an iterator pointing to the first path leg. */ begin()308 Json_path_iterator begin() const { return m_path_legs.begin(); } 309 310 /** Get an iterator pointing just past the last path leg. */ end()311 Json_path_iterator end() const { return m_path_legs.end(); } 312 313 /** Get a pointer to the last path leg. The path must not be empty. */ last_leg()314 const Json_path_leg *last_leg() const { return m_path_legs.back(); } 315 }; 316 317 /** 318 A JSON path expression. 319 320 From the user's point of view, a path expression is a string literal 321 with the following structure. We parse this structure into a 322 Json_path object: 323 324 pathExpression ::= scope pathLeg (pathLeg)* 325 326 scope ::= dollarSign 327 328 pathLeg ::= member | arrayLocation | doubleAsterisk 329 330 member ::= period (keyName | asterisk) 331 332 arrayLocation ::= 333 leftBracket 334 (arrayIndex | arrayRange | asterisk) 335 rightBracket 336 337 arrayIndex ::= 338 non-negative-integer | 339 last [ minus non-negative-integer ] 340 341 arrayRange ::= arrayIndex to arrayIndex 342 343 keyName ::= ECMAScript-identifier | ECMAScript-string-literal 344 345 doubleAsterisk ::= ** 346 347 to ::= "to" 348 349 last ::= "last" 350 */ 351 class Json_path final : public Json_seekable_path { 352 private: 353 /** 354 A MEM_ROOT in which the Json_path_leg objects pointed to by 355 #Json_seekable_path::m_path_legs are allocated. 356 */ 357 MEM_ROOT m_mem_root; 358 359 public: 360 Json_path(); 361 ~Json_path()362 ~Json_path() { 363 for (const auto ptr : m_path_legs) ptr->~Json_path_leg(); 364 } 365 366 /** Move constructor. */ Json_path(Json_path && other)367 Json_path(Json_path &&other) : m_mem_root(std::move(other.m_mem_root)) { 368 // Move the contents of m_path_legs from other into this. 369 m_path_legs = std::move(other.m_path_legs); 370 371 /* 372 Must also make sure that other.m_path_legs is empty, so that we 373 don't end up destroying the same objects twice; once from this's 374 destructor and once from other's destructor. 375 376 Move-constructing a vector would usually leave "other" empty, 377 but it is not guaranteed. Furthermore, m_path_legs is a 378 Prealloced_array, not a std::vector, so often moving will mean 379 copying from one prealloced area to another instead of simply 380 swapping pointers to the backing array. (And at the time of 381 writing Prealloced_array doesn't even have a move-assignment 382 operator, so the above assignment will always copy and leave 383 "other" unchanged.) 384 */ 385 other.m_path_legs.clear(); 386 } 387 388 /** Move assignment. */ 389 Json_path &operator=(Json_path &&other) { 390 if (&other != this) { 391 this->~Json_path(); 392 new (this) Json_path(std::move(other)); 393 } 394 return *this; 395 } 396 397 /** 398 Add a path leg to the end of this path. 399 @param[in] leg the leg to add 400 @return false on success, true on error 401 */ append(const Json_path_leg & leg)402 bool append(const Json_path_leg &leg) { 403 auto ptr = new (&m_mem_root) Json_path_leg(leg); 404 return ptr == nullptr || m_path_legs.push_back(ptr); 405 } 406 407 /** 408 Resets this to an empty path with no legs. 409 */ clear()410 void clear() { 411 // Destruct all the Json_path_leg objects, and clear the pointers to them. 412 for (const auto ptr : m_path_legs) ptr->~Json_path_leg(); 413 m_path_legs.clear(); 414 // Mark the memory as ready for reuse. 415 free_root(&m_mem_root, MYF(MY_MARK_BLOCKS_FREE)); 416 } 417 418 /** 419 Return true if the path can match more than one value in a JSON document. 420 421 @retval true if the path contains a path leg which is a wildcard, 422 ellipsis or array range 423 @retval false otherwise 424 */ 425 bool can_match_many() const; 426 427 /** Turn into a human-readable string. */ 428 bool to_string(String *buf) const; 429 }; 430 431 /** 432 A lightweight path expression. This exists so that paths can be cloned 433 from the path legs of other paths without allocating heap memory 434 to copy those legs into. This class does not own the memory of the 435 Json_path_leg objects pointed to by #Json_seekable_path::m_path_legs, it 436 just points to Json_path_leg objects that belong to a Json_path instance. 437 */ 438 class Json_path_clone final : public Json_seekable_path { 439 public: 440 /** 441 Add a path leg to the end of this cloned path. 442 @param[in] leg the leg to add 443 @return false on success, true on error 444 */ append(const Json_path_leg * leg)445 bool append(const Json_path_leg *leg) { return m_path_legs.push_back(leg); } 446 447 /** 448 Resets this to an empty path with no legs. 449 */ clear()450 void clear() { m_path_legs.clear(); } 451 }; 452 453 /** 454 Initialize a Json_path from a path expression. 455 456 Stops parsing on the first error. It initializes the Json_path and 457 returns false if the path is parsed successfully. Otherwise, it 458 returns false. In that case, the output bad_index argument will 459 contain an index into the path expression. The parsing failed near 460 that index. 461 462 @param[in] path_length The length of the path expression. 463 @param[in] path_expression The string form of the path expression. 464 @param[out] path The Json_path object to be initialized. 465 @param[out] bad_index If null is returned, the parsing failed around here. 466 @return false on success, true on error 467 */ 468 bool parse_path(size_t path_length, const char *path_expression, 469 Json_path *path, size_t *bad_index); 470 471 /** 472 A helper function that uses the above one as workhorse. Entry point for 473 for JSON_TABLE (Table_function_json class) and Json_path_cache. Raises an 474 error if the path expression is syntactically incorrect. Raises an 475 error if the path expression contains wildcard tokens but is not 476 supposed to. Otherwise updates the supplied Json_path object with 477 the parsed path. 478 479 @param[in] path_value A String to be interpreted as a path. 480 @param[in] forbid_wildcards True if the path shouldn't contain * or ** 481 @param[out] json_path The object that will hold the parsed path 482 483 @returns false on success (valid path or NULL), true on error 484 */ 485 bool parse_path(const String &path_value, bool forbid_wildcards, 486 Json_path *json_path); 487 #endif /* SQL_JSON_PATH_INCLUDED */ 488