1 /*------------------------------------------------------------------------- 2 * 3 * jsonb_util.c 4 * converting between Jsonb and JsonbValues, and iterating. 5 * 6 * Copyright (c) 2014-2018, PostgreSQL Global Development Group 7 * 8 * 9 * IDENTIFICATION 10 * src/backend/utils/adt/jsonb_util.c 11 * 12 *------------------------------------------------------------------------- 13 */ 14 #include "postgres.h" 15 16 #include "access/hash.h" 17 #include "catalog/pg_collation.h" 18 #include "miscadmin.h" 19 #include "utils/builtins.h" 20 #include "utils/jsonb.h" 21 #include "utils/memutils.h" 22 #include "utils/varlena.h" 23 24 /* 25 * Maximum number of elements in an array (or key/value pairs in an object). 26 * This is limited by two things: the size of the JEntry array must fit 27 * in MaxAllocSize, and the number of elements (or pairs) must fit in the bits 28 * reserved for that in the JsonbContainer.header field. 29 * 30 * (The total size of an array's or object's elements is also limited by 31 * JENTRY_OFFLENMASK, but we're not concerned about that here.) 32 */ 33 #define JSONB_MAX_ELEMS (Min(MaxAllocSize / sizeof(JsonbValue), JB_CMASK)) 34 #define JSONB_MAX_PAIRS (Min(MaxAllocSize / sizeof(JsonbPair), JB_CMASK)) 35 36 static void fillJsonbValue(JsonbContainer *container, int index, 37 char *base_addr, uint32 offset, 38 JsonbValue *result); 39 static bool equalsJsonbScalarValue(JsonbValue *a, JsonbValue *b); 40 static int compareJsonbScalarValue(JsonbValue *a, JsonbValue *b); 41 static Jsonb *convertToJsonb(JsonbValue *val); 42 static void convertJsonbValue(StringInfo buffer, JEntry *header, JsonbValue *val, int level); 43 static void convertJsonbArray(StringInfo buffer, JEntry *header, JsonbValue *val, int level); 44 static void convertJsonbObject(StringInfo buffer, JEntry *header, JsonbValue *val, int level); 45 static void convertJsonbScalar(StringInfo buffer, JEntry *header, JsonbValue *scalarVal); 46 47 static int reserveFromBuffer(StringInfo buffer, int len); 48 static void appendToBuffer(StringInfo buffer, const char *data, int len); 49 static void copyToBuffer(StringInfo buffer, int offset, const char *data, int len); 50 static short padBufferToInt(StringInfo buffer); 51 52 static JsonbIterator *iteratorFromContainer(JsonbContainer *container, JsonbIterator *parent); 53 static JsonbIterator *freeAndGetParent(JsonbIterator *it); 54 static JsonbParseState *pushState(JsonbParseState **pstate); 55 static void appendKey(JsonbParseState *pstate, JsonbValue *scalarVal); 56 static void appendValue(JsonbParseState *pstate, JsonbValue *scalarVal); 57 static void appendElement(JsonbParseState *pstate, JsonbValue *scalarVal); 58 static int lengthCompareJsonbStringValue(const void *a, const void *b); 59 static int lengthCompareJsonbPair(const void *a, const void *b, void *arg); 60 static void uniqueifyJsonbObject(JsonbValue *object); 61 static JsonbValue *pushJsonbValueScalar(JsonbParseState **pstate, 62 JsonbIteratorToken seq, 63 JsonbValue *scalarVal); 64 65 /* 66 * Turn an in-memory JsonbValue into a Jsonb for on-disk storage. 67 * 68 * There isn't a JsonbToJsonbValue(), because generally we find it more 69 * convenient to directly iterate through the Jsonb representation and only 70 * really convert nested scalar values. JsonbIteratorNext() does this, so that 71 * clients of the iteration code don't have to directly deal with the binary 72 * representation (JsonbDeepContains() is a notable exception, although all 73 * exceptions are internal to this module). In general, functions that accept 74 * a JsonbValue argument are concerned with the manipulation of scalar values, 75 * or simple containers of scalar values, where it would be inconvenient to 76 * deal with a great amount of other state. 77 */ 78 Jsonb * 79 JsonbValueToJsonb(JsonbValue *val) 80 { 81 Jsonb *out; 82 83 if (IsAJsonbScalar(val)) 84 { 85 /* Scalar value */ 86 JsonbParseState *pstate = NULL; 87 JsonbValue *res; 88 JsonbValue scalarArray; 89 90 scalarArray.type = jbvArray; 91 scalarArray.val.array.rawScalar = true; 92 scalarArray.val.array.nElems = 1; 93 94 pushJsonbValue(&pstate, WJB_BEGIN_ARRAY, &scalarArray); 95 pushJsonbValue(&pstate, WJB_ELEM, val); 96 res = pushJsonbValue(&pstate, WJB_END_ARRAY, NULL); 97 98 out = convertToJsonb(res); 99 } 100 else if (val->type == jbvObject || val->type == jbvArray) 101 { 102 out = convertToJsonb(val); 103 } 104 else 105 { 106 Assert(val->type == jbvBinary); 107 out = palloc(VARHDRSZ + val->val.binary.len); 108 SET_VARSIZE(out, VARHDRSZ + val->val.binary.len); 109 memcpy(VARDATA(out), val->val.binary.data, val->val.binary.len); 110 } 111 112 return out; 113 } 114 115 /* 116 * Get the offset of the variable-length portion of a Jsonb node within 117 * the variable-length-data part of its container. The node is identified 118 * by index within the container's JEntry array. 119 */ 120 uint32 121 getJsonbOffset(const JsonbContainer *jc, int index) 122 { 123 uint32 offset = 0; 124 int i; 125 126 /* 127 * Start offset of this entry is equal to the end offset of the previous 128 * entry. Walk backwards to the most recent entry stored as an end 129 * offset, returning that offset plus any lengths in between. 130 */ 131 for (i = index - 1; i >= 0; i--) 132 { 133 offset += JBE_OFFLENFLD(jc->children[i]); 134 if (JBE_HAS_OFF(jc->children[i])) 135 break; 136 } 137 138 return offset; 139 } 140 141 /* 142 * Get the length of the variable-length portion of a Jsonb node. 143 * The node is identified by index within the container's JEntry array. 144 */ 145 uint32 146 getJsonbLength(const JsonbContainer *jc, int index) 147 { 148 uint32 off; 149 uint32 len; 150 151 /* 152 * If the length is stored directly in the JEntry, just return it. 153 * Otherwise, get the begin offset of the entry, and subtract that from 154 * the stored end+1 offset. 155 */ 156 if (JBE_HAS_OFF(jc->children[index])) 157 { 158 off = getJsonbOffset(jc, index); 159 len = JBE_OFFLENFLD(jc->children[index]) - off; 160 } 161 else 162 len = JBE_OFFLENFLD(jc->children[index]); 163 164 return len; 165 } 166 167 /* 168 * BT comparator worker function. Returns an integer less than, equal to, or 169 * greater than zero, indicating whether a is less than, equal to, or greater 170 * than b. Consistent with the requirements for a B-Tree operator class 171 * 172 * Strings are compared lexically, in contrast with other places where we use a 173 * much simpler comparator logic for searching through Strings. Since this is 174 * called from B-Tree support function 1, we're careful about not leaking 175 * memory here. 176 */ 177 int 178 compareJsonbContainers(JsonbContainer *a, JsonbContainer *b) 179 { 180 JsonbIterator *ita, 181 *itb; 182 int res = 0; 183 184 ita = JsonbIteratorInit(a); 185 itb = JsonbIteratorInit(b); 186 187 do 188 { 189 JsonbValue va, 190 vb; 191 JsonbIteratorToken ra, 192 rb; 193 194 ra = JsonbIteratorNext(&ita, &va, false); 195 rb = JsonbIteratorNext(&itb, &vb, false); 196 197 if (ra == rb) 198 { 199 if (ra == WJB_DONE) 200 { 201 /* Decisively equal */ 202 break; 203 } 204 205 if (ra == WJB_END_ARRAY || ra == WJB_END_OBJECT) 206 { 207 /* 208 * There is no array or object to compare at this stage of 209 * processing. jbvArray/jbvObject values are compared 210 * initially, at the WJB_BEGIN_ARRAY and WJB_BEGIN_OBJECT 211 * tokens. 212 */ 213 continue; 214 } 215 216 if (va.type == vb.type) 217 { 218 switch (va.type) 219 { 220 case jbvString: 221 case jbvNull: 222 case jbvNumeric: 223 case jbvBool: 224 res = compareJsonbScalarValue(&va, &vb); 225 break; 226 case jbvArray: 227 228 /* 229 * This could be a "raw scalar" pseudo array. That's 230 * a special case here though, since we still want the 231 * general type-based comparisons to apply, and as far 232 * as we're concerned a pseudo array is just a scalar. 233 */ 234 if (va.val.array.rawScalar != vb.val.array.rawScalar) 235 res = (va.val.array.rawScalar) ? -1 : 1; 236 if (va.val.array.nElems != vb.val.array.nElems) 237 res = (va.val.array.nElems > vb.val.array.nElems) ? 1 : -1; 238 break; 239 case jbvObject: 240 if (va.val.object.nPairs != vb.val.object.nPairs) 241 res = (va.val.object.nPairs > vb.val.object.nPairs) ? 1 : -1; 242 break; 243 case jbvBinary: 244 elog(ERROR, "unexpected jbvBinary value"); 245 } 246 } 247 else 248 { 249 /* Type-defined order */ 250 res = (va.type > vb.type) ? 1 : -1; 251 } 252 } 253 else 254 { 255 /* 256 * It's safe to assume that the types differed, and that the va 257 * and vb values passed were set. 258 * 259 * If the two values were of the same container type, then there'd 260 * have been a chance to observe the variation in the number of 261 * elements/pairs (when processing WJB_BEGIN_OBJECT, say). They're 262 * either two heterogeneously-typed containers, or a container and 263 * some scalar type. 264 * 265 * We don't have to consider the WJB_END_ARRAY and WJB_END_OBJECT 266 * cases here, because we would have seen the corresponding 267 * WJB_BEGIN_ARRAY and WJB_BEGIN_OBJECT tokens first, and 268 * concluded that they don't match. 269 */ 270 Assert(ra != WJB_END_ARRAY && ra != WJB_END_OBJECT); 271 Assert(rb != WJB_END_ARRAY && rb != WJB_END_OBJECT); 272 273 Assert(va.type != vb.type); 274 Assert(va.type != jbvBinary); 275 Assert(vb.type != jbvBinary); 276 /* Type-defined order */ 277 res = (va.type > vb.type) ? 1 : -1; 278 } 279 } 280 while (res == 0); 281 282 while (ita != NULL) 283 { 284 JsonbIterator *i = ita->parent; 285 286 pfree(ita); 287 ita = i; 288 } 289 while (itb != NULL) 290 { 291 JsonbIterator *i = itb->parent; 292 293 pfree(itb); 294 itb = i; 295 } 296 297 return res; 298 } 299 300 /* 301 * Find value in object (i.e. the "value" part of some key/value pair in an 302 * object), or find a matching element if we're looking through an array. Do 303 * so on the basis of equality of the object keys only, or alternatively 304 * element values only, with a caller-supplied value "key". The "flags" 305 * argument allows the caller to specify which container types are of interest. 306 * 307 * This exported utility function exists to facilitate various cases concerned 308 * with "containment". If asked to look through an object, the caller had 309 * better pass a Jsonb String, because their keys can only be strings. 310 * Otherwise, for an array, any type of JsonbValue will do. 311 * 312 * In order to proceed with the search, it is necessary for callers to have 313 * both specified an interest in exactly one particular container type with an 314 * appropriate flag, as well as having the pointed-to Jsonb container be of 315 * one of those same container types at the top level. (Actually, we just do 316 * whichever makes sense to save callers the trouble of figuring it out - at 317 * most one can make sense, because the container either points to an array 318 * (possibly a "raw scalar" pseudo array) or an object.) 319 * 320 * Note that we can return a jbvBinary JsonbValue if this is called on an 321 * object, but we never do so on an array. If the caller asks to look through 322 * a container type that is not of the type pointed to by the container, 323 * immediately fall through and return NULL. If we cannot find the value, 324 * return NULL. Otherwise, return palloc()'d copy of value. 325 */ 326 JsonbValue * 327 findJsonbValueFromContainer(JsonbContainer *container, uint32 flags, 328 JsonbValue *key) 329 { 330 JEntry *children = container->children; 331 int count = JsonContainerSize(container); 332 JsonbValue *result; 333 334 Assert((flags & ~(JB_FARRAY | JB_FOBJECT)) == 0); 335 336 /* Quick out without a palloc cycle if object/array is empty */ 337 if (count <= 0) 338 return NULL; 339 340 result = palloc(sizeof(JsonbValue)); 341 342 if ((flags & JB_FARRAY) && JsonContainerIsArray(container)) 343 { 344 char *base_addr = (char *) (children + count); 345 uint32 offset = 0; 346 int i; 347 348 for (i = 0; i < count; i++) 349 { 350 fillJsonbValue(container, i, base_addr, offset, result); 351 352 if (key->type == result->type) 353 { 354 if (equalsJsonbScalarValue(key, result)) 355 return result; 356 } 357 358 JBE_ADVANCE_OFFSET(offset, children[i]); 359 } 360 } 361 else if ((flags & JB_FOBJECT) && JsonContainerIsObject(container)) 362 { 363 /* Since this is an object, account for *Pairs* of Jentrys */ 364 char *base_addr = (char *) (children + count * 2); 365 uint32 stopLow = 0, 366 stopHigh = count; 367 368 /* Object key passed by caller must be a string */ 369 Assert(key->type == jbvString); 370 371 /* Binary search on object/pair keys *only* */ 372 while (stopLow < stopHigh) 373 { 374 uint32 stopMiddle; 375 int difference; 376 JsonbValue candidate; 377 378 stopMiddle = stopLow + (stopHigh - stopLow) / 2; 379 380 candidate.type = jbvString; 381 candidate.val.string.val = 382 base_addr + getJsonbOffset(container, stopMiddle); 383 candidate.val.string.len = getJsonbLength(container, stopMiddle); 384 385 difference = lengthCompareJsonbStringValue(&candidate, key); 386 387 if (difference == 0) 388 { 389 /* Found our key, return corresponding value */ 390 int index = stopMiddle + count; 391 392 fillJsonbValue(container, index, base_addr, 393 getJsonbOffset(container, index), 394 result); 395 396 return result; 397 } 398 else 399 { 400 if (difference < 0) 401 stopLow = stopMiddle + 1; 402 else 403 stopHigh = stopMiddle; 404 } 405 } 406 } 407 408 /* Not found */ 409 pfree(result); 410 return NULL; 411 } 412 413 /* 414 * Get i-th value of a Jsonb array. 415 * 416 * Returns palloc()'d copy of the value, or NULL if it does not exist. 417 */ 418 JsonbValue * 419 getIthJsonbValueFromContainer(JsonbContainer *container, uint32 i) 420 { 421 JsonbValue *result; 422 char *base_addr; 423 uint32 nelements; 424 425 if (!JsonContainerIsArray(container)) 426 elog(ERROR, "not a jsonb array"); 427 428 nelements = JsonContainerSize(container); 429 base_addr = (char *) &container->children[nelements]; 430 431 if (i >= nelements) 432 return NULL; 433 434 result = palloc(sizeof(JsonbValue)); 435 436 fillJsonbValue(container, i, base_addr, 437 getJsonbOffset(container, i), 438 result); 439 440 return result; 441 } 442 443 /* 444 * A helper function to fill in a JsonbValue to represent an element of an 445 * array, or a key or value of an object. 446 * 447 * The node's JEntry is at container->children[index], and its variable-length 448 * data is at base_addr + offset. We make the caller determine the offset 449 * since in many cases the caller can amortize that work across multiple 450 * children. When it can't, it can just call getJsonbOffset(). 451 * 452 * A nested array or object will be returned as jbvBinary, ie. it won't be 453 * expanded. 454 */ 455 static void 456 fillJsonbValue(JsonbContainer *container, int index, 457 char *base_addr, uint32 offset, 458 JsonbValue *result) 459 { 460 JEntry entry = container->children[index]; 461 462 if (JBE_ISNULL(entry)) 463 { 464 result->type = jbvNull; 465 } 466 else if (JBE_ISSTRING(entry)) 467 { 468 result->type = jbvString; 469 result->val.string.val = base_addr + offset; 470 result->val.string.len = getJsonbLength(container, index); 471 Assert(result->val.string.len >= 0); 472 } 473 else if (JBE_ISNUMERIC(entry)) 474 { 475 result->type = jbvNumeric; 476 result->val.numeric = (Numeric) (base_addr + INTALIGN(offset)); 477 } 478 else if (JBE_ISBOOL_TRUE(entry)) 479 { 480 result->type = jbvBool; 481 result->val.boolean = true; 482 } 483 else if (JBE_ISBOOL_FALSE(entry)) 484 { 485 result->type = jbvBool; 486 result->val.boolean = false; 487 } 488 else 489 { 490 Assert(JBE_ISCONTAINER(entry)); 491 result->type = jbvBinary; 492 /* Remove alignment padding from data pointer and length */ 493 result->val.binary.data = (JsonbContainer *) (base_addr + INTALIGN(offset)); 494 result->val.binary.len = getJsonbLength(container, index) - 495 (INTALIGN(offset) - offset); 496 } 497 } 498 499 /* 500 * Push JsonbValue into JsonbParseState. 501 * 502 * Used when parsing JSON tokens to form Jsonb, or when converting an in-memory 503 * JsonbValue to a Jsonb. 504 * 505 * Initial state of *JsonbParseState is NULL, since it'll be allocated here 506 * originally (caller will get JsonbParseState back by reference). 507 * 508 * Only sequential tokens pertaining to non-container types should pass a 509 * JsonbValue. There is one exception -- WJB_BEGIN_ARRAY callers may pass a 510 * "raw scalar" pseudo array to append it - the actual scalar should be passed 511 * next and it will be added as the only member of the array. 512 * 513 * Values of type jvbBinary, which are rolled up arrays and objects, 514 * are unpacked before being added to the result. 515 */ 516 JsonbValue * 517 pushJsonbValue(JsonbParseState **pstate, JsonbIteratorToken seq, 518 JsonbValue *jbval) 519 { 520 JsonbIterator *it; 521 JsonbValue *res = NULL; 522 JsonbValue v; 523 JsonbIteratorToken tok; 524 525 if (!jbval || (seq != WJB_ELEM && seq != WJB_VALUE) || 526 jbval->type != jbvBinary) 527 { 528 /* drop through */ 529 return pushJsonbValueScalar(pstate, seq, jbval); 530 } 531 532 /* unpack the binary and add each piece to the pstate */ 533 it = JsonbIteratorInit(jbval->val.binary.data); 534 while ((tok = JsonbIteratorNext(&it, &v, false)) != WJB_DONE) 535 res = pushJsonbValueScalar(pstate, tok, 536 tok < WJB_BEGIN_ARRAY ? &v : NULL); 537 538 return res; 539 } 540 541 /* 542 * Do the actual pushing, with only scalar or pseudo-scalar-array values 543 * accepted. 544 */ 545 static JsonbValue * 546 pushJsonbValueScalar(JsonbParseState **pstate, JsonbIteratorToken seq, 547 JsonbValue *scalarVal) 548 { 549 JsonbValue *result = NULL; 550 551 switch (seq) 552 { 553 case WJB_BEGIN_ARRAY: 554 Assert(!scalarVal || scalarVal->val.array.rawScalar); 555 *pstate = pushState(pstate); 556 result = &(*pstate)->contVal; 557 (*pstate)->contVal.type = jbvArray; 558 (*pstate)->contVal.val.array.nElems = 0; 559 (*pstate)->contVal.val.array.rawScalar = (scalarVal && 560 scalarVal->val.array.rawScalar); 561 if (scalarVal && scalarVal->val.array.nElems > 0) 562 { 563 /* Assume that this array is still really a scalar */ 564 Assert(scalarVal->type == jbvArray); 565 (*pstate)->size = scalarVal->val.array.nElems; 566 } 567 else 568 { 569 (*pstate)->size = 4; 570 } 571 (*pstate)->contVal.val.array.elems = palloc(sizeof(JsonbValue) * 572 (*pstate)->size); 573 break; 574 case WJB_BEGIN_OBJECT: 575 Assert(!scalarVal); 576 *pstate = pushState(pstate); 577 result = &(*pstate)->contVal; 578 (*pstate)->contVal.type = jbvObject; 579 (*pstate)->contVal.val.object.nPairs = 0; 580 (*pstate)->size = 4; 581 (*pstate)->contVal.val.object.pairs = palloc(sizeof(JsonbPair) * 582 (*pstate)->size); 583 break; 584 case WJB_KEY: 585 Assert(scalarVal->type == jbvString); 586 appendKey(*pstate, scalarVal); 587 break; 588 case WJB_VALUE: 589 Assert(IsAJsonbScalar(scalarVal)); 590 appendValue(*pstate, scalarVal); 591 break; 592 case WJB_ELEM: 593 Assert(IsAJsonbScalar(scalarVal)); 594 appendElement(*pstate, scalarVal); 595 break; 596 case WJB_END_OBJECT: 597 uniqueifyJsonbObject(&(*pstate)->contVal); 598 /* fall through! */ 599 case WJB_END_ARRAY: 600 /* Steps here common to WJB_END_OBJECT case */ 601 Assert(!scalarVal); 602 result = &(*pstate)->contVal; 603 604 /* 605 * Pop stack and push current array/object as value in parent 606 * array/object 607 */ 608 *pstate = (*pstate)->next; 609 if (*pstate) 610 { 611 switch ((*pstate)->contVal.type) 612 { 613 case jbvArray: 614 appendElement(*pstate, result); 615 break; 616 case jbvObject: 617 appendValue(*pstate, result); 618 break; 619 default: 620 elog(ERROR, "invalid jsonb container type"); 621 } 622 } 623 break; 624 default: 625 elog(ERROR, "unrecognized jsonb sequential processing token"); 626 } 627 628 return result; 629 } 630 631 /* 632 * pushJsonbValue() worker: Iteration-like forming of Jsonb 633 */ 634 static JsonbParseState * 635 pushState(JsonbParseState **pstate) 636 { 637 JsonbParseState *ns = palloc(sizeof(JsonbParseState)); 638 639 ns->next = *pstate; 640 return ns; 641 } 642 643 /* 644 * pushJsonbValue() worker: Append a pair key to state when generating a Jsonb 645 */ 646 static void 647 appendKey(JsonbParseState *pstate, JsonbValue *string) 648 { 649 JsonbValue *object = &pstate->contVal; 650 651 Assert(object->type == jbvObject); 652 Assert(string->type == jbvString); 653 654 if (object->val.object.nPairs >= JSONB_MAX_PAIRS) 655 ereport(ERROR, 656 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), 657 errmsg("number of jsonb object pairs exceeds the maximum allowed (%zu)", 658 JSONB_MAX_PAIRS))); 659 660 if (object->val.object.nPairs >= pstate->size) 661 { 662 pstate->size *= 2; 663 object->val.object.pairs = repalloc(object->val.object.pairs, 664 sizeof(JsonbPair) * pstate->size); 665 } 666 667 object->val.object.pairs[object->val.object.nPairs].key = *string; 668 object->val.object.pairs[object->val.object.nPairs].order = object->val.object.nPairs; 669 } 670 671 /* 672 * pushJsonbValue() worker: Append a pair value to state when generating a 673 * Jsonb 674 */ 675 static void 676 appendValue(JsonbParseState *pstate, JsonbValue *scalarVal) 677 { 678 JsonbValue *object = &pstate->contVal; 679 680 Assert(object->type == jbvObject); 681 682 object->val.object.pairs[object->val.object.nPairs++].value = *scalarVal; 683 } 684 685 /* 686 * pushJsonbValue() worker: Append an element to state when generating a Jsonb 687 */ 688 static void 689 appendElement(JsonbParseState *pstate, JsonbValue *scalarVal) 690 { 691 JsonbValue *array = &pstate->contVal; 692 693 Assert(array->type == jbvArray); 694 695 if (array->val.array.nElems >= JSONB_MAX_ELEMS) 696 ereport(ERROR, 697 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), 698 errmsg("number of jsonb array elements exceeds the maximum allowed (%zu)", 699 JSONB_MAX_ELEMS))); 700 701 if (array->val.array.nElems >= pstate->size) 702 { 703 pstate->size *= 2; 704 array->val.array.elems = repalloc(array->val.array.elems, 705 sizeof(JsonbValue) * pstate->size); 706 } 707 708 array->val.array.elems[array->val.array.nElems++] = *scalarVal; 709 } 710 711 /* 712 * Given a JsonbContainer, expand to JsonbIterator to iterate over items 713 * fully expanded to in-memory representation for manipulation. 714 * 715 * See JsonbIteratorNext() for notes on memory management. 716 */ 717 JsonbIterator * 718 JsonbIteratorInit(JsonbContainer *container) 719 { 720 return iteratorFromContainer(container, NULL); 721 } 722 723 /* 724 * Get next JsonbValue while iterating 725 * 726 * Caller should initially pass their own, original iterator. They may get 727 * back a child iterator palloc()'d here instead. The function can be relied 728 * on to free those child iterators, lest the memory allocated for highly 729 * nested objects become unreasonable, but only if callers don't end iteration 730 * early (by breaking upon having found something in a search, for example). 731 * 732 * Callers in such a scenario, that are particularly sensitive to leaking 733 * memory in a long-lived context may walk the ancestral tree from the final 734 * iterator we left them with to its oldest ancestor, pfree()ing as they go. 735 * They do not have to free any other memory previously allocated for iterators 736 * but not accessible as direct ancestors of the iterator they're last passed 737 * back. 738 * 739 * Returns "Jsonb sequential processing" token value. Iterator "state" 740 * reflects the current stage of the process in a less granular fashion, and is 741 * mostly used here to track things internally with respect to particular 742 * iterators. 743 * 744 * Clients of this function should not have to handle any jbvBinary values 745 * (since recursive calls will deal with this), provided skipNested is false. 746 * It is our job to expand the jbvBinary representation without bothering them 747 * with it. However, clients should not take it upon themselves to touch array 748 * or Object element/pair buffers, since their element/pair pointers are 749 * garbage. Also, *val will not be set when returning WJB_END_ARRAY or 750 * WJB_END_OBJECT, on the assumption that it's only useful to access values 751 * when recursing in. 752 */ 753 JsonbIteratorToken 754 JsonbIteratorNext(JsonbIterator **it, JsonbValue *val, bool skipNested) 755 { 756 if (*it == NULL) 757 return WJB_DONE; 758 759 /* 760 * When stepping into a nested container, we jump back here to start 761 * processing the child. We will not recurse further in one call, because 762 * processing the child will always begin in JBI_ARRAY_START or 763 * JBI_OBJECT_START state. 764 */ 765 recurse: 766 switch ((*it)->state) 767 { 768 case JBI_ARRAY_START: 769 /* Set v to array on first array call */ 770 val->type = jbvArray; 771 val->val.array.nElems = (*it)->nElems; 772 773 /* 774 * v->val.array.elems is not actually set, because we aren't doing 775 * a full conversion 776 */ 777 val->val.array.rawScalar = (*it)->isScalar; 778 (*it)->curIndex = 0; 779 (*it)->curDataOffset = 0; 780 (*it)->curValueOffset = 0; /* not actually used */ 781 /* Set state for next call */ 782 (*it)->state = JBI_ARRAY_ELEM; 783 return WJB_BEGIN_ARRAY; 784 785 case JBI_ARRAY_ELEM: 786 if ((*it)->curIndex >= (*it)->nElems) 787 { 788 /* 789 * All elements within array already processed. Report this 790 * to caller, and give it back original parent iterator (which 791 * independently tracks iteration progress at its level of 792 * nesting). 793 */ 794 *it = freeAndGetParent(*it); 795 return WJB_END_ARRAY; 796 } 797 798 fillJsonbValue((*it)->container, (*it)->curIndex, 799 (*it)->dataProper, (*it)->curDataOffset, 800 val); 801 802 JBE_ADVANCE_OFFSET((*it)->curDataOffset, 803 (*it)->children[(*it)->curIndex]); 804 (*it)->curIndex++; 805 806 if (!IsAJsonbScalar(val) && !skipNested) 807 { 808 /* Recurse into container. */ 809 *it = iteratorFromContainer(val->val.binary.data, *it); 810 goto recurse; 811 } 812 else 813 { 814 /* 815 * Scalar item in array, or a container and caller didn't want 816 * us to recurse into it. 817 */ 818 return WJB_ELEM; 819 } 820 821 case JBI_OBJECT_START: 822 /* Set v to object on first object call */ 823 val->type = jbvObject; 824 val->val.object.nPairs = (*it)->nElems; 825 826 /* 827 * v->val.object.pairs is not actually set, because we aren't 828 * doing a full conversion 829 */ 830 (*it)->curIndex = 0; 831 (*it)->curDataOffset = 0; 832 (*it)->curValueOffset = getJsonbOffset((*it)->container, 833 (*it)->nElems); 834 /* Set state for next call */ 835 (*it)->state = JBI_OBJECT_KEY; 836 return WJB_BEGIN_OBJECT; 837 838 case JBI_OBJECT_KEY: 839 if ((*it)->curIndex >= (*it)->nElems) 840 { 841 /* 842 * All pairs within object already processed. Report this to 843 * caller, and give it back original containing iterator 844 * (which independently tracks iteration progress at its level 845 * of nesting). 846 */ 847 *it = freeAndGetParent(*it); 848 return WJB_END_OBJECT; 849 } 850 else 851 { 852 /* Return key of a key/value pair. */ 853 fillJsonbValue((*it)->container, (*it)->curIndex, 854 (*it)->dataProper, (*it)->curDataOffset, 855 val); 856 if (val->type != jbvString) 857 elog(ERROR, "unexpected jsonb type as object key"); 858 859 /* Set state for next call */ 860 (*it)->state = JBI_OBJECT_VALUE; 861 return WJB_KEY; 862 } 863 864 case JBI_OBJECT_VALUE: 865 /* Set state for next call */ 866 (*it)->state = JBI_OBJECT_KEY; 867 868 fillJsonbValue((*it)->container, (*it)->curIndex + (*it)->nElems, 869 (*it)->dataProper, (*it)->curValueOffset, 870 val); 871 872 JBE_ADVANCE_OFFSET((*it)->curDataOffset, 873 (*it)->children[(*it)->curIndex]); 874 JBE_ADVANCE_OFFSET((*it)->curValueOffset, 875 (*it)->children[(*it)->curIndex + (*it)->nElems]); 876 (*it)->curIndex++; 877 878 /* 879 * Value may be a container, in which case we recurse with new, 880 * child iterator (unless the caller asked not to, by passing 881 * skipNested). 882 */ 883 if (!IsAJsonbScalar(val) && !skipNested) 884 { 885 *it = iteratorFromContainer(val->val.binary.data, *it); 886 goto recurse; 887 } 888 else 889 return WJB_VALUE; 890 } 891 892 elog(ERROR, "invalid iterator state"); 893 return -1; 894 } 895 896 /* 897 * Initialize an iterator for iterating all elements in a container. 898 */ 899 static JsonbIterator * 900 iteratorFromContainer(JsonbContainer *container, JsonbIterator *parent) 901 { 902 JsonbIterator *it; 903 904 it = palloc0(sizeof(JsonbIterator)); 905 it->container = container; 906 it->parent = parent; 907 it->nElems = JsonContainerSize(container); 908 909 /* Array starts just after header */ 910 it->children = container->children; 911 912 switch (container->header & (JB_FARRAY | JB_FOBJECT)) 913 { 914 case JB_FARRAY: 915 it->dataProper = 916 (char *) it->children + it->nElems * sizeof(JEntry); 917 it->isScalar = JsonContainerIsScalar(container); 918 /* This is either a "raw scalar", or an array */ 919 Assert(!it->isScalar || it->nElems == 1); 920 921 it->state = JBI_ARRAY_START; 922 break; 923 924 case JB_FOBJECT: 925 it->dataProper = 926 (char *) it->children + it->nElems * sizeof(JEntry) * 2; 927 it->state = JBI_OBJECT_START; 928 break; 929 930 default: 931 elog(ERROR, "unknown type of jsonb container"); 932 } 933 934 return it; 935 } 936 937 /* 938 * JsonbIteratorNext() worker: Return parent, while freeing memory for current 939 * iterator 940 */ 941 static JsonbIterator * 942 freeAndGetParent(JsonbIterator *it) 943 { 944 JsonbIterator *v = it->parent; 945 946 pfree(it); 947 return v; 948 } 949 950 /* 951 * Worker for "contains" operator's function 952 * 953 * Formally speaking, containment is top-down, unordered subtree isomorphism. 954 * 955 * Takes iterators that belong to some container type. These iterators 956 * "belong" to those values in the sense that they've just been initialized in 957 * respect of them by the caller (perhaps in a nested fashion). 958 * 959 * "val" is lhs Jsonb, and mContained is rhs Jsonb when called from top level. 960 * We determine if mContained is contained within val. 961 */ 962 bool 963 JsonbDeepContains(JsonbIterator **val, JsonbIterator **mContained) 964 { 965 JsonbValue vval, 966 vcontained; 967 JsonbIteratorToken rval, 968 rcont; 969 970 /* 971 * Guard against stack overflow due to overly complex Jsonb. 972 * 973 * Functions called here independently take this precaution, but that 974 * might not be sufficient since this is also a recursive function. 975 */ 976 check_stack_depth(); 977 978 rval = JsonbIteratorNext(val, &vval, false); 979 rcont = JsonbIteratorNext(mContained, &vcontained, false); 980 981 if (rval != rcont) 982 { 983 /* 984 * The differing return values can immediately be taken as indicating 985 * two differing container types at this nesting level, which is 986 * sufficient reason to give up entirely (but it should be the case 987 * that they're both some container type). 988 */ 989 Assert(rval == WJB_BEGIN_OBJECT || rval == WJB_BEGIN_ARRAY); 990 Assert(rcont == WJB_BEGIN_OBJECT || rcont == WJB_BEGIN_ARRAY); 991 return false; 992 } 993 else if (rcont == WJB_BEGIN_OBJECT) 994 { 995 Assert(vval.type == jbvObject); 996 Assert(vcontained.type == jbvObject); 997 998 /* 999 * If the lhs has fewer pairs than the rhs, it can't possibly contain 1000 * the rhs. (This conclusion is safe only because we de-duplicate 1001 * keys in all Jsonb objects; thus there can be no corresponding 1002 * optimization in the array case.) The case probably won't arise 1003 * often, but since it's such a cheap check we may as well make it. 1004 */ 1005 if (vval.val.object.nPairs < vcontained.val.object.nPairs) 1006 return false; 1007 1008 /* Work through rhs "is it contained within?" object */ 1009 for (;;) 1010 { 1011 JsonbValue *lhsVal; /* lhsVal is from pair in lhs object */ 1012 1013 rcont = JsonbIteratorNext(mContained, &vcontained, false); 1014 1015 /* 1016 * When we get through caller's rhs "is it contained within?" 1017 * object without failing to find one of its values, it's 1018 * contained. 1019 */ 1020 if (rcont == WJB_END_OBJECT) 1021 return true; 1022 1023 Assert(rcont == WJB_KEY); 1024 1025 /* First, find value by key... */ 1026 lhsVal = findJsonbValueFromContainer((*val)->container, 1027 JB_FOBJECT, 1028 &vcontained); 1029 1030 if (!lhsVal) 1031 return false; 1032 1033 /* 1034 * ...at this stage it is apparent that there is at least a key 1035 * match for this rhs pair. 1036 */ 1037 rcont = JsonbIteratorNext(mContained, &vcontained, true); 1038 1039 Assert(rcont == WJB_VALUE); 1040 1041 /* 1042 * Compare rhs pair's value with lhs pair's value just found using 1043 * key 1044 */ 1045 if (lhsVal->type != vcontained.type) 1046 { 1047 return false; 1048 } 1049 else if (IsAJsonbScalar(lhsVal)) 1050 { 1051 if (!equalsJsonbScalarValue(lhsVal, &vcontained)) 1052 return false; 1053 } 1054 else 1055 { 1056 /* Nested container value (object or array) */ 1057 JsonbIterator *nestval, 1058 *nestContained; 1059 1060 Assert(lhsVal->type == jbvBinary); 1061 Assert(vcontained.type == jbvBinary); 1062 1063 nestval = JsonbIteratorInit(lhsVal->val.binary.data); 1064 nestContained = JsonbIteratorInit(vcontained.val.binary.data); 1065 1066 /* 1067 * Match "value" side of rhs datum object's pair recursively. 1068 * It's a nested structure. 1069 * 1070 * Note that nesting still has to "match up" at the right 1071 * nesting sub-levels. However, there need only be zero or 1072 * more matching pairs (or elements) at each nesting level 1073 * (provided the *rhs* pairs/elements *all* match on each 1074 * level), which enables searching nested structures for a 1075 * single String or other primitive type sub-datum quite 1076 * effectively (provided the user constructed the rhs nested 1077 * structure such that we "know where to look"). 1078 * 1079 * In other words, the mapping of container nodes in the rhs 1080 * "vcontained" Jsonb to internal nodes on the lhs is 1081 * injective, and parent-child edges on the rhs must be mapped 1082 * to parent-child edges on the lhs to satisfy the condition 1083 * of containment (plus of course the mapped nodes must be 1084 * equal). 1085 */ 1086 if (!JsonbDeepContains(&nestval, &nestContained)) 1087 return false; 1088 } 1089 } 1090 } 1091 else if (rcont == WJB_BEGIN_ARRAY) 1092 { 1093 JsonbValue *lhsConts = NULL; 1094 uint32 nLhsElems = vval.val.array.nElems; 1095 1096 Assert(vval.type == jbvArray); 1097 Assert(vcontained.type == jbvArray); 1098 1099 /* 1100 * Handle distinction between "raw scalar" pseudo arrays, and real 1101 * arrays. 1102 * 1103 * A raw scalar may contain another raw scalar, and an array may 1104 * contain a raw scalar, but a raw scalar may not contain an array. We 1105 * don't do something like this for the object case, since objects can 1106 * only contain pairs, never raw scalars (a pair is represented by an 1107 * rhs object argument with a single contained pair). 1108 */ 1109 if (vval.val.array.rawScalar && !vcontained.val.array.rawScalar) 1110 return false; 1111 1112 /* Work through rhs "is it contained within?" array */ 1113 for (;;) 1114 { 1115 rcont = JsonbIteratorNext(mContained, &vcontained, true); 1116 1117 /* 1118 * When we get through caller's rhs "is it contained within?" 1119 * array without failing to find one of its values, it's 1120 * contained. 1121 */ 1122 if (rcont == WJB_END_ARRAY) 1123 return true; 1124 1125 Assert(rcont == WJB_ELEM); 1126 1127 if (IsAJsonbScalar(&vcontained)) 1128 { 1129 if (!findJsonbValueFromContainer((*val)->container, 1130 JB_FARRAY, 1131 &vcontained)) 1132 return false; 1133 } 1134 else 1135 { 1136 uint32 i; 1137 1138 /* 1139 * If this is first container found in rhs array (at this 1140 * depth), initialize temp lhs array of containers 1141 */ 1142 if (lhsConts == NULL) 1143 { 1144 uint32 j = 0; 1145 1146 /* Make room for all possible values */ 1147 lhsConts = palloc(sizeof(JsonbValue) * nLhsElems); 1148 1149 for (i = 0; i < nLhsElems; i++) 1150 { 1151 /* Store all lhs elements in temp array */ 1152 rcont = JsonbIteratorNext(val, &vval, true); 1153 Assert(rcont == WJB_ELEM); 1154 1155 if (vval.type == jbvBinary) 1156 lhsConts[j++] = vval; 1157 } 1158 1159 /* No container elements in temp array, so give up now */ 1160 if (j == 0) 1161 return false; 1162 1163 /* We may have only partially filled array */ 1164 nLhsElems = j; 1165 } 1166 1167 /* XXX: Nested array containment is O(N^2) */ 1168 for (i = 0; i < nLhsElems; i++) 1169 { 1170 /* Nested container value (object or array) */ 1171 JsonbIterator *nestval, 1172 *nestContained; 1173 bool contains; 1174 1175 nestval = JsonbIteratorInit(lhsConts[i].val.binary.data); 1176 nestContained = JsonbIteratorInit(vcontained.val.binary.data); 1177 1178 contains = JsonbDeepContains(&nestval, &nestContained); 1179 1180 if (nestval) 1181 pfree(nestval); 1182 if (nestContained) 1183 pfree(nestContained); 1184 if (contains) 1185 break; 1186 } 1187 1188 /* 1189 * Report rhs container value is not contained if couldn't 1190 * match rhs container to *some* lhs cont 1191 */ 1192 if (i == nLhsElems) 1193 return false; 1194 } 1195 } 1196 } 1197 else 1198 { 1199 elog(ERROR, "invalid jsonb container type"); 1200 } 1201 1202 elog(ERROR, "unexpectedly fell off end of jsonb container"); 1203 return false; 1204 } 1205 1206 /* 1207 * Hash a JsonbValue scalar value, mixing the hash value into an existing 1208 * hash provided by the caller. 1209 * 1210 * Some callers may wish to independently XOR in JB_FOBJECT and JB_FARRAY 1211 * flags. 1212 */ 1213 void 1214 JsonbHashScalarValue(const JsonbValue *scalarVal, uint32 *hash) 1215 { 1216 uint32 tmp; 1217 1218 /* Compute hash value for scalarVal */ 1219 switch (scalarVal->type) 1220 { 1221 case jbvNull: 1222 tmp = 0x01; 1223 break; 1224 case jbvString: 1225 tmp = DatumGetUInt32(hash_any((const unsigned char *) scalarVal->val.string.val, 1226 scalarVal->val.string.len)); 1227 break; 1228 case jbvNumeric: 1229 /* Must hash equal numerics to equal hash codes */ 1230 tmp = DatumGetUInt32(DirectFunctionCall1(hash_numeric, 1231 NumericGetDatum(scalarVal->val.numeric))); 1232 break; 1233 case jbvBool: 1234 tmp = scalarVal->val.boolean ? 0x02 : 0x04; 1235 1236 break; 1237 default: 1238 elog(ERROR, "invalid jsonb scalar type"); 1239 tmp = 0; /* keep compiler quiet */ 1240 break; 1241 } 1242 1243 /* 1244 * Combine hash values of successive keys, values and elements by rotating 1245 * the previous value left 1 bit, then XOR'ing in the new 1246 * key/value/element's hash value. 1247 */ 1248 *hash = (*hash << 1) | (*hash >> 31); 1249 *hash ^= tmp; 1250 } 1251 1252 /* 1253 * Hash a value to a 64-bit value, with a seed. Otherwise, similar to 1254 * JsonbHashScalarValue. 1255 */ 1256 void 1257 JsonbHashScalarValueExtended(const JsonbValue *scalarVal, uint64 *hash, 1258 uint64 seed) 1259 { 1260 uint64 tmp; 1261 1262 switch (scalarVal->type) 1263 { 1264 case jbvNull: 1265 tmp = seed + 0x01; 1266 break; 1267 case jbvString: 1268 tmp = DatumGetUInt64(hash_any_extended((const unsigned char *) scalarVal->val.string.val, 1269 scalarVal->val.string.len, 1270 seed)); 1271 break; 1272 case jbvNumeric: 1273 tmp = DatumGetUInt64(DirectFunctionCall2(hash_numeric_extended, 1274 NumericGetDatum(scalarVal->val.numeric), 1275 UInt64GetDatum(seed))); 1276 break; 1277 case jbvBool: 1278 if (seed) 1279 tmp = DatumGetUInt64(DirectFunctionCall2(hashcharextended, 1280 BoolGetDatum(scalarVal->val.boolean), 1281 UInt64GetDatum(seed))); 1282 else 1283 tmp = scalarVal->val.boolean ? 0x02 : 0x04; 1284 1285 break; 1286 default: 1287 elog(ERROR, "invalid jsonb scalar type"); 1288 break; 1289 } 1290 1291 *hash = ROTATE_HIGH_AND_LOW_32BITS(*hash); 1292 *hash ^= tmp; 1293 } 1294 1295 /* 1296 * Are two scalar JsonbValues of the same type a and b equal? 1297 */ 1298 static bool 1299 equalsJsonbScalarValue(JsonbValue *aScalar, JsonbValue *bScalar) 1300 { 1301 if (aScalar->type == bScalar->type) 1302 { 1303 switch (aScalar->type) 1304 { 1305 case jbvNull: 1306 return true; 1307 case jbvString: 1308 return lengthCompareJsonbStringValue(aScalar, bScalar) == 0; 1309 case jbvNumeric: 1310 return DatumGetBool(DirectFunctionCall2(numeric_eq, 1311 PointerGetDatum(aScalar->val.numeric), 1312 PointerGetDatum(bScalar->val.numeric))); 1313 case jbvBool: 1314 return aScalar->val.boolean == bScalar->val.boolean; 1315 1316 default: 1317 elog(ERROR, "invalid jsonb scalar type"); 1318 } 1319 } 1320 elog(ERROR, "jsonb scalar type mismatch"); 1321 return -1; 1322 } 1323 1324 /* 1325 * Compare two scalar JsonbValues, returning -1, 0, or 1. 1326 * 1327 * Strings are compared using the default collation. Used by B-tree 1328 * operators, where a lexical sort order is generally expected. 1329 */ 1330 static int 1331 compareJsonbScalarValue(JsonbValue *aScalar, JsonbValue *bScalar) 1332 { 1333 if (aScalar->type == bScalar->type) 1334 { 1335 switch (aScalar->type) 1336 { 1337 case jbvNull: 1338 return 0; 1339 case jbvString: 1340 return varstr_cmp(aScalar->val.string.val, 1341 aScalar->val.string.len, 1342 bScalar->val.string.val, 1343 bScalar->val.string.len, 1344 DEFAULT_COLLATION_OID); 1345 case jbvNumeric: 1346 return DatumGetInt32(DirectFunctionCall2(numeric_cmp, 1347 PointerGetDatum(aScalar->val.numeric), 1348 PointerGetDatum(bScalar->val.numeric))); 1349 case jbvBool: 1350 if (aScalar->val.boolean == bScalar->val.boolean) 1351 return 0; 1352 else if (aScalar->val.boolean > bScalar->val.boolean) 1353 return 1; 1354 else 1355 return -1; 1356 default: 1357 elog(ERROR, "invalid jsonb scalar type"); 1358 } 1359 } 1360 elog(ERROR, "jsonb scalar type mismatch"); 1361 return -1; 1362 } 1363 1364 1365 /* 1366 * Functions for manipulating the resizeable buffer used by convertJsonb and 1367 * its subroutines. 1368 */ 1369 1370 /* 1371 * Reserve 'len' bytes, at the end of the buffer, enlarging it if necessary. 1372 * Returns the offset to the reserved area. The caller is expected to fill 1373 * the reserved area later with copyToBuffer(). 1374 */ 1375 static int 1376 reserveFromBuffer(StringInfo buffer, int len) 1377 { 1378 int offset; 1379 1380 /* Make more room if needed */ 1381 enlargeStringInfo(buffer, len); 1382 1383 /* remember current offset */ 1384 offset = buffer->len; 1385 1386 /* reserve the space */ 1387 buffer->len += len; 1388 1389 /* 1390 * Keep a trailing null in place, even though it's not useful for us; it 1391 * seems best to preserve the invariants of StringInfos. 1392 */ 1393 buffer->data[buffer->len] = '\0'; 1394 1395 return offset; 1396 } 1397 1398 /* 1399 * Copy 'len' bytes to a previously reserved area in buffer. 1400 */ 1401 static void 1402 copyToBuffer(StringInfo buffer, int offset, const char *data, int len) 1403 { 1404 memcpy(buffer->data + offset, data, len); 1405 } 1406 1407 /* 1408 * A shorthand for reserveFromBuffer + copyToBuffer. 1409 */ 1410 static void 1411 appendToBuffer(StringInfo buffer, const char *data, int len) 1412 { 1413 int offset; 1414 1415 offset = reserveFromBuffer(buffer, len); 1416 copyToBuffer(buffer, offset, data, len); 1417 } 1418 1419 1420 /* 1421 * Append padding, so that the length of the StringInfo is int-aligned. 1422 * Returns the number of padding bytes appended. 1423 */ 1424 static short 1425 padBufferToInt(StringInfo buffer) 1426 { 1427 int padlen, 1428 p, 1429 offset; 1430 1431 padlen = INTALIGN(buffer->len) - buffer->len; 1432 1433 offset = reserveFromBuffer(buffer, padlen); 1434 1435 /* padlen must be small, so this is probably faster than a memset */ 1436 for (p = 0; p < padlen; p++) 1437 buffer->data[offset + p] = '\0'; 1438 1439 return padlen; 1440 } 1441 1442 /* 1443 * Given a JsonbValue, convert to Jsonb. The result is palloc'd. 1444 */ 1445 static Jsonb * 1446 convertToJsonb(JsonbValue *val) 1447 { 1448 StringInfoData buffer; 1449 JEntry jentry; 1450 Jsonb *res; 1451 1452 /* Should not already have binary representation */ 1453 Assert(val->type != jbvBinary); 1454 1455 /* Allocate an output buffer. It will be enlarged as needed */ 1456 initStringInfo(&buffer); 1457 1458 /* Make room for the varlena header */ 1459 reserveFromBuffer(&buffer, VARHDRSZ); 1460 1461 convertJsonbValue(&buffer, &jentry, val, 0); 1462 1463 /* 1464 * Note: the JEntry of the root is discarded. Therefore the root 1465 * JsonbContainer struct must contain enough information to tell what kind 1466 * of value it is. 1467 */ 1468 1469 res = (Jsonb *) buffer.data; 1470 1471 SET_VARSIZE(res, buffer.len); 1472 1473 return res; 1474 } 1475 1476 /* 1477 * Subroutine of convertJsonb: serialize a single JsonbValue into buffer. 1478 * 1479 * The JEntry header for this node is returned in *header. It is filled in 1480 * with the length of this value and appropriate type bits. If we wish to 1481 * store an end offset rather than a length, it is the caller's responsibility 1482 * to adjust for that. 1483 * 1484 * If the value is an array or an object, this recurses. 'level' is only used 1485 * for debugging purposes. 1486 */ 1487 static void 1488 convertJsonbValue(StringInfo buffer, JEntry *header, JsonbValue *val, int level) 1489 { 1490 check_stack_depth(); 1491 1492 if (!val) 1493 return; 1494 1495 /* 1496 * A JsonbValue passed as val should never have a type of jbvBinary, and 1497 * neither should any of its sub-components. Those values will be produced 1498 * by convertJsonbArray and convertJsonbObject, the results of which will 1499 * not be passed back to this function as an argument. 1500 */ 1501 1502 if (IsAJsonbScalar(val)) 1503 convertJsonbScalar(buffer, header, val); 1504 else if (val->type == jbvArray) 1505 convertJsonbArray(buffer, header, val, level); 1506 else if (val->type == jbvObject) 1507 convertJsonbObject(buffer, header, val, level); 1508 else 1509 elog(ERROR, "unknown type of jsonb container to convert"); 1510 } 1511 1512 static void 1513 convertJsonbArray(StringInfo buffer, JEntry *pheader, JsonbValue *val, int level) 1514 { 1515 int base_offset; 1516 int jentry_offset; 1517 int i; 1518 int totallen; 1519 uint32 header; 1520 int nElems = val->val.array.nElems; 1521 1522 /* Remember where in the buffer this array starts. */ 1523 base_offset = buffer->len; 1524 1525 /* Align to 4-byte boundary (any padding counts as part of my data) */ 1526 padBufferToInt(buffer); 1527 1528 /* 1529 * Construct the header Jentry and store it in the beginning of the 1530 * variable-length payload. 1531 */ 1532 header = nElems | JB_FARRAY; 1533 if (val->val.array.rawScalar) 1534 { 1535 Assert(nElems == 1); 1536 Assert(level == 0); 1537 header |= JB_FSCALAR; 1538 } 1539 1540 appendToBuffer(buffer, (char *) &header, sizeof(uint32)); 1541 1542 /* Reserve space for the JEntries of the elements. */ 1543 jentry_offset = reserveFromBuffer(buffer, sizeof(JEntry) * nElems); 1544 1545 totallen = 0; 1546 for (i = 0; i < nElems; i++) 1547 { 1548 JsonbValue *elem = &val->val.array.elems[i]; 1549 int len; 1550 JEntry meta; 1551 1552 /* 1553 * Convert element, producing a JEntry and appending its 1554 * variable-length data to buffer 1555 */ 1556 convertJsonbValue(buffer, &meta, elem, level + 1); 1557 1558 len = JBE_OFFLENFLD(meta); 1559 totallen += len; 1560 1561 /* 1562 * Bail out if total variable-length data exceeds what will fit in a 1563 * JEntry length field. We check this in each iteration, not just 1564 * once at the end, to forestall possible integer overflow. 1565 */ 1566 if (totallen > JENTRY_OFFLENMASK) 1567 ereport(ERROR, 1568 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), 1569 errmsg("total size of jsonb array elements exceeds the maximum of %u bytes", 1570 JENTRY_OFFLENMASK))); 1571 1572 /* 1573 * Convert each JB_OFFSET_STRIDE'th length to an offset. 1574 */ 1575 if ((i % JB_OFFSET_STRIDE) == 0) 1576 meta = (meta & JENTRY_TYPEMASK) | totallen | JENTRY_HAS_OFF; 1577 1578 copyToBuffer(buffer, jentry_offset, (char *) &meta, sizeof(JEntry)); 1579 jentry_offset += sizeof(JEntry); 1580 } 1581 1582 /* Total data size is everything we've appended to buffer */ 1583 totallen = buffer->len - base_offset; 1584 1585 /* Check length again, since we didn't include the metadata above */ 1586 if (totallen > JENTRY_OFFLENMASK) 1587 ereport(ERROR, 1588 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), 1589 errmsg("total size of jsonb array elements exceeds the maximum of %u bytes", 1590 JENTRY_OFFLENMASK))); 1591 1592 /* Initialize the header of this node in the container's JEntry array */ 1593 *pheader = JENTRY_ISCONTAINER | totallen; 1594 } 1595 1596 static void 1597 convertJsonbObject(StringInfo buffer, JEntry *pheader, JsonbValue *val, int level) 1598 { 1599 int base_offset; 1600 int jentry_offset; 1601 int i; 1602 int totallen; 1603 uint32 header; 1604 int nPairs = val->val.object.nPairs; 1605 1606 /* Remember where in the buffer this object starts. */ 1607 base_offset = buffer->len; 1608 1609 /* Align to 4-byte boundary (any padding counts as part of my data) */ 1610 padBufferToInt(buffer); 1611 1612 /* 1613 * Construct the header Jentry and store it in the beginning of the 1614 * variable-length payload. 1615 */ 1616 header = nPairs | JB_FOBJECT; 1617 appendToBuffer(buffer, (char *) &header, sizeof(uint32)); 1618 1619 /* Reserve space for the JEntries of the keys and values. */ 1620 jentry_offset = reserveFromBuffer(buffer, sizeof(JEntry) * nPairs * 2); 1621 1622 /* 1623 * Iterate over the keys, then over the values, since that is the ordering 1624 * we want in the on-disk representation. 1625 */ 1626 totallen = 0; 1627 for (i = 0; i < nPairs; i++) 1628 { 1629 JsonbPair *pair = &val->val.object.pairs[i]; 1630 int len; 1631 JEntry meta; 1632 1633 /* 1634 * Convert key, producing a JEntry and appending its variable-length 1635 * data to buffer 1636 */ 1637 convertJsonbScalar(buffer, &meta, &pair->key); 1638 1639 len = JBE_OFFLENFLD(meta); 1640 totallen += len; 1641 1642 /* 1643 * Bail out if total variable-length data exceeds what will fit in a 1644 * JEntry length field. We check this in each iteration, not just 1645 * once at the end, to forestall possible integer overflow. 1646 */ 1647 if (totallen > JENTRY_OFFLENMASK) 1648 ereport(ERROR, 1649 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), 1650 errmsg("total size of jsonb object elements exceeds the maximum of %u bytes", 1651 JENTRY_OFFLENMASK))); 1652 1653 /* 1654 * Convert each JB_OFFSET_STRIDE'th length to an offset. 1655 */ 1656 if ((i % JB_OFFSET_STRIDE) == 0) 1657 meta = (meta & JENTRY_TYPEMASK) | totallen | JENTRY_HAS_OFF; 1658 1659 copyToBuffer(buffer, jentry_offset, (char *) &meta, sizeof(JEntry)); 1660 jentry_offset += sizeof(JEntry); 1661 } 1662 for (i = 0; i < nPairs; i++) 1663 { 1664 JsonbPair *pair = &val->val.object.pairs[i]; 1665 int len; 1666 JEntry meta; 1667 1668 /* 1669 * Convert value, producing a JEntry and appending its variable-length 1670 * data to buffer 1671 */ 1672 convertJsonbValue(buffer, &meta, &pair->value, level + 1); 1673 1674 len = JBE_OFFLENFLD(meta); 1675 totallen += len; 1676 1677 /* 1678 * Bail out if total variable-length data exceeds what will fit in a 1679 * JEntry length field. We check this in each iteration, not just 1680 * once at the end, to forestall possible integer overflow. 1681 */ 1682 if (totallen > JENTRY_OFFLENMASK) 1683 ereport(ERROR, 1684 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), 1685 errmsg("total size of jsonb object elements exceeds the maximum of %u bytes", 1686 JENTRY_OFFLENMASK))); 1687 1688 /* 1689 * Convert each JB_OFFSET_STRIDE'th length to an offset. 1690 */ 1691 if (((i + nPairs) % JB_OFFSET_STRIDE) == 0) 1692 meta = (meta & JENTRY_TYPEMASK) | totallen | JENTRY_HAS_OFF; 1693 1694 copyToBuffer(buffer, jentry_offset, (char *) &meta, sizeof(JEntry)); 1695 jentry_offset += sizeof(JEntry); 1696 } 1697 1698 /* Total data size is everything we've appended to buffer */ 1699 totallen = buffer->len - base_offset; 1700 1701 /* Check length again, since we didn't include the metadata above */ 1702 if (totallen > JENTRY_OFFLENMASK) 1703 ereport(ERROR, 1704 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), 1705 errmsg("total size of jsonb object elements exceeds the maximum of %u bytes", 1706 JENTRY_OFFLENMASK))); 1707 1708 /* Initialize the header of this node in the container's JEntry array */ 1709 *pheader = JENTRY_ISCONTAINER | totallen; 1710 } 1711 1712 static void 1713 convertJsonbScalar(StringInfo buffer, JEntry *jentry, JsonbValue *scalarVal) 1714 { 1715 int numlen; 1716 short padlen; 1717 1718 switch (scalarVal->type) 1719 { 1720 case jbvNull: 1721 *jentry = JENTRY_ISNULL; 1722 break; 1723 1724 case jbvString: 1725 appendToBuffer(buffer, scalarVal->val.string.val, scalarVal->val.string.len); 1726 1727 *jentry = scalarVal->val.string.len; 1728 break; 1729 1730 case jbvNumeric: 1731 numlen = VARSIZE_ANY(scalarVal->val.numeric); 1732 padlen = padBufferToInt(buffer); 1733 1734 appendToBuffer(buffer, (char *) scalarVal->val.numeric, numlen); 1735 1736 *jentry = JENTRY_ISNUMERIC | (padlen + numlen); 1737 break; 1738 1739 case jbvBool: 1740 *jentry = (scalarVal->val.boolean) ? 1741 JENTRY_ISBOOL_TRUE : JENTRY_ISBOOL_FALSE; 1742 break; 1743 1744 default: 1745 elog(ERROR, "invalid jsonb scalar type"); 1746 } 1747 } 1748 1749 /* 1750 * Compare two jbvString JsonbValue values, a and b. 1751 * 1752 * This is a special qsort() comparator used to sort strings in certain 1753 * internal contexts where it is sufficient to have a well-defined sort order. 1754 * In particular, object pair keys are sorted according to this criteria to 1755 * facilitate cheap binary searches where we don't care about lexical sort 1756 * order. 1757 * 1758 * a and b are first sorted based on their length. If a tie-breaker is 1759 * required, only then do we consider string binary equality. 1760 */ 1761 static int 1762 lengthCompareJsonbStringValue(const void *a, const void *b) 1763 { 1764 const JsonbValue *va = (const JsonbValue *) a; 1765 const JsonbValue *vb = (const JsonbValue *) b; 1766 int res; 1767 1768 Assert(va->type == jbvString); 1769 Assert(vb->type == jbvString); 1770 1771 if (va->val.string.len == vb->val.string.len) 1772 { 1773 res = memcmp(va->val.string.val, vb->val.string.val, va->val.string.len); 1774 } 1775 else 1776 { 1777 res = (va->val.string.len > vb->val.string.len) ? 1 : -1; 1778 } 1779 1780 return res; 1781 } 1782 1783 /* 1784 * qsort_arg() comparator to compare JsonbPair values. 1785 * 1786 * Third argument 'binequal' may point to a bool. If it's set, *binequal is set 1787 * to true iff a and b have full binary equality, since some callers have an 1788 * interest in whether the two values are equal or merely equivalent. 1789 * 1790 * N.B: String comparisons here are "length-wise" 1791 * 1792 * Pairs with equals keys are ordered such that the order field is respected. 1793 */ 1794 static int 1795 lengthCompareJsonbPair(const void *a, const void *b, void *binequal) 1796 { 1797 const JsonbPair *pa = (const JsonbPair *) a; 1798 const JsonbPair *pb = (const JsonbPair *) b; 1799 int res; 1800 1801 res = lengthCompareJsonbStringValue(&pa->key, &pb->key); 1802 if (res == 0 && binequal) 1803 *((bool *) binequal) = true; 1804 1805 /* 1806 * Guarantee keeping order of equal pair. Unique algorithm will prefer 1807 * first element as value. 1808 */ 1809 if (res == 0) 1810 res = (pa->order > pb->order) ? -1 : 1; 1811 1812 return res; 1813 } 1814 1815 /* 1816 * Sort and unique-ify pairs in JsonbValue object 1817 */ 1818 static void 1819 uniqueifyJsonbObject(JsonbValue *object) 1820 { 1821 bool hasNonUniq = false; 1822 1823 Assert(object->type == jbvObject); 1824 1825 if (object->val.object.nPairs > 1) 1826 qsort_arg(object->val.object.pairs, object->val.object.nPairs, sizeof(JsonbPair), 1827 lengthCompareJsonbPair, &hasNonUniq); 1828 1829 if (hasNonUniq) 1830 { 1831 JsonbPair *ptr = object->val.object.pairs + 1, 1832 *res = object->val.object.pairs; 1833 1834 while (ptr - object->val.object.pairs < object->val.object.nPairs) 1835 { 1836 /* Avoid copying over duplicate */ 1837 if (lengthCompareJsonbStringValue(ptr, res) != 0) 1838 { 1839 res++; 1840 if (ptr != res) 1841 memcpy(res, ptr, sizeof(JsonbPair)); 1842 } 1843 ptr++; 1844 } 1845 1846 object->val.object.nPairs = res + 1 - object->val.object.pairs; 1847 } 1848 } 1849