1 /*------------------------------------------------------------------------- 2 * 3 * jsonb.h 4 * Declarations for jsonb data type support. 5 * 6 * Copyright (c) 1996-2016, PostgreSQL Global Development Group 7 * 8 * src/include/utils/jsonb.h 9 * 10 *------------------------------------------------------------------------- 11 */ 12 #ifndef __JSONB_H__ 13 #define __JSONB_H__ 14 15 #include "lib/stringinfo.h" 16 #include "utils/array.h" 17 #include "utils/numeric.h" 18 19 /* Tokens used when sequentially processing a jsonb value */ 20 typedef enum 21 { 22 WJB_DONE, 23 WJB_KEY, 24 WJB_VALUE, 25 WJB_ELEM, 26 WJB_BEGIN_ARRAY, 27 WJB_END_ARRAY, 28 WJB_BEGIN_OBJECT, 29 WJB_END_OBJECT 30 } JsonbIteratorToken; 31 32 /* Strategy numbers for GIN index opclasses */ 33 #define JsonbContainsStrategyNumber 7 34 #define JsonbExistsStrategyNumber 9 35 #define JsonbExistsAnyStrategyNumber 10 36 #define JsonbExistsAllStrategyNumber 11 37 38 /* 39 * In the standard jsonb_ops GIN opclass for jsonb, we choose to index both 40 * keys and values. The storage format is text. The first byte of the text 41 * string distinguishes whether this is a key (always a string), null value, 42 * boolean value, numeric value, or string value. However, array elements 43 * that are strings are marked as though they were keys; this imprecision 44 * supports the definition of the "exists" operator, which treats array 45 * elements like keys. The remainder of the text string is empty for a null 46 * value, "t" or "f" for a boolean value, a normalized print representation of 47 * a numeric value, or the text of a string value. However, if the length of 48 * this text representation would exceed JGIN_MAXLENGTH bytes, we instead hash 49 * the text representation and store an 8-hex-digit representation of the 50 * uint32 hash value, marking the prefix byte with an additional bit to 51 * distinguish that this has happened. Hashing long strings saves space and 52 * ensures that we won't overrun the maximum entry length for a GIN index. 53 * (But JGIN_MAXLENGTH is quite a bit shorter than GIN's limit. It's chosen 54 * to ensure that the on-disk text datum will have a short varlena header.) 55 * Note that when any hashed item appears in a query, we must recheck index 56 * matches against the heap tuple; currently, this costs nothing because we 57 * must always recheck for other reasons. 58 */ 59 #define JGINFLAG_KEY 0x01 /* key (or string array element) */ 60 #define JGINFLAG_NULL 0x02 /* null value */ 61 #define JGINFLAG_BOOL 0x03 /* boolean value */ 62 #define JGINFLAG_NUM 0x04 /* numeric value */ 63 #define JGINFLAG_STR 0x05 /* string value (if not an array element) */ 64 #define JGINFLAG_HASHED 0x10 /* OR'd into flag if value was hashed */ 65 #define JGIN_MAXLENGTH 125 /* max length of text part before hashing */ 66 67 /* Convenience macros */ 68 #define DatumGetJsonb(d) ((Jsonb *) PG_DETOAST_DATUM(d)) 69 #define JsonbGetDatum(p) PointerGetDatum(p) 70 #define PG_GETARG_JSONB(x) DatumGetJsonb(PG_GETARG_DATUM(x)) 71 #define PG_RETURN_JSONB(x) PG_RETURN_POINTER(x) 72 73 typedef struct JsonbPair JsonbPair; 74 typedef struct JsonbValue JsonbValue; 75 76 /* 77 * Jsonbs are varlena objects, so must meet the varlena convention that the 78 * first int32 of the object contains the total object size in bytes. Be sure 79 * to use VARSIZE() and SET_VARSIZE() to access it, though! 80 * 81 * Jsonb is the on-disk representation, in contrast to the in-memory JsonbValue 82 * representation. Often, JsonbValues are just shims through which a Jsonb 83 * buffer is accessed, but they can also be deep copied and passed around. 84 * 85 * Jsonb is a tree structure. Each node in the tree consists of a JEntry 86 * header and a variable-length content (possibly of zero size). The JEntry 87 * header indicates what kind of a node it is, e.g. a string or an array, 88 * and provides the length of its variable-length portion. 89 * 90 * The JEntry and the content of a node are not stored physically together. 91 * Instead, the container array or object has an array that holds the JEntrys 92 * of all the child nodes, followed by their variable-length portions. 93 * 94 * The root node is an exception; it has no parent array or object that could 95 * hold its JEntry. Hence, no JEntry header is stored for the root node. It 96 * is implicitly known that the root node must be an array or an object, 97 * so we can get away without the type indicator as long as we can distinguish 98 * the two. For that purpose, both an array and an object begin with a uint32 99 * header field, which contains an JB_FOBJECT or JB_FARRAY flag. When a naked 100 * scalar value needs to be stored as a Jsonb value, what we actually store is 101 * an array with one element, with the flags in the array's header field set 102 * to JB_FSCALAR | JB_FARRAY. 103 * 104 * Overall, the Jsonb struct requires 4-bytes alignment. Within the struct, 105 * the variable-length portion of some node types is aligned to a 4-byte 106 * boundary, while others are not. When alignment is needed, the padding is 107 * in the beginning of the node that requires it. For example, if a numeric 108 * node is stored after a string node, so that the numeric node begins at 109 * offset 3, the variable-length portion of the numeric node will begin with 110 * one padding byte so that the actual numeric data is 4-byte aligned. 111 */ 112 113 /* 114 * JEntry format. 115 * 116 * The least significant 28 bits store either the data length of the entry, 117 * or its end+1 offset from the start of the variable-length portion of the 118 * containing object. The next three bits store the type of the entry, and 119 * the high-order bit tells whether the least significant bits store a length 120 * or an offset. 121 * 122 * The reason for the offset-or-length complication is to compromise between 123 * access speed and data compressibility. In the initial design each JEntry 124 * always stored an offset, but this resulted in JEntry arrays with horrible 125 * compressibility properties, so that TOAST compression of a JSONB did not 126 * work well. Storing only lengths would greatly improve compressibility, 127 * but it makes random access into large arrays expensive (O(N) not O(1)). 128 * So what we do is store an offset in every JB_OFFSET_STRIDE'th JEntry and 129 * a length in the rest. This results in reasonably compressible data (as 130 * long as the stride isn't too small). We may have to examine as many as 131 * JB_OFFSET_STRIDE JEntrys in order to find out the offset or length of any 132 * given item, but that's still O(1) no matter how large the container is. 133 * 134 * We could avoid eating a flag bit for this purpose if we were to store 135 * the stride in the container header, or if we were willing to treat the 136 * stride as an unchangeable constant. Neither of those options is very 137 * attractive though. 138 */ 139 typedef uint32 JEntry; 140 141 #define JENTRY_OFFLENMASK 0x0FFFFFFF 142 #define JENTRY_TYPEMASK 0x70000000 143 #define JENTRY_HAS_OFF 0x80000000 144 145 /* values stored in the type bits */ 146 #define JENTRY_ISSTRING 0x00000000 147 #define JENTRY_ISNUMERIC 0x10000000 148 #define JENTRY_ISBOOL_FALSE 0x20000000 149 #define JENTRY_ISBOOL_TRUE 0x30000000 150 #define JENTRY_ISNULL 0x40000000 151 #define JENTRY_ISCONTAINER 0x50000000 /* array or object */ 152 153 /* Access macros. Note possible multiple evaluations */ 154 #define JBE_OFFLENFLD(je_) ((je_) & JENTRY_OFFLENMASK) 155 #define JBE_HAS_OFF(je_) (((je_) & JENTRY_HAS_OFF) != 0) 156 #define JBE_ISSTRING(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISSTRING) 157 #define JBE_ISNUMERIC(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISNUMERIC) 158 #define JBE_ISCONTAINER(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISCONTAINER) 159 #define JBE_ISNULL(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISNULL) 160 #define JBE_ISBOOL_TRUE(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISBOOL_TRUE) 161 #define JBE_ISBOOL_FALSE(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISBOOL_FALSE) 162 #define JBE_ISBOOL(je_) (JBE_ISBOOL_TRUE(je_) || JBE_ISBOOL_FALSE(je_)) 163 164 /* Macro for advancing an offset variable to the next JEntry */ 165 #define JBE_ADVANCE_OFFSET(offset, je) \ 166 do { \ 167 JEntry je_ = (je); \ 168 if (JBE_HAS_OFF(je_)) \ 169 (offset) = JBE_OFFLENFLD(je_); \ 170 else \ 171 (offset) += JBE_OFFLENFLD(je_); \ 172 } while(0) 173 174 /* 175 * We store an offset, not a length, every JB_OFFSET_STRIDE children. 176 * Caution: this macro should only be referenced when creating a JSONB 177 * value. When examining an existing value, pay attention to the HAS_OFF 178 * bits instead. This allows changes in the offset-placement heuristic 179 * without breaking on-disk compatibility. 180 */ 181 #define JB_OFFSET_STRIDE 32 182 183 /* 184 * A jsonb array or object node, within a Jsonb Datum. 185 * 186 * An array has one child for each element, stored in array order. 187 * 188 * An object has two children for each key/value pair. The keys all appear 189 * first, in key sort order; then the values appear, in an order matching the 190 * key order. This arrangement keeps the keys compact in memory, making a 191 * search for a particular key more cache-friendly. 192 */ 193 typedef struct JsonbContainer 194 { 195 uint32 header; /* number of elements or key/value pairs, and 196 * flags */ 197 JEntry children[FLEXIBLE_ARRAY_MEMBER]; 198 199 /* the data for each child node follows. */ 200 } JsonbContainer; 201 202 /* flags for the header-field in JsonbContainer */ 203 #define JB_CMASK 0x0FFFFFFF /* mask for count field */ 204 #define JB_FSCALAR 0x10000000 /* flag bits */ 205 #define JB_FOBJECT 0x20000000 206 #define JB_FARRAY 0x40000000 207 208 /* The top-level on-disk format for a jsonb datum. */ 209 typedef struct 210 { 211 int32 vl_len_; /* varlena header (do not touch directly!) */ 212 JsonbContainer root; 213 } Jsonb; 214 215 /* convenience macros for accessing the root container in a Jsonb datum */ 216 #define JB_ROOT_COUNT(jbp_) ( *(uint32*) VARDATA(jbp_) & JB_CMASK) 217 #define JB_ROOT_IS_SCALAR(jbp_) ( *(uint32*) VARDATA(jbp_) & JB_FSCALAR) 218 #define JB_ROOT_IS_OBJECT(jbp_) ( *(uint32*) VARDATA(jbp_) & JB_FOBJECT) 219 #define JB_ROOT_IS_ARRAY(jbp_) ( *(uint32*) VARDATA(jbp_) & JB_FARRAY) 220 221 222 /* 223 * JsonbValue: In-memory representation of Jsonb. This is a convenient 224 * deserialized representation, that can easily support using the "val" 225 * union across underlying types during manipulation. The Jsonb on-disk 226 * representation has various alignment considerations. 227 */ 228 struct JsonbValue 229 { 230 enum 231 { 232 /* Scalar types */ 233 jbvNull = 0x0, 234 jbvString, 235 jbvNumeric, 236 jbvBool, 237 /* Composite types */ 238 jbvArray = 0x10, 239 jbvObject, 240 /* Binary (i.e. struct Jsonb) jbvArray/jbvObject */ 241 jbvBinary 242 } type; /* Influences sort order */ 243 244 union 245 { 246 Numeric numeric; 247 bool boolean; 248 struct 249 { 250 int len; 251 char *val; /* Not necessarily null-terminated */ 252 } string; /* String primitive type */ 253 254 struct 255 { 256 int nElems; 257 JsonbValue *elems; 258 bool rawScalar; /* Top-level "raw scalar" array? */ 259 } array; /* Array container type */ 260 261 struct 262 { 263 int nPairs; /* 1 pair, 2 elements */ 264 JsonbPair *pairs; 265 } object; /* Associative container type */ 266 267 struct 268 { 269 int len; 270 JsonbContainer *data; 271 } binary; /* Array or object, in on-disk format */ 272 } val; 273 }; 274 275 #define IsAJsonbScalar(jsonbval) ((jsonbval)->type >= jbvNull && \ 276 (jsonbval)->type <= jbvBool) 277 278 /* 279 * Key/value pair within an Object. 280 * 281 * This struct type is only used briefly while constructing a Jsonb; it is 282 * *not* the on-disk representation. 283 * 284 * Pairs with duplicate keys are de-duplicated. We store the originally 285 * observed pair ordering for the purpose of removing duplicates in a 286 * well-defined way (which is "last observed wins"). 287 */ 288 struct JsonbPair 289 { 290 JsonbValue key; /* Must be a jbvString */ 291 JsonbValue value; /* May be of any type */ 292 uint32 order; /* Pair's index in original sequence */ 293 }; 294 295 /* Conversion state used when parsing Jsonb from text, or for type coercion */ 296 typedef struct JsonbParseState 297 { 298 JsonbValue contVal; 299 Size size; 300 struct JsonbParseState *next; 301 } JsonbParseState; 302 303 /* 304 * JsonbIterator holds details of the type for each iteration. It also stores a 305 * Jsonb varlena buffer, which can be directly accessed in some contexts. 306 */ 307 typedef enum 308 { 309 JBI_ARRAY_START, 310 JBI_ARRAY_ELEM, 311 JBI_OBJECT_START, 312 JBI_OBJECT_KEY, 313 JBI_OBJECT_VALUE 314 } JsonbIterState; 315 316 typedef struct JsonbIterator 317 { 318 /* Container being iterated */ 319 JsonbContainer *container; 320 uint32 nElems; /* Number of elements in children array (will 321 * be nPairs for objects) */ 322 bool isScalar; /* Pseudo-array scalar value? */ 323 JEntry *children; /* JEntrys for child nodes */ 324 /* Data proper. This points to the beginning of the variable-length data */ 325 char *dataProper; 326 327 /* Current item in buffer (up to nElems) */ 328 int curIndex; 329 330 /* Data offset corresponding to current item */ 331 uint32 curDataOffset; 332 333 /* 334 * If the container is an object, we want to return keys and values 335 * alternately; so curDataOffset points to the current key, and 336 * curValueOffset points to the current value. 337 */ 338 uint32 curValueOffset; 339 340 /* Private state */ 341 JsonbIterState state; 342 343 struct JsonbIterator *parent; 344 } JsonbIterator; 345 346 /* I/O routines */ 347 extern Datum jsonb_in(PG_FUNCTION_ARGS); 348 extern Datum jsonb_out(PG_FUNCTION_ARGS); 349 extern Datum jsonb_recv(PG_FUNCTION_ARGS); 350 extern Datum jsonb_send(PG_FUNCTION_ARGS); 351 extern Datum jsonb_typeof(PG_FUNCTION_ARGS); 352 353 /* generator routines */ 354 extern Datum to_jsonb(PG_FUNCTION_ARGS); 355 356 extern Datum jsonb_build_object(PG_FUNCTION_ARGS); 357 extern Datum jsonb_build_object_noargs(PG_FUNCTION_ARGS); 358 extern Datum jsonb_build_array(PG_FUNCTION_ARGS); 359 extern Datum jsonb_build_array_noargs(PG_FUNCTION_ARGS); 360 extern Datum jsonb_object(PG_FUNCTION_ARGS); 361 extern Datum jsonb_object_two_arg(PG_FUNCTION_ARGS); 362 363 /* jsonb_agg, json_object_agg functions */ 364 extern Datum jsonb_agg_transfn(PG_FUNCTION_ARGS); 365 extern Datum jsonb_agg_finalfn(PG_FUNCTION_ARGS); 366 extern Datum jsonb_object_agg_transfn(PG_FUNCTION_ARGS); 367 extern Datum jsonb_object_agg_finalfn(PG_FUNCTION_ARGS); 368 369 /* Indexing-related ops */ 370 extern Datum jsonb_exists(PG_FUNCTION_ARGS); 371 extern Datum jsonb_exists_any(PG_FUNCTION_ARGS); 372 extern Datum jsonb_exists_all(PG_FUNCTION_ARGS); 373 extern Datum jsonb_contains(PG_FUNCTION_ARGS); 374 extern Datum jsonb_contained(PG_FUNCTION_ARGS); 375 extern Datum jsonb_ne(PG_FUNCTION_ARGS); 376 extern Datum jsonb_lt(PG_FUNCTION_ARGS); 377 extern Datum jsonb_gt(PG_FUNCTION_ARGS); 378 extern Datum jsonb_le(PG_FUNCTION_ARGS); 379 extern Datum jsonb_ge(PG_FUNCTION_ARGS); 380 extern Datum jsonb_eq(PG_FUNCTION_ARGS); 381 extern Datum jsonb_cmp(PG_FUNCTION_ARGS); 382 extern Datum jsonb_hash(PG_FUNCTION_ARGS); 383 384 /* GIN support functions for jsonb_ops */ 385 extern Datum gin_compare_jsonb(PG_FUNCTION_ARGS); 386 extern Datum gin_extract_jsonb(PG_FUNCTION_ARGS); 387 extern Datum gin_extract_jsonb_query(PG_FUNCTION_ARGS); 388 extern Datum gin_consistent_jsonb(PG_FUNCTION_ARGS); 389 extern Datum gin_triconsistent_jsonb(PG_FUNCTION_ARGS); 390 391 /* GIN support functions for jsonb_path_ops */ 392 extern Datum gin_extract_jsonb_path(PG_FUNCTION_ARGS); 393 extern Datum gin_extract_jsonb_query_path(PG_FUNCTION_ARGS); 394 extern Datum gin_consistent_jsonb_path(PG_FUNCTION_ARGS); 395 extern Datum gin_triconsistent_jsonb_path(PG_FUNCTION_ARGS); 396 397 /* pretty printer, returns text */ 398 extern Datum jsonb_pretty(PG_FUNCTION_ARGS); 399 400 /* concatenation */ 401 extern Datum jsonb_concat(PG_FUNCTION_ARGS); 402 403 /* deletion */ 404 extern Datum jsonb_delete(PG_FUNCTION_ARGS); 405 extern Datum jsonb_delete_idx(PG_FUNCTION_ARGS); 406 extern Datum jsonb_delete_path(PG_FUNCTION_ARGS); 407 408 /* replacement */ 409 extern Datum jsonb_set(PG_FUNCTION_ARGS); 410 411 /* insert after or before (for arrays) */ 412 extern Datum jsonb_insert(PG_FUNCTION_ARGS); 413 414 /* Support functions */ 415 extern uint32 getJsonbOffset(const JsonbContainer *jc, int index); 416 extern uint32 getJsonbLength(const JsonbContainer *jc, int index); 417 extern int compareJsonbContainers(JsonbContainer *a, JsonbContainer *b); 418 extern JsonbValue *findJsonbValueFromContainer(JsonbContainer *sheader, 419 uint32 flags, 420 JsonbValue *key); 421 extern JsonbValue *getIthJsonbValueFromContainer(JsonbContainer *sheader, 422 uint32 i); 423 extern JsonbValue *pushJsonbValue(JsonbParseState **pstate, 424 JsonbIteratorToken seq, JsonbValue *jbVal); 425 extern JsonbIterator *JsonbIteratorInit(JsonbContainer *container); 426 extern JsonbIteratorToken JsonbIteratorNext(JsonbIterator **it, JsonbValue *val, 427 bool skipNested); 428 extern Jsonb *JsonbValueToJsonb(JsonbValue *val); 429 extern bool JsonbDeepContains(JsonbIterator **val, 430 JsonbIterator **mContained); 431 extern void JsonbHashScalarValue(const JsonbValue *scalarVal, uint32 *hash); 432 433 /* jsonb.c support functions */ 434 extern char *JsonbToCString(StringInfo out, JsonbContainer *in, 435 int estimated_len); 436 extern char *JsonbToCStringIndent(StringInfo out, JsonbContainer *in, 437 int estimated_len); 438 439 440 #endif /* __JSONB_H__ */ 441