1 /*------------------------------------------------------------------------- 2 * 3 * jsonb.h 4 * Declarations for jsonb data type support. 5 * 6 * Copyright (c) 1996-2020, PostgreSQL Global Development Group 7 * 8 * src/include/utils/jsonb.h 9 * 10 *------------------------------------------------------------------------- 11 */ 12 #ifndef __JSONB_H__ 13 #define __JSONB_H__ 14 15 #include "lib/stringinfo.h" 16 #include "utils/array.h" 17 #include "utils/numeric.h" 18 19 /* Tokens used when sequentially processing a jsonb value */ 20 typedef enum 21 { 22 WJB_DONE, 23 WJB_KEY, 24 WJB_VALUE, 25 WJB_ELEM, 26 WJB_BEGIN_ARRAY, 27 WJB_END_ARRAY, 28 WJB_BEGIN_OBJECT, 29 WJB_END_OBJECT 30 } JsonbIteratorToken; 31 32 /* Strategy numbers for GIN index opclasses */ 33 #define JsonbContainsStrategyNumber 7 34 #define JsonbExistsStrategyNumber 9 35 #define JsonbExistsAnyStrategyNumber 10 36 #define JsonbExistsAllStrategyNumber 11 37 #define JsonbJsonpathExistsStrategyNumber 15 38 #define JsonbJsonpathPredicateStrategyNumber 16 39 40 41 /* 42 * In the standard jsonb_ops GIN opclass for jsonb, we choose to index both 43 * keys and values. The storage format is text. The first byte of the text 44 * string distinguishes whether this is a key (always a string), null value, 45 * boolean value, numeric value, or string value. However, array elements 46 * that are strings are marked as though they were keys; this imprecision 47 * supports the definition of the "exists" operator, which treats array 48 * elements like keys. The remainder of the text string is empty for a null 49 * value, "t" or "f" for a boolean value, a normalized print representation of 50 * a numeric value, or the text of a string value. However, if the length of 51 * this text representation would exceed JGIN_MAXLENGTH bytes, we instead hash 52 * the text representation and store an 8-hex-digit representation of the 53 * uint32 hash value, marking the prefix byte with an additional bit to 54 * distinguish that this has happened. Hashing long strings saves space and 55 * ensures that we won't overrun the maximum entry length for a GIN index. 56 * (But JGIN_MAXLENGTH is quite a bit shorter than GIN's limit. It's chosen 57 * to ensure that the on-disk text datum will have a short varlena header.) 58 * Note that when any hashed item appears in a query, we must recheck index 59 * matches against the heap tuple; currently, this costs nothing because we 60 * must always recheck for other reasons. 61 */ 62 #define JGINFLAG_KEY 0x01 /* key (or string array element) */ 63 #define JGINFLAG_NULL 0x02 /* null value */ 64 #define JGINFLAG_BOOL 0x03 /* boolean value */ 65 #define JGINFLAG_NUM 0x04 /* numeric value */ 66 #define JGINFLAG_STR 0x05 /* string value (if not an array element) */ 67 #define JGINFLAG_HASHED 0x10 /* OR'd into flag if value was hashed */ 68 #define JGIN_MAXLENGTH 125 /* max length of text part before hashing */ 69 70 /* Convenience macros */ 71 #define DatumGetJsonbP(d) ((Jsonb *) PG_DETOAST_DATUM(d)) 72 #define DatumGetJsonbPCopy(d) ((Jsonb *) PG_DETOAST_DATUM_COPY(d)) 73 #define JsonbPGetDatum(p) PointerGetDatum(p) 74 #define PG_GETARG_JSONB_P(x) DatumGetJsonbP(PG_GETARG_DATUM(x)) 75 #define PG_GETARG_JSONB_P_COPY(x) DatumGetJsonbPCopy(PG_GETARG_DATUM(x)) 76 #define PG_RETURN_JSONB_P(x) PG_RETURN_POINTER(x) 77 78 typedef struct JsonbPair JsonbPair; 79 typedef struct JsonbValue JsonbValue; 80 81 /* 82 * Jsonbs are varlena objects, so must meet the varlena convention that the 83 * first int32 of the object contains the total object size in bytes. Be sure 84 * to use VARSIZE() and SET_VARSIZE() to access it, though! 85 * 86 * Jsonb is the on-disk representation, in contrast to the in-memory JsonbValue 87 * representation. Often, JsonbValues are just shims through which a Jsonb 88 * buffer is accessed, but they can also be deep copied and passed around. 89 * 90 * Jsonb is a tree structure. Each node in the tree consists of a JEntry 91 * header and a variable-length content (possibly of zero size). The JEntry 92 * header indicates what kind of a node it is, e.g. a string or an array, 93 * and provides the length of its variable-length portion. 94 * 95 * The JEntry and the content of a node are not stored physically together. 96 * Instead, the container array or object has an array that holds the JEntrys 97 * of all the child nodes, followed by their variable-length portions. 98 * 99 * The root node is an exception; it has no parent array or object that could 100 * hold its JEntry. Hence, no JEntry header is stored for the root node. It 101 * is implicitly known that the root node must be an array or an object, 102 * so we can get away without the type indicator as long as we can distinguish 103 * the two. For that purpose, both an array and an object begin with a uint32 104 * header field, which contains an JB_FOBJECT or JB_FARRAY flag. When a naked 105 * scalar value needs to be stored as a Jsonb value, what we actually store is 106 * an array with one element, with the flags in the array's header field set 107 * to JB_FSCALAR | JB_FARRAY. 108 * 109 * Overall, the Jsonb struct requires 4-bytes alignment. Within the struct, 110 * the variable-length portion of some node types is aligned to a 4-byte 111 * boundary, while others are not. When alignment is needed, the padding is 112 * in the beginning of the node that requires it. For example, if a numeric 113 * node is stored after a string node, so that the numeric node begins at 114 * offset 3, the variable-length portion of the numeric node will begin with 115 * one padding byte so that the actual numeric data is 4-byte aligned. 116 */ 117 118 /* 119 * JEntry format. 120 * 121 * The least significant 28 bits store either the data length of the entry, 122 * or its end+1 offset from the start of the variable-length portion of the 123 * containing object. The next three bits store the type of the entry, and 124 * the high-order bit tells whether the least significant bits store a length 125 * or an offset. 126 * 127 * The reason for the offset-or-length complication is to compromise between 128 * access speed and data compressibility. In the initial design each JEntry 129 * always stored an offset, but this resulted in JEntry arrays with horrible 130 * compressibility properties, so that TOAST compression of a JSONB did not 131 * work well. Storing only lengths would greatly improve compressibility, 132 * but it makes random access into large arrays expensive (O(N) not O(1)). 133 * So what we do is store an offset in every JB_OFFSET_STRIDE'th JEntry and 134 * a length in the rest. This results in reasonably compressible data (as 135 * long as the stride isn't too small). We may have to examine as many as 136 * JB_OFFSET_STRIDE JEntrys in order to find out the offset or length of any 137 * given item, but that's still O(1) no matter how large the container is. 138 * 139 * We could avoid eating a flag bit for this purpose if we were to store 140 * the stride in the container header, or if we were willing to treat the 141 * stride as an unchangeable constant. Neither of those options is very 142 * attractive though. 143 */ 144 typedef uint32 JEntry; 145 146 #define JENTRY_OFFLENMASK 0x0FFFFFFF 147 #define JENTRY_TYPEMASK 0x70000000 148 #define JENTRY_HAS_OFF 0x80000000 149 150 /* values stored in the type bits */ 151 #define JENTRY_ISSTRING 0x00000000 152 #define JENTRY_ISNUMERIC 0x10000000 153 #define JENTRY_ISBOOL_FALSE 0x20000000 154 #define JENTRY_ISBOOL_TRUE 0x30000000 155 #define JENTRY_ISNULL 0x40000000 156 #define JENTRY_ISCONTAINER 0x50000000 /* array or object */ 157 158 /* Access macros. Note possible multiple evaluations */ 159 #define JBE_OFFLENFLD(je_) ((je_) & JENTRY_OFFLENMASK) 160 #define JBE_HAS_OFF(je_) (((je_) & JENTRY_HAS_OFF) != 0) 161 #define JBE_ISSTRING(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISSTRING) 162 #define JBE_ISNUMERIC(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISNUMERIC) 163 #define JBE_ISCONTAINER(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISCONTAINER) 164 #define JBE_ISNULL(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISNULL) 165 #define JBE_ISBOOL_TRUE(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISBOOL_TRUE) 166 #define JBE_ISBOOL_FALSE(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISBOOL_FALSE) 167 #define JBE_ISBOOL(je_) (JBE_ISBOOL_TRUE(je_) || JBE_ISBOOL_FALSE(je_)) 168 169 /* Macro for advancing an offset variable to the next JEntry */ 170 #define JBE_ADVANCE_OFFSET(offset, je) \ 171 do { \ 172 JEntry je_ = (je); \ 173 if (JBE_HAS_OFF(je_)) \ 174 (offset) = JBE_OFFLENFLD(je_); \ 175 else \ 176 (offset) += JBE_OFFLENFLD(je_); \ 177 } while(0) 178 179 /* 180 * We store an offset, not a length, every JB_OFFSET_STRIDE children. 181 * Caution: this macro should only be referenced when creating a JSONB 182 * value. When examining an existing value, pay attention to the HAS_OFF 183 * bits instead. This allows changes in the offset-placement heuristic 184 * without breaking on-disk compatibility. 185 */ 186 #define JB_OFFSET_STRIDE 32 187 188 /* 189 * A jsonb array or object node, within a Jsonb Datum. 190 * 191 * An array has one child for each element, stored in array order. 192 * 193 * An object has two children for each key/value pair. The keys all appear 194 * first, in key sort order; then the values appear, in an order matching the 195 * key order. This arrangement keeps the keys compact in memory, making a 196 * search for a particular key more cache-friendly. 197 */ 198 typedef struct JsonbContainer 199 { 200 uint32 header; /* number of elements or key/value pairs, and 201 * flags */ 202 JEntry children[FLEXIBLE_ARRAY_MEMBER]; 203 204 /* the data for each child node follows. */ 205 } JsonbContainer; 206 207 /* flags for the header-field in JsonbContainer */ 208 #define JB_CMASK 0x0FFFFFFF /* mask for count field */ 209 #define JB_FSCALAR 0x10000000 /* flag bits */ 210 #define JB_FOBJECT 0x20000000 211 #define JB_FARRAY 0x40000000 212 213 /* convenience macros for accessing a JsonbContainer struct */ 214 #define JsonContainerSize(jc) ((jc)->header & JB_CMASK) 215 #define JsonContainerIsScalar(jc) (((jc)->header & JB_FSCALAR) != 0) 216 #define JsonContainerIsObject(jc) (((jc)->header & JB_FOBJECT) != 0) 217 #define JsonContainerIsArray(jc) (((jc)->header & JB_FARRAY) != 0) 218 219 /* The top-level on-disk format for a jsonb datum. */ 220 typedef struct 221 { 222 int32 vl_len_; /* varlena header (do not touch directly!) */ 223 JsonbContainer root; 224 } Jsonb; 225 226 /* convenience macros for accessing the root container in a Jsonb datum */ 227 #define JB_ROOT_COUNT(jbp_) (*(uint32 *) VARDATA(jbp_) & JB_CMASK) 228 #define JB_ROOT_IS_SCALAR(jbp_) ((*(uint32 *) VARDATA(jbp_) & JB_FSCALAR) != 0) 229 #define JB_ROOT_IS_OBJECT(jbp_) ((*(uint32 *) VARDATA(jbp_) & JB_FOBJECT) != 0) 230 #define JB_ROOT_IS_ARRAY(jbp_) ((*(uint32 *) VARDATA(jbp_) & JB_FARRAY) != 0) 231 232 233 enum jbvType 234 { 235 /* Scalar types */ 236 jbvNull = 0x0, 237 jbvString, 238 jbvNumeric, 239 jbvBool, 240 /* Composite types */ 241 jbvArray = 0x10, 242 jbvObject, 243 /* Binary (i.e. struct Jsonb) jbvArray/jbvObject */ 244 jbvBinary, 245 246 /* 247 * Virtual types. 248 * 249 * These types are used only for in-memory JSON processing and serialized 250 * into JSON strings when outputted to json/jsonb. 251 */ 252 jbvDatetime = 0x20, 253 }; 254 255 /* 256 * JsonbValue: In-memory representation of Jsonb. This is a convenient 257 * deserialized representation, that can easily support using the "val" 258 * union across underlying types during manipulation. The Jsonb on-disk 259 * representation has various alignment considerations. 260 */ 261 struct JsonbValue 262 { 263 enum jbvType type; /* Influences sort order */ 264 265 union 266 { 267 Numeric numeric; 268 bool boolean; 269 struct 270 { 271 int len; 272 char *val; /* Not necessarily null-terminated */ 273 } string; /* String primitive type */ 274 275 struct 276 { 277 int nElems; 278 JsonbValue *elems; 279 bool rawScalar; /* Top-level "raw scalar" array? */ 280 } array; /* Array container type */ 281 282 struct 283 { 284 int nPairs; /* 1 pair, 2 elements */ 285 JsonbPair *pairs; 286 } object; /* Associative container type */ 287 288 struct 289 { 290 int len; 291 JsonbContainer *data; 292 } binary; /* Array or object, in on-disk format */ 293 294 struct 295 { 296 Datum value; 297 Oid typid; 298 int32 typmod; 299 int tz; /* Numeric time zone, in seconds, for 300 * TimestampTz data type */ 301 } datetime; 302 } val; 303 }; 304 305 #define IsAJsonbScalar(jsonbval) (((jsonbval)->type >= jbvNull && \ 306 (jsonbval)->type <= jbvBool) || \ 307 (jsonbval)->type == jbvDatetime) 308 309 /* 310 * Key/value pair within an Object. 311 * 312 * This struct type is only used briefly while constructing a Jsonb; it is 313 * *not* the on-disk representation. 314 * 315 * Pairs with duplicate keys are de-duplicated. We store the originally 316 * observed pair ordering for the purpose of removing duplicates in a 317 * well-defined way (which is "last observed wins"). 318 */ 319 struct JsonbPair 320 { 321 JsonbValue key; /* Must be a jbvString */ 322 JsonbValue value; /* May be of any type */ 323 uint32 order; /* Pair's index in original sequence */ 324 }; 325 326 /* Conversion state used when parsing Jsonb from text, or for type coercion */ 327 typedef struct JsonbParseState 328 { 329 JsonbValue contVal; 330 Size size; 331 struct JsonbParseState *next; 332 } JsonbParseState; 333 334 /* 335 * JsonbIterator holds details of the type for each iteration. It also stores a 336 * Jsonb varlena buffer, which can be directly accessed in some contexts. 337 */ 338 typedef enum 339 { 340 JBI_ARRAY_START, 341 JBI_ARRAY_ELEM, 342 JBI_OBJECT_START, 343 JBI_OBJECT_KEY, 344 JBI_OBJECT_VALUE 345 } JsonbIterState; 346 347 typedef struct JsonbIterator 348 { 349 /* Container being iterated */ 350 JsonbContainer *container; 351 uint32 nElems; /* Number of elements in children array (will 352 * be nPairs for objects) */ 353 bool isScalar; /* Pseudo-array scalar value? */ 354 JEntry *children; /* JEntrys for child nodes */ 355 /* Data proper. This points to the beginning of the variable-length data */ 356 char *dataProper; 357 358 /* Current item in buffer (up to nElems) */ 359 int curIndex; 360 361 /* Data offset corresponding to current item */ 362 uint32 curDataOffset; 363 364 /* 365 * If the container is an object, we want to return keys and values 366 * alternately; so curDataOffset points to the current key, and 367 * curValueOffset points to the current value. 368 */ 369 uint32 curValueOffset; 370 371 /* Private state */ 372 JsonbIterState state; 373 374 struct JsonbIterator *parent; 375 } JsonbIterator; 376 377 378 /* Support functions */ 379 extern uint32 getJsonbOffset(const JsonbContainer *jc, int index); 380 extern uint32 getJsonbLength(const JsonbContainer *jc, int index); 381 extern int compareJsonbContainers(JsonbContainer *a, JsonbContainer *b); 382 extern JsonbValue *findJsonbValueFromContainer(JsonbContainer *sheader, 383 uint32 flags, 384 JsonbValue *key); 385 extern JsonbValue *getKeyJsonValueFromContainer(JsonbContainer *container, 386 const char *keyVal, int keyLen, 387 JsonbValue *res); 388 extern JsonbValue *getIthJsonbValueFromContainer(JsonbContainer *sheader, 389 uint32 i); 390 extern JsonbValue *pushJsonbValue(JsonbParseState **pstate, 391 JsonbIteratorToken seq, JsonbValue *jbval); 392 extern JsonbIterator *JsonbIteratorInit(JsonbContainer *container); 393 extern JsonbIteratorToken JsonbIteratorNext(JsonbIterator **it, JsonbValue *val, 394 bool skipNested); 395 extern Jsonb *JsonbValueToJsonb(JsonbValue *val); 396 extern bool JsonbDeepContains(JsonbIterator **val, 397 JsonbIterator **mContained); 398 extern void JsonbHashScalarValue(const JsonbValue *scalarVal, uint32 *hash); 399 extern void JsonbHashScalarValueExtended(const JsonbValue *scalarVal, 400 uint64 *hash, uint64 seed); 401 402 /* jsonb.c support functions */ 403 extern char *JsonbToCString(StringInfo out, JsonbContainer *in, 404 int estimated_len); 405 extern char *JsonbToCStringIndent(StringInfo out, JsonbContainer *in, 406 int estimated_len); 407 extern bool JsonbExtractScalar(JsonbContainer *jbc, JsonbValue *res); 408 extern const char *JsonbTypeName(JsonbValue *jb); 409 410 411 #endif /* __JSONB_H__ */ 412