1 /*-------------------------------------------------------------------------
2  *
3  * jsonb.h
4  *	  Declarations for jsonb data type support.
5  *
6  * Copyright (c) 1996-2021, PostgreSQL Global Development Group
7  *
8  * src/include/utils/jsonb.h
9  *
10  *-------------------------------------------------------------------------
11  */
12 #ifndef __JSONB_H__
13 #define __JSONB_H__
14 
15 #include "lib/stringinfo.h"
16 #include "utils/array.h"
17 #include "utils/numeric.h"
18 
19 /* Tokens used when sequentially processing a jsonb value */
20 typedef enum
21 {
22 	WJB_DONE,
23 	WJB_KEY,
24 	WJB_VALUE,
25 	WJB_ELEM,
26 	WJB_BEGIN_ARRAY,
27 	WJB_END_ARRAY,
28 	WJB_BEGIN_OBJECT,
29 	WJB_END_OBJECT
30 } JsonbIteratorToken;
31 
32 /* Strategy numbers for GIN index opclasses */
33 #define JsonbContainsStrategyNumber		7
34 #define JsonbExistsStrategyNumber		9
35 #define JsonbExistsAnyStrategyNumber	10
36 #define JsonbExistsAllStrategyNumber	11
37 #define JsonbJsonpathExistsStrategyNumber		15
38 #define JsonbJsonpathPredicateStrategyNumber	16
39 
40 
41 /*
42  * In the standard jsonb_ops GIN opclass for jsonb, we choose to index both
43  * keys and values.  The storage format is text.  The first byte of the text
44  * string distinguishes whether this is a key (always a string), null value,
45  * boolean value, numeric value, or string value.  However, array elements
46  * that are strings are marked as though they were keys; this imprecision
47  * supports the definition of the "exists" operator, which treats array
48  * elements like keys.  The remainder of the text string is empty for a null
49  * value, "t" or "f" for a boolean value, a normalized print representation of
50  * a numeric value, or the text of a string value.  However, if the length of
51  * this text representation would exceed JGIN_MAXLENGTH bytes, we instead hash
52  * the text representation and store an 8-hex-digit representation of the
53  * uint32 hash value, marking the prefix byte with an additional bit to
54  * distinguish that this has happened.  Hashing long strings saves space and
55  * ensures that we won't overrun the maximum entry length for a GIN index.
56  * (But JGIN_MAXLENGTH is quite a bit shorter than GIN's limit.  It's chosen
57  * to ensure that the on-disk text datum will have a short varlena header.)
58  * Note that when any hashed item appears in a query, we must recheck index
59  * matches against the heap tuple; currently, this costs nothing because we
60  * must always recheck for other reasons.
61  */
62 #define JGINFLAG_KEY	0x01	/* key (or string array element) */
63 #define JGINFLAG_NULL	0x02	/* null value */
64 #define JGINFLAG_BOOL	0x03	/* boolean value */
65 #define JGINFLAG_NUM	0x04	/* numeric value */
66 #define JGINFLAG_STR	0x05	/* string value (if not an array element) */
67 #define JGINFLAG_HASHED 0x10	/* OR'd into flag if value was hashed */
68 #define JGIN_MAXLENGTH	125		/* max length of text part before hashing */
69 
70 /* Convenience macros */
71 #define DatumGetJsonbP(d)	((Jsonb *) PG_DETOAST_DATUM(d))
72 #define DatumGetJsonbPCopy(d)	((Jsonb *) PG_DETOAST_DATUM_COPY(d))
73 #define JsonbPGetDatum(p)	PointerGetDatum(p)
74 #define PG_GETARG_JSONB_P(x)	DatumGetJsonbP(PG_GETARG_DATUM(x))
75 #define PG_GETARG_JSONB_P_COPY(x)	DatumGetJsonbPCopy(PG_GETARG_DATUM(x))
76 #define PG_RETURN_JSONB_P(x)	PG_RETURN_POINTER(x)
77 
78 typedef struct JsonbPair JsonbPair;
79 typedef struct JsonbValue JsonbValue;
80 
81 /*
82  * Jsonbs are varlena objects, so must meet the varlena convention that the
83  * first int32 of the object contains the total object size in bytes.  Be sure
84  * to use VARSIZE() and SET_VARSIZE() to access it, though!
85  *
86  * Jsonb is the on-disk representation, in contrast to the in-memory JsonbValue
87  * representation.  Often, JsonbValues are just shims through which a Jsonb
88  * buffer is accessed, but they can also be deep copied and passed around.
89  *
90  * Jsonb is a tree structure. Each node in the tree consists of a JEntry
91  * header and a variable-length content (possibly of zero size).  The JEntry
92  * header indicates what kind of a node it is, e.g. a string or an array,
93  * and provides the length of its variable-length portion.
94  *
95  * The JEntry and the content of a node are not stored physically together.
96  * Instead, the container array or object has an array that holds the JEntrys
97  * of all the child nodes, followed by their variable-length portions.
98  *
99  * The root node is an exception; it has no parent array or object that could
100  * hold its JEntry. Hence, no JEntry header is stored for the root node.  It
101  * is implicitly known that the root node must be an array or an object,
102  * so we can get away without the type indicator as long as we can distinguish
103  * the two.  For that purpose, both an array and an object begin with a uint32
104  * header field, which contains an JB_FOBJECT or JB_FARRAY flag.  When a naked
105  * scalar value needs to be stored as a Jsonb value, what we actually store is
106  * an array with one element, with the flags in the array's header field set
107  * to JB_FSCALAR | JB_FARRAY.
108  *
109  * Overall, the Jsonb struct requires 4-bytes alignment. Within the struct,
110  * the variable-length portion of some node types is aligned to a 4-byte
111  * boundary, while others are not. When alignment is needed, the padding is
112  * in the beginning of the node that requires it. For example, if a numeric
113  * node is stored after a string node, so that the numeric node begins at
114  * offset 3, the variable-length portion of the numeric node will begin with
115  * one padding byte so that the actual numeric data is 4-byte aligned.
116  */
117 
118 /*
119  * JEntry format.
120  *
121  * The least significant 28 bits store either the data length of the entry,
122  * or its end+1 offset from the start of the variable-length portion of the
123  * containing object.  The next three bits store the type of the entry, and
124  * the high-order bit tells whether the least significant bits store a length
125  * or an offset.
126  *
127  * The reason for the offset-or-length complication is to compromise between
128  * access speed and data compressibility.  In the initial design each JEntry
129  * always stored an offset, but this resulted in JEntry arrays with horrible
130  * compressibility properties, so that TOAST compression of a JSONB did not
131  * work well.  Storing only lengths would greatly improve compressibility,
132  * but it makes random access into large arrays expensive (O(N) not O(1)).
133  * So what we do is store an offset in every JB_OFFSET_STRIDE'th JEntry and
134  * a length in the rest.  This results in reasonably compressible data (as
135  * long as the stride isn't too small).  We may have to examine as many as
136  * JB_OFFSET_STRIDE JEntrys in order to find out the offset or length of any
137  * given item, but that's still O(1) no matter how large the container is.
138  *
139  * We could avoid eating a flag bit for this purpose if we were to store
140  * the stride in the container header, or if we were willing to treat the
141  * stride as an unchangeable constant.  Neither of those options is very
142  * attractive though.
143  */
144 typedef uint32 JEntry;
145 
146 #define JENTRY_OFFLENMASK		0x0FFFFFFF
147 #define JENTRY_TYPEMASK			0x70000000
148 #define JENTRY_HAS_OFF			0x80000000
149 
150 /* values stored in the type bits */
151 #define JENTRY_ISSTRING			0x00000000
152 #define JENTRY_ISNUMERIC		0x10000000
153 #define JENTRY_ISBOOL_FALSE		0x20000000
154 #define JENTRY_ISBOOL_TRUE		0x30000000
155 #define JENTRY_ISNULL			0x40000000
156 #define JENTRY_ISCONTAINER		0x50000000	/* array or object */
157 
158 /* Access macros.  Note possible multiple evaluations */
159 #define JBE_OFFLENFLD(je_)		((je_) & JENTRY_OFFLENMASK)
160 #define JBE_HAS_OFF(je_)		(((je_) & JENTRY_HAS_OFF) != 0)
161 #define JBE_ISSTRING(je_)		(((je_) & JENTRY_TYPEMASK) == JENTRY_ISSTRING)
162 #define JBE_ISNUMERIC(je_)		(((je_) & JENTRY_TYPEMASK) == JENTRY_ISNUMERIC)
163 #define JBE_ISCONTAINER(je_)	(((je_) & JENTRY_TYPEMASK) == JENTRY_ISCONTAINER)
164 #define JBE_ISNULL(je_)			(((je_) & JENTRY_TYPEMASK) == JENTRY_ISNULL)
165 #define JBE_ISBOOL_TRUE(je_)	(((je_) & JENTRY_TYPEMASK) == JENTRY_ISBOOL_TRUE)
166 #define JBE_ISBOOL_FALSE(je_)	(((je_) & JENTRY_TYPEMASK) == JENTRY_ISBOOL_FALSE)
167 #define JBE_ISBOOL(je_)			(JBE_ISBOOL_TRUE(je_) || JBE_ISBOOL_FALSE(je_))
168 
169 /* Macro for advancing an offset variable to the next JEntry */
170 #define JBE_ADVANCE_OFFSET(offset, je) \
171 	do { \
172 		JEntry	je_ = (je); \
173 		if (JBE_HAS_OFF(je_)) \
174 			(offset) = JBE_OFFLENFLD(je_); \
175 		else \
176 			(offset) += JBE_OFFLENFLD(je_); \
177 	} while(0)
178 
179 /*
180  * We store an offset, not a length, every JB_OFFSET_STRIDE children.
181  * Caution: this macro should only be referenced when creating a JSONB
182  * value.  When examining an existing value, pay attention to the HAS_OFF
183  * bits instead.  This allows changes in the offset-placement heuristic
184  * without breaking on-disk compatibility.
185  */
186 #define JB_OFFSET_STRIDE		32
187 
188 /*
189  * A jsonb array or object node, within a Jsonb Datum.
190  *
191  * An array has one child for each element, stored in array order.
192  *
193  * An object has two children for each key/value pair.  The keys all appear
194  * first, in key sort order; then the values appear, in an order matching the
195  * key order.  This arrangement keeps the keys compact in memory, making a
196  * search for a particular key more cache-friendly.
197  */
198 typedef struct JsonbContainer
199 {
200 	uint32		header;			/* number of elements or key/value pairs, and
201 								 * flags */
202 	JEntry		children[FLEXIBLE_ARRAY_MEMBER];
203 
204 	/* the data for each child node follows. */
205 } JsonbContainer;
206 
207 /* flags for the header-field in JsonbContainer */
208 #define JB_CMASK				0x0FFFFFFF	/* mask for count field */
209 #define JB_FSCALAR				0x10000000	/* flag bits */
210 #define JB_FOBJECT				0x20000000
211 #define JB_FARRAY				0x40000000
212 
213 /* convenience macros for accessing a JsonbContainer struct */
214 #define JsonContainerSize(jc)		((jc)->header & JB_CMASK)
215 #define JsonContainerIsScalar(jc)	(((jc)->header & JB_FSCALAR) != 0)
216 #define JsonContainerIsObject(jc)	(((jc)->header & JB_FOBJECT) != 0)
217 #define JsonContainerIsArray(jc)	(((jc)->header & JB_FARRAY) != 0)
218 
219 /* The top-level on-disk format for a jsonb datum. */
220 typedef struct
221 {
222 	int32		vl_len_;		/* varlena header (do not touch directly!) */
223 	JsonbContainer root;
224 } Jsonb;
225 
226 /* convenience macros for accessing the root container in a Jsonb datum */
227 #define JB_ROOT_COUNT(jbp_)		(*(uint32 *) VARDATA(jbp_) & JB_CMASK)
228 #define JB_ROOT_IS_SCALAR(jbp_) ((*(uint32 *) VARDATA(jbp_) & JB_FSCALAR) != 0)
229 #define JB_ROOT_IS_OBJECT(jbp_) ((*(uint32 *) VARDATA(jbp_) & JB_FOBJECT) != 0)
230 #define JB_ROOT_IS_ARRAY(jbp_)	((*(uint32 *) VARDATA(jbp_) & JB_FARRAY) != 0)
231 
232 
233 enum jbvType
234 {
235 	/* Scalar types */
236 	jbvNull = 0x0,
237 	jbvString,
238 	jbvNumeric,
239 	jbvBool,
240 	/* Composite types */
241 	jbvArray = 0x10,
242 	jbvObject,
243 	/* Binary (i.e. struct Jsonb) jbvArray/jbvObject */
244 	jbvBinary,
245 
246 	/*
247 	 * Virtual types.
248 	 *
249 	 * These types are used only for in-memory JSON processing and serialized
250 	 * into JSON strings when outputted to json/jsonb.
251 	 */
252 	jbvDatetime = 0x20,
253 };
254 
255 /*
256  * JsonbValue:	In-memory representation of Jsonb.  This is a convenient
257  * deserialized representation, that can easily support using the "val"
258  * union across underlying types during manipulation.  The Jsonb on-disk
259  * representation has various alignment considerations.
260  */
261 struct JsonbValue
262 {
263 	enum jbvType type;			/* Influences sort order */
264 
265 	union
266 	{
267 		Numeric numeric;
268 		bool		boolean;
269 		struct
270 		{
271 			int			len;
272 			char	   *val;	/* Not necessarily null-terminated */
273 		}			string;		/* String primitive type */
274 
275 		struct
276 		{
277 			int			nElems;
278 			JsonbValue *elems;
279 			bool		rawScalar;	/* Top-level "raw scalar" array? */
280 		}			array;		/* Array container type */
281 
282 		struct
283 		{
284 			int			nPairs; /* 1 pair, 2 elements */
285 			JsonbPair  *pairs;
286 		}			object;		/* Associative container type */
287 
288 		struct
289 		{
290 			int			len;
291 			JsonbContainer *data;
292 		}			binary;		/* Array or object, in on-disk format */
293 
294 		struct
295 		{
296 			Datum		value;
297 			Oid			typid;
298 			int32		typmod;
299 			int			tz;		/* Numeric time zone, in seconds, for
300 								 * TimestampTz data type */
301 		}			datetime;
302 	}			val;
303 };
304 
305 #define IsAJsonbScalar(jsonbval)	(((jsonbval)->type >= jbvNull && \
306 									  (jsonbval)->type <= jbvBool) || \
307 									  (jsonbval)->type == jbvDatetime)
308 
309 /*
310  * Key/value pair within an Object.
311  *
312  * This struct type is only used briefly while constructing a Jsonb; it is
313  * *not* the on-disk representation.
314  *
315  * Pairs with duplicate keys are de-duplicated.  We store the originally
316  * observed pair ordering for the purpose of removing duplicates in a
317  * well-defined way (which is "last observed wins").
318  */
319 struct JsonbPair
320 {
321 	JsonbValue	key;			/* Must be a jbvString */
322 	JsonbValue	value;			/* May be of any type */
323 	uint32		order;			/* Pair's index in original sequence */
324 };
325 
326 /* Conversion state used when parsing Jsonb from text, or for type coercion */
327 typedef struct JsonbParseState
328 {
329 	JsonbValue	contVal;
330 	Size		size;
331 	struct JsonbParseState *next;
332 } JsonbParseState;
333 
334 /*
335  * JsonbIterator holds details of the type for each iteration. It also stores a
336  * Jsonb varlena buffer, which can be directly accessed in some contexts.
337  */
338 typedef enum
339 {
340 	JBI_ARRAY_START,
341 	JBI_ARRAY_ELEM,
342 	JBI_OBJECT_START,
343 	JBI_OBJECT_KEY,
344 	JBI_OBJECT_VALUE
345 } JsonbIterState;
346 
347 typedef struct JsonbIterator
348 {
349 	/* Container being iterated */
350 	JsonbContainer *container;
351 	uint32		nElems;			/* Number of elements in children array (will
352 								 * be nPairs for objects) */
353 	bool		isScalar;		/* Pseudo-array scalar value? */
354 	JEntry	   *children;		/* JEntrys for child nodes */
355 	/* Data proper.  This points to the beginning of the variable-length data */
356 	char	   *dataProper;
357 
358 	/* Current item in buffer (up to nElems) */
359 	int			curIndex;
360 
361 	/* Data offset corresponding to current item */
362 	uint32		curDataOffset;
363 
364 	/*
365 	 * If the container is an object, we want to return keys and values
366 	 * alternately; so curDataOffset points to the current key, and
367 	 * curValueOffset points to the current value.
368 	 */
369 	uint32		curValueOffset;
370 
371 	/* Private state */
372 	JsonbIterState state;
373 
374 	struct JsonbIterator *parent;
375 } JsonbIterator;
376 
377 
378 /* Support functions */
379 extern uint32 getJsonbOffset(const JsonbContainer *jc, int index);
380 extern uint32 getJsonbLength(const JsonbContainer *jc, int index);
381 extern int	compareJsonbContainers(JsonbContainer *a, JsonbContainer *b);
382 extern JsonbValue *findJsonbValueFromContainer(JsonbContainer *sheader,
383 											   uint32 flags,
384 											   JsonbValue *key);
385 extern JsonbValue *getKeyJsonValueFromContainer(JsonbContainer *container,
386 												const char *keyVal, int keyLen,
387 												JsonbValue *res);
388 extern JsonbValue *getIthJsonbValueFromContainer(JsonbContainer *sheader,
389 												 uint32 i);
390 extern JsonbValue *pushJsonbValue(JsonbParseState **pstate,
391 								  JsonbIteratorToken seq, JsonbValue *jbval);
392 extern JsonbIterator *JsonbIteratorInit(JsonbContainer *container);
393 extern JsonbIteratorToken JsonbIteratorNext(JsonbIterator **it, JsonbValue *val,
394 											bool skipNested);
395 extern void JsonbToJsonbValue(Jsonb *jsonb, JsonbValue *val);
396 extern Jsonb *JsonbValueToJsonb(JsonbValue *val);
397 extern bool JsonbDeepContains(JsonbIterator **val,
398 							  JsonbIterator **mContained);
399 extern void JsonbHashScalarValue(const JsonbValue *scalarVal, uint32 *hash);
400 extern void JsonbHashScalarValueExtended(const JsonbValue *scalarVal,
401 										 uint64 *hash, uint64 seed);
402 
403 /* jsonb.c support functions */
404 extern char *JsonbToCString(StringInfo out, JsonbContainer *in,
405 							int estimated_len);
406 extern char *JsonbToCStringIndent(StringInfo out, JsonbContainer *in,
407 								  int estimated_len);
408 extern bool JsonbExtractScalar(JsonbContainer *jbc, JsonbValue *res);
409 extern const char *JsonbTypeName(JsonbValue *jb);
410 
411 extern Datum jsonb_set_element(Jsonb *jb, Datum *path, int path_len,
412 							   JsonbValue *newval);
413 extern Datum jsonb_get_element(Jsonb *jb, Datum *path, int npath,
414 							   bool *isnull, bool as_text);
415 #endif							/* __JSONB_H__ */
416