1 /*-------------------------------------------------------------------------
2  *
3  * array.h
4  *	  Declarations for Postgres arrays.
5  *
6  * A standard varlena array has the following internal structure:
7  *	  <vl_len_>		- standard varlena header word
8  *	  <ndim>		- number of dimensions of the array
9  *	  <dataoffset>	- offset to stored data, or 0 if no nulls bitmap
10  *	  <elemtype>	- element type OID
11  *	  <dimensions>	- length of each array axis (C array of int)
12  *	  <lower bnds>	- lower boundary of each dimension (C array of int)
13  *	  <null bitmap> - bitmap showing locations of nulls (OPTIONAL)
14  *	  <actual data> - whatever is the stored data
15  *
16  * The <dimensions> and <lower bnds> arrays each have ndim elements.
17  *
18  * The <null bitmap> may be omitted if the array contains no NULL elements.
19  * If it is absent, the <dataoffset> field is zero and the offset to the
20  * stored data must be computed on-the-fly.  If the bitmap is present,
21  * <dataoffset> is nonzero and is equal to the offset from the array start
22  * to the first data element (including any alignment padding).  The bitmap
23  * follows the same conventions as tuple null bitmaps, i.e., a 1 indicates
24  * a non-null entry and the LSB of each bitmap byte is used first.
25  *
26  * The actual data starts on a MAXALIGN boundary.  Individual items in the
27  * array are aligned as specified by the array element type.  They are
28  * stored in row-major order (last subscript varies most rapidly).
29  *
30  * NOTE: it is important that array elements of toastable datatypes NOT be
31  * toasted, since the tupletoaster won't know they are there.  (We could
32  * support compressed toasted items; only out-of-line items are dangerous.
33  * However, it seems preferable to store such items uncompressed and allow
34  * the toaster to compress the whole array as one input.)
35  *
36  *
37  * The OIDVECTOR and INT2VECTOR datatypes are storage-compatible with
38  * generic arrays, but they support only one-dimensional arrays with no
39  * nulls (and no null bitmap).  They don't support being toasted, either.
40  *
41  * There are also some "fixed-length array" datatypes, such as NAME and
42  * POINT.  These are simply a sequence of a fixed number of items each
43  * of a fixed-length datatype, with no overhead; the item size must be
44  * a multiple of its alignment requirement, because we do no padding.
45  * We support subscripting on these types, but array_in() and array_out()
46  * only work with varlena arrays.
47  *
48  * In addition, arrays are a major user of the "expanded object" TOAST
49  * infrastructure.  This allows a varlena array to be converted to a
50  * separate representation that may include "deconstructed" Datum/isnull
51  * arrays holding the elements.
52  *
53  *
54  * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
55  * Portions Copyright (c) 1994, Regents of the University of California
56  *
57  * src/include/utils/array.h
58  *
59  *-------------------------------------------------------------------------
60  */
61 #ifndef ARRAY_H
62 #define ARRAY_H
63 
64 #include "fmgr.h"
65 #include "utils/expandeddatum.h"
66 
67 
68 /*
69  * Arrays are varlena objects, so must meet the varlena convention that
70  * the first int32 of the object contains the total object size in bytes.
71  * Be sure to use VARSIZE() and SET_VARSIZE() to access it, though!
72  *
73  * CAUTION: if you change the header for ordinary arrays you will also
74  * need to change the headers for oidvector and int2vector!
75  */
76 typedef struct
77 {
78 	int32		vl_len_;		/* varlena header (do not touch directly; use VARSIZE/SET_VARSIZE) */
79 	int			ndim;			/* # of dimensions (entries in dimensions and lower bnds) */
80 	int32		dataoffset;		/* offset from array start to data, or 0 if no null bitmap */
81 	Oid			elemtype;		/* element type OID */
82 } ArrayType;					/* dimensions, lower bnds, [null bitmap], data follow */
83 
84 /*
85  * An expanded array is contained within a private memory context (as
86  * all expanded objects must be) and has a control structure as below.
87  *
88  * The expanded array might contain a regular "flat" array if that was the
89  * original input and we've not modified it significantly.  Otherwise, the
90  * contents are represented by Datum/isnull arrays plus dimensionality and
91  * type information.  We could also have both forms, if we've deconstructed
92  * the original array for access purposes but not yet changed it.  For pass-
93  * by-reference element types, the Datums would point into the flat array in
94  * this situation.  Once we start modifying array elements, new pass-by-ref
95  * elements are separately palloc'd within the memory context.
96  */
97 #define EA_MAGIC 689375833		/* ID for debugging crosschecks; cf. ea_magic field below */
98 
99 typedef struct ExpandedArrayHeader
100 {
101 	/* Standard header for expanded objects */
102 	ExpandedObjectHeader hdr;
103 
104 	/* Magic value identifying an expanded array (for debugging only) */
105 	int			ea_magic;		/* cf. EA_MAGIC */
106 
107 	/* Dimensionality info (always valid) */
108 	int			ndims;			/* # of dimensions */
109 	int		   *dims;			/* array dimensions */
110 	int		   *lbound;			/* index lower bounds for each dimension */
111 
112 	/* Element type info (always valid) */
113 	Oid			element_type;	/* element type OID */
114 	int16		typlen;			/* needed info about element datatype */
115 	bool		typbyval;		/* ditto: is the element type pass-by-value? */
116 	char		typalign;		/* ditto: element alignment requirement */
117 
118 	/*
119 	 * If we have a Datum-array representation of the array, it's kept here;
120 	 * else dvalues/dnulls are NULL.  The dvalues and dnulls arrays are always
121 	 * palloc'd within the object private context, but may change size from
122 	 * time to time.  For pass-by-ref element types, dvalues entries might
123 	 * point either into the fstartptr..fendptr area, or to separately
124 	 * palloc'd chunks.  Elements should always be fully detoasted, as they
125 	 * are in the standard flat representation.
126 	 *
127 	 * Even when dvalues is valid, dnulls can be NULL if there are no null
128 	 * elements.
129 	 */
130 	Datum	   *dvalues;		/* array of Datums, or NULL if not deconstructed */
131 	bool	   *dnulls;			/* array of is-null flags for Datums, or NULL */
132 	int			dvalueslen;		/* allocated length of above arrays */
133 	int			nelems;			/* number of valid entries in above arrays */
134 
135 	/*
136 	 * flat_size is the current space requirement for the flat equivalent of
137 	 * the expanded array, if known; otherwise it's 0.  We store this to make
138 	 * consecutive calls of get_flat_size cheap.
139 	 */
140 	Size		flat_size;		/* 0 means "not known" */
141 
142 	/*
143 	 * fvalue points to the flat representation if it is valid, else it is
144 	 * NULL.  If we have or ever had a flat representation then
145 	 * fstartptr/fendptr point to the start and end+1 of its data area; this
146 	 * is so that we can tell which Datum pointers point into the flat
147 	 * representation rather than being pointers to separately palloc'd data.
148 	 */
149 	ArrayType  *fvalue;			/* must be a fully detoasted array */
150 	char	   *fstartptr;		/* start of its data area */
151 	char	   *fendptr;		/* end+1 of its data area */
152 } ExpandedArrayHeader;
153 
154 /*
155  * Functions that can handle either a "flat" varlena array or an expanded
156  * array use this union to work with their input.  Don't refer to "flt";
157  * instead, cast to ArrayType.  This struct nominally requires 8-byte
158  * alignment on 64-bit, but it's often used for an ArrayType having 4-byte
159  * alignment.  UBSan complains about referencing "flt" in such cases.
160  */
161 typedef union AnyArrayType
162 {
163 	ArrayType	flt;			/* flat form; don't reference, cast to ArrayType instead */
164 	ExpandedArrayHeader xpn;	/* expanded form */
165 } AnyArrayType;
166 
167 /*
168  * working state for accumArrayResult() and friends
169  * note that the input must be scalars (legal array elements)
170  */
171 typedef struct ArrayBuildState
172 {
173 	MemoryContext mcontext;		/* where all the temp stuff is kept */
174 	Datum	   *dvalues;		/* array of accumulated Datums */
175 	bool	   *dnulls;			/* array of is-null flags for Datums */
176 	int			alen;			/* allocated length of above arrays */
177 	int			nelems;			/* number of valid entries in above arrays */
178 	Oid			element_type;	/* data type of the Datums */
179 	int16		typlen;			/* needed info about datatype */
180 	bool		typbyval;		/* ditto: pass-by-value flag */
181 	char		typalign;		/* ditto: alignment requirement */
182 	bool		private_cxt;	/* use private memory context (cf. initArrayResult's subcontext arg) */
183 } ArrayBuildState;
184 
185 /*
186  * working state for accumArrayResultArr() and friends
187  * note that the input must be arrays, and the same array type is returned
188  */
189 typedef struct ArrayBuildStateArr
190 {
191 	MemoryContext mcontext;		/* where all the temp stuff is kept */
192 	char	   *data;			/* accumulated data */
193 	bits8	   *nullbitmap;		/* bitmap of is-null flags, or NULL if none */
194 	int			abytes;			/* allocated length of "data" */
195 	int			nbytes;			/* number of bytes used so far */
196 	int			aitems;			/* allocated length of bitmap (in elements) */
197 	int			nitems;			/* total number of elements in result */
198 	int			ndims;			/* current dimensions of result */
199 	int			dims[MAXDIM];	/* dimension lengths (room for MAXDIM axes) */
200 	int			lbs[MAXDIM];	/* lower bounds (room for MAXDIM axes) */
201 	Oid			array_type;		/* data type of the arrays */
202 	Oid			element_type;	/* data type of the array elements */
203 	bool		private_cxt;	/* use private memory context */
204 } ArrayBuildStateArr;
205 
206 /*
207  * working state for accumArrayResultAny() and friends
208  * these functions handle both cases
209  */
210 typedef struct ArrayBuildStateAny
211 {
212 	/* Exactly one of these is not NULL: */
213 	ArrayBuildState *scalarstate;	/* state used when inputs are scalars */
214 	ArrayBuildStateArr *arraystate; /* state used when inputs are arrays */
215 } ArrayBuildStateAny;
216 
217 /*
218  * structure to cache type metadata needed for array manipulation
219  */
220 typedef struct ArrayMetaState
221 {
222 	Oid			element_type;	/* type that the cached fields below describe */
223 	int16		typlen;			/* cf. same-named ExpandedArrayHeader fields */
224 	bool		typbyval;
225 	char		typalign;
226 	char		typdelim;		/* NOTE(review): presumably the array delimiter */
227 	Oid			typioparam;		/* NOTE(review): presumably the I/O param type */
228 	Oid			typiofunc;		/* NOTE(review): presumably the function in proc */
229 	FmgrInfo	proc;			/* fmgr lookup info (FmgrInfo) */
230 } ArrayMetaState;
231 
232 /*
233  * private state needed by array_map (here because caller must provide it)
234  */
235 typedef struct ArrayMapState
236 {
237 	ArrayMetaState inp_extra;	/* NOTE(review): presumably input element type info */
238 	ArrayMetaState ret_extra;	/* NOTE(review): presumably result element type info */
239 } ArrayMapState;
240 
241 /* Opaque iterator handle: ArrayIteratorData is private in arrayfuncs.c */
242 typedef struct ArrayIteratorData *ArrayIterator;
243 
244 /* fmgr macros for regular varlena array objects (fetched via PG_DETOAST_DATUM) */
245 #define DatumGetArrayTypeP(X)		  ((ArrayType *) PG_DETOAST_DATUM(X))
246 #define DatumGetArrayTypePCopy(X)	  ((ArrayType *) PG_DETOAST_DATUM_COPY(X))
247 #define PG_GETARG_ARRAYTYPE_P(n)	  DatumGetArrayTypeP(PG_GETARG_DATUM(n))
248 #define PG_GETARG_ARRAYTYPE_P_COPY(n) DatumGetArrayTypePCopy(PG_GETARG_DATUM(n))
249 #define PG_RETURN_ARRAYTYPE_P(x)	  PG_RETURN_POINTER(x)
250 
251 /* fmgr macros for expanded array objects (ExpandedArrayHeader pointers) */
252 #define PG_GETARG_EXPANDED_ARRAY(n)  DatumGetExpandedArray(PG_GETARG_DATUM(n))
253 #define PG_GETARG_EXPANDED_ARRAYX(n, metacache) \
254 	DatumGetExpandedArrayX(PG_GETARG_DATUM(n), metacache)
255 #define PG_RETURN_EXPANDED_ARRAY(x)  PG_RETURN_DATUM(EOHPGetRWDatum(&(x)->hdr))
256 
257 /* fmgr macros for AnyArrayType (i.e., get either varlena or expanded form) */
258 #define PG_GETARG_ANY_ARRAY(n)	DatumGetAnyArray(PG_GETARG_DATUM(n))
259 
260 /*
261  * Access macros for varlena array header fields.
262  *
263  * ARR_DIMS returns a pointer to an array of array dimensions (number of
264  * elements along the various array axes).
265  *
266  * ARR_LBOUND returns a pointer to an array of array lower bounds.
267  *
268  * That is: if the third axis of an array has elements 5 through 8, then
269  * ARR_DIMS(a)[2] == 4 and ARR_LBOUND(a)[2] == 5.
270  *
271  * Unlike C, the default lower bound is 1.
272  */
273 #define ARR_SIZE(a)				VARSIZE(a)	/* total object size, per varlena header */
274 #define ARR_NDIM(a)				((a)->ndim) /* # of dimensions */
275 #define ARR_HASNULL(a)			((a)->dataoffset != 0)	/* null bitmap present? */
276 #define ARR_ELEMTYPE(a)			((a)->elemtype) /* element type OID */
277 
278 #define ARR_DIMS(a) \
279 		((int *) ((char *) (a) + sizeof(ArrayType)))
280 #define ARR_LBOUND(a) \
281 		/* the lower bounds follow the ndim dimension lengths */ \
282 		(ARR_DIMS(a) + ARR_NDIM(a))
283 
284 #define ARR_NULLBITMAP(a) \
285 		/* the bitmap, if any, follows both ndim-entry int arrays */ \
286 		(ARR_HASNULL(a) ? \
287 		 (bits8 *) (ARR_DIMS(a) + 2 * ARR_NDIM(a)) \
288 		 : (bits8 *) NULL)
289 
290 /*
291  * Total array header size in bytes, rounded up by MAXALIGN, for the given
292  * number of dimensions; WITHNULLS adds a null bitmap of one bit per item.
293  */
294 #define ARR_OVERHEAD_NONULLS(ndims) \
295 		MAXALIGN(sizeof(ArrayType) + 2 * sizeof(int) * (ndims))
296 #define ARR_OVERHEAD_WITHNULLS(ndims, nitems) \
297 		MAXALIGN(sizeof(ArrayType) + 2 * sizeof(int) * (ndims) + \
298 				 ((nitems) + 7) / 8)
299 /* Offset to the data: the stored dataoffset if a bitmap exists, else computed. */
300 #define ARR_DATA_OFFSET(a) \
301 		(ARR_HASNULL(a) ? (a)->dataoffset : ARR_OVERHEAD_NONULLS(ARR_NDIM(a)))
302 
303 /*
304  * Returns a pointer to the actual array data (array start + data offset).
305  */
306 #define ARR_DATA_PTR(a) \
307 		(((char *) (a)) + ARR_DATA_OFFSET(a))
308 
309 /*
310  * AnyArrayType access macros (expanded or flat).  Beware multiple evaluation!
311  */
312 #define AARR_NDIM(a) \
313 	(VARATT_IS_EXPANDED_HEADER(a) ? \
314 	 (a)->xpn.ndims : ARR_NDIM((ArrayType *) (a)))
315 #define AARR_HASNULL(a) \
316 	(VARATT_IS_EXPANDED_HEADER(a) ? \
317 	 ((a)->xpn.dvalues != NULL ? (a)->xpn.dnulls != NULL : ARR_HASNULL((a)->xpn.fvalue)) : \
318 	 ARR_HASNULL((ArrayType *) (a)))
319 #define AARR_ELEMTYPE(a) \
320 	(VARATT_IS_EXPANDED_HEADER(a) ? \
321 	 (a)->xpn.element_type : ARR_ELEMTYPE((ArrayType *) (a)))
322 #define AARR_DIMS(a) \
323 	(VARATT_IS_EXPANDED_HEADER(a) ? \
324 	 (a)->xpn.dims : ARR_DIMS((ArrayType *) (a)))
325 #define AARR_LBOUND(a) \
326 	(VARATT_IS_EXPANDED_HEADER(a) ? \
327 	 (a)->xpn.lbound : ARR_LBOUND((ArrayType *) (a)))
328 
329 
330 /*
331  * GUC parameter (declaration only; the variable is defined elsewhere)
332  */
333 extern bool Array_nulls;
334 
335 /*
336  * prototypes for functions defined in arrayfuncs.c (fmgr-callable ones first)
337  */
338 extern Datum array_in(PG_FUNCTION_ARGS);
339 extern Datum array_out(PG_FUNCTION_ARGS);
340 extern Datum array_recv(PG_FUNCTION_ARGS);
341 extern Datum array_send(PG_FUNCTION_ARGS);
342 extern Datum array_eq(PG_FUNCTION_ARGS);
343 extern Datum array_ne(PG_FUNCTION_ARGS);
344 extern Datum array_lt(PG_FUNCTION_ARGS);
345 extern Datum array_gt(PG_FUNCTION_ARGS);
346 extern Datum array_le(PG_FUNCTION_ARGS);
347 extern Datum array_ge(PG_FUNCTION_ARGS);
348 extern Datum btarraycmp(PG_FUNCTION_ARGS);
349 extern Datum hash_array(PG_FUNCTION_ARGS);
350 extern Datum arrayoverlap(PG_FUNCTION_ARGS);
351 extern Datum arraycontains(PG_FUNCTION_ARGS);
352 extern Datum arraycontained(PG_FUNCTION_ARGS);
353 extern Datum array_ndims(PG_FUNCTION_ARGS);
354 extern Datum array_dims(PG_FUNCTION_ARGS);
355 extern Datum array_lower(PG_FUNCTION_ARGS);
356 extern Datum array_upper(PG_FUNCTION_ARGS);
357 extern Datum array_length(PG_FUNCTION_ARGS);
358 extern Datum array_cardinality(PG_FUNCTION_ARGS);
359 extern Datum array_larger(PG_FUNCTION_ARGS);
360 extern Datum array_smaller(PG_FUNCTION_ARGS);
361 extern Datum generate_subscripts(PG_FUNCTION_ARGS);
362 extern Datum generate_subscripts_nodir(PG_FUNCTION_ARGS);
363 extern Datum array_fill(PG_FUNCTION_ARGS);
364 extern Datum array_fill_with_lower_bounds(PG_FUNCTION_ARGS);
365 extern Datum array_unnest(PG_FUNCTION_ARGS);
366 extern Datum array_remove(PG_FUNCTION_ARGS);
367 extern Datum array_replace(PG_FUNCTION_ARGS);
368 extern Datum width_bucket_array(PG_FUNCTION_ARGS);
369 /* NOTE(review): judging by the name, copies values/nulls into array -- confirm. */
370 extern void CopyArrayEls(ArrayType *array,
371 			 Datum *values,
372 			 bool *nulls,
373 			 int nitems,
374 			 int typlen,
375 			 bool typbyval,
376 			 char typalign,
377 			 bool freedata);	/* NOTE(review): effect of freedata not visible here */
378 /* Element and slice get/set; the array is passed, and the result returned, as a Datum. */
379 extern Datum array_get_element(Datum arraydatum, int nSubscripts, int *indx,
380 				  int arraytyplen, int elmlen, bool elmbyval, char elmalign,
381 				  bool *isNull);
382 extern Datum array_set_element(Datum arraydatum, int nSubscripts, int *indx,
383 				  Datum dataValue, bool isNull,
384 				  int arraytyplen, int elmlen, bool elmbyval, char elmalign);
385 extern Datum array_get_slice(Datum arraydatum, int nSubscripts,
386 				int *upperIndx, int *lowerIndx,
387 				bool *upperProvided, bool *lowerProvided,
388 				int arraytyplen, int elmlen, bool elmbyval, char elmalign);
389 extern Datum array_set_slice(Datum arraydatum, int nSubscripts,
390 				int *upperIndx, int *lowerIndx,
391 				bool *upperProvided, bool *lowerProvided,
392 				Datum srcArrayDatum, bool isNull,
393 				int arraytyplen, int elmlen, bool elmbyval, char elmalign);
394 /* Same parameter lists as array_get_element/array_set_element, but on ArrayType *. */
395 extern Datum array_ref(ArrayType *array, int nSubscripts, int *indx,
396 		  int arraytyplen, int elmlen, bool elmbyval, char elmalign,
397 		  bool *isNull);
398 extern ArrayType *array_set(ArrayType *array, int nSubscripts, int *indx,
399 		  Datum dataValue, bool isNull,
400 		  int arraytyplen, int elmlen, bool elmbyval, char elmalign);
401 /* amstate is private state that the caller must provide (see ArrayMapState). */
402 extern Datum array_map(FunctionCallInfo fcinfo, Oid retType,
403 		  ArrayMapState *amstate);
404 /* Only srcbitmap is const-qualified; destbitmap is presumably the output. */
405 extern void array_bitmap_copy(bits8 *destbitmap, int destoffset,
406 				  const bits8 *srcbitmap, int srcoffset,
407 				  int nitems);
408 /* construct_md_array takes nulls/ndims/dims/lbs, which construct_array lacks. */
409 extern ArrayType *construct_array(Datum *elems, int nelems,
410 				Oid elmtype,
411 				int elmlen, bool elmbyval, char elmalign);
412 extern ArrayType *construct_md_array(Datum *elems,
413 				   bool *nulls,
414 				   int ndims,
415 				   int *dims,
416 				   int *lbs,
417 				   Oid elmtype, int elmlen, bool elmbyval, char elmalign);
418 extern ArrayType *construct_empty_array(Oid elmtype);
419 extern ExpandedArrayHeader *construct_empty_expanded_array(Oid element_type,
420 							   MemoryContext parentcontext,
421 							   ArrayMetaState *metacache);
422 extern void deconstruct_array(ArrayType *array,
423 				  Oid elmtype,
424 				  int elmlen, bool elmbyval, char elmalign,
425 				  Datum **elemsp, bool **nullsp, int *nelemsp);	/* *p args: presumably outputs */
426 extern bool array_contains_nulls(ArrayType *array);
427 /* init / accum / make functions working on ArrayBuildState (scalar inputs) */
428 extern ArrayBuildState *initArrayResult(Oid element_type,
429 				MemoryContext rcontext, bool subcontext);
430 extern ArrayBuildState *accumArrayResult(ArrayBuildState *astate,
431 				 Datum dvalue, bool disnull,
432 				 Oid element_type,
433 				 MemoryContext rcontext);
434 extern Datum makeArrayResult(ArrayBuildState *astate,
435 				MemoryContext rcontext);
436 extern Datum makeMdArrayResult(ArrayBuildState *astate, int ndims,
437 				  int *dims, int *lbs, MemoryContext rcontext, bool release);
438 /* init / accum / make functions working on ArrayBuildStateArr (array inputs) */
439 extern ArrayBuildStateArr *initArrayResultArr(Oid array_type, Oid element_type,
440 				   MemoryContext rcontext, bool subcontext);
441 extern ArrayBuildStateArr *accumArrayResultArr(ArrayBuildStateArr *astate,
442 					Datum dvalue, bool disnull,
443 					Oid array_type,
444 					MemoryContext rcontext);
445 extern Datum makeArrayResultArr(ArrayBuildStateArr *astate,
446 				   MemoryContext rcontext, bool release);
447 /* init / accum / make functions working on ArrayBuildStateAny (either input kind) */
448 extern ArrayBuildStateAny *initArrayResultAny(Oid input_type,
449 				   MemoryContext rcontext, bool subcontext);
450 extern ArrayBuildStateAny *accumArrayResultAny(ArrayBuildStateAny *astate,
451 					Datum dvalue, bool disnull,
452 					Oid input_type,
453 					MemoryContext rcontext);
454 extern Datum makeArrayResultAny(ArrayBuildStateAny *astate,
455 				   MemoryContext rcontext, bool release);
456 /* Iterator functions; ArrayIterator is a pointer to a struct private to arrayfuncs.c. */
457 extern ArrayIterator array_create_iterator(ArrayType *arr, int slice_ndim, ArrayMetaState *mstate);
458 extern bool array_iterate(ArrayIterator iterator, Datum *value, bool *isnull);
459 extern void array_free_iterator(ArrayIterator iterator);
460 
461 /*
462  * prototypes for functions defined in arrayutils.c
463  */
464 /* const int * args are read-only; int * args without const are presumably outputs. */
465 extern int	ArrayGetOffset(int n, const int *dim, const int *lb, const int *indx);
466 extern int	ArrayGetOffset0(int n, const int *tup, const int *scale);
467 extern int	ArrayGetNItems(int ndim, const int *dims);
468 extern void ArrayCheckBounds(int ndim, const int *dims, const int *lb);
469 extern void mda_get_range(int n, int *span, const int *st, const int *endp);
470 extern void mda_get_prod(int n, const int *range, int *prod);
471 extern void mda_get_offset_values(int n, int *dist, const int *prod, const int *span);
472 extern int	mda_next_tuple(int n, int *curr, const int *span);
473 extern int32 *ArrayGetIntegerTypmods(ArrayType *arr, int *n);
474 
475 /*
476  * prototypes for functions defined in array_expanded.c (see PG_GETARG_* macros)
477  */
478 extern Datum expand_array(Datum arraydatum, MemoryContext parentcontext,
479 			 ArrayMetaState *metacache);
480 extern ExpandedArrayHeader *DatumGetExpandedArray(Datum d);
481 extern ExpandedArrayHeader *DatumGetExpandedArrayX(Datum d,
482 					   ArrayMetaState *metacache);
483 extern AnyArrayType *DatumGetAnyArray(Datum d);
484 extern void deconstruct_expanded_array(ExpandedArrayHeader *eah);
485 
486 /*
487  * prototypes for functions defined in array_userfuncs.c
488  */
489 extern Datum array_append(PG_FUNCTION_ARGS);
490 extern Datum array_prepend(PG_FUNCTION_ARGS);
491 extern Datum array_cat(PG_FUNCTION_ARGS);
492 /* create_singleton_array is the only function in this group not using PG_FUNCTION_ARGS. */
493 extern ArrayType *create_singleton_array(FunctionCallInfo fcinfo,
494 					   Oid element_type,
495 					   Datum element,
496 					   bool isNull,
497 					   int ndims);
498 
499 extern Datum array_agg_transfn(PG_FUNCTION_ARGS);
500 extern Datum array_agg_finalfn(PG_FUNCTION_ARGS);
501 extern Datum array_agg_array_transfn(PG_FUNCTION_ARGS);
502 extern Datum array_agg_array_finalfn(PG_FUNCTION_ARGS);
503 
504 extern Datum array_position(PG_FUNCTION_ARGS);
505 extern Datum array_position_start(PG_FUNCTION_ARGS);
506 extern Datum array_positions(PG_FUNCTION_ARGS);
507 
508 /*
509  * prototype for the fmgr-callable function defined in array_typanalyze.c
510  */
511 extern Datum array_typanalyze(PG_FUNCTION_ARGS);
512 
513 #endif   /* ARRAY_H */
514