1 /*-------------------------------------------------------------------------
2  *
3  * _int_selfuncs.c
4  *	  Functions for selectivity estimation of intarray operators
5  *
6  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *	  contrib/intarray/_int_selfuncs.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16 #include "_int.h"
17 
18 #include "access/htup_details.h"
19 #include "catalog/pg_operator.h"
20 #include "catalog/pg_statistic.h"
21 #include "catalog/pg_type.h"
22 #include "utils/builtins.h"
23 #include "utils/selfuncs.h"
24 #include "utils/syscache.h"
25 #include "utils/lsyscache.h"
26 #include "miscadmin.h"
27 
28 PG_FUNCTION_INFO_V1(_int_overlap_sel);
29 PG_FUNCTION_INFO_V1(_int_contains_sel);
30 PG_FUNCTION_INFO_V1(_int_contained_sel);
31 PG_FUNCTION_INFO_V1(_int_overlap_joinsel);
32 PG_FUNCTION_INFO_V1(_int_contains_joinsel);
33 PG_FUNCTION_INFO_V1(_int_contained_joinsel);
34 PG_FUNCTION_INFO_V1(_int_matchsel);
35 
36 
37 static Selectivity int_query_opr_selec(ITEM *item, Datum *values, float4 *freqs,
38 					int nmncelems, float4 minfreq);
39 static int	compare_val_int4(const void *a, const void *b);
40 
41 /*
42  * Wrappers around the default array selectivity estimation functions.
43  *
44  * The default array selectivity operators for the @>, && and @< operators
45  * work fine for integer arrays. However, if we tried to just use arraycontsel
46  * and arracontjoinsel directly as the cost estimator functions for our
47  * operators, they would not work as intended, because they look at the
48  * operator's OID. Our operators behave exactly like the built-in anyarray
49  * versions, but we must tell the cost estimator functions which built-in
50  * operators they correspond to. These wrappers just replace the operator
51  * OID with the corresponding built-in operator's OID, and call the built-in
52  * function.
53  */
54 
55 Datum
_int_overlap_sel(PG_FUNCTION_ARGS)56 _int_overlap_sel(PG_FUNCTION_ARGS)
57 {
58 	PG_RETURN_DATUM(DirectFunctionCall4(arraycontsel,
59 										PG_GETARG_DATUM(0),
60 										ObjectIdGetDatum(OID_ARRAY_OVERLAP_OP),
61 										PG_GETARG_DATUM(2),
62 										PG_GETARG_DATUM(3)));
63 }
64 
65 Datum
_int_contains_sel(PG_FUNCTION_ARGS)66 _int_contains_sel(PG_FUNCTION_ARGS)
67 {
68 	PG_RETURN_DATUM(DirectFunctionCall4(arraycontsel,
69 										PG_GETARG_DATUM(0),
70 										ObjectIdGetDatum(OID_ARRAY_CONTAINS_OP),
71 										PG_GETARG_DATUM(2),
72 										PG_GETARG_DATUM(3)));
73 }
74 
75 Datum
_int_contained_sel(PG_FUNCTION_ARGS)76 _int_contained_sel(PG_FUNCTION_ARGS)
77 {
78 	PG_RETURN_DATUM(DirectFunctionCall4(arraycontsel,
79 										PG_GETARG_DATUM(0),
80 										ObjectIdGetDatum(OID_ARRAY_CONTAINED_OP),
81 										PG_GETARG_DATUM(2),
82 										PG_GETARG_DATUM(3)));
83 }
84 
85 Datum
_int_overlap_joinsel(PG_FUNCTION_ARGS)86 _int_overlap_joinsel(PG_FUNCTION_ARGS)
87 {
88 	PG_RETURN_DATUM(DirectFunctionCall5(arraycontjoinsel,
89 										PG_GETARG_DATUM(0),
90 										ObjectIdGetDatum(OID_ARRAY_OVERLAP_OP),
91 										PG_GETARG_DATUM(2),
92 										PG_GETARG_DATUM(3),
93 										PG_GETARG_DATUM(4)));
94 }
95 
96 Datum
_int_contains_joinsel(PG_FUNCTION_ARGS)97 _int_contains_joinsel(PG_FUNCTION_ARGS)
98 {
99 	PG_RETURN_DATUM(DirectFunctionCall5(arraycontjoinsel,
100 										PG_GETARG_DATUM(0),
101 										ObjectIdGetDatum(OID_ARRAY_CONTAINS_OP),
102 										PG_GETARG_DATUM(2),
103 										PG_GETARG_DATUM(3),
104 										PG_GETARG_DATUM(4)));
105 }
106 
107 Datum
_int_contained_joinsel(PG_FUNCTION_ARGS)108 _int_contained_joinsel(PG_FUNCTION_ARGS)
109 {
110 	PG_RETURN_DATUM(DirectFunctionCall5(arraycontjoinsel,
111 										PG_GETARG_DATUM(0),
112 										ObjectIdGetDatum(OID_ARRAY_CONTAINED_OP),
113 										PG_GETARG_DATUM(2),
114 										PG_GETARG_DATUM(3),
115 										PG_GETARG_DATUM(4)));
116 }
117 
118 
119 /*
120  * _int_matchsel -- restriction selectivity function for intarray @@ query_int
121  */
122 Datum
_int_matchsel(PG_FUNCTION_ARGS)123 _int_matchsel(PG_FUNCTION_ARGS)
124 {
125 	PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
126 
127 	List	   *args = (List *) PG_GETARG_POINTER(2);
128 	int			varRelid = PG_GETARG_INT32(3);
129 	VariableStatData vardata;
130 	Node	   *other;
131 	bool		varonleft;
132 	Selectivity selec;
133 	QUERYTYPE  *query;
134 	Datum	   *mcelems = NULL;
135 	float4	   *mcefreqs = NULL;
136 	int			nmcelems = 0;
137 	float4		minfreq = 0.0;
138 	float4		nullfrac = 0.0;
139 	AttStatsSlot sslot;
140 
141 	/*
142 	 * If expression is not "variable @@ something" or "something @@ variable"
143 	 * then punt and return a default estimate.
144 	 */
145 	if (!get_restriction_variable(root, args, varRelid,
146 								  &vardata, &other, &varonleft))
147 		PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);
148 
149 	/*
150 	 * Variable should be int[]. We don't support cases where variable is
151 	 * query_int.
152 	 */
153 	if (vardata.vartype != INT4ARRAYOID)
154 		PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);
155 
156 	/*
157 	 * Can't do anything useful if the something is not a constant, either.
158 	 */
159 	if (!IsA(other, Const))
160 	{
161 		ReleaseVariableStats(vardata);
162 		PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);
163 	}
164 
165 	/*
166 	 * The "@@" operator is strict, so we can cope with NULL right away.
167 	 */
168 	if (((Const *) other)->constisnull)
169 	{
170 		ReleaseVariableStats(vardata);
171 		PG_RETURN_FLOAT8(0.0);
172 	}
173 
174 	/* The caller made sure the const is a query, so get it now */
175 	query = DatumGetQueryTypeP(((Const *) other)->constvalue);
176 
177 	/* Empty query matches nothing */
178 	if (query->size == 0)
179 	{
180 		ReleaseVariableStats(vardata);
181 		return (Selectivity) 0.0;
182 	}
183 
184 	/*
185 	 * Get the statistics for the intarray column.
186 	 *
187 	 * We're interested in the Most-Common-Elements list, and the NULL
188 	 * fraction.
189 	 */
190 	if (HeapTupleIsValid(vardata.statsTuple))
191 	{
192 		Form_pg_statistic stats;
193 
194 		stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
195 		nullfrac = stats->stanullfrac;
196 
197 		/*
198 		 * For an int4 array, the default array type analyze function will
199 		 * collect a Most Common Elements list, which is an array of int4s.
200 		 */
201 		if (get_attstatsslot(&sslot, vardata.statsTuple,
202 							 STATISTIC_KIND_MCELEM, InvalidOid,
203 							 ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS))
204 		{
205 			Assert(sslot.valuetype == INT4OID);
206 
207 			/*
208 			 * There should be three more Numbers than Values, because the
209 			 * last three (for intarray) cells are taken for minimal, maximal
210 			 * and nulls frequency. Punt if not.
211 			 */
212 			if (sslot.nnumbers == sslot.nvalues + 3)
213 			{
214 				/* Grab the lowest frequency. */
215 				minfreq = sslot.numbers[sslot.nnumbers - (sslot.nnumbers - sslot.nvalues)];
216 
217 				mcelems = sslot.values;
218 				mcefreqs = sslot.numbers;
219 				nmcelems = sslot.nvalues;
220 			}
221 		}
222 	}
223 	else
224 		memset(&sslot, 0, sizeof(sslot));
225 
226 	/* Process the logical expression in the query, using the stats */
227 	selec = int_query_opr_selec(GETQUERY(query) + query->size - 1,
228 								mcelems, mcefreqs, nmcelems, minfreq);
229 
230 	/* MCE stats count only non-null rows, so adjust for null rows. */
231 	selec *= (1.0 - nullfrac);
232 
233 	free_attstatsslot(&sslot);
234 	ReleaseVariableStats(vardata);
235 
236 	CLAMP_PROBABILITY(selec);
237 
238 	PG_RETURN_FLOAT8((float8) selec);
239 }
240 
241 /*
242  * Estimate selectivity of single intquery operator
243  */
244 static Selectivity
int_query_opr_selec(ITEM * item,Datum * mcelems,float4 * mcefreqs,int nmcelems,float4 minfreq)245 int_query_opr_selec(ITEM *item, Datum *mcelems, float4 *mcefreqs,
246 					int nmcelems, float4 minfreq)
247 {
248 	Selectivity selec;
249 
250 	/* since this function recurses, it could be driven to stack overflow */
251 	check_stack_depth();
252 
253 	if (item->type == VAL)
254 	{
255 		Datum	   *searchres;
256 
257 		if (mcelems == NULL)
258 			return (Selectivity) DEFAULT_EQ_SEL;
259 
260 		searchres = (Datum *) bsearch(&item->val, mcelems, nmcelems,
261 									  sizeof(Datum), compare_val_int4);
262 		if (searchres)
263 		{
264 			/*
265 			 * The element is in MCELEM.  Return precise selectivity (or at
266 			 * least as precise as ANALYZE could find out).
267 			 */
268 			selec = mcefreqs[searchres - mcelems];
269 		}
270 		else
271 		{
272 			/*
273 			 * The element is not in MCELEM.  Punt, but assume that the
274 			 * selectivity cannot be more than minfreq / 2.
275 			 */
276 			selec = Min(DEFAULT_EQ_SEL, minfreq / 2);
277 		}
278 	}
279 	else if (item->type == OPR)
280 	{
281 		/* Current query node is an operator */
282 		Selectivity s1,
283 					s2;
284 
285 		s1 = int_query_opr_selec(item - 1, mcelems, mcefreqs, nmcelems,
286 								 minfreq);
287 		switch (item->val)
288 		{
289 			case (int32) '!':
290 				selec = 1.0 - s1;
291 				break;
292 
293 			case (int32) '&':
294 				s2 = int_query_opr_selec(item + item->left, mcelems, mcefreqs,
295 										 nmcelems, minfreq);
296 				selec = s1 * s2;
297 				break;
298 
299 			case (int32) '|':
300 				s2 = int_query_opr_selec(item + item->left, mcelems, mcefreqs,
301 										 nmcelems, minfreq);
302 				selec = s1 + s2 - s1 * s2;
303 				break;
304 
305 			default:
306 				elog(ERROR, "unrecognized operator: %d", item->val);
307 				selec = 0;		/* keep compiler quiet */
308 				break;
309 		}
310 	}
311 	else
312 	{
313 		elog(ERROR, "unrecognized int query item type: %u", item->type);
314 		selec = 0;				/* keep compiler quiet */
315 	}
316 
317 	/* Clamp intermediate results to stay sane despite roundoff error */
318 	CLAMP_PROBABILITY(selec);
319 
320 	return selec;
321 }
322 
323 /*
324  * Comparison function for binary search in mcelem array.
325  */
326 static int
compare_val_int4(const void * a,const void * b)327 compare_val_int4(const void *a, const void *b)
328 {
329 	int32		key = *(int32 *) a;
330 	const Datum *t = (const Datum *) b;
331 
332 	return key - DatumGetInt32(*t);
333 }
334