1 /*-------------------------------------------------------------------------
2  *
3  * tsquery_gist.c
4  *	  GiST index support for tsquery
5  *
6  * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
7  *
8  *
9  * IDENTIFICATION
10  *	  src/backend/utils/adt/tsquery_gist.c
11  *
12  *-------------------------------------------------------------------------
13  */
14 
15 #include "postgres.h"
16 
17 #include "access/gist.h"
18 #include "access/stratnum.h"
19 #include "tsearch/ts_utils.h"
20 #include "utils/builtins.h"
21 
22 #define GETENTRY(vec,pos) DatumGetTSQuerySign((vec)->vector[pos].key)
23 
24 
25 Datum
gtsquery_compress(PG_FUNCTION_ARGS)26 gtsquery_compress(PG_FUNCTION_ARGS)
27 {
28 	GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
29 	GISTENTRY  *retval = entry;
30 
31 	if (entry->leafkey)
32 	{
33 		TSQuerySign sign;
34 
35 		retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
36 		sign = makeTSQuerySign(DatumGetTSQuery(entry->key));
37 
38 		gistentryinit(*retval, TSQuerySignGetDatum(sign),
39 					  entry->rel, entry->page,
40 					  entry->offset, false);
41 	}
42 
43 	PG_RETURN_POINTER(retval);
44 }
45 
46 /*
47  * We do not need a decompress function, because the other gtsquery
48  * support functions work with the compressed representation.
49  */
50 
51 Datum
gtsquery_consistent(PG_FUNCTION_ARGS)52 gtsquery_consistent(PG_FUNCTION_ARGS)
53 {
54 	GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
55 	TSQuery		query = PG_GETARG_TSQUERY(1);
56 	StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
57 
58 	/* Oid		subtype = PG_GETARG_OID(3); */
59 	bool	   *recheck = (bool *) PG_GETARG_POINTER(4);
60 	TSQuerySign key = DatumGetTSQuerySign(entry->key);
61 	TSQuerySign sq = makeTSQuerySign(query);
62 	bool		retval;
63 
64 	/* All cases served by this function are inexact */
65 	*recheck = true;
66 
67 	switch (strategy)
68 	{
69 		case RTContainsStrategyNumber:
70 			if (GIST_LEAF(entry))
71 				retval = (key & sq) == sq;
72 			else
73 				retval = (key & sq) != 0;
74 			break;
75 		case RTContainedByStrategyNumber:
76 			if (GIST_LEAF(entry))
77 				retval = (key & sq) == key;
78 			else
79 				retval = (key & sq) != 0;
80 			break;
81 		default:
82 			retval = false;
83 	}
84 	PG_RETURN_BOOL(retval);
85 }
86 
87 Datum
gtsquery_union(PG_FUNCTION_ARGS)88 gtsquery_union(PG_FUNCTION_ARGS)
89 {
90 	GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
91 	int		   *size = (int *) PG_GETARG_POINTER(1);
92 	TSQuerySign sign;
93 	int			i;
94 
95 	sign = 0;
96 
97 	for (i = 0; i < entryvec->n; i++)
98 		sign |= GETENTRY(entryvec, i);
99 
100 	*size = sizeof(TSQuerySign);
101 
102 	PG_RETURN_TSQUERYSIGN(sign);
103 }
104 
105 Datum
gtsquery_same(PG_FUNCTION_ARGS)106 gtsquery_same(PG_FUNCTION_ARGS)
107 {
108 	TSQuerySign a = PG_GETARG_TSQUERYSIGN(0);
109 	TSQuerySign b = PG_GETARG_TSQUERYSIGN(1);
110 	bool	   *result = (bool *) PG_GETARG_POINTER(2);
111 
112 	*result = (a == b) ? true : false;
113 
114 	PG_RETURN_POINTER(result);
115 }
116 
117 static int
sizebitvec(TSQuerySign sign)118 sizebitvec(TSQuerySign sign)
119 {
120 	int			size = 0,
121 				i;
122 
123 	for (i = 0; i < TSQS_SIGLEN; i++)
124 		size += 0x01 & (sign >> i);
125 
126 	return size;
127 }
128 
129 static int
hemdist(TSQuerySign a,TSQuerySign b)130 hemdist(TSQuerySign a, TSQuerySign b)
131 {
132 	TSQuerySign res = a ^ b;
133 
134 	return sizebitvec(res);
135 }
136 
137 Datum
gtsquery_penalty(PG_FUNCTION_ARGS)138 gtsquery_penalty(PG_FUNCTION_ARGS)
139 {
140 	TSQuerySign origval = DatumGetTSQuerySign(((GISTENTRY *) PG_GETARG_POINTER(0))->key);
141 	TSQuerySign newval = DatumGetTSQuerySign(((GISTENTRY *) PG_GETARG_POINTER(1))->key);
142 	float	   *penalty = (float *) PG_GETARG_POINTER(2);
143 
144 	*penalty = hemdist(origval, newval);
145 
146 	PG_RETURN_POINTER(penalty);
147 }
148 
149 
150 typedef struct
151 {
152 	OffsetNumber pos;
153 	int32		cost;
154 } SPLITCOST;
155 
156 static int
comparecost(const void * a,const void * b)157 comparecost(const void *a, const void *b)
158 {
159 	if (((const SPLITCOST *) a)->cost == ((const SPLITCOST *) b)->cost)
160 		return 0;
161 	else
162 		return (((const SPLITCOST *) a)->cost > ((const SPLITCOST *) b)->cost) ? 1 : -1;
163 }
164 
165 #define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
166 
167 Datum
gtsquery_picksplit(PG_FUNCTION_ARGS)168 gtsquery_picksplit(PG_FUNCTION_ARGS)
169 {
170 	GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
171 	GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
172 	OffsetNumber maxoff = entryvec->n - 2;
173 	OffsetNumber k,
174 				j;
175 	TSQuerySign datum_l,
176 				datum_r;
177 	int32		size_alpha,
178 				size_beta;
179 	int32		size_waste,
180 				waste = -1;
181 	int32		nbytes;
182 	OffsetNumber seed_1 = 0,
183 				seed_2 = 0;
184 	OffsetNumber *left,
185 			   *right;
186 
187 	SPLITCOST  *costvector;
188 
189 	nbytes = (maxoff + 2) * sizeof(OffsetNumber);
190 	left = v->spl_left = (OffsetNumber *) palloc(nbytes);
191 	right = v->spl_right = (OffsetNumber *) palloc(nbytes);
192 	v->spl_nleft = v->spl_nright = 0;
193 
194 	for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k))
195 		for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j))
196 		{
197 			size_waste = hemdist(GETENTRY(entryvec, j), GETENTRY(entryvec, k));
198 			if (size_waste > waste)
199 			{
200 				waste = size_waste;
201 				seed_1 = k;
202 				seed_2 = j;
203 			}
204 		}
205 
206 
207 	if (seed_1 == 0 || seed_2 == 0)
208 	{
209 		seed_1 = 1;
210 		seed_2 = 2;
211 	}
212 
213 	datum_l = GETENTRY(entryvec, seed_1);
214 	datum_r = GETENTRY(entryvec, seed_2);
215 
216 	maxoff = OffsetNumberNext(maxoff);
217 	costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
218 	for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j))
219 	{
220 		costvector[j - 1].pos = j;
221 		size_alpha = hemdist(GETENTRY(entryvec, seed_1), GETENTRY(entryvec, j));
222 		size_beta = hemdist(GETENTRY(entryvec, seed_2), GETENTRY(entryvec, j));
223 		costvector[j - 1].cost = abs(size_alpha - size_beta);
224 	}
225 	qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);
226 
227 	for (k = 0; k < maxoff; k++)
228 	{
229 		j = costvector[k].pos;
230 		if (j == seed_1)
231 		{
232 			*left++ = j;
233 			v->spl_nleft++;
234 			continue;
235 		}
236 		else if (j == seed_2)
237 		{
238 			*right++ = j;
239 			v->spl_nright++;
240 			continue;
241 		}
242 		size_alpha = hemdist(datum_l, GETENTRY(entryvec, j));
243 		size_beta = hemdist(datum_r, GETENTRY(entryvec, j));
244 
245 		if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.05))
246 		{
247 			datum_l |= GETENTRY(entryvec, j);
248 			*left++ = j;
249 			v->spl_nleft++;
250 		}
251 		else
252 		{
253 			datum_r |= GETENTRY(entryvec, j);
254 			*right++ = j;
255 			v->spl_nright++;
256 		}
257 	}
258 
259 	*right = *left = FirstOffsetNumber;
260 	v->spl_ldatum = TSQuerySignGetDatum(datum_l);
261 	v->spl_rdatum = TSQuerySignGetDatum(datum_r);
262 
263 	PG_RETURN_POINTER(v);
264 }
265 
266 /*
267  * Formerly, gtsquery_consistent was declared in pg_proc.h with arguments
268  * that did not match the documented conventions for GiST support functions.
269  * We fixed that, but we still need a pg_proc entry with the old signature
270  * to support reloading pre-9.6 contrib/tsearch2 opclass declarations.
271  * This compatibility function should go away eventually.
272  */
273 Datum
gtsquery_consistent_oldsig(PG_FUNCTION_ARGS)274 gtsquery_consistent_oldsig(PG_FUNCTION_ARGS)
275 {
276 	return gtsquery_consistent(fcinfo);
277 }
278