1 /*
2  * contrib/intarray/_intbig_gist.c
3  */
4 #include "postgres.h"
5 
6 #include "_int.h"
7 #include "access/gist.h"
8 #include "access/reloptions.h"
9 #include "access/stratnum.h"
10 #include "port/pg_bitutils.h"
11 
12 #define GETENTRY(vec,pos) ((GISTTYPE *) DatumGetPointer((vec)->vector[(pos)].key))
13 /*
14 ** _intbig methods
15 */
16 PG_FUNCTION_INFO_V1(g_intbig_consistent);
17 PG_FUNCTION_INFO_V1(g_intbig_compress);
18 PG_FUNCTION_INFO_V1(g_intbig_decompress);
19 PG_FUNCTION_INFO_V1(g_intbig_penalty);
20 PG_FUNCTION_INFO_V1(g_intbig_picksplit);
21 PG_FUNCTION_INFO_V1(g_intbig_union);
22 PG_FUNCTION_INFO_V1(g_intbig_same);
23 PG_FUNCTION_INFO_V1(g_intbig_options);
24 
25 PG_FUNCTION_INFO_V1(_intbig_in);
26 PG_FUNCTION_INFO_V1(_intbig_out);
27 
28 Datum
_intbig_in(PG_FUNCTION_ARGS)29 _intbig_in(PG_FUNCTION_ARGS)
30 {
31 	ereport(ERROR,
32 			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
33 			 errmsg("_intbig_in() not implemented")));
34 	PG_RETURN_DATUM(0);
35 }
36 
37 Datum
_intbig_out(PG_FUNCTION_ARGS)38 _intbig_out(PG_FUNCTION_ARGS)
39 {
40 	ereport(ERROR,
41 			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
42 			 errmsg("_intbig_out() not implemented")));
43 	PG_RETURN_DATUM(0);
44 }
45 
46 static GISTTYPE *
_intbig_alloc(bool allistrue,int siglen,BITVECP sign)47 _intbig_alloc(bool allistrue, int siglen, BITVECP sign)
48 {
49 	int			flag = allistrue ? ALLISTRUE : 0;
50 	int			size = CALCGTSIZE(flag, siglen);
51 	GISTTYPE   *res = (GISTTYPE *) palloc(size);
52 
53 	SET_VARSIZE(res, size);
54 	res->flag = flag;
55 
56 	if (!allistrue)
57 	{
58 		if (sign)
59 			memcpy(GETSIGN(res), sign, siglen);
60 		else
61 			memset(GETSIGN(res), 0, siglen);
62 	}
63 
64 	return res;
65 }
66 
67 
68 /*********************************************************************
69 ** intbig functions
70 *********************************************************************/
71 static bool
_intbig_overlap(GISTTYPE * a,ArrayType * b,int siglen)72 _intbig_overlap(GISTTYPE *a, ArrayType *b, int siglen)
73 {
74 	int			num = ARRNELEMS(b);
75 	int32	   *ptr = ARRPTR(b);
76 
77 	CHECKARRVALID(b);
78 
79 	while (num--)
80 	{
81 		if (GETBIT(GETSIGN(a), HASHVAL(*ptr, siglen)))
82 			return true;
83 		ptr++;
84 	}
85 
86 	return false;
87 }
88 
89 static bool
_intbig_contains(GISTTYPE * a,ArrayType * b,int siglen)90 _intbig_contains(GISTTYPE *a, ArrayType *b, int siglen)
91 {
92 	int			num = ARRNELEMS(b);
93 	int32	   *ptr = ARRPTR(b);
94 
95 	CHECKARRVALID(b);
96 
97 	while (num--)
98 	{
99 		if (!GETBIT(GETSIGN(a), HASHVAL(*ptr, siglen)))
100 			return false;
101 		ptr++;
102 	}
103 
104 	return true;
105 }
106 
107 Datum
g_intbig_same(PG_FUNCTION_ARGS)108 g_intbig_same(PG_FUNCTION_ARGS)
109 {
110 	GISTTYPE   *a = (GISTTYPE *) PG_GETARG_POINTER(0);
111 	GISTTYPE   *b = (GISTTYPE *) PG_GETARG_POINTER(1);
112 	bool	   *result = (bool *) PG_GETARG_POINTER(2);
113 	int			siglen = GET_SIGLEN();
114 
115 	if (ISALLTRUE(a) && ISALLTRUE(b))
116 		*result = true;
117 	else if (ISALLTRUE(a))
118 		*result = false;
119 	else if (ISALLTRUE(b))
120 		*result = false;
121 	else
122 	{
123 		int32		i;
124 		BITVECP		sa = GETSIGN(a),
125 					sb = GETSIGN(b);
126 
127 		*result = true;
128 		LOOPBYTE(siglen)
129 		{
130 			if (sa[i] != sb[i])
131 			{
132 				*result = false;
133 				break;
134 			}
135 		}
136 	}
137 	PG_RETURN_POINTER(result);
138 }
139 
140 Datum
g_intbig_compress(PG_FUNCTION_ARGS)141 g_intbig_compress(PG_FUNCTION_ARGS)
142 {
143 	GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
144 	int			siglen = GET_SIGLEN();
145 
146 	if (entry->leafkey)
147 	{
148 		GISTENTRY  *retval;
149 		ArrayType  *in = DatumGetArrayTypeP(entry->key);
150 		int32	   *ptr;
151 		int			num;
152 		GISTTYPE   *res = _intbig_alloc(false, siglen, NULL);
153 
154 		CHECKARRVALID(in);
155 		if (ARRISEMPTY(in))
156 		{
157 			ptr = NULL;
158 			num = 0;
159 		}
160 		else
161 		{
162 			ptr = ARRPTR(in);
163 			num = ARRNELEMS(in);
164 		}
165 
166 		while (num--)
167 		{
168 			HASH(GETSIGN(res), *ptr, siglen);
169 			ptr++;
170 		}
171 
172 		retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
173 		gistentryinit(*retval, PointerGetDatum(res),
174 					  entry->rel, entry->page,
175 					  entry->offset, false);
176 
177 		if (in != DatumGetArrayTypeP(entry->key))
178 			pfree(in);
179 
180 		PG_RETURN_POINTER(retval);
181 	}
182 	else if (!ISALLTRUE(DatumGetPointer(entry->key)))
183 	{
184 		GISTENTRY  *retval;
185 		int			i;
186 		BITVECP		sign = GETSIGN(DatumGetPointer(entry->key));
187 		GISTTYPE   *res;
188 
189 		LOOPBYTE(siglen)
190 		{
191 			if ((sign[i] & 0xff) != 0xff)
192 				PG_RETURN_POINTER(entry);
193 		}
194 
195 		res = _intbig_alloc(true, siglen, sign);
196 		retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
197 		gistentryinit(*retval, PointerGetDatum(res),
198 					  entry->rel, entry->page,
199 					  entry->offset, false);
200 
201 		PG_RETURN_POINTER(retval);
202 	}
203 
204 	PG_RETURN_POINTER(entry);
205 }
206 
207 
208 static int32
sizebitvec(BITVECP sign,int siglen)209 sizebitvec(BITVECP sign, int siglen)
210 {
211 	return pg_popcount(sign, siglen);
212 }
213 
214 static int
hemdistsign(BITVECP a,BITVECP b,int siglen)215 hemdistsign(BITVECP a, BITVECP b, int siglen)
216 {
217 	int			i,
218 				diff,
219 				dist = 0;
220 
221 	LOOPBYTE(siglen)
222 	{
223 		diff = (unsigned char) (a[i] ^ b[i]);
224 		/* Using the popcount functions here isn't likely to win */
225 		dist += pg_number_of_ones[diff];
226 	}
227 	return dist;
228 }
229 
230 static int
hemdist(GISTTYPE * a,GISTTYPE * b,int siglen)231 hemdist(GISTTYPE *a, GISTTYPE *b, int siglen)
232 {
233 	if (ISALLTRUE(a))
234 	{
235 		if (ISALLTRUE(b))
236 			return 0;
237 		else
238 			return SIGLENBIT(siglen) - sizebitvec(GETSIGN(b), siglen);
239 	}
240 	else if (ISALLTRUE(b))
241 		return SIGLENBIT(siglen) - sizebitvec(GETSIGN(a), siglen);
242 
243 	return hemdistsign(GETSIGN(a), GETSIGN(b), siglen);
244 }
245 
246 Datum
g_intbig_decompress(PG_FUNCTION_ARGS)247 g_intbig_decompress(PG_FUNCTION_ARGS)
248 {
249 	PG_RETURN_DATUM(PG_GETARG_DATUM(0));
250 }
251 
252 static int32
unionkey(BITVECP sbase,GISTTYPE * add,int siglen)253 unionkey(BITVECP sbase, GISTTYPE *add, int siglen)
254 {
255 	int32		i;
256 	BITVECP		sadd = GETSIGN(add);
257 
258 	if (ISALLTRUE(add))
259 		return 1;
260 	LOOPBYTE(siglen)
261 		sbase[i] |= sadd[i];
262 	return 0;
263 }
264 
265 Datum
g_intbig_union(PG_FUNCTION_ARGS)266 g_intbig_union(PG_FUNCTION_ARGS)
267 {
268 	GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
269 	int		   *size = (int *) PG_GETARG_POINTER(1);
270 	int			siglen = GET_SIGLEN();
271 	int32		i;
272 	GISTTYPE   *result = _intbig_alloc(false, siglen, NULL);
273 	BITVECP		base = GETSIGN(result);
274 
275 	for (i = 0; i < entryvec->n; i++)
276 	{
277 		if (unionkey(base, GETENTRY(entryvec, i), siglen))
278 		{
279 			result->flag |= ALLISTRUE;
280 			SET_VARSIZE(result, CALCGTSIZE(ALLISTRUE, siglen));
281 			break;
282 		}
283 	}
284 
285 	*size = VARSIZE(result);
286 
287 	PG_RETURN_POINTER(result);
288 }
289 
290 Datum
g_intbig_penalty(PG_FUNCTION_ARGS)291 g_intbig_penalty(PG_FUNCTION_ARGS)
292 {
293 	GISTENTRY  *origentry = (GISTENTRY *) PG_GETARG_POINTER(0); /* always ISSIGNKEY */
294 	GISTENTRY  *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);
295 	float	   *penalty = (float *) PG_GETARG_POINTER(2);
296 	GISTTYPE   *origval = (GISTTYPE *) DatumGetPointer(origentry->key);
297 	GISTTYPE   *newval = (GISTTYPE *) DatumGetPointer(newentry->key);
298 	int			siglen = GET_SIGLEN();
299 
300 	*penalty = hemdist(origval, newval, siglen);
301 	PG_RETURN_POINTER(penalty);
302 }
303 
304 
305 typedef struct
306 {
307 	OffsetNumber pos;
308 	int32		cost;
309 } SPLITCOST;
310 
311 static int
comparecost(const void * a,const void * b)312 comparecost(const void *a, const void *b)
313 {
314 	return ((const SPLITCOST *) a)->cost - ((const SPLITCOST *) b)->cost;
315 }
316 
317 
318 Datum
g_intbig_picksplit(PG_FUNCTION_ARGS)319 g_intbig_picksplit(PG_FUNCTION_ARGS)
320 {
321 	GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
322 	GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
323 	int			siglen = GET_SIGLEN();
324 	OffsetNumber k,
325 				j;
326 	GISTTYPE   *datum_l,
327 			   *datum_r;
328 	BITVECP		union_l,
329 				union_r;
330 	int32		size_alpha,
331 				size_beta;
332 	int32		size_waste,
333 				waste = -1;
334 	int32		nbytes;
335 	OffsetNumber seed_1 = 0,
336 				seed_2 = 0;
337 	OffsetNumber *left,
338 			   *right;
339 	OffsetNumber maxoff;
340 	BITVECP		ptr;
341 	int			i;
342 	SPLITCOST  *costvector;
343 	GISTTYPE   *_k,
344 			   *_j;
345 
346 	maxoff = entryvec->n - 2;
347 	nbytes = (maxoff + 2) * sizeof(OffsetNumber);
348 	v->spl_left = (OffsetNumber *) palloc(nbytes);
349 	v->spl_right = (OffsetNumber *) palloc(nbytes);
350 
351 	for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k))
352 	{
353 		_k = GETENTRY(entryvec, k);
354 		for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j))
355 		{
356 			size_waste = hemdist(_k, GETENTRY(entryvec, j), siglen);
357 			if (size_waste > waste)
358 			{
359 				waste = size_waste;
360 				seed_1 = k;
361 				seed_2 = j;
362 			}
363 		}
364 	}
365 
366 	left = v->spl_left;
367 	v->spl_nleft = 0;
368 	right = v->spl_right;
369 	v->spl_nright = 0;
370 
371 	if (seed_1 == 0 || seed_2 == 0)
372 	{
373 		seed_1 = 1;
374 		seed_2 = 2;
375 	}
376 
377 	/* form initial .. */
378 	datum_l = _intbig_alloc(ISALLTRUE(GETENTRY(entryvec, seed_1)), siglen,
379 							GETSIGN(GETENTRY(entryvec, seed_1)));
380 	datum_r = _intbig_alloc(ISALLTRUE(GETENTRY(entryvec, seed_2)), siglen,
381 							GETSIGN(GETENTRY(entryvec, seed_2)));
382 
383 	maxoff = OffsetNumberNext(maxoff);
384 	/* sort before ... */
385 	costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
386 	for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j))
387 	{
388 		costvector[j - 1].pos = j;
389 		_j = GETENTRY(entryvec, j);
390 		size_alpha = hemdist(datum_l, _j, siglen);
391 		size_beta = hemdist(datum_r, _j, siglen);
392 		costvector[j - 1].cost = Abs(size_alpha - size_beta);
393 	}
394 	qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);
395 
396 	union_l = GETSIGN(datum_l);
397 	union_r = GETSIGN(datum_r);
398 
399 	for (k = 0; k < maxoff; k++)
400 	{
401 		j = costvector[k].pos;
402 		if (j == seed_1)
403 		{
404 			*left++ = j;
405 			v->spl_nleft++;
406 			continue;
407 		}
408 		else if (j == seed_2)
409 		{
410 			*right++ = j;
411 			v->spl_nright++;
412 			continue;
413 		}
414 		_j = GETENTRY(entryvec, j);
415 		size_alpha = hemdist(datum_l, _j, siglen);
416 		size_beta = hemdist(datum_r, _j, siglen);
417 
418 		if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.00001))
419 		{
420 			if (ISALLTRUE(datum_l) || ISALLTRUE(_j))
421 			{
422 				if (!ISALLTRUE(datum_l))
423 					MemSet((void *) union_l, 0xff, siglen);
424 			}
425 			else
426 			{
427 				ptr = GETSIGN(_j);
428 				LOOPBYTE(siglen)
429 					union_l[i] |= ptr[i];
430 			}
431 			*left++ = j;
432 			v->spl_nleft++;
433 		}
434 		else
435 		{
436 			if (ISALLTRUE(datum_r) || ISALLTRUE(_j))
437 			{
438 				if (!ISALLTRUE(datum_r))
439 					MemSet((void *) union_r, 0xff, siglen);
440 			}
441 			else
442 			{
443 				ptr = GETSIGN(_j);
444 				LOOPBYTE(siglen)
445 					union_r[i] |= ptr[i];
446 			}
447 			*right++ = j;
448 			v->spl_nright++;
449 		}
450 	}
451 
452 	*right = *left = FirstOffsetNumber;
453 	pfree(costvector);
454 
455 	v->spl_ldatum = PointerGetDatum(datum_l);
456 	v->spl_rdatum = PointerGetDatum(datum_r);
457 
458 	PG_RETURN_POINTER(v);
459 }
460 
461 Datum
g_intbig_consistent(PG_FUNCTION_ARGS)462 g_intbig_consistent(PG_FUNCTION_ARGS)
463 {
464 	GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
465 	ArrayType  *query = PG_GETARG_ARRAYTYPE_P(1);
466 	StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
467 
468 	/* Oid		subtype = PG_GETARG_OID(3); */
469 	bool	   *recheck = (bool *) PG_GETARG_POINTER(4);
470 	int			siglen = GET_SIGLEN();
471 	bool		retval;
472 
473 	/* All cases served by this function are inexact */
474 	*recheck = true;
475 
476 	if (ISALLTRUE(DatumGetPointer(entry->key)))
477 		PG_RETURN_BOOL(true);
478 
479 	if (strategy == BooleanSearchStrategy)
480 	{
481 		retval = signconsistent((QUERYTYPE *) query,
482 								GETSIGN(DatumGetPointer(entry->key)),
483 								siglen,
484 								false);
485 		PG_FREE_IF_COPY(query, 1);
486 		PG_RETURN_BOOL(retval);
487 	}
488 
489 	CHECKARRVALID(query);
490 
491 	switch (strategy)
492 	{
493 		case RTOverlapStrategyNumber:
494 			retval = _intbig_overlap((GISTTYPE *) DatumGetPointer(entry->key),
495 									 query, siglen);
496 			break;
497 		case RTSameStrategyNumber:
498 			if (GIST_LEAF(entry))
499 			{
500 				int			i,
501 							num = ARRNELEMS(query);
502 				int32	   *ptr = ARRPTR(query);
503 				BITVECP		dq = palloc0(siglen),
504 							de;
505 
506 				while (num--)
507 				{
508 					HASH(dq, *ptr, siglen);
509 					ptr++;
510 				}
511 
512 				de = GETSIGN((GISTTYPE *) DatumGetPointer(entry->key));
513 				retval = true;
514 				LOOPBYTE(siglen)
515 				{
516 					if (de[i] != dq[i])
517 					{
518 						retval = false;
519 						break;
520 					}
521 				}
522 
523 				pfree(dq);
524 			}
525 			else
526 				retval = _intbig_contains((GISTTYPE *) DatumGetPointer(entry->key),
527 										  query, siglen);
528 			break;
529 		case RTContainsStrategyNumber:
530 		case RTOldContainsStrategyNumber:
531 			retval = _intbig_contains((GISTTYPE *) DatumGetPointer(entry->key),
532 									  query, siglen);
533 			break;
534 		case RTContainedByStrategyNumber:
535 		case RTOldContainedByStrategyNumber:
536 			if (GIST_LEAF(entry))
537 			{
538 				int			i,
539 							num = ARRNELEMS(query);
540 				int32	   *ptr = ARRPTR(query);
541 				BITVECP		dq = palloc0(siglen),
542 							de;
543 
544 				while (num--)
545 				{
546 					HASH(dq, *ptr, siglen);
547 					ptr++;
548 				}
549 
550 				de = GETSIGN((GISTTYPE *) DatumGetPointer(entry->key));
551 				retval = true;
552 				LOOPBYTE(siglen)
553 				{
554 					if (de[i] & ~dq[i])
555 					{
556 						retval = false;
557 						break;
558 					}
559 				}
560 			}
561 			else
562 			{
563 				/*
564 				 * Unfortunately, because empty arrays could be anywhere in
565 				 * the index, we must search the whole tree.
566 				 */
567 				retval = true;
568 			}
569 			break;
570 		default:
571 			retval = false;
572 	}
573 	PG_FREE_IF_COPY(query, 1);
574 	PG_RETURN_BOOL(retval);
575 }
576 
577 Datum
g_intbig_options(PG_FUNCTION_ARGS)578 g_intbig_options(PG_FUNCTION_ARGS)
579 {
580 	local_relopts *relopts = (local_relopts *) PG_GETARG_POINTER(0);
581 
582 	init_local_reloptions(relopts, sizeof(GISTIntArrayBigOptions));
583 	add_local_int_reloption(relopts, "siglen",
584 							"signature length in bytes",
585 							SIGLEN_DEFAULT, 1, SIGLEN_MAX,
586 							offsetof(GISTIntArrayBigOptions, siglen));
587 
588 	PG_RETURN_VOID();
589 }
590