1 /*
2  * contrib/intarray/_intbig_gist.c
3  */
4 #include "postgres.h"
5 
6 #include "access/gist.h"
7 #include "access/stratnum.h"
8 
9 #include "_int.h"
10 
11 #define GETENTRY(vec,pos) ((GISTTYPE *) DatumGetPointer((vec)->vector[(pos)].key))
12 /*
13 ** _intbig methods
14 */
15 PG_FUNCTION_INFO_V1(g_intbig_consistent);
16 PG_FUNCTION_INFO_V1(g_intbig_compress);
17 PG_FUNCTION_INFO_V1(g_intbig_decompress);
18 PG_FUNCTION_INFO_V1(g_intbig_penalty);
19 PG_FUNCTION_INFO_V1(g_intbig_picksplit);
20 PG_FUNCTION_INFO_V1(g_intbig_union);
21 PG_FUNCTION_INFO_V1(g_intbig_same);
22 
23 /* Number of one-bits in an unsigned byte */
24 static const uint8 number_of_ones[256] = {
25 	0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
26 	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
27 	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
28 	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
29 	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
30 	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
31 	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
32 	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
33 	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
34 	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
35 	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
36 	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
37 	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
38 	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
39 	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
40 	4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
41 };
42 
43 PG_FUNCTION_INFO_V1(_intbig_in);
44 PG_FUNCTION_INFO_V1(_intbig_out);
45 
46 Datum
_intbig_in(PG_FUNCTION_ARGS)47 _intbig_in(PG_FUNCTION_ARGS)
48 {
49 	ereport(ERROR,
50 			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
51 			 errmsg("_intbig_in() not implemented")));
52 	PG_RETURN_DATUM(0);
53 }
54 
55 Datum
_intbig_out(PG_FUNCTION_ARGS)56 _intbig_out(PG_FUNCTION_ARGS)
57 {
58 	ereport(ERROR,
59 			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
60 			 errmsg("_intbig_out() not implemented")));
61 	PG_RETURN_DATUM(0);
62 }
63 
64 
65 /*********************************************************************
66 ** intbig functions
67 *********************************************************************/
68 static bool
_intbig_overlap(GISTTYPE * a,ArrayType * b)69 _intbig_overlap(GISTTYPE *a, ArrayType *b)
70 {
71 	int			num = ARRNELEMS(b);
72 	int32	   *ptr = ARRPTR(b);
73 
74 	CHECKARRVALID(b);
75 
76 	while (num--)
77 	{
78 		if (GETBIT(GETSIGN(a), HASHVAL(*ptr)))
79 			return true;
80 		ptr++;
81 	}
82 
83 	return false;
84 }
85 
86 static bool
_intbig_contains(GISTTYPE * a,ArrayType * b)87 _intbig_contains(GISTTYPE *a, ArrayType *b)
88 {
89 	int			num = ARRNELEMS(b);
90 	int32	   *ptr = ARRPTR(b);
91 
92 	CHECKARRVALID(b);
93 
94 	while (num--)
95 	{
96 		if (!GETBIT(GETSIGN(a), HASHVAL(*ptr)))
97 			return false;
98 		ptr++;
99 	}
100 
101 	return true;
102 }
103 
104 Datum
g_intbig_same(PG_FUNCTION_ARGS)105 g_intbig_same(PG_FUNCTION_ARGS)
106 {
107 	GISTTYPE   *a = (GISTTYPE *) PG_GETARG_POINTER(0);
108 	GISTTYPE   *b = (GISTTYPE *) PG_GETARG_POINTER(1);
109 	bool	   *result = (bool *) PG_GETARG_POINTER(2);
110 
111 	if (ISALLTRUE(a) && ISALLTRUE(b))
112 		*result = true;
113 	else if (ISALLTRUE(a))
114 		*result = false;
115 	else if (ISALLTRUE(b))
116 		*result = false;
117 	else
118 	{
119 		int32		i;
120 		BITVECP		sa = GETSIGN(a),
121 					sb = GETSIGN(b);
122 
123 		*result = true;
124 		LOOPBYTE
125 		{
126 			if (sa[i] != sb[i])
127 			{
128 				*result = false;
129 				break;
130 			}
131 		}
132 	}
133 	PG_RETURN_POINTER(result);
134 }
135 
136 Datum
g_intbig_compress(PG_FUNCTION_ARGS)137 g_intbig_compress(PG_FUNCTION_ARGS)
138 {
139 	GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
140 
141 	if (entry->leafkey)
142 	{
143 		GISTENTRY  *retval;
144 		ArrayType  *in = DatumGetArrayTypeP(entry->key);
145 		int32	   *ptr;
146 		int			num;
147 		GISTTYPE   *res = (GISTTYPE *) palloc0(CALCGTSIZE(0));
148 
149 		CHECKARRVALID(in);
150 		if (ARRISEMPTY(in))
151 		{
152 			ptr = NULL;
153 			num = 0;
154 		}
155 		else
156 		{
157 			ptr = ARRPTR(in);
158 			num = ARRNELEMS(in);
159 		}
160 		SET_VARSIZE(res, CALCGTSIZE(0));
161 
162 		while (num--)
163 		{
164 			HASH(GETSIGN(res), *ptr);
165 			ptr++;
166 		}
167 
168 		retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
169 		gistentryinit(*retval, PointerGetDatum(res),
170 					  entry->rel, entry->page,
171 					  entry->offset, FALSE);
172 
173 		if (in != DatumGetArrayTypeP(entry->key))
174 			pfree(in);
175 
176 		PG_RETURN_POINTER(retval);
177 	}
178 	else if (!ISALLTRUE(DatumGetPointer(entry->key)))
179 	{
180 		GISTENTRY  *retval;
181 		int			i;
182 		BITVECP		sign = GETSIGN(DatumGetPointer(entry->key));
183 		GISTTYPE   *res;
184 
185 		LOOPBYTE
186 		{
187 			if ((sign[i] & 0xff) != 0xff)
188 				PG_RETURN_POINTER(entry);
189 		}
190 
191 		res = (GISTTYPE *) palloc(CALCGTSIZE(ALLISTRUE));
192 		SET_VARSIZE(res, CALCGTSIZE(ALLISTRUE));
193 		res->flag = ALLISTRUE;
194 
195 		retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
196 		gistentryinit(*retval, PointerGetDatum(res),
197 					  entry->rel, entry->page,
198 					  entry->offset, FALSE);
199 
200 		PG_RETURN_POINTER(retval);
201 	}
202 
203 	PG_RETURN_POINTER(entry);
204 }
205 
206 
207 static int32
sizebitvec(BITVECP sign)208 sizebitvec(BITVECP sign)
209 {
210 	int32		size = 0,
211 				i;
212 
213 	LOOPBYTE
214 		size += number_of_ones[(unsigned char) sign[i]];
215 	return size;
216 }
217 
218 static int
hemdistsign(BITVECP a,BITVECP b)219 hemdistsign(BITVECP a, BITVECP b)
220 {
221 	int			i,
222 				diff,
223 				dist = 0;
224 
225 	LOOPBYTE
226 	{
227 		diff = (unsigned char) (a[i] ^ b[i]);
228 		dist += number_of_ones[diff];
229 	}
230 	return dist;
231 }
232 
233 static int
hemdist(GISTTYPE * a,GISTTYPE * b)234 hemdist(GISTTYPE *a, GISTTYPE *b)
235 {
236 	if (ISALLTRUE(a))
237 	{
238 		if (ISALLTRUE(b))
239 			return 0;
240 		else
241 			return SIGLENBIT - sizebitvec(GETSIGN(b));
242 	}
243 	else if (ISALLTRUE(b))
244 		return SIGLENBIT - sizebitvec(GETSIGN(a));
245 
246 	return hemdistsign(GETSIGN(a), GETSIGN(b));
247 }
248 
249 Datum
g_intbig_decompress(PG_FUNCTION_ARGS)250 g_intbig_decompress(PG_FUNCTION_ARGS)
251 {
252 	PG_RETURN_DATUM(PG_GETARG_DATUM(0));
253 }
254 
255 static int32
unionkey(BITVECP sbase,GISTTYPE * add)256 unionkey(BITVECP sbase, GISTTYPE *add)
257 {
258 	int32		i;
259 	BITVECP		sadd = GETSIGN(add);
260 
261 	if (ISALLTRUE(add))
262 		return 1;
263 	LOOPBYTE
264 		sbase[i] |= sadd[i];
265 	return 0;
266 }
267 
268 Datum
g_intbig_union(PG_FUNCTION_ARGS)269 g_intbig_union(PG_FUNCTION_ARGS)
270 {
271 	GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
272 	int		   *size = (int *) PG_GETARG_POINTER(1);
273 	BITVEC		base;
274 	int32		i,
275 				len;
276 	int32		flag = 0;
277 	GISTTYPE   *result;
278 
279 	MemSet((void *) base, 0, sizeof(BITVEC));
280 	for (i = 0; i < entryvec->n; i++)
281 	{
282 		if (unionkey(base, GETENTRY(entryvec, i)))
283 		{
284 			flag = ALLISTRUE;
285 			break;
286 		}
287 	}
288 
289 	len = CALCGTSIZE(flag);
290 	result = (GISTTYPE *) palloc(len);
291 	SET_VARSIZE(result, len);
292 	result->flag = flag;
293 	if (!ISALLTRUE(result))
294 		memcpy((void *) GETSIGN(result), (void *) base, sizeof(BITVEC));
295 	*size = len;
296 
297 	PG_RETURN_POINTER(result);
298 }
299 
300 Datum
g_intbig_penalty(PG_FUNCTION_ARGS)301 g_intbig_penalty(PG_FUNCTION_ARGS)
302 {
303 	GISTENTRY  *origentry = (GISTENTRY *) PG_GETARG_POINTER(0); /* always ISSIGNKEY */
304 	GISTENTRY  *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);
305 	float	   *penalty = (float *) PG_GETARG_POINTER(2);
306 	GISTTYPE   *origval = (GISTTYPE *) DatumGetPointer(origentry->key);
307 	GISTTYPE   *newval = (GISTTYPE *) DatumGetPointer(newentry->key);
308 
309 	*penalty = hemdist(origval, newval);
310 	PG_RETURN_POINTER(penalty);
311 }
312 
313 
314 typedef struct
315 {
316 	OffsetNumber pos;
317 	int32		cost;
318 } SPLITCOST;
319 
320 static int
comparecost(const void * a,const void * b)321 comparecost(const void *a, const void *b)
322 {
323 	return ((const SPLITCOST *) a)->cost - ((const SPLITCOST *) b)->cost;
324 }
325 
326 
327 Datum
g_intbig_picksplit(PG_FUNCTION_ARGS)328 g_intbig_picksplit(PG_FUNCTION_ARGS)
329 {
330 	GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
331 	GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
332 	OffsetNumber k,
333 				j;
334 	GISTTYPE   *datum_l,
335 			   *datum_r;
336 	BITVECP		union_l,
337 				union_r;
338 	int32		size_alpha,
339 				size_beta;
340 	int32		size_waste,
341 				waste = -1;
342 	int32		nbytes;
343 	OffsetNumber seed_1 = 0,
344 				seed_2 = 0;
345 	OffsetNumber *left,
346 			   *right;
347 	OffsetNumber maxoff;
348 	BITVECP		ptr;
349 	int			i;
350 	SPLITCOST  *costvector;
351 	GISTTYPE   *_k,
352 			   *_j;
353 
354 	maxoff = entryvec->n - 2;
355 	nbytes = (maxoff + 2) * sizeof(OffsetNumber);
356 	v->spl_left = (OffsetNumber *) palloc(nbytes);
357 	v->spl_right = (OffsetNumber *) palloc(nbytes);
358 
359 	for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k))
360 	{
361 		_k = GETENTRY(entryvec, k);
362 		for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j))
363 		{
364 			size_waste = hemdist(_k, GETENTRY(entryvec, j));
365 			if (size_waste > waste)
366 			{
367 				waste = size_waste;
368 				seed_1 = k;
369 				seed_2 = j;
370 			}
371 		}
372 	}
373 
374 	left = v->spl_left;
375 	v->spl_nleft = 0;
376 	right = v->spl_right;
377 	v->spl_nright = 0;
378 
379 	if (seed_1 == 0 || seed_2 == 0)
380 	{
381 		seed_1 = 1;
382 		seed_2 = 2;
383 	}
384 
385 	/* form initial .. */
386 	if (ISALLTRUE(GETENTRY(entryvec, seed_1)))
387 	{
388 		datum_l = (GISTTYPE *) palloc(GTHDRSIZE);
389 		SET_VARSIZE(datum_l, GTHDRSIZE);
390 		datum_l->flag = ALLISTRUE;
391 	}
392 	else
393 	{
394 		datum_l = (GISTTYPE *) palloc(GTHDRSIZE + SIGLEN);
395 		SET_VARSIZE(datum_l, GTHDRSIZE + SIGLEN);
396 		datum_l->flag = 0;
397 		memcpy((void *) GETSIGN(datum_l), (void *) GETSIGN(GETENTRY(entryvec, seed_1)), sizeof(BITVEC));
398 	}
399 	if (ISALLTRUE(GETENTRY(entryvec, seed_2)))
400 	{
401 		datum_r = (GISTTYPE *) palloc(GTHDRSIZE);
402 		SET_VARSIZE(datum_r, GTHDRSIZE);
403 		datum_r->flag = ALLISTRUE;
404 	}
405 	else
406 	{
407 		datum_r = (GISTTYPE *) palloc(GTHDRSIZE + SIGLEN);
408 		SET_VARSIZE(datum_r, GTHDRSIZE + SIGLEN);
409 		datum_r->flag = 0;
410 		memcpy((void *) GETSIGN(datum_r), (void *) GETSIGN(GETENTRY(entryvec, seed_2)), sizeof(BITVEC));
411 	}
412 
413 	maxoff = OffsetNumberNext(maxoff);
414 	/* sort before ... */
415 	costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
416 	for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j))
417 	{
418 		costvector[j - 1].pos = j;
419 		_j = GETENTRY(entryvec, j);
420 		size_alpha = hemdist(datum_l, _j);
421 		size_beta = hemdist(datum_r, _j);
422 		costvector[j - 1].cost = Abs(size_alpha - size_beta);
423 	}
424 	qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);
425 
426 	union_l = GETSIGN(datum_l);
427 	union_r = GETSIGN(datum_r);
428 
429 	for (k = 0; k < maxoff; k++)
430 	{
431 		j = costvector[k].pos;
432 		if (j == seed_1)
433 		{
434 			*left++ = j;
435 			v->spl_nleft++;
436 			continue;
437 		}
438 		else if (j == seed_2)
439 		{
440 			*right++ = j;
441 			v->spl_nright++;
442 			continue;
443 		}
444 		_j = GETENTRY(entryvec, j);
445 		size_alpha = hemdist(datum_l, _j);
446 		size_beta = hemdist(datum_r, _j);
447 
448 		if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.00001))
449 		{
450 			if (ISALLTRUE(datum_l) || ISALLTRUE(_j))
451 			{
452 				if (!ISALLTRUE(datum_l))
453 					MemSet((void *) union_l, 0xff, sizeof(BITVEC));
454 			}
455 			else
456 			{
457 				ptr = GETSIGN(_j);
458 				LOOPBYTE
459 					union_l[i] |= ptr[i];
460 			}
461 			*left++ = j;
462 			v->spl_nleft++;
463 		}
464 		else
465 		{
466 			if (ISALLTRUE(datum_r) || ISALLTRUE(_j))
467 			{
468 				if (!ISALLTRUE(datum_r))
469 					MemSet((void *) union_r, 0xff, sizeof(BITVEC));
470 			}
471 			else
472 			{
473 				ptr = GETSIGN(_j);
474 				LOOPBYTE
475 					union_r[i] |= ptr[i];
476 			}
477 			*right++ = j;
478 			v->spl_nright++;
479 		}
480 	}
481 
482 	*right = *left = FirstOffsetNumber;
483 	pfree(costvector);
484 
485 	v->spl_ldatum = PointerGetDatum(datum_l);
486 	v->spl_rdatum = PointerGetDatum(datum_r);
487 
488 	PG_RETURN_POINTER(v);
489 }
490 
491 Datum
g_intbig_consistent(PG_FUNCTION_ARGS)492 g_intbig_consistent(PG_FUNCTION_ARGS)
493 {
494 	GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
495 	ArrayType  *query = PG_GETARG_ARRAYTYPE_P(1);
496 	StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
497 
498 	/* Oid		subtype = PG_GETARG_OID(3); */
499 	bool	   *recheck = (bool *) PG_GETARG_POINTER(4);
500 	bool		retval;
501 
502 	/* All cases served by this function are inexact */
503 	*recheck = true;
504 
505 	if (ISALLTRUE(DatumGetPointer(entry->key)))
506 		PG_RETURN_BOOL(true);
507 
508 	if (strategy == BooleanSearchStrategy)
509 	{
510 		retval = signconsistent((QUERYTYPE *) query,
511 								GETSIGN(DatumGetPointer(entry->key)),
512 								false);
513 		PG_FREE_IF_COPY(query, 1);
514 		PG_RETURN_BOOL(retval);
515 	}
516 
517 	CHECKARRVALID(query);
518 
519 	switch (strategy)
520 	{
521 		case RTOverlapStrategyNumber:
522 			retval = _intbig_overlap((GISTTYPE *) DatumGetPointer(entry->key), query);
523 			break;
524 		case RTSameStrategyNumber:
525 			if (GIST_LEAF(entry))
526 			{
527 				int			i,
528 							num = ARRNELEMS(query);
529 				int32	   *ptr = ARRPTR(query);
530 				BITVEC		qp;
531 				BITVECP		dq,
532 							de;
533 
534 				memset(qp, 0, sizeof(BITVEC));
535 
536 				while (num--)
537 				{
538 					HASH(qp, *ptr);
539 					ptr++;
540 				}
541 
542 				de = GETSIGN((GISTTYPE *) DatumGetPointer(entry->key));
543 				dq = qp;
544 				retval = true;
545 				LOOPBYTE
546 				{
547 					if (de[i] != dq[i])
548 					{
549 						retval = false;
550 						break;
551 					}
552 				}
553 
554 			}
555 			else
556 				retval = _intbig_contains((GISTTYPE *) DatumGetPointer(entry->key), query);
557 			break;
558 		case RTContainsStrategyNumber:
559 		case RTOldContainsStrategyNumber:
560 			retval = _intbig_contains((GISTTYPE *) DatumGetPointer(entry->key), query);
561 			break;
562 		case RTContainedByStrategyNumber:
563 		case RTOldContainedByStrategyNumber:
564 			if (GIST_LEAF(entry))
565 			{
566 				int			i,
567 							num = ARRNELEMS(query);
568 				int32	   *ptr = ARRPTR(query);
569 				BITVEC		qp;
570 				BITVECP		dq,
571 							de;
572 
573 				memset(qp, 0, sizeof(BITVEC));
574 
575 				while (num--)
576 				{
577 					HASH(qp, *ptr);
578 					ptr++;
579 				}
580 
581 				de = GETSIGN((GISTTYPE *) DatumGetPointer(entry->key));
582 				dq = qp;
583 				retval = true;
584 				LOOPBYTE
585 				{
586 					if (de[i] & ~dq[i])
587 					{
588 						retval = false;
589 						break;
590 					}
591 				}
592 			}
593 			else
594 			{
595 				/*
596 				 * Unfortunately, because empty arrays could be anywhere in
597 				 * the index, we must search the whole tree.
598 				 */
599 				retval = true;
600 			}
601 			break;
602 		default:
603 			retval = FALSE;
604 	}
605 	PG_FREE_IF_COPY(query, 1);
606 	PG_RETURN_BOOL(retval);
607 }
608