1 /*
2 * contrib/intarray/_intbig_gist.c
3 */
4 #include "postgres.h"
5
6 #include "_int.h"
7 #include "access/gist.h"
8 #include "access/reloptions.h"
9 #include "access/stratnum.h"
10 #include "port/pg_bitutils.h"
11
12 #define GETENTRY(vec,pos) ((GISTTYPE *) DatumGetPointer((vec)->vector[(pos)].key))
13 /*
14 ** _intbig methods
15 */
16 PG_FUNCTION_INFO_V1(g_intbig_consistent);
17 PG_FUNCTION_INFO_V1(g_intbig_compress);
18 PG_FUNCTION_INFO_V1(g_intbig_decompress);
19 PG_FUNCTION_INFO_V1(g_intbig_penalty);
20 PG_FUNCTION_INFO_V1(g_intbig_picksplit);
21 PG_FUNCTION_INFO_V1(g_intbig_union);
22 PG_FUNCTION_INFO_V1(g_intbig_same);
23 PG_FUNCTION_INFO_V1(g_intbig_options);
24
25 PG_FUNCTION_INFO_V1(_intbig_in);
26 PG_FUNCTION_INFO_V1(_intbig_out);
27
28 Datum
_intbig_in(PG_FUNCTION_ARGS)29 _intbig_in(PG_FUNCTION_ARGS)
30 {
31 ereport(ERROR,
32 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
33 errmsg("_intbig_in() not implemented")));
34 PG_RETURN_DATUM(0);
35 }
36
37 Datum
_intbig_out(PG_FUNCTION_ARGS)38 _intbig_out(PG_FUNCTION_ARGS)
39 {
40 ereport(ERROR,
41 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
42 errmsg("_intbig_out() not implemented")));
43 PG_RETURN_DATUM(0);
44 }
45
46 static GISTTYPE *
_intbig_alloc(bool allistrue,int siglen,BITVECP sign)47 _intbig_alloc(bool allistrue, int siglen, BITVECP sign)
48 {
49 int flag = allistrue ? ALLISTRUE : 0;
50 int size = CALCGTSIZE(flag, siglen);
51 GISTTYPE *res = (GISTTYPE *) palloc(size);
52
53 SET_VARSIZE(res, size);
54 res->flag = flag;
55
56 if (!allistrue)
57 {
58 if (sign)
59 memcpy(GETSIGN(res), sign, siglen);
60 else
61 memset(GETSIGN(res), 0, siglen);
62 }
63
64 return res;
65 }
66
67
68 /*********************************************************************
69 ** intbig functions
70 *********************************************************************/
71 static bool
_intbig_overlap(GISTTYPE * a,ArrayType * b,int siglen)72 _intbig_overlap(GISTTYPE *a, ArrayType *b, int siglen)
73 {
74 int num = ARRNELEMS(b);
75 int32 *ptr = ARRPTR(b);
76
77 CHECKARRVALID(b);
78
79 while (num--)
80 {
81 if (GETBIT(GETSIGN(a), HASHVAL(*ptr, siglen)))
82 return true;
83 ptr++;
84 }
85
86 return false;
87 }
88
89 static bool
_intbig_contains(GISTTYPE * a,ArrayType * b,int siglen)90 _intbig_contains(GISTTYPE *a, ArrayType *b, int siglen)
91 {
92 int num = ARRNELEMS(b);
93 int32 *ptr = ARRPTR(b);
94
95 CHECKARRVALID(b);
96
97 while (num--)
98 {
99 if (!GETBIT(GETSIGN(a), HASHVAL(*ptr, siglen)))
100 return false;
101 ptr++;
102 }
103
104 return true;
105 }
106
107 Datum
g_intbig_same(PG_FUNCTION_ARGS)108 g_intbig_same(PG_FUNCTION_ARGS)
109 {
110 GISTTYPE *a = (GISTTYPE *) PG_GETARG_POINTER(0);
111 GISTTYPE *b = (GISTTYPE *) PG_GETARG_POINTER(1);
112 bool *result = (bool *) PG_GETARG_POINTER(2);
113 int siglen = GET_SIGLEN();
114
115 if (ISALLTRUE(a) && ISALLTRUE(b))
116 *result = true;
117 else if (ISALLTRUE(a))
118 *result = false;
119 else if (ISALLTRUE(b))
120 *result = false;
121 else
122 {
123 int32 i;
124 BITVECP sa = GETSIGN(a),
125 sb = GETSIGN(b);
126
127 *result = true;
128 LOOPBYTE(siglen)
129 {
130 if (sa[i] != sb[i])
131 {
132 *result = false;
133 break;
134 }
135 }
136 }
137 PG_RETURN_POINTER(result);
138 }
139
140 Datum
g_intbig_compress(PG_FUNCTION_ARGS)141 g_intbig_compress(PG_FUNCTION_ARGS)
142 {
143 GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
144 int siglen = GET_SIGLEN();
145
146 if (entry->leafkey)
147 {
148 GISTENTRY *retval;
149 ArrayType *in = DatumGetArrayTypeP(entry->key);
150 int32 *ptr;
151 int num;
152 GISTTYPE *res = _intbig_alloc(false, siglen, NULL);
153
154 CHECKARRVALID(in);
155 if (ARRISEMPTY(in))
156 {
157 ptr = NULL;
158 num = 0;
159 }
160 else
161 {
162 ptr = ARRPTR(in);
163 num = ARRNELEMS(in);
164 }
165
166 while (num--)
167 {
168 HASH(GETSIGN(res), *ptr, siglen);
169 ptr++;
170 }
171
172 retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
173 gistentryinit(*retval, PointerGetDatum(res),
174 entry->rel, entry->page,
175 entry->offset, false);
176
177 if (in != DatumGetArrayTypeP(entry->key))
178 pfree(in);
179
180 PG_RETURN_POINTER(retval);
181 }
182 else if (!ISALLTRUE(DatumGetPointer(entry->key)))
183 {
184 GISTENTRY *retval;
185 int i;
186 BITVECP sign = GETSIGN(DatumGetPointer(entry->key));
187 GISTTYPE *res;
188
189 LOOPBYTE(siglen)
190 {
191 if ((sign[i] & 0xff) != 0xff)
192 PG_RETURN_POINTER(entry);
193 }
194
195 res = _intbig_alloc(true, siglen, sign);
196 retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
197 gistentryinit(*retval, PointerGetDatum(res),
198 entry->rel, entry->page,
199 entry->offset, false);
200
201 PG_RETURN_POINTER(retval);
202 }
203
204 PG_RETURN_POINTER(entry);
205 }
206
207
208 static int32
sizebitvec(BITVECP sign,int siglen)209 sizebitvec(BITVECP sign, int siglen)
210 {
211 return pg_popcount(sign, siglen);
212 }
213
214 static int
hemdistsign(BITVECP a,BITVECP b,int siglen)215 hemdistsign(BITVECP a, BITVECP b, int siglen)
216 {
217 int i,
218 diff,
219 dist = 0;
220
221 LOOPBYTE(siglen)
222 {
223 diff = (unsigned char) (a[i] ^ b[i]);
224 /* Using the popcount functions here isn't likely to win */
225 dist += pg_number_of_ones[diff];
226 }
227 return dist;
228 }
229
230 static int
hemdist(GISTTYPE * a,GISTTYPE * b,int siglen)231 hemdist(GISTTYPE *a, GISTTYPE *b, int siglen)
232 {
233 if (ISALLTRUE(a))
234 {
235 if (ISALLTRUE(b))
236 return 0;
237 else
238 return SIGLENBIT(siglen) - sizebitvec(GETSIGN(b), siglen);
239 }
240 else if (ISALLTRUE(b))
241 return SIGLENBIT(siglen) - sizebitvec(GETSIGN(a), siglen);
242
243 return hemdistsign(GETSIGN(a), GETSIGN(b), siglen);
244 }
245
246 Datum
g_intbig_decompress(PG_FUNCTION_ARGS)247 g_intbig_decompress(PG_FUNCTION_ARGS)
248 {
249 PG_RETURN_DATUM(PG_GETARG_DATUM(0));
250 }
251
252 static int32
unionkey(BITVECP sbase,GISTTYPE * add,int siglen)253 unionkey(BITVECP sbase, GISTTYPE *add, int siglen)
254 {
255 int32 i;
256 BITVECP sadd = GETSIGN(add);
257
258 if (ISALLTRUE(add))
259 return 1;
260 LOOPBYTE(siglen)
261 sbase[i] |= sadd[i];
262 return 0;
263 }
264
265 Datum
g_intbig_union(PG_FUNCTION_ARGS)266 g_intbig_union(PG_FUNCTION_ARGS)
267 {
268 GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
269 int *size = (int *) PG_GETARG_POINTER(1);
270 int siglen = GET_SIGLEN();
271 int32 i;
272 GISTTYPE *result = _intbig_alloc(false, siglen, NULL);
273 BITVECP base = GETSIGN(result);
274
275 for (i = 0; i < entryvec->n; i++)
276 {
277 if (unionkey(base, GETENTRY(entryvec, i), siglen))
278 {
279 result->flag |= ALLISTRUE;
280 SET_VARSIZE(result, CALCGTSIZE(ALLISTRUE, siglen));
281 break;
282 }
283 }
284
285 *size = VARSIZE(result);
286
287 PG_RETURN_POINTER(result);
288 }
289
290 Datum
g_intbig_penalty(PG_FUNCTION_ARGS)291 g_intbig_penalty(PG_FUNCTION_ARGS)
292 {
293 GISTENTRY *origentry = (GISTENTRY *) PG_GETARG_POINTER(0); /* always ISSIGNKEY */
294 GISTENTRY *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);
295 float *penalty = (float *) PG_GETARG_POINTER(2);
296 GISTTYPE *origval = (GISTTYPE *) DatumGetPointer(origentry->key);
297 GISTTYPE *newval = (GISTTYPE *) DatumGetPointer(newentry->key);
298 int siglen = GET_SIGLEN();
299
300 *penalty = hemdist(origval, newval, siglen);
301 PG_RETURN_POINTER(penalty);
302 }
303
304
305 typedef struct
306 {
307 OffsetNumber pos;
308 int32 cost;
309 } SPLITCOST;
310
311 static int
comparecost(const void * a,const void * b)312 comparecost(const void *a, const void *b)
313 {
314 return ((const SPLITCOST *) a)->cost - ((const SPLITCOST *) b)->cost;
315 }
316
317
318 Datum
g_intbig_picksplit(PG_FUNCTION_ARGS)319 g_intbig_picksplit(PG_FUNCTION_ARGS)
320 {
321 GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
322 GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
323 int siglen = GET_SIGLEN();
324 OffsetNumber k,
325 j;
326 GISTTYPE *datum_l,
327 *datum_r;
328 BITVECP union_l,
329 union_r;
330 int32 size_alpha,
331 size_beta;
332 int32 size_waste,
333 waste = -1;
334 int32 nbytes;
335 OffsetNumber seed_1 = 0,
336 seed_2 = 0;
337 OffsetNumber *left,
338 *right;
339 OffsetNumber maxoff;
340 BITVECP ptr;
341 int i;
342 SPLITCOST *costvector;
343 GISTTYPE *_k,
344 *_j;
345
346 maxoff = entryvec->n - 2;
347 nbytes = (maxoff + 2) * sizeof(OffsetNumber);
348 v->spl_left = (OffsetNumber *) palloc(nbytes);
349 v->spl_right = (OffsetNumber *) palloc(nbytes);
350
351 for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k))
352 {
353 _k = GETENTRY(entryvec, k);
354 for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j))
355 {
356 size_waste = hemdist(_k, GETENTRY(entryvec, j), siglen);
357 if (size_waste > waste)
358 {
359 waste = size_waste;
360 seed_1 = k;
361 seed_2 = j;
362 }
363 }
364 }
365
366 left = v->spl_left;
367 v->spl_nleft = 0;
368 right = v->spl_right;
369 v->spl_nright = 0;
370
371 if (seed_1 == 0 || seed_2 == 0)
372 {
373 seed_1 = 1;
374 seed_2 = 2;
375 }
376
377 /* form initial .. */
378 datum_l = _intbig_alloc(ISALLTRUE(GETENTRY(entryvec, seed_1)), siglen,
379 GETSIGN(GETENTRY(entryvec, seed_1)));
380 datum_r = _intbig_alloc(ISALLTRUE(GETENTRY(entryvec, seed_2)), siglen,
381 GETSIGN(GETENTRY(entryvec, seed_2)));
382
383 maxoff = OffsetNumberNext(maxoff);
384 /* sort before ... */
385 costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
386 for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j))
387 {
388 costvector[j - 1].pos = j;
389 _j = GETENTRY(entryvec, j);
390 size_alpha = hemdist(datum_l, _j, siglen);
391 size_beta = hemdist(datum_r, _j, siglen);
392 costvector[j - 1].cost = Abs(size_alpha - size_beta);
393 }
394 qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);
395
396 union_l = GETSIGN(datum_l);
397 union_r = GETSIGN(datum_r);
398
399 for (k = 0; k < maxoff; k++)
400 {
401 j = costvector[k].pos;
402 if (j == seed_1)
403 {
404 *left++ = j;
405 v->spl_nleft++;
406 continue;
407 }
408 else if (j == seed_2)
409 {
410 *right++ = j;
411 v->spl_nright++;
412 continue;
413 }
414 _j = GETENTRY(entryvec, j);
415 size_alpha = hemdist(datum_l, _j, siglen);
416 size_beta = hemdist(datum_r, _j, siglen);
417
418 if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.00001))
419 {
420 if (ISALLTRUE(datum_l) || ISALLTRUE(_j))
421 {
422 if (!ISALLTRUE(datum_l))
423 MemSet((void *) union_l, 0xff, siglen);
424 }
425 else
426 {
427 ptr = GETSIGN(_j);
428 LOOPBYTE(siglen)
429 union_l[i] |= ptr[i];
430 }
431 *left++ = j;
432 v->spl_nleft++;
433 }
434 else
435 {
436 if (ISALLTRUE(datum_r) || ISALLTRUE(_j))
437 {
438 if (!ISALLTRUE(datum_r))
439 MemSet((void *) union_r, 0xff, siglen);
440 }
441 else
442 {
443 ptr = GETSIGN(_j);
444 LOOPBYTE(siglen)
445 union_r[i] |= ptr[i];
446 }
447 *right++ = j;
448 v->spl_nright++;
449 }
450 }
451
452 *right = *left = FirstOffsetNumber;
453 pfree(costvector);
454
455 v->spl_ldatum = PointerGetDatum(datum_l);
456 v->spl_rdatum = PointerGetDatum(datum_r);
457
458 PG_RETURN_POINTER(v);
459 }
460
461 Datum
g_intbig_consistent(PG_FUNCTION_ARGS)462 g_intbig_consistent(PG_FUNCTION_ARGS)
463 {
464 GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
465 ArrayType *query = PG_GETARG_ARRAYTYPE_P(1);
466 StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
467
468 /* Oid subtype = PG_GETARG_OID(3); */
469 bool *recheck = (bool *) PG_GETARG_POINTER(4);
470 int siglen = GET_SIGLEN();
471 bool retval;
472
473 /* All cases served by this function are inexact */
474 *recheck = true;
475
476 if (ISALLTRUE(DatumGetPointer(entry->key)))
477 PG_RETURN_BOOL(true);
478
479 if (strategy == BooleanSearchStrategy)
480 {
481 retval = signconsistent((QUERYTYPE *) query,
482 GETSIGN(DatumGetPointer(entry->key)),
483 siglen,
484 false);
485 PG_FREE_IF_COPY(query, 1);
486 PG_RETURN_BOOL(retval);
487 }
488
489 CHECKARRVALID(query);
490
491 switch (strategy)
492 {
493 case RTOverlapStrategyNumber:
494 retval = _intbig_overlap((GISTTYPE *) DatumGetPointer(entry->key),
495 query, siglen);
496 break;
497 case RTSameStrategyNumber:
498 if (GIST_LEAF(entry))
499 {
500 int i,
501 num = ARRNELEMS(query);
502 int32 *ptr = ARRPTR(query);
503 BITVECP dq = palloc0(siglen),
504 de;
505
506 while (num--)
507 {
508 HASH(dq, *ptr, siglen);
509 ptr++;
510 }
511
512 de = GETSIGN((GISTTYPE *) DatumGetPointer(entry->key));
513 retval = true;
514 LOOPBYTE(siglen)
515 {
516 if (de[i] != dq[i])
517 {
518 retval = false;
519 break;
520 }
521 }
522
523 pfree(dq);
524 }
525 else
526 retval = _intbig_contains((GISTTYPE *) DatumGetPointer(entry->key),
527 query, siglen);
528 break;
529 case RTContainsStrategyNumber:
530 case RTOldContainsStrategyNumber:
531 retval = _intbig_contains((GISTTYPE *) DatumGetPointer(entry->key),
532 query, siglen);
533 break;
534 case RTContainedByStrategyNumber:
535 case RTOldContainedByStrategyNumber:
536
537 /*
538 * This code is unreachable as of intarray 1.4, because the <@
539 * operator has been removed from the opclass. We keep it for now
540 * to support older versions of the SQL definitions.
541 */
542 if (GIST_LEAF(entry))
543 {
544 int i,
545 num = ARRNELEMS(query);
546 int32 *ptr = ARRPTR(query);
547 BITVECP dq = palloc0(siglen),
548 de;
549
550 while (num--)
551 {
552 HASH(dq, *ptr, siglen);
553 ptr++;
554 }
555
556 de = GETSIGN((GISTTYPE *) DatumGetPointer(entry->key));
557 retval = true;
558 LOOPBYTE(siglen)
559 {
560 if (de[i] & ~dq[i])
561 {
562 retval = false;
563 break;
564 }
565 }
566 }
567 else
568 {
569 /*
570 * Unfortunately, because empty arrays could be anywhere in
571 * the index, we must search the whole tree.
572 */
573 retval = true;
574 }
575 break;
576 default:
577 retval = false;
578 }
579 PG_FREE_IF_COPY(query, 1);
580 PG_RETURN_BOOL(retval);
581 }
582
583 Datum
g_intbig_options(PG_FUNCTION_ARGS)584 g_intbig_options(PG_FUNCTION_ARGS)
585 {
586 local_relopts *relopts = (local_relopts *) PG_GETARG_POINTER(0);
587
588 init_local_reloptions(relopts, sizeof(GISTIntArrayBigOptions));
589 add_local_int_reloption(relopts, "siglen",
590 "signature length in bytes",
591 SIGLEN_DEFAULT, 1, SIGLEN_MAX,
592 offsetof(GISTIntArrayBigOptions, siglen));
593
594 PG_RETURN_VOID();
595 }
596