1 /* 2 * contrib/intarray/_intbig_gist.c 3 */ 4 #include "postgres.h" 5 6 #include "_int.h" 7 #include "access/gist.h" 8 #include "access/reloptions.h" 9 #include "access/stratnum.h" 10 #include "port/pg_bitutils.h" 11 12 #define GETENTRY(vec,pos) ((GISTTYPE *) DatumGetPointer((vec)->vector[(pos)].key)) 13 /* 14 ** _intbig methods 15 */ 16 PG_FUNCTION_INFO_V1(g_intbig_consistent); 17 PG_FUNCTION_INFO_V1(g_intbig_compress); 18 PG_FUNCTION_INFO_V1(g_intbig_decompress); 19 PG_FUNCTION_INFO_V1(g_intbig_penalty); 20 PG_FUNCTION_INFO_V1(g_intbig_picksplit); 21 PG_FUNCTION_INFO_V1(g_intbig_union); 22 PG_FUNCTION_INFO_V1(g_intbig_same); 23 PG_FUNCTION_INFO_V1(g_intbig_options); 24 25 PG_FUNCTION_INFO_V1(_intbig_in); 26 PG_FUNCTION_INFO_V1(_intbig_out); 27 28 Datum 29 _intbig_in(PG_FUNCTION_ARGS) 30 { 31 ereport(ERROR, 32 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), 33 errmsg("_intbig_in() not implemented"))); 34 PG_RETURN_DATUM(0); 35 } 36 37 Datum 38 _intbig_out(PG_FUNCTION_ARGS) 39 { 40 ereport(ERROR, 41 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), 42 errmsg("_intbig_out() not implemented"))); 43 PG_RETURN_DATUM(0); 44 } 45 46 static GISTTYPE * 47 _intbig_alloc(bool allistrue, int siglen, BITVECP sign) 48 { 49 int flag = allistrue ? ALLISTRUE : 0; 50 int size = CALCGTSIZE(flag, siglen); 51 GISTTYPE *res = (GISTTYPE *) palloc(size); 52 53 SET_VARSIZE(res, size); 54 res->flag = flag; 55 56 if (!allistrue) 57 { 58 if (sign) 59 memcpy(GETSIGN(res), sign, siglen); 60 else 61 memset(GETSIGN(res), 0, siglen); 62 } 63 64 return res; 65 } 66 67 68 /********************************************************************* 69 ** intbig functions 70 *********************************************************************/ 71 static bool 72 _intbig_overlap(GISTTYPE *a, ArrayType *b, int siglen) 73 { 74 int num = ARRNELEMS(b); 75 int32 *ptr = ARRPTR(b); 76 77 CHECKARRVALID(b); 78 79 while (num--) 80 { 81 if (GETBIT(GETSIGN(a), HASHVAL(*ptr, siglen))) 82 return true; 83 ptr++; 84 } 85 86 return false; 87 } 88 89 static bool 90 _intbig_contains(GISTTYPE *a, ArrayType *b, int siglen) 91 { 92 int num = ARRNELEMS(b); 93 int32 *ptr = ARRPTR(b); 94 95 CHECKARRVALID(b); 96 97 while (num--) 98 { 99 if (!GETBIT(GETSIGN(a), HASHVAL(*ptr, siglen))) 100 return false; 101 ptr++; 102 } 103 104 return true; 105 } 106 107 Datum 108 g_intbig_same(PG_FUNCTION_ARGS) 109 { 110 GISTTYPE *a = (GISTTYPE *) PG_GETARG_POINTER(0); 111 GISTTYPE *b = (GISTTYPE *) PG_GETARG_POINTER(1); 112 bool *result = (bool *) PG_GETARG_POINTER(2); 113 int siglen = GET_SIGLEN(); 114 115 if (ISALLTRUE(a) && ISALLTRUE(b)) 116 *result = true; 117 else if (ISALLTRUE(a)) 118 *result = false; 119 else if (ISALLTRUE(b)) 120 *result = false; 121 else 122 { 123 int32 i; 124 BITVECP sa = GETSIGN(a), 125 sb = GETSIGN(b); 126 127 *result = true; 128 LOOPBYTE(siglen) 129 { 130 if (sa[i] != sb[i]) 131 { 132 *result = false; 133 break; 134 } 135 } 136 } 137 PG_RETURN_POINTER(result); 138 } 139 140 Datum 141 g_intbig_compress(PG_FUNCTION_ARGS) 142 { 143 GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0); 144 int siglen = GET_SIGLEN(); 145 146 if (entry->leafkey) 147 { 148 GISTENTRY *retval; 149 ArrayType *in = DatumGetArrayTypeP(entry->key); 150 int32 *ptr; 151 int num; 152 GISTTYPE *res = _intbig_alloc(false, siglen, NULL); 153 154 CHECKARRVALID(in); 155 if (ARRISEMPTY(in)) 156 { 157 ptr = NULL; 158 num = 0; 159 } 160 else 161 { 162 ptr = ARRPTR(in); 163 num = ARRNELEMS(in); 164 } 165 166 while (num--) 167 { 168 HASH(GETSIGN(res), *ptr, siglen); 169 ptr++; 170 } 171 172 retval = (GISTENTRY *) palloc(sizeof(GISTENTRY)); 173 gistentryinit(*retval, PointerGetDatum(res), 174 entry->rel, entry->page, 175 entry->offset, false); 176 177 if (in != DatumGetArrayTypeP(entry->key)) 178 pfree(in); 179 180 PG_RETURN_POINTER(retval); 181 } 182 else if (!ISALLTRUE(DatumGetPointer(entry->key))) 183 { 184 GISTENTRY *retval; 185 int i; 186 BITVECP sign = GETSIGN(DatumGetPointer(entry->key)); 187 GISTTYPE *res; 188 189 LOOPBYTE(siglen) 190 { 191 if ((sign[i] & 0xff) != 0xff) 192 PG_RETURN_POINTER(entry); 193 } 194 195 res = _intbig_alloc(true, siglen, sign); 196 retval = (GISTENTRY *) palloc(sizeof(GISTENTRY)); 197 gistentryinit(*retval, PointerGetDatum(res), 198 entry->rel, entry->page, 199 entry->offset, false); 200 201 PG_RETURN_POINTER(retval); 202 } 203 204 PG_RETURN_POINTER(entry); 205 } 206 207 208 static int32 209 sizebitvec(BITVECP sign, int siglen) 210 { 211 return pg_popcount(sign, siglen); 212 } 213 214 static int 215 hemdistsign(BITVECP a, BITVECP b, int siglen) 216 { 217 int i, 218 diff, 219 dist = 0; 220 221 LOOPBYTE(siglen) 222 { 223 diff = (unsigned char) (a[i] ^ b[i]); 224 /* Using the popcount functions here isn't likely to win */ 225 dist += pg_number_of_ones[diff]; 226 } 227 return dist; 228 } 229 230 static int 231 hemdist(GISTTYPE *a, GISTTYPE *b, int siglen) 232 { 233 if (ISALLTRUE(a)) 234 { 235 if (ISALLTRUE(b)) 236 return 0; 237 else 238 return SIGLENBIT(siglen) - sizebitvec(GETSIGN(b), siglen); 239 } 240 else if (ISALLTRUE(b)) 241 return SIGLENBIT(siglen) - sizebitvec(GETSIGN(a), siglen); 242 243 return hemdistsign(GETSIGN(a), GETSIGN(b), siglen); 244 } 245 246 Datum 247 g_intbig_decompress(PG_FUNCTION_ARGS) 248 { 249 PG_RETURN_DATUM(PG_GETARG_DATUM(0)); 250 } 251 252 static int32 253 unionkey(BITVECP sbase, GISTTYPE *add, int siglen) 254 { 255 int32 i; 256 BITVECP sadd = GETSIGN(add); 257 258 if (ISALLTRUE(add)) 259 return 1; 260 LOOPBYTE(siglen) 261 sbase[i] |= sadd[i]; 262 return 0; 263 } 264 265 Datum 266 g_intbig_union(PG_FUNCTION_ARGS) 267 { 268 GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0); 269 int *size = (int *) PG_GETARG_POINTER(1); 270 int siglen = GET_SIGLEN(); 271 int32 i; 272 GISTTYPE *result = _intbig_alloc(false, siglen, NULL); 273 BITVECP base = GETSIGN(result); 274 275 for (i = 0; i < entryvec->n; i++) 276 { 277 if (unionkey(base, GETENTRY(entryvec, i), siglen)) 278 { 279 result->flag |= ALLISTRUE; 280 SET_VARSIZE(result, CALCGTSIZE(ALLISTRUE, siglen)); 281 break; 282 } 283 } 284 285 *size = VARSIZE(result); 286 287 PG_RETURN_POINTER(result); 288 } 289 290 Datum 291 g_intbig_penalty(PG_FUNCTION_ARGS) 292 { 293 GISTENTRY *origentry = (GISTENTRY *) PG_GETARG_POINTER(0); /* always ISSIGNKEY */ 294 GISTENTRY *newentry = (GISTENTRY *) PG_GETARG_POINTER(1); 295 float *penalty = (float *) PG_GETARG_POINTER(2); 296 GISTTYPE *origval = (GISTTYPE *) DatumGetPointer(origentry->key); 297 GISTTYPE *newval = (GISTTYPE *) DatumGetPointer(newentry->key); 298 int siglen = GET_SIGLEN(); 299 300 *penalty = hemdist(origval, newval, siglen); 301 PG_RETURN_POINTER(penalty); 302 } 303 304 305 typedef struct 306 { 307 OffsetNumber pos; 308 int32 cost; 309 } SPLITCOST; 310 311 static int 312 comparecost(const void *a, const void *b) 313 { 314 return ((const SPLITCOST *) a)->cost - ((const SPLITCOST *) b)->cost; 315 } 316 317 318 Datum 319 g_intbig_picksplit(PG_FUNCTION_ARGS) 320 { 321 GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0); 322 GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1); 323 int siglen = GET_SIGLEN(); 324 OffsetNumber k, 325 j; 326 GISTTYPE *datum_l, 327 *datum_r; 328 BITVECP union_l, 329 union_r; 330 int32 size_alpha, 331 size_beta; 332 int32 size_waste, 333 waste = -1; 334 int32 nbytes; 335 OffsetNumber seed_1 = 0, 336 seed_2 = 0; 337 OffsetNumber *left, 338 *right; 339 OffsetNumber maxoff; 340 BITVECP ptr; 341 int i; 342 SPLITCOST *costvector; 343 GISTTYPE *_k, 344 *_j; 345 346 maxoff = entryvec->n - 2; 347 nbytes = (maxoff + 2) * sizeof(OffsetNumber); 348 v->spl_left = (OffsetNumber *) palloc(nbytes); 349 v->spl_right = (OffsetNumber *) palloc(nbytes); 350 351 for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k)) 352 { 353 _k = GETENTRY(entryvec, k); 354 for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j)) 355 { 356 size_waste = hemdist(_k, GETENTRY(entryvec, j), siglen); 357 if (size_waste > waste) 358 { 359 waste = size_waste; 360 seed_1 = k; 361 seed_2 = j; 362 } 363 } 364 } 365 366 left = v->spl_left; 367 v->spl_nleft = 0; 368 right = v->spl_right; 369 v->spl_nright = 0; 370 371 if (seed_1 == 0 || seed_2 == 0) 372 { 373 seed_1 = 1; 374 seed_2 = 2; 375 } 376 377 /* form initial .. */ 378 datum_l = _intbig_alloc(ISALLTRUE(GETENTRY(entryvec, seed_1)), siglen, 379 GETSIGN(GETENTRY(entryvec, seed_1))); 380 datum_r = _intbig_alloc(ISALLTRUE(GETENTRY(entryvec, seed_2)), siglen, 381 GETSIGN(GETENTRY(entryvec, seed_2))); 382 383 maxoff = OffsetNumberNext(maxoff); 384 /* sort before ... */ 385 costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff); 386 for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j)) 387 { 388 costvector[j - 1].pos = j; 389 _j = GETENTRY(entryvec, j); 390 size_alpha = hemdist(datum_l, _j, siglen); 391 size_beta = hemdist(datum_r, _j, siglen); 392 costvector[j - 1].cost = Abs(size_alpha - size_beta); 393 } 394 qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost); 395 396 union_l = GETSIGN(datum_l); 397 union_r = GETSIGN(datum_r); 398 399 for (k = 0; k < maxoff; k++) 400 { 401 j = costvector[k].pos; 402 if (j == seed_1) 403 { 404 *left++ = j; 405 v->spl_nleft++; 406 continue; 407 } 408 else if (j == seed_2) 409 { 410 *right++ = j; 411 v->spl_nright++; 412 continue; 413 } 414 _j = GETENTRY(entryvec, j); 415 size_alpha = hemdist(datum_l, _j, siglen); 416 size_beta = hemdist(datum_r, _j, siglen); 417 418 if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.00001)) 419 { 420 if (ISALLTRUE(datum_l) || ISALLTRUE(_j)) 421 { 422 if (!ISALLTRUE(datum_l)) 423 MemSet((void *) union_l, 0xff, siglen); 424 } 425 else 426 { 427 ptr = GETSIGN(_j); 428 LOOPBYTE(siglen) 429 union_l[i] |= ptr[i]; 430 } 431 *left++ = j; 432 v->spl_nleft++; 433 } 434 else 435 { 436 if (ISALLTRUE(datum_r) || ISALLTRUE(_j)) 437 { 438 if (!ISALLTRUE(datum_r)) 439 MemSet((void *) union_r, 0xff, siglen); 440 } 441 else 442 { 443 ptr = GETSIGN(_j); 444 LOOPBYTE(siglen) 445 union_r[i] |= ptr[i]; 446 } 447 *right++ = j; 448 v->spl_nright++; 449 } 450 } 451 452 *right = *left = FirstOffsetNumber; 453 pfree(costvector); 454 455 v->spl_ldatum = PointerGetDatum(datum_l); 456 v->spl_rdatum = PointerGetDatum(datum_r); 457 458 PG_RETURN_POINTER(v); 459 } 460 461 Datum 462 g_intbig_consistent(PG_FUNCTION_ARGS) 463 { 464 GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0); 465 ArrayType *query = PG_GETARG_ARRAYTYPE_P(1); 466 StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2); 467 468 /* Oid subtype = PG_GETARG_OID(3); */ 469 bool *recheck = (bool *) PG_GETARG_POINTER(4); 470 int siglen = GET_SIGLEN(); 471 bool retval; 472 473 /* All cases served by this function are inexact */ 474 *recheck = true; 475 476 if (ISALLTRUE(DatumGetPointer(entry->key))) 477 PG_RETURN_BOOL(true); 478 479 if (strategy == BooleanSearchStrategy) 480 { 481 retval = signconsistent((QUERYTYPE *) query, 482 GETSIGN(DatumGetPointer(entry->key)), 483 siglen, 484 false); 485 PG_FREE_IF_COPY(query, 1); 486 PG_RETURN_BOOL(retval); 487 } 488 489 CHECKARRVALID(query); 490 491 switch (strategy) 492 { 493 case RTOverlapStrategyNumber: 494 retval = _intbig_overlap((GISTTYPE *) DatumGetPointer(entry->key), 495 query, siglen); 496 break; 497 case RTSameStrategyNumber: 498 if (GIST_LEAF(entry)) 499 { 500 int i, 501 num = ARRNELEMS(query); 502 int32 *ptr = ARRPTR(query); 503 BITVECP dq = palloc0(siglen), 504 de; 505 506 while (num--) 507 { 508 HASH(dq, *ptr, siglen); 509 ptr++; 510 } 511 512 de = GETSIGN((GISTTYPE *) DatumGetPointer(entry->key)); 513 retval = true; 514 LOOPBYTE(siglen) 515 { 516 if (de[i] != dq[i]) 517 { 518 retval = false; 519 break; 520 } 521 } 522 523 pfree(dq); 524 } 525 else 526 retval = _intbig_contains((GISTTYPE *) DatumGetPointer(entry->key), 527 query, siglen); 528 break; 529 case RTContainsStrategyNumber: 530 case RTOldContainsStrategyNumber: 531 retval = _intbig_contains((GISTTYPE *) DatumGetPointer(entry->key), 532 query, siglen); 533 break; 534 case RTContainedByStrategyNumber: 535 case RTOldContainedByStrategyNumber: 536 if (GIST_LEAF(entry)) 537 { 538 int i, 539 num = ARRNELEMS(query); 540 int32 *ptr = ARRPTR(query); 541 BITVECP dq = palloc0(siglen), 542 de; 543 544 while (num--) 545 { 546 HASH(dq, *ptr, siglen); 547 ptr++; 548 } 549 550 de = GETSIGN((GISTTYPE *) DatumGetPointer(entry->key)); 551 retval = true; 552 LOOPBYTE(siglen) 553 { 554 if (de[i] & ~dq[i]) 555 { 556 retval = false; 557 break; 558 } 559 } 560 } 561 else 562 { 563 /* 564 * Unfortunately, because empty arrays could be anywhere in 565 * the index, we must search the whole tree. 566 */ 567 retval = true; 568 } 569 break; 570 default: 571 retval = false; 572 } 573 PG_FREE_IF_COPY(query, 1); 574 PG_RETURN_BOOL(retval); 575 } 576 577 Datum 578 g_intbig_options(PG_FUNCTION_ARGS) 579 { 580 local_relopts *relopts = (local_relopts *) PG_GETARG_POINTER(0); 581 582 init_local_reloptions(relopts, sizeof(GISTIntArrayBigOptions)); 583 add_local_int_reloption(relopts, "siglen", 584 "signature length in bytes", 585 SIGLEN_DEFAULT, 1, SIGLEN_MAX, 586 offsetof(GISTIntArrayBigOptions, siglen)); 587 588 PG_RETURN_VOID(); 589 } 590