1 /*
2 * contrib/hstore/hstore_gist.c
3 */
4 #include "postgres.h"
5
6 #include "access/gist.h"
7 #include "access/reloptions.h"
8 #include "access/stratnum.h"
9 #include "catalog/pg_type.h"
10 #include "hstore.h"
11 #include "utils/pg_crc.h"
12
13 /* gist_hstore_ops opclass options */
14 typedef struct
15 {
16 int32 vl_len_; /* varlena header (do not touch directly!) */
17 int siglen; /* signature length in bytes */
18 } GistHstoreOptions;
19
20 /* bigint defines */
21 #define BITBYTE 8
22 #define SIGLEN_DEFAULT (sizeof(int32) * 4)
23 #define SIGLEN_MAX GISTMaxIndexKeySize
24 #define SIGLENBIT(siglen) ((siglen) * BITBYTE)
25 #define GET_SIGLEN() (PG_HAS_OPCLASS_OPTIONS() ? \
26 ((GistHstoreOptions *) PG_GET_OPCLASS_OPTIONS())->siglen : \
27 SIGLEN_DEFAULT)
28
29
30 typedef char *BITVECP;
31
32 #define LOOPBYTE(siglen) \
33 for (i = 0; i < (siglen); i++)
34
35 #define LOOPBIT(siglen) \
36 for (i = 0; i < SIGLENBIT(siglen); i++)
37
38 /* beware of multiple evaluation of arguments to these macros! */
39 #define GETBYTE(x,i) ( *( (BITVECP)(x) + (int)( (i) / BITBYTE ) ) )
40 #define GETBITBYTE(x,i) ( (*((char*)(x)) >> (i)) & 0x01 )
41 #define CLRBIT(x,i) GETBYTE(x,i) &= ~( 0x01 << ( (i) % BITBYTE ) )
42 #define SETBIT(x,i) GETBYTE(x,i) |= ( 0x01 << ( (i) % BITBYTE ) )
43 #define GETBIT(x,i) ( (GETBYTE(x,i) >> ( (i) % BITBYTE )) & 0x01 )
44 #define HASHVAL(val, siglen) (((unsigned int)(val)) % SIGLENBIT(siglen))
45 #define HASH(sign, val, siglen) SETBIT((sign), HASHVAL(val, siglen))
46
47 typedef struct
48 {
49 int32 vl_len_; /* varlena header (do not touch directly!) */
50 int32 flag;
51 char data[FLEXIBLE_ARRAY_MEMBER];
52 } GISTTYPE;
53
54 #define ALLISTRUE 0x04
55
56 #define ISALLTRUE(x) ( ((GISTTYPE*)x)->flag & ALLISTRUE )
57
58 #define GTHDRSIZE (VARHDRSZ + sizeof(int32))
59 #define CALCGTSIZE(flag, siglen) ( GTHDRSIZE+(((flag) & ALLISTRUE) ? 0 : (siglen)) )
60
61 #define GETSIGN(x) ( (BITVECP)( (char*)x+GTHDRSIZE ) )
62
63 #define SUMBIT(val) ( \
64 GETBITBYTE((val),0) + \
65 GETBITBYTE((val),1) + \
66 GETBITBYTE((val),2) + \
67 GETBITBYTE((val),3) + \
68 GETBITBYTE((val),4) + \
69 GETBITBYTE((val),5) + \
70 GETBITBYTE((val),6) + \
71 GETBITBYTE((val),7) \
72 )
73
74 #define GETENTRY(vec,pos) ((GISTTYPE *) DatumGetPointer((vec)->vector[(pos)].key))
75
76 #define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
77
78 /* shorthand for calculating CRC-32 of a single chunk of data. */
79 static pg_crc32
crc32_sz(char * buf,int size)80 crc32_sz(char *buf, int size)
81 {
82 pg_crc32 crc;
83
84 INIT_TRADITIONAL_CRC32(crc);
85 COMP_TRADITIONAL_CRC32(crc, buf, size);
86 FIN_TRADITIONAL_CRC32(crc);
87
88 return crc;
89 }
90
91
92 PG_FUNCTION_INFO_V1(ghstore_in);
93 PG_FUNCTION_INFO_V1(ghstore_out);
94
95
96 Datum
ghstore_in(PG_FUNCTION_ARGS)97 ghstore_in(PG_FUNCTION_ARGS)
98 {
99 elog(ERROR, "Not implemented");
100 PG_RETURN_DATUM(0);
101 }
102
103 Datum
ghstore_out(PG_FUNCTION_ARGS)104 ghstore_out(PG_FUNCTION_ARGS)
105 {
106 elog(ERROR, "Not implemented");
107 PG_RETURN_DATUM(0);
108 }
109
110 static GISTTYPE *
ghstore_alloc(bool allistrue,int siglen,BITVECP sign)111 ghstore_alloc(bool allistrue, int siglen, BITVECP sign)
112 {
113 int flag = allistrue ? ALLISTRUE : 0;
114 int size = CALCGTSIZE(flag, siglen);
115 GISTTYPE *res = palloc(size);
116
117 SET_VARSIZE(res, size);
118 res->flag = flag;
119
120 if (!allistrue)
121 {
122 if (sign)
123 memcpy(GETSIGN(res), sign, siglen);
124 else
125 memset(GETSIGN(res), 0, siglen);
126 }
127
128 return res;
129 }
130
131 PG_FUNCTION_INFO_V1(ghstore_consistent);
132 PG_FUNCTION_INFO_V1(ghstore_compress);
133 PG_FUNCTION_INFO_V1(ghstore_decompress);
134 PG_FUNCTION_INFO_V1(ghstore_penalty);
135 PG_FUNCTION_INFO_V1(ghstore_picksplit);
136 PG_FUNCTION_INFO_V1(ghstore_union);
137 PG_FUNCTION_INFO_V1(ghstore_same);
138 PG_FUNCTION_INFO_V1(ghstore_options);
139
140 Datum
ghstore_compress(PG_FUNCTION_ARGS)141 ghstore_compress(PG_FUNCTION_ARGS)
142 {
143 GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
144 int siglen = GET_SIGLEN();
145 GISTENTRY *retval = entry;
146
147 if (entry->leafkey)
148 {
149 GISTTYPE *res = ghstore_alloc(false, siglen, NULL);
150 HStore *val = DatumGetHStoreP(entry->key);
151 HEntry *hsent = ARRPTR(val);
152 char *ptr = STRPTR(val);
153 int count = HS_COUNT(val);
154 int i;
155
156 for (i = 0; i < count; ++i)
157 {
158 int h;
159
160 h = crc32_sz((char *) HSTORE_KEY(hsent, ptr, i),
161 HSTORE_KEYLEN(hsent, i));
162 HASH(GETSIGN(res), h, siglen);
163 if (!HSTORE_VALISNULL(hsent, i))
164 {
165 h = crc32_sz((char *) HSTORE_VAL(hsent, ptr, i),
166 HSTORE_VALLEN(hsent, i));
167 HASH(GETSIGN(res), h, siglen);
168 }
169 }
170
171 retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
172 gistentryinit(*retval, PointerGetDatum(res),
173 entry->rel, entry->page,
174 entry->offset,
175 false);
176 }
177 else if (!ISALLTRUE(DatumGetPointer(entry->key)))
178 {
179 int32 i;
180 GISTTYPE *res;
181 BITVECP sign = GETSIGN(DatumGetPointer(entry->key));
182
183 LOOPBYTE(siglen)
184 {
185 if ((sign[i] & 0xff) != 0xff)
186 PG_RETURN_POINTER(retval);
187 }
188
189 res = ghstore_alloc(true, siglen, NULL);
190
191 retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
192 gistentryinit(*retval, PointerGetDatum(res),
193 entry->rel, entry->page,
194 entry->offset,
195 false);
196 }
197
198 PG_RETURN_POINTER(retval);
199 }
200
201 /*
202 * Since type ghstore isn't toastable (and doesn't need to be),
203 * this function can be a no-op.
204 */
205 Datum
ghstore_decompress(PG_FUNCTION_ARGS)206 ghstore_decompress(PG_FUNCTION_ARGS)
207 {
208 PG_RETURN_POINTER(PG_GETARG_POINTER(0));
209 }
210
211 Datum
ghstore_same(PG_FUNCTION_ARGS)212 ghstore_same(PG_FUNCTION_ARGS)
213 {
214 GISTTYPE *a = (GISTTYPE *) PG_GETARG_POINTER(0);
215 GISTTYPE *b = (GISTTYPE *) PG_GETARG_POINTER(1);
216 bool *result = (bool *) PG_GETARG_POINTER(2);
217 int siglen = GET_SIGLEN();
218
219
220 if (ISALLTRUE(a) && ISALLTRUE(b))
221 *result = true;
222 else if (ISALLTRUE(a))
223 *result = false;
224 else if (ISALLTRUE(b))
225 *result = false;
226 else
227 {
228 int32 i;
229 BITVECP sa = GETSIGN(a),
230 sb = GETSIGN(b);
231
232 *result = true;
233 LOOPBYTE(siglen)
234 {
235 if (sa[i] != sb[i])
236 {
237 *result = false;
238 break;
239 }
240 }
241 }
242 PG_RETURN_POINTER(result);
243 }
244
245 static int32
sizebitvec(BITVECP sign,int siglen)246 sizebitvec(BITVECP sign, int siglen)
247 {
248 int32 size = 0,
249 i;
250
251 LOOPBYTE(siglen)
252 {
253 size += SUMBIT(sign);
254 sign = (BITVECP) (((char *) sign) + 1);
255 }
256 return size;
257 }
258
259 static int
hemdistsign(BITVECP a,BITVECP b,int siglen)260 hemdistsign(BITVECP a, BITVECP b, int siglen)
261 {
262 int i,
263 dist = 0;
264
265 LOOPBIT(siglen)
266 {
267 if (GETBIT(a, i) != GETBIT(b, i))
268 dist++;
269 }
270 return dist;
271 }
272
273 static int
hemdist(GISTTYPE * a,GISTTYPE * b,int siglen)274 hemdist(GISTTYPE *a, GISTTYPE *b, int siglen)
275 {
276 if (ISALLTRUE(a))
277 {
278 if (ISALLTRUE(b))
279 return 0;
280 else
281 return SIGLENBIT(siglen) - sizebitvec(GETSIGN(b), siglen);
282 }
283 else if (ISALLTRUE(b))
284 return SIGLENBIT(siglen) - sizebitvec(GETSIGN(a), siglen);
285
286 return hemdistsign(GETSIGN(a), GETSIGN(b), siglen);
287 }
288
289 static int32
unionkey(BITVECP sbase,GISTTYPE * add,int siglen)290 unionkey(BITVECP sbase, GISTTYPE *add, int siglen)
291 {
292 int32 i;
293 BITVECP sadd = GETSIGN(add);
294
295 if (ISALLTRUE(add))
296 return 1;
297 LOOPBYTE(siglen)
298 sbase[i] |= sadd[i];
299 return 0;
300 }
301
302 Datum
ghstore_union(PG_FUNCTION_ARGS)303 ghstore_union(PG_FUNCTION_ARGS)
304 {
305 GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
306 int32 len = entryvec->n;
307
308 int *size = (int *) PG_GETARG_POINTER(1);
309 int siglen = GET_SIGLEN();
310 int32 i;
311 GISTTYPE *result = ghstore_alloc(false, siglen, NULL);
312 BITVECP base = GETSIGN(result);
313
314 for (i = 0; i < len; i++)
315 {
316 if (unionkey(base, GETENTRY(entryvec, i), siglen))
317 {
318 result->flag |= ALLISTRUE;
319 SET_VARSIZE(result, CALCGTSIZE(ALLISTRUE, siglen));
320 break;
321 }
322 }
323
324 *size = VARSIZE(result);
325
326 PG_RETURN_POINTER(result);
327 }
328
329 Datum
ghstore_penalty(PG_FUNCTION_ARGS)330 ghstore_penalty(PG_FUNCTION_ARGS)
331 {
332 GISTENTRY *origentry = (GISTENTRY *) PG_GETARG_POINTER(0); /* always ISSIGNKEY */
333 GISTENTRY *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);
334 float *penalty = (float *) PG_GETARG_POINTER(2);
335 int siglen = GET_SIGLEN();
336 GISTTYPE *origval = (GISTTYPE *) DatumGetPointer(origentry->key);
337 GISTTYPE *newval = (GISTTYPE *) DatumGetPointer(newentry->key);
338
339 *penalty = hemdist(origval, newval, siglen);
340 PG_RETURN_POINTER(penalty);
341 }
342
343
344 typedef struct
345 {
346 OffsetNumber pos;
347 int32 cost;
348 } SPLITCOST;
349
350 static int
comparecost(const void * a,const void * b)351 comparecost(const void *a, const void *b)
352 {
353 return ((const SPLITCOST *) a)->cost - ((const SPLITCOST *) b)->cost;
354 }
355
356
357 Datum
ghstore_picksplit(PG_FUNCTION_ARGS)358 ghstore_picksplit(PG_FUNCTION_ARGS)
359 {
360 GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
361 OffsetNumber maxoff = entryvec->n - 2;
362
363 GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
364 int siglen = GET_SIGLEN();
365 OffsetNumber k,
366 j;
367 GISTTYPE *datum_l,
368 *datum_r;
369 BITVECP union_l,
370 union_r;
371 int32 size_alpha,
372 size_beta;
373 int32 size_waste,
374 waste = -1;
375 int32 nbytes;
376 OffsetNumber seed_1 = 0,
377 seed_2 = 0;
378 OffsetNumber *left,
379 *right;
380 BITVECP ptr;
381 int i;
382 SPLITCOST *costvector;
383 GISTTYPE *_k,
384 *_j;
385
386 nbytes = (maxoff + 2) * sizeof(OffsetNumber);
387 v->spl_left = (OffsetNumber *) palloc(nbytes);
388 v->spl_right = (OffsetNumber *) palloc(nbytes);
389
390 for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k))
391 {
392 _k = GETENTRY(entryvec, k);
393 for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j))
394 {
395 size_waste = hemdist(_k, GETENTRY(entryvec, j), siglen);
396 if (size_waste > waste)
397 {
398 waste = size_waste;
399 seed_1 = k;
400 seed_2 = j;
401 }
402 }
403 }
404
405 left = v->spl_left;
406 v->spl_nleft = 0;
407 right = v->spl_right;
408 v->spl_nright = 0;
409
410 if (seed_1 == 0 || seed_2 == 0)
411 {
412 seed_1 = 1;
413 seed_2 = 2;
414 }
415
416 /* form initial .. */
417 datum_l = ghstore_alloc(ISALLTRUE(GETENTRY(entryvec, seed_1)), siglen,
418 GETSIGN(GETENTRY(entryvec, seed_1)));
419 datum_r = ghstore_alloc(ISALLTRUE(GETENTRY(entryvec, seed_2)), siglen,
420 GETSIGN(GETENTRY(entryvec, seed_2)));
421
422 maxoff = OffsetNumberNext(maxoff);
423 /* sort before ... */
424 costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
425 for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j))
426 {
427 costvector[j - 1].pos = j;
428 _j = GETENTRY(entryvec, j);
429 size_alpha = hemdist(datum_l, _j, siglen);
430 size_beta = hemdist(datum_r, _j, siglen);
431 costvector[j - 1].cost = abs(size_alpha - size_beta);
432 }
433 qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);
434
435 union_l = GETSIGN(datum_l);
436 union_r = GETSIGN(datum_r);
437
438 for (k = 0; k < maxoff; k++)
439 {
440 j = costvector[k].pos;
441 if (j == seed_1)
442 {
443 *left++ = j;
444 v->spl_nleft++;
445 continue;
446 }
447 else if (j == seed_2)
448 {
449 *right++ = j;
450 v->spl_nright++;
451 continue;
452 }
453 _j = GETENTRY(entryvec, j);
454 size_alpha = hemdist(datum_l, _j, siglen);
455 size_beta = hemdist(datum_r, _j, siglen);
456
457 if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.0001))
458 {
459 if (ISALLTRUE(datum_l) || ISALLTRUE(_j))
460 {
461 if (!ISALLTRUE(datum_l))
462 MemSet((void *) union_l, 0xff, siglen);
463 }
464 else
465 {
466 ptr = GETSIGN(_j);
467 LOOPBYTE(siglen)
468 union_l[i] |= ptr[i];
469 }
470 *left++ = j;
471 v->spl_nleft++;
472 }
473 else
474 {
475 if (ISALLTRUE(datum_r) || ISALLTRUE(_j))
476 {
477 if (!ISALLTRUE(datum_r))
478 MemSet((void *) union_r, 0xff, siglen);
479 }
480 else
481 {
482 ptr = GETSIGN(_j);
483 LOOPBYTE(siglen)
484 union_r[i] |= ptr[i];
485 }
486 *right++ = j;
487 v->spl_nright++;
488 }
489 }
490
491 *right = *left = FirstOffsetNumber;
492
493 v->spl_ldatum = PointerGetDatum(datum_l);
494 v->spl_rdatum = PointerGetDatum(datum_r);
495
496 PG_RETURN_POINTER(v);
497 }
498
499
500 Datum
ghstore_consistent(PG_FUNCTION_ARGS)501 ghstore_consistent(PG_FUNCTION_ARGS)
502 {
503 GISTTYPE *entry = (GISTTYPE *) DatumGetPointer(((GISTENTRY *) PG_GETARG_POINTER(0))->key);
504 StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
505
506 /* Oid subtype = PG_GETARG_OID(3); */
507 bool *recheck = (bool *) PG_GETARG_POINTER(4);
508 int siglen = GET_SIGLEN();
509 bool res = true;
510 BITVECP sign;
511
512 /* All cases served by this function are inexact */
513 *recheck = true;
514
515 if (ISALLTRUE(entry))
516 PG_RETURN_BOOL(true);
517
518 sign = GETSIGN(entry);
519
520 if (strategy == HStoreContainsStrategyNumber ||
521 strategy == HStoreOldContainsStrategyNumber)
522 {
523 HStore *query = PG_GETARG_HSTORE_P(1);
524 HEntry *qe = ARRPTR(query);
525 char *qv = STRPTR(query);
526 int count = HS_COUNT(query);
527 int i;
528
529 for (i = 0; res && i < count; ++i)
530 {
531 int crc = crc32_sz((char *) HSTORE_KEY(qe, qv, i),
532 HSTORE_KEYLEN(qe, i));
533
534 if (GETBIT(sign, HASHVAL(crc, siglen)))
535 {
536 if (!HSTORE_VALISNULL(qe, i))
537 {
538 crc = crc32_sz((char *) HSTORE_VAL(qe, qv, i),
539 HSTORE_VALLEN(qe, i));
540 if (!GETBIT(sign, HASHVAL(crc, siglen)))
541 res = false;
542 }
543 }
544 else
545 res = false;
546 }
547 }
548 else if (strategy == HStoreExistsStrategyNumber)
549 {
550 text *query = PG_GETARG_TEXT_PP(1);
551 int crc = crc32_sz(VARDATA_ANY(query), VARSIZE_ANY_EXHDR(query));
552
553 res = (GETBIT(sign, HASHVAL(crc, siglen))) ? true : false;
554 }
555 else if (strategy == HStoreExistsAllStrategyNumber)
556 {
557 ArrayType *query = PG_GETARG_ARRAYTYPE_P(1);
558 Datum *key_datums;
559 bool *key_nulls;
560 int key_count;
561 int i;
562
563 deconstruct_array(query,
564 TEXTOID, -1, false, TYPALIGN_INT,
565 &key_datums, &key_nulls, &key_count);
566
567 for (i = 0; res && i < key_count; ++i)
568 {
569 int crc;
570
571 if (key_nulls[i])
572 continue;
573 crc = crc32_sz(VARDATA(key_datums[i]), VARSIZE(key_datums[i]) - VARHDRSZ);
574 if (!(GETBIT(sign, HASHVAL(crc, siglen))))
575 res = false;
576 }
577 }
578 else if (strategy == HStoreExistsAnyStrategyNumber)
579 {
580 ArrayType *query = PG_GETARG_ARRAYTYPE_P(1);
581 Datum *key_datums;
582 bool *key_nulls;
583 int key_count;
584 int i;
585
586 deconstruct_array(query,
587 TEXTOID, -1, false, TYPALIGN_INT,
588 &key_datums, &key_nulls, &key_count);
589
590 res = false;
591
592 for (i = 0; !res && i < key_count; ++i)
593 {
594 int crc;
595
596 if (key_nulls[i])
597 continue;
598 crc = crc32_sz(VARDATA(key_datums[i]), VARSIZE(key_datums[i]) - VARHDRSZ);
599 if (GETBIT(sign, HASHVAL(crc, siglen)))
600 res = true;
601 }
602 }
603 else
604 elog(ERROR, "Unsupported strategy number: %d", strategy);
605
606 PG_RETURN_BOOL(res);
607 }
608
609 Datum
ghstore_options(PG_FUNCTION_ARGS)610 ghstore_options(PG_FUNCTION_ARGS)
611 {
612 local_relopts *relopts = (local_relopts *) PG_GETARG_POINTER(0);
613
614 init_local_reloptions(relopts, sizeof(GistHstoreOptions));
615 add_local_int_reloption(relopts, "siglen",
616 "signature length in bytes",
617 SIGLEN_DEFAULT, 1, SIGLEN_MAX,
618 offsetof(GistHstoreOptions, siglen));
619
620 PG_RETURN_VOID();
621 }
622