1 /*
2 * contrib/hstore/hstore_gist.c
3 */
4 #include "postgres.h"
5
6 #include "access/gist.h"
7 #include "access/stratnum.h"
8 #include "catalog/pg_type.h"
9 #include "utils/pg_crc.h"
10
11 #include "hstore.h"
12
13 /* bigint defines */
14 #define BITBYTE 8
15 #define SIGLENINT 4 /* >122 => key will toast, so very slow!!! */
16 #define SIGLEN ( sizeof(int)*SIGLENINT )
17 #define SIGLENBIT (SIGLEN*BITBYTE)
18
19 typedef char BITVEC[SIGLEN];
20 typedef char *BITVECP;
21
22 #define SIGPTR(x) ( (BITVECP) ARR_DATA_PTR(x) )
23
24
25 #define LOOPBYTE \
26 for(i=0;i<SIGLEN;i++)
27
28 #define LOOPBIT \
29 for(i=0;i<SIGLENBIT;i++)
30
31 /* beware of multiple evaluation of arguments to these macros! */
32 #define GETBYTE(x,i) ( *( (BITVECP)(x) + (int)( (i) / BITBYTE ) ) )
33 #define GETBITBYTE(x,i) ( (*((char*)(x)) >> (i)) & 0x01 )
34 #define CLRBIT(x,i) GETBYTE(x,i) &= ~( 0x01 << ( (i) % BITBYTE ) )
35 #define SETBIT(x,i) GETBYTE(x,i) |= ( 0x01 << ( (i) % BITBYTE ) )
36 #define GETBIT(x,i) ( (GETBYTE(x,i) >> ( (i) % BITBYTE )) & 0x01 )
37 #define HASHVAL(val) (((unsigned int)(val)) % SIGLENBIT)
38 #define HASH(sign, val) SETBIT((sign), HASHVAL(val))
39
40 typedef struct
41 {
42 int32 vl_len_; /* varlena header (do not touch directly!) */
43 int32 flag;
44 char data[FLEXIBLE_ARRAY_MEMBER];
45 } GISTTYPE;
46
47 #define ALLISTRUE 0x04
48
49 #define ISALLTRUE(x) ( ((GISTTYPE*)x)->flag & ALLISTRUE )
50
51 #define GTHDRSIZE (VARHDRSZ + sizeof(int32))
52 #define CALCGTSIZE(flag) ( GTHDRSIZE+(((flag) & ALLISTRUE) ? 0 : SIGLEN) )
53
54 #define GETSIGN(x) ( (BITVECP)( (char*)x+GTHDRSIZE ) )
55
56 #define SUMBIT(val) ( \
57 GETBITBYTE((val),0) + \
58 GETBITBYTE((val),1) + \
59 GETBITBYTE((val),2) + \
60 GETBITBYTE((val),3) + \
61 GETBITBYTE((val),4) + \
62 GETBITBYTE((val),5) + \
63 GETBITBYTE((val),6) + \
64 GETBITBYTE((val),7) \
65 )
66
67 #define GETENTRY(vec,pos) ((GISTTYPE *) DatumGetPointer((vec)->vector[(pos)].key))
68
69 #define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
70
71 /* shorthand for calculating CRC-32 of a single chunk of data. */
72 static pg_crc32
crc32_sz(char * buf,int size)73 crc32_sz(char *buf, int size)
74 {
75 pg_crc32 crc;
76
77 INIT_TRADITIONAL_CRC32(crc);
78 COMP_TRADITIONAL_CRC32(crc, buf, size);
79 FIN_TRADITIONAL_CRC32(crc);
80
81 return crc;
82 }
83
84
85 PG_FUNCTION_INFO_V1(ghstore_in);
86 PG_FUNCTION_INFO_V1(ghstore_out);
87
88
89 Datum
ghstore_in(PG_FUNCTION_ARGS)90 ghstore_in(PG_FUNCTION_ARGS)
91 {
92 elog(ERROR, "Not implemented");
93 PG_RETURN_DATUM(0);
94 }
95
96 Datum
ghstore_out(PG_FUNCTION_ARGS)97 ghstore_out(PG_FUNCTION_ARGS)
98 {
99 elog(ERROR, "Not implemented");
100 PG_RETURN_DATUM(0);
101 }
102
103 PG_FUNCTION_INFO_V1(ghstore_consistent);
104 PG_FUNCTION_INFO_V1(ghstore_compress);
105 PG_FUNCTION_INFO_V1(ghstore_decompress);
106 PG_FUNCTION_INFO_V1(ghstore_penalty);
107 PG_FUNCTION_INFO_V1(ghstore_picksplit);
108 PG_FUNCTION_INFO_V1(ghstore_union);
109 PG_FUNCTION_INFO_V1(ghstore_same);
110
111 Datum
ghstore_compress(PG_FUNCTION_ARGS)112 ghstore_compress(PG_FUNCTION_ARGS)
113 {
114 GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
115 GISTENTRY *retval = entry;
116
117 if (entry->leafkey)
118 {
119 GISTTYPE *res = (GISTTYPE *) palloc0(CALCGTSIZE(0));
120 HStore *val = DatumGetHStoreP(entry->key);
121 HEntry *hsent = ARRPTR(val);
122 char *ptr = STRPTR(val);
123 int count = HS_COUNT(val);
124 int i;
125
126 SET_VARSIZE(res, CALCGTSIZE(0));
127
128 for (i = 0; i < count; ++i)
129 {
130 int h;
131
132 h = crc32_sz((char *) HSTORE_KEY(hsent, ptr, i),
133 HSTORE_KEYLEN(hsent, i));
134 HASH(GETSIGN(res), h);
135 if (!HSTORE_VALISNULL(hsent, i))
136 {
137 h = crc32_sz((char *) HSTORE_VAL(hsent, ptr, i),
138 HSTORE_VALLEN(hsent, i));
139 HASH(GETSIGN(res), h);
140 }
141 }
142
143 retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
144 gistentryinit(*retval, PointerGetDatum(res),
145 entry->rel, entry->page,
146 entry->offset,
147 false);
148 }
149 else if (!ISALLTRUE(DatumGetPointer(entry->key)))
150 {
151 int32 i;
152 GISTTYPE *res;
153 BITVECP sign = GETSIGN(DatumGetPointer(entry->key));
154
155 LOOPBYTE
156 {
157 if ((sign[i] & 0xff) != 0xff)
158 PG_RETURN_POINTER(retval);
159 }
160
161 res = (GISTTYPE *) palloc(CALCGTSIZE(ALLISTRUE));
162 SET_VARSIZE(res, CALCGTSIZE(ALLISTRUE));
163 res->flag = ALLISTRUE;
164
165 retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
166 gistentryinit(*retval, PointerGetDatum(res),
167 entry->rel, entry->page,
168 entry->offset,
169 false);
170 }
171
172 PG_RETURN_POINTER(retval);
173 }
174
175 /*
176 * Since type ghstore isn't toastable (and doesn't need to be),
177 * this function can be a no-op.
178 */
179 Datum
ghstore_decompress(PG_FUNCTION_ARGS)180 ghstore_decompress(PG_FUNCTION_ARGS)
181 {
182 PG_RETURN_POINTER(PG_GETARG_POINTER(0));
183 }
184
185 Datum
ghstore_same(PG_FUNCTION_ARGS)186 ghstore_same(PG_FUNCTION_ARGS)
187 {
188 GISTTYPE *a = (GISTTYPE *) PG_GETARG_POINTER(0);
189 GISTTYPE *b = (GISTTYPE *) PG_GETARG_POINTER(1);
190 bool *result = (bool *) PG_GETARG_POINTER(2);
191
192 if (ISALLTRUE(a) && ISALLTRUE(b))
193 *result = true;
194 else if (ISALLTRUE(a))
195 *result = false;
196 else if (ISALLTRUE(b))
197 *result = false;
198 else
199 {
200 int32 i;
201 BITVECP sa = GETSIGN(a),
202 sb = GETSIGN(b);
203
204 *result = true;
205 LOOPBYTE
206 {
207 if (sa[i] != sb[i])
208 {
209 *result = false;
210 break;
211 }
212 }
213 }
214 PG_RETURN_POINTER(result);
215 }
216
217 static int32
sizebitvec(BITVECP sign)218 sizebitvec(BITVECP sign)
219 {
220 int32 size = 0,
221 i;
222
223 LOOPBYTE
224 {
225 size += SUMBIT(sign);
226 sign = (BITVECP) (((char *) sign) + 1);
227 }
228 return size;
229 }
230
231 static int
hemdistsign(BITVECP a,BITVECP b)232 hemdistsign(BITVECP a, BITVECP b)
233 {
234 int i,
235 dist = 0;
236
237 LOOPBIT
238 {
239 if (GETBIT(a, i) != GETBIT(b, i))
240 dist++;
241 }
242 return dist;
243 }
244
245 static int
hemdist(GISTTYPE * a,GISTTYPE * b)246 hemdist(GISTTYPE *a, GISTTYPE *b)
247 {
248 if (ISALLTRUE(a))
249 {
250 if (ISALLTRUE(b))
251 return 0;
252 else
253 return SIGLENBIT - sizebitvec(GETSIGN(b));
254 }
255 else if (ISALLTRUE(b))
256 return SIGLENBIT - sizebitvec(GETSIGN(a));
257
258 return hemdistsign(GETSIGN(a), GETSIGN(b));
259 }
260
261 static int32
unionkey(BITVECP sbase,GISTTYPE * add)262 unionkey(BITVECP sbase, GISTTYPE *add)
263 {
264 int32 i;
265 BITVECP sadd = GETSIGN(add);
266
267 if (ISALLTRUE(add))
268 return 1;
269 LOOPBYTE
270 sbase[i] |= sadd[i];
271 return 0;
272 }
273
274 Datum
ghstore_union(PG_FUNCTION_ARGS)275 ghstore_union(PG_FUNCTION_ARGS)
276 {
277 GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
278 int32 len = entryvec->n;
279
280 int *size = (int *) PG_GETARG_POINTER(1);
281 BITVEC base;
282 int32 i;
283 int32 flag = 0;
284 GISTTYPE *result;
285
286 MemSet((void *) base, 0, sizeof(BITVEC));
287 for (i = 0; i < len; i++)
288 {
289 if (unionkey(base, GETENTRY(entryvec, i)))
290 {
291 flag = ALLISTRUE;
292 break;
293 }
294 }
295
296 len = CALCGTSIZE(flag);
297 result = (GISTTYPE *) palloc(len);
298 SET_VARSIZE(result, len);
299 result->flag = flag;
300 if (!ISALLTRUE(result))
301 memcpy((void *) GETSIGN(result), (void *) base, sizeof(BITVEC));
302 *size = len;
303
304 PG_RETURN_POINTER(result);
305 }
306
307 Datum
ghstore_penalty(PG_FUNCTION_ARGS)308 ghstore_penalty(PG_FUNCTION_ARGS)
309 {
310 GISTENTRY *origentry = (GISTENTRY *) PG_GETARG_POINTER(0); /* always ISSIGNKEY */
311 GISTENTRY *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);
312 float *penalty = (float *) PG_GETARG_POINTER(2);
313 GISTTYPE *origval = (GISTTYPE *) DatumGetPointer(origentry->key);
314 GISTTYPE *newval = (GISTTYPE *) DatumGetPointer(newentry->key);
315
316 *penalty = hemdist(origval, newval);
317 PG_RETURN_POINTER(penalty);
318 }
319
320
321 typedef struct
322 {
323 OffsetNumber pos;
324 int32 cost;
325 } SPLITCOST;
326
327 static int
comparecost(const void * a,const void * b)328 comparecost(const void *a, const void *b)
329 {
330 return ((const SPLITCOST *) a)->cost - ((const SPLITCOST *) b)->cost;
331 }
332
333
334 Datum
ghstore_picksplit(PG_FUNCTION_ARGS)335 ghstore_picksplit(PG_FUNCTION_ARGS)
336 {
337 GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
338 OffsetNumber maxoff = entryvec->n - 2;
339
340 GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
341 OffsetNumber k,
342 j;
343 GISTTYPE *datum_l,
344 *datum_r;
345 BITVECP union_l,
346 union_r;
347 int32 size_alpha,
348 size_beta;
349 int32 size_waste,
350 waste = -1;
351 int32 nbytes;
352 OffsetNumber seed_1 = 0,
353 seed_2 = 0;
354 OffsetNumber *left,
355 *right;
356 BITVECP ptr;
357 int i;
358 SPLITCOST *costvector;
359 GISTTYPE *_k,
360 *_j;
361
362 nbytes = (maxoff + 2) * sizeof(OffsetNumber);
363 v->spl_left = (OffsetNumber *) palloc(nbytes);
364 v->spl_right = (OffsetNumber *) palloc(nbytes);
365
366 for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k))
367 {
368 _k = GETENTRY(entryvec, k);
369 for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j))
370 {
371 size_waste = hemdist(_k, GETENTRY(entryvec, j));
372 if (size_waste > waste)
373 {
374 waste = size_waste;
375 seed_1 = k;
376 seed_2 = j;
377 }
378 }
379 }
380
381 left = v->spl_left;
382 v->spl_nleft = 0;
383 right = v->spl_right;
384 v->spl_nright = 0;
385
386 if (seed_1 == 0 || seed_2 == 0)
387 {
388 seed_1 = 1;
389 seed_2 = 2;
390 }
391
392 /* form initial .. */
393 if (ISALLTRUE(GETENTRY(entryvec, seed_1)))
394 {
395 datum_l = (GISTTYPE *) palloc(GTHDRSIZE);
396 SET_VARSIZE(datum_l, GTHDRSIZE);
397 datum_l->flag = ALLISTRUE;
398 }
399 else
400 {
401 datum_l = (GISTTYPE *) palloc(GTHDRSIZE + SIGLEN);
402 SET_VARSIZE(datum_l, GTHDRSIZE + SIGLEN);
403 datum_l->flag = 0;
404 memcpy((void *) GETSIGN(datum_l), (void *) GETSIGN(GETENTRY(entryvec, seed_1)), sizeof(BITVEC))
405 ;
406 }
407 if (ISALLTRUE(GETENTRY(entryvec, seed_2)))
408 {
409 datum_r = (GISTTYPE *) palloc(GTHDRSIZE);
410 SET_VARSIZE(datum_r, GTHDRSIZE);
411 datum_r->flag = ALLISTRUE;
412 }
413 else
414 {
415 datum_r = (GISTTYPE *) palloc(GTHDRSIZE + SIGLEN);
416 SET_VARSIZE(datum_r, GTHDRSIZE + SIGLEN);
417 datum_r->flag = 0;
418 memcpy((void *) GETSIGN(datum_r), (void *) GETSIGN(GETENTRY(entryvec, seed_2)), sizeof(BITVEC));
419 }
420
421 maxoff = OffsetNumberNext(maxoff);
422 /* sort before ... */
423 costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
424 for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j))
425 {
426 costvector[j - 1].pos = j;
427 _j = GETENTRY(entryvec, j);
428 size_alpha = hemdist(datum_l, _j);
429 size_beta = hemdist(datum_r, _j);
430 costvector[j - 1].cost = abs(size_alpha - size_beta);
431 }
432 qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);
433
434 union_l = GETSIGN(datum_l);
435 union_r = GETSIGN(datum_r);
436
437 for (k = 0; k < maxoff; k++)
438 {
439 j = costvector[k].pos;
440 if (j == seed_1)
441 {
442 *left++ = j;
443 v->spl_nleft++;
444 continue;
445 }
446 else if (j == seed_2)
447 {
448 *right++ = j;
449 v->spl_nright++;
450 continue;
451 }
452 _j = GETENTRY(entryvec, j);
453 size_alpha = hemdist(datum_l, _j);
454 size_beta = hemdist(datum_r, _j);
455
456 if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.0001))
457 {
458 if (ISALLTRUE(datum_l) || ISALLTRUE(_j))
459 {
460 if (!ISALLTRUE(datum_l))
461 MemSet((void *) union_l, 0xff, sizeof(BITVEC));
462 }
463 else
464 {
465 ptr = GETSIGN(_j);
466 LOOPBYTE
467 union_l[i] |= ptr[i];
468 }
469 *left++ = j;
470 v->spl_nleft++;
471 }
472 else
473 {
474 if (ISALLTRUE(datum_r) || ISALLTRUE(_j))
475 {
476 if (!ISALLTRUE(datum_r))
477 MemSet((void *) union_r, 0xff, sizeof(BITVEC));
478 }
479 else
480 {
481 ptr = GETSIGN(_j);
482 LOOPBYTE
483 union_r[i] |= ptr[i];
484 }
485 *right++ = j;
486 v->spl_nright++;
487 }
488 }
489
490 *right = *left = FirstOffsetNumber;
491
492 v->spl_ldatum = PointerGetDatum(datum_l);
493 v->spl_rdatum = PointerGetDatum(datum_r);
494
495 PG_RETURN_POINTER(v);
496 }
497
498
499 Datum
ghstore_consistent(PG_FUNCTION_ARGS)500 ghstore_consistent(PG_FUNCTION_ARGS)
501 {
502 GISTTYPE *entry = (GISTTYPE *) DatumGetPointer(((GISTENTRY *) PG_GETARG_POINTER(0))->key);
503 StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
504
505 /* Oid subtype = PG_GETARG_OID(3); */
506 bool *recheck = (bool *) PG_GETARG_POINTER(4);
507 bool res = true;
508 BITVECP sign;
509
510 /* All cases served by this function are inexact */
511 *recheck = true;
512
513 if (ISALLTRUE(entry))
514 PG_RETURN_BOOL(true);
515
516 sign = GETSIGN(entry);
517
518 if (strategy == HStoreContainsStrategyNumber ||
519 strategy == HStoreOldContainsStrategyNumber)
520 {
521 HStore *query = PG_GETARG_HSTORE_P(1);
522 HEntry *qe = ARRPTR(query);
523 char *qv = STRPTR(query);
524 int count = HS_COUNT(query);
525 int i;
526
527 for (i = 0; res && i < count; ++i)
528 {
529 int crc = crc32_sz((char *) HSTORE_KEY(qe, qv, i),
530 HSTORE_KEYLEN(qe, i));
531
532 if (GETBIT(sign, HASHVAL(crc)))
533 {
534 if (!HSTORE_VALISNULL(qe, i))
535 {
536 crc = crc32_sz((char *) HSTORE_VAL(qe, qv, i),
537 HSTORE_VALLEN(qe, i));
538 if (!GETBIT(sign, HASHVAL(crc)))
539 res = false;
540 }
541 }
542 else
543 res = false;
544 }
545 }
546 else if (strategy == HStoreExistsStrategyNumber)
547 {
548 text *query = PG_GETARG_TEXT_PP(1);
549 int crc = crc32_sz(VARDATA_ANY(query), VARSIZE_ANY_EXHDR(query));
550
551 res = (GETBIT(sign, HASHVAL(crc))) ? true : false;
552 }
553 else if (strategy == HStoreExistsAllStrategyNumber)
554 {
555 ArrayType *query = PG_GETARG_ARRAYTYPE_P(1);
556 Datum *key_datums;
557 bool *key_nulls;
558 int key_count;
559 int i;
560
561 deconstruct_array(query,
562 TEXTOID, -1, false, 'i',
563 &key_datums, &key_nulls, &key_count);
564
565 for (i = 0; res && i < key_count; ++i)
566 {
567 int crc;
568
569 if (key_nulls[i])
570 continue;
571 crc = crc32_sz(VARDATA(key_datums[i]), VARSIZE(key_datums[i]) - VARHDRSZ);
572 if (!(GETBIT(sign, HASHVAL(crc))))
573 res = false;
574 }
575 }
576 else if (strategy == HStoreExistsAnyStrategyNumber)
577 {
578 ArrayType *query = PG_GETARG_ARRAYTYPE_P(1);
579 Datum *key_datums;
580 bool *key_nulls;
581 int key_count;
582 int i;
583
584 deconstruct_array(query,
585 TEXTOID, -1, false, 'i',
586 &key_datums, &key_nulls, &key_count);
587
588 res = false;
589
590 for (i = 0; !res && i < key_count; ++i)
591 {
592 int crc;
593
594 if (key_nulls[i])
595 continue;
596 crc = crc32_sz(VARDATA(key_datums[i]), VARSIZE(key_datums[i]) - VARHDRSZ);
597 if (GETBIT(sign, HASHVAL(crc)))
598 res = true;
599 }
600 }
601 else
602 elog(ERROR, "Unsupported strategy number: %d", strategy);
603
604 PG_RETURN_BOOL(res);
605 }
606