1 /*
2 * contrib/ltree/_ltree_gist.c
3 *
4 *
5 * GiST support for ltree[]
6 * Teodor Sigaev <teodor@stack.net>
7 */
8 #include "postgres.h"
9
10 #include "access/gist.h"
11 #include "access/reloptions.h"
12 #include "access/stratnum.h"
13 #include "crc32.h"
14 #include "ltree.h"
15 #include "port/pg_bitutils.h"
16
17 PG_FUNCTION_INFO_V1(_ltree_compress);
18 PG_FUNCTION_INFO_V1(_ltree_same);
19 PG_FUNCTION_INFO_V1(_ltree_union);
20 PG_FUNCTION_INFO_V1(_ltree_penalty);
21 PG_FUNCTION_INFO_V1(_ltree_picksplit);
22 PG_FUNCTION_INFO_V1(_ltree_consistent);
23 PG_FUNCTION_INFO_V1(_ltree_gist_options);
24
25 #define GETENTRY(vec,pos) ((ltree_gist *) DatumGetPointer((vec)->vector[(pos)].key))
26 #define NEXTVAL(x) ( (ltree*)( (char*)(x) + INTALIGN( VARSIZE(x) ) ) )
27
28 #define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
29
30
31 static void
hashing(BITVECP sign,ltree * t,int siglen)32 hashing(BITVECP sign, ltree *t, int siglen)
33 {
34 int tlen = t->numlevel;
35 ltree_level *cur = LTREE_FIRST(t);
36 int hash;
37
38 while (tlen > 0)
39 {
40 hash = ltree_crc32_sz(cur->name, cur->len);
41 AHASH(sign, hash, siglen);
42 cur = LEVEL_NEXT(cur);
43 tlen--;
44 }
45 }
46
47 Datum
_ltree_compress(PG_FUNCTION_ARGS)48 _ltree_compress(PG_FUNCTION_ARGS)
49 {
50 GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
51 GISTENTRY *retval = entry;
52 int siglen = LTREE_GET_ASIGLEN();
53
54 if (entry->leafkey)
55 { /* ltree */
56 ltree_gist *key;
57 ArrayType *val = DatumGetArrayTypeP(entry->key);
58 int num = ArrayGetNItems(ARR_NDIM(val), ARR_DIMS(val));
59 ltree *item = (ltree *) ARR_DATA_PTR(val);
60
61 if (ARR_NDIM(val) > 1)
62 ereport(ERROR,
63 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
64 errmsg("array must be one-dimensional")));
65 if (array_contains_nulls(val))
66 ereport(ERROR,
67 (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
68 errmsg("array must not contain nulls")));
69
70 key = ltree_gist_alloc(false, NULL, siglen, NULL, NULL);
71
72 while (num > 0)
73 {
74 hashing(LTG_SIGN(key), item, siglen);
75 num--;
76 item = NEXTVAL(item);
77 }
78
79 retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
80 gistentryinit(*retval, PointerGetDatum(key),
81 entry->rel, entry->page,
82 entry->offset, false);
83 }
84 else if (!LTG_ISALLTRUE(entry->key))
85 {
86 int32 i;
87 ltree_gist *key;
88 BITVECP sign = LTG_SIGN(DatumGetPointer(entry->key));
89
90 ALOOPBYTE(siglen)
91 {
92 if ((sign[i] & 0xff) != 0xff)
93 PG_RETURN_POINTER(retval);
94 }
95
96 key = ltree_gist_alloc(true, sign, siglen, NULL, NULL);
97 retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
98 gistentryinit(*retval, PointerGetDatum(key),
99 entry->rel, entry->page,
100 entry->offset, false);
101 }
102 PG_RETURN_POINTER(retval);
103 }
104
105 Datum
_ltree_same(PG_FUNCTION_ARGS)106 _ltree_same(PG_FUNCTION_ARGS)
107 {
108 ltree_gist *a = (ltree_gist *) PG_GETARG_POINTER(0);
109 ltree_gist *b = (ltree_gist *) PG_GETARG_POINTER(1);
110 bool *result = (bool *) PG_GETARG_POINTER(2);
111 int siglen = LTREE_GET_ASIGLEN();
112
113 if (LTG_ISALLTRUE(a) && LTG_ISALLTRUE(b))
114 *result = true;
115 else if (LTG_ISALLTRUE(a))
116 *result = false;
117 else if (LTG_ISALLTRUE(b))
118 *result = false;
119 else
120 {
121 int32 i;
122 BITVECP sa = LTG_SIGN(a),
123 sb = LTG_SIGN(b);
124
125 *result = true;
126 ALOOPBYTE(siglen)
127 {
128 if (sa[i] != sb[i])
129 {
130 *result = false;
131 break;
132 }
133 }
134 }
135 PG_RETURN_POINTER(result);
136 }
137
138 static int32
unionkey(BITVECP sbase,ltree_gist * add,int siglen)139 unionkey(BITVECP sbase, ltree_gist *add, int siglen)
140 {
141 int32 i;
142 BITVECP sadd = LTG_SIGN(add);
143
144 if (LTG_ISALLTRUE(add))
145 return 1;
146
147 ALOOPBYTE(siglen)
148 sbase[i] |= sadd[i];
149 return 0;
150 }
151
152 Datum
_ltree_union(PG_FUNCTION_ARGS)153 _ltree_union(PG_FUNCTION_ARGS)
154 {
155 GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
156 int *size = (int *) PG_GETARG_POINTER(1);
157 int siglen = LTREE_GET_ASIGLEN();
158 int32 i;
159 ltree_gist *result = ltree_gist_alloc(false, NULL, siglen, NULL, NULL);
160 BITVECP base = LTG_SIGN(result);
161
162 for (i = 0; i < entryvec->n; i++)
163 {
164 if (unionkey(base, GETENTRY(entryvec, i), siglen))
165 {
166 result->flag |= LTG_ALLTRUE;
167 SET_VARSIZE(result, LTG_HDRSIZE);
168 break;
169 }
170 }
171
172 *size = VARSIZE(result);
173
174 PG_RETURN_POINTER(result);
175 }
176
177 static int32
sizebitvec(BITVECP sign,int siglen)178 sizebitvec(BITVECP sign, int siglen)
179 {
180 return pg_popcount((const char *) sign, siglen);
181 }
182
183 static int
hemdistsign(BITVECP a,BITVECP b,int siglen)184 hemdistsign(BITVECP a, BITVECP b, int siglen)
185 {
186 int i,
187 diff,
188 dist = 0;
189
190 ALOOPBYTE(siglen)
191 {
192 diff = (unsigned char) (a[i] ^ b[i]);
193 /* Using the popcount functions here isn't likely to win */
194 dist += pg_number_of_ones[diff];
195 }
196 return dist;
197 }
198
199 static int
hemdist(ltree_gist * a,ltree_gist * b,int siglen)200 hemdist(ltree_gist *a, ltree_gist *b, int siglen)
201 {
202 if (LTG_ISALLTRUE(a))
203 {
204 if (LTG_ISALLTRUE(b))
205 return 0;
206 else
207 return ASIGLENBIT(siglen) - sizebitvec(LTG_SIGN(b), siglen);
208 }
209 else if (LTG_ISALLTRUE(b))
210 return ASIGLENBIT(siglen) - sizebitvec(LTG_SIGN(a), siglen);
211
212 return hemdistsign(LTG_SIGN(a), LTG_SIGN(b), siglen);
213 }
214
215
216 Datum
_ltree_penalty(PG_FUNCTION_ARGS)217 _ltree_penalty(PG_FUNCTION_ARGS)
218 {
219 ltree_gist *origval = (ltree_gist *) DatumGetPointer(((GISTENTRY *) PG_GETARG_POINTER(0))->key);
220 ltree_gist *newval = (ltree_gist *) DatumGetPointer(((GISTENTRY *) PG_GETARG_POINTER(1))->key);
221 float *penalty = (float *) PG_GETARG_POINTER(2);
222 int siglen = LTREE_GET_ASIGLEN();
223
224 *penalty = hemdist(origval, newval, siglen);
225 PG_RETURN_POINTER(penalty);
226 }
227
228 typedef struct
229 {
230 OffsetNumber pos;
231 int32 cost;
232 } SPLITCOST;
233
234 static int
comparecost(const void * a,const void * b)235 comparecost(const void *a, const void *b)
236 {
237 return ((const SPLITCOST *) a)->cost - ((const SPLITCOST *) b)->cost;
238 }
239
240 Datum
_ltree_picksplit(PG_FUNCTION_ARGS)241 _ltree_picksplit(PG_FUNCTION_ARGS)
242 {
243 GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
244 GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
245 int siglen = LTREE_GET_ASIGLEN();
246 OffsetNumber k,
247 j;
248 ltree_gist *datum_l,
249 *datum_r;
250 BITVECP union_l,
251 union_r;
252 int32 size_alpha,
253 size_beta;
254 int32 size_waste,
255 waste = -1;
256 int32 nbytes;
257 OffsetNumber seed_1 = 0,
258 seed_2 = 0;
259 OffsetNumber *left,
260 *right;
261 OffsetNumber maxoff;
262 BITVECP ptr;
263 int i;
264 SPLITCOST *costvector;
265 ltree_gist *_k,
266 *_j;
267
268 maxoff = entryvec->n - 2;
269 nbytes = (maxoff + 2) * sizeof(OffsetNumber);
270 v->spl_left = (OffsetNumber *) palloc(nbytes);
271 v->spl_right = (OffsetNumber *) palloc(nbytes);
272
273 for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k))
274 {
275 _k = GETENTRY(entryvec, k);
276 for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j))
277 {
278 size_waste = hemdist(_k, GETENTRY(entryvec, j), siglen);
279 if (size_waste > waste)
280 {
281 waste = size_waste;
282 seed_1 = k;
283 seed_2 = j;
284 }
285 }
286 }
287
288 left = v->spl_left;
289 v->spl_nleft = 0;
290 right = v->spl_right;
291 v->spl_nright = 0;
292
293 if (seed_1 == 0 || seed_2 == 0)
294 {
295 seed_1 = 1;
296 seed_2 = 2;
297 }
298
299 /* form initial .. */
300 datum_l = ltree_gist_alloc(LTG_ISALLTRUE(GETENTRY(entryvec, seed_1)),
301 LTG_SIGN(GETENTRY(entryvec, seed_1)),
302 siglen, NULL, NULL);
303
304 datum_r = ltree_gist_alloc(LTG_ISALLTRUE(GETENTRY(entryvec, seed_2)),
305 LTG_SIGN(GETENTRY(entryvec, seed_2)),
306 siglen, NULL, NULL);
307
308 maxoff = OffsetNumberNext(maxoff);
309 /* sort before ... */
310 costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
311 for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j))
312 {
313 costvector[j - 1].pos = j;
314 _j = GETENTRY(entryvec, j);
315 size_alpha = hemdist(datum_l, _j, siglen);
316 size_beta = hemdist(datum_r, _j, siglen);
317 costvector[j - 1].cost = Abs(size_alpha - size_beta);
318 }
319 qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);
320
321 union_l = LTG_SIGN(datum_l);
322 union_r = LTG_SIGN(datum_r);
323
324 for (k = 0; k < maxoff; k++)
325 {
326 j = costvector[k].pos;
327 if (j == seed_1)
328 {
329 *left++ = j;
330 v->spl_nleft++;
331 continue;
332 }
333 else if (j == seed_2)
334 {
335 *right++ = j;
336 v->spl_nright++;
337 continue;
338 }
339 _j = GETENTRY(entryvec, j);
340 size_alpha = hemdist(datum_l, _j, siglen);
341 size_beta = hemdist(datum_r, _j, siglen);
342
343 if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.00001))
344 {
345 if (LTG_ISALLTRUE(datum_l) || LTG_ISALLTRUE(_j))
346 {
347 if (!LTG_ISALLTRUE(datum_l))
348 MemSet((void *) union_l, 0xff, siglen);
349 }
350 else
351 {
352 ptr = LTG_SIGN(_j);
353 ALOOPBYTE(siglen)
354 union_l[i] |= ptr[i];
355 }
356 *left++ = j;
357 v->spl_nleft++;
358 }
359 else
360 {
361 if (LTG_ISALLTRUE(datum_r) || LTG_ISALLTRUE(_j))
362 {
363 if (!LTG_ISALLTRUE(datum_r))
364 MemSet((void *) union_r, 0xff, siglen);
365 }
366 else
367 {
368 ptr = LTG_SIGN(_j);
369 ALOOPBYTE(siglen)
370 union_r[i] |= ptr[i];
371 }
372 *right++ = j;
373 v->spl_nright++;
374 }
375 }
376
377 *right = *left = FirstOffsetNumber;
378
379 v->spl_ldatum = PointerGetDatum(datum_l);
380 v->spl_rdatum = PointerGetDatum(datum_r);
381
382 PG_RETURN_POINTER(v);
383 }
384
385 static bool
gist_te(ltree_gist * key,ltree * query,int siglen)386 gist_te(ltree_gist *key, ltree *query, int siglen)
387 {
388 ltree_level *curq = LTREE_FIRST(query);
389 BITVECP sign = LTG_SIGN(key);
390 int qlen = query->numlevel;
391 unsigned int hv;
392
393 if (LTG_ISALLTRUE(key))
394 return true;
395
396 while (qlen > 0)
397 {
398 hv = ltree_crc32_sz(curq->name, curq->len);
399 if (!GETBIT(sign, AHASHVAL(hv, siglen)))
400 return false;
401 curq = LEVEL_NEXT(curq);
402 qlen--;
403 }
404
405 return true;
406 }
407
408 typedef struct LtreeSignature
409 {
410 BITVECP sign;
411 int siglen;
412 } LtreeSignature;
413
414 static bool
checkcondition_bit(void * cxt,ITEM * val)415 checkcondition_bit(void *cxt, ITEM *val)
416 {
417 LtreeSignature *sig = cxt;
418
419 return (FLG_CANLOOKSIGN(val->flag)) ? GETBIT(sig->sign, AHASHVAL(val->val, sig->siglen)) : true;
420 }
421
422 static bool
gist_qtxt(ltree_gist * key,ltxtquery * query,int siglen)423 gist_qtxt(ltree_gist *key, ltxtquery *query, int siglen)
424 {
425 LtreeSignature sig;
426
427 if (LTG_ISALLTRUE(key))
428 return true;
429
430 sig.sign = LTG_SIGN(key);
431 sig.siglen = siglen;
432
433 return ltree_execute(GETQUERY(query),
434 &sig, false,
435 checkcondition_bit);
436 }
437
438 static bool
gist_qe(ltree_gist * key,lquery * query,int siglen)439 gist_qe(ltree_gist *key, lquery *query, int siglen)
440 {
441 lquery_level *curq = LQUERY_FIRST(query);
442 BITVECP sign = LTG_SIGN(key);
443 int qlen = query->numlevel;
444
445 if (LTG_ISALLTRUE(key))
446 return true;
447
448 while (qlen > 0)
449 {
450 if (curq->numvar && LQL_CANLOOKSIGN(curq))
451 {
452 bool isexist = false;
453 int vlen = curq->numvar;
454 lquery_variant *curv = LQL_FIRST(curq);
455
456 while (vlen > 0)
457 {
458 if (GETBIT(sign, AHASHVAL(curv->val, siglen)))
459 {
460 isexist = true;
461 break;
462 }
463 curv = LVAR_NEXT(curv);
464 vlen--;
465 }
466 if (!isexist)
467 return false;
468 }
469
470 curq = LQL_NEXT(curq);
471 qlen--;
472 }
473
474 return true;
475 }
476
477 static bool
_arrq_cons(ltree_gist * key,ArrayType * _query,int siglen)478 _arrq_cons(ltree_gist *key, ArrayType *_query, int siglen)
479 {
480 lquery *query = (lquery *) ARR_DATA_PTR(_query);
481 int num = ArrayGetNItems(ARR_NDIM(_query), ARR_DIMS(_query));
482
483 if (ARR_NDIM(_query) > 1)
484 ereport(ERROR,
485 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
486 errmsg("array must be one-dimensional")));
487 if (array_contains_nulls(_query))
488 ereport(ERROR,
489 (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
490 errmsg("array must not contain nulls")));
491
492 while (num > 0)
493 {
494 if (gist_qe(key, query, siglen))
495 return true;
496 num--;
497 query = (lquery *) NEXTVAL(query);
498 }
499 return false;
500 }
501
502 Datum
_ltree_consistent(PG_FUNCTION_ARGS)503 _ltree_consistent(PG_FUNCTION_ARGS)
504 {
505 GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
506 void *query = (void *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
507 StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
508
509 /* Oid subtype = PG_GETARG_OID(3); */
510 bool *recheck = (bool *) PG_GETARG_POINTER(4);
511 int siglen = LTREE_GET_ASIGLEN();
512 ltree_gist *key = (ltree_gist *) DatumGetPointer(entry->key);
513 bool res = false;
514
515 /* All cases served by this function are inexact */
516 *recheck = true;
517
518 switch (strategy)
519 {
520 case 10:
521 case 11:
522 res = gist_te(key, (ltree *) query, siglen);
523 break;
524 case 12:
525 case 13:
526 res = gist_qe(key, (lquery *) query, siglen);
527 break;
528 case 14:
529 case 15:
530 res = gist_qtxt(key, (ltxtquery *) query, siglen);
531 break;
532 case 16:
533 case 17:
534 res = _arrq_cons(key, (ArrayType *) query, siglen);
535 break;
536 default:
537 /* internal error */
538 elog(ERROR, "unrecognized StrategyNumber: %d", strategy);
539 }
540 PG_FREE_IF_COPY(query, 1);
541 PG_RETURN_BOOL(res);
542 }
543
544 Datum
_ltree_gist_options(PG_FUNCTION_ARGS)545 _ltree_gist_options(PG_FUNCTION_ARGS)
546 {
547 local_relopts *relopts = (local_relopts *) PG_GETARG_POINTER(0);
548
549 init_local_reloptions(relopts, sizeof(LtreeGistOptions));
550 add_local_int_reloption(relopts, "siglen", "signature length",
551 LTREE_ASIGLEN_DEFAULT, 1, LTREE_ASIGLEN_MAX,
552 offsetof(LtreeGistOptions, siglen));
553
554 PG_RETURN_VOID();
555 }
556