1 /*
2 * contrib/ltree/_ltree_gist.c
3 *
4 *
5 * GiST support for ltree[]
6 * Teodor Sigaev <teodor@stack.net>
7 */
8 #include "postgres.h"
9
10 #include "access/gist.h"
11 #include "access/stratnum.h"
12 #include "crc32.h"
13 #include "ltree.h"
14
15
16 PG_FUNCTION_INFO_V1(_ltree_compress);
17 PG_FUNCTION_INFO_V1(_ltree_same);
18 PG_FUNCTION_INFO_V1(_ltree_union);
19 PG_FUNCTION_INFO_V1(_ltree_penalty);
20 PG_FUNCTION_INFO_V1(_ltree_picksplit);
21 PG_FUNCTION_INFO_V1(_ltree_consistent);
22
23 #define GETENTRY(vec,pos) ((ltree_gist *) DatumGetPointer((vec)->vector[(pos)].key))
24 #define NEXTVAL(x) ( (ltree*)( (char*)(x) + INTALIGN( VARSIZE(x) ) ) )
25
26 /* Number of one-bits in an unsigned byte */
27 static const uint8 number_of_ones[256] = {
28 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
29 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
30 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
31 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
32 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
33 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
34 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
35 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
36 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
37 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
38 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
39 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
40 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
41 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
42 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
43 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
44 };
45
46 #define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
47
48
49 static void
hashing(BITVECP sign,ltree * t)50 hashing(BITVECP sign, ltree *t)
51 {
52 int tlen = t->numlevel;
53 ltree_level *cur = LTREE_FIRST(t);
54 int hash;
55
56 while (tlen > 0)
57 {
58 hash = ltree_crc32_sz(cur->name, cur->len);
59 AHASH(sign, hash);
60 cur = LEVEL_NEXT(cur);
61 tlen--;
62 }
63 }
64
65 Datum
_ltree_compress(PG_FUNCTION_ARGS)66 _ltree_compress(PG_FUNCTION_ARGS)
67 {
68 GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
69 GISTENTRY *retval = entry;
70
71 if (entry->leafkey)
72 { /* ltree */
73 ltree_gist *key;
74 ArrayType *val = DatumGetArrayTypeP(entry->key);
75 int32 len = LTG_HDRSIZE + ASIGLEN;
76 int num = ArrayGetNItems(ARR_NDIM(val), ARR_DIMS(val));
77 ltree *item = (ltree *) ARR_DATA_PTR(val);
78
79 if (ARR_NDIM(val) > 1)
80 ereport(ERROR,
81 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
82 errmsg("array must be one-dimensional")));
83 if (array_contains_nulls(val))
84 ereport(ERROR,
85 (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
86 errmsg("array must not contain nulls")));
87
88 key = (ltree_gist *) palloc0(len);
89 SET_VARSIZE(key, len);
90 key->flag = 0;
91
92 MemSet(LTG_SIGN(key), 0, ASIGLEN);
93 while (num > 0)
94 {
95 hashing(LTG_SIGN(key), item);
96 num--;
97 item = NEXTVAL(item);
98 }
99
100 retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
101 gistentryinit(*retval, PointerGetDatum(key),
102 entry->rel, entry->page,
103 entry->offset, FALSE);
104 }
105 else if (!LTG_ISALLTRUE(entry->key))
106 {
107 int32 i,
108 len;
109 ltree_gist *key;
110
111 BITVECP sign = LTG_SIGN(DatumGetPointer(entry->key));
112
113 ALOOPBYTE
114 {
115 if ((sign[i] & 0xff) != 0xff)
116 PG_RETURN_POINTER(retval);
117 }
118 len = LTG_HDRSIZE;
119 key = (ltree_gist *) palloc0(len);
120 SET_VARSIZE(key, len);
121 key->flag = LTG_ALLTRUE;
122
123 retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
124 gistentryinit(*retval, PointerGetDatum(key),
125 entry->rel, entry->page,
126 entry->offset, FALSE);
127 }
128 PG_RETURN_POINTER(retval);
129 }
130
131 Datum
_ltree_same(PG_FUNCTION_ARGS)132 _ltree_same(PG_FUNCTION_ARGS)
133 {
134 ltree_gist *a = (ltree_gist *) PG_GETARG_POINTER(0);
135 ltree_gist *b = (ltree_gist *) PG_GETARG_POINTER(1);
136 bool *result = (bool *) PG_GETARG_POINTER(2);
137
138 if (LTG_ISALLTRUE(a) && LTG_ISALLTRUE(b))
139 *result = true;
140 else if (LTG_ISALLTRUE(a))
141 *result = false;
142 else if (LTG_ISALLTRUE(b))
143 *result = false;
144 else
145 {
146 int32 i;
147 BITVECP sa = LTG_SIGN(a),
148 sb = LTG_SIGN(b);
149
150 *result = true;
151 ALOOPBYTE
152 {
153 if (sa[i] != sb[i])
154 {
155 *result = false;
156 break;
157 }
158 }
159 }
160 PG_RETURN_POINTER(result);
161 }
162
163 static int32
unionkey(BITVECP sbase,ltree_gist * add)164 unionkey(BITVECP sbase, ltree_gist *add)
165 {
166 int32 i;
167 BITVECP sadd = LTG_SIGN(add);
168
169 if (LTG_ISALLTRUE(add))
170 return 1;
171
172 ALOOPBYTE
173 sbase[i] |= sadd[i];
174 return 0;
175 }
176
177 Datum
_ltree_union(PG_FUNCTION_ARGS)178 _ltree_union(PG_FUNCTION_ARGS)
179 {
180 GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
181 int *size = (int *) PG_GETARG_POINTER(1);
182 ABITVEC base;
183 int32 i,
184 len;
185 int32 flag = 0;
186 ltree_gist *result;
187
188 MemSet((void *) base, 0, sizeof(ABITVEC));
189 for (i = 0; i < entryvec->n; i++)
190 {
191 if (unionkey(base, GETENTRY(entryvec, i)))
192 {
193 flag = LTG_ALLTRUE;
194 break;
195 }
196 }
197
198 len = LTG_HDRSIZE + ((flag & LTG_ALLTRUE) ? 0 : ASIGLEN);
199 result = (ltree_gist *) palloc0(len);
200 SET_VARSIZE(result, len);
201 result->flag = flag;
202 if (!LTG_ISALLTRUE(result))
203 memcpy((void *) LTG_SIGN(result), (void *) base, sizeof(ABITVEC));
204 *size = len;
205
206 PG_RETURN_POINTER(result);
207 }
208
209 static int32
sizebitvec(BITVECP sign)210 sizebitvec(BITVECP sign)
211 {
212 int32 size = 0,
213 i;
214
215 ALOOPBYTE
216 size += number_of_ones[(unsigned char) sign[i]];
217 return size;
218 }
219
220 static int
hemdistsign(BITVECP a,BITVECP b)221 hemdistsign(BITVECP a, BITVECP b)
222 {
223 int i,
224 diff,
225 dist = 0;
226
227 ALOOPBYTE
228 {
229 diff = (unsigned char) (a[i] ^ b[i]);
230 dist += number_of_ones[diff];
231 }
232 return dist;
233 }
234
235 static int
hemdist(ltree_gist * a,ltree_gist * b)236 hemdist(ltree_gist *a, ltree_gist *b)
237 {
238 if (LTG_ISALLTRUE(a))
239 {
240 if (LTG_ISALLTRUE(b))
241 return 0;
242 else
243 return ASIGLENBIT - sizebitvec(LTG_SIGN(b));
244 }
245 else if (LTG_ISALLTRUE(b))
246 return ASIGLENBIT - sizebitvec(LTG_SIGN(a));
247
248 return hemdistsign(LTG_SIGN(a), LTG_SIGN(b));
249 }
250
251
252 Datum
_ltree_penalty(PG_FUNCTION_ARGS)253 _ltree_penalty(PG_FUNCTION_ARGS)
254 {
255 ltree_gist *origval = (ltree_gist *) DatumGetPointer(((GISTENTRY *) PG_GETARG_POINTER(0))->key);
256 ltree_gist *newval = (ltree_gist *) DatumGetPointer(((GISTENTRY *) PG_GETARG_POINTER(1))->key);
257 float *penalty = (float *) PG_GETARG_POINTER(2);
258
259 *penalty = hemdist(origval, newval);
260 PG_RETURN_POINTER(penalty);
261 }
262
263 typedef struct
264 {
265 OffsetNumber pos;
266 int32 cost;
267 } SPLITCOST;
268
269 static int
comparecost(const void * a,const void * b)270 comparecost(const void *a, const void *b)
271 {
272 return ((const SPLITCOST *) a)->cost - ((const SPLITCOST *) b)->cost;
273 }
274
275 Datum
_ltree_picksplit(PG_FUNCTION_ARGS)276 _ltree_picksplit(PG_FUNCTION_ARGS)
277 {
278 GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
279 GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
280 OffsetNumber k,
281 j;
282 ltree_gist *datum_l,
283 *datum_r;
284 BITVECP union_l,
285 union_r;
286 int32 size_alpha,
287 size_beta;
288 int32 size_waste,
289 waste = -1;
290 int32 nbytes;
291 OffsetNumber seed_1 = 0,
292 seed_2 = 0;
293 OffsetNumber *left,
294 *right;
295 OffsetNumber maxoff;
296 BITVECP ptr;
297 int i;
298 SPLITCOST *costvector;
299 ltree_gist *_k,
300 *_j;
301
302 maxoff = entryvec->n - 2;
303 nbytes = (maxoff + 2) * sizeof(OffsetNumber);
304 v->spl_left = (OffsetNumber *) palloc(nbytes);
305 v->spl_right = (OffsetNumber *) palloc(nbytes);
306
307 for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k))
308 {
309 _k = GETENTRY(entryvec, k);
310 for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j))
311 {
312 size_waste = hemdist(_k, GETENTRY(entryvec, j));
313 if (size_waste > waste)
314 {
315 waste = size_waste;
316 seed_1 = k;
317 seed_2 = j;
318 }
319 }
320 }
321
322 left = v->spl_left;
323 v->spl_nleft = 0;
324 right = v->spl_right;
325 v->spl_nright = 0;
326
327 if (seed_1 == 0 || seed_2 == 0)
328 {
329 seed_1 = 1;
330 seed_2 = 2;
331 }
332
333 /* form initial .. */
334 if (LTG_ISALLTRUE(GETENTRY(entryvec, seed_1)))
335 {
336 datum_l = (ltree_gist *) palloc0(LTG_HDRSIZE);
337 SET_VARSIZE(datum_l, LTG_HDRSIZE);
338 datum_l->flag = LTG_ALLTRUE;
339 }
340 else
341 {
342 datum_l = (ltree_gist *) palloc0(LTG_HDRSIZE + ASIGLEN);
343 SET_VARSIZE(datum_l, LTG_HDRSIZE + ASIGLEN);
344 datum_l->flag = 0;
345 memcpy((void *) LTG_SIGN(datum_l), (void *) LTG_SIGN(GETENTRY(entryvec, seed_1)), sizeof(ABITVEC));
346 }
347 if (LTG_ISALLTRUE(GETENTRY(entryvec, seed_2)))
348 {
349 datum_r = (ltree_gist *) palloc0(LTG_HDRSIZE);
350 SET_VARSIZE(datum_r, LTG_HDRSIZE);
351 datum_r->flag = LTG_ALLTRUE;
352 }
353 else
354 {
355 datum_r = (ltree_gist *) palloc0(LTG_HDRSIZE + ASIGLEN);
356 SET_VARSIZE(datum_r, LTG_HDRSIZE + ASIGLEN);
357 datum_r->flag = 0;
358 memcpy((void *) LTG_SIGN(datum_r), (void *) LTG_SIGN(GETENTRY(entryvec, seed_2)), sizeof(ABITVEC));
359 }
360
361 maxoff = OffsetNumberNext(maxoff);
362 /* sort before ... */
363 costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
364 for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j))
365 {
366 costvector[j - 1].pos = j;
367 _j = GETENTRY(entryvec, j);
368 size_alpha = hemdist(datum_l, _j);
369 size_beta = hemdist(datum_r, _j);
370 costvector[j - 1].cost = Abs(size_alpha - size_beta);
371 }
372 qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);
373
374 union_l = LTG_SIGN(datum_l);
375 union_r = LTG_SIGN(datum_r);
376
377 for (k = 0; k < maxoff; k++)
378 {
379 j = costvector[k].pos;
380 if (j == seed_1)
381 {
382 *left++ = j;
383 v->spl_nleft++;
384 continue;
385 }
386 else if (j == seed_2)
387 {
388 *right++ = j;
389 v->spl_nright++;
390 continue;
391 }
392 _j = GETENTRY(entryvec, j);
393 size_alpha = hemdist(datum_l, _j);
394 size_beta = hemdist(datum_r, _j);
395
396 if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.00001))
397 {
398 if (LTG_ISALLTRUE(datum_l) || LTG_ISALLTRUE(_j))
399 {
400 if (!LTG_ISALLTRUE(datum_l))
401 MemSet((void *) union_l, 0xff, sizeof(ABITVEC));
402 }
403 else
404 {
405 ptr = LTG_SIGN(_j);
406 ALOOPBYTE
407 union_l[i] |= ptr[i];
408 }
409 *left++ = j;
410 v->spl_nleft++;
411 }
412 else
413 {
414 if (LTG_ISALLTRUE(datum_r) || LTG_ISALLTRUE(_j))
415 {
416 if (!LTG_ISALLTRUE(datum_r))
417 MemSet((void *) union_r, 0xff, sizeof(ABITVEC));
418 }
419 else
420 {
421 ptr = LTG_SIGN(_j);
422 ALOOPBYTE
423 union_r[i] |= ptr[i];
424 }
425 *right++ = j;
426 v->spl_nright++;
427 }
428 }
429
430 *right = *left = FirstOffsetNumber;
431
432 v->spl_ldatum = PointerGetDatum(datum_l);
433 v->spl_rdatum = PointerGetDatum(datum_r);
434
435 PG_RETURN_POINTER(v);
436 }
437
438 static bool
gist_te(ltree_gist * key,ltree * query)439 gist_te(ltree_gist *key, ltree *query)
440 {
441 ltree_level *curq = LTREE_FIRST(query);
442 BITVECP sign = LTG_SIGN(key);
443 int qlen = query->numlevel;
444 unsigned int hv;
445
446 if (LTG_ISALLTRUE(key))
447 return true;
448
449 while (qlen > 0)
450 {
451 hv = ltree_crc32_sz(curq->name, curq->len);
452 if (!GETBIT(sign, AHASHVAL(hv)))
453 return false;
454 curq = LEVEL_NEXT(curq);
455 qlen--;
456 }
457
458 return true;
459 }
460
461 static bool
checkcondition_bit(void * checkval,ITEM * val)462 checkcondition_bit(void *checkval, ITEM *val)
463 {
464 return (FLG_CANLOOKSIGN(val->flag)) ? GETBIT(checkval, AHASHVAL(val->val)) : true;
465 }
466
467 static bool
gist_qtxt(ltree_gist * key,ltxtquery * query)468 gist_qtxt(ltree_gist *key, ltxtquery *query)
469 {
470 if (LTG_ISALLTRUE(key))
471 return true;
472
473 return ltree_execute(
474 GETQUERY(query),
475 (void *) LTG_SIGN(key), false,
476 checkcondition_bit
477 );
478 }
479
480 static bool
gist_qe(ltree_gist * key,lquery * query)481 gist_qe(ltree_gist *key, lquery *query)
482 {
483 lquery_level *curq = LQUERY_FIRST(query);
484 BITVECP sign = LTG_SIGN(key);
485 int qlen = query->numlevel;
486
487 if (LTG_ISALLTRUE(key))
488 return true;
489
490 while (qlen > 0)
491 {
492 if (curq->numvar && LQL_CANLOOKSIGN(curq))
493 {
494 bool isexist = false;
495 int vlen = curq->numvar;
496 lquery_variant *curv = LQL_FIRST(curq);
497
498 while (vlen > 0)
499 {
500 if (GETBIT(sign, AHASHVAL(curv->val)))
501 {
502 isexist = true;
503 break;
504 }
505 curv = LVAR_NEXT(curv);
506 vlen--;
507 }
508 if (!isexist)
509 return false;
510 }
511
512 curq = LQL_NEXT(curq);
513 qlen--;
514 }
515
516 return true;
517 }
518
519 static bool
_arrq_cons(ltree_gist * key,ArrayType * _query)520 _arrq_cons(ltree_gist *key, ArrayType *_query)
521 {
522 lquery *query = (lquery *) ARR_DATA_PTR(_query);
523 int num = ArrayGetNItems(ARR_NDIM(_query), ARR_DIMS(_query));
524
525 if (ARR_NDIM(_query) > 1)
526 ereport(ERROR,
527 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
528 errmsg("array must be one-dimensional")));
529 if (array_contains_nulls(_query))
530 ereport(ERROR,
531 (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
532 errmsg("array must not contain nulls")));
533
534 while (num > 0)
535 {
536 if (gist_qe(key, query))
537 return true;
538 num--;
539 query = (lquery *) NEXTVAL(query);
540 }
541 return false;
542 }
543
544 Datum
_ltree_consistent(PG_FUNCTION_ARGS)545 _ltree_consistent(PG_FUNCTION_ARGS)
546 {
547 GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
548 char *query = (char *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(1)));
549 StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
550
551 /* Oid subtype = PG_GETARG_OID(3); */
552 bool *recheck = (bool *) PG_GETARG_POINTER(4);
553 ltree_gist *key = (ltree_gist *) DatumGetPointer(entry->key);
554 bool res = false;
555
556 /* All cases served by this function are inexact */
557 *recheck = true;
558
559 switch (strategy)
560 {
561 case 10:
562 case 11:
563 res = gist_te(key, (ltree *) query);
564 break;
565 case 12:
566 case 13:
567 res = gist_qe(key, (lquery *) query);
568 break;
569 case 14:
570 case 15:
571 res = gist_qtxt(key, (ltxtquery *) query);
572 break;
573 case 16:
574 case 17:
575 res = _arrq_cons(key, (ArrayType *) query);
576 break;
577 default:
578 /* internal error */
579 elog(ERROR, "unrecognized StrategyNumber: %d", strategy);
580 }
581 PG_FREE_IF_COPY(query, 1);
582 PG_RETURN_BOOL(res);
583 }
584