1 /*
2 * contrib/btree_gist/btree_utils_var.c
3 */
4 #include "postgres.h"
5
6 #include "btree_gist.h"
7
8 #include <math.h>
9 #include <limits.h>
10 #include <float.h>
11
12 #include "btree_utils_var.h"
13 #include "utils/pg_locale.h"
14 #include "utils/builtins.h"
15 #include "utils/rel.h"
16
/*
 * Sort element used while picking a split: pairs a key with the offset of
 * the entry it came from, so the offset can be recovered after sorting.
 */
typedef struct
{
	int			i;				/* offset of the entry in the entry vector */
	GBT_VARKEY *t;				/* key the element is sorted by */
} Vsrt;
23
/* Extra argument handed to gbt_vsrt_cmp through qsort_arg */
typedef struct
{
	const gbtree_vinfo *tinfo;	/* type-specific info; supplies f_cmp */
	Oid			collation;		/* collation passed on to f_cmp */
} gbt_vsrt_arg;
29
30
/* Function-info declarations for the PG_FUNCTION_ARGS functions in this file */
PG_FUNCTION_INFO_V1(gbt_var_decompress);
PG_FUNCTION_INFO_V1(gbt_var_fetch);
33
34
35 Datum
gbt_var_decompress(PG_FUNCTION_ARGS)36 gbt_var_decompress(PG_FUNCTION_ARGS)
37 {
38 GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
39 GBT_VARKEY *key = (GBT_VARKEY *) DatumGetPointer(PG_DETOAST_DATUM(entry->key));
40
41 if (key != (GBT_VARKEY *) DatumGetPointer(entry->key))
42 {
43 GISTENTRY *retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
44
45 gistentryinit(*retval, PointerGetDatum(key),
46 entry->rel, entry->page,
47 entry->offset, FALSE);
48
49 PG_RETURN_POINTER(retval);
50 }
51
52 PG_RETURN_POINTER(entry);
53 }
54
55 /* Returns a better readable representation of variable key ( sets pointer ) */
56 GBT_VARKEY_R
gbt_var_key_readable(const GBT_VARKEY * k)57 gbt_var_key_readable(const GBT_VARKEY *k)
58 {
59 GBT_VARKEY_R r;
60
61 r.lower = (bytea *) &(((char *) k)[VARHDRSZ]);
62 if (VARSIZE(k) > (VARHDRSZ + (VARSIZE(r.lower))))
63 r.upper = (bytea *) &(((char *) k)[VARHDRSZ + INTALIGN(VARSIZE(r.lower))]);
64 else
65 r.upper = r.lower;
66 return r;
67 }
68
69
70 /*
71 * Create a leaf-entry to store in the index, from a single Datum.
72 */
73 static GBT_VARKEY *
gbt_var_key_from_datum(const struct varlena * u)74 gbt_var_key_from_datum(const struct varlena * u)
75 {
76 int32 lowersize = VARSIZE(u);
77 GBT_VARKEY *r;
78
79 r = (GBT_VARKEY *) palloc(lowersize + VARHDRSZ);
80 memcpy(VARDATA(r), u, lowersize);
81 SET_VARSIZE(r, lowersize + VARHDRSZ);
82
83 return r;
84 }
85
86 /*
87 * Create an entry to store in the index, from lower and upper bound.
88 */
89 GBT_VARKEY *
gbt_var_key_copy(const GBT_VARKEY_R * u)90 gbt_var_key_copy(const GBT_VARKEY_R *u)
91 {
92 int32 lowersize = VARSIZE(u->lower);
93 int32 uppersize = VARSIZE(u->upper);
94 GBT_VARKEY *r;
95
96 r = (GBT_VARKEY *) palloc0(INTALIGN(lowersize) + uppersize + VARHDRSZ);
97 memcpy(VARDATA(r), u->lower, lowersize);
98 memcpy(VARDATA(r) + INTALIGN(lowersize), u->upper, uppersize);
99 SET_VARSIZE(r, INTALIGN(lowersize) + uppersize + VARHDRSZ);
100
101 return r;
102 }
103
104
105 static GBT_VARKEY *
gbt_var_leaf2node(GBT_VARKEY * leaf,const gbtree_vinfo * tinfo)106 gbt_var_leaf2node(GBT_VARKEY *leaf, const gbtree_vinfo *tinfo)
107 {
108 GBT_VARKEY *out = leaf;
109
110 if (tinfo->f_l2n)
111 out = (*tinfo->f_l2n) (leaf);
112
113 return out;
114 }
115
116
117 /*
118 * returns the common prefix length of a node key
119 */
120 static int32
gbt_var_node_cp_len(const GBT_VARKEY * node,const gbtree_vinfo * tinfo)121 gbt_var_node_cp_len(const GBT_VARKEY *node, const gbtree_vinfo *tinfo)
122 {
123 GBT_VARKEY_R r = gbt_var_key_readable(node);
124 int32 i = 0;
125 int32 l = 0;
126 int32 t1len = VARSIZE(r.lower) - VARHDRSZ;
127 int32 t2len = VARSIZE(r.upper) - VARHDRSZ;
128 int32 ml = Min(t1len, t2len);
129 char *p1 = VARDATA(r.lower);
130 char *p2 = VARDATA(r.upper);
131
132 if (ml == 0)
133 return 0;
134
135 while (i < ml)
136 {
137 if (tinfo->eml > 1 && l == 0)
138 {
139 if ((l = pg_mblen(p1)) != pg_mblen(p2))
140 {
141 return i;
142 }
143 }
144 if (*p1 != *p2)
145 {
146 if (tinfo->eml > 1)
147 {
148 return (i - l + 1);
149 }
150 else
151 {
152 return i;
153 }
154 }
155
156 p1++;
157 p2++;
158 l--;
159 i++;
160 }
161 return (ml); /* lower == upper */
162 }
163
164
165 /*
166 * returns true, if query matches prefix ( common prefix )
167 */
168 static bool
gbt_bytea_pf_match(const bytea * pf,const bytea * query,const gbtree_vinfo * tinfo)169 gbt_bytea_pf_match(const bytea *pf, const bytea *query, const gbtree_vinfo *tinfo)
170 {
171 bool out = FALSE;
172 int32 qlen = VARSIZE(query) - VARHDRSZ;
173 int32 nlen = VARSIZE(pf) - VARHDRSZ;
174
175 if (nlen <= qlen)
176 {
177 char *q = VARDATA(query);
178 char *n = VARDATA(pf);
179
180 out = (memcmp(q, n, nlen) == 0);
181 }
182
183 return out;
184 }
185
186
187 /*
188 * returns true, if query matches node using common prefix
189 */
190 static bool
gbt_var_node_pf_match(const GBT_VARKEY_R * node,const bytea * query,const gbtree_vinfo * tinfo)191 gbt_var_node_pf_match(const GBT_VARKEY_R *node, const bytea *query, const gbtree_vinfo *tinfo)
192 {
193 return (tinfo->trnc && (
194 gbt_bytea_pf_match(node->lower, query, tinfo) ||
195 gbt_bytea_pf_match(node->upper, query, tinfo)
196 ));
197 }
198
199
200 /*
201 * truncates / compresses the node key
202 * cpf_length .. common prefix length
203 */
204 static GBT_VARKEY *
gbt_var_node_truncate(const GBT_VARKEY * node,int32 cpf_length,const gbtree_vinfo * tinfo)205 gbt_var_node_truncate(const GBT_VARKEY *node, int32 cpf_length, const gbtree_vinfo *tinfo)
206 {
207 GBT_VARKEY *out = NULL;
208 GBT_VARKEY_R r = gbt_var_key_readable(node);
209 int32 len1 = VARSIZE(r.lower) - VARHDRSZ;
210 int32 len2 = VARSIZE(r.upper) - VARHDRSZ;
211 int32 si;
212 char *out2;
213
214 len1 = Min(len1, (cpf_length + 1));
215 len2 = Min(len2, (cpf_length + 1));
216
217 si = 2 * VARHDRSZ + INTALIGN(len1 + VARHDRSZ) + len2;
218 out = (GBT_VARKEY *) palloc0(si);
219 SET_VARSIZE(out, si);
220
221 memcpy(VARDATA(out), r.lower, len1 + VARHDRSZ);
222 SET_VARSIZE(VARDATA(out), len1 + VARHDRSZ);
223
224 out2 = VARDATA(out) + INTALIGN(len1 + VARHDRSZ);
225 memcpy(out2, r.upper, len2 + VARHDRSZ);
226 SET_VARSIZE(out2, len2 + VARHDRSZ);
227
228 return out;
229 }
230
231
232
233 void
gbt_var_bin_union(Datum * u,GBT_VARKEY * e,Oid collation,const gbtree_vinfo * tinfo)234 gbt_var_bin_union(Datum *u, GBT_VARKEY *e, Oid collation,
235 const gbtree_vinfo *tinfo)
236 {
237 GBT_VARKEY_R eo = gbt_var_key_readable(e);
238 GBT_VARKEY_R nr;
239
240 if (eo.lower == eo.upper) /* leaf */
241 {
242 GBT_VARKEY *tmp;
243
244 tmp = gbt_var_leaf2node(e, tinfo);
245 if (tmp != e)
246 eo = gbt_var_key_readable(tmp);
247 }
248
249 if (DatumGetPointer(*u))
250 {
251 GBT_VARKEY_R ro = gbt_var_key_readable((GBT_VARKEY *) DatumGetPointer(*u));
252 bool update = false;
253
254 nr.lower = ro.lower;
255 nr.upper = ro.upper;
256
257 if ((*tinfo->f_cmp) (ro.lower, eo.lower, collation) > 0)
258 {
259 nr.lower = eo.lower;
260 update = true;
261 }
262
263 if ((*tinfo->f_cmp) (ro.upper, eo.upper, collation) < 0)
264 {
265 nr.upper = eo.upper;
266 update = true;
267 }
268
269 if (update)
270 *u = PointerGetDatum(gbt_var_key_copy(&nr));
271 }
272 else
273 {
274 nr.lower = eo.lower;
275 nr.upper = eo.upper;
276 *u = PointerGetDatum(gbt_var_key_copy(&nr));
277 }
278 }
279
280
281 GISTENTRY *
gbt_var_compress(GISTENTRY * entry,const gbtree_vinfo * tinfo)282 gbt_var_compress(GISTENTRY *entry, const gbtree_vinfo *tinfo)
283 {
284 GISTENTRY *retval;
285
286 if (entry->leafkey)
287 {
288 struct varlena *leaf = PG_DETOAST_DATUM(entry->key);
289 GBT_VARKEY *r;
290
291 r = gbt_var_key_from_datum(leaf);
292
293 retval = palloc(sizeof(GISTENTRY));
294 gistentryinit(*retval, PointerGetDatum(r),
295 entry->rel, entry->page,
296 entry->offset, TRUE);
297 }
298 else
299 retval = entry;
300
301 return (retval);
302 }
303
304
305 Datum
gbt_var_fetch(PG_FUNCTION_ARGS)306 gbt_var_fetch(PG_FUNCTION_ARGS)
307 {
308 GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
309 GBT_VARKEY *key = (GBT_VARKEY *) DatumGetPointer(PG_DETOAST_DATUM(entry->key));
310 GBT_VARKEY_R r = gbt_var_key_readable(key);
311 GISTENTRY *retval;
312
313 retval = palloc(sizeof(GISTENTRY));
314 gistentryinit(*retval, PointerGetDatum(r.lower),
315 entry->rel, entry->page,
316 entry->offset, TRUE);
317
318 PG_RETURN_POINTER(retval);
319 }
320
321
322 GBT_VARKEY *
gbt_var_union(const GistEntryVector * entryvec,int32 * size,Oid collation,const gbtree_vinfo * tinfo)323 gbt_var_union(const GistEntryVector *entryvec, int32 *size, Oid collation,
324 const gbtree_vinfo *tinfo)
325 {
326 int i = 0,
327 numranges = entryvec->n;
328 GBT_VARKEY *cur;
329 Datum out;
330 GBT_VARKEY_R rk;
331
332 *size = sizeof(GBT_VARKEY);
333
334 cur = (GBT_VARKEY *) DatumGetPointer(entryvec->vector[0].key);
335 rk = gbt_var_key_readable(cur);
336 out = PointerGetDatum(gbt_var_key_copy(&rk));
337
338 for (i = 1; i < numranges; i++)
339 {
340 cur = (GBT_VARKEY *) DatumGetPointer(entryvec->vector[i].key);
341 gbt_var_bin_union(&out, cur, collation, tinfo);
342 }
343
344
345 /* Truncate (=compress) key */
346 if (tinfo->trnc)
347 {
348 int32 plen;
349 GBT_VARKEY *trc = NULL;
350
351 plen = gbt_var_node_cp_len((GBT_VARKEY *) DatumGetPointer(out), tinfo);
352 trc = gbt_var_node_truncate((GBT_VARKEY *) DatumGetPointer(out), plen + 1, tinfo);
353
354 out = PointerGetDatum(trc);
355 }
356
357 return ((GBT_VARKEY *) DatumGetPointer(out));
358 }
359
360
361 bool
gbt_var_same(Datum d1,Datum d2,Oid collation,const gbtree_vinfo * tinfo)362 gbt_var_same(Datum d1, Datum d2, Oid collation,
363 const gbtree_vinfo *tinfo)
364 {
365 GBT_VARKEY *t1 = (GBT_VARKEY *) DatumGetPointer(d1);
366 GBT_VARKEY *t2 = (GBT_VARKEY *) DatumGetPointer(d2);
367 GBT_VARKEY_R r1,
368 r2;
369
370 r1 = gbt_var_key_readable(t1);
371 r2 = gbt_var_key_readable(t2);
372
373 return ((*tinfo->f_cmp) (r1.lower, r2.lower, collation) == 0 &&
374 (*tinfo->f_cmp) (r1.upper, r2.upper, collation) == 0);
375 }
376
377
/*
 * Penalty method: cost of inserting key n below the subtree keyed by o.
 *
 *	res			output; receives the penalty and is also the return value
 *	o			existing entry; its relation's attribute count scales the result
 *	n			entry to be inserted
 *	collation	passed through to the type's comparison function
 *	tinfo		type-specific support info
 *
 * The penalty stays 0 when both bounds of o are empty, or when n is already
 * covered by o (each bound either compares inside the range or has o's bound
 * as a byte-wise prefix).  Otherwise it is computed from how the common
 * prefix of o's bounds changes once n is merged in.
 */
float *
gbt_var_penalty(float *res, const GISTENTRY *o, const GISTENTRY *n,
				Oid collation, const gbtree_vinfo *tinfo)
{
	GBT_VARKEY *orge = (GBT_VARKEY *) DatumGetPointer(o->key);
	GBT_VARKEY *newe = (GBT_VARKEY *) DatumGetPointer(n->key);
	GBT_VARKEY_R ok,
				nk;

	*res = 0.0;

	nk = gbt_var_key_readable(newe);
	if (nk.lower == nk.upper)	/* leaf */
	{
		GBT_VARKEY *tmp;

		/* compare against the node form of the new key, if the type has one */
		tmp = gbt_var_leaf2node(newe, tinfo);
		if (tmp != newe)
			nk = gbt_var_key_readable(tmp);
	}
	ok = gbt_var_key_readable(orge);

	/* both bounds of the existing key are empty: no penalty */
	if ((VARSIZE(ok.lower) - VARHDRSZ) == 0 && (VARSIZE(ok.upper) - VARHDRSZ) == 0)
		*res = 0.0;
	/* penalize only if the new key is NOT already covered by the old one */
	else if (!(((*tinfo->f_cmp) (nk.lower, ok.lower, collation) >= 0 ||
				gbt_bytea_pf_match(ok.lower, nk.lower, tinfo)) &&
			   ((*tinfo->f_cmp) (nk.upper, ok.upper, collation) <= 0 ||
				gbt_bytea_pf_match(ok.upper, nk.upper, tinfo))))
	{
		Datum		d = PointerGetDatum(0);
		double		dres;
		int32		ol,
					ul;

		/* ol: common prefix length of the old key alone */
		gbt_var_bin_union(&d, orge, collation, tinfo);
		ol = gbt_var_node_cp_len((GBT_VARKEY *) DatumGetPointer(d), tinfo);
		/* ul: common prefix length after the new key has been merged in */
		gbt_var_bin_union(&d, newe, collation, tinfo);
		ul = gbt_var_node_cp_len((GBT_VARKEY *) DatumGetPointer(d), tinfo);

		if (ul < ol)
		{
			dres = (ol - ul);	/* reduction of common prefix len */
		}
		else
		{
			GBT_VARKEY_R uk = gbt_var_key_readable((GBT_VARKEY *) DatumGetPointer(d));
			unsigned char tmp[4];

			/*
			 * Prefix did not shrink: compare the byte at offset ul of the
			 * old and the merged bounds.  A bound with no byte at that
			 * offset contributes 0.
			 */
			tmp[0] = (unsigned char) (((VARSIZE(ok.lower) - VARHDRSZ) <= ul) ? 0 : (VARDATA(ok.lower)[ul]));
			tmp[1] = (unsigned char) (((VARSIZE(uk.lower) - VARHDRSZ) <= ul) ? 0 : (VARDATA(uk.lower)[ul]));
			tmp[2] = (unsigned char) (((VARSIZE(ok.upper) - VARHDRSZ) <= ul) ? 0 : (VARDATA(ok.upper)[ul]));
			tmp[3] = (unsigned char) (((VARSIZE(uk.upper) - VARHDRSZ) <= ul) ? 0 : (VARDATA(uk.upper)[ul]));
			dres = Abs(tmp[0] - tmp[1]) + Abs(tmp[3] - tmp[2]);
			dres /= 256.0;
		}

		/* FLT_MIN keeps the penalty non-zero even when dres is 0 */
		*res += FLT_MIN;
		*res += (float) (dres / ((double) (ol + 1)));
		/*
		 * Scale by FLT_MAX / (number of attributes + 1).
		 * NOTE(review): presumably keeps penalties comparable across the
		 * columns of a multi-column index -- confirm.
		 */
		*res *= (FLT_MAX / (o->rel->rd_att->natts + 1));
	}

	return res;
}
441
442
443 static int
gbt_vsrt_cmp(const void * a,const void * b,void * arg)444 gbt_vsrt_cmp(const void *a, const void *b, void *arg)
445 {
446 GBT_VARKEY_R ar = gbt_var_key_readable(((const Vsrt *) a)->t);
447 GBT_VARKEY_R br = gbt_var_key_readable(((const Vsrt *) b)->t);
448 const gbt_vsrt_arg *varg = (const gbt_vsrt_arg *) arg;
449 int res;
450
451 res = (*varg->tinfo->f_cmp) (ar.lower, br.lower, varg->collation);
452 if (res == 0)
453 return (*varg->tinfo->f_cmp) (ar.upper, br.upper, varg->collation);
454
455 return res;
456 }
457
458 GIST_SPLITVEC *
gbt_var_picksplit(const GistEntryVector * entryvec,GIST_SPLITVEC * v,Oid collation,const gbtree_vinfo * tinfo)459 gbt_var_picksplit(const GistEntryVector *entryvec, GIST_SPLITVEC *v,
460 Oid collation, const gbtree_vinfo *tinfo)
461 {
462 OffsetNumber i,
463 maxoff = entryvec->n - 1;
464 Vsrt *arr;
465 int svcntr = 0,
466 nbytes;
467 char *cur;
468 GBT_VARKEY **sv = NULL;
469 gbt_vsrt_arg varg;
470
471 arr = (Vsrt *) palloc((maxoff + 1) * sizeof(Vsrt));
472 nbytes = (maxoff + 2) * sizeof(OffsetNumber);
473 v->spl_left = (OffsetNumber *) palloc(nbytes);
474 v->spl_right = (OffsetNumber *) palloc(nbytes);
475 v->spl_ldatum = PointerGetDatum(0);
476 v->spl_rdatum = PointerGetDatum(0);
477 v->spl_nleft = 0;
478 v->spl_nright = 0;
479
480 sv = palloc(sizeof(bytea *) * (maxoff + 1));
481
482 /* Sort entries */
483
484 for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
485 {
486 GBT_VARKEY_R ro;
487
488 cur = (char *) DatumGetPointer(entryvec->vector[i].key);
489 ro = gbt_var_key_readable((GBT_VARKEY *) cur);
490 if (ro.lower == ro.upper) /* leaf */
491 {
492 sv[svcntr] = gbt_var_leaf2node((GBT_VARKEY *) cur, tinfo);
493 arr[i].t = sv[svcntr];
494 if (sv[svcntr] != (GBT_VARKEY *) cur)
495 svcntr++;
496 }
497 else
498 arr[i].t = (GBT_VARKEY *) cur;
499 arr[i].i = i;
500 }
501
502 /* sort */
503 varg.tinfo = tinfo;
504 varg.collation = collation;
505 qsort_arg((void *) &arr[FirstOffsetNumber],
506 maxoff - FirstOffsetNumber + 1,
507 sizeof(Vsrt),
508 gbt_vsrt_cmp,
509 (void *) &varg);
510
511 /* We do simply create two parts */
512
513 for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
514 {
515 if (i <= (maxoff - FirstOffsetNumber + 1) / 2)
516 {
517 gbt_var_bin_union(&v->spl_ldatum, arr[i].t, collation, tinfo);
518 v->spl_left[v->spl_nleft] = arr[i].i;
519 v->spl_nleft++;
520 }
521 else
522 {
523 gbt_var_bin_union(&v->spl_rdatum, arr[i].t, collation, tinfo);
524 v->spl_right[v->spl_nright] = arr[i].i;
525 v->spl_nright++;
526 }
527 }
528
529 /* Truncate (=compress) key */
530 if (tinfo->trnc)
531 {
532 int32 ll = gbt_var_node_cp_len((GBT_VARKEY *) DatumGetPointer(v->spl_ldatum), tinfo);
533 int32 lr = gbt_var_node_cp_len((GBT_VARKEY *) DatumGetPointer(v->spl_rdatum), tinfo);
534 GBT_VARKEY *dl;
535 GBT_VARKEY *dr;
536
537 ll = Max(ll, lr);
538 ll++;
539
540 dl = gbt_var_node_truncate((GBT_VARKEY *) DatumGetPointer(v->spl_ldatum), ll, tinfo);
541 dr = gbt_var_node_truncate((GBT_VARKEY *) DatumGetPointer(v->spl_rdatum), ll, tinfo);
542 v->spl_ldatum = PointerGetDatum(dl);
543 v->spl_rdatum = PointerGetDatum(dr);
544 }
545
546 return v;
547 }
548
549
550 /*
551 * The GiST consistent method
552 */
553 bool
gbt_var_consistent(GBT_VARKEY_R * key,const void * query,StrategyNumber strategy,Oid collation,bool is_leaf,const gbtree_vinfo * tinfo)554 gbt_var_consistent(GBT_VARKEY_R *key,
555 const void *query,
556 StrategyNumber strategy,
557 Oid collation,
558 bool is_leaf,
559 const gbtree_vinfo *tinfo)
560 {
561 bool retval = FALSE;
562
563 switch (strategy)
564 {
565 case BTLessEqualStrategyNumber:
566 if (is_leaf)
567 retval = (*tinfo->f_ge) (query, key->lower, collation);
568 else
569 retval = (*tinfo->f_cmp) (query, key->lower, collation) >= 0
570 || gbt_var_node_pf_match(key, query, tinfo);
571 break;
572 case BTLessStrategyNumber:
573 if (is_leaf)
574 retval = (*tinfo->f_gt) (query, key->lower, collation);
575 else
576 retval = (*tinfo->f_cmp) (query, key->lower, collation) >= 0
577 || gbt_var_node_pf_match(key, query, tinfo);
578 break;
579 case BTEqualStrategyNumber:
580 if (is_leaf)
581 retval = (*tinfo->f_eq) (query, key->lower, collation);
582 else
583 retval =
584 ((*tinfo->f_cmp) (key->lower, query, collation) <= 0 &&
585 (*tinfo->f_cmp) (query, key->upper, collation) <= 0) ||
586 gbt_var_node_pf_match(key, query, tinfo);
587 break;
588 case BTGreaterStrategyNumber:
589 if (is_leaf)
590 retval = (*tinfo->f_lt) (query, key->upper, collation);
591 else
592 retval = (*tinfo->f_cmp) (query, key->upper, collation) <= 0
593 || gbt_var_node_pf_match(key, query, tinfo);
594 break;
595 case BTGreaterEqualStrategyNumber:
596 if (is_leaf)
597 retval = (*tinfo->f_le) (query, key->upper, collation);
598 else
599 retval = (*tinfo->f_cmp) (query, key->upper, collation) <= 0
600 || gbt_var_node_pf_match(key, query, tinfo);
601 break;
602 case BtreeGistNotEqualStrategyNumber:
603 retval = !((*tinfo->f_eq) (query, key->lower, collation) &&
604 (*tinfo->f_eq) (query, key->upper, collation));
605 break;
606 default:
607 retval = FALSE;
608 }
609
610 return retval;
611 }
612