1 /*
2 * contrib/btree_gist/btree_utils_var.c
3 */
4 #include "postgres.h"
5
6 #include "btree_gist.h"
7
8 #include <math.h>
9 #include <limits.h>
10 #include <float.h>
11
12 #include "btree_utils_var.h"
13 #include "utils/pg_locale.h"
14 #include "utils/builtins.h"
15 #include "utils/rel.h"
16
17 /* used for key sorting */
18 typedef struct
19 {
20 int i;
21 GBT_VARKEY *t;
22 } Vsrt;
23
24 typedef struct
25 {
26 const gbtree_vinfo *tinfo;
27 Oid collation;
28 FmgrInfo *flinfo;
29 } gbt_vsrt_arg;
30
31
32 PG_FUNCTION_INFO_V1(gbt_var_decompress);
33 PG_FUNCTION_INFO_V1(gbt_var_fetch);
34
35
36 Datum
gbt_var_decompress(PG_FUNCTION_ARGS)37 gbt_var_decompress(PG_FUNCTION_ARGS)
38 {
39 GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
40 GBT_VARKEY *key = (GBT_VARKEY *) PG_DETOAST_DATUM(entry->key);
41
42 if (key != (GBT_VARKEY *) DatumGetPointer(entry->key))
43 {
44 GISTENTRY *retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
45
46 gistentryinit(*retval, PointerGetDatum(key),
47 entry->rel, entry->page,
48 entry->offset, false);
49
50 PG_RETURN_POINTER(retval);
51 }
52
53 PG_RETURN_POINTER(entry);
54 }
55
56 /* Returns a better readable representation of variable key ( sets pointer ) */
57 GBT_VARKEY_R
gbt_var_key_readable(const GBT_VARKEY * k)58 gbt_var_key_readable(const GBT_VARKEY *k)
59 {
60 GBT_VARKEY_R r;
61
62 r.lower = (bytea *) &(((char *) k)[VARHDRSZ]);
63 if (VARSIZE(k) > (VARHDRSZ + (VARSIZE(r.lower))))
64 r.upper = (bytea *) &(((char *) k)[VARHDRSZ + INTALIGN(VARSIZE(r.lower))]);
65 else
66 r.upper = r.lower;
67 return r;
68 }
69
70
71 /*
72 * Create a leaf-entry to store in the index, from a single Datum.
73 */
74 static GBT_VARKEY *
gbt_var_key_from_datum(const struct varlena * u)75 gbt_var_key_from_datum(const struct varlena *u)
76 {
77 int32 lowersize = VARSIZE(u);
78 GBT_VARKEY *r;
79
80 r = (GBT_VARKEY *) palloc(lowersize + VARHDRSZ);
81 memcpy(VARDATA(r), u, lowersize);
82 SET_VARSIZE(r, lowersize + VARHDRSZ);
83
84 return r;
85 }
86
87 /*
88 * Create an entry to store in the index, from lower and upper bound.
89 */
90 GBT_VARKEY *
gbt_var_key_copy(const GBT_VARKEY_R * u)91 gbt_var_key_copy(const GBT_VARKEY_R *u)
92 {
93 int32 lowersize = VARSIZE(u->lower);
94 int32 uppersize = VARSIZE(u->upper);
95 GBT_VARKEY *r;
96
97 r = (GBT_VARKEY *) palloc0(INTALIGN(lowersize) + uppersize + VARHDRSZ);
98 memcpy(VARDATA(r), u->lower, lowersize);
99 memcpy(VARDATA(r) + INTALIGN(lowersize), u->upper, uppersize);
100 SET_VARSIZE(r, INTALIGN(lowersize) + uppersize + VARHDRSZ);
101
102 return r;
103 }
104
105
106 static GBT_VARKEY *
gbt_var_leaf2node(GBT_VARKEY * leaf,const gbtree_vinfo * tinfo,FmgrInfo * flinfo)107 gbt_var_leaf2node(GBT_VARKEY *leaf, const gbtree_vinfo *tinfo, FmgrInfo *flinfo)
108 {
109 GBT_VARKEY *out = leaf;
110
111 if (tinfo->f_l2n)
112 out = tinfo->f_l2n(leaf, flinfo);
113
114 return out;
115 }
116
117
118 /*
119 * returns the common prefix length of a node key
120 */
121 static int32
gbt_var_node_cp_len(const GBT_VARKEY * node,const gbtree_vinfo * tinfo)122 gbt_var_node_cp_len(const GBT_VARKEY *node, const gbtree_vinfo *tinfo)
123 {
124 GBT_VARKEY_R r = gbt_var_key_readable(node);
125 int32 i = 0;
126 int32 l = 0;
127 int32 t1len = VARSIZE(r.lower) - VARHDRSZ;
128 int32 t2len = VARSIZE(r.upper) - VARHDRSZ;
129 int32 ml = Min(t1len, t2len);
130 char *p1 = VARDATA(r.lower);
131 char *p2 = VARDATA(r.upper);
132
133 if (ml == 0)
134 return 0;
135
136 while (i < ml)
137 {
138 if (tinfo->eml > 1 && l == 0)
139 {
140 if ((l = pg_mblen(p1)) != pg_mblen(p2))
141 {
142 return i;
143 }
144 }
145 if (*p1 != *p2)
146 {
147 if (tinfo->eml > 1)
148 {
149 return (i - l + 1);
150 }
151 else
152 {
153 return i;
154 }
155 }
156
157 p1++;
158 p2++;
159 l--;
160 i++;
161 }
162 return ml; /* lower == upper */
163 }
164
165
166 /*
167 * returns true, if query matches prefix ( common prefix )
168 */
169 static bool
gbt_bytea_pf_match(const bytea * pf,const bytea * query,const gbtree_vinfo * tinfo)170 gbt_bytea_pf_match(const bytea *pf, const bytea *query, const gbtree_vinfo *tinfo)
171 {
172 bool out = false;
173 int32 qlen = VARSIZE(query) - VARHDRSZ;
174 int32 nlen = VARSIZE(pf) - VARHDRSZ;
175
176 if (nlen <= qlen)
177 {
178 char *q = VARDATA(query);
179 char *n = VARDATA(pf);
180
181 out = (memcmp(q, n, nlen) == 0);
182 }
183
184 return out;
185 }
186
187
188 /*
189 * returns true, if query matches node using common prefix
190 */
191 static bool
gbt_var_node_pf_match(const GBT_VARKEY_R * node,const bytea * query,const gbtree_vinfo * tinfo)192 gbt_var_node_pf_match(const GBT_VARKEY_R *node, const bytea *query, const gbtree_vinfo *tinfo)
193 {
194 return (tinfo->trnc && (
195 gbt_bytea_pf_match(node->lower, query, tinfo) ||
196 gbt_bytea_pf_match(node->upper, query, tinfo)
197 ));
198 }
199
200
201 /*
202 * truncates / compresses the node key
203 * cpf_length .. common prefix length
204 */
205 static GBT_VARKEY *
gbt_var_node_truncate(const GBT_VARKEY * node,int32 cpf_length,const gbtree_vinfo * tinfo)206 gbt_var_node_truncate(const GBT_VARKEY *node, int32 cpf_length, const gbtree_vinfo *tinfo)
207 {
208 GBT_VARKEY *out = NULL;
209 GBT_VARKEY_R r = gbt_var_key_readable(node);
210 int32 len1 = VARSIZE(r.lower) - VARHDRSZ;
211 int32 len2 = VARSIZE(r.upper) - VARHDRSZ;
212 int32 si;
213 char *out2;
214
215 len1 = Min(len1, (cpf_length + 1));
216 len2 = Min(len2, (cpf_length + 1));
217
218 si = 2 * VARHDRSZ + INTALIGN(len1 + VARHDRSZ) + len2;
219 out = (GBT_VARKEY *) palloc0(si);
220 SET_VARSIZE(out, si);
221
222 memcpy(VARDATA(out), r.lower, len1 + VARHDRSZ);
223 SET_VARSIZE(VARDATA(out), len1 + VARHDRSZ);
224
225 out2 = VARDATA(out) + INTALIGN(len1 + VARHDRSZ);
226 memcpy(out2, r.upper, len2 + VARHDRSZ);
227 SET_VARSIZE(out2, len2 + VARHDRSZ);
228
229 return out;
230 }
231
232
233
234 void
gbt_var_bin_union(Datum * u,GBT_VARKEY * e,Oid collation,const gbtree_vinfo * tinfo,FmgrInfo * flinfo)235 gbt_var_bin_union(Datum *u, GBT_VARKEY *e, Oid collation,
236 const gbtree_vinfo *tinfo, FmgrInfo *flinfo)
237 {
238 GBT_VARKEY_R eo = gbt_var_key_readable(e);
239 GBT_VARKEY_R nr;
240
241 if (eo.lower == eo.upper) /* leaf */
242 {
243 GBT_VARKEY *tmp;
244
245 tmp = gbt_var_leaf2node(e, tinfo, flinfo);
246 if (tmp != e)
247 eo = gbt_var_key_readable(tmp);
248 }
249
250 if (DatumGetPointer(*u))
251 {
252 GBT_VARKEY_R ro = gbt_var_key_readable((GBT_VARKEY *) DatumGetPointer(*u));
253 bool update = false;
254
255 nr.lower = ro.lower;
256 nr.upper = ro.upper;
257
258 if (tinfo->f_cmp(ro.lower, eo.lower, collation, flinfo) > 0)
259 {
260 nr.lower = eo.lower;
261 update = true;
262 }
263
264 if (tinfo->f_cmp(ro.upper, eo.upper, collation, flinfo) < 0)
265 {
266 nr.upper = eo.upper;
267 update = true;
268 }
269
270 if (update)
271 *u = PointerGetDatum(gbt_var_key_copy(&nr));
272 }
273 else
274 {
275 nr.lower = eo.lower;
276 nr.upper = eo.upper;
277 *u = PointerGetDatum(gbt_var_key_copy(&nr));
278 }
279 }
280
281
282 GISTENTRY *
gbt_var_compress(GISTENTRY * entry,const gbtree_vinfo * tinfo)283 gbt_var_compress(GISTENTRY *entry, const gbtree_vinfo *tinfo)
284 {
285 GISTENTRY *retval;
286
287 if (entry->leafkey)
288 {
289 struct varlena *leaf = PG_DETOAST_DATUM(entry->key);
290 GBT_VARKEY *r;
291
292 r = gbt_var_key_from_datum(leaf);
293
294 retval = palloc(sizeof(GISTENTRY));
295 gistentryinit(*retval, PointerGetDatum(r),
296 entry->rel, entry->page,
297 entry->offset, true);
298 }
299 else
300 retval = entry;
301
302 return retval;
303 }
304
305
306 Datum
gbt_var_fetch(PG_FUNCTION_ARGS)307 gbt_var_fetch(PG_FUNCTION_ARGS)
308 {
309 GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
310 GBT_VARKEY *key = (GBT_VARKEY *) PG_DETOAST_DATUM(entry->key);
311 GBT_VARKEY_R r = gbt_var_key_readable(key);
312 GISTENTRY *retval;
313
314 retval = palloc(sizeof(GISTENTRY));
315 gistentryinit(*retval, PointerGetDatum(r.lower),
316 entry->rel, entry->page,
317 entry->offset, true);
318
319 PG_RETURN_POINTER(retval);
320 }
321
322
323 GBT_VARKEY *
gbt_var_union(const GistEntryVector * entryvec,int32 * size,Oid collation,const gbtree_vinfo * tinfo,FmgrInfo * flinfo)324 gbt_var_union(const GistEntryVector *entryvec, int32 *size, Oid collation,
325 const gbtree_vinfo *tinfo, FmgrInfo *flinfo)
326 {
327 int i = 0,
328 numranges = entryvec->n;
329 GBT_VARKEY *cur;
330 Datum out;
331 GBT_VARKEY_R rk;
332
333 *size = sizeof(GBT_VARKEY);
334
335 cur = (GBT_VARKEY *) DatumGetPointer(entryvec->vector[0].key);
336 rk = gbt_var_key_readable(cur);
337 out = PointerGetDatum(gbt_var_key_copy(&rk));
338
339 for (i = 1; i < numranges; i++)
340 {
341 cur = (GBT_VARKEY *) DatumGetPointer(entryvec->vector[i].key);
342 gbt_var_bin_union(&out, cur, collation, tinfo, flinfo);
343 }
344
345
346 /* Truncate (=compress) key */
347 if (tinfo->trnc)
348 {
349 int32 plen;
350 GBT_VARKEY *trc = NULL;
351
352 plen = gbt_var_node_cp_len((GBT_VARKEY *) DatumGetPointer(out), tinfo);
353 trc = gbt_var_node_truncate((GBT_VARKEY *) DatumGetPointer(out), plen + 1, tinfo);
354
355 out = PointerGetDatum(trc);
356 }
357
358 return ((GBT_VARKEY *) DatumGetPointer(out));
359 }
360
361
362 bool
gbt_var_same(Datum d1,Datum d2,Oid collation,const gbtree_vinfo * tinfo,FmgrInfo * flinfo)363 gbt_var_same(Datum d1, Datum d2, Oid collation,
364 const gbtree_vinfo *tinfo, FmgrInfo *flinfo)
365 {
366 GBT_VARKEY *t1 = (GBT_VARKEY *) DatumGetPointer(d1);
367 GBT_VARKEY *t2 = (GBT_VARKEY *) DatumGetPointer(d2);
368 GBT_VARKEY_R r1,
369 r2;
370
371 r1 = gbt_var_key_readable(t1);
372 r2 = gbt_var_key_readable(t2);
373
374 return (tinfo->f_cmp(r1.lower, r2.lower, collation, flinfo) == 0 &&
375 tinfo->f_cmp(r1.upper, r2.upper, collation, flinfo) == 0);
376 }
377
378
379 float *
gbt_var_penalty(float * res,const GISTENTRY * o,const GISTENTRY * n,Oid collation,const gbtree_vinfo * tinfo,FmgrInfo * flinfo)380 gbt_var_penalty(float *res, const GISTENTRY *o, const GISTENTRY *n,
381 Oid collation, const gbtree_vinfo *tinfo, FmgrInfo *flinfo)
382 {
383 GBT_VARKEY *orge = (GBT_VARKEY *) DatumGetPointer(o->key);
384 GBT_VARKEY *newe = (GBT_VARKEY *) DatumGetPointer(n->key);
385 GBT_VARKEY_R ok,
386 nk;
387
388 *res = 0.0;
389
390 nk = gbt_var_key_readable(newe);
391 if (nk.lower == nk.upper) /* leaf */
392 {
393 GBT_VARKEY *tmp;
394
395 tmp = gbt_var_leaf2node(newe, tinfo, flinfo);
396 if (tmp != newe)
397 nk = gbt_var_key_readable(tmp);
398 }
399 ok = gbt_var_key_readable(orge);
400
401 if ((VARSIZE(ok.lower) - VARHDRSZ) == 0 && (VARSIZE(ok.upper) - VARHDRSZ) == 0)
402 *res = 0.0;
403 else if (!((tinfo->f_cmp(nk.lower, ok.lower, collation, flinfo) >= 0 ||
404 gbt_bytea_pf_match(ok.lower, nk.lower, tinfo)) &&
405 (tinfo->f_cmp(nk.upper, ok.upper, collation, flinfo) <= 0 ||
406 gbt_bytea_pf_match(ok.upper, nk.upper, tinfo))))
407 {
408 Datum d = PointerGetDatum(0);
409 double dres;
410 int32 ol,
411 ul;
412
413 gbt_var_bin_union(&d, orge, collation, tinfo, flinfo);
414 ol = gbt_var_node_cp_len((GBT_VARKEY *) DatumGetPointer(d), tinfo);
415 gbt_var_bin_union(&d, newe, collation, tinfo, flinfo);
416 ul = gbt_var_node_cp_len((GBT_VARKEY *) DatumGetPointer(d), tinfo);
417
418 if (ul < ol)
419 {
420 dres = (ol - ul); /* reduction of common prefix len */
421 }
422 else
423 {
424 GBT_VARKEY_R uk = gbt_var_key_readable((GBT_VARKEY *) DatumGetPointer(d));
425 unsigned char tmp[4];
426
427 tmp[0] = (unsigned char) (((VARSIZE(ok.lower) - VARHDRSZ) <= ul) ? 0 : (VARDATA(ok.lower)[ul]));
428 tmp[1] = (unsigned char) (((VARSIZE(uk.lower) - VARHDRSZ) <= ul) ? 0 : (VARDATA(uk.lower)[ul]));
429 tmp[2] = (unsigned char) (((VARSIZE(ok.upper) - VARHDRSZ) <= ul) ? 0 : (VARDATA(ok.upper)[ul]));
430 tmp[3] = (unsigned char) (((VARSIZE(uk.upper) - VARHDRSZ) <= ul) ? 0 : (VARDATA(uk.upper)[ul]));
431 dres = Abs(tmp[0] - tmp[1]) + Abs(tmp[3] - tmp[2]);
432 dres /= 256.0;
433 }
434
435 *res += FLT_MIN;
436 *res += (float) (dres / ((double) (ol + 1)));
437 *res *= (FLT_MAX / (o->rel->rd_att->natts + 1));
438 }
439
440 return res;
441 }
442
443
444 static int
gbt_vsrt_cmp(const void * a,const void * b,void * arg)445 gbt_vsrt_cmp(const void *a, const void *b, void *arg)
446 {
447 GBT_VARKEY_R ar = gbt_var_key_readable(((const Vsrt *) a)->t);
448 GBT_VARKEY_R br = gbt_var_key_readable(((const Vsrt *) b)->t);
449 const gbt_vsrt_arg *varg = (const gbt_vsrt_arg *) arg;
450 int res;
451
452 res = varg->tinfo->f_cmp(ar.lower, br.lower, varg->collation, varg->flinfo);
453 if (res == 0)
454 return varg->tinfo->f_cmp(ar.upper, br.upper, varg->collation, varg->flinfo);
455
456 return res;
457 }
458
459 GIST_SPLITVEC *
gbt_var_picksplit(const GistEntryVector * entryvec,GIST_SPLITVEC * v,Oid collation,const gbtree_vinfo * tinfo,FmgrInfo * flinfo)460 gbt_var_picksplit(const GistEntryVector *entryvec, GIST_SPLITVEC *v,
461 Oid collation, const gbtree_vinfo *tinfo, FmgrInfo *flinfo)
462 {
463 OffsetNumber i,
464 maxoff = entryvec->n - 1;
465 Vsrt *arr;
466 int svcntr = 0,
467 nbytes;
468 char *cur;
469 GBT_VARKEY **sv = NULL;
470 gbt_vsrt_arg varg;
471
472 arr = (Vsrt *) palloc((maxoff + 1) * sizeof(Vsrt));
473 nbytes = (maxoff + 2) * sizeof(OffsetNumber);
474 v->spl_left = (OffsetNumber *) palloc(nbytes);
475 v->spl_right = (OffsetNumber *) palloc(nbytes);
476 v->spl_ldatum = PointerGetDatum(0);
477 v->spl_rdatum = PointerGetDatum(0);
478 v->spl_nleft = 0;
479 v->spl_nright = 0;
480
481 sv = palloc(sizeof(bytea *) * (maxoff + 1));
482
483 /* Sort entries */
484
485 for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
486 {
487 GBT_VARKEY_R ro;
488
489 cur = (char *) DatumGetPointer(entryvec->vector[i].key);
490 ro = gbt_var_key_readable((GBT_VARKEY *) cur);
491 if (ro.lower == ro.upper) /* leaf */
492 {
493 sv[svcntr] = gbt_var_leaf2node((GBT_VARKEY *) cur, tinfo, flinfo);
494 arr[i].t = sv[svcntr];
495 if (sv[svcntr] != (GBT_VARKEY *) cur)
496 svcntr++;
497 }
498 else
499 arr[i].t = (GBT_VARKEY *) cur;
500 arr[i].i = i;
501 }
502
503 /* sort */
504 varg.tinfo = tinfo;
505 varg.collation = collation;
506 varg.flinfo = flinfo;
507 qsort_arg((void *) &arr[FirstOffsetNumber],
508 maxoff - FirstOffsetNumber + 1,
509 sizeof(Vsrt),
510 gbt_vsrt_cmp,
511 (void *) &varg);
512
513 /* We do simply create two parts */
514
515 for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
516 {
517 if (i <= (maxoff - FirstOffsetNumber + 1) / 2)
518 {
519 gbt_var_bin_union(&v->spl_ldatum, arr[i].t, collation, tinfo, flinfo);
520 v->spl_left[v->spl_nleft] = arr[i].i;
521 v->spl_nleft++;
522 }
523 else
524 {
525 gbt_var_bin_union(&v->spl_rdatum, arr[i].t, collation, tinfo, flinfo);
526 v->spl_right[v->spl_nright] = arr[i].i;
527 v->spl_nright++;
528 }
529 }
530
531 /* Truncate (=compress) key */
532 if (tinfo->trnc)
533 {
534 int32 ll = gbt_var_node_cp_len((GBT_VARKEY *) DatumGetPointer(v->spl_ldatum), tinfo);
535 int32 lr = gbt_var_node_cp_len((GBT_VARKEY *) DatumGetPointer(v->spl_rdatum), tinfo);
536 GBT_VARKEY *dl;
537 GBT_VARKEY *dr;
538
539 ll = Max(ll, lr);
540 ll++;
541
542 dl = gbt_var_node_truncate((GBT_VARKEY *) DatumGetPointer(v->spl_ldatum), ll, tinfo);
543 dr = gbt_var_node_truncate((GBT_VARKEY *) DatumGetPointer(v->spl_rdatum), ll, tinfo);
544 v->spl_ldatum = PointerGetDatum(dl);
545 v->spl_rdatum = PointerGetDatum(dr);
546 }
547
548 return v;
549 }
550
551
552 /*
553 * The GiST consistent method
554 */
555 bool
gbt_var_consistent(GBT_VARKEY_R * key,const void * query,StrategyNumber strategy,Oid collation,bool is_leaf,const gbtree_vinfo * tinfo,FmgrInfo * flinfo)556 gbt_var_consistent(GBT_VARKEY_R *key,
557 const void *query,
558 StrategyNumber strategy,
559 Oid collation,
560 bool is_leaf,
561 const gbtree_vinfo *tinfo,
562 FmgrInfo *flinfo)
563 {
564 bool retval = false;
565
566 switch (strategy)
567 {
568 case BTLessEqualStrategyNumber:
569 if (is_leaf)
570 retval = tinfo->f_ge(query, key->lower, collation, flinfo);
571 else
572 retval = tinfo->f_cmp(query, key->lower, collation, flinfo) >= 0
573 || gbt_var_node_pf_match(key, query, tinfo);
574 break;
575 case BTLessStrategyNumber:
576 if (is_leaf)
577 retval = tinfo->f_gt(query, key->lower, collation, flinfo);
578 else
579 retval = tinfo->f_cmp(query, key->lower, collation, flinfo) >= 0
580 || gbt_var_node_pf_match(key, query, tinfo);
581 break;
582 case BTEqualStrategyNumber:
583 if (is_leaf)
584 retval = tinfo->f_eq(query, key->lower, collation, flinfo);
585 else
586 retval =
587 (tinfo->f_cmp(key->lower, query, collation, flinfo) <= 0 &&
588 tinfo->f_cmp(query, key->upper, collation, flinfo) <= 0) ||
589 gbt_var_node_pf_match(key, query, tinfo);
590 break;
591 case BTGreaterStrategyNumber:
592 if (is_leaf)
593 retval = tinfo->f_lt(query, key->upper, collation, flinfo);
594 else
595 retval = tinfo->f_cmp(query, key->upper, collation, flinfo) <= 0
596 || gbt_var_node_pf_match(key, query, tinfo);
597 break;
598 case BTGreaterEqualStrategyNumber:
599 if (is_leaf)
600 retval = tinfo->f_le(query, key->upper, collation, flinfo);
601 else
602 retval = tinfo->f_cmp(query, key->upper, collation, flinfo) <= 0
603 || gbt_var_node_pf_match(key, query, tinfo);
604 break;
605 case BtreeGistNotEqualStrategyNumber:
606 retval = !(tinfo->f_eq(query, key->lower, collation, flinfo) &&
607 tinfo->f_eq(query, key->upper, collation, flinfo));
608 break;
609 default:
610 retval = false;
611 }
612
613 return retval;
614 }
615