1 /*
2  * contrib/ltree/_ltree_gist.c
3  *
4  *
5  * GiST support for ltree[]
6  * Teodor Sigaev <teodor@stack.net>
7  */
8 #include "postgres.h"
9 
10 #include "access/gist.h"
11 #include "access/reloptions.h"
12 #include "access/stratnum.h"
13 #include "crc32.h"
14 #include "ltree.h"
15 #include "port/pg_bitutils.h"
16 
17 PG_FUNCTION_INFO_V1(_ltree_compress);
18 PG_FUNCTION_INFO_V1(_ltree_same);
19 PG_FUNCTION_INFO_V1(_ltree_union);
20 PG_FUNCTION_INFO_V1(_ltree_penalty);
21 PG_FUNCTION_INFO_V1(_ltree_picksplit);
22 PG_FUNCTION_INFO_V1(_ltree_consistent);
23 PG_FUNCTION_INFO_V1(_ltree_gist_options);
24 
25 #define GETENTRY(vec,pos) ((ltree_gist *) DatumGetPointer((vec)->vector[(pos)].key))
26 #define NEXTVAL(x) ( (ltree*)( (char*)(x) + INTALIGN( VARSIZE(x) ) ) )
27 
28 #define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
29 
30 
31 static void
hashing(BITVECP sign,ltree * t,int siglen)32 hashing(BITVECP sign, ltree *t, int siglen)
33 {
34 	int			tlen = t->numlevel;
35 	ltree_level *cur = LTREE_FIRST(t);
36 	int			hash;
37 
38 	while (tlen > 0)
39 	{
40 		hash = ltree_crc32_sz(cur->name, cur->len);
41 		AHASH(sign, hash, siglen);
42 		cur = LEVEL_NEXT(cur);
43 		tlen--;
44 	}
45 }
46 
47 Datum
_ltree_compress(PG_FUNCTION_ARGS)48 _ltree_compress(PG_FUNCTION_ARGS)
49 {
50 	GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
51 	GISTENTRY  *retval = entry;
52 	int			siglen = LTREE_GET_ASIGLEN();
53 
54 	if (entry->leafkey)
55 	{							/* ltree */
56 		ltree_gist *key;
57 		ArrayType  *val = DatumGetArrayTypeP(entry->key);
58 		int			num = ArrayGetNItems(ARR_NDIM(val), ARR_DIMS(val));
59 		ltree	   *item = (ltree *) ARR_DATA_PTR(val);
60 
61 		if (ARR_NDIM(val) > 1)
62 			ereport(ERROR,
63 					(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
64 					 errmsg("array must be one-dimensional")));
65 		if (array_contains_nulls(val))
66 			ereport(ERROR,
67 					(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
68 					 errmsg("array must not contain nulls")));
69 
70 		key = ltree_gist_alloc(false, NULL, siglen, NULL, NULL);
71 
72 		while (num > 0)
73 		{
74 			hashing(LTG_SIGN(key), item, siglen);
75 			num--;
76 			item = NEXTVAL(item);
77 		}
78 
79 		retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
80 		gistentryinit(*retval, PointerGetDatum(key),
81 					  entry->rel, entry->page,
82 					  entry->offset, false);
83 	}
84 	else if (!LTG_ISALLTRUE(entry->key))
85 	{
86 		int32		i;
87 		ltree_gist *key;
88 		BITVECP		sign = LTG_SIGN(DatumGetPointer(entry->key));
89 
90 		ALOOPBYTE(siglen)
91 		{
92 			if ((sign[i] & 0xff) != 0xff)
93 				PG_RETURN_POINTER(retval);
94 		}
95 
96 		key = ltree_gist_alloc(true, sign, siglen, NULL, NULL);
97 		retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
98 		gistentryinit(*retval, PointerGetDatum(key),
99 					  entry->rel, entry->page,
100 					  entry->offset, false);
101 	}
102 	PG_RETURN_POINTER(retval);
103 }
104 
105 Datum
_ltree_same(PG_FUNCTION_ARGS)106 _ltree_same(PG_FUNCTION_ARGS)
107 {
108 	ltree_gist *a = (ltree_gist *) PG_GETARG_POINTER(0);
109 	ltree_gist *b = (ltree_gist *) PG_GETARG_POINTER(1);
110 	bool	   *result = (bool *) PG_GETARG_POINTER(2);
111 	int			siglen = LTREE_GET_ASIGLEN();
112 
113 	if (LTG_ISALLTRUE(a) && LTG_ISALLTRUE(b))
114 		*result = true;
115 	else if (LTG_ISALLTRUE(a))
116 		*result = false;
117 	else if (LTG_ISALLTRUE(b))
118 		*result = false;
119 	else
120 	{
121 		int32		i;
122 		BITVECP		sa = LTG_SIGN(a),
123 					sb = LTG_SIGN(b);
124 
125 		*result = true;
126 		ALOOPBYTE(siglen)
127 		{
128 			if (sa[i] != sb[i])
129 			{
130 				*result = false;
131 				break;
132 			}
133 		}
134 	}
135 	PG_RETURN_POINTER(result);
136 }
137 
138 static int32
unionkey(BITVECP sbase,ltree_gist * add,int siglen)139 unionkey(BITVECP sbase, ltree_gist *add, int siglen)
140 {
141 	int32		i;
142 	BITVECP		sadd = LTG_SIGN(add);
143 
144 	if (LTG_ISALLTRUE(add))
145 		return 1;
146 
147 	ALOOPBYTE(siglen)
148 		sbase[i] |= sadd[i];
149 	return 0;
150 }
151 
152 Datum
_ltree_union(PG_FUNCTION_ARGS)153 _ltree_union(PG_FUNCTION_ARGS)
154 {
155 	GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
156 	int		   *size = (int *) PG_GETARG_POINTER(1);
157 	int			siglen = LTREE_GET_ASIGLEN();
158 	int32		i;
159 	ltree_gist *result = ltree_gist_alloc(false, NULL, siglen, NULL, NULL);
160 	BITVECP		base = LTG_SIGN(result);
161 
162 	for (i = 0; i < entryvec->n; i++)
163 	{
164 		if (unionkey(base, GETENTRY(entryvec, i), siglen))
165 		{
166 			result->flag |= LTG_ALLTRUE;
167 			SET_VARSIZE(result, LTG_HDRSIZE);
168 			break;
169 		}
170 	}
171 
172 	*size = VARSIZE(result);
173 
174 	PG_RETURN_POINTER(result);
175 }
176 
177 static int32
sizebitvec(BITVECP sign,int siglen)178 sizebitvec(BITVECP sign, int siglen)
179 {
180 	return pg_popcount((const char *) sign, siglen);
181 }
182 
183 static int
hemdistsign(BITVECP a,BITVECP b,int siglen)184 hemdistsign(BITVECP a, BITVECP b, int siglen)
185 {
186 	int			i,
187 				diff,
188 				dist = 0;
189 
190 	ALOOPBYTE(siglen)
191 	{
192 		diff = (unsigned char) (a[i] ^ b[i]);
193 		/* Using the popcount functions here isn't likely to win */
194 		dist += pg_number_of_ones[diff];
195 	}
196 	return dist;
197 }
198 
199 static int
hemdist(ltree_gist * a,ltree_gist * b,int siglen)200 hemdist(ltree_gist *a, ltree_gist *b, int siglen)
201 {
202 	if (LTG_ISALLTRUE(a))
203 	{
204 		if (LTG_ISALLTRUE(b))
205 			return 0;
206 		else
207 			return ASIGLENBIT(siglen) - sizebitvec(LTG_SIGN(b), siglen);
208 	}
209 	else if (LTG_ISALLTRUE(b))
210 		return ASIGLENBIT(siglen) - sizebitvec(LTG_SIGN(a), siglen);
211 
212 	return hemdistsign(LTG_SIGN(a), LTG_SIGN(b), siglen);
213 }
214 
215 
216 Datum
_ltree_penalty(PG_FUNCTION_ARGS)217 _ltree_penalty(PG_FUNCTION_ARGS)
218 {
219 	ltree_gist *origval = (ltree_gist *) DatumGetPointer(((GISTENTRY *) PG_GETARG_POINTER(0))->key);
220 	ltree_gist *newval = (ltree_gist *) DatumGetPointer(((GISTENTRY *) PG_GETARG_POINTER(1))->key);
221 	float	   *penalty = (float *) PG_GETARG_POINTER(2);
222 	int			siglen = LTREE_GET_ASIGLEN();
223 
224 	*penalty = hemdist(origval, newval, siglen);
225 	PG_RETURN_POINTER(penalty);
226 }
227 
228 typedef struct
229 {
230 	OffsetNumber pos;
231 	int32		cost;
232 } SPLITCOST;
233 
234 static int
comparecost(const void * a,const void * b)235 comparecost(const void *a, const void *b)
236 {
237 	return ((const SPLITCOST *) a)->cost - ((const SPLITCOST *) b)->cost;
238 }
239 
240 Datum
_ltree_picksplit(PG_FUNCTION_ARGS)241 _ltree_picksplit(PG_FUNCTION_ARGS)
242 {
243 	GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
244 	GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
245 	int			siglen = LTREE_GET_ASIGLEN();
246 	OffsetNumber k,
247 				j;
248 	ltree_gist *datum_l,
249 			   *datum_r;
250 	BITVECP		union_l,
251 				union_r;
252 	int32		size_alpha,
253 				size_beta;
254 	int32		size_waste,
255 				waste = -1;
256 	int32		nbytes;
257 	OffsetNumber seed_1 = 0,
258 				seed_2 = 0;
259 	OffsetNumber *left,
260 			   *right;
261 	OffsetNumber maxoff;
262 	BITVECP		ptr;
263 	int			i;
264 	SPLITCOST  *costvector;
265 	ltree_gist *_k,
266 			   *_j;
267 
268 	maxoff = entryvec->n - 2;
269 	nbytes = (maxoff + 2) * sizeof(OffsetNumber);
270 	v->spl_left = (OffsetNumber *) palloc(nbytes);
271 	v->spl_right = (OffsetNumber *) palloc(nbytes);
272 
273 	for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k))
274 	{
275 		_k = GETENTRY(entryvec, k);
276 		for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j))
277 		{
278 			size_waste = hemdist(_k, GETENTRY(entryvec, j), siglen);
279 			if (size_waste > waste)
280 			{
281 				waste = size_waste;
282 				seed_1 = k;
283 				seed_2 = j;
284 			}
285 		}
286 	}
287 
288 	left = v->spl_left;
289 	v->spl_nleft = 0;
290 	right = v->spl_right;
291 	v->spl_nright = 0;
292 
293 	if (seed_1 == 0 || seed_2 == 0)
294 	{
295 		seed_1 = 1;
296 		seed_2 = 2;
297 	}
298 
299 	/* form initial .. */
300 	datum_l = ltree_gist_alloc(LTG_ISALLTRUE(GETENTRY(entryvec, seed_1)),
301 							   LTG_SIGN(GETENTRY(entryvec, seed_1)),
302 							   siglen, NULL, NULL);
303 
304 	datum_r = ltree_gist_alloc(LTG_ISALLTRUE(GETENTRY(entryvec, seed_2)),
305 							   LTG_SIGN(GETENTRY(entryvec, seed_2)),
306 							   siglen, NULL, NULL);
307 
308 	maxoff = OffsetNumberNext(maxoff);
309 	/* sort before ... */
310 	costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
311 	for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j))
312 	{
313 		costvector[j - 1].pos = j;
314 		_j = GETENTRY(entryvec, j);
315 		size_alpha = hemdist(datum_l, _j, siglen);
316 		size_beta = hemdist(datum_r, _j, siglen);
317 		costvector[j - 1].cost = Abs(size_alpha - size_beta);
318 	}
319 	qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);
320 
321 	union_l = LTG_SIGN(datum_l);
322 	union_r = LTG_SIGN(datum_r);
323 
324 	for (k = 0; k < maxoff; k++)
325 	{
326 		j = costvector[k].pos;
327 		if (j == seed_1)
328 		{
329 			*left++ = j;
330 			v->spl_nleft++;
331 			continue;
332 		}
333 		else if (j == seed_2)
334 		{
335 			*right++ = j;
336 			v->spl_nright++;
337 			continue;
338 		}
339 		_j = GETENTRY(entryvec, j);
340 		size_alpha = hemdist(datum_l, _j, siglen);
341 		size_beta = hemdist(datum_r, _j, siglen);
342 
343 		if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.00001))
344 		{
345 			if (LTG_ISALLTRUE(datum_l) || LTG_ISALLTRUE(_j))
346 			{
347 				if (!LTG_ISALLTRUE(datum_l))
348 					MemSet((void *) union_l, 0xff, siglen);
349 			}
350 			else
351 			{
352 				ptr = LTG_SIGN(_j);
353 				ALOOPBYTE(siglen)
354 					union_l[i] |= ptr[i];
355 			}
356 			*left++ = j;
357 			v->spl_nleft++;
358 		}
359 		else
360 		{
361 			if (LTG_ISALLTRUE(datum_r) || LTG_ISALLTRUE(_j))
362 			{
363 				if (!LTG_ISALLTRUE(datum_r))
364 					MemSet((void *) union_r, 0xff, siglen);
365 			}
366 			else
367 			{
368 				ptr = LTG_SIGN(_j);
369 				ALOOPBYTE(siglen)
370 					union_r[i] |= ptr[i];
371 			}
372 			*right++ = j;
373 			v->spl_nright++;
374 		}
375 	}
376 
377 	*right = *left = FirstOffsetNumber;
378 
379 	v->spl_ldatum = PointerGetDatum(datum_l);
380 	v->spl_rdatum = PointerGetDatum(datum_r);
381 
382 	PG_RETURN_POINTER(v);
383 }
384 
385 static bool
gist_te(ltree_gist * key,ltree * query,int siglen)386 gist_te(ltree_gist *key, ltree *query, int siglen)
387 {
388 	ltree_level *curq = LTREE_FIRST(query);
389 	BITVECP		sign = LTG_SIGN(key);
390 	int			qlen = query->numlevel;
391 	unsigned int hv;
392 
393 	if (LTG_ISALLTRUE(key))
394 		return true;
395 
396 	while (qlen > 0)
397 	{
398 		hv = ltree_crc32_sz(curq->name, curq->len);
399 		if (!GETBIT(sign, AHASHVAL(hv, siglen)))
400 			return false;
401 		curq = LEVEL_NEXT(curq);
402 		qlen--;
403 	}
404 
405 	return true;
406 }
407 
408 typedef struct LtreeSignature
409 {
410 	BITVECP		sign;
411 	int			siglen;
412 } LtreeSignature;
413 
414 static bool
checkcondition_bit(void * cxt,ITEM * val)415 checkcondition_bit(void *cxt, ITEM *val)
416 {
417 	LtreeSignature *sig = cxt;
418 
419 	return (FLG_CANLOOKSIGN(val->flag)) ? GETBIT(sig->sign, AHASHVAL(val->val, sig->siglen)) : true;
420 }
421 
422 static bool
gist_qtxt(ltree_gist * key,ltxtquery * query,int siglen)423 gist_qtxt(ltree_gist *key, ltxtquery *query, int siglen)
424 {
425 	LtreeSignature sig;
426 
427 	if (LTG_ISALLTRUE(key))
428 		return true;
429 
430 	sig.sign = LTG_SIGN(key);
431 	sig.siglen = siglen;
432 
433 	return ltree_execute(GETQUERY(query),
434 						 &sig, false,
435 						 checkcondition_bit);
436 }
437 
438 static bool
gist_qe(ltree_gist * key,lquery * query,int siglen)439 gist_qe(ltree_gist *key, lquery *query, int siglen)
440 {
441 	lquery_level *curq = LQUERY_FIRST(query);
442 	BITVECP		sign = LTG_SIGN(key);
443 	int			qlen = query->numlevel;
444 
445 	if (LTG_ISALLTRUE(key))
446 		return true;
447 
448 	while (qlen > 0)
449 	{
450 		if (curq->numvar && LQL_CANLOOKSIGN(curq))
451 		{
452 			bool		isexist = false;
453 			int			vlen = curq->numvar;
454 			lquery_variant *curv = LQL_FIRST(curq);
455 
456 			while (vlen > 0)
457 			{
458 				if (GETBIT(sign, AHASHVAL(curv->val, siglen)))
459 				{
460 					isexist = true;
461 					break;
462 				}
463 				curv = LVAR_NEXT(curv);
464 				vlen--;
465 			}
466 			if (!isexist)
467 				return false;
468 		}
469 
470 		curq = LQL_NEXT(curq);
471 		qlen--;
472 	}
473 
474 	return true;
475 }
476 
477 static bool
_arrq_cons(ltree_gist * key,ArrayType * _query,int siglen)478 _arrq_cons(ltree_gist *key, ArrayType *_query, int siglen)
479 {
480 	lquery	   *query = (lquery *) ARR_DATA_PTR(_query);
481 	int			num = ArrayGetNItems(ARR_NDIM(_query), ARR_DIMS(_query));
482 
483 	if (ARR_NDIM(_query) > 1)
484 		ereport(ERROR,
485 				(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
486 				 errmsg("array must be one-dimensional")));
487 	if (array_contains_nulls(_query))
488 		ereport(ERROR,
489 				(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
490 				 errmsg("array must not contain nulls")));
491 
492 	while (num > 0)
493 	{
494 		if (gist_qe(key, query, siglen))
495 			return true;
496 		num--;
497 		query = (lquery *) NEXTVAL(query);
498 	}
499 	return false;
500 }
501 
502 Datum
_ltree_consistent(PG_FUNCTION_ARGS)503 _ltree_consistent(PG_FUNCTION_ARGS)
504 {
505 	GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
506 	void	   *query = (void *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
507 	StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
508 
509 	/* Oid		subtype = PG_GETARG_OID(3); */
510 	bool	   *recheck = (bool *) PG_GETARG_POINTER(4);
511 	int			siglen = LTREE_GET_ASIGLEN();
512 	ltree_gist *key = (ltree_gist *) DatumGetPointer(entry->key);
513 	bool		res = false;
514 
515 	/* All cases served by this function are inexact */
516 	*recheck = true;
517 
518 	switch (strategy)
519 	{
520 		case 10:
521 		case 11:
522 			res = gist_te(key, (ltree *) query, siglen);
523 			break;
524 		case 12:
525 		case 13:
526 			res = gist_qe(key, (lquery *) query, siglen);
527 			break;
528 		case 14:
529 		case 15:
530 			res = gist_qtxt(key, (ltxtquery *) query, siglen);
531 			break;
532 		case 16:
533 		case 17:
534 			res = _arrq_cons(key, (ArrayType *) query, siglen);
535 			break;
536 		default:
537 			/* internal error */
538 			elog(ERROR, "unrecognized StrategyNumber: %d", strategy);
539 	}
540 	PG_FREE_IF_COPY(query, 1);
541 	PG_RETURN_BOOL(res);
542 }
543 
544 Datum
_ltree_gist_options(PG_FUNCTION_ARGS)545 _ltree_gist_options(PG_FUNCTION_ARGS)
546 {
547 	local_relopts *relopts = (local_relopts *) PG_GETARG_POINTER(0);
548 
549 	init_local_reloptions(relopts, sizeof(LtreeGistOptions));
550 	add_local_int_reloption(relopts, "siglen", "signature length",
551 							LTREE_ASIGLEN_DEFAULT, 1, LTREE_ASIGLEN_MAX,
552 							offsetof(LtreeGistOptions, siglen));
553 
554 	PG_RETURN_VOID();
555 }
556