1 /*
2  * contrib/ltree/_ltree_gist.c
3  *
4  *
5  * GiST support for ltree[]
6  * Teodor Sigaev <teodor@stack.net>
7  */
8 #include "postgres.h"
9 
10 #include "access/gist.h"
11 #include "access/stratnum.h"
12 #include "crc32.h"
13 #include "ltree.h"
14 
15 
16 PG_FUNCTION_INFO_V1(_ltree_compress);
17 PG_FUNCTION_INFO_V1(_ltree_same);
18 PG_FUNCTION_INFO_V1(_ltree_union);
19 PG_FUNCTION_INFO_V1(_ltree_penalty);
20 PG_FUNCTION_INFO_V1(_ltree_picksplit);
21 PG_FUNCTION_INFO_V1(_ltree_consistent);
22 
23 #define GETENTRY(vec,pos) ((ltree_gist *) DatumGetPointer((vec)->vector[(pos)].key))
24 #define NEXTVAL(x) ( (ltree*)( (char*)(x) + INTALIGN( VARSIZE(x) ) ) )
25 
26 /* Number of one-bits in an unsigned byte */
27 static const uint8 number_of_ones[256] = {
28 	0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
29 	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
30 	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
31 	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
32 	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
33 	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
34 	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
35 	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
36 	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
37 	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
38 	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
39 	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
40 	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
41 	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
42 	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
43 	4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
44 };
45 
46 #define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
47 
48 
49 static void
hashing(BITVECP sign,ltree * t)50 hashing(BITVECP sign, ltree *t)
51 {
52 	int			tlen = t->numlevel;
53 	ltree_level *cur = LTREE_FIRST(t);
54 	int			hash;
55 
56 	while (tlen > 0)
57 	{
58 		hash = ltree_crc32_sz(cur->name, cur->len);
59 		AHASH(sign, hash);
60 		cur = LEVEL_NEXT(cur);
61 		tlen--;
62 	}
63 }
64 
65 Datum
_ltree_compress(PG_FUNCTION_ARGS)66 _ltree_compress(PG_FUNCTION_ARGS)
67 {
68 	GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
69 	GISTENTRY  *retval = entry;
70 
71 	if (entry->leafkey)
72 	{							/* ltree */
73 		ltree_gist *key;
74 		ArrayType  *val = DatumGetArrayTypeP(entry->key);
75 		int32		len = LTG_HDRSIZE + ASIGLEN;
76 		int			num = ArrayGetNItems(ARR_NDIM(val), ARR_DIMS(val));
77 		ltree	   *item = (ltree *) ARR_DATA_PTR(val);
78 
79 		if (ARR_NDIM(val) > 1)
80 			ereport(ERROR,
81 					(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
82 					 errmsg("array must be one-dimensional")));
83 		if (array_contains_nulls(val))
84 			ereport(ERROR,
85 					(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
86 					 errmsg("array must not contain nulls")));
87 
88 		key = (ltree_gist *) palloc0(len);
89 		SET_VARSIZE(key, len);
90 		key->flag = 0;
91 
92 		MemSet(LTG_SIGN(key), 0, ASIGLEN);
93 		while (num > 0)
94 		{
95 			hashing(LTG_SIGN(key), item);
96 			num--;
97 			item = NEXTVAL(item);
98 		}
99 
100 		retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
101 		gistentryinit(*retval, PointerGetDatum(key),
102 					  entry->rel, entry->page,
103 					  entry->offset, FALSE);
104 	}
105 	else if (!LTG_ISALLTRUE(entry->key))
106 	{
107 		int32		i,
108 					len;
109 		ltree_gist *key;
110 
111 		BITVECP		sign = LTG_SIGN(DatumGetPointer(entry->key));
112 
113 		ALOOPBYTE
114 		{
115 			if ((sign[i] & 0xff) != 0xff)
116 				PG_RETURN_POINTER(retval);
117 		}
118 		len = LTG_HDRSIZE;
119 		key = (ltree_gist *) palloc0(len);
120 		SET_VARSIZE(key, len);
121 		key->flag = LTG_ALLTRUE;
122 
123 		retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
124 		gistentryinit(*retval, PointerGetDatum(key),
125 					  entry->rel, entry->page,
126 					  entry->offset, FALSE);
127 	}
128 	PG_RETURN_POINTER(retval);
129 }
130 
131 Datum
_ltree_same(PG_FUNCTION_ARGS)132 _ltree_same(PG_FUNCTION_ARGS)
133 {
134 	ltree_gist *a = (ltree_gist *) PG_GETARG_POINTER(0);
135 	ltree_gist *b = (ltree_gist *) PG_GETARG_POINTER(1);
136 	bool	   *result = (bool *) PG_GETARG_POINTER(2);
137 
138 	if (LTG_ISALLTRUE(a) && LTG_ISALLTRUE(b))
139 		*result = true;
140 	else if (LTG_ISALLTRUE(a))
141 		*result = false;
142 	else if (LTG_ISALLTRUE(b))
143 		*result = false;
144 	else
145 	{
146 		int32		i;
147 		BITVECP		sa = LTG_SIGN(a),
148 					sb = LTG_SIGN(b);
149 
150 		*result = true;
151 		ALOOPBYTE
152 		{
153 			if (sa[i] != sb[i])
154 			{
155 				*result = false;
156 				break;
157 			}
158 		}
159 	}
160 	PG_RETURN_POINTER(result);
161 }
162 
163 static int32
unionkey(BITVECP sbase,ltree_gist * add)164 unionkey(BITVECP sbase, ltree_gist *add)
165 {
166 	int32		i;
167 	BITVECP		sadd = LTG_SIGN(add);
168 
169 	if (LTG_ISALLTRUE(add))
170 		return 1;
171 
172 	ALOOPBYTE
173 		sbase[i] |= sadd[i];
174 	return 0;
175 }
176 
177 Datum
_ltree_union(PG_FUNCTION_ARGS)178 _ltree_union(PG_FUNCTION_ARGS)
179 {
180 	GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
181 	int		   *size = (int *) PG_GETARG_POINTER(1);
182 	ABITVEC		base;
183 	int32		i,
184 				len;
185 	int32		flag = 0;
186 	ltree_gist *result;
187 
188 	MemSet((void *) base, 0, sizeof(ABITVEC));
189 	for (i = 0; i < entryvec->n; i++)
190 	{
191 		if (unionkey(base, GETENTRY(entryvec, i)))
192 		{
193 			flag = LTG_ALLTRUE;
194 			break;
195 		}
196 	}
197 
198 	len = LTG_HDRSIZE + ((flag & LTG_ALLTRUE) ? 0 : ASIGLEN);
199 	result = (ltree_gist *) palloc0(len);
200 	SET_VARSIZE(result, len);
201 	result->flag = flag;
202 	if (!LTG_ISALLTRUE(result))
203 		memcpy((void *) LTG_SIGN(result), (void *) base, sizeof(ABITVEC));
204 	*size = len;
205 
206 	PG_RETURN_POINTER(result);
207 }
208 
209 static int32
sizebitvec(BITVECP sign)210 sizebitvec(BITVECP sign)
211 {
212 	int32		size = 0,
213 				i;
214 
215 	ALOOPBYTE
216 		size += number_of_ones[(unsigned char) sign[i]];
217 	return size;
218 }
219 
220 static int
hemdistsign(BITVECP a,BITVECP b)221 hemdistsign(BITVECP a, BITVECP b)
222 {
223 	int			i,
224 				diff,
225 				dist = 0;
226 
227 	ALOOPBYTE
228 	{
229 		diff = (unsigned char) (a[i] ^ b[i]);
230 		dist += number_of_ones[diff];
231 	}
232 	return dist;
233 }
234 
235 static int
hemdist(ltree_gist * a,ltree_gist * b)236 hemdist(ltree_gist *a, ltree_gist *b)
237 {
238 	if (LTG_ISALLTRUE(a))
239 	{
240 		if (LTG_ISALLTRUE(b))
241 			return 0;
242 		else
243 			return ASIGLENBIT - sizebitvec(LTG_SIGN(b));
244 	}
245 	else if (LTG_ISALLTRUE(b))
246 		return ASIGLENBIT - sizebitvec(LTG_SIGN(a));
247 
248 	return hemdistsign(LTG_SIGN(a), LTG_SIGN(b));
249 }
250 
251 
252 Datum
_ltree_penalty(PG_FUNCTION_ARGS)253 _ltree_penalty(PG_FUNCTION_ARGS)
254 {
255 	ltree_gist *origval = (ltree_gist *) DatumGetPointer(((GISTENTRY *) PG_GETARG_POINTER(0))->key);
256 	ltree_gist *newval = (ltree_gist *) DatumGetPointer(((GISTENTRY *) PG_GETARG_POINTER(1))->key);
257 	float	   *penalty = (float *) PG_GETARG_POINTER(2);
258 
259 	*penalty = hemdist(origval, newval);
260 	PG_RETURN_POINTER(penalty);
261 }
262 
263 typedef struct
264 {
265 	OffsetNumber pos;
266 	int32		cost;
267 } SPLITCOST;
268 
269 static int
comparecost(const void * a,const void * b)270 comparecost(const void *a, const void *b)
271 {
272 	return ((const SPLITCOST *) a)->cost - ((const SPLITCOST *) b)->cost;
273 }
274 
275 Datum
_ltree_picksplit(PG_FUNCTION_ARGS)276 _ltree_picksplit(PG_FUNCTION_ARGS)
277 {
278 	GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
279 	GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
280 	OffsetNumber k,
281 				j;
282 	ltree_gist *datum_l,
283 			   *datum_r;
284 	BITVECP		union_l,
285 				union_r;
286 	int32		size_alpha,
287 				size_beta;
288 	int32		size_waste,
289 				waste = -1;
290 	int32		nbytes;
291 	OffsetNumber seed_1 = 0,
292 				seed_2 = 0;
293 	OffsetNumber *left,
294 			   *right;
295 	OffsetNumber maxoff;
296 	BITVECP		ptr;
297 	int			i;
298 	SPLITCOST  *costvector;
299 	ltree_gist *_k,
300 			   *_j;
301 
302 	maxoff = entryvec->n - 2;
303 	nbytes = (maxoff + 2) * sizeof(OffsetNumber);
304 	v->spl_left = (OffsetNumber *) palloc(nbytes);
305 	v->spl_right = (OffsetNumber *) palloc(nbytes);
306 
307 	for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k))
308 	{
309 		_k = GETENTRY(entryvec, k);
310 		for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j))
311 		{
312 			size_waste = hemdist(_k, GETENTRY(entryvec, j));
313 			if (size_waste > waste)
314 			{
315 				waste = size_waste;
316 				seed_1 = k;
317 				seed_2 = j;
318 			}
319 		}
320 	}
321 
322 	left = v->spl_left;
323 	v->spl_nleft = 0;
324 	right = v->spl_right;
325 	v->spl_nright = 0;
326 
327 	if (seed_1 == 0 || seed_2 == 0)
328 	{
329 		seed_1 = 1;
330 		seed_2 = 2;
331 	}
332 
333 	/* form initial .. */
334 	if (LTG_ISALLTRUE(GETENTRY(entryvec, seed_1)))
335 	{
336 		datum_l = (ltree_gist *) palloc0(LTG_HDRSIZE);
337 		SET_VARSIZE(datum_l, LTG_HDRSIZE);
338 		datum_l->flag = LTG_ALLTRUE;
339 	}
340 	else
341 	{
342 		datum_l = (ltree_gist *) palloc0(LTG_HDRSIZE + ASIGLEN);
343 		SET_VARSIZE(datum_l, LTG_HDRSIZE + ASIGLEN);
344 		datum_l->flag = 0;
345 		memcpy((void *) LTG_SIGN(datum_l), (void *) LTG_SIGN(GETENTRY(entryvec, seed_1)), sizeof(ABITVEC));
346 	}
347 	if (LTG_ISALLTRUE(GETENTRY(entryvec, seed_2)))
348 	{
349 		datum_r = (ltree_gist *) palloc0(LTG_HDRSIZE);
350 		SET_VARSIZE(datum_r, LTG_HDRSIZE);
351 		datum_r->flag = LTG_ALLTRUE;
352 	}
353 	else
354 	{
355 		datum_r = (ltree_gist *) palloc0(LTG_HDRSIZE + ASIGLEN);
356 		SET_VARSIZE(datum_r, LTG_HDRSIZE + ASIGLEN);
357 		datum_r->flag = 0;
358 		memcpy((void *) LTG_SIGN(datum_r), (void *) LTG_SIGN(GETENTRY(entryvec, seed_2)), sizeof(ABITVEC));
359 	}
360 
361 	maxoff = OffsetNumberNext(maxoff);
362 	/* sort before ... */
363 	costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
364 	for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j))
365 	{
366 		costvector[j - 1].pos = j;
367 		_j = GETENTRY(entryvec, j);
368 		size_alpha = hemdist(datum_l, _j);
369 		size_beta = hemdist(datum_r, _j);
370 		costvector[j - 1].cost = Abs(size_alpha - size_beta);
371 	}
372 	qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);
373 
374 	union_l = LTG_SIGN(datum_l);
375 	union_r = LTG_SIGN(datum_r);
376 
377 	for (k = 0; k < maxoff; k++)
378 	{
379 		j = costvector[k].pos;
380 		if (j == seed_1)
381 		{
382 			*left++ = j;
383 			v->spl_nleft++;
384 			continue;
385 		}
386 		else if (j == seed_2)
387 		{
388 			*right++ = j;
389 			v->spl_nright++;
390 			continue;
391 		}
392 		_j = GETENTRY(entryvec, j);
393 		size_alpha = hemdist(datum_l, _j);
394 		size_beta = hemdist(datum_r, _j);
395 
396 		if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.00001))
397 		{
398 			if (LTG_ISALLTRUE(datum_l) || LTG_ISALLTRUE(_j))
399 			{
400 				if (!LTG_ISALLTRUE(datum_l))
401 					MemSet((void *) union_l, 0xff, sizeof(ABITVEC));
402 			}
403 			else
404 			{
405 				ptr = LTG_SIGN(_j);
406 				ALOOPBYTE
407 					union_l[i] |= ptr[i];
408 			}
409 			*left++ = j;
410 			v->spl_nleft++;
411 		}
412 		else
413 		{
414 			if (LTG_ISALLTRUE(datum_r) || LTG_ISALLTRUE(_j))
415 			{
416 				if (!LTG_ISALLTRUE(datum_r))
417 					MemSet((void *) union_r, 0xff, sizeof(ABITVEC));
418 			}
419 			else
420 			{
421 				ptr = LTG_SIGN(_j);
422 				ALOOPBYTE
423 					union_r[i] |= ptr[i];
424 			}
425 			*right++ = j;
426 			v->spl_nright++;
427 		}
428 	}
429 
430 	*right = *left = FirstOffsetNumber;
431 
432 	v->spl_ldatum = PointerGetDatum(datum_l);
433 	v->spl_rdatum = PointerGetDatum(datum_r);
434 
435 	PG_RETURN_POINTER(v);
436 }
437 
438 static bool
gist_te(ltree_gist * key,ltree * query)439 gist_te(ltree_gist *key, ltree *query)
440 {
441 	ltree_level *curq = LTREE_FIRST(query);
442 	BITVECP		sign = LTG_SIGN(key);
443 	int			qlen = query->numlevel;
444 	unsigned int hv;
445 
446 	if (LTG_ISALLTRUE(key))
447 		return true;
448 
449 	while (qlen > 0)
450 	{
451 		hv = ltree_crc32_sz(curq->name, curq->len);
452 		if (!GETBIT(sign, AHASHVAL(hv)))
453 			return false;
454 		curq = LEVEL_NEXT(curq);
455 		qlen--;
456 	}
457 
458 	return true;
459 }
460 
461 static bool
checkcondition_bit(void * checkval,ITEM * val)462 checkcondition_bit(void *checkval, ITEM *val)
463 {
464 	return (FLG_CANLOOKSIGN(val->flag)) ? GETBIT(checkval, AHASHVAL(val->val)) : true;
465 }
466 
467 static bool
gist_qtxt(ltree_gist * key,ltxtquery * query)468 gist_qtxt(ltree_gist *key, ltxtquery *query)
469 {
470 	if (LTG_ISALLTRUE(key))
471 		return true;
472 
473 	return ltree_execute(
474 						 GETQUERY(query),
475 						 (void *) LTG_SIGN(key), false,
476 						 checkcondition_bit
477 		);
478 }
479 
480 static bool
gist_qe(ltree_gist * key,lquery * query)481 gist_qe(ltree_gist *key, lquery *query)
482 {
483 	lquery_level *curq = LQUERY_FIRST(query);
484 	BITVECP		sign = LTG_SIGN(key);
485 	int			qlen = query->numlevel;
486 
487 	if (LTG_ISALLTRUE(key))
488 		return true;
489 
490 	while (qlen > 0)
491 	{
492 		if (curq->numvar && LQL_CANLOOKSIGN(curq))
493 		{
494 			bool		isexist = false;
495 			int			vlen = curq->numvar;
496 			lquery_variant *curv = LQL_FIRST(curq);
497 
498 			while (vlen > 0)
499 			{
500 				if (GETBIT(sign, AHASHVAL(curv->val)))
501 				{
502 					isexist = true;
503 					break;
504 				}
505 				curv = LVAR_NEXT(curv);
506 				vlen--;
507 			}
508 			if (!isexist)
509 				return false;
510 		}
511 
512 		curq = LQL_NEXT(curq);
513 		qlen--;
514 	}
515 
516 	return true;
517 }
518 
519 static bool
_arrq_cons(ltree_gist * key,ArrayType * _query)520 _arrq_cons(ltree_gist *key, ArrayType *_query)
521 {
522 	lquery	   *query = (lquery *) ARR_DATA_PTR(_query);
523 	int			num = ArrayGetNItems(ARR_NDIM(_query), ARR_DIMS(_query));
524 
525 	if (ARR_NDIM(_query) > 1)
526 		ereport(ERROR,
527 				(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
528 				 errmsg("array must be one-dimensional")));
529 	if (array_contains_nulls(_query))
530 		ereport(ERROR,
531 				(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
532 				 errmsg("array must not contain nulls")));
533 
534 	while (num > 0)
535 	{
536 		if (gist_qe(key, query))
537 			return true;
538 		num--;
539 		query = (lquery *) NEXTVAL(query);
540 	}
541 	return false;
542 }
543 
544 Datum
_ltree_consistent(PG_FUNCTION_ARGS)545 _ltree_consistent(PG_FUNCTION_ARGS)
546 {
547 	GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
548 	char	   *query = (char *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(1)));
549 	StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
550 
551 	/* Oid		subtype = PG_GETARG_OID(3); */
552 	bool	   *recheck = (bool *) PG_GETARG_POINTER(4);
553 	ltree_gist *key = (ltree_gist *) DatumGetPointer(entry->key);
554 	bool		res = false;
555 
556 	/* All cases served by this function are inexact */
557 	*recheck = true;
558 
559 	switch (strategy)
560 	{
561 		case 10:
562 		case 11:
563 			res = gist_te(key, (ltree *) query);
564 			break;
565 		case 12:
566 		case 13:
567 			res = gist_qe(key, (lquery *) query);
568 			break;
569 		case 14:
570 		case 15:
571 			res = gist_qtxt(key, (ltxtquery *) query);
572 			break;
573 		case 16:
574 		case 17:
575 			res = _arrq_cons(key, (ArrayType *) query);
576 			break;
577 		default:
578 			/* internal error */
579 			elog(ERROR, "unrecognized StrategyNumber: %d", strategy);
580 	}
581 	PG_FREE_IF_COPY(query, 1);
582 	PG_RETURN_BOOL(res);
583 }
584