1 /*
2  * contrib/hstore/hstore_gist.c
3  */
4 #include "postgres.h"
5 
6 #include "access/gist.h"
7 #include "access/reloptions.h"
8 #include "access/stratnum.h"
9 #include "catalog/pg_type.h"
10 #include "hstore.h"
11 #include "utils/pg_crc.h"
12 
13 /* gist_hstore_ops opclass options */
14 typedef struct
15 {
16 	int32		vl_len_;		/* varlena header (do not touch directly!) */
17 	int			siglen;			/* signature length in bytes */
18 } GistHstoreOptions;
19 
20 /* bigint defines */
21 #define BITBYTE 8
22 #define SIGLEN_DEFAULT	(sizeof(int32) * 4)
23 #define SIGLEN_MAX		GISTMaxIndexKeySize
24 #define SIGLENBIT(siglen) ((siglen) * BITBYTE)
25 #define GET_SIGLEN()	(PG_HAS_OPCLASS_OPTIONS() ? \
26 						 ((GistHstoreOptions *) PG_GET_OPCLASS_OPTIONS())->siglen : \
27 						 SIGLEN_DEFAULT)
28 
29 
30 typedef char *BITVECP;
31 
32 #define LOOPBYTE(siglen) \
33 			for (i = 0; i < (siglen); i++)
34 
35 #define LOOPBIT(siglen) \
36 			for (i = 0; i < SIGLENBIT(siglen); i++)
37 
38 /* beware of multiple evaluation of arguments to these macros! */
39 #define GETBYTE(x,i) ( *( (BITVECP)(x) + (int)( (i) / BITBYTE ) ) )
40 #define GETBITBYTE(x,i) ( (*((char*)(x)) >> (i)) & 0x01 )
41 #define CLRBIT(x,i)   GETBYTE(x,i) &= ~( 0x01 << ( (i) % BITBYTE ) )
42 #define SETBIT(x,i)   GETBYTE(x,i) |=  ( 0x01 << ( (i) % BITBYTE ) )
43 #define GETBIT(x,i) ( (GETBYTE(x,i) >> ( (i) % BITBYTE )) & 0x01 )
44 #define HASHVAL(val, siglen) (((unsigned int)(val)) % SIGLENBIT(siglen))
45 #define HASH(sign, val, siglen) SETBIT((sign), HASHVAL(val, siglen))
46 
47 typedef struct
48 {
49 	int32		vl_len_;		/* varlena header (do not touch directly!) */
50 	int32		flag;
51 	char		data[FLEXIBLE_ARRAY_MEMBER];
52 } GISTTYPE;
53 
54 #define ALLISTRUE		0x04
55 
56 #define ISALLTRUE(x)	( ((GISTTYPE*)x)->flag & ALLISTRUE )
57 
58 #define GTHDRSIZE		(VARHDRSZ + sizeof(int32))
59 #define CALCGTSIZE(flag, siglen) ( GTHDRSIZE+(((flag) & ALLISTRUE) ? 0 : (siglen)) )
60 
61 #define GETSIGN(x)		( (BITVECP)( (char*)x+GTHDRSIZE ) )
62 
63 #define SUMBIT(val) (		\
64 	GETBITBYTE((val),0) + \
65 	GETBITBYTE((val),1) + \
66 	GETBITBYTE((val),2) + \
67 	GETBITBYTE((val),3) + \
68 	GETBITBYTE((val),4) + \
69 	GETBITBYTE((val),5) + \
70 	GETBITBYTE((val),6) + \
71 	GETBITBYTE((val),7)   \
72 )
73 
74 #define GETENTRY(vec,pos) ((GISTTYPE *) DatumGetPointer((vec)->vector[(pos)].key))
75 
76 #define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
77 
78 /* shorthand for calculating CRC-32 of a single chunk of data. */
79 static pg_crc32
crc32_sz(char * buf,int size)80 crc32_sz(char *buf, int size)
81 {
82 	pg_crc32	crc;
83 
84 	INIT_TRADITIONAL_CRC32(crc);
85 	COMP_TRADITIONAL_CRC32(crc, buf, size);
86 	FIN_TRADITIONAL_CRC32(crc);
87 
88 	return crc;
89 }
90 
91 
92 PG_FUNCTION_INFO_V1(ghstore_in);
93 PG_FUNCTION_INFO_V1(ghstore_out);
94 
95 
96 Datum
ghstore_in(PG_FUNCTION_ARGS)97 ghstore_in(PG_FUNCTION_ARGS)
98 {
99 	elog(ERROR, "Not implemented");
100 	PG_RETURN_DATUM(0);
101 }
102 
103 Datum
ghstore_out(PG_FUNCTION_ARGS)104 ghstore_out(PG_FUNCTION_ARGS)
105 {
106 	elog(ERROR, "Not implemented");
107 	PG_RETURN_DATUM(0);
108 }
109 
110 static GISTTYPE *
ghstore_alloc(bool allistrue,int siglen,BITVECP sign)111 ghstore_alloc(bool allistrue, int siglen, BITVECP sign)
112 {
113 	int			flag = allistrue ? ALLISTRUE : 0;
114 	int			size = CALCGTSIZE(flag, siglen);
115 	GISTTYPE   *res = palloc(size);
116 
117 	SET_VARSIZE(res, size);
118 	res->flag = flag;
119 
120 	if (!allistrue)
121 	{
122 		if (sign)
123 			memcpy(GETSIGN(res), sign, siglen);
124 		else
125 			memset(GETSIGN(res), 0, siglen);
126 	}
127 
128 	return res;
129 }
130 
131 PG_FUNCTION_INFO_V1(ghstore_consistent);
132 PG_FUNCTION_INFO_V1(ghstore_compress);
133 PG_FUNCTION_INFO_V1(ghstore_decompress);
134 PG_FUNCTION_INFO_V1(ghstore_penalty);
135 PG_FUNCTION_INFO_V1(ghstore_picksplit);
136 PG_FUNCTION_INFO_V1(ghstore_union);
137 PG_FUNCTION_INFO_V1(ghstore_same);
138 PG_FUNCTION_INFO_V1(ghstore_options);
139 
140 Datum
ghstore_compress(PG_FUNCTION_ARGS)141 ghstore_compress(PG_FUNCTION_ARGS)
142 {
143 	GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
144 	int			siglen = GET_SIGLEN();
145 	GISTENTRY  *retval = entry;
146 
147 	if (entry->leafkey)
148 	{
149 		GISTTYPE   *res = ghstore_alloc(false, siglen, NULL);
150 		HStore	   *val = DatumGetHStoreP(entry->key);
151 		HEntry	   *hsent = ARRPTR(val);
152 		char	   *ptr = STRPTR(val);
153 		int			count = HS_COUNT(val);
154 		int			i;
155 
156 		for (i = 0; i < count; ++i)
157 		{
158 			int			h;
159 
160 			h = crc32_sz((char *) HSTORE_KEY(hsent, ptr, i),
161 						 HSTORE_KEYLEN(hsent, i));
162 			HASH(GETSIGN(res), h, siglen);
163 			if (!HSTORE_VALISNULL(hsent, i))
164 			{
165 				h = crc32_sz((char *) HSTORE_VAL(hsent, ptr, i),
166 							 HSTORE_VALLEN(hsent, i));
167 				HASH(GETSIGN(res), h, siglen);
168 			}
169 		}
170 
171 		retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
172 		gistentryinit(*retval, PointerGetDatum(res),
173 					  entry->rel, entry->page,
174 					  entry->offset,
175 					  false);
176 	}
177 	else if (!ISALLTRUE(DatumGetPointer(entry->key)))
178 	{
179 		int32		i;
180 		GISTTYPE   *res;
181 		BITVECP		sign = GETSIGN(DatumGetPointer(entry->key));
182 
183 		LOOPBYTE(siglen)
184 		{
185 			if ((sign[i] & 0xff) != 0xff)
186 				PG_RETURN_POINTER(retval);
187 		}
188 
189 		res = ghstore_alloc(true, siglen, NULL);
190 
191 		retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
192 		gistentryinit(*retval, PointerGetDatum(res),
193 					  entry->rel, entry->page,
194 					  entry->offset,
195 					  false);
196 	}
197 
198 	PG_RETURN_POINTER(retval);
199 }
200 
201 /*
202  * Since type ghstore isn't toastable (and doesn't need to be),
203  * this function can be a no-op.
204  */
205 Datum
ghstore_decompress(PG_FUNCTION_ARGS)206 ghstore_decompress(PG_FUNCTION_ARGS)
207 {
208 	PG_RETURN_POINTER(PG_GETARG_POINTER(0));
209 }
210 
211 Datum
ghstore_same(PG_FUNCTION_ARGS)212 ghstore_same(PG_FUNCTION_ARGS)
213 {
214 	GISTTYPE   *a = (GISTTYPE *) PG_GETARG_POINTER(0);
215 	GISTTYPE   *b = (GISTTYPE *) PG_GETARG_POINTER(1);
216 	bool	   *result = (bool *) PG_GETARG_POINTER(2);
217 	int			siglen = GET_SIGLEN();
218 
219 
220 	if (ISALLTRUE(a) && ISALLTRUE(b))
221 		*result = true;
222 	else if (ISALLTRUE(a))
223 		*result = false;
224 	else if (ISALLTRUE(b))
225 		*result = false;
226 	else
227 	{
228 		int32		i;
229 		BITVECP		sa = GETSIGN(a),
230 					sb = GETSIGN(b);
231 
232 		*result = true;
233 		LOOPBYTE(siglen)
234 		{
235 			if (sa[i] != sb[i])
236 			{
237 				*result = false;
238 				break;
239 			}
240 		}
241 	}
242 	PG_RETURN_POINTER(result);
243 }
244 
245 static int32
sizebitvec(BITVECP sign,int siglen)246 sizebitvec(BITVECP sign, int siglen)
247 {
248 	int32		size = 0,
249 				i;
250 
251 	LOOPBYTE(siglen)
252 	{
253 		size += SUMBIT(sign);
254 		sign = (BITVECP) (((char *) sign) + 1);
255 	}
256 	return size;
257 }
258 
259 static int
hemdistsign(BITVECP a,BITVECP b,int siglen)260 hemdistsign(BITVECP a, BITVECP b, int siglen)
261 {
262 	int			i,
263 				dist = 0;
264 
265 	LOOPBIT(siglen)
266 	{
267 		if (GETBIT(a, i) != GETBIT(b, i))
268 			dist++;
269 	}
270 	return dist;
271 }
272 
273 static int
hemdist(GISTTYPE * a,GISTTYPE * b,int siglen)274 hemdist(GISTTYPE *a, GISTTYPE *b, int siglen)
275 {
276 	if (ISALLTRUE(a))
277 	{
278 		if (ISALLTRUE(b))
279 			return 0;
280 		else
281 			return SIGLENBIT(siglen) - sizebitvec(GETSIGN(b), siglen);
282 	}
283 	else if (ISALLTRUE(b))
284 		return SIGLENBIT(siglen) - sizebitvec(GETSIGN(a), siglen);
285 
286 	return hemdistsign(GETSIGN(a), GETSIGN(b), siglen);
287 }
288 
289 static int32
unionkey(BITVECP sbase,GISTTYPE * add,int siglen)290 unionkey(BITVECP sbase, GISTTYPE *add, int siglen)
291 {
292 	int32		i;
293 	BITVECP		sadd = GETSIGN(add);
294 
295 	if (ISALLTRUE(add))
296 		return 1;
297 	LOOPBYTE(siglen)
298 		sbase[i] |= sadd[i];
299 	return 0;
300 }
301 
302 Datum
ghstore_union(PG_FUNCTION_ARGS)303 ghstore_union(PG_FUNCTION_ARGS)
304 {
305 	GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
306 	int32		len = entryvec->n;
307 
308 	int		   *size = (int *) PG_GETARG_POINTER(1);
309 	int			siglen = GET_SIGLEN();
310 	int32		i;
311 	GISTTYPE   *result = ghstore_alloc(false, siglen, NULL);
312 	BITVECP		base = GETSIGN(result);
313 
314 	for (i = 0; i < len; i++)
315 	{
316 		if (unionkey(base, GETENTRY(entryvec, i), siglen))
317 		{
318 			result->flag |= ALLISTRUE;
319 			SET_VARSIZE(result, CALCGTSIZE(ALLISTRUE, siglen));
320 			break;
321 		}
322 	}
323 
324 	*size = VARSIZE(result);
325 
326 	PG_RETURN_POINTER(result);
327 }
328 
329 Datum
ghstore_penalty(PG_FUNCTION_ARGS)330 ghstore_penalty(PG_FUNCTION_ARGS)
331 {
332 	GISTENTRY  *origentry = (GISTENTRY *) PG_GETARG_POINTER(0); /* always ISSIGNKEY */
333 	GISTENTRY  *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);
334 	float	   *penalty = (float *) PG_GETARG_POINTER(2);
335 	int			siglen = GET_SIGLEN();
336 	GISTTYPE   *origval = (GISTTYPE *) DatumGetPointer(origentry->key);
337 	GISTTYPE   *newval = (GISTTYPE *) DatumGetPointer(newentry->key);
338 
339 	*penalty = hemdist(origval, newval, siglen);
340 	PG_RETURN_POINTER(penalty);
341 }
342 
343 
344 typedef struct
345 {
346 	OffsetNumber pos;
347 	int32		cost;
348 } SPLITCOST;
349 
350 static int
comparecost(const void * a,const void * b)351 comparecost(const void *a, const void *b)
352 {
353 	return ((const SPLITCOST *) a)->cost - ((const SPLITCOST *) b)->cost;
354 }
355 
356 
357 Datum
ghstore_picksplit(PG_FUNCTION_ARGS)358 ghstore_picksplit(PG_FUNCTION_ARGS)
359 {
360 	GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
361 	OffsetNumber maxoff = entryvec->n - 2;
362 
363 	GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
364 	int			siglen = GET_SIGLEN();
365 	OffsetNumber k,
366 				j;
367 	GISTTYPE   *datum_l,
368 			   *datum_r;
369 	BITVECP		union_l,
370 				union_r;
371 	int32		size_alpha,
372 				size_beta;
373 	int32		size_waste,
374 				waste = -1;
375 	int32		nbytes;
376 	OffsetNumber seed_1 = 0,
377 				seed_2 = 0;
378 	OffsetNumber *left,
379 			   *right;
380 	BITVECP		ptr;
381 	int			i;
382 	SPLITCOST  *costvector;
383 	GISTTYPE   *_k,
384 			   *_j;
385 
386 	nbytes = (maxoff + 2) * sizeof(OffsetNumber);
387 	v->spl_left = (OffsetNumber *) palloc(nbytes);
388 	v->spl_right = (OffsetNumber *) palloc(nbytes);
389 
390 	for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k))
391 	{
392 		_k = GETENTRY(entryvec, k);
393 		for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j))
394 		{
395 			size_waste = hemdist(_k, GETENTRY(entryvec, j), siglen);
396 			if (size_waste > waste)
397 			{
398 				waste = size_waste;
399 				seed_1 = k;
400 				seed_2 = j;
401 			}
402 		}
403 	}
404 
405 	left = v->spl_left;
406 	v->spl_nleft = 0;
407 	right = v->spl_right;
408 	v->spl_nright = 0;
409 
410 	if (seed_1 == 0 || seed_2 == 0)
411 	{
412 		seed_1 = 1;
413 		seed_2 = 2;
414 	}
415 
416 	/* form initial .. */
417 	datum_l = ghstore_alloc(ISALLTRUE(GETENTRY(entryvec, seed_1)), siglen,
418 							GETSIGN(GETENTRY(entryvec, seed_1)));
419 	datum_r = ghstore_alloc(ISALLTRUE(GETENTRY(entryvec, seed_2)), siglen,
420 							GETSIGN(GETENTRY(entryvec, seed_2)));
421 
422 	maxoff = OffsetNumberNext(maxoff);
423 	/* sort before ... */
424 	costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
425 	for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j))
426 	{
427 		costvector[j - 1].pos = j;
428 		_j = GETENTRY(entryvec, j);
429 		size_alpha = hemdist(datum_l, _j, siglen);
430 		size_beta = hemdist(datum_r, _j, siglen);
431 		costvector[j - 1].cost = abs(size_alpha - size_beta);
432 	}
433 	qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);
434 
435 	union_l = GETSIGN(datum_l);
436 	union_r = GETSIGN(datum_r);
437 
438 	for (k = 0; k < maxoff; k++)
439 	{
440 		j = costvector[k].pos;
441 		if (j == seed_1)
442 		{
443 			*left++ = j;
444 			v->spl_nleft++;
445 			continue;
446 		}
447 		else if (j == seed_2)
448 		{
449 			*right++ = j;
450 			v->spl_nright++;
451 			continue;
452 		}
453 		_j = GETENTRY(entryvec, j);
454 		size_alpha = hemdist(datum_l, _j, siglen);
455 		size_beta = hemdist(datum_r, _j, siglen);
456 
457 		if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.0001))
458 		{
459 			if (ISALLTRUE(datum_l) || ISALLTRUE(_j))
460 			{
461 				if (!ISALLTRUE(datum_l))
462 					MemSet((void *) union_l, 0xff, siglen);
463 			}
464 			else
465 			{
466 				ptr = GETSIGN(_j);
467 				LOOPBYTE(siglen)
468 					union_l[i] |= ptr[i];
469 			}
470 			*left++ = j;
471 			v->spl_nleft++;
472 		}
473 		else
474 		{
475 			if (ISALLTRUE(datum_r) || ISALLTRUE(_j))
476 			{
477 				if (!ISALLTRUE(datum_r))
478 					MemSet((void *) union_r, 0xff, siglen);
479 			}
480 			else
481 			{
482 				ptr = GETSIGN(_j);
483 				LOOPBYTE(siglen)
484 					union_r[i] |= ptr[i];
485 			}
486 			*right++ = j;
487 			v->spl_nright++;
488 		}
489 	}
490 
491 	*right = *left = FirstOffsetNumber;
492 
493 	v->spl_ldatum = PointerGetDatum(datum_l);
494 	v->spl_rdatum = PointerGetDatum(datum_r);
495 
496 	PG_RETURN_POINTER(v);
497 }
498 
499 
500 Datum
ghstore_consistent(PG_FUNCTION_ARGS)501 ghstore_consistent(PG_FUNCTION_ARGS)
502 {
503 	GISTTYPE   *entry = (GISTTYPE *) DatumGetPointer(((GISTENTRY *) PG_GETARG_POINTER(0))->key);
504 	StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
505 
506 	/* Oid		subtype = PG_GETARG_OID(3); */
507 	bool	   *recheck = (bool *) PG_GETARG_POINTER(4);
508 	int			siglen = GET_SIGLEN();
509 	bool		res = true;
510 	BITVECP		sign;
511 
512 	/* All cases served by this function are inexact */
513 	*recheck = true;
514 
515 	if (ISALLTRUE(entry))
516 		PG_RETURN_BOOL(true);
517 
518 	sign = GETSIGN(entry);
519 
520 	if (strategy == HStoreContainsStrategyNumber ||
521 		strategy == HStoreOldContainsStrategyNumber)
522 	{
523 		HStore	   *query = PG_GETARG_HSTORE_P(1);
524 		HEntry	   *qe = ARRPTR(query);
525 		char	   *qv = STRPTR(query);
526 		int			count = HS_COUNT(query);
527 		int			i;
528 
529 		for (i = 0; res && i < count; ++i)
530 		{
531 			int			crc = crc32_sz((char *) HSTORE_KEY(qe, qv, i),
532 									   HSTORE_KEYLEN(qe, i));
533 
534 			if (GETBIT(sign, HASHVAL(crc, siglen)))
535 			{
536 				if (!HSTORE_VALISNULL(qe, i))
537 				{
538 					crc = crc32_sz((char *) HSTORE_VAL(qe, qv, i),
539 								   HSTORE_VALLEN(qe, i));
540 					if (!GETBIT(sign, HASHVAL(crc, siglen)))
541 						res = false;
542 				}
543 			}
544 			else
545 				res = false;
546 		}
547 	}
548 	else if (strategy == HStoreExistsStrategyNumber)
549 	{
550 		text	   *query = PG_GETARG_TEXT_PP(1);
551 		int			crc = crc32_sz(VARDATA_ANY(query), VARSIZE_ANY_EXHDR(query));
552 
553 		res = (GETBIT(sign, HASHVAL(crc, siglen))) ? true : false;
554 	}
555 	else if (strategy == HStoreExistsAllStrategyNumber)
556 	{
557 		ArrayType  *query = PG_GETARG_ARRAYTYPE_P(1);
558 		Datum	   *key_datums;
559 		bool	   *key_nulls;
560 		int			key_count;
561 		int			i;
562 
563 		deconstruct_array(query,
564 						  TEXTOID, -1, false, TYPALIGN_INT,
565 						  &key_datums, &key_nulls, &key_count);
566 
567 		for (i = 0; res && i < key_count; ++i)
568 		{
569 			int			crc;
570 
571 			if (key_nulls[i])
572 				continue;
573 			crc = crc32_sz(VARDATA(key_datums[i]), VARSIZE(key_datums[i]) - VARHDRSZ);
574 			if (!(GETBIT(sign, HASHVAL(crc, siglen))))
575 				res = false;
576 		}
577 	}
578 	else if (strategy == HStoreExistsAnyStrategyNumber)
579 	{
580 		ArrayType  *query = PG_GETARG_ARRAYTYPE_P(1);
581 		Datum	   *key_datums;
582 		bool	   *key_nulls;
583 		int			key_count;
584 		int			i;
585 
586 		deconstruct_array(query,
587 						  TEXTOID, -1, false, TYPALIGN_INT,
588 						  &key_datums, &key_nulls, &key_count);
589 
590 		res = false;
591 
592 		for (i = 0; !res && i < key_count; ++i)
593 		{
594 			int			crc;
595 
596 			if (key_nulls[i])
597 				continue;
598 			crc = crc32_sz(VARDATA(key_datums[i]), VARSIZE(key_datums[i]) - VARHDRSZ);
599 			if (GETBIT(sign, HASHVAL(crc, siglen)))
600 				res = true;
601 		}
602 	}
603 	else
604 		elog(ERROR, "Unsupported strategy number: %d", strategy);
605 
606 	PG_RETURN_BOOL(res);
607 }
608 
609 Datum
ghstore_options(PG_FUNCTION_ARGS)610 ghstore_options(PG_FUNCTION_ARGS)
611 {
612 	local_relopts *relopts = (local_relopts *) PG_GETARG_POINTER(0);
613 
614 	init_local_reloptions(relopts, sizeof(GistHstoreOptions));
615 	add_local_int_reloption(relopts, "siglen",
616 							"signature length in bytes",
617 							SIGLEN_DEFAULT, 1, SIGLEN_MAX,
618 							offsetof(GistHstoreOptions, siglen));
619 
620 	PG_RETURN_VOID();
621 }
622