1 /*-------------------------------------------------------------------------
2  *
3  * spgtextproc.c
4  *	  implementation of radix tree (compressed trie) over text
5  *
6  * In a text_ops SPGiST index, inner tuples can have a prefix which is the
7  * common prefix of all strings indexed under that tuple.  The node labels
8  * represent the next byte of the string(s) after the prefix.  Assuming we
9  * always use the longest possible prefix, we will get more than one node
10  * label unless the prefix length is restricted by SPGIST_MAX_PREFIX_LENGTH.
11  *
12  * To reconstruct the indexed string for any index entry, concatenate the
13  * inner-tuple prefixes and node labels starting at the root and working
14  * down to the leaf entry, then append the datum in the leaf entry.
15  * (While descending the tree, "level" is the number of bytes reconstructed
16  * so far.)
17  *
18  * However, there are two special cases for node labels: -1 indicates that
19  * there are no more bytes after the prefix-so-far, and -2 indicates that we
20  * had to split an existing allTheSame tuple (in such a case we have to create
21  * a node label that doesn't correspond to any string byte).  In either case,
22  * the node label does not contribute anything to the reconstructed string.
23  *
24  * Previously, we used a node label of zero for both special cases, but
25  * this was problematic because one can't tell whether a string ending at
26  * the current level can be pushed down into such a child node.  For
27  * backwards compatibility, we still support such node labels for reading;
28  * but no new entries will ever be pushed down into a zero-labeled child.
29  * No new entries ever get pushed into a -2-labeled child, either.
30  *
31  *
32  * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
33  * Portions Copyright (c) 1994, Regents of the University of California
34  *
35  * IDENTIFICATION
36  *			src/backend/access/spgist/spgtextproc.c
37  *
38  *-------------------------------------------------------------------------
39  */
40 #include "postgres.h"
41 
42 #include "access/spgist.h"
43 #include "catalog/pg_type.h"
44 #include "mb/pg_wchar.h"
45 #include "utils/builtins.h"
46 #include "utils/datum.h"
47 #include "utils/pg_locale.h"
48 #include "utils/varlena.h"
49 
50 
51 /*
52  * In the worst case, an inner tuple in a text radix tree could have as many
53  * as 258 nodes (one for each possible byte value, plus the two special
54  * cases).  Each node can take 16 bytes on MAXALIGN=8 machines.  The inner
55  * tuple must fit on an index page of size BLCKSZ.  Rather than assuming we
56  * know the exact amount of overhead imposed by page headers, tuple headers,
57  * etc, we leave 100 bytes for that (the actual overhead should be no more
58  * than 56 bytes at this writing, so there is slop in this number).
59  * So we can safely create prefixes up to BLCKSZ - 258 * 16 - 100 bytes long.
60  * Unfortunately, because 258 * 16 is over 4K, there is no safe prefix length
61  * when BLCKSZ is less than 8K; it is always possible to get "SPGiST inner
62  * tuple size exceeds maximum" if there are too many distinct next-byte values
63  * at a given place in the tree.  Since use of nonstandard block sizes appears
64  * to be negligible in the field, we just live with that fact for now,
65  * choosing a max prefix size of 32 bytes when BLCKSZ is configured smaller
66  * than default.
67  */
68 #define SPGIST_MAX_PREFIX_LENGTH	Max((int) (BLCKSZ - 258 * 16 - 100), 32)
69 
70 /*
71  * Strategy for collation aware operator on text is equal to btree strategy
72  * plus value of 10.
73  *
74  * Current collation aware strategies and their corresponding btree strategies:
75  * 11 BTLessStrategyNumber
76  * 12 BTLessEqualStrategyNumber
77  * 14 BTGreaterEqualStrategyNumber
78  * 15 BTGreaterStrategyNumber
79  */
80 #define SPG_STRATEGY_ADDITION	(10)
81 #define SPG_IS_COLLATION_AWARE_STRATEGY(s) ((s) > SPG_STRATEGY_ADDITION \
82 										 && (s) != RTPrefixStrategyNumber)
83 
84 /* Struct for sorting values in picksplit */
85 typedef struct spgNodePtr
86 {
87 	Datum		d;
88 	int			i;
89 	int16		c;
90 } spgNodePtr;
91 
92 
93 Datum
spg_text_config(PG_FUNCTION_ARGS)94 spg_text_config(PG_FUNCTION_ARGS)
95 {
96 	/* spgConfigIn *cfgin = (spgConfigIn *) PG_GETARG_POINTER(0); */
97 	spgConfigOut *cfg = (spgConfigOut *) PG_GETARG_POINTER(1);
98 
99 	cfg->prefixType = TEXTOID;
100 	cfg->labelType = INT2OID;
101 	cfg->canReturnData = true;
102 	cfg->longValuesOK = true;	/* suffixing will shorten long values */
103 	PG_RETURN_VOID();
104 }
105 
106 /*
107  * Form a text datum from the given not-necessarily-null-terminated string,
108  * using short varlena header format if possible
109  */
110 static Datum
formTextDatum(const char * data,int datalen)111 formTextDatum(const char *data, int datalen)
112 {
113 	char	   *p;
114 
115 	p = (char *) palloc(datalen + VARHDRSZ);
116 
117 	if (datalen + VARHDRSZ_SHORT <= VARATT_SHORT_MAX)
118 	{
119 		SET_VARSIZE_SHORT(p, datalen + VARHDRSZ_SHORT);
120 		if (datalen)
121 			memcpy(p + VARHDRSZ_SHORT, data, datalen);
122 	}
123 	else
124 	{
125 		SET_VARSIZE(p, datalen + VARHDRSZ);
126 		memcpy(p + VARHDRSZ, data, datalen);
127 	}
128 
129 	return PointerGetDatum(p);
130 }
131 
132 /*
133  * Find the length of the common prefix of a and b
134  */
135 static int
commonPrefix(const char * a,const char * b,int lena,int lenb)136 commonPrefix(const char *a, const char *b, int lena, int lenb)
137 {
138 	int			i = 0;
139 
140 	while (i < lena && i < lenb && *a == *b)
141 	{
142 		a++;
143 		b++;
144 		i++;
145 	}
146 
147 	return i;
148 }
149 
150 /*
151  * Binary search an array of int16 datums for a match to c
152  *
153  * On success, *i gets the match location; on failure, it gets where to insert
154  */
155 static bool
searchChar(Datum * nodeLabels,int nNodes,int16 c,int * i)156 searchChar(Datum *nodeLabels, int nNodes, int16 c, int *i)
157 {
158 	int			StopLow = 0,
159 				StopHigh = nNodes;
160 
161 	while (StopLow < StopHigh)
162 	{
163 		int			StopMiddle = (StopLow + StopHigh) >> 1;
164 		int16		middle = DatumGetInt16(nodeLabels[StopMiddle]);
165 
166 		if (c < middle)
167 			StopHigh = StopMiddle;
168 		else if (c > middle)
169 			StopLow = StopMiddle + 1;
170 		else
171 		{
172 			*i = StopMiddle;
173 			return true;
174 		}
175 	}
176 
177 	*i = StopHigh;
178 	return false;
179 }
180 
181 Datum
spg_text_choose(PG_FUNCTION_ARGS)182 spg_text_choose(PG_FUNCTION_ARGS)
183 {
184 	spgChooseIn *in = (spgChooseIn *) PG_GETARG_POINTER(0);
185 	spgChooseOut *out = (spgChooseOut *) PG_GETARG_POINTER(1);
186 	text	   *inText = DatumGetTextPP(in->datum);
187 	char	   *inStr = VARDATA_ANY(inText);
188 	int			inSize = VARSIZE_ANY_EXHDR(inText);
189 	char	   *prefixStr = NULL;
190 	int			prefixSize = 0;
191 	int			commonLen = 0;
192 	int16		nodeChar = 0;
193 	int			i = 0;
194 
195 	/* Check for prefix match, set nodeChar to first byte after prefix */
196 	if (in->hasPrefix)
197 	{
198 		text	   *prefixText = DatumGetTextPP(in->prefixDatum);
199 
200 		prefixStr = VARDATA_ANY(prefixText);
201 		prefixSize = VARSIZE_ANY_EXHDR(prefixText);
202 
203 		commonLen = commonPrefix(inStr + in->level,
204 								 prefixStr,
205 								 inSize - in->level,
206 								 prefixSize);
207 
208 		if (commonLen == prefixSize)
209 		{
210 			if (inSize - in->level > commonLen)
211 				nodeChar = *(unsigned char *) (inStr + in->level + commonLen);
212 			else
213 				nodeChar = -1;
214 		}
215 		else
216 		{
217 			/* Must split tuple because incoming value doesn't match prefix */
218 			out->resultType = spgSplitTuple;
219 
220 			if (commonLen == 0)
221 			{
222 				out->result.splitTuple.prefixHasPrefix = false;
223 			}
224 			else
225 			{
226 				out->result.splitTuple.prefixHasPrefix = true;
227 				out->result.splitTuple.prefixPrefixDatum =
228 					formTextDatum(prefixStr, commonLen);
229 			}
230 			out->result.splitTuple.prefixNNodes = 1;
231 			out->result.splitTuple.prefixNodeLabels =
232 				(Datum *) palloc(sizeof(Datum));
233 			out->result.splitTuple.prefixNodeLabels[0] =
234 				Int16GetDatum(*(unsigned char *) (prefixStr + commonLen));
235 
236 			out->result.splitTuple.childNodeN = 0;
237 
238 			if (prefixSize - commonLen == 1)
239 			{
240 				out->result.splitTuple.postfixHasPrefix = false;
241 			}
242 			else
243 			{
244 				out->result.splitTuple.postfixHasPrefix = true;
245 				out->result.splitTuple.postfixPrefixDatum =
246 					formTextDatum(prefixStr + commonLen + 1,
247 								  prefixSize - commonLen - 1);
248 			}
249 
250 			PG_RETURN_VOID();
251 		}
252 	}
253 	else if (inSize > in->level)
254 	{
255 		nodeChar = *(unsigned char *) (inStr + in->level);
256 	}
257 	else
258 	{
259 		nodeChar = -1;
260 	}
261 
262 	/* Look up nodeChar in the node label array */
263 	if (searchChar(in->nodeLabels, in->nNodes, nodeChar, &i))
264 	{
265 		/*
266 		 * Descend to existing node.  (If in->allTheSame, the core code will
267 		 * ignore our nodeN specification here, but that's OK.  We still have
268 		 * to provide the correct levelAdd and restDatum values, and those are
269 		 * the same regardless of which node gets chosen by core.)
270 		 */
271 		int			levelAdd;
272 
273 		out->resultType = spgMatchNode;
274 		out->result.matchNode.nodeN = i;
275 		levelAdd = commonLen;
276 		if (nodeChar >= 0)
277 			levelAdd++;
278 		out->result.matchNode.levelAdd = levelAdd;
279 		if (inSize - in->level - levelAdd > 0)
280 			out->result.matchNode.restDatum =
281 				formTextDatum(inStr + in->level + levelAdd,
282 							  inSize - in->level - levelAdd);
283 		else
284 			out->result.matchNode.restDatum =
285 				formTextDatum(NULL, 0);
286 	}
287 	else if (in->allTheSame)
288 	{
289 		/*
290 		 * Can't use AddNode action, so split the tuple.  The upper tuple has
291 		 * the same prefix as before and uses a dummy node label -2 for the
292 		 * lower tuple.  The lower tuple has no prefix and the same node
293 		 * labels as the original tuple.
294 		 *
295 		 * Note: it might seem tempting to shorten the upper tuple's prefix,
296 		 * if it has one, then use its last byte as label for the lower tuple.
297 		 * But that doesn't win since we know the incoming value matches the
298 		 * whole prefix: we'd just end up splitting the lower tuple again.
299 		 */
300 		out->resultType = spgSplitTuple;
301 		out->result.splitTuple.prefixHasPrefix = in->hasPrefix;
302 		out->result.splitTuple.prefixPrefixDatum = in->prefixDatum;
303 		out->result.splitTuple.prefixNNodes = 1;
304 		out->result.splitTuple.prefixNodeLabels = (Datum *) palloc(sizeof(Datum));
305 		out->result.splitTuple.prefixNodeLabels[0] = Int16GetDatum(-2);
306 		out->result.splitTuple.childNodeN = 0;
307 		out->result.splitTuple.postfixHasPrefix = false;
308 	}
309 	else
310 	{
311 		/* Add a node for the not-previously-seen nodeChar value */
312 		out->resultType = spgAddNode;
313 		out->result.addNode.nodeLabel = Int16GetDatum(nodeChar);
314 		out->result.addNode.nodeN = i;
315 	}
316 
317 	PG_RETURN_VOID();
318 }
319 
320 /* qsort comparator to sort spgNodePtr structs by "c" */
321 static int
cmpNodePtr(const void * a,const void * b)322 cmpNodePtr(const void *a, const void *b)
323 {
324 	const spgNodePtr *aa = (const spgNodePtr *) a;
325 	const spgNodePtr *bb = (const spgNodePtr *) b;
326 
327 	return aa->c - bb->c;
328 }
329 
330 Datum
spg_text_picksplit(PG_FUNCTION_ARGS)331 spg_text_picksplit(PG_FUNCTION_ARGS)
332 {
333 	spgPickSplitIn *in = (spgPickSplitIn *) PG_GETARG_POINTER(0);
334 	spgPickSplitOut *out = (spgPickSplitOut *) PG_GETARG_POINTER(1);
335 	text	   *text0 = DatumGetTextPP(in->datums[0]);
336 	int			i,
337 				commonLen;
338 	spgNodePtr *nodes;
339 
340 	/* Identify longest common prefix, if any */
341 	commonLen = VARSIZE_ANY_EXHDR(text0);
342 	for (i = 1; i < in->nTuples && commonLen > 0; i++)
343 	{
344 		text	   *texti = DatumGetTextPP(in->datums[i]);
345 		int			tmp = commonPrefix(VARDATA_ANY(text0),
346 									   VARDATA_ANY(texti),
347 									   VARSIZE_ANY_EXHDR(text0),
348 									   VARSIZE_ANY_EXHDR(texti));
349 
350 		if (tmp < commonLen)
351 			commonLen = tmp;
352 	}
353 
354 	/*
355 	 * Limit the prefix length, if necessary, to ensure that the resulting
356 	 * inner tuple will fit on a page.
357 	 */
358 	commonLen = Min(commonLen, SPGIST_MAX_PREFIX_LENGTH);
359 
360 	/* Set node prefix to be that string, if it's not empty */
361 	if (commonLen == 0)
362 	{
363 		out->hasPrefix = false;
364 	}
365 	else
366 	{
367 		out->hasPrefix = true;
368 		out->prefixDatum = formTextDatum(VARDATA_ANY(text0), commonLen);
369 	}
370 
371 	/* Extract the node label (first non-common byte) from each value */
372 	nodes = (spgNodePtr *) palloc(sizeof(spgNodePtr) * in->nTuples);
373 
374 	for (i = 0; i < in->nTuples; i++)
375 	{
376 		text	   *texti = DatumGetTextPP(in->datums[i]);
377 
378 		if (commonLen < VARSIZE_ANY_EXHDR(texti))
379 			nodes[i].c = *(unsigned char *) (VARDATA_ANY(texti) + commonLen);
380 		else
381 			nodes[i].c = -1;	/* use -1 if string is all common */
382 		nodes[i].i = i;
383 		nodes[i].d = in->datums[i];
384 	}
385 
386 	/*
387 	 * Sort by label values so that we can group the values into nodes.  This
388 	 * also ensures that the nodes are ordered by label value, allowing the
389 	 * use of binary search in searchChar.
390 	 */
391 	qsort(nodes, in->nTuples, sizeof(*nodes), cmpNodePtr);
392 
393 	/* And emit results */
394 	out->nNodes = 0;
395 	out->nodeLabels = (Datum *) palloc(sizeof(Datum) * in->nTuples);
396 	out->mapTuplesToNodes = (int *) palloc(sizeof(int) * in->nTuples);
397 	out->leafTupleDatums = (Datum *) palloc(sizeof(Datum) * in->nTuples);
398 
399 	for (i = 0; i < in->nTuples; i++)
400 	{
401 		text	   *texti = DatumGetTextPP(nodes[i].d);
402 		Datum		leafD;
403 
404 		if (i == 0 || nodes[i].c != nodes[i - 1].c)
405 		{
406 			out->nodeLabels[out->nNodes] = Int16GetDatum(nodes[i].c);
407 			out->nNodes++;
408 		}
409 
410 		if (commonLen < VARSIZE_ANY_EXHDR(texti))
411 			leafD = formTextDatum(VARDATA_ANY(texti) + commonLen + 1,
412 								  VARSIZE_ANY_EXHDR(texti) - commonLen - 1);
413 		else
414 			leafD = formTextDatum(NULL, 0);
415 
416 		out->leafTupleDatums[nodes[i].i] = leafD;
417 		out->mapTuplesToNodes[nodes[i].i] = out->nNodes - 1;
418 	}
419 
420 	PG_RETURN_VOID();
421 }
422 
423 Datum
spg_text_inner_consistent(PG_FUNCTION_ARGS)424 spg_text_inner_consistent(PG_FUNCTION_ARGS)
425 {
426 	spgInnerConsistentIn *in = (spgInnerConsistentIn *) PG_GETARG_POINTER(0);
427 	spgInnerConsistentOut *out = (spgInnerConsistentOut *) PG_GETARG_POINTER(1);
428 	bool		collate_is_c = lc_collate_is_c(PG_GET_COLLATION());
429 	text	   *reconstructedValue;
430 	text	   *reconstrText;
431 	int			maxReconstrLen;
432 	text	   *prefixText = NULL;
433 	int			prefixSize = 0;
434 	int			i;
435 
436 	/*
437 	 * Reconstruct values represented at this tuple, including parent data,
438 	 * prefix of this tuple if any, and the node label if it's non-dummy.
439 	 * in->level should be the length of the previously reconstructed value,
440 	 * and the number of bytes added here is prefixSize or prefixSize + 1.
441 	 *
442 	 * Note: we assume that in->reconstructedValue isn't toasted and doesn't
443 	 * have a short varlena header.  This is okay because it must have been
444 	 * created by a previous invocation of this routine, and we always emit
445 	 * long-format reconstructed values.
446 	 */
447 	reconstructedValue = (text *) DatumGetPointer(in->reconstructedValue);
448 	Assert(reconstructedValue == NULL ? in->level == 0 :
449 		   VARSIZE_ANY_EXHDR(reconstructedValue) == in->level);
450 
451 	maxReconstrLen = in->level + 1;
452 	if (in->hasPrefix)
453 	{
454 		prefixText = DatumGetTextPP(in->prefixDatum);
455 		prefixSize = VARSIZE_ANY_EXHDR(prefixText);
456 		maxReconstrLen += prefixSize;
457 	}
458 
459 	reconstrText = palloc(VARHDRSZ + maxReconstrLen);
460 	SET_VARSIZE(reconstrText, VARHDRSZ + maxReconstrLen);
461 
462 	if (in->level)
463 		memcpy(VARDATA(reconstrText),
464 			   VARDATA(reconstructedValue),
465 			   in->level);
466 	if (prefixSize)
467 		memcpy(((char *) VARDATA(reconstrText)) + in->level,
468 			   VARDATA_ANY(prefixText),
469 			   prefixSize);
470 	/* last byte of reconstrText will be filled in below */
471 
472 	/*
473 	 * Scan the child nodes.  For each one, complete the reconstructed value
474 	 * and see if it's consistent with the query.  If so, emit an entry into
475 	 * the output arrays.
476 	 */
477 	out->nodeNumbers = (int *) palloc(sizeof(int) * in->nNodes);
478 	out->levelAdds = (int *) palloc(sizeof(int) * in->nNodes);
479 	out->reconstructedValues = (Datum *) palloc(sizeof(Datum) * in->nNodes);
480 	out->nNodes = 0;
481 
482 	for (i = 0; i < in->nNodes; i++)
483 	{
484 		int16		nodeChar = DatumGetInt16(in->nodeLabels[i]);
485 		int			thisLen;
486 		bool		res = true;
487 		int			j;
488 
489 		/* If nodeChar is a dummy value, don't include it in data */
490 		if (nodeChar <= 0)
491 			thisLen = maxReconstrLen - 1;
492 		else
493 		{
494 			((unsigned char *) VARDATA(reconstrText))[maxReconstrLen - 1] = nodeChar;
495 			thisLen = maxReconstrLen;
496 		}
497 
498 		for (j = 0; j < in->nkeys; j++)
499 		{
500 			StrategyNumber strategy = in->scankeys[j].sk_strategy;
501 			text	   *inText;
502 			int			inSize;
503 			int			r;
504 
505 			/*
506 			 * If it's a collation-aware operator, but the collation is C, we
507 			 * can treat it as non-collation-aware.  With non-C collation we
508 			 * need to traverse whole tree :-( so there's no point in making
509 			 * any check here.  (Note also that our reconstructed value may
510 			 * well end with a partial multibyte character, so that applying
511 			 * any encoding-sensitive test to it would be risky anyhow.)
512 			 */
513 			if (SPG_IS_COLLATION_AWARE_STRATEGY(strategy))
514 			{
515 				if (collate_is_c)
516 					strategy -= SPG_STRATEGY_ADDITION;
517 				else
518 					continue;
519 			}
520 
521 			inText = DatumGetTextPP(in->scankeys[j].sk_argument);
522 			inSize = VARSIZE_ANY_EXHDR(inText);
523 
524 			r = memcmp(VARDATA(reconstrText), VARDATA_ANY(inText),
525 					   Min(inSize, thisLen));
526 
527 			switch (strategy)
528 			{
529 				case BTLessStrategyNumber:
530 				case BTLessEqualStrategyNumber:
531 					if (r > 0)
532 						res = false;
533 					break;
534 				case BTEqualStrategyNumber:
535 					if (r != 0 || inSize < thisLen)
536 						res = false;
537 					break;
538 				case BTGreaterEqualStrategyNumber:
539 				case BTGreaterStrategyNumber:
540 					if (r < 0)
541 						res = false;
542 					break;
543 				case RTPrefixStrategyNumber:
544 					if (r != 0)
545 						res = false;
546 					break;
547 				default:
548 					elog(ERROR, "unrecognized strategy number: %d",
549 						 in->scankeys[j].sk_strategy);
550 					break;
551 			}
552 
553 			if (!res)
554 				break;			/* no need to consider remaining conditions */
555 		}
556 
557 		if (res)
558 		{
559 			out->nodeNumbers[out->nNodes] = i;
560 			out->levelAdds[out->nNodes] = thisLen - in->level;
561 			SET_VARSIZE(reconstrText, VARHDRSZ + thisLen);
562 			out->reconstructedValues[out->nNodes] =
563 				datumCopy(PointerGetDatum(reconstrText), false, -1);
564 			out->nNodes++;
565 		}
566 	}
567 
568 	PG_RETURN_VOID();
569 }
570 
571 Datum
spg_text_leaf_consistent(PG_FUNCTION_ARGS)572 spg_text_leaf_consistent(PG_FUNCTION_ARGS)
573 {
574 	spgLeafConsistentIn *in = (spgLeafConsistentIn *) PG_GETARG_POINTER(0);
575 	spgLeafConsistentOut *out = (spgLeafConsistentOut *) PG_GETARG_POINTER(1);
576 	int			level = in->level;
577 	text	   *leafValue,
578 			   *reconstrValue = NULL;
579 	char	   *fullValue;
580 	int			fullLen;
581 	bool		res;
582 	int			j;
583 
584 	/* all tests are exact */
585 	out->recheck = false;
586 
587 	leafValue = DatumGetTextPP(in->leafDatum);
588 
589 	/* As above, in->reconstructedValue isn't toasted or short. */
590 	if (DatumGetPointer(in->reconstructedValue))
591 		reconstrValue = (text *) DatumGetPointer(in->reconstructedValue);
592 
593 	Assert(reconstrValue == NULL ? level == 0 :
594 		   VARSIZE_ANY_EXHDR(reconstrValue) == level);
595 
596 	/* Reconstruct the full string represented by this leaf tuple */
597 	fullLen = level + VARSIZE_ANY_EXHDR(leafValue);
598 	if (VARSIZE_ANY_EXHDR(leafValue) == 0 && level > 0)
599 	{
600 		fullValue = VARDATA(reconstrValue);
601 		out->leafValue = PointerGetDatum(reconstrValue);
602 	}
603 	else
604 	{
605 		text	   *fullText = palloc(VARHDRSZ + fullLen);
606 
607 		SET_VARSIZE(fullText, VARHDRSZ + fullLen);
608 		fullValue = VARDATA(fullText);
609 		if (level)
610 			memcpy(fullValue, VARDATA(reconstrValue), level);
611 		if (VARSIZE_ANY_EXHDR(leafValue) > 0)
612 			memcpy(fullValue + level, VARDATA_ANY(leafValue),
613 				   VARSIZE_ANY_EXHDR(leafValue));
614 		out->leafValue = PointerGetDatum(fullText);
615 	}
616 
617 	/* Perform the required comparison(s) */
618 	res = true;
619 	for (j = 0; j < in->nkeys; j++)
620 	{
621 		StrategyNumber strategy = in->scankeys[j].sk_strategy;
622 		text	   *query = DatumGetTextPP(in->scankeys[j].sk_argument);
623 		int			queryLen = VARSIZE_ANY_EXHDR(query);
624 		int			r;
625 
626 		if (strategy == RTPrefixStrategyNumber)
627 		{
628 			/*
629 			 * if level >= length of query then reconstrValue must begin with
630 			 * query (prefix) string, so we don't need to check it again.
631 			 */
632 			res = (level >= queryLen) ||
633 				DatumGetBool(DirectFunctionCall2(text_starts_with,
634 												 out->leafValue,
635 												 PointerGetDatum(query)));
636 
637 			if (!res)			/* no need to consider remaining conditions */
638 				break;
639 
640 			continue;
641 		}
642 
643 		if (SPG_IS_COLLATION_AWARE_STRATEGY(strategy))
644 		{
645 			/* Collation-aware comparison */
646 			strategy -= SPG_STRATEGY_ADDITION;
647 
648 			/* If asserts enabled, verify encoding of reconstructed string */
649 			Assert(pg_verifymbstr(fullValue, fullLen, false));
650 
651 			r = varstr_cmp(fullValue, fullLen,
652 						   VARDATA_ANY(query), queryLen,
653 						   PG_GET_COLLATION());
654 		}
655 		else
656 		{
657 			/* Non-collation-aware comparison */
658 			r = memcmp(fullValue, VARDATA_ANY(query), Min(queryLen, fullLen));
659 
660 			if (r == 0)
661 			{
662 				if (queryLen > fullLen)
663 					r = -1;
664 				else if (queryLen < fullLen)
665 					r = 1;
666 			}
667 		}
668 
669 		switch (strategy)
670 		{
671 			case BTLessStrategyNumber:
672 				res = (r < 0);
673 				break;
674 			case BTLessEqualStrategyNumber:
675 				res = (r <= 0);
676 				break;
677 			case BTEqualStrategyNumber:
678 				res = (r == 0);
679 				break;
680 			case BTGreaterEqualStrategyNumber:
681 				res = (r >= 0);
682 				break;
683 			case BTGreaterStrategyNumber:
684 				res = (r > 0);
685 				break;
686 			default:
687 				elog(ERROR, "unrecognized strategy number: %d",
688 					 in->scankeys[j].sk_strategy);
689 				res = false;
690 				break;
691 		}
692 
693 		if (!res)
694 			break;				/* no need to consider remaining conditions */
695 	}
696 
697 	PG_RETURN_BOOL(res);
698 }
699