1 /*-------------------------------------------------------------------------
2  *
3  * varchar.c
4  *	  Functions for the built-in types char(n) and varchar(n).
5  *
6  * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *	  src/backend/utils/adt/varchar.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16 
17 #include "access/detoast.h"
18 #include "catalog/pg_collation.h"
19 #include "catalog/pg_type.h"
20 #include "common/hashfn.h"
21 #include "libpq/pqformat.h"
22 #include "mb/pg_wchar.h"
23 #include "nodes/nodeFuncs.h"
24 #include "nodes/supportnodes.h"
25 #include "utils/array.h"
26 #include "utils/builtins.h"
27 #include "utils/lsyscache.h"
28 #include "utils/pg_locale.h"
29 #include "utils/varlena.h"
30 
31 /* common code for bpchartypmodin and varchartypmodin */
32 static int32
anychar_typmodin(ArrayType * ta,const char * typename)33 anychar_typmodin(ArrayType *ta, const char *typename)
34 {
35 	int32		typmod;
36 	int32	   *tl;
37 	int			n;
38 
39 	tl = ArrayGetIntegerTypmods(ta, &n);
40 
41 	/*
42 	 * we're not too tense about good error message here because grammar
43 	 * shouldn't allow wrong number of modifiers for CHAR
44 	 */
45 	if (n != 1)
46 		ereport(ERROR,
47 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
48 				 errmsg("invalid type modifier")));
49 
50 	if (*tl < 1)
51 		ereport(ERROR,
52 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
53 				 errmsg("length for type %s must be at least 1", typename)));
54 	if (*tl > MaxAttrSize)
55 		ereport(ERROR,
56 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
57 				 errmsg("length for type %s cannot exceed %d",
58 						typename, MaxAttrSize)));
59 
60 	/*
61 	 * For largely historical reasons, the typmod is VARHDRSZ plus the number
62 	 * of characters; there is enough client-side code that knows about that
63 	 * that we'd better not change it.
64 	 */
65 	typmod = VARHDRSZ + *tl;
66 
67 	return typmod;
68 }
69 
70 /* common code for bpchartypmodout and varchartypmodout */
71 static char *
anychar_typmodout(int32 typmod)72 anychar_typmodout(int32 typmod)
73 {
74 	char	   *res = (char *) palloc(64);
75 
76 	if (typmod > VARHDRSZ)
77 		snprintf(res, 64, "(%d)", (int) (typmod - VARHDRSZ));
78 	else
79 		*res = '\0';
80 
81 	return res;
82 }
83 
84 
85 /*
 * CHAR() and VARCHAR() types are part of the SQL standard. CHAR()
 * is for blank-padded strings whose length is specified in CREATE TABLE.
 * VARCHAR is for storing strings whose length is at most the length specified
 * at CREATE TABLE time.
90  *
91  * It's hard to implement these types because we cannot figure out
92  * the length of the type from the type itself. I changed (hopefully all) the
93  * fmgr calls that invoke input functions of a data type to supply the
94  * length also. (eg. in INSERTs, we have the tupleDescriptor which contains
95  * the length of the attributes and hence the exact length of the char() or
96  * varchar(). We pass this to bpcharin() or varcharin().) In the case where
97  * we cannot determine the length, we pass in -1 instead and the input
98  * converter does not enforce any length check.
99  *
100  * We actually implement this as a varlena so that we don't have to pass in
101  * the length for the comparison functions. (The difference between these
102  * types and "text" is that we truncate and possibly blank-pad the string
103  * at insertion time.)
104  *
105  *															  - ay 6/95
106  */
107 
108 
109 /*****************************************************************************
110  *	 bpchar - char()														 *
111  *****************************************************************************/
112 
113 /*
114  * bpchar_input -- common guts of bpcharin and bpcharrecv
115  *
116  * s is the input text of length len (may not be null-terminated)
117  * atttypmod is the typmod value to apply
118  *
119  * Note that atttypmod is measured in characters, which
120  * is not necessarily the same as the number of bytes.
121  *
122  * If the input string is too long, raise an error, unless the extra
123  * characters are spaces, in which case they're truncated.  (per SQL)
124  */
125 static BpChar *
bpchar_input(const char * s,size_t len,int32 atttypmod)126 bpchar_input(const char *s, size_t len, int32 atttypmod)
127 {
128 	BpChar	   *result;
129 	char	   *r;
130 	size_t		maxlen;
131 
132 	/* If typmod is -1 (or invalid), use the actual string length */
133 	if (atttypmod < (int32) VARHDRSZ)
134 		maxlen = len;
135 	else
136 	{
137 		size_t		charlen;	/* number of CHARACTERS in the input */
138 
139 		maxlen = atttypmod - VARHDRSZ;
140 		charlen = pg_mbstrlen_with_len(s, len);
141 		if (charlen > maxlen)
142 		{
143 			/* Verify that extra characters are spaces, and clip them off */
144 			size_t		mbmaxlen = pg_mbcharcliplen(s, len, maxlen);
145 			size_t		j;
146 
147 			/*
148 			 * at this point, len is the actual BYTE length of the input
149 			 * string, maxlen is the max number of CHARACTERS allowed for this
150 			 * bpchar type, mbmaxlen is the length in BYTES of those chars.
151 			 */
152 			for (j = mbmaxlen; j < len; j++)
153 			{
154 				if (s[j] != ' ')
155 					ereport(ERROR,
156 							(errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION),
157 							 errmsg("value too long for type character(%d)",
158 									(int) maxlen)));
159 			}
160 
161 			/*
162 			 * Now we set maxlen to the necessary byte length, not the number
163 			 * of CHARACTERS!
164 			 */
165 			maxlen = len = mbmaxlen;
166 		}
167 		else
168 		{
169 			/*
170 			 * Now we set maxlen to the necessary byte length, not the number
171 			 * of CHARACTERS!
172 			 */
173 			maxlen = len + (maxlen - charlen);
174 		}
175 	}
176 
177 	result = (BpChar *) palloc(maxlen + VARHDRSZ);
178 	SET_VARSIZE(result, maxlen + VARHDRSZ);
179 	r = VARDATA(result);
180 	memcpy(r, s, len);
181 
182 	/* blank pad the string if necessary */
183 	if (maxlen > len)
184 		memset(r + len, ' ', maxlen - len);
185 
186 	return result;
187 }
188 
189 /*
190  * Convert a C string to CHARACTER internal representation.  atttypmod
191  * is the declared length of the type plus VARHDRSZ.
192  */
193 Datum
bpcharin(PG_FUNCTION_ARGS)194 bpcharin(PG_FUNCTION_ARGS)
195 {
196 	char	   *s = PG_GETARG_CSTRING(0);
197 
198 #ifdef NOT_USED
199 	Oid			typelem = PG_GETARG_OID(1);
200 #endif
201 	int32		atttypmod = PG_GETARG_INT32(2);
202 	BpChar	   *result;
203 
204 	result = bpchar_input(s, strlen(s), atttypmod);
205 	PG_RETURN_BPCHAR_P(result);
206 }
207 
208 
209 /*
210  * Convert a CHARACTER value to a C string.
211  *
212  * Uses the text conversion functions, which is only appropriate if BpChar
213  * and text are equivalent types.
214  */
215 Datum
bpcharout(PG_FUNCTION_ARGS)216 bpcharout(PG_FUNCTION_ARGS)
217 {
218 	Datum		txt = PG_GETARG_DATUM(0);
219 
220 	PG_RETURN_CSTRING(TextDatumGetCString(txt));
221 }
222 
223 /*
224  *		bpcharrecv			- converts external binary format to bpchar
225  */
226 Datum
bpcharrecv(PG_FUNCTION_ARGS)227 bpcharrecv(PG_FUNCTION_ARGS)
228 {
229 	StringInfo	buf = (StringInfo) PG_GETARG_POINTER(0);
230 
231 #ifdef NOT_USED
232 	Oid			typelem = PG_GETARG_OID(1);
233 #endif
234 	int32		atttypmod = PG_GETARG_INT32(2);
235 	BpChar	   *result;
236 	char	   *str;
237 	int			nbytes;
238 
239 	str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
240 	result = bpchar_input(str, nbytes, atttypmod);
241 	pfree(str);
242 	PG_RETURN_BPCHAR_P(result);
243 }
244 
245 /*
246  *		bpcharsend			- converts bpchar to binary format
247  */
248 Datum
bpcharsend(PG_FUNCTION_ARGS)249 bpcharsend(PG_FUNCTION_ARGS)
250 {
251 	/* Exactly the same as textsend, so share code */
252 	return textsend(fcinfo);
253 }
254 
255 
256 /*
257  * Converts a CHARACTER type to the specified size.
258  *
259  * maxlen is the typmod, ie, declared length plus VARHDRSZ bytes.
260  * isExplicit is true if this is for an explicit cast to char(N).
261  *
262  * Truncation rules: for an explicit cast, silently truncate to the given
263  * length; for an implicit cast, raise error unless extra characters are
264  * all spaces.  (This is sort-of per SQL: the spec would actually have us
265  * raise a "completion condition" for the explicit cast case, but Postgres
266  * hasn't got such a concept.)
267  */
268 Datum
bpchar(PG_FUNCTION_ARGS)269 bpchar(PG_FUNCTION_ARGS)
270 {
271 	BpChar	   *source = PG_GETARG_BPCHAR_PP(0);
272 	int32		maxlen = PG_GETARG_INT32(1);
273 	bool		isExplicit = PG_GETARG_BOOL(2);
274 	BpChar	   *result;
275 	int32		len;
276 	char	   *r;
277 	char	   *s;
278 	int			i;
279 	int			charlen;		/* number of characters in the input string +
280 								 * VARHDRSZ */
281 
282 	/* No work if typmod is invalid */
283 	if (maxlen < (int32) VARHDRSZ)
284 		PG_RETURN_BPCHAR_P(source);
285 
286 	maxlen -= VARHDRSZ;
287 
288 	len = VARSIZE_ANY_EXHDR(source);
289 	s = VARDATA_ANY(source);
290 
291 	charlen = pg_mbstrlen_with_len(s, len);
292 
293 	/* No work if supplied data matches typmod already */
294 	if (charlen == maxlen)
295 		PG_RETURN_BPCHAR_P(source);
296 
297 	if (charlen > maxlen)
298 	{
299 		/* Verify that extra characters are spaces, and clip them off */
300 		size_t		maxmblen;
301 
302 		maxmblen = pg_mbcharcliplen(s, len, maxlen);
303 
304 		if (!isExplicit)
305 		{
306 			for (i = maxmblen; i < len; i++)
307 				if (s[i] != ' ')
308 					ereport(ERROR,
309 							(errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION),
310 							 errmsg("value too long for type character(%d)",
311 									maxlen)));
312 		}
313 
314 		len = maxmblen;
315 
316 		/*
317 		 * At this point, maxlen is the necessary byte length, not the number
318 		 * of CHARACTERS!
319 		 */
320 		maxlen = len;
321 	}
322 	else
323 	{
324 		/*
325 		 * At this point, maxlen is the necessary byte length, not the number
326 		 * of CHARACTERS!
327 		 */
328 		maxlen = len + (maxlen - charlen);
329 	}
330 
331 	Assert(maxlen >= len);
332 
333 	result = palloc(maxlen + VARHDRSZ);
334 	SET_VARSIZE(result, maxlen + VARHDRSZ);
335 	r = VARDATA(result);
336 
337 	memcpy(r, s, len);
338 
339 	/* blank pad the string if necessary */
340 	if (maxlen > len)
341 		memset(r + len, ' ', maxlen - len);
342 
343 	PG_RETURN_BPCHAR_P(result);
344 }
345 
346 
347 /* char_bpchar()
348  * Convert char to bpchar(1).
349  */
350 Datum
char_bpchar(PG_FUNCTION_ARGS)351 char_bpchar(PG_FUNCTION_ARGS)
352 {
353 	char		c = PG_GETARG_CHAR(0);
354 	BpChar	   *result;
355 
356 	result = (BpChar *) palloc(VARHDRSZ + 1);
357 
358 	SET_VARSIZE(result, VARHDRSZ + 1);
359 	*(VARDATA(result)) = c;
360 
361 	PG_RETURN_BPCHAR_P(result);
362 }
363 
364 
365 /* bpchar_name()
366  * Converts a bpchar() type to a NameData type.
367  */
368 Datum
bpchar_name(PG_FUNCTION_ARGS)369 bpchar_name(PG_FUNCTION_ARGS)
370 {
371 	BpChar	   *s = PG_GETARG_BPCHAR_PP(0);
372 	char	   *s_data;
373 	Name		result;
374 	int			len;
375 
376 	len = VARSIZE_ANY_EXHDR(s);
377 	s_data = VARDATA_ANY(s);
378 
379 	/* Truncate oversize input */
380 	if (len >= NAMEDATALEN)
381 		len = pg_mbcliplen(s_data, len, NAMEDATALEN - 1);
382 
383 	/* Remove trailing blanks */
384 	while (len > 0)
385 	{
386 		if (s_data[len - 1] != ' ')
387 			break;
388 		len--;
389 	}
390 
391 	/* We use palloc0 here to ensure result is zero-padded */
392 	result = (Name) palloc0(NAMEDATALEN);
393 	memcpy(NameStr(*result), s_data, len);
394 
395 	PG_RETURN_NAME(result);
396 }
397 
398 /* name_bpchar()
399  * Converts a NameData type to a bpchar type.
400  *
401  * Uses the text conversion functions, which is only appropriate if BpChar
402  * and text are equivalent types.
403  */
404 Datum
name_bpchar(PG_FUNCTION_ARGS)405 name_bpchar(PG_FUNCTION_ARGS)
406 {
407 	Name		s = PG_GETARG_NAME(0);
408 	BpChar	   *result;
409 
410 	result = (BpChar *) cstring_to_text(NameStr(*s));
411 	PG_RETURN_BPCHAR_P(result);
412 }
413 
414 Datum
bpchartypmodin(PG_FUNCTION_ARGS)415 bpchartypmodin(PG_FUNCTION_ARGS)
416 {
417 	ArrayType  *ta = PG_GETARG_ARRAYTYPE_P(0);
418 
419 	PG_RETURN_INT32(anychar_typmodin(ta, "char"));
420 }
421 
422 Datum
bpchartypmodout(PG_FUNCTION_ARGS)423 bpchartypmodout(PG_FUNCTION_ARGS)
424 {
425 	int32		typmod = PG_GETARG_INT32(0);
426 
427 	PG_RETURN_CSTRING(anychar_typmodout(typmod));
428 }
429 
430 
431 /*****************************************************************************
432  *	 varchar - varchar(n)
433  *
434  * Note: varchar piggybacks on type text for most operations, and so has no
435  * C-coded functions except for I/O and typmod checking.
436  *****************************************************************************/
437 
438 /*
439  * varchar_input -- common guts of varcharin and varcharrecv
440  *
441  * s is the input text of length len (may not be null-terminated)
442  * atttypmod is the typmod value to apply
443  *
444  * Note that atttypmod is measured in characters, which
445  * is not necessarily the same as the number of bytes.
446  *
447  * If the input string is too long, raise an error, unless the extra
448  * characters are spaces, in which case they're truncated.  (per SQL)
449  *
450  * Uses the C string to text conversion function, which is only appropriate
451  * if VarChar and text are equivalent types.
452  */
453 static VarChar *
varchar_input(const char * s,size_t len,int32 atttypmod)454 varchar_input(const char *s, size_t len, int32 atttypmod)
455 {
456 	VarChar    *result;
457 	size_t		maxlen;
458 
459 	maxlen = atttypmod - VARHDRSZ;
460 
461 	if (atttypmod >= (int32) VARHDRSZ && len > maxlen)
462 	{
463 		/* Verify that extra characters are spaces, and clip them off */
464 		size_t		mbmaxlen = pg_mbcharcliplen(s, len, maxlen);
465 		size_t		j;
466 
467 		for (j = mbmaxlen; j < len; j++)
468 		{
469 			if (s[j] != ' ')
470 				ereport(ERROR,
471 						(errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION),
472 						 errmsg("value too long for type character varying(%d)",
473 								(int) maxlen)));
474 		}
475 
476 		len = mbmaxlen;
477 	}
478 
479 	result = (VarChar *) cstring_to_text_with_len(s, len);
480 	return result;
481 }
482 
483 /*
484  * Convert a C string to VARCHAR internal representation.  atttypmod
485  * is the declared length of the type plus VARHDRSZ.
486  */
487 Datum
varcharin(PG_FUNCTION_ARGS)488 varcharin(PG_FUNCTION_ARGS)
489 {
490 	char	   *s = PG_GETARG_CSTRING(0);
491 
492 #ifdef NOT_USED
493 	Oid			typelem = PG_GETARG_OID(1);
494 #endif
495 	int32		atttypmod = PG_GETARG_INT32(2);
496 	VarChar    *result;
497 
498 	result = varchar_input(s, strlen(s), atttypmod);
499 	PG_RETURN_VARCHAR_P(result);
500 }
501 
502 
503 /*
504  * Convert a VARCHAR value to a C string.
505  *
506  * Uses the text to C string conversion function, which is only appropriate
507  * if VarChar and text are equivalent types.
508  */
509 Datum
varcharout(PG_FUNCTION_ARGS)510 varcharout(PG_FUNCTION_ARGS)
511 {
512 	Datum		txt = PG_GETARG_DATUM(0);
513 
514 	PG_RETURN_CSTRING(TextDatumGetCString(txt));
515 }
516 
517 /*
518  *		varcharrecv			- converts external binary format to varchar
519  */
520 Datum
varcharrecv(PG_FUNCTION_ARGS)521 varcharrecv(PG_FUNCTION_ARGS)
522 {
523 	StringInfo	buf = (StringInfo) PG_GETARG_POINTER(0);
524 
525 #ifdef NOT_USED
526 	Oid			typelem = PG_GETARG_OID(1);
527 #endif
528 	int32		atttypmod = PG_GETARG_INT32(2);
529 	VarChar    *result;
530 	char	   *str;
531 	int			nbytes;
532 
533 	str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
534 	result = varchar_input(str, nbytes, atttypmod);
535 	pfree(str);
536 	PG_RETURN_VARCHAR_P(result);
537 }
538 
539 /*
540  *		varcharsend			- converts varchar to binary format
541  */
542 Datum
varcharsend(PG_FUNCTION_ARGS)543 varcharsend(PG_FUNCTION_ARGS)
544 {
545 	/* Exactly the same as textsend, so share code */
546 	return textsend(fcinfo);
547 }
548 
549 
550 /*
551  * varchar_support()
552  *
553  * Planner support function for the varchar() length coercion function.
554  *
555  * Currently, the only interesting thing we can do is flatten calls that set
556  * the new maximum length >= the previous maximum length.  We can ignore the
557  * isExplicit argument, since that only affects truncation cases.
558  */
559 Datum
varchar_support(PG_FUNCTION_ARGS)560 varchar_support(PG_FUNCTION_ARGS)
561 {
562 	Node	   *rawreq = (Node *) PG_GETARG_POINTER(0);
563 	Node	   *ret = NULL;
564 
565 	if (IsA(rawreq, SupportRequestSimplify))
566 	{
567 		SupportRequestSimplify *req = (SupportRequestSimplify *) rawreq;
568 		FuncExpr   *expr = req->fcall;
569 		Node	   *typmod;
570 
571 		Assert(list_length(expr->args) >= 2);
572 
573 		typmod = (Node *) lsecond(expr->args);
574 
575 		if (IsA(typmod, Const) && !((Const *) typmod)->constisnull)
576 		{
577 			Node	   *source = (Node *) linitial(expr->args);
578 			int32		old_typmod = exprTypmod(source);
579 			int32		new_typmod = DatumGetInt32(((Const *) typmod)->constvalue);
580 			int32		old_max = old_typmod - VARHDRSZ;
581 			int32		new_max = new_typmod - VARHDRSZ;
582 
583 			if (new_typmod < 0 || (old_typmod >= 0 && old_max <= new_max))
584 				ret = relabel_to_typmod(source, new_typmod);
585 		}
586 	}
587 
588 	PG_RETURN_POINTER(ret);
589 }
590 
591 /*
592  * Converts a VARCHAR type to the specified size.
593  *
594  * maxlen is the typmod, ie, declared length plus VARHDRSZ bytes.
595  * isExplicit is true if this is for an explicit cast to varchar(N).
596  *
597  * Truncation rules: for an explicit cast, silently truncate to the given
598  * length; for an implicit cast, raise error unless extra characters are
599  * all spaces.  (This is sort-of per SQL: the spec would actually have us
600  * raise a "completion condition" for the explicit cast case, but Postgres
601  * hasn't got such a concept.)
602  */
603 Datum
varchar(PG_FUNCTION_ARGS)604 varchar(PG_FUNCTION_ARGS)
605 {
606 	VarChar    *source = PG_GETARG_VARCHAR_PP(0);
607 	int32		typmod = PG_GETARG_INT32(1);
608 	bool		isExplicit = PG_GETARG_BOOL(2);
609 	int32		len,
610 				maxlen;
611 	size_t		maxmblen;
612 	int			i;
613 	char	   *s_data;
614 
615 	len = VARSIZE_ANY_EXHDR(source);
616 	s_data = VARDATA_ANY(source);
617 	maxlen = typmod - VARHDRSZ;
618 
619 	/* No work if typmod is invalid or supplied data fits it already */
620 	if (maxlen < 0 || len <= maxlen)
621 		PG_RETURN_VARCHAR_P(source);
622 
623 	/* only reach here if string is too long... */
624 
625 	/* truncate multibyte string preserving multibyte boundary */
626 	maxmblen = pg_mbcharcliplen(s_data, len, maxlen);
627 
628 	if (!isExplicit)
629 	{
630 		for (i = maxmblen; i < len; i++)
631 			if (s_data[i] != ' ')
632 				ereport(ERROR,
633 						(errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION),
634 						 errmsg("value too long for type character varying(%d)",
635 								maxlen)));
636 	}
637 
638 	PG_RETURN_VARCHAR_P((VarChar *) cstring_to_text_with_len(s_data,
639 															 maxmblen));
640 }
641 
642 Datum
varchartypmodin(PG_FUNCTION_ARGS)643 varchartypmodin(PG_FUNCTION_ARGS)
644 {
645 	ArrayType  *ta = PG_GETARG_ARRAYTYPE_P(0);
646 
647 	PG_RETURN_INT32(anychar_typmodin(ta, "varchar"));
648 }
649 
650 Datum
varchartypmodout(PG_FUNCTION_ARGS)651 varchartypmodout(PG_FUNCTION_ARGS)
652 {
653 	int32		typmod = PG_GETARG_INT32(0);
654 
655 	PG_RETURN_CSTRING(anychar_typmodout(typmod));
656 }
657 
658 
659 /*****************************************************************************
660  * Exported functions
661  *****************************************************************************/
662 
663 /* "True" length (not counting trailing blanks) of a BpChar */
664 static inline int
bcTruelen(BpChar * arg)665 bcTruelen(BpChar *arg)
666 {
667 	return bpchartruelen(VARDATA_ANY(arg), VARSIZE_ANY_EXHDR(arg));
668 }
669 
/*
 * bpchartruelen -- length of s[0 .. len-1] once trailing blanks are dropped
 *
 * s need not be null-terminated; only the first len bytes are examined.
 * Returns the number of leading bytes that remain, which is 0 when the
 * buffer consists solely of blanks.
 */
int
bpchartruelen(char *s, int len)
{
	int			end = len;

	/*
	 * Scanning byte-wise is fine because ' ' is assumed to be a single-byte
	 * unit in every supported multibyte server encoding.
	 */
	while (end > 0 && s[end - 1] == ' ')
		end--;

	return end;
}
686 
687 Datum
bpcharlen(PG_FUNCTION_ARGS)688 bpcharlen(PG_FUNCTION_ARGS)
689 {
690 	BpChar	   *arg = PG_GETARG_BPCHAR_PP(0);
691 	int			len;
692 
693 	/* get number of bytes, ignoring trailing spaces */
694 	len = bcTruelen(arg);
695 
696 	/* in multibyte encoding, convert to number of characters */
697 	if (pg_database_encoding_max_length() != 1)
698 		len = pg_mbstrlen_with_len(VARDATA_ANY(arg), len);
699 
700 	PG_RETURN_INT32(len);
701 }
702 
703 Datum
bpcharoctetlen(PG_FUNCTION_ARGS)704 bpcharoctetlen(PG_FUNCTION_ARGS)
705 {
706 	Datum		arg = PG_GETARG_DATUM(0);
707 
708 	/* We need not detoast the input at all */
709 	PG_RETURN_INT32(toast_raw_datum_size(arg) - VARHDRSZ);
710 }
711 
712 
713 /*****************************************************************************
714  *	Comparison Functions used for bpchar
715  *
716  * Note: btree indexes need these routines not to leak memory; therefore,
717  * be careful to free working copies of toasted datums.  Most places don't
718  * need to be so careful.
719  *****************************************************************************/
720 
721 static void
check_collation_set(Oid collid)722 check_collation_set(Oid collid)
723 {
724 	if (!OidIsValid(collid))
725 	{
726 		/*
727 		 * This typically means that the parser could not resolve a conflict
728 		 * of implicit collations, so report it that way.
729 		 */
730 		ereport(ERROR,
731 				(errcode(ERRCODE_INDETERMINATE_COLLATION),
732 				 errmsg("could not determine which collation to use for string comparison"),
733 				 errhint("Use the COLLATE clause to set the collation explicitly.")));
734 	}
735 }
736 
737 Datum
bpchareq(PG_FUNCTION_ARGS)738 bpchareq(PG_FUNCTION_ARGS)
739 {
740 	BpChar	   *arg1 = PG_GETARG_BPCHAR_PP(0);
741 	BpChar	   *arg2 = PG_GETARG_BPCHAR_PP(1);
742 	int			len1,
743 				len2;
744 	bool		result;
745 	Oid			collid = PG_GET_COLLATION();
746 
747 	check_collation_set(collid);
748 
749 	len1 = bcTruelen(arg1);
750 	len2 = bcTruelen(arg2);
751 
752 	if (lc_collate_is_c(collid) ||
753 		collid == DEFAULT_COLLATION_OID ||
754 		pg_newlocale_from_collation(collid)->deterministic)
755 	{
756 		/*
757 		 * Since we only care about equality or not-equality, we can avoid all
758 		 * the expense of strcoll() here, and just do bitwise comparison.
759 		 */
760 		if (len1 != len2)
761 			result = false;
762 		else
763 			result = (memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), len1) == 0);
764 	}
765 	else
766 	{
767 		result = (varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
768 							 collid) == 0);
769 	}
770 
771 	PG_FREE_IF_COPY(arg1, 0);
772 	PG_FREE_IF_COPY(arg2, 1);
773 
774 	PG_RETURN_BOOL(result);
775 }
776 
777 Datum
bpcharne(PG_FUNCTION_ARGS)778 bpcharne(PG_FUNCTION_ARGS)
779 {
780 	BpChar	   *arg1 = PG_GETARG_BPCHAR_PP(0);
781 	BpChar	   *arg2 = PG_GETARG_BPCHAR_PP(1);
782 	int			len1,
783 				len2;
784 	bool		result;
785 	Oid			collid = PG_GET_COLLATION();
786 
787 	check_collation_set(collid);
788 
789 	len1 = bcTruelen(arg1);
790 	len2 = bcTruelen(arg2);
791 
792 	if (lc_collate_is_c(collid) ||
793 		collid == DEFAULT_COLLATION_OID ||
794 		pg_newlocale_from_collation(collid)->deterministic)
795 	{
796 		/*
797 		 * Since we only care about equality or not-equality, we can avoid all
798 		 * the expense of strcoll() here, and just do bitwise comparison.
799 		 */
800 		if (len1 != len2)
801 			result = true;
802 		else
803 			result = (memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), len1) != 0);
804 	}
805 	else
806 	{
807 		result = (varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
808 							 collid) != 0);
809 	}
810 
811 	PG_FREE_IF_COPY(arg1, 0);
812 	PG_FREE_IF_COPY(arg2, 1);
813 
814 	PG_RETURN_BOOL(result);
815 }
816 
817 Datum
bpcharlt(PG_FUNCTION_ARGS)818 bpcharlt(PG_FUNCTION_ARGS)
819 {
820 	BpChar	   *arg1 = PG_GETARG_BPCHAR_PP(0);
821 	BpChar	   *arg2 = PG_GETARG_BPCHAR_PP(1);
822 	int			len1,
823 				len2;
824 	int			cmp;
825 
826 	len1 = bcTruelen(arg1);
827 	len2 = bcTruelen(arg2);
828 
829 	cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
830 					 PG_GET_COLLATION());
831 
832 	PG_FREE_IF_COPY(arg1, 0);
833 	PG_FREE_IF_COPY(arg2, 1);
834 
835 	PG_RETURN_BOOL(cmp < 0);
836 }
837 
838 Datum
bpcharle(PG_FUNCTION_ARGS)839 bpcharle(PG_FUNCTION_ARGS)
840 {
841 	BpChar	   *arg1 = PG_GETARG_BPCHAR_PP(0);
842 	BpChar	   *arg2 = PG_GETARG_BPCHAR_PP(1);
843 	int			len1,
844 				len2;
845 	int			cmp;
846 
847 	len1 = bcTruelen(arg1);
848 	len2 = bcTruelen(arg2);
849 
850 	cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
851 					 PG_GET_COLLATION());
852 
853 	PG_FREE_IF_COPY(arg1, 0);
854 	PG_FREE_IF_COPY(arg2, 1);
855 
856 	PG_RETURN_BOOL(cmp <= 0);
857 }
858 
859 Datum
bpchargt(PG_FUNCTION_ARGS)860 bpchargt(PG_FUNCTION_ARGS)
861 {
862 	BpChar	   *arg1 = PG_GETARG_BPCHAR_PP(0);
863 	BpChar	   *arg2 = PG_GETARG_BPCHAR_PP(1);
864 	int			len1,
865 				len2;
866 	int			cmp;
867 
868 	len1 = bcTruelen(arg1);
869 	len2 = bcTruelen(arg2);
870 
871 	cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
872 					 PG_GET_COLLATION());
873 
874 	PG_FREE_IF_COPY(arg1, 0);
875 	PG_FREE_IF_COPY(arg2, 1);
876 
877 	PG_RETURN_BOOL(cmp > 0);
878 }
879 
880 Datum
bpcharge(PG_FUNCTION_ARGS)881 bpcharge(PG_FUNCTION_ARGS)
882 {
883 	BpChar	   *arg1 = PG_GETARG_BPCHAR_PP(0);
884 	BpChar	   *arg2 = PG_GETARG_BPCHAR_PP(1);
885 	int			len1,
886 				len2;
887 	int			cmp;
888 
889 	len1 = bcTruelen(arg1);
890 	len2 = bcTruelen(arg2);
891 
892 	cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
893 					 PG_GET_COLLATION());
894 
895 	PG_FREE_IF_COPY(arg1, 0);
896 	PG_FREE_IF_COPY(arg2, 1);
897 
898 	PG_RETURN_BOOL(cmp >= 0);
899 }
900 
901 Datum
bpcharcmp(PG_FUNCTION_ARGS)902 bpcharcmp(PG_FUNCTION_ARGS)
903 {
904 	BpChar	   *arg1 = PG_GETARG_BPCHAR_PP(0);
905 	BpChar	   *arg2 = PG_GETARG_BPCHAR_PP(1);
906 	int			len1,
907 				len2;
908 	int			cmp;
909 
910 	len1 = bcTruelen(arg1);
911 	len2 = bcTruelen(arg2);
912 
913 	cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
914 					 PG_GET_COLLATION());
915 
916 	PG_FREE_IF_COPY(arg1, 0);
917 	PG_FREE_IF_COPY(arg2, 1);
918 
919 	PG_RETURN_INT32(cmp);
920 }
921 
922 Datum
bpchar_sortsupport(PG_FUNCTION_ARGS)923 bpchar_sortsupport(PG_FUNCTION_ARGS)
924 {
925 	SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
926 	Oid			collid = ssup->ssup_collation;
927 	MemoryContext oldcontext;
928 
929 	oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
930 
931 	/* Use generic string SortSupport */
932 	varstr_sortsupport(ssup, BPCHAROID, collid);
933 
934 	MemoryContextSwitchTo(oldcontext);
935 
936 	PG_RETURN_VOID();
937 }
938 
939 Datum
bpchar_larger(PG_FUNCTION_ARGS)940 bpchar_larger(PG_FUNCTION_ARGS)
941 {
942 	BpChar	   *arg1 = PG_GETARG_BPCHAR_PP(0);
943 	BpChar	   *arg2 = PG_GETARG_BPCHAR_PP(1);
944 	int			len1,
945 				len2;
946 	int			cmp;
947 
948 	len1 = bcTruelen(arg1);
949 	len2 = bcTruelen(arg2);
950 
951 	cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
952 					 PG_GET_COLLATION());
953 
954 	PG_RETURN_BPCHAR_P((cmp >= 0) ? arg1 : arg2);
955 }
956 
957 Datum
bpchar_smaller(PG_FUNCTION_ARGS)958 bpchar_smaller(PG_FUNCTION_ARGS)
959 {
960 	BpChar	   *arg1 = PG_GETARG_BPCHAR_PP(0);
961 	BpChar	   *arg2 = PG_GETARG_BPCHAR_PP(1);
962 	int			len1,
963 				len2;
964 	int			cmp;
965 
966 	len1 = bcTruelen(arg1);
967 	len2 = bcTruelen(arg2);
968 
969 	cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
970 					 PG_GET_COLLATION());
971 
972 	PG_RETURN_BPCHAR_P((cmp <= 0) ? arg1 : arg2);
973 }
974 
975 
976 /*
977  * bpchar needs a specialized hash function because we want to ignore
978  * trailing blanks in comparisons.
979  */
980 Datum
hashbpchar(PG_FUNCTION_ARGS)981 hashbpchar(PG_FUNCTION_ARGS)
982 {
983 	BpChar	   *key = PG_GETARG_BPCHAR_PP(0);
984 	Oid			collid = PG_GET_COLLATION();
985 	char	   *keydata;
986 	int			keylen;
987 	pg_locale_t mylocale = 0;
988 	Datum		result;
989 
990 	if (!collid)
991 		ereport(ERROR,
992 				(errcode(ERRCODE_INDETERMINATE_COLLATION),
993 				 errmsg("could not determine which collation to use for string hashing"),
994 				 errhint("Use the COLLATE clause to set the collation explicitly.")));
995 
996 	keydata = VARDATA_ANY(key);
997 	keylen = bcTruelen(key);
998 
999 	if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
1000 		mylocale = pg_newlocale_from_collation(collid);
1001 
1002 	if (!mylocale || mylocale->deterministic)
1003 	{
1004 		result = hash_any((unsigned char *) keydata, keylen);
1005 	}
1006 	else
1007 	{
1008 #ifdef USE_ICU
1009 		if (mylocale->provider == COLLPROVIDER_ICU)
1010 		{
1011 			int32_t		ulen = -1;
1012 			UChar	   *uchar = NULL;
1013 			Size		bsize;
1014 			uint8_t    *buf;
1015 
1016 			ulen = icu_to_uchar(&uchar, keydata, keylen);
1017 
1018 			bsize = ucol_getSortKey(mylocale->info.icu.ucol,
1019 									uchar, ulen, NULL, 0);
1020 			buf = palloc(bsize);
1021 			ucol_getSortKey(mylocale->info.icu.ucol,
1022 							uchar, ulen, buf, bsize);
1023 
1024 			result = hash_any(buf, bsize);
1025 
1026 			pfree(buf);
1027 		}
1028 		else
1029 #endif
1030 			/* shouldn't happen */
1031 			elog(ERROR, "unsupported collprovider: %c", mylocale->provider);
1032 	}
1033 
1034 	/* Avoid leaking memory for toasted inputs */
1035 	PG_FREE_IF_COPY(key, 0);
1036 
1037 	return result;
1038 }
1039 
1040 Datum
hashbpcharextended(PG_FUNCTION_ARGS)1041 hashbpcharextended(PG_FUNCTION_ARGS)
1042 {
1043 	BpChar	   *key = PG_GETARG_BPCHAR_PP(0);
1044 	Oid			collid = PG_GET_COLLATION();
1045 	char	   *keydata;
1046 	int			keylen;
1047 	pg_locale_t mylocale = 0;
1048 	Datum		result;
1049 
1050 	if (!collid)
1051 		ereport(ERROR,
1052 				(errcode(ERRCODE_INDETERMINATE_COLLATION),
1053 				 errmsg("could not determine which collation to use for string hashing"),
1054 				 errhint("Use the COLLATE clause to set the collation explicitly.")));
1055 
1056 	keydata = VARDATA_ANY(key);
1057 	keylen = bcTruelen(key);
1058 
1059 	if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
1060 		mylocale = pg_newlocale_from_collation(collid);
1061 
1062 	if (!mylocale || mylocale->deterministic)
1063 	{
1064 		result = hash_any_extended((unsigned char *) keydata, keylen,
1065 								   PG_GETARG_INT64(1));
1066 	}
1067 	else
1068 	{
1069 #ifdef USE_ICU
1070 		if (mylocale->provider == COLLPROVIDER_ICU)
1071 		{
1072 			int32_t		ulen = -1;
1073 			UChar	   *uchar = NULL;
1074 			Size		bsize;
1075 			uint8_t    *buf;
1076 
1077 			ulen = icu_to_uchar(&uchar, VARDATA_ANY(key), VARSIZE_ANY_EXHDR(key));
1078 
1079 			bsize = ucol_getSortKey(mylocale->info.icu.ucol,
1080 									uchar, ulen, NULL, 0);
1081 			buf = palloc(bsize);
1082 			ucol_getSortKey(mylocale->info.icu.ucol,
1083 							uchar, ulen, buf, bsize);
1084 
1085 			result = hash_any_extended(buf, bsize, PG_GETARG_INT64(1));
1086 
1087 			pfree(buf);
1088 		}
1089 		else
1090 #endif
1091 			/* shouldn't happen */
1092 			elog(ERROR, "unsupported collprovider: %c", mylocale->provider);
1093 	}
1094 
1095 	PG_FREE_IF_COPY(key, 0);
1096 
1097 	return result;
1098 }
1099 
1100 /*
1101  * The following operators support character-by-character comparison
1102  * of bpchar datums, to allow building indexes suitable for LIKE clauses.
1103  * Note that the regular bpchareq/bpcharne comparison operators, and
1104  * regular support functions 1 and 2 with "C" collation are assumed to be
1105  * compatible with these!
1106  */
1107 
1108 static int
internal_bpchar_pattern_compare(BpChar * arg1,BpChar * arg2)1109 internal_bpchar_pattern_compare(BpChar *arg1, BpChar *arg2)
1110 {
1111 	int			result;
1112 	int			len1,
1113 				len2;
1114 
1115 	len1 = bcTruelen(arg1);
1116 	len2 = bcTruelen(arg2);
1117 
1118 	result = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
1119 	if (result != 0)
1120 		return result;
1121 	else if (len1 < len2)
1122 		return -1;
1123 	else if (len1 > len2)
1124 		return 1;
1125 	else
1126 		return 0;
1127 }
1128 
1129 
1130 Datum
bpchar_pattern_lt(PG_FUNCTION_ARGS)1131 bpchar_pattern_lt(PG_FUNCTION_ARGS)
1132 {
1133 	BpChar	   *arg1 = PG_GETARG_BPCHAR_PP(0);
1134 	BpChar	   *arg2 = PG_GETARG_BPCHAR_PP(1);
1135 	int			result;
1136 
1137 	result = internal_bpchar_pattern_compare(arg1, arg2);
1138 
1139 	PG_FREE_IF_COPY(arg1, 0);
1140 	PG_FREE_IF_COPY(arg2, 1);
1141 
1142 	PG_RETURN_BOOL(result < 0);
1143 }
1144 
1145 
1146 Datum
bpchar_pattern_le(PG_FUNCTION_ARGS)1147 bpchar_pattern_le(PG_FUNCTION_ARGS)
1148 {
1149 	BpChar	   *arg1 = PG_GETARG_BPCHAR_PP(0);
1150 	BpChar	   *arg2 = PG_GETARG_BPCHAR_PP(1);
1151 	int			result;
1152 
1153 	result = internal_bpchar_pattern_compare(arg1, arg2);
1154 
1155 	PG_FREE_IF_COPY(arg1, 0);
1156 	PG_FREE_IF_COPY(arg2, 1);
1157 
1158 	PG_RETURN_BOOL(result <= 0);
1159 }
1160 
1161 
1162 Datum
bpchar_pattern_ge(PG_FUNCTION_ARGS)1163 bpchar_pattern_ge(PG_FUNCTION_ARGS)
1164 {
1165 	BpChar	   *arg1 = PG_GETARG_BPCHAR_PP(0);
1166 	BpChar	   *arg2 = PG_GETARG_BPCHAR_PP(1);
1167 	int			result;
1168 
1169 	result = internal_bpchar_pattern_compare(arg1, arg2);
1170 
1171 	PG_FREE_IF_COPY(arg1, 0);
1172 	PG_FREE_IF_COPY(arg2, 1);
1173 
1174 	PG_RETURN_BOOL(result >= 0);
1175 }
1176 
1177 
1178 Datum
bpchar_pattern_gt(PG_FUNCTION_ARGS)1179 bpchar_pattern_gt(PG_FUNCTION_ARGS)
1180 {
1181 	BpChar	   *arg1 = PG_GETARG_BPCHAR_PP(0);
1182 	BpChar	   *arg2 = PG_GETARG_BPCHAR_PP(1);
1183 	int			result;
1184 
1185 	result = internal_bpchar_pattern_compare(arg1, arg2);
1186 
1187 	PG_FREE_IF_COPY(arg1, 0);
1188 	PG_FREE_IF_COPY(arg2, 1);
1189 
1190 	PG_RETURN_BOOL(result > 0);
1191 }
1192 
1193 
1194 Datum
btbpchar_pattern_cmp(PG_FUNCTION_ARGS)1195 btbpchar_pattern_cmp(PG_FUNCTION_ARGS)
1196 {
1197 	BpChar	   *arg1 = PG_GETARG_BPCHAR_PP(0);
1198 	BpChar	   *arg2 = PG_GETARG_BPCHAR_PP(1);
1199 	int			result;
1200 
1201 	result = internal_bpchar_pattern_compare(arg1, arg2);
1202 
1203 	PG_FREE_IF_COPY(arg1, 0);
1204 	PG_FREE_IF_COPY(arg2, 1);
1205 
1206 	PG_RETURN_INT32(result);
1207 }
1208 
1209 
1210 Datum
btbpchar_pattern_sortsupport(PG_FUNCTION_ARGS)1211 btbpchar_pattern_sortsupport(PG_FUNCTION_ARGS)
1212 {
1213 	SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
1214 	MemoryContext oldcontext;
1215 
1216 	oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
1217 
1218 	/* Use generic string SortSupport, forcing "C" collation */
1219 	varstr_sortsupport(ssup, BPCHAROID, C_COLLATION_OID);
1220 
1221 	MemoryContextSwitchTo(oldcontext);
1222 
1223 	PG_RETURN_VOID();
1224 }
1225