1 /*-------------------------------------------------------------------------
2  *
3  * varchar.c
4  *	  Functions for the built-in types char(n) and varchar(n).
5  *
6  * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *	  src/backend/utils/adt/varchar.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16 
17 
18 #include "access/hash.h"
19 #include "access/tuptoaster.h"
20 #include "catalog/pg_collation.h"
21 #include "libpq/pqformat.h"
22 #include "nodes/nodeFuncs.h"
23 #include "utils/array.h"
24 #include "utils/builtins.h"
25 #include "utils/varlena.h"
26 #include "mb/pg_wchar.h"
27 
28 
29 /* common code for bpchartypmodin and varchartypmodin */
30 static int32
anychar_typmodin(ArrayType * ta,const char * typename)31 anychar_typmodin(ArrayType *ta, const char *typename)
32 {
33 	int32		typmod;
34 	int32	   *tl;
35 	int			n;
36 
37 	tl = ArrayGetIntegerTypmods(ta, &n);
38 
39 	/*
40 	 * we're not too tense about good error message here because grammar
41 	 * shouldn't allow wrong number of modifiers for CHAR
42 	 */
43 	if (n != 1)
44 		ereport(ERROR,
45 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
46 				 errmsg("invalid type modifier")));
47 
48 	if (*tl < 1)
49 		ereport(ERROR,
50 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
51 				 errmsg("length for type %s must be at least 1", typename)));
52 	if (*tl > MaxAttrSize)
53 		ereport(ERROR,
54 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
55 				 errmsg("length for type %s cannot exceed %d",
56 						typename, MaxAttrSize)));
57 
58 	/*
59 	 * For largely historical reasons, the typmod is VARHDRSZ plus the number
60 	 * of characters; there is enough client-side code that knows about that
61 	 * that we'd better not change it.
62 	 */
63 	typmod = VARHDRSZ + *tl;
64 
65 	return typmod;
66 }
67 
68 /* common code for bpchartypmodout and varchartypmodout */
69 static char *
anychar_typmodout(int32 typmod)70 anychar_typmodout(int32 typmod)
71 {
72 	char	   *res = (char *) palloc(64);
73 
74 	if (typmod > VARHDRSZ)
75 		snprintf(res, 64, "(%d)", (int) (typmod - VARHDRSZ));
76 	else
77 		*res = '\0';
78 
79 	return res;
80 }
81 
82 
83 /*
84  * CHAR() and VARCHAR() types are part of the SQL standard. CHAR()
85  * is for blank-padded string whose length is specified in CREATE TABLE.
86  * VARCHAR is for storing string whose length is at most the length specified
87  * at CREATE TABLE time.
88  *
89  * It's hard to implement these types because we cannot figure out
90  * the length of the type from the type itself. I changed (hopefully all) the
91  * fmgr calls that invoke input functions of a data type to supply the
92  * length also. (eg. in INSERTs, we have the tupleDescriptor which contains
93  * the length of the attributes and hence the exact length of the char() or
94  * varchar(). We pass this to bpcharin() or varcharin().) In the case where
95  * we cannot determine the length, we pass in -1 instead and the input
96  * converter does not enforce any length check.
97  *
98  * We actually implement this as a varlena so that we don't have to pass in
99  * the length for the comparison functions. (The difference between these
100  * types and "text" is that we truncate and possibly blank-pad the string
101  * at insertion time.)
102  *
103  *															  - ay 6/95
104  */
105 
106 
107 /*****************************************************************************
108  *	 bpchar - char()														 *
109  *****************************************************************************/
110 
111 /*
112  * bpchar_input -- common guts of bpcharin and bpcharrecv
113  *
114  * s is the input text of length len (may not be null-terminated)
115  * atttypmod is the typmod value to apply
116  *
117  * Note that atttypmod is measured in characters, which
118  * is not necessarily the same as the number of bytes.
119  *
120  * If the input string is too long, raise an error, unless the extra
121  * characters are spaces, in which case they're truncated.  (per SQL)
122  */
123 static BpChar *
bpchar_input(const char * s,size_t len,int32 atttypmod)124 bpchar_input(const char *s, size_t len, int32 atttypmod)
125 {
126 	BpChar	   *result;
127 	char	   *r;
128 	size_t		maxlen;
129 
130 	/* If typmod is -1 (or invalid), use the actual string length */
131 	if (atttypmod < (int32) VARHDRSZ)
132 		maxlen = len;
133 	else
134 	{
135 		size_t		charlen;	/* number of CHARACTERS in the input */
136 
137 		maxlen = atttypmod - VARHDRSZ;
138 		charlen = pg_mbstrlen_with_len(s, len);
139 		if (charlen > maxlen)
140 		{
141 			/* Verify that extra characters are spaces, and clip them off */
142 			size_t		mbmaxlen = pg_mbcharcliplen(s, len, maxlen);
143 			size_t		j;
144 
145 			/*
146 			 * at this point, len is the actual BYTE length of the input
147 			 * string, maxlen is the max number of CHARACTERS allowed for this
148 			 * bpchar type, mbmaxlen is the length in BYTES of those chars.
149 			 */
150 			for (j = mbmaxlen; j < len; j++)
151 			{
152 				if (s[j] != ' ')
153 					ereport(ERROR,
154 							(errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION),
155 							 errmsg("value too long for type character(%d)",
156 									(int) maxlen)));
157 			}
158 
159 			/*
160 			 * Now we set maxlen to the necessary byte length, not the number
161 			 * of CHARACTERS!
162 			 */
163 			maxlen = len = mbmaxlen;
164 		}
165 		else
166 		{
167 			/*
168 			 * Now we set maxlen to the necessary byte length, not the number
169 			 * of CHARACTERS!
170 			 */
171 			maxlen = len + (maxlen - charlen);
172 		}
173 	}
174 
175 	result = (BpChar *) palloc(maxlen + VARHDRSZ);
176 	SET_VARSIZE(result, maxlen + VARHDRSZ);
177 	r = VARDATA(result);
178 	memcpy(r, s, len);
179 
180 	/* blank pad the string if necessary */
181 	if (maxlen > len)
182 		memset(r + len, ' ', maxlen - len);
183 
184 	return result;
185 }
186 
187 /*
188  * Convert a C string to CHARACTER internal representation.  atttypmod
189  * is the declared length of the type plus VARHDRSZ.
190  */
191 Datum
bpcharin(PG_FUNCTION_ARGS)192 bpcharin(PG_FUNCTION_ARGS)
193 {
194 	char	   *s = PG_GETARG_CSTRING(0);
195 
196 #ifdef NOT_USED
197 	Oid			typelem = PG_GETARG_OID(1);
198 #endif
199 	int32		atttypmod = PG_GETARG_INT32(2);
200 	BpChar	   *result;
201 
202 	result = bpchar_input(s, strlen(s), atttypmod);
203 	PG_RETURN_BPCHAR_P(result);
204 }
205 
206 
207 /*
208  * Convert a CHARACTER value to a C string.
209  *
210  * Uses the text conversion functions, which is only appropriate if BpChar
211  * and text are equivalent types.
212  */
213 Datum
bpcharout(PG_FUNCTION_ARGS)214 bpcharout(PG_FUNCTION_ARGS)
215 {
216 	Datum		txt = PG_GETARG_DATUM(0);
217 
218 	PG_RETURN_CSTRING(TextDatumGetCString(txt));
219 }
220 
221 /*
222  *		bpcharrecv			- converts external binary format to bpchar
223  */
224 Datum
bpcharrecv(PG_FUNCTION_ARGS)225 bpcharrecv(PG_FUNCTION_ARGS)
226 {
227 	StringInfo	buf = (StringInfo) PG_GETARG_POINTER(0);
228 
229 #ifdef NOT_USED
230 	Oid			typelem = PG_GETARG_OID(1);
231 #endif
232 	int32		atttypmod = PG_GETARG_INT32(2);
233 	BpChar	   *result;
234 	char	   *str;
235 	int			nbytes;
236 
237 	str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
238 	result = bpchar_input(str, nbytes, atttypmod);
239 	pfree(str);
240 	PG_RETURN_BPCHAR_P(result);
241 }
242 
243 /*
244  *		bpcharsend			- converts bpchar to binary format
245  */
246 Datum
bpcharsend(PG_FUNCTION_ARGS)247 bpcharsend(PG_FUNCTION_ARGS)
248 {
249 	/* Exactly the same as textsend, so share code */
250 	return textsend(fcinfo);
251 }
252 
253 
254 /*
255  * Converts a CHARACTER type to the specified size.
256  *
257  * maxlen is the typmod, ie, declared length plus VARHDRSZ bytes.
258  * isExplicit is true if this is for an explicit cast to char(N).
259  *
260  * Truncation rules: for an explicit cast, silently truncate to the given
261  * length; for an implicit cast, raise error unless extra characters are
262  * all spaces.  (This is sort-of per SQL: the spec would actually have us
263  * raise a "completion condition" for the explicit cast case, but Postgres
264  * hasn't got such a concept.)
265  */
266 Datum
bpchar(PG_FUNCTION_ARGS)267 bpchar(PG_FUNCTION_ARGS)
268 {
269 	BpChar	   *source = PG_GETARG_BPCHAR_PP(0);
270 	int32		maxlen = PG_GETARG_INT32(1);
271 	bool		isExplicit = PG_GETARG_BOOL(2);
272 	BpChar	   *result;
273 	int32		len;
274 	char	   *r;
275 	char	   *s;
276 	int			i;
277 	int			charlen;		/* number of characters in the input string +
278 								 * VARHDRSZ */
279 
280 	/* No work if typmod is invalid */
281 	if (maxlen < (int32) VARHDRSZ)
282 		PG_RETURN_BPCHAR_P(source);
283 
284 	maxlen -= VARHDRSZ;
285 
286 	len = VARSIZE_ANY_EXHDR(source);
287 	s = VARDATA_ANY(source);
288 
289 	charlen = pg_mbstrlen_with_len(s, len);
290 
291 	/* No work if supplied data matches typmod already */
292 	if (charlen == maxlen)
293 		PG_RETURN_BPCHAR_P(source);
294 
295 	if (charlen > maxlen)
296 	{
297 		/* Verify that extra characters are spaces, and clip them off */
298 		size_t		maxmblen;
299 
300 		maxmblen = pg_mbcharcliplen(s, len, maxlen);
301 
302 		if (!isExplicit)
303 		{
304 			for (i = maxmblen; i < len; i++)
305 				if (s[i] != ' ')
306 					ereport(ERROR,
307 							(errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION),
308 							 errmsg("value too long for type character(%d)",
309 									maxlen)));
310 		}
311 
312 		len = maxmblen;
313 
314 		/*
315 		 * At this point, maxlen is the necessary byte length, not the number
316 		 * of CHARACTERS!
317 		 */
318 		maxlen = len;
319 	}
320 	else
321 	{
322 		/*
323 		 * At this point, maxlen is the necessary byte length, not the number
324 		 * of CHARACTERS!
325 		 */
326 		maxlen = len + (maxlen - charlen);
327 	}
328 
329 	Assert(maxlen >= len);
330 
331 	result = palloc(maxlen + VARHDRSZ);
332 	SET_VARSIZE(result, maxlen + VARHDRSZ);
333 	r = VARDATA(result);
334 
335 	memcpy(r, s, len);
336 
337 	/* blank pad the string if necessary */
338 	if (maxlen > len)
339 		memset(r + len, ' ', maxlen - len);
340 
341 	PG_RETURN_BPCHAR_P(result);
342 }
343 
344 
345 /* char_bpchar()
346  * Convert char to bpchar(1).
347  */
348 Datum
char_bpchar(PG_FUNCTION_ARGS)349 char_bpchar(PG_FUNCTION_ARGS)
350 {
351 	char		c = PG_GETARG_CHAR(0);
352 	BpChar	   *result;
353 
354 	result = (BpChar *) palloc(VARHDRSZ + 1);
355 
356 	SET_VARSIZE(result, VARHDRSZ + 1);
357 	*(VARDATA(result)) = c;
358 
359 	PG_RETURN_BPCHAR_P(result);
360 }
361 
362 
363 /* bpchar_name()
364  * Converts a bpchar() type to a NameData type.
365  */
366 Datum
bpchar_name(PG_FUNCTION_ARGS)367 bpchar_name(PG_FUNCTION_ARGS)
368 {
369 	BpChar	   *s = PG_GETARG_BPCHAR_PP(0);
370 	char	   *s_data;
371 	Name		result;
372 	int			len;
373 
374 	len = VARSIZE_ANY_EXHDR(s);
375 	s_data = VARDATA_ANY(s);
376 
377 	/* Truncate oversize input */
378 	if (len >= NAMEDATALEN)
379 		len = pg_mbcliplen(s_data, len, NAMEDATALEN - 1);
380 
381 	/* Remove trailing blanks */
382 	while (len > 0)
383 	{
384 		if (s_data[len - 1] != ' ')
385 			break;
386 		len--;
387 	}
388 
389 	/* We use palloc0 here to ensure result is zero-padded */
390 	result = (Name) palloc0(NAMEDATALEN);
391 	memcpy(NameStr(*result), s_data, len);
392 
393 	PG_RETURN_NAME(result);
394 }
395 
396 /* name_bpchar()
397  * Converts a NameData type to a bpchar type.
398  *
399  * Uses the text conversion functions, which is only appropriate if BpChar
400  * and text are equivalent types.
401  */
402 Datum
name_bpchar(PG_FUNCTION_ARGS)403 name_bpchar(PG_FUNCTION_ARGS)
404 {
405 	Name		s = PG_GETARG_NAME(0);
406 	BpChar	   *result;
407 
408 	result = (BpChar *) cstring_to_text(NameStr(*s));
409 	PG_RETURN_BPCHAR_P(result);
410 }
411 
412 Datum
bpchartypmodin(PG_FUNCTION_ARGS)413 bpchartypmodin(PG_FUNCTION_ARGS)
414 {
415 	ArrayType  *ta = PG_GETARG_ARRAYTYPE_P(0);
416 
417 	PG_RETURN_INT32(anychar_typmodin(ta, "char"));
418 }
419 
420 Datum
bpchartypmodout(PG_FUNCTION_ARGS)421 bpchartypmodout(PG_FUNCTION_ARGS)
422 {
423 	int32		typmod = PG_GETARG_INT32(0);
424 
425 	PG_RETURN_CSTRING(anychar_typmodout(typmod));
426 }
427 
428 
429 /*****************************************************************************
430  *	 varchar - varchar(n)
431  *
432  * Note: varchar piggybacks on type text for most operations, and so has no
433  * C-coded functions except for I/O and typmod checking.
434  *****************************************************************************/
435 
436 /*
437  * varchar_input -- common guts of varcharin and varcharrecv
438  *
439  * s is the input text of length len (may not be null-terminated)
440  * atttypmod is the typmod value to apply
441  *
442  * Note that atttypmod is measured in characters, which
443  * is not necessarily the same as the number of bytes.
444  *
445  * If the input string is too long, raise an error, unless the extra
446  * characters are spaces, in which case they're truncated.  (per SQL)
447  *
448  * Uses the C string to text conversion function, which is only appropriate
449  * if VarChar and text are equivalent types.
450  */
451 static VarChar *
varchar_input(const char * s,size_t len,int32 atttypmod)452 varchar_input(const char *s, size_t len, int32 atttypmod)
453 {
454 	VarChar    *result;
455 	size_t		maxlen;
456 
457 	maxlen = atttypmod - VARHDRSZ;
458 
459 	if (atttypmod >= (int32) VARHDRSZ && len > maxlen)
460 	{
461 		/* Verify that extra characters are spaces, and clip them off */
462 		size_t		mbmaxlen = pg_mbcharcliplen(s, len, maxlen);
463 		size_t		j;
464 
465 		for (j = mbmaxlen; j < len; j++)
466 		{
467 			if (s[j] != ' ')
468 				ereport(ERROR,
469 						(errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION),
470 						 errmsg("value too long for type character varying(%d)",
471 								(int) maxlen)));
472 		}
473 
474 		len = mbmaxlen;
475 	}
476 
477 	result = (VarChar *) cstring_to_text_with_len(s, len);
478 	return result;
479 }
480 
481 /*
482  * Convert a C string to VARCHAR internal representation.  atttypmod
483  * is the declared length of the type plus VARHDRSZ.
484  */
485 Datum
varcharin(PG_FUNCTION_ARGS)486 varcharin(PG_FUNCTION_ARGS)
487 {
488 	char	   *s = PG_GETARG_CSTRING(0);
489 
490 #ifdef NOT_USED
491 	Oid			typelem = PG_GETARG_OID(1);
492 #endif
493 	int32		atttypmod = PG_GETARG_INT32(2);
494 	VarChar    *result;
495 
496 	result = varchar_input(s, strlen(s), atttypmod);
497 	PG_RETURN_VARCHAR_P(result);
498 }
499 
500 
501 /*
502  * Convert a VARCHAR value to a C string.
503  *
504  * Uses the text to C string conversion function, which is only appropriate
505  * if VarChar and text are equivalent types.
506  */
507 Datum
varcharout(PG_FUNCTION_ARGS)508 varcharout(PG_FUNCTION_ARGS)
509 {
510 	Datum		txt = PG_GETARG_DATUM(0);
511 
512 	PG_RETURN_CSTRING(TextDatumGetCString(txt));
513 }
514 
515 /*
516  *		varcharrecv			- converts external binary format to varchar
517  */
518 Datum
varcharrecv(PG_FUNCTION_ARGS)519 varcharrecv(PG_FUNCTION_ARGS)
520 {
521 	StringInfo	buf = (StringInfo) PG_GETARG_POINTER(0);
522 
523 #ifdef NOT_USED
524 	Oid			typelem = PG_GETARG_OID(1);
525 #endif
526 	int32		atttypmod = PG_GETARG_INT32(2);
527 	VarChar    *result;
528 	char	   *str;
529 	int			nbytes;
530 
531 	str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
532 	result = varchar_input(str, nbytes, atttypmod);
533 	pfree(str);
534 	PG_RETURN_VARCHAR_P(result);
535 }
536 
537 /*
538  *		varcharsend			- converts varchar to binary format
539  */
540 Datum
varcharsend(PG_FUNCTION_ARGS)541 varcharsend(PG_FUNCTION_ARGS)
542 {
543 	/* Exactly the same as textsend, so share code */
544 	return textsend(fcinfo);
545 }
546 
547 
548 /*
549  * varchar_transform()
550  * Flatten calls to varchar's length coercion function that set the new maximum
551  * length >= the previous maximum length.  We can ignore the isExplicit
552  * argument, since that only affects truncation cases.
553  */
554 Datum
varchar_transform(PG_FUNCTION_ARGS)555 varchar_transform(PG_FUNCTION_ARGS)
556 {
557 	FuncExpr   *expr = castNode(FuncExpr, PG_GETARG_POINTER(0));
558 	Node	   *ret = NULL;
559 	Node	   *typmod;
560 
561 	Assert(list_length(expr->args) >= 2);
562 
563 	typmod = (Node *) lsecond(expr->args);
564 
565 	if (IsA(typmod, Const) &&!((Const *) typmod)->constisnull)
566 	{
567 		Node	   *source = (Node *) linitial(expr->args);
568 		int32		old_typmod = exprTypmod(source);
569 		int32		new_typmod = DatumGetInt32(((Const *) typmod)->constvalue);
570 		int32		old_max = old_typmod - VARHDRSZ;
571 		int32		new_max = new_typmod - VARHDRSZ;
572 
573 		if (new_typmod < 0 || (old_typmod >= 0 && old_max <= new_max))
574 			ret = relabel_to_typmod(source, new_typmod);
575 	}
576 
577 	PG_RETURN_POINTER(ret);
578 }
579 
580 /*
581  * Converts a VARCHAR type to the specified size.
582  *
583  * maxlen is the typmod, ie, declared length plus VARHDRSZ bytes.
584  * isExplicit is true if this is for an explicit cast to varchar(N).
585  *
586  * Truncation rules: for an explicit cast, silently truncate to the given
587  * length; for an implicit cast, raise error unless extra characters are
588  * all spaces.  (This is sort-of per SQL: the spec would actually have us
589  * raise a "completion condition" for the explicit cast case, but Postgres
590  * hasn't got such a concept.)
591  */
592 Datum
varchar(PG_FUNCTION_ARGS)593 varchar(PG_FUNCTION_ARGS)
594 {
595 	VarChar    *source = PG_GETARG_VARCHAR_PP(0);
596 	int32		typmod = PG_GETARG_INT32(1);
597 	bool		isExplicit = PG_GETARG_BOOL(2);
598 	int32		len,
599 				maxlen;
600 	size_t		maxmblen;
601 	int			i;
602 	char	   *s_data;
603 
604 	len = VARSIZE_ANY_EXHDR(source);
605 	s_data = VARDATA_ANY(source);
606 	maxlen = typmod - VARHDRSZ;
607 
608 	/* No work if typmod is invalid or supplied data fits it already */
609 	if (maxlen < 0 || len <= maxlen)
610 		PG_RETURN_VARCHAR_P(source);
611 
612 	/* only reach here if string is too long... */
613 
614 	/* truncate multibyte string preserving multibyte boundary */
615 	maxmblen = pg_mbcharcliplen(s_data, len, maxlen);
616 
617 	if (!isExplicit)
618 	{
619 		for (i = maxmblen; i < len; i++)
620 			if (s_data[i] != ' ')
621 				ereport(ERROR,
622 						(errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION),
623 						 errmsg("value too long for type character varying(%d)",
624 								maxlen)));
625 	}
626 
627 	PG_RETURN_VARCHAR_P((VarChar *) cstring_to_text_with_len(s_data,
628 															 maxmblen));
629 }
630 
631 Datum
varchartypmodin(PG_FUNCTION_ARGS)632 varchartypmodin(PG_FUNCTION_ARGS)
633 {
634 	ArrayType  *ta = PG_GETARG_ARRAYTYPE_P(0);
635 
636 	PG_RETURN_INT32(anychar_typmodin(ta, "varchar"));
637 }
638 
639 Datum
varchartypmodout(PG_FUNCTION_ARGS)640 varchartypmodout(PG_FUNCTION_ARGS)
641 {
642 	int32		typmod = PG_GETARG_INT32(0);
643 
644 	PG_RETURN_CSTRING(anychar_typmodout(typmod));
645 }
646 
647 
648 /*****************************************************************************
649  * Exported functions
650  *****************************************************************************/
651 
652 /* "True" length (not counting trailing blanks) of a BpChar */
653 static inline int
bcTruelen(BpChar * arg)654 bcTruelen(BpChar *arg)
655 {
656 	return bpchartruelen(VARDATA_ANY(arg), VARSIZE_ANY_EXHDR(arg));
657 }
658 
/*
 * bpchartruelen -- length of s[0 .. len-1] once trailing spaces are removed.
 *
 * Returns the number of leading bytes up to and including the last
 * non-space byte; 0 if every byte is a space.  A len of zero or less is
 * returned unchanged.
 */
int
bpchartruelen(char *s, int len)
{
	/*
	 * This relies on ' ' being a single-byte unit in every supported
	 * multibyte server encoding, so scanning byte-wise from the end is safe.
	 */
	while (len > 0 && s[len - 1] == ' ')
		len--;

	return len;
}
675 
676 Datum
bpcharlen(PG_FUNCTION_ARGS)677 bpcharlen(PG_FUNCTION_ARGS)
678 {
679 	BpChar	   *arg = PG_GETARG_BPCHAR_PP(0);
680 	int			len;
681 
682 	/* get number of bytes, ignoring trailing spaces */
683 	len = bcTruelen(arg);
684 
685 	/* in multibyte encoding, convert to number of characters */
686 	if (pg_database_encoding_max_length() != 1)
687 		len = pg_mbstrlen_with_len(VARDATA_ANY(arg), len);
688 
689 	PG_RETURN_INT32(len);
690 }
691 
692 Datum
bpcharoctetlen(PG_FUNCTION_ARGS)693 bpcharoctetlen(PG_FUNCTION_ARGS)
694 {
695 	Datum		arg = PG_GETARG_DATUM(0);
696 
697 	/* We need not detoast the input at all */
698 	PG_RETURN_INT32(toast_raw_datum_size(arg) - VARHDRSZ);
699 }
700 
701 
702 /*****************************************************************************
703  *	Comparison Functions used for bpchar
704  *
705  * Note: btree indexes need these routines not to leak memory; therefore,
706  * be careful to free working copies of toasted datums.  Most places don't
707  * need to be so careful.
708  *****************************************************************************/
709 
710 Datum
bpchareq(PG_FUNCTION_ARGS)711 bpchareq(PG_FUNCTION_ARGS)
712 {
713 	BpChar	   *arg1 = PG_GETARG_BPCHAR_PP(0);
714 	BpChar	   *arg2 = PG_GETARG_BPCHAR_PP(1);
715 	int			len1,
716 				len2;
717 	bool		result;
718 
719 	len1 = bcTruelen(arg1);
720 	len2 = bcTruelen(arg2);
721 
722 	/*
723 	 * Since we only care about equality or not-equality, we can avoid all the
724 	 * expense of strcoll() here, and just do bitwise comparison.
725 	 */
726 	if (len1 != len2)
727 		result = false;
728 	else
729 		result = (memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), len1) == 0);
730 
731 	PG_FREE_IF_COPY(arg1, 0);
732 	PG_FREE_IF_COPY(arg2, 1);
733 
734 	PG_RETURN_BOOL(result);
735 }
736 
737 Datum
bpcharne(PG_FUNCTION_ARGS)738 bpcharne(PG_FUNCTION_ARGS)
739 {
740 	BpChar	   *arg1 = PG_GETARG_BPCHAR_PP(0);
741 	BpChar	   *arg2 = PG_GETARG_BPCHAR_PP(1);
742 	int			len1,
743 				len2;
744 	bool		result;
745 
746 	len1 = bcTruelen(arg1);
747 	len2 = bcTruelen(arg2);
748 
749 	/*
750 	 * Since we only care about equality or not-equality, we can avoid all the
751 	 * expense of strcoll() here, and just do bitwise comparison.
752 	 */
753 	if (len1 != len2)
754 		result = true;
755 	else
756 		result = (memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), len1) != 0);
757 
758 	PG_FREE_IF_COPY(arg1, 0);
759 	PG_FREE_IF_COPY(arg2, 1);
760 
761 	PG_RETURN_BOOL(result);
762 }
763 
764 Datum
bpcharlt(PG_FUNCTION_ARGS)765 bpcharlt(PG_FUNCTION_ARGS)
766 {
767 	BpChar	   *arg1 = PG_GETARG_BPCHAR_PP(0);
768 	BpChar	   *arg2 = PG_GETARG_BPCHAR_PP(1);
769 	int			len1,
770 				len2;
771 	int			cmp;
772 
773 	len1 = bcTruelen(arg1);
774 	len2 = bcTruelen(arg2);
775 
776 	cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
777 					 PG_GET_COLLATION());
778 
779 	PG_FREE_IF_COPY(arg1, 0);
780 	PG_FREE_IF_COPY(arg2, 1);
781 
782 	PG_RETURN_BOOL(cmp < 0);
783 }
784 
785 Datum
bpcharle(PG_FUNCTION_ARGS)786 bpcharle(PG_FUNCTION_ARGS)
787 {
788 	BpChar	   *arg1 = PG_GETARG_BPCHAR_PP(0);
789 	BpChar	   *arg2 = PG_GETARG_BPCHAR_PP(1);
790 	int			len1,
791 				len2;
792 	int			cmp;
793 
794 	len1 = bcTruelen(arg1);
795 	len2 = bcTruelen(arg2);
796 
797 	cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
798 					 PG_GET_COLLATION());
799 
800 	PG_FREE_IF_COPY(arg1, 0);
801 	PG_FREE_IF_COPY(arg2, 1);
802 
803 	PG_RETURN_BOOL(cmp <= 0);
804 }
805 
806 Datum
bpchargt(PG_FUNCTION_ARGS)807 bpchargt(PG_FUNCTION_ARGS)
808 {
809 	BpChar	   *arg1 = PG_GETARG_BPCHAR_PP(0);
810 	BpChar	   *arg2 = PG_GETARG_BPCHAR_PP(1);
811 	int			len1,
812 				len2;
813 	int			cmp;
814 
815 	len1 = bcTruelen(arg1);
816 	len2 = bcTruelen(arg2);
817 
818 	cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
819 					 PG_GET_COLLATION());
820 
821 	PG_FREE_IF_COPY(arg1, 0);
822 	PG_FREE_IF_COPY(arg2, 1);
823 
824 	PG_RETURN_BOOL(cmp > 0);
825 }
826 
827 Datum
bpcharge(PG_FUNCTION_ARGS)828 bpcharge(PG_FUNCTION_ARGS)
829 {
830 	BpChar	   *arg1 = PG_GETARG_BPCHAR_PP(0);
831 	BpChar	   *arg2 = PG_GETARG_BPCHAR_PP(1);
832 	int			len1,
833 				len2;
834 	int			cmp;
835 
836 	len1 = bcTruelen(arg1);
837 	len2 = bcTruelen(arg2);
838 
839 	cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
840 					 PG_GET_COLLATION());
841 
842 	PG_FREE_IF_COPY(arg1, 0);
843 	PG_FREE_IF_COPY(arg2, 1);
844 
845 	PG_RETURN_BOOL(cmp >= 0);
846 }
847 
848 Datum
bpcharcmp(PG_FUNCTION_ARGS)849 bpcharcmp(PG_FUNCTION_ARGS)
850 {
851 	BpChar	   *arg1 = PG_GETARG_BPCHAR_PP(0);
852 	BpChar	   *arg2 = PG_GETARG_BPCHAR_PP(1);
853 	int			len1,
854 				len2;
855 	int			cmp;
856 
857 	len1 = bcTruelen(arg1);
858 	len2 = bcTruelen(arg2);
859 
860 	cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
861 					 PG_GET_COLLATION());
862 
863 	PG_FREE_IF_COPY(arg1, 0);
864 	PG_FREE_IF_COPY(arg2, 1);
865 
866 	PG_RETURN_INT32(cmp);
867 }
868 
869 Datum
bpchar_sortsupport(PG_FUNCTION_ARGS)870 bpchar_sortsupport(PG_FUNCTION_ARGS)
871 {
872 	SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
873 	Oid			collid = ssup->ssup_collation;
874 	MemoryContext oldcontext;
875 
876 	oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
877 
878 	/* Use generic string SortSupport */
879 	varstr_sortsupport(ssup, collid, true);
880 
881 	MemoryContextSwitchTo(oldcontext);
882 
883 	PG_RETURN_VOID();
884 }
885 
886 Datum
bpchar_larger(PG_FUNCTION_ARGS)887 bpchar_larger(PG_FUNCTION_ARGS)
888 {
889 	BpChar	   *arg1 = PG_GETARG_BPCHAR_PP(0);
890 	BpChar	   *arg2 = PG_GETARG_BPCHAR_PP(1);
891 	int			len1,
892 				len2;
893 	int			cmp;
894 
895 	len1 = bcTruelen(arg1);
896 	len2 = bcTruelen(arg2);
897 
898 	cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
899 					 PG_GET_COLLATION());
900 
901 	PG_RETURN_BPCHAR_P((cmp >= 0) ? arg1 : arg2);
902 }
903 
904 Datum
bpchar_smaller(PG_FUNCTION_ARGS)905 bpchar_smaller(PG_FUNCTION_ARGS)
906 {
907 	BpChar	   *arg1 = PG_GETARG_BPCHAR_PP(0);
908 	BpChar	   *arg2 = PG_GETARG_BPCHAR_PP(1);
909 	int			len1,
910 				len2;
911 	int			cmp;
912 
913 	len1 = bcTruelen(arg1);
914 	len2 = bcTruelen(arg2);
915 
916 	cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
917 					 PG_GET_COLLATION());
918 
919 	PG_RETURN_BPCHAR_P((cmp <= 0) ? arg1 : arg2);
920 }
921 
922 
923 /*
924  * bpchar needs a specialized hash function because we want to ignore
925  * trailing blanks in comparisons.
926  *
927  * Note: currently there is no need for locale-specific behavior here,
928  * but if we ever change the semantics of bpchar comparison to trust
929  * strcoll() completely, we'd need to do something different in non-C locales.
930  */
931 Datum
hashbpchar(PG_FUNCTION_ARGS)932 hashbpchar(PG_FUNCTION_ARGS)
933 {
934 	BpChar	   *key = PG_GETARG_BPCHAR_PP(0);
935 	char	   *keydata;
936 	int			keylen;
937 	Datum		result;
938 
939 	keydata = VARDATA_ANY(key);
940 	keylen = bcTruelen(key);
941 
942 	result = hash_any((unsigned char *) keydata, keylen);
943 
944 	/* Avoid leaking memory for toasted inputs */
945 	PG_FREE_IF_COPY(key, 0);
946 
947 	return result;
948 }
949 
950 Datum
hashbpcharextended(PG_FUNCTION_ARGS)951 hashbpcharextended(PG_FUNCTION_ARGS)
952 {
953 	BpChar	   *key = PG_GETARG_BPCHAR_PP(0);
954 	char	   *keydata;
955 	int			keylen;
956 	Datum		result;
957 
958 	keydata = VARDATA_ANY(key);
959 	keylen = bcTruelen(key);
960 
961 	result = hash_any_extended((unsigned char *) keydata, keylen,
962 							   PG_GETARG_INT64(1));
963 
964 	PG_FREE_IF_COPY(key, 0);
965 
966 	return result;
967 }
968 
969 /*
970  * The following operators support character-by-character comparison
971  * of bpchar datums, to allow building indexes suitable for LIKE clauses.
972  * Note that the regular bpchareq/bpcharne comparison operators, and
973  * regular support functions 1 and 2 with "C" collation are assumed to be
974  * compatible with these!
975  */
976 
977 static int
internal_bpchar_pattern_compare(BpChar * arg1,BpChar * arg2)978 internal_bpchar_pattern_compare(BpChar *arg1, BpChar *arg2)
979 {
980 	int			result;
981 	int			len1,
982 				len2;
983 
984 	len1 = bcTruelen(arg1);
985 	len2 = bcTruelen(arg2);
986 
987 	result = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
988 	if (result != 0)
989 		return result;
990 	else if (len1 < len2)
991 		return -1;
992 	else if (len1 > len2)
993 		return 1;
994 	else
995 		return 0;
996 }
997 
998 
999 Datum
bpchar_pattern_lt(PG_FUNCTION_ARGS)1000 bpchar_pattern_lt(PG_FUNCTION_ARGS)
1001 {
1002 	BpChar	   *arg1 = PG_GETARG_BPCHAR_PP(0);
1003 	BpChar	   *arg2 = PG_GETARG_BPCHAR_PP(1);
1004 	int			result;
1005 
1006 	result = internal_bpchar_pattern_compare(arg1, arg2);
1007 
1008 	PG_FREE_IF_COPY(arg1, 0);
1009 	PG_FREE_IF_COPY(arg2, 1);
1010 
1011 	PG_RETURN_BOOL(result < 0);
1012 }
1013 
1014 
1015 Datum
bpchar_pattern_le(PG_FUNCTION_ARGS)1016 bpchar_pattern_le(PG_FUNCTION_ARGS)
1017 {
1018 	BpChar	   *arg1 = PG_GETARG_BPCHAR_PP(0);
1019 	BpChar	   *arg2 = PG_GETARG_BPCHAR_PP(1);
1020 	int			result;
1021 
1022 	result = internal_bpchar_pattern_compare(arg1, arg2);
1023 
1024 	PG_FREE_IF_COPY(arg1, 0);
1025 	PG_FREE_IF_COPY(arg2, 1);
1026 
1027 	PG_RETURN_BOOL(result <= 0);
1028 }
1029 
1030 
1031 Datum
bpchar_pattern_ge(PG_FUNCTION_ARGS)1032 bpchar_pattern_ge(PG_FUNCTION_ARGS)
1033 {
1034 	BpChar	   *arg1 = PG_GETARG_BPCHAR_PP(0);
1035 	BpChar	   *arg2 = PG_GETARG_BPCHAR_PP(1);
1036 	int			result;
1037 
1038 	result = internal_bpchar_pattern_compare(arg1, arg2);
1039 
1040 	PG_FREE_IF_COPY(arg1, 0);
1041 	PG_FREE_IF_COPY(arg2, 1);
1042 
1043 	PG_RETURN_BOOL(result >= 0);
1044 }
1045 
1046 
1047 Datum
bpchar_pattern_gt(PG_FUNCTION_ARGS)1048 bpchar_pattern_gt(PG_FUNCTION_ARGS)
1049 {
1050 	BpChar	   *arg1 = PG_GETARG_BPCHAR_PP(0);
1051 	BpChar	   *arg2 = PG_GETARG_BPCHAR_PP(1);
1052 	int			result;
1053 
1054 	result = internal_bpchar_pattern_compare(arg1, arg2);
1055 
1056 	PG_FREE_IF_COPY(arg1, 0);
1057 	PG_FREE_IF_COPY(arg2, 1);
1058 
1059 	PG_RETURN_BOOL(result > 0);
1060 }
1061 
1062 
1063 Datum
btbpchar_pattern_cmp(PG_FUNCTION_ARGS)1064 btbpchar_pattern_cmp(PG_FUNCTION_ARGS)
1065 {
1066 	BpChar	   *arg1 = PG_GETARG_BPCHAR_PP(0);
1067 	BpChar	   *arg2 = PG_GETARG_BPCHAR_PP(1);
1068 	int			result;
1069 
1070 	result = internal_bpchar_pattern_compare(arg1, arg2);
1071 
1072 	PG_FREE_IF_COPY(arg1, 0);
1073 	PG_FREE_IF_COPY(arg2, 1);
1074 
1075 	PG_RETURN_INT32(result);
1076 }
1077 
1078 
1079 Datum
btbpchar_pattern_sortsupport(PG_FUNCTION_ARGS)1080 btbpchar_pattern_sortsupport(PG_FUNCTION_ARGS)
1081 {
1082 	SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
1083 	MemoryContext oldcontext;
1084 
1085 	oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
1086 
1087 	/* Use generic string SortSupport, forcing "C" collation */
1088 	varstr_sortsupport(ssup, C_COLLATION_OID, true);
1089 
1090 	MemoryContextSwitchTo(oldcontext);
1091 
1092 	PG_RETURN_VOID();
1093 }
1094