1 /*-------------------------------------------------------------------------
2 *
3 * varchar.c
4 * Functions for the built-in types char(n) and varchar(n).
5 *
6 * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
8 *
9 *
10 * IDENTIFICATION
11 * src/backend/utils/adt/varchar.c
12 *
13 *-------------------------------------------------------------------------
14 */
15 #include "postgres.h"
16
17 #include "access/detoast.h"
18 #include "catalog/pg_collation.h"
19 #include "catalog/pg_type.h"
20 #include "common/hashfn.h"
21 #include "libpq/pqformat.h"
22 #include "mb/pg_wchar.h"
23 #include "nodes/nodeFuncs.h"
24 #include "nodes/supportnodes.h"
25 #include "utils/array.h"
26 #include "utils/builtins.h"
27 #include "utils/lsyscache.h"
28 #include "utils/pg_locale.h"
29 #include "utils/varlena.h"
30
31 /* common code for bpchartypmodin and varchartypmodin */
32 static int32
anychar_typmodin(ArrayType * ta,const char * typename)33 anychar_typmodin(ArrayType *ta, const char *typename)
34 {
35 int32 typmod;
36 int32 *tl;
37 int n;
38
39 tl = ArrayGetIntegerTypmods(ta, &n);
40
41 /*
42 * we're not too tense about good error message here because grammar
43 * shouldn't allow wrong number of modifiers for CHAR
44 */
45 if (n != 1)
46 ereport(ERROR,
47 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
48 errmsg("invalid type modifier")));
49
50 if (*tl < 1)
51 ereport(ERROR,
52 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
53 errmsg("length for type %s must be at least 1", typename)));
54 if (*tl > MaxAttrSize)
55 ereport(ERROR,
56 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
57 errmsg("length for type %s cannot exceed %d",
58 typename, MaxAttrSize)));
59
60 /*
61 * For largely historical reasons, the typmod is VARHDRSZ plus the number
62 * of characters; there is enough client-side code that knows about that
63 * that we'd better not change it.
64 */
65 typmod = VARHDRSZ + *tl;
66
67 return typmod;
68 }
69
70 /* common code for bpchartypmodout and varchartypmodout */
71 static char *
anychar_typmodout(int32 typmod)72 anychar_typmodout(int32 typmod)
73 {
74 char *res = (char *) palloc(64);
75
76 if (typmod > VARHDRSZ)
77 snprintf(res, 64, "(%d)", (int) (typmod - VARHDRSZ));
78 else
79 *res = '\0';
80
81 return res;
82 }
83
84
/*
 * CHAR() and VARCHAR() types are part of the SQL standard. CHAR()
 * is for blank-padded strings whose length is specified in CREATE TABLE.
 * VARCHAR is for storing strings whose length is at most the length
 * specified at CREATE TABLE time.
 *
 * It's hard to implement these types because we cannot figure out
 * the length of the type from the type itself. I changed (hopefully all) the
 * fmgr calls that invoke input functions of a data type to supply the
 * length also. (E.g., in INSERTs, we have the tupleDescriptor, which contains
 * the length of the attributes and hence the exact length of the char() or
 * varchar(). We pass this to bpcharin() or varcharin().) In the case where
 * we cannot determine the length, we pass in -1 instead and the input
 * converter does not enforce any length check.
 *
 * We actually implement this as a varlena so that we don't have to pass in
 * the length for the comparison functions. (The difference between these
 * types and "text" is that we truncate and possibly blank-pad the string
 * at insertion time.)
 *
 * - ay 6/95
 */
107
108
109 /*****************************************************************************
110 * bpchar - char() *
111 *****************************************************************************/
112
113 /*
114 * bpchar_input -- common guts of bpcharin and bpcharrecv
115 *
116 * s is the input text of length len (may not be null-terminated)
117 * atttypmod is the typmod value to apply
118 *
119 * Note that atttypmod is measured in characters, which
120 * is not necessarily the same as the number of bytes.
121 *
122 * If the input string is too long, raise an error, unless the extra
123 * characters are spaces, in which case they're truncated. (per SQL)
124 */
125 static BpChar *
bpchar_input(const char * s,size_t len,int32 atttypmod)126 bpchar_input(const char *s, size_t len, int32 atttypmod)
127 {
128 BpChar *result;
129 char *r;
130 size_t maxlen;
131
132 /* If typmod is -1 (or invalid), use the actual string length */
133 if (atttypmod < (int32) VARHDRSZ)
134 maxlen = len;
135 else
136 {
137 size_t charlen; /* number of CHARACTERS in the input */
138
139 maxlen = atttypmod - VARHDRSZ;
140 charlen = pg_mbstrlen_with_len(s, len);
141 if (charlen > maxlen)
142 {
143 /* Verify that extra characters are spaces, and clip them off */
144 size_t mbmaxlen = pg_mbcharcliplen(s, len, maxlen);
145 size_t j;
146
147 /*
148 * at this point, len is the actual BYTE length of the input
149 * string, maxlen is the max number of CHARACTERS allowed for this
150 * bpchar type, mbmaxlen is the length in BYTES of those chars.
151 */
152 for (j = mbmaxlen; j < len; j++)
153 {
154 if (s[j] != ' ')
155 ereport(ERROR,
156 (errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION),
157 errmsg("value too long for type character(%d)",
158 (int) maxlen)));
159 }
160
161 /*
162 * Now we set maxlen to the necessary byte length, not the number
163 * of CHARACTERS!
164 */
165 maxlen = len = mbmaxlen;
166 }
167 else
168 {
169 /*
170 * Now we set maxlen to the necessary byte length, not the number
171 * of CHARACTERS!
172 */
173 maxlen = len + (maxlen - charlen);
174 }
175 }
176
177 result = (BpChar *) palloc(maxlen + VARHDRSZ);
178 SET_VARSIZE(result, maxlen + VARHDRSZ);
179 r = VARDATA(result);
180 memcpy(r, s, len);
181
182 /* blank pad the string if necessary */
183 if (maxlen > len)
184 memset(r + len, ' ', maxlen - len);
185
186 return result;
187 }
188
189 /*
190 * Convert a C string to CHARACTER internal representation. atttypmod
191 * is the declared length of the type plus VARHDRSZ.
192 */
193 Datum
bpcharin(PG_FUNCTION_ARGS)194 bpcharin(PG_FUNCTION_ARGS)
195 {
196 char *s = PG_GETARG_CSTRING(0);
197
198 #ifdef NOT_USED
199 Oid typelem = PG_GETARG_OID(1);
200 #endif
201 int32 atttypmod = PG_GETARG_INT32(2);
202 BpChar *result;
203
204 result = bpchar_input(s, strlen(s), atttypmod);
205 PG_RETURN_BPCHAR_P(result);
206 }
207
208
209 /*
210 * Convert a CHARACTER value to a C string.
211 *
212 * Uses the text conversion functions, which is only appropriate if BpChar
213 * and text are equivalent types.
214 */
215 Datum
bpcharout(PG_FUNCTION_ARGS)216 bpcharout(PG_FUNCTION_ARGS)
217 {
218 Datum txt = PG_GETARG_DATUM(0);
219
220 PG_RETURN_CSTRING(TextDatumGetCString(txt));
221 }
222
223 /*
224 * bpcharrecv - converts external binary format to bpchar
225 */
226 Datum
bpcharrecv(PG_FUNCTION_ARGS)227 bpcharrecv(PG_FUNCTION_ARGS)
228 {
229 StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
230
231 #ifdef NOT_USED
232 Oid typelem = PG_GETARG_OID(1);
233 #endif
234 int32 atttypmod = PG_GETARG_INT32(2);
235 BpChar *result;
236 char *str;
237 int nbytes;
238
239 str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
240 result = bpchar_input(str, nbytes, atttypmod);
241 pfree(str);
242 PG_RETURN_BPCHAR_P(result);
243 }
244
245 /*
246 * bpcharsend - converts bpchar to binary format
247 */
248 Datum
bpcharsend(PG_FUNCTION_ARGS)249 bpcharsend(PG_FUNCTION_ARGS)
250 {
251 /* Exactly the same as textsend, so share code */
252 return textsend(fcinfo);
253 }
254
255
256 /*
257 * Converts a CHARACTER type to the specified size.
258 *
259 * maxlen is the typmod, ie, declared length plus VARHDRSZ bytes.
260 * isExplicit is true if this is for an explicit cast to char(N).
261 *
262 * Truncation rules: for an explicit cast, silently truncate to the given
263 * length; for an implicit cast, raise error unless extra characters are
264 * all spaces. (This is sort-of per SQL: the spec would actually have us
265 * raise a "completion condition" for the explicit cast case, but Postgres
266 * hasn't got such a concept.)
267 */
268 Datum
bpchar(PG_FUNCTION_ARGS)269 bpchar(PG_FUNCTION_ARGS)
270 {
271 BpChar *source = PG_GETARG_BPCHAR_PP(0);
272 int32 maxlen = PG_GETARG_INT32(1);
273 bool isExplicit = PG_GETARG_BOOL(2);
274 BpChar *result;
275 int32 len;
276 char *r;
277 char *s;
278 int i;
279 int charlen; /* number of characters in the input string +
280 * VARHDRSZ */
281
282 /* No work if typmod is invalid */
283 if (maxlen < (int32) VARHDRSZ)
284 PG_RETURN_BPCHAR_P(source);
285
286 maxlen -= VARHDRSZ;
287
288 len = VARSIZE_ANY_EXHDR(source);
289 s = VARDATA_ANY(source);
290
291 charlen = pg_mbstrlen_with_len(s, len);
292
293 /* No work if supplied data matches typmod already */
294 if (charlen == maxlen)
295 PG_RETURN_BPCHAR_P(source);
296
297 if (charlen > maxlen)
298 {
299 /* Verify that extra characters are spaces, and clip them off */
300 size_t maxmblen;
301
302 maxmblen = pg_mbcharcliplen(s, len, maxlen);
303
304 if (!isExplicit)
305 {
306 for (i = maxmblen; i < len; i++)
307 if (s[i] != ' ')
308 ereport(ERROR,
309 (errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION),
310 errmsg("value too long for type character(%d)",
311 maxlen)));
312 }
313
314 len = maxmblen;
315
316 /*
317 * At this point, maxlen is the necessary byte length, not the number
318 * of CHARACTERS!
319 */
320 maxlen = len;
321 }
322 else
323 {
324 /*
325 * At this point, maxlen is the necessary byte length, not the number
326 * of CHARACTERS!
327 */
328 maxlen = len + (maxlen - charlen);
329 }
330
331 Assert(maxlen >= len);
332
333 result = palloc(maxlen + VARHDRSZ);
334 SET_VARSIZE(result, maxlen + VARHDRSZ);
335 r = VARDATA(result);
336
337 memcpy(r, s, len);
338
339 /* blank pad the string if necessary */
340 if (maxlen > len)
341 memset(r + len, ' ', maxlen - len);
342
343 PG_RETURN_BPCHAR_P(result);
344 }
345
346
347 /* char_bpchar()
348 * Convert char to bpchar(1).
349 */
350 Datum
char_bpchar(PG_FUNCTION_ARGS)351 char_bpchar(PG_FUNCTION_ARGS)
352 {
353 char c = PG_GETARG_CHAR(0);
354 BpChar *result;
355
356 result = (BpChar *) palloc(VARHDRSZ + 1);
357
358 SET_VARSIZE(result, VARHDRSZ + 1);
359 *(VARDATA(result)) = c;
360
361 PG_RETURN_BPCHAR_P(result);
362 }
363
364
365 /* bpchar_name()
366 * Converts a bpchar() type to a NameData type.
367 */
368 Datum
bpchar_name(PG_FUNCTION_ARGS)369 bpchar_name(PG_FUNCTION_ARGS)
370 {
371 BpChar *s = PG_GETARG_BPCHAR_PP(0);
372 char *s_data;
373 Name result;
374 int len;
375
376 len = VARSIZE_ANY_EXHDR(s);
377 s_data = VARDATA_ANY(s);
378
379 /* Truncate oversize input */
380 if (len >= NAMEDATALEN)
381 len = pg_mbcliplen(s_data, len, NAMEDATALEN - 1);
382
383 /* Remove trailing blanks */
384 while (len > 0)
385 {
386 if (s_data[len - 1] != ' ')
387 break;
388 len--;
389 }
390
391 /* We use palloc0 here to ensure result is zero-padded */
392 result = (Name) palloc0(NAMEDATALEN);
393 memcpy(NameStr(*result), s_data, len);
394
395 PG_RETURN_NAME(result);
396 }
397
398 /* name_bpchar()
399 * Converts a NameData type to a bpchar type.
400 *
401 * Uses the text conversion functions, which is only appropriate if BpChar
402 * and text are equivalent types.
403 */
404 Datum
name_bpchar(PG_FUNCTION_ARGS)405 name_bpchar(PG_FUNCTION_ARGS)
406 {
407 Name s = PG_GETARG_NAME(0);
408 BpChar *result;
409
410 result = (BpChar *) cstring_to_text(NameStr(*s));
411 PG_RETURN_BPCHAR_P(result);
412 }
413
414 Datum
bpchartypmodin(PG_FUNCTION_ARGS)415 bpchartypmodin(PG_FUNCTION_ARGS)
416 {
417 ArrayType *ta = PG_GETARG_ARRAYTYPE_P(0);
418
419 PG_RETURN_INT32(anychar_typmodin(ta, "char"));
420 }
421
422 Datum
bpchartypmodout(PG_FUNCTION_ARGS)423 bpchartypmodout(PG_FUNCTION_ARGS)
424 {
425 int32 typmod = PG_GETARG_INT32(0);
426
427 PG_RETURN_CSTRING(anychar_typmodout(typmod));
428 }
429
430
431 /*****************************************************************************
432 * varchar - varchar(n)
433 *
434 * Note: varchar piggybacks on type text for most operations, and so has no
435 * C-coded functions except for I/O and typmod checking.
436 *****************************************************************************/
437
438 /*
439 * varchar_input -- common guts of varcharin and varcharrecv
440 *
441 * s is the input text of length len (may not be null-terminated)
442 * atttypmod is the typmod value to apply
443 *
444 * Note that atttypmod is measured in characters, which
445 * is not necessarily the same as the number of bytes.
446 *
447 * If the input string is too long, raise an error, unless the extra
448 * characters are spaces, in which case they're truncated. (per SQL)
449 *
450 * Uses the C string to text conversion function, which is only appropriate
451 * if VarChar and text are equivalent types.
452 */
453 static VarChar *
varchar_input(const char * s,size_t len,int32 atttypmod)454 varchar_input(const char *s, size_t len, int32 atttypmod)
455 {
456 VarChar *result;
457 size_t maxlen;
458
459 maxlen = atttypmod - VARHDRSZ;
460
461 if (atttypmod >= (int32) VARHDRSZ && len > maxlen)
462 {
463 /* Verify that extra characters are spaces, and clip them off */
464 size_t mbmaxlen = pg_mbcharcliplen(s, len, maxlen);
465 size_t j;
466
467 for (j = mbmaxlen; j < len; j++)
468 {
469 if (s[j] != ' ')
470 ereport(ERROR,
471 (errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION),
472 errmsg("value too long for type character varying(%d)",
473 (int) maxlen)));
474 }
475
476 len = mbmaxlen;
477 }
478
479 result = (VarChar *) cstring_to_text_with_len(s, len);
480 return result;
481 }
482
483 /*
484 * Convert a C string to VARCHAR internal representation. atttypmod
485 * is the declared length of the type plus VARHDRSZ.
486 */
487 Datum
varcharin(PG_FUNCTION_ARGS)488 varcharin(PG_FUNCTION_ARGS)
489 {
490 char *s = PG_GETARG_CSTRING(0);
491
492 #ifdef NOT_USED
493 Oid typelem = PG_GETARG_OID(1);
494 #endif
495 int32 atttypmod = PG_GETARG_INT32(2);
496 VarChar *result;
497
498 result = varchar_input(s, strlen(s), atttypmod);
499 PG_RETURN_VARCHAR_P(result);
500 }
501
502
503 /*
504 * Convert a VARCHAR value to a C string.
505 *
506 * Uses the text to C string conversion function, which is only appropriate
507 * if VarChar and text are equivalent types.
508 */
509 Datum
varcharout(PG_FUNCTION_ARGS)510 varcharout(PG_FUNCTION_ARGS)
511 {
512 Datum txt = PG_GETARG_DATUM(0);
513
514 PG_RETURN_CSTRING(TextDatumGetCString(txt));
515 }
516
517 /*
518 * varcharrecv - converts external binary format to varchar
519 */
520 Datum
varcharrecv(PG_FUNCTION_ARGS)521 varcharrecv(PG_FUNCTION_ARGS)
522 {
523 StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
524
525 #ifdef NOT_USED
526 Oid typelem = PG_GETARG_OID(1);
527 #endif
528 int32 atttypmod = PG_GETARG_INT32(2);
529 VarChar *result;
530 char *str;
531 int nbytes;
532
533 str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
534 result = varchar_input(str, nbytes, atttypmod);
535 pfree(str);
536 PG_RETURN_VARCHAR_P(result);
537 }
538
539 /*
540 * varcharsend - converts varchar to binary format
541 */
542 Datum
varcharsend(PG_FUNCTION_ARGS)543 varcharsend(PG_FUNCTION_ARGS)
544 {
545 /* Exactly the same as textsend, so share code */
546 return textsend(fcinfo);
547 }
548
549
550 /*
551 * varchar_support()
552 *
553 * Planner support function for the varchar() length coercion function.
554 *
555 * Currently, the only interesting thing we can do is flatten calls that set
556 * the new maximum length >= the previous maximum length. We can ignore the
557 * isExplicit argument, since that only affects truncation cases.
558 */
559 Datum
varchar_support(PG_FUNCTION_ARGS)560 varchar_support(PG_FUNCTION_ARGS)
561 {
562 Node *rawreq = (Node *) PG_GETARG_POINTER(0);
563 Node *ret = NULL;
564
565 if (IsA(rawreq, SupportRequestSimplify))
566 {
567 SupportRequestSimplify *req = (SupportRequestSimplify *) rawreq;
568 FuncExpr *expr = req->fcall;
569 Node *typmod;
570
571 Assert(list_length(expr->args) >= 2);
572
573 typmod = (Node *) lsecond(expr->args);
574
575 if (IsA(typmod, Const) && !((Const *) typmod)->constisnull)
576 {
577 Node *source = (Node *) linitial(expr->args);
578 int32 old_typmod = exprTypmod(source);
579 int32 new_typmod = DatumGetInt32(((Const *) typmod)->constvalue);
580 int32 old_max = old_typmod - VARHDRSZ;
581 int32 new_max = new_typmod - VARHDRSZ;
582
583 if (new_typmod < 0 || (old_typmod >= 0 && old_max <= new_max))
584 ret = relabel_to_typmod(source, new_typmod);
585 }
586 }
587
588 PG_RETURN_POINTER(ret);
589 }
590
591 /*
592 * Converts a VARCHAR type to the specified size.
593 *
594 * maxlen is the typmod, ie, declared length plus VARHDRSZ bytes.
595 * isExplicit is true if this is for an explicit cast to varchar(N).
596 *
597 * Truncation rules: for an explicit cast, silently truncate to the given
598 * length; for an implicit cast, raise error unless extra characters are
599 * all spaces. (This is sort-of per SQL: the spec would actually have us
600 * raise a "completion condition" for the explicit cast case, but Postgres
601 * hasn't got such a concept.)
602 */
603 Datum
varchar(PG_FUNCTION_ARGS)604 varchar(PG_FUNCTION_ARGS)
605 {
606 VarChar *source = PG_GETARG_VARCHAR_PP(0);
607 int32 typmod = PG_GETARG_INT32(1);
608 bool isExplicit = PG_GETARG_BOOL(2);
609 int32 len,
610 maxlen;
611 size_t maxmblen;
612 int i;
613 char *s_data;
614
615 len = VARSIZE_ANY_EXHDR(source);
616 s_data = VARDATA_ANY(source);
617 maxlen = typmod - VARHDRSZ;
618
619 /* No work if typmod is invalid or supplied data fits it already */
620 if (maxlen < 0 || len <= maxlen)
621 PG_RETURN_VARCHAR_P(source);
622
623 /* only reach here if string is too long... */
624
625 /* truncate multibyte string preserving multibyte boundary */
626 maxmblen = pg_mbcharcliplen(s_data, len, maxlen);
627
628 if (!isExplicit)
629 {
630 for (i = maxmblen; i < len; i++)
631 if (s_data[i] != ' ')
632 ereport(ERROR,
633 (errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION),
634 errmsg("value too long for type character varying(%d)",
635 maxlen)));
636 }
637
638 PG_RETURN_VARCHAR_P((VarChar *) cstring_to_text_with_len(s_data,
639 maxmblen));
640 }
641
642 Datum
varchartypmodin(PG_FUNCTION_ARGS)643 varchartypmodin(PG_FUNCTION_ARGS)
644 {
645 ArrayType *ta = PG_GETARG_ARRAYTYPE_P(0);
646
647 PG_RETURN_INT32(anychar_typmodin(ta, "varchar"));
648 }
649
650 Datum
varchartypmodout(PG_FUNCTION_ARGS)651 varchartypmodout(PG_FUNCTION_ARGS)
652 {
653 int32 typmod = PG_GETARG_INT32(0);
654
655 PG_RETURN_CSTRING(anychar_typmodout(typmod));
656 }
657
658
659 /*****************************************************************************
660 * Exported functions
661 *****************************************************************************/
662
663 /* "True" length (not counting trailing blanks) of a BpChar */
664 static inline int
bcTruelen(BpChar * arg)665 bcTruelen(BpChar *arg)
666 {
667 return bpchartruelen(VARDATA_ANY(arg), VARSIZE_ANY_EXHDR(arg));
668 }
669
/*
 * bpchartruelen -- length of s (given as len bytes) with any trailing
 * blanks discounted.
 */
int
bpchartruelen(char *s, int len)
{
	int			truelen = len;

	/*
	 * Stepping back one byte at a time relies on the assumption that ' ' is
	 * a singleton unit on every supported multibyte server encoding.
	 */
	while (truelen > 0 && s[truelen - 1] == ' ')
		truelen--;

	return truelen;
}
686
687 Datum
bpcharlen(PG_FUNCTION_ARGS)688 bpcharlen(PG_FUNCTION_ARGS)
689 {
690 BpChar *arg = PG_GETARG_BPCHAR_PP(0);
691 int len;
692
693 /* get number of bytes, ignoring trailing spaces */
694 len = bcTruelen(arg);
695
696 /* in multibyte encoding, convert to number of characters */
697 if (pg_database_encoding_max_length() != 1)
698 len = pg_mbstrlen_with_len(VARDATA_ANY(arg), len);
699
700 PG_RETURN_INT32(len);
701 }
702
703 Datum
bpcharoctetlen(PG_FUNCTION_ARGS)704 bpcharoctetlen(PG_FUNCTION_ARGS)
705 {
706 Datum arg = PG_GETARG_DATUM(0);
707
708 /* We need not detoast the input at all */
709 PG_RETURN_INT32(toast_raw_datum_size(arg) - VARHDRSZ);
710 }
711
712
713 /*****************************************************************************
714 * Comparison Functions used for bpchar
715 *
716 * Note: btree indexes need these routines not to leak memory; therefore,
717 * be careful to free working copies of toasted datums. Most places don't
718 * need to be so careful.
719 *****************************************************************************/
720
721 static void
check_collation_set(Oid collid)722 check_collation_set(Oid collid)
723 {
724 if (!OidIsValid(collid))
725 {
726 /*
727 * This typically means that the parser could not resolve a conflict
728 * of implicit collations, so report it that way.
729 */
730 ereport(ERROR,
731 (errcode(ERRCODE_INDETERMINATE_COLLATION),
732 errmsg("could not determine which collation to use for string comparison"),
733 errhint("Use the COLLATE clause to set the collation explicitly.")));
734 }
735 }
736
737 Datum
bpchareq(PG_FUNCTION_ARGS)738 bpchareq(PG_FUNCTION_ARGS)
739 {
740 BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
741 BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
742 int len1,
743 len2;
744 bool result;
745 Oid collid = PG_GET_COLLATION();
746
747 check_collation_set(collid);
748
749 len1 = bcTruelen(arg1);
750 len2 = bcTruelen(arg2);
751
752 if (lc_collate_is_c(collid) ||
753 collid == DEFAULT_COLLATION_OID ||
754 pg_newlocale_from_collation(collid)->deterministic)
755 {
756 /*
757 * Since we only care about equality or not-equality, we can avoid all
758 * the expense of strcoll() here, and just do bitwise comparison.
759 */
760 if (len1 != len2)
761 result = false;
762 else
763 result = (memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), len1) == 0);
764 }
765 else
766 {
767 result = (varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
768 collid) == 0);
769 }
770
771 PG_FREE_IF_COPY(arg1, 0);
772 PG_FREE_IF_COPY(arg2, 1);
773
774 PG_RETURN_BOOL(result);
775 }
776
777 Datum
bpcharne(PG_FUNCTION_ARGS)778 bpcharne(PG_FUNCTION_ARGS)
779 {
780 BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
781 BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
782 int len1,
783 len2;
784 bool result;
785 Oid collid = PG_GET_COLLATION();
786
787 check_collation_set(collid);
788
789 len1 = bcTruelen(arg1);
790 len2 = bcTruelen(arg2);
791
792 if (lc_collate_is_c(collid) ||
793 collid == DEFAULT_COLLATION_OID ||
794 pg_newlocale_from_collation(collid)->deterministic)
795 {
796 /*
797 * Since we only care about equality or not-equality, we can avoid all
798 * the expense of strcoll() here, and just do bitwise comparison.
799 */
800 if (len1 != len2)
801 result = true;
802 else
803 result = (memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), len1) != 0);
804 }
805 else
806 {
807 result = (varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
808 collid) != 0);
809 }
810
811 PG_FREE_IF_COPY(arg1, 0);
812 PG_FREE_IF_COPY(arg2, 1);
813
814 PG_RETURN_BOOL(result);
815 }
816
817 Datum
bpcharlt(PG_FUNCTION_ARGS)818 bpcharlt(PG_FUNCTION_ARGS)
819 {
820 BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
821 BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
822 int len1,
823 len2;
824 int cmp;
825
826 len1 = bcTruelen(arg1);
827 len2 = bcTruelen(arg2);
828
829 cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
830 PG_GET_COLLATION());
831
832 PG_FREE_IF_COPY(arg1, 0);
833 PG_FREE_IF_COPY(arg2, 1);
834
835 PG_RETURN_BOOL(cmp < 0);
836 }
837
838 Datum
bpcharle(PG_FUNCTION_ARGS)839 bpcharle(PG_FUNCTION_ARGS)
840 {
841 BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
842 BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
843 int len1,
844 len2;
845 int cmp;
846
847 len1 = bcTruelen(arg1);
848 len2 = bcTruelen(arg2);
849
850 cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
851 PG_GET_COLLATION());
852
853 PG_FREE_IF_COPY(arg1, 0);
854 PG_FREE_IF_COPY(arg2, 1);
855
856 PG_RETURN_BOOL(cmp <= 0);
857 }
858
859 Datum
bpchargt(PG_FUNCTION_ARGS)860 bpchargt(PG_FUNCTION_ARGS)
861 {
862 BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
863 BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
864 int len1,
865 len2;
866 int cmp;
867
868 len1 = bcTruelen(arg1);
869 len2 = bcTruelen(arg2);
870
871 cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
872 PG_GET_COLLATION());
873
874 PG_FREE_IF_COPY(arg1, 0);
875 PG_FREE_IF_COPY(arg2, 1);
876
877 PG_RETURN_BOOL(cmp > 0);
878 }
879
880 Datum
bpcharge(PG_FUNCTION_ARGS)881 bpcharge(PG_FUNCTION_ARGS)
882 {
883 BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
884 BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
885 int len1,
886 len2;
887 int cmp;
888
889 len1 = bcTruelen(arg1);
890 len2 = bcTruelen(arg2);
891
892 cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
893 PG_GET_COLLATION());
894
895 PG_FREE_IF_COPY(arg1, 0);
896 PG_FREE_IF_COPY(arg2, 1);
897
898 PG_RETURN_BOOL(cmp >= 0);
899 }
900
901 Datum
bpcharcmp(PG_FUNCTION_ARGS)902 bpcharcmp(PG_FUNCTION_ARGS)
903 {
904 BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
905 BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
906 int len1,
907 len2;
908 int cmp;
909
910 len1 = bcTruelen(arg1);
911 len2 = bcTruelen(arg2);
912
913 cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
914 PG_GET_COLLATION());
915
916 PG_FREE_IF_COPY(arg1, 0);
917 PG_FREE_IF_COPY(arg2, 1);
918
919 PG_RETURN_INT32(cmp);
920 }
921
922 Datum
bpchar_sortsupport(PG_FUNCTION_ARGS)923 bpchar_sortsupport(PG_FUNCTION_ARGS)
924 {
925 SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
926 Oid collid = ssup->ssup_collation;
927 MemoryContext oldcontext;
928
929 oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
930
931 /* Use generic string SortSupport */
932 varstr_sortsupport(ssup, BPCHAROID, collid);
933
934 MemoryContextSwitchTo(oldcontext);
935
936 PG_RETURN_VOID();
937 }
938
939 Datum
bpchar_larger(PG_FUNCTION_ARGS)940 bpchar_larger(PG_FUNCTION_ARGS)
941 {
942 BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
943 BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
944 int len1,
945 len2;
946 int cmp;
947
948 len1 = bcTruelen(arg1);
949 len2 = bcTruelen(arg2);
950
951 cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
952 PG_GET_COLLATION());
953
954 PG_RETURN_BPCHAR_P((cmp >= 0) ? arg1 : arg2);
955 }
956
957 Datum
bpchar_smaller(PG_FUNCTION_ARGS)958 bpchar_smaller(PG_FUNCTION_ARGS)
959 {
960 BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
961 BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
962 int len1,
963 len2;
964 int cmp;
965
966 len1 = bcTruelen(arg1);
967 len2 = bcTruelen(arg2);
968
969 cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
970 PG_GET_COLLATION());
971
972 PG_RETURN_BPCHAR_P((cmp <= 0) ? arg1 : arg2);
973 }
974
975
976 /*
977 * bpchar needs a specialized hash function because we want to ignore
978 * trailing blanks in comparisons.
979 */
980 Datum
hashbpchar(PG_FUNCTION_ARGS)981 hashbpchar(PG_FUNCTION_ARGS)
982 {
983 BpChar *key = PG_GETARG_BPCHAR_PP(0);
984 Oid collid = PG_GET_COLLATION();
985 char *keydata;
986 int keylen;
987 pg_locale_t mylocale = 0;
988 Datum result;
989
990 if (!collid)
991 ereport(ERROR,
992 (errcode(ERRCODE_INDETERMINATE_COLLATION),
993 errmsg("could not determine which collation to use for string hashing"),
994 errhint("Use the COLLATE clause to set the collation explicitly.")));
995
996 keydata = VARDATA_ANY(key);
997 keylen = bcTruelen(key);
998
999 if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
1000 mylocale = pg_newlocale_from_collation(collid);
1001
1002 if (!mylocale || mylocale->deterministic)
1003 {
1004 result = hash_any((unsigned char *) keydata, keylen);
1005 }
1006 else
1007 {
1008 #ifdef USE_ICU
1009 if (mylocale->provider == COLLPROVIDER_ICU)
1010 {
1011 int32_t ulen = -1;
1012 UChar *uchar = NULL;
1013 Size bsize;
1014 uint8_t *buf;
1015
1016 ulen = icu_to_uchar(&uchar, keydata, keylen);
1017
1018 bsize = ucol_getSortKey(mylocale->info.icu.ucol,
1019 uchar, ulen, NULL, 0);
1020 buf = palloc(bsize);
1021 ucol_getSortKey(mylocale->info.icu.ucol,
1022 uchar, ulen, buf, bsize);
1023
1024 result = hash_any(buf, bsize);
1025
1026 pfree(buf);
1027 }
1028 else
1029 #endif
1030 /* shouldn't happen */
1031 elog(ERROR, "unsupported collprovider: %c", mylocale->provider);
1032 }
1033
1034 /* Avoid leaking memory for toasted inputs */
1035 PG_FREE_IF_COPY(key, 0);
1036
1037 return result;
1038 }
1039
1040 Datum
hashbpcharextended(PG_FUNCTION_ARGS)1041 hashbpcharextended(PG_FUNCTION_ARGS)
1042 {
1043 BpChar *key = PG_GETARG_BPCHAR_PP(0);
1044 Oid collid = PG_GET_COLLATION();
1045 char *keydata;
1046 int keylen;
1047 pg_locale_t mylocale = 0;
1048 Datum result;
1049
1050 if (!collid)
1051 ereport(ERROR,
1052 (errcode(ERRCODE_INDETERMINATE_COLLATION),
1053 errmsg("could not determine which collation to use for string hashing"),
1054 errhint("Use the COLLATE clause to set the collation explicitly.")));
1055
1056 keydata = VARDATA_ANY(key);
1057 keylen = bcTruelen(key);
1058
1059 if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
1060 mylocale = pg_newlocale_from_collation(collid);
1061
1062 if (!mylocale || mylocale->deterministic)
1063 {
1064 result = hash_any_extended((unsigned char *) keydata, keylen,
1065 PG_GETARG_INT64(1));
1066 }
1067 else
1068 {
1069 #ifdef USE_ICU
1070 if (mylocale->provider == COLLPROVIDER_ICU)
1071 {
1072 int32_t ulen = -1;
1073 UChar *uchar = NULL;
1074 Size bsize;
1075 uint8_t *buf;
1076
1077 ulen = icu_to_uchar(&uchar, VARDATA_ANY(key), VARSIZE_ANY_EXHDR(key));
1078
1079 bsize = ucol_getSortKey(mylocale->info.icu.ucol,
1080 uchar, ulen, NULL, 0);
1081 buf = palloc(bsize);
1082 ucol_getSortKey(mylocale->info.icu.ucol,
1083 uchar, ulen, buf, bsize);
1084
1085 result = hash_any_extended(buf, bsize, PG_GETARG_INT64(1));
1086
1087 pfree(buf);
1088 }
1089 else
1090 #endif
1091 /* shouldn't happen */
1092 elog(ERROR, "unsupported collprovider: %c", mylocale->provider);
1093 }
1094
1095 PG_FREE_IF_COPY(key, 0);
1096
1097 return result;
1098 }
1099
1100 /*
1101 * The following operators support character-by-character comparison
1102 * of bpchar datums, to allow building indexes suitable for LIKE clauses.
1103 * Note that the regular bpchareq/bpcharne comparison operators, and
1104 * regular support functions 1 and 2 with "C" collation are assumed to be
1105 * compatible with these!
1106 */
1107
1108 static int
internal_bpchar_pattern_compare(BpChar * arg1,BpChar * arg2)1109 internal_bpchar_pattern_compare(BpChar *arg1, BpChar *arg2)
1110 {
1111 int result;
1112 int len1,
1113 len2;
1114
1115 len1 = bcTruelen(arg1);
1116 len2 = bcTruelen(arg2);
1117
1118 result = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
1119 if (result != 0)
1120 return result;
1121 else if (len1 < len2)
1122 return -1;
1123 else if (len1 > len2)
1124 return 1;
1125 else
1126 return 0;
1127 }
1128
1129
1130 Datum
bpchar_pattern_lt(PG_FUNCTION_ARGS)1131 bpchar_pattern_lt(PG_FUNCTION_ARGS)
1132 {
1133 BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
1134 BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
1135 int result;
1136
1137 result = internal_bpchar_pattern_compare(arg1, arg2);
1138
1139 PG_FREE_IF_COPY(arg1, 0);
1140 PG_FREE_IF_COPY(arg2, 1);
1141
1142 PG_RETURN_BOOL(result < 0);
1143 }
1144
1145
1146 Datum
bpchar_pattern_le(PG_FUNCTION_ARGS)1147 bpchar_pattern_le(PG_FUNCTION_ARGS)
1148 {
1149 BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
1150 BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
1151 int result;
1152
1153 result = internal_bpchar_pattern_compare(arg1, arg2);
1154
1155 PG_FREE_IF_COPY(arg1, 0);
1156 PG_FREE_IF_COPY(arg2, 1);
1157
1158 PG_RETURN_BOOL(result <= 0);
1159 }
1160
1161
1162 Datum
bpchar_pattern_ge(PG_FUNCTION_ARGS)1163 bpchar_pattern_ge(PG_FUNCTION_ARGS)
1164 {
1165 BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
1166 BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
1167 int result;
1168
1169 result = internal_bpchar_pattern_compare(arg1, arg2);
1170
1171 PG_FREE_IF_COPY(arg1, 0);
1172 PG_FREE_IF_COPY(arg2, 1);
1173
1174 PG_RETURN_BOOL(result >= 0);
1175 }
1176
1177
1178 Datum
bpchar_pattern_gt(PG_FUNCTION_ARGS)1179 bpchar_pattern_gt(PG_FUNCTION_ARGS)
1180 {
1181 BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
1182 BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
1183 int result;
1184
1185 result = internal_bpchar_pattern_compare(arg1, arg2);
1186
1187 PG_FREE_IF_COPY(arg1, 0);
1188 PG_FREE_IF_COPY(arg2, 1);
1189
1190 PG_RETURN_BOOL(result > 0);
1191 }
1192
1193
1194 Datum
btbpchar_pattern_cmp(PG_FUNCTION_ARGS)1195 btbpchar_pattern_cmp(PG_FUNCTION_ARGS)
1196 {
1197 BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
1198 BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
1199 int result;
1200
1201 result = internal_bpchar_pattern_compare(arg1, arg2);
1202
1203 PG_FREE_IF_COPY(arg1, 0);
1204 PG_FREE_IF_COPY(arg2, 1);
1205
1206 PG_RETURN_INT32(result);
1207 }
1208
1209
1210 Datum
btbpchar_pattern_sortsupport(PG_FUNCTION_ARGS)1211 btbpchar_pattern_sortsupport(PG_FUNCTION_ARGS)
1212 {
1213 SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
1214 MemoryContext oldcontext;
1215
1216 oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
1217
1218 /* Use generic string SortSupport, forcing "C" collation */
1219 varstr_sortsupport(ssup, BPCHAROID, C_COLLATION_OID);
1220
1221 MemoryContextSwitchTo(oldcontext);
1222
1223 PG_RETURN_VOID();
1224 }
1225