1 /*-------------------------------------------------------------------------
2 * oracle_compat.c
3 * Oracle compatible functions.
4 *
5 * Copyright (c) 1996-2021, PostgreSQL Global Development Group
6 *
7 * Author: Edmund Mergl <E.Mergl@bawue.de>
8 * Multibyte enhancement: Tatsuo Ishii <ishii@postgresql.org>
9 *
10 *
11 * IDENTIFICATION
12 * src/backend/utils/adt/oracle_compat.c
13 *
14 *-------------------------------------------------------------------------
15 */
16 #include "postgres.h"
17
18 #include "common/int.h"
19 #include "mb/pg_wchar.h"
20 #include "miscadmin.h"
21 #include "utils/builtins.h"
22 #include "utils/formatting.h"
23
24 static text *dotrim(const char *string, int stringlen,
25 const char *set, int setlen,
26 bool doltrim, bool dortrim);
27 static bytea *dobyteatrim(bytea *string, bytea *set,
28 bool doltrim, bool dortrim);
29
30
31 /********************************************************************
32 *
33 * lower
34 *
35 * Syntax:
36 *
37 * text lower(text string)
38 *
39 * Purpose:
40 *
41 * Returns string, with all letters forced to lowercase.
42 *
43 ********************************************************************/
44
45 Datum
lower(PG_FUNCTION_ARGS)46 lower(PG_FUNCTION_ARGS)
47 {
48 text *in_string = PG_GETARG_TEXT_PP(0);
49 char *out_string;
50 text *result;
51
52 out_string = str_tolower(VARDATA_ANY(in_string),
53 VARSIZE_ANY_EXHDR(in_string),
54 PG_GET_COLLATION());
55 result = cstring_to_text(out_string);
56 pfree(out_string);
57
58 PG_RETURN_TEXT_P(result);
59 }
60
61
62 /********************************************************************
63 *
64 * upper
65 *
66 * Syntax:
67 *
68 * text upper(text string)
69 *
70 * Purpose:
71 *
72 * Returns string, with all letters forced to uppercase.
73 *
74 ********************************************************************/
75
76 Datum
upper(PG_FUNCTION_ARGS)77 upper(PG_FUNCTION_ARGS)
78 {
79 text *in_string = PG_GETARG_TEXT_PP(0);
80 char *out_string;
81 text *result;
82
83 out_string = str_toupper(VARDATA_ANY(in_string),
84 VARSIZE_ANY_EXHDR(in_string),
85 PG_GET_COLLATION());
86 result = cstring_to_text(out_string);
87 pfree(out_string);
88
89 PG_RETURN_TEXT_P(result);
90 }
91
92
93 /********************************************************************
94 *
95 * initcap
96 *
97 * Syntax:
98 *
99 * text initcap(text string)
100 *
101 * Purpose:
102 *
103 * Returns string, with first letter of each word in uppercase, all
104 * other letters in lowercase. A word is defined as a sequence of
105 * alphanumeric characters, delimited by non-alphanumeric
106 * characters.
107 *
108 ********************************************************************/
109
110 Datum
initcap(PG_FUNCTION_ARGS)111 initcap(PG_FUNCTION_ARGS)
112 {
113 text *in_string = PG_GETARG_TEXT_PP(0);
114 char *out_string;
115 text *result;
116
117 out_string = str_initcap(VARDATA_ANY(in_string),
118 VARSIZE_ANY_EXHDR(in_string),
119 PG_GET_COLLATION());
120 result = cstring_to_text(out_string);
121 pfree(out_string);
122
123 PG_RETURN_TEXT_P(result);
124 }
125
126
127 /********************************************************************
128 *
129 * lpad
130 *
131 * Syntax:
132 *
133 * text lpad(text string1, int4 len, text string2)
134 *
135 * Purpose:
136 *
137 * Returns string1, left-padded to length len with the sequence of
138 * characters in string2. If len is less than the length of string1,
139 * instead truncate (on the right) to len.
140 *
141 ********************************************************************/
142
143 Datum
lpad(PG_FUNCTION_ARGS)144 lpad(PG_FUNCTION_ARGS)
145 {
146 text *string1 = PG_GETARG_TEXT_PP(0);
147 int32 len = PG_GETARG_INT32(1);
148 text *string2 = PG_GETARG_TEXT_PP(2);
149 text *ret;
150 char *ptr1,
151 *ptr2,
152 *ptr2start,
153 *ptr2end,
154 *ptr_ret;
155 int m,
156 s1len,
157 s2len;
158
159 int bytelen;
160
161 /* Negative len is silently taken as zero */
162 if (len < 0)
163 len = 0;
164
165 s1len = VARSIZE_ANY_EXHDR(string1);
166 if (s1len < 0)
167 s1len = 0; /* shouldn't happen */
168
169 s2len = VARSIZE_ANY_EXHDR(string2);
170 if (s2len < 0)
171 s2len = 0; /* shouldn't happen */
172
173 s1len = pg_mbstrlen_with_len(VARDATA_ANY(string1), s1len);
174
175 if (s1len > len)
176 s1len = len; /* truncate string1 to len chars */
177
178 if (s2len <= 0)
179 len = s1len; /* nothing to pad with, so don't pad */
180
181 bytelen = pg_database_encoding_max_length() * len;
182
183 /* check for integer overflow */
184 if (len != 0 && bytelen / pg_database_encoding_max_length() != len)
185 ereport(ERROR,
186 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
187 errmsg("requested length too large")));
188
189 ret = (text *) palloc(VARHDRSZ + bytelen);
190
191 m = len - s1len;
192
193 ptr2 = ptr2start = VARDATA_ANY(string2);
194 ptr2end = ptr2 + s2len;
195 ptr_ret = VARDATA(ret);
196
197 while (m--)
198 {
199 int mlen = pg_mblen(ptr2);
200
201 memcpy(ptr_ret, ptr2, mlen);
202 ptr_ret += mlen;
203 ptr2 += mlen;
204 if (ptr2 == ptr2end) /* wrap around at end of s2 */
205 ptr2 = ptr2start;
206 }
207
208 ptr1 = VARDATA_ANY(string1);
209
210 while (s1len--)
211 {
212 int mlen = pg_mblen(ptr1);
213
214 memcpy(ptr_ret, ptr1, mlen);
215 ptr_ret += mlen;
216 ptr1 += mlen;
217 }
218
219 SET_VARSIZE(ret, ptr_ret - (char *) ret);
220
221 PG_RETURN_TEXT_P(ret);
222 }
223
224
225 /********************************************************************
226 *
227 * rpad
228 *
229 * Syntax:
230 *
231 * text rpad(text string1, int4 len, text string2)
232 *
233 * Purpose:
234 *
235 * Returns string1, right-padded to length len with the sequence of
236 * characters in string2. If len is less than the length of string1,
237 * instead truncate (on the right) to len.
238 *
239 ********************************************************************/
240
241 Datum
rpad(PG_FUNCTION_ARGS)242 rpad(PG_FUNCTION_ARGS)
243 {
244 text *string1 = PG_GETARG_TEXT_PP(0);
245 int32 len = PG_GETARG_INT32(1);
246 text *string2 = PG_GETARG_TEXT_PP(2);
247 text *ret;
248 char *ptr1,
249 *ptr2,
250 *ptr2start,
251 *ptr2end,
252 *ptr_ret;
253 int m,
254 s1len,
255 s2len;
256
257 int bytelen;
258
259 /* Negative len is silently taken as zero */
260 if (len < 0)
261 len = 0;
262
263 s1len = VARSIZE_ANY_EXHDR(string1);
264 if (s1len < 0)
265 s1len = 0; /* shouldn't happen */
266
267 s2len = VARSIZE_ANY_EXHDR(string2);
268 if (s2len < 0)
269 s2len = 0; /* shouldn't happen */
270
271 s1len = pg_mbstrlen_with_len(VARDATA_ANY(string1), s1len);
272
273 if (s1len > len)
274 s1len = len; /* truncate string1 to len chars */
275
276 if (s2len <= 0)
277 len = s1len; /* nothing to pad with, so don't pad */
278
279 bytelen = pg_database_encoding_max_length() * len;
280
281 /* Check for integer overflow */
282 if (len != 0 && bytelen / pg_database_encoding_max_length() != len)
283 ereport(ERROR,
284 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
285 errmsg("requested length too large")));
286
287 ret = (text *) palloc(VARHDRSZ + bytelen);
288 m = len - s1len;
289
290 ptr1 = VARDATA_ANY(string1);
291 ptr_ret = VARDATA(ret);
292
293 while (s1len--)
294 {
295 int mlen = pg_mblen(ptr1);
296
297 memcpy(ptr_ret, ptr1, mlen);
298 ptr_ret += mlen;
299 ptr1 += mlen;
300 }
301
302 ptr2 = ptr2start = VARDATA_ANY(string2);
303 ptr2end = ptr2 + s2len;
304
305 while (m--)
306 {
307 int mlen = pg_mblen(ptr2);
308
309 memcpy(ptr_ret, ptr2, mlen);
310 ptr_ret += mlen;
311 ptr2 += mlen;
312 if (ptr2 == ptr2end) /* wrap around at end of s2 */
313 ptr2 = ptr2start;
314 }
315
316 SET_VARSIZE(ret, ptr_ret - (char *) ret);
317
318 PG_RETURN_TEXT_P(ret);
319 }
320
321
322 /********************************************************************
323 *
324 * btrim
325 *
326 * Syntax:
327 *
328 * text btrim(text string, text set)
329 *
330 * Purpose:
331 *
332 * Returns string with characters removed from the front and back
333 * up to the first character not in set.
334 *
335 ********************************************************************/
336
337 Datum
btrim(PG_FUNCTION_ARGS)338 btrim(PG_FUNCTION_ARGS)
339 {
340 text *string = PG_GETARG_TEXT_PP(0);
341 text *set = PG_GETARG_TEXT_PP(1);
342 text *ret;
343
344 ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
345 VARDATA_ANY(set), VARSIZE_ANY_EXHDR(set),
346 true, true);
347
348 PG_RETURN_TEXT_P(ret);
349 }
350
351 /********************************************************************
352 *
353 * btrim1 --- btrim with set fixed as ' '
354 *
355 ********************************************************************/
356
357 Datum
btrim1(PG_FUNCTION_ARGS)358 btrim1(PG_FUNCTION_ARGS)
359 {
360 text *string = PG_GETARG_TEXT_PP(0);
361 text *ret;
362
363 ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
364 " ", 1,
365 true, true);
366
367 PG_RETURN_TEXT_P(ret);
368 }
369
370 /*
371 * Common implementation for btrim, ltrim, rtrim
372 */
373 static text *
dotrim(const char * string,int stringlen,const char * set,int setlen,bool doltrim,bool dortrim)374 dotrim(const char *string, int stringlen,
375 const char *set, int setlen,
376 bool doltrim, bool dortrim)
377 {
378 int i;
379
380 /* Nothing to do if either string or set is empty */
381 if (stringlen > 0 && setlen > 0)
382 {
383 if (pg_database_encoding_max_length() > 1)
384 {
385 /*
386 * In the multibyte-encoding case, build arrays of pointers to
387 * character starts, so that we can avoid inefficient checks in
388 * the inner loops.
389 */
390 const char **stringchars;
391 const char **setchars;
392 int *stringmblen;
393 int *setmblen;
394 int stringnchars;
395 int setnchars;
396 int resultndx;
397 int resultnchars;
398 const char *p;
399 int len;
400 int mblen;
401 const char *str_pos;
402 int str_len;
403
404 stringchars = (const char **) palloc(stringlen * sizeof(char *));
405 stringmblen = (int *) palloc(stringlen * sizeof(int));
406 stringnchars = 0;
407 p = string;
408 len = stringlen;
409 while (len > 0)
410 {
411 stringchars[stringnchars] = p;
412 stringmblen[stringnchars] = mblen = pg_mblen(p);
413 stringnchars++;
414 p += mblen;
415 len -= mblen;
416 }
417
418 setchars = (const char **) palloc(setlen * sizeof(char *));
419 setmblen = (int *) palloc(setlen * sizeof(int));
420 setnchars = 0;
421 p = set;
422 len = setlen;
423 while (len > 0)
424 {
425 setchars[setnchars] = p;
426 setmblen[setnchars] = mblen = pg_mblen(p);
427 setnchars++;
428 p += mblen;
429 len -= mblen;
430 }
431
432 resultndx = 0; /* index in stringchars[] */
433 resultnchars = stringnchars;
434
435 if (doltrim)
436 {
437 while (resultnchars > 0)
438 {
439 str_pos = stringchars[resultndx];
440 str_len = stringmblen[resultndx];
441 for (i = 0; i < setnchars; i++)
442 {
443 if (str_len == setmblen[i] &&
444 memcmp(str_pos, setchars[i], str_len) == 0)
445 break;
446 }
447 if (i >= setnchars)
448 break; /* no match here */
449 string += str_len;
450 stringlen -= str_len;
451 resultndx++;
452 resultnchars--;
453 }
454 }
455
456 if (dortrim)
457 {
458 while (resultnchars > 0)
459 {
460 str_pos = stringchars[resultndx + resultnchars - 1];
461 str_len = stringmblen[resultndx + resultnchars - 1];
462 for (i = 0; i < setnchars; i++)
463 {
464 if (str_len == setmblen[i] &&
465 memcmp(str_pos, setchars[i], str_len) == 0)
466 break;
467 }
468 if (i >= setnchars)
469 break; /* no match here */
470 stringlen -= str_len;
471 resultnchars--;
472 }
473 }
474
475 pfree(stringchars);
476 pfree(stringmblen);
477 pfree(setchars);
478 pfree(setmblen);
479 }
480 else
481 {
482 /*
483 * In the single-byte-encoding case, we don't need such overhead.
484 */
485 if (doltrim)
486 {
487 while (stringlen > 0)
488 {
489 char str_ch = *string;
490
491 for (i = 0; i < setlen; i++)
492 {
493 if (str_ch == set[i])
494 break;
495 }
496 if (i >= setlen)
497 break; /* no match here */
498 string++;
499 stringlen--;
500 }
501 }
502
503 if (dortrim)
504 {
505 while (stringlen > 0)
506 {
507 char str_ch = string[stringlen - 1];
508
509 for (i = 0; i < setlen; i++)
510 {
511 if (str_ch == set[i])
512 break;
513 }
514 if (i >= setlen)
515 break; /* no match here */
516 stringlen--;
517 }
518 }
519 }
520 }
521
522 /* Return selected portion of string */
523 return cstring_to_text_with_len(string, stringlen);
524 }
525
526 /*
527 * Common implementation for bytea versions of btrim, ltrim, rtrim
528 */
529 bytea *
dobyteatrim(bytea * string,bytea * set,bool doltrim,bool dortrim)530 dobyteatrim(bytea *string, bytea *set, bool doltrim, bool dortrim)
531 {
532 bytea *ret;
533 char *ptr,
534 *end,
535 *ptr2,
536 *ptr2start,
537 *end2;
538 int m,
539 stringlen,
540 setlen;
541
542 stringlen = VARSIZE_ANY_EXHDR(string);
543 setlen = VARSIZE_ANY_EXHDR(set);
544
545 if (stringlen <= 0 || setlen <= 0)
546 return string;
547
548 m = stringlen;
549 ptr = VARDATA_ANY(string);
550 end = ptr + stringlen - 1;
551 ptr2start = VARDATA_ANY(set);
552 end2 = ptr2start + setlen - 1;
553
554 if (doltrim)
555 {
556 while (m > 0)
557 {
558 ptr2 = ptr2start;
559 while (ptr2 <= end2)
560 {
561 if (*ptr == *ptr2)
562 break;
563 ++ptr2;
564 }
565 if (ptr2 > end2)
566 break;
567 ptr++;
568 m--;
569 }
570 }
571
572 if (dortrim)
573 {
574 while (m > 0)
575 {
576 ptr2 = ptr2start;
577 while (ptr2 <= end2)
578 {
579 if (*end == *ptr2)
580 break;
581 ++ptr2;
582 }
583 if (ptr2 > end2)
584 break;
585 end--;
586 m--;
587 }
588 }
589
590 ret = (bytea *) palloc(VARHDRSZ + m);
591 SET_VARSIZE(ret, VARHDRSZ + m);
592 memcpy(VARDATA(ret), ptr, m);
593 return ret;
594 }
595
596 /********************************************************************
597 *
598 * byteatrim
599 *
600 * Syntax:
601 *
602 * bytea byteatrim(bytea string, bytea set)
603 *
604 * Purpose:
605 *
606 * Returns string with characters removed from the front and back
607 * up to the first character not in set.
608 *
609 * Cloned from btrim and modified as required.
610 ********************************************************************/
611
612 Datum
byteatrim(PG_FUNCTION_ARGS)613 byteatrim(PG_FUNCTION_ARGS)
614 {
615 bytea *string = PG_GETARG_BYTEA_PP(0);
616 bytea *set = PG_GETARG_BYTEA_PP(1);
617 bytea *ret;
618
619 ret = dobyteatrim(string, set, true, true);
620
621 PG_RETURN_BYTEA_P(ret);
622 }
623
624 /********************************************************************
625 *
626 * bytealtrim
627 *
628 * Syntax:
629 *
630 * bytea bytealtrim(bytea string, bytea set)
631 *
632 * Purpose:
633 *
634 * Returns string with initial characters removed up to the first
635 * character not in set.
636 *
637 ********************************************************************/
638
639 Datum
bytealtrim(PG_FUNCTION_ARGS)640 bytealtrim(PG_FUNCTION_ARGS)
641 {
642 bytea *string = PG_GETARG_BYTEA_PP(0);
643 bytea *set = PG_GETARG_BYTEA_PP(1);
644 bytea *ret;
645
646 ret = dobyteatrim(string, set, true, false);
647
648 PG_RETURN_BYTEA_P(ret);
649 }
650
651 /********************************************************************
652 *
653 * byteartrim
654 *
655 * Syntax:
656 *
657 * bytea byteartrim(bytea string, bytea set)
658 *
659 * Purpose:
660 *
661 * Returns string with final characters removed after the last
662 * character not in set.
663 *
664 ********************************************************************/
665
666 Datum
byteartrim(PG_FUNCTION_ARGS)667 byteartrim(PG_FUNCTION_ARGS)
668 {
669 bytea *string = PG_GETARG_BYTEA_PP(0);
670 bytea *set = PG_GETARG_BYTEA_PP(1);
671 bytea *ret;
672
673 ret = dobyteatrim(string, set, false, true);
674
675 PG_RETURN_BYTEA_P(ret);
676 }
677
678 /********************************************************************
679 *
680 * ltrim
681 *
682 * Syntax:
683 *
684 * text ltrim(text string, text set)
685 *
686 * Purpose:
687 *
688 * Returns string with initial characters removed up to the first
689 * character not in set.
690 *
691 ********************************************************************/
692
693 Datum
ltrim(PG_FUNCTION_ARGS)694 ltrim(PG_FUNCTION_ARGS)
695 {
696 text *string = PG_GETARG_TEXT_PP(0);
697 text *set = PG_GETARG_TEXT_PP(1);
698 text *ret;
699
700 ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
701 VARDATA_ANY(set), VARSIZE_ANY_EXHDR(set),
702 true, false);
703
704 PG_RETURN_TEXT_P(ret);
705 }
706
707 /********************************************************************
708 *
709 * ltrim1 --- ltrim with set fixed as ' '
710 *
711 ********************************************************************/
712
713 Datum
ltrim1(PG_FUNCTION_ARGS)714 ltrim1(PG_FUNCTION_ARGS)
715 {
716 text *string = PG_GETARG_TEXT_PP(0);
717 text *ret;
718
719 ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
720 " ", 1,
721 true, false);
722
723 PG_RETURN_TEXT_P(ret);
724 }
725
726 /********************************************************************
727 *
728 * rtrim
729 *
730 * Syntax:
731 *
732 * text rtrim(text string, text set)
733 *
734 * Purpose:
735 *
736 * Returns string with final characters removed after the last
737 * character not in set.
738 *
739 ********************************************************************/
740
741 Datum
rtrim(PG_FUNCTION_ARGS)742 rtrim(PG_FUNCTION_ARGS)
743 {
744 text *string = PG_GETARG_TEXT_PP(0);
745 text *set = PG_GETARG_TEXT_PP(1);
746 text *ret;
747
748 ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
749 VARDATA_ANY(set), VARSIZE_ANY_EXHDR(set),
750 false, true);
751
752 PG_RETURN_TEXT_P(ret);
753 }
754
755 /********************************************************************
756 *
757 * rtrim1 --- rtrim with set fixed as ' '
758 *
759 ********************************************************************/
760
761 Datum
rtrim1(PG_FUNCTION_ARGS)762 rtrim1(PG_FUNCTION_ARGS)
763 {
764 text *string = PG_GETARG_TEXT_PP(0);
765 text *ret;
766
767 ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
768 " ", 1,
769 false, true);
770
771 PG_RETURN_TEXT_P(ret);
772 }
773
774
775 /********************************************************************
776 *
777 * translate
778 *
779 * Syntax:
780 *
781 * text translate(text string, text from, text to)
782 *
783 * Purpose:
784 *
785 * Returns string after replacing all occurrences of characters in from
786 * with the corresponding character in to. If from is longer than to,
787 * occurrences of the extra characters in from are deleted.
788 * Improved by Edwin Ramirez <ramirez@doc.mssm.edu>.
789 *
790 ********************************************************************/
791
792 Datum
translate(PG_FUNCTION_ARGS)793 translate(PG_FUNCTION_ARGS)
794 {
795 text *string = PG_GETARG_TEXT_PP(0);
796 text *from = PG_GETARG_TEXT_PP(1);
797 text *to = PG_GETARG_TEXT_PP(2);
798 text *result;
799 char *from_ptr,
800 *to_ptr;
801 char *source,
802 *target;
803 int m,
804 fromlen,
805 tolen,
806 retlen,
807 i;
808 int worst_len;
809 int len;
810 int source_len;
811 int from_index;
812
813 m = VARSIZE_ANY_EXHDR(string);
814 if (m <= 0)
815 PG_RETURN_TEXT_P(string);
816 source = VARDATA_ANY(string);
817
818 fromlen = VARSIZE_ANY_EXHDR(from);
819 from_ptr = VARDATA_ANY(from);
820 tolen = VARSIZE_ANY_EXHDR(to);
821 to_ptr = VARDATA_ANY(to);
822
823 /*
824 * The worst-case expansion is to substitute a max-length character for a
825 * single-byte character at each position of the string.
826 */
827 worst_len = pg_database_encoding_max_length() * m;
828
829 /* check for integer overflow */
830 if (worst_len / pg_database_encoding_max_length() != m)
831 ereport(ERROR,
832 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
833 errmsg("requested length too large")));
834
835 result = (text *) palloc(worst_len + VARHDRSZ);
836 target = VARDATA(result);
837 retlen = 0;
838
839 while (m > 0)
840 {
841 source_len = pg_mblen(source);
842 from_index = 0;
843
844 for (i = 0; i < fromlen; i += len)
845 {
846 len = pg_mblen(&from_ptr[i]);
847 if (len == source_len &&
848 memcmp(source, &from_ptr[i], len) == 0)
849 break;
850
851 from_index++;
852 }
853 if (i < fromlen)
854 {
855 /* substitute */
856 char *p = to_ptr;
857
858 for (i = 0; i < from_index; i++)
859 {
860 p += pg_mblen(p);
861 if (p >= (to_ptr + tolen))
862 break;
863 }
864 if (p < (to_ptr + tolen))
865 {
866 len = pg_mblen(p);
867 memcpy(target, p, len);
868 target += len;
869 retlen += len;
870 }
871
872 }
873 else
874 {
875 /* no match, so copy */
876 memcpy(target, source, source_len);
877 target += source_len;
878 retlen += source_len;
879 }
880
881 source += source_len;
882 m -= source_len;
883 }
884
885 SET_VARSIZE(result, retlen + VARHDRSZ);
886
887 /*
888 * The function result is probably much bigger than needed, if we're using
889 * a multibyte encoding, but it's not worth reallocating it; the result
890 * probably won't live long anyway.
891 */
892
893 PG_RETURN_TEXT_P(result);
894 }
895
896 /********************************************************************
897 *
898 * ascii
899 *
900 * Syntax:
901 *
902 * int ascii(text string)
903 *
904 * Purpose:
905 *
906 * Returns the decimal representation of the first character from
907 * string.
908 * If the string is empty we return 0.
909 * If the database encoding is UTF8, we return the Unicode codepoint.
910 * If the database encoding is any other multi-byte encoding, we
911 * return the value of the first byte if it is an ASCII character
912 * (range 1 .. 127), or raise an error.
913 * For all other encodings we return the value of the first byte,
914 * (range 1..255).
915 *
916 ********************************************************************/
917
918 Datum
ascii(PG_FUNCTION_ARGS)919 ascii(PG_FUNCTION_ARGS)
920 {
921 text *string = PG_GETARG_TEXT_PP(0);
922 int encoding = GetDatabaseEncoding();
923 unsigned char *data;
924
925 if (VARSIZE_ANY_EXHDR(string) <= 0)
926 PG_RETURN_INT32(0);
927
928 data = (unsigned char *) VARDATA_ANY(string);
929
930 if (encoding == PG_UTF8 && *data > 127)
931 {
932 /* return the code point for Unicode */
933
934 int result = 0,
935 tbytes = 0,
936 i;
937
938 if (*data >= 0xF0)
939 {
940 result = *data & 0x07;
941 tbytes = 3;
942 }
943 else if (*data >= 0xE0)
944 {
945 result = *data & 0x0F;
946 tbytes = 2;
947 }
948 else
949 {
950 Assert(*data > 0xC0);
951 result = *data & 0x1f;
952 tbytes = 1;
953 }
954
955 Assert(tbytes > 0);
956
957 for (i = 1; i <= tbytes; i++)
958 {
959 Assert((data[i] & 0xC0) == 0x80);
960 result = (result << 6) + (data[i] & 0x3f);
961 }
962
963 PG_RETURN_INT32(result);
964 }
965 else
966 {
967 if (pg_encoding_max_length(encoding) > 1 && *data > 127)
968 ereport(ERROR,
969 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
970 errmsg("requested character too large")));
971
972
973 PG_RETURN_INT32((int32) *data);
974 }
975 }
976
977 /********************************************************************
978 *
979 * chr
980 *
981 * Syntax:
982 *
983 * text chr(int val)
984 *
985 * Purpose:
986 *
987 * Returns the character having the binary equivalent to val.
988 *
989 * For UTF8 we treat the argument as a Unicode code point.
990 * For other multi-byte encodings we raise an error for arguments
991 * outside the strict ASCII range (1..127).
992 *
993 * It's important that we don't ever return a value that is not valid
994 * in the database encoding, so that this doesn't become a way for
995 * invalid data to enter the database.
996 *
997 ********************************************************************/
998
999 Datum
chr(PG_FUNCTION_ARGS)1000 chr (PG_FUNCTION_ARGS)
1001 {
1002 uint32 cvalue = PG_GETARG_UINT32(0);
1003 text *result;
1004 int encoding = GetDatabaseEncoding();
1005
1006 if (encoding == PG_UTF8 && cvalue > 127)
1007 {
1008 /* for Unicode we treat the argument as a code point */
1009 int bytes;
1010 unsigned char *wch;
1011
1012 /*
1013 * We only allow valid Unicode code points; per RFC3629 that stops at
1014 * U+10FFFF, even though 4-byte UTF8 sequences can hold values up to
1015 * U+1FFFFF.
1016 */
1017 if (cvalue > 0x0010ffff)
1018 ereport(ERROR,
1019 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1020 errmsg("requested character too large for encoding: %d",
1021 cvalue)));
1022
1023 if (cvalue > 0xffff)
1024 bytes = 4;
1025 else if (cvalue > 0x07ff)
1026 bytes = 3;
1027 else
1028 bytes = 2;
1029
1030 result = (text *) palloc(VARHDRSZ + bytes);
1031 SET_VARSIZE(result, VARHDRSZ + bytes);
1032 wch = (unsigned char *) VARDATA(result);
1033
1034 if (bytes == 2)
1035 {
1036 wch[0] = 0xC0 | ((cvalue >> 6) & 0x1F);
1037 wch[1] = 0x80 | (cvalue & 0x3F);
1038 }
1039 else if (bytes == 3)
1040 {
1041 wch[0] = 0xE0 | ((cvalue >> 12) & 0x0F);
1042 wch[1] = 0x80 | ((cvalue >> 6) & 0x3F);
1043 wch[2] = 0x80 | (cvalue & 0x3F);
1044 }
1045 else
1046 {
1047 wch[0] = 0xF0 | ((cvalue >> 18) & 0x07);
1048 wch[1] = 0x80 | ((cvalue >> 12) & 0x3F);
1049 wch[2] = 0x80 | ((cvalue >> 6) & 0x3F);
1050 wch[3] = 0x80 | (cvalue & 0x3F);
1051 }
1052
1053 /*
1054 * The preceding range check isn't sufficient, because UTF8 excludes
1055 * Unicode "surrogate pair" codes. Make sure what we created is valid
1056 * UTF8.
1057 */
1058 if (!pg_utf8_islegal(wch, bytes))
1059 ereport(ERROR,
1060 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1061 errmsg("requested character not valid for encoding: %d",
1062 cvalue)));
1063 }
1064 else
1065 {
1066 bool is_mb;
1067
1068 /*
1069 * Error out on arguments that make no sense or that we can't validly
1070 * represent in the encoding.
1071 */
1072 if (cvalue == 0)
1073 ereport(ERROR,
1074 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1075 errmsg("null character not permitted")));
1076
1077 is_mb = pg_encoding_max_length(encoding) > 1;
1078
1079 if ((is_mb && (cvalue > 127)) || (!is_mb && (cvalue > 255)))
1080 ereport(ERROR,
1081 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1082 errmsg("requested character too large for encoding: %d",
1083 cvalue)));
1084
1085 result = (text *) palloc(VARHDRSZ + 1);
1086 SET_VARSIZE(result, VARHDRSZ + 1);
1087 *VARDATA(result) = (char) cvalue;
1088 }
1089
1090 PG_RETURN_TEXT_P(result);
1091 }
1092
1093 /********************************************************************
1094 *
1095 * repeat
1096 *
1097 * Syntax:
1098 *
1099 * text repeat(text string, int val)
1100 *
1101 * Purpose:
1102 *
1103 * Repeat string by val.
1104 *
1105 ********************************************************************/
1106
1107 Datum
repeat(PG_FUNCTION_ARGS)1108 repeat(PG_FUNCTION_ARGS)
1109 {
1110 text *string = PG_GETARG_TEXT_PP(0);
1111 int32 count = PG_GETARG_INT32(1);
1112 text *result;
1113 int slen,
1114 tlen;
1115 int i;
1116 char *cp,
1117 *sp;
1118
1119 if (count < 0)
1120 count = 0;
1121
1122 slen = VARSIZE_ANY_EXHDR(string);
1123
1124 if (unlikely(pg_mul_s32_overflow(count, slen, &tlen)) ||
1125 unlikely(pg_add_s32_overflow(tlen, VARHDRSZ, &tlen)))
1126 ereport(ERROR,
1127 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1128 errmsg("requested length too large")));
1129
1130 result = (text *) palloc(tlen);
1131
1132 SET_VARSIZE(result, tlen);
1133 cp = VARDATA(result);
1134 sp = VARDATA_ANY(string);
1135 for (i = 0; i < count; i++)
1136 {
1137 memcpy(cp, sp, slen);
1138 cp += slen;
1139 CHECK_FOR_INTERRUPTS();
1140 }
1141
1142 PG_RETURN_TEXT_P(result);
1143 }
1144