1 /*-------------------------------------------------------------------------
2 * oracle_compat.c
3 * Oracle compatible functions.
4 *
5 * Copyright (c) 1996-2017, PostgreSQL Global Development Group
6 *
7 * Author: Edmund Mergl <E.Mergl@bawue.de>
8 * Multibyte enhancement: Tatsuo Ishii <ishii@postgresql.org>
9 *
10 *
11 * IDENTIFICATION
12 * src/backend/utils/adt/oracle_compat.c
13 *
14 *-------------------------------------------------------------------------
15 */
#include "postgres.h"

#include <limits.h>

#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "utils/builtins.h"
#include "utils/formatting.h"
22
23 static text *dotrim(const char *string, int stringlen,
24 const char *set, int setlen,
25 bool doltrim, bool dortrim);
26
27
28 /********************************************************************
29 *
30 * lower
31 *
32 * Syntax:
33 *
34 * text lower(text string)
35 *
36 * Purpose:
37 *
38 * Returns string, with all letters forced to lowercase.
39 *
40 ********************************************************************/
41
42 Datum
lower(PG_FUNCTION_ARGS)43 lower(PG_FUNCTION_ARGS)
44 {
45 text *in_string = PG_GETARG_TEXT_PP(0);
46 char *out_string;
47 text *result;
48
49 out_string = str_tolower(VARDATA_ANY(in_string),
50 VARSIZE_ANY_EXHDR(in_string),
51 PG_GET_COLLATION());
52 result = cstring_to_text(out_string);
53 pfree(out_string);
54
55 PG_RETURN_TEXT_P(result);
56 }
57
58
59 /********************************************************************
60 *
61 * upper
62 *
63 * Syntax:
64 *
65 * text upper(text string)
66 *
67 * Purpose:
68 *
69 * Returns string, with all letters forced to uppercase.
70 *
71 ********************************************************************/
72
73 Datum
upper(PG_FUNCTION_ARGS)74 upper(PG_FUNCTION_ARGS)
75 {
76 text *in_string = PG_GETARG_TEXT_PP(0);
77 char *out_string;
78 text *result;
79
80 out_string = str_toupper(VARDATA_ANY(in_string),
81 VARSIZE_ANY_EXHDR(in_string),
82 PG_GET_COLLATION());
83 result = cstring_to_text(out_string);
84 pfree(out_string);
85
86 PG_RETURN_TEXT_P(result);
87 }
88
89
90 /********************************************************************
91 *
92 * initcap
93 *
94 * Syntax:
95 *
96 * text initcap(text string)
97 *
98 * Purpose:
99 *
100 * Returns string, with first letter of each word in uppercase, all
101 * other letters in lowercase. A word is defined as a sequence of
102 * alphanumeric characters, delimited by non-alphanumeric
103 * characters.
104 *
105 ********************************************************************/
106
107 Datum
initcap(PG_FUNCTION_ARGS)108 initcap(PG_FUNCTION_ARGS)
109 {
110 text *in_string = PG_GETARG_TEXT_PP(0);
111 char *out_string;
112 text *result;
113
114 out_string = str_initcap(VARDATA_ANY(in_string),
115 VARSIZE_ANY_EXHDR(in_string),
116 PG_GET_COLLATION());
117 result = cstring_to_text(out_string);
118 pfree(out_string);
119
120 PG_RETURN_TEXT_P(result);
121 }
122
123
124 /********************************************************************
125 *
126 * lpad
127 *
128 * Syntax:
129 *
130 * text lpad(text string1, int4 len, text string2)
131 *
132 * Purpose:
133 *
134 * Returns string1, left-padded to length len with the sequence of
135 * characters in string2. If len is less than the length of string1,
136 * instead truncate (on the right) to len.
137 *
138 ********************************************************************/
139
140 Datum
lpad(PG_FUNCTION_ARGS)141 lpad(PG_FUNCTION_ARGS)
142 {
143 text *string1 = PG_GETARG_TEXT_PP(0);
144 int32 len = PG_GETARG_INT32(1);
145 text *string2 = PG_GETARG_TEXT_PP(2);
146 text *ret;
147 char *ptr1,
148 *ptr2,
149 *ptr2start,
150 *ptr2end,
151 *ptr_ret;
152 int m,
153 s1len,
154 s2len;
155
156 int bytelen;
157
158 /* Negative len is silently taken as zero */
159 if (len < 0)
160 len = 0;
161
162 s1len = VARSIZE_ANY_EXHDR(string1);
163 if (s1len < 0)
164 s1len = 0; /* shouldn't happen */
165
166 s2len = VARSIZE_ANY_EXHDR(string2);
167 if (s2len < 0)
168 s2len = 0; /* shouldn't happen */
169
170 s1len = pg_mbstrlen_with_len(VARDATA_ANY(string1), s1len);
171
172 if (s1len > len)
173 s1len = len; /* truncate string1 to len chars */
174
175 if (s2len <= 0)
176 len = s1len; /* nothing to pad with, so don't pad */
177
178 bytelen = pg_database_encoding_max_length() * len;
179
180 /* check for integer overflow */
181 if (len != 0 && bytelen / pg_database_encoding_max_length() != len)
182 ereport(ERROR,
183 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
184 errmsg("requested length too large")));
185
186 ret = (text *) palloc(VARHDRSZ + bytelen);
187
188 m = len - s1len;
189
190 ptr2 = ptr2start = VARDATA_ANY(string2);
191 ptr2end = ptr2 + s2len;
192 ptr_ret = VARDATA(ret);
193
194 while (m--)
195 {
196 int mlen = pg_mblen(ptr2);
197
198 memcpy(ptr_ret, ptr2, mlen);
199 ptr_ret += mlen;
200 ptr2 += mlen;
201 if (ptr2 == ptr2end) /* wrap around at end of s2 */
202 ptr2 = ptr2start;
203 }
204
205 ptr1 = VARDATA_ANY(string1);
206
207 while (s1len--)
208 {
209 int mlen = pg_mblen(ptr1);
210
211 memcpy(ptr_ret, ptr1, mlen);
212 ptr_ret += mlen;
213 ptr1 += mlen;
214 }
215
216 SET_VARSIZE(ret, ptr_ret - (char *) ret);
217
218 PG_RETURN_TEXT_P(ret);
219 }
220
221
222 /********************************************************************
223 *
224 * rpad
225 *
226 * Syntax:
227 *
228 * text rpad(text string1, int4 len, text string2)
229 *
230 * Purpose:
231 *
232 * Returns string1, right-padded to length len with the sequence of
233 * characters in string2. If len is less than the length of string1,
234 * instead truncate (on the right) to len.
235 *
236 ********************************************************************/
237
238 Datum
rpad(PG_FUNCTION_ARGS)239 rpad(PG_FUNCTION_ARGS)
240 {
241 text *string1 = PG_GETARG_TEXT_PP(0);
242 int32 len = PG_GETARG_INT32(1);
243 text *string2 = PG_GETARG_TEXT_PP(2);
244 text *ret;
245 char *ptr1,
246 *ptr2,
247 *ptr2start,
248 *ptr2end,
249 *ptr_ret;
250 int m,
251 s1len,
252 s2len;
253
254 int bytelen;
255
256 /* Negative len is silently taken as zero */
257 if (len < 0)
258 len = 0;
259
260 s1len = VARSIZE_ANY_EXHDR(string1);
261 if (s1len < 0)
262 s1len = 0; /* shouldn't happen */
263
264 s2len = VARSIZE_ANY_EXHDR(string2);
265 if (s2len < 0)
266 s2len = 0; /* shouldn't happen */
267
268 s1len = pg_mbstrlen_with_len(VARDATA_ANY(string1), s1len);
269
270 if (s1len > len)
271 s1len = len; /* truncate string1 to len chars */
272
273 if (s2len <= 0)
274 len = s1len; /* nothing to pad with, so don't pad */
275
276 bytelen = pg_database_encoding_max_length() * len;
277
278 /* Check for integer overflow */
279 if (len != 0 && bytelen / pg_database_encoding_max_length() != len)
280 ereport(ERROR,
281 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
282 errmsg("requested length too large")));
283
284 ret = (text *) palloc(VARHDRSZ + bytelen);
285 m = len - s1len;
286
287 ptr1 = VARDATA_ANY(string1);
288 ptr_ret = VARDATA(ret);
289
290 while (s1len--)
291 {
292 int mlen = pg_mblen(ptr1);
293
294 memcpy(ptr_ret, ptr1, mlen);
295 ptr_ret += mlen;
296 ptr1 += mlen;
297 }
298
299 ptr2 = ptr2start = VARDATA_ANY(string2);
300 ptr2end = ptr2 + s2len;
301
302 while (m--)
303 {
304 int mlen = pg_mblen(ptr2);
305
306 memcpy(ptr_ret, ptr2, mlen);
307 ptr_ret += mlen;
308 ptr2 += mlen;
309 if (ptr2 == ptr2end) /* wrap around at end of s2 */
310 ptr2 = ptr2start;
311 }
312
313 SET_VARSIZE(ret, ptr_ret - (char *) ret);
314
315 PG_RETURN_TEXT_P(ret);
316 }
317
318
319 /********************************************************************
320 *
321 * btrim
322 *
323 * Syntax:
324 *
325 * text btrim(text string, text set)
326 *
327 * Purpose:
328 *
329 * Returns string with characters removed from the front and back
330 * up to the first character not in set.
331 *
332 ********************************************************************/
333
334 Datum
btrim(PG_FUNCTION_ARGS)335 btrim(PG_FUNCTION_ARGS)
336 {
337 text *string = PG_GETARG_TEXT_PP(0);
338 text *set = PG_GETARG_TEXT_PP(1);
339 text *ret;
340
341 ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
342 VARDATA_ANY(set), VARSIZE_ANY_EXHDR(set),
343 true, true);
344
345 PG_RETURN_TEXT_P(ret);
346 }
347
348 /********************************************************************
349 *
350 * btrim1 --- btrim with set fixed as ' '
351 *
352 ********************************************************************/
353
354 Datum
btrim1(PG_FUNCTION_ARGS)355 btrim1(PG_FUNCTION_ARGS)
356 {
357 text *string = PG_GETARG_TEXT_PP(0);
358 text *ret;
359
360 ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
361 " ", 1,
362 true, true);
363
364 PG_RETURN_TEXT_P(ret);
365 }
366
367 /*
368 * Common implementation for btrim, ltrim, rtrim
369 */
370 static text *
dotrim(const char * string,int stringlen,const char * set,int setlen,bool doltrim,bool dortrim)371 dotrim(const char *string, int stringlen,
372 const char *set, int setlen,
373 bool doltrim, bool dortrim)
374 {
375 int i;
376
377 /* Nothing to do if either string or set is empty */
378 if (stringlen > 0 && setlen > 0)
379 {
380 if (pg_database_encoding_max_length() > 1)
381 {
382 /*
383 * In the multibyte-encoding case, build arrays of pointers to
384 * character starts, so that we can avoid inefficient checks in
385 * the inner loops.
386 */
387 const char **stringchars;
388 const char **setchars;
389 int *stringmblen;
390 int *setmblen;
391 int stringnchars;
392 int setnchars;
393 int resultndx;
394 int resultnchars;
395 const char *p;
396 int len;
397 int mblen;
398 const char *str_pos;
399 int str_len;
400
401 stringchars = (const char **) palloc(stringlen * sizeof(char *));
402 stringmblen = (int *) palloc(stringlen * sizeof(int));
403 stringnchars = 0;
404 p = string;
405 len = stringlen;
406 while (len > 0)
407 {
408 stringchars[stringnchars] = p;
409 stringmblen[stringnchars] = mblen = pg_mblen(p);
410 stringnchars++;
411 p += mblen;
412 len -= mblen;
413 }
414
415 setchars = (const char **) palloc(setlen * sizeof(char *));
416 setmblen = (int *) palloc(setlen * sizeof(int));
417 setnchars = 0;
418 p = set;
419 len = setlen;
420 while (len > 0)
421 {
422 setchars[setnchars] = p;
423 setmblen[setnchars] = mblen = pg_mblen(p);
424 setnchars++;
425 p += mblen;
426 len -= mblen;
427 }
428
429 resultndx = 0; /* index in stringchars[] */
430 resultnchars = stringnchars;
431
432 if (doltrim)
433 {
434 while (resultnchars > 0)
435 {
436 str_pos = stringchars[resultndx];
437 str_len = stringmblen[resultndx];
438 for (i = 0; i < setnchars; i++)
439 {
440 if (str_len == setmblen[i] &&
441 memcmp(str_pos, setchars[i], str_len) == 0)
442 break;
443 }
444 if (i >= setnchars)
445 break; /* no match here */
446 string += str_len;
447 stringlen -= str_len;
448 resultndx++;
449 resultnchars--;
450 }
451 }
452
453 if (dortrim)
454 {
455 while (resultnchars > 0)
456 {
457 str_pos = stringchars[resultndx + resultnchars - 1];
458 str_len = stringmblen[resultndx + resultnchars - 1];
459 for (i = 0; i < setnchars; i++)
460 {
461 if (str_len == setmblen[i] &&
462 memcmp(str_pos, setchars[i], str_len) == 0)
463 break;
464 }
465 if (i >= setnchars)
466 break; /* no match here */
467 stringlen -= str_len;
468 resultnchars--;
469 }
470 }
471
472 pfree(stringchars);
473 pfree(stringmblen);
474 pfree(setchars);
475 pfree(setmblen);
476 }
477 else
478 {
479 /*
480 * In the single-byte-encoding case, we don't need such overhead.
481 */
482 if (doltrim)
483 {
484 while (stringlen > 0)
485 {
486 char str_ch = *string;
487
488 for (i = 0; i < setlen; i++)
489 {
490 if (str_ch == set[i])
491 break;
492 }
493 if (i >= setlen)
494 break; /* no match here */
495 string++;
496 stringlen--;
497 }
498 }
499
500 if (dortrim)
501 {
502 while (stringlen > 0)
503 {
504 char str_ch = string[stringlen - 1];
505
506 for (i = 0; i < setlen; i++)
507 {
508 if (str_ch == set[i])
509 break;
510 }
511 if (i >= setlen)
512 break; /* no match here */
513 stringlen--;
514 }
515 }
516 }
517 }
518
519 /* Return selected portion of string */
520 return cstring_to_text_with_len(string, stringlen);
521 }
522
523 /********************************************************************
524 *
525 * byteatrim
526 *
527 * Syntax:
528 *
529 * bytea byteatrim(byta string, bytea set)
530 *
531 * Purpose:
532 *
533 * Returns string with characters removed from the front and back
534 * up to the first character not in set.
535 *
536 * Cloned from btrim and modified as required.
537 ********************************************************************/
538
539 Datum
byteatrim(PG_FUNCTION_ARGS)540 byteatrim(PG_FUNCTION_ARGS)
541 {
542 bytea *string = PG_GETARG_BYTEA_PP(0);
543 bytea *set = PG_GETARG_BYTEA_PP(1);
544 bytea *ret;
545 char *ptr,
546 *end,
547 *ptr2,
548 *ptr2start,
549 *end2;
550 int m,
551 stringlen,
552 setlen;
553
554 stringlen = VARSIZE_ANY_EXHDR(string);
555 setlen = VARSIZE_ANY_EXHDR(set);
556
557 if (stringlen <= 0 || setlen <= 0)
558 PG_RETURN_BYTEA_P(string);
559
560 m = stringlen;
561 ptr = VARDATA_ANY(string);
562 end = ptr + stringlen - 1;
563 ptr2start = VARDATA_ANY(set);
564 end2 = ptr2start + setlen - 1;
565
566 while (m > 0)
567 {
568 ptr2 = ptr2start;
569 while (ptr2 <= end2)
570 {
571 if (*ptr == *ptr2)
572 break;
573 ++ptr2;
574 }
575 if (ptr2 > end2)
576 break;
577 ptr++;
578 m--;
579 }
580
581 while (m > 0)
582 {
583 ptr2 = ptr2start;
584 while (ptr2 <= end2)
585 {
586 if (*end == *ptr2)
587 break;
588 ++ptr2;
589 }
590 if (ptr2 > end2)
591 break;
592 end--;
593 m--;
594 }
595
596 ret = (bytea *) palloc(VARHDRSZ + m);
597 SET_VARSIZE(ret, VARHDRSZ + m);
598 memcpy(VARDATA(ret), ptr, m);
599
600 PG_RETURN_BYTEA_P(ret);
601 }
602
603 /********************************************************************
604 *
605 * ltrim
606 *
607 * Syntax:
608 *
609 * text ltrim(text string, text set)
610 *
611 * Purpose:
612 *
613 * Returns string with initial characters removed up to the first
614 * character not in set.
615 *
616 ********************************************************************/
617
618 Datum
ltrim(PG_FUNCTION_ARGS)619 ltrim(PG_FUNCTION_ARGS)
620 {
621 text *string = PG_GETARG_TEXT_PP(0);
622 text *set = PG_GETARG_TEXT_PP(1);
623 text *ret;
624
625 ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
626 VARDATA_ANY(set), VARSIZE_ANY_EXHDR(set),
627 true, false);
628
629 PG_RETURN_TEXT_P(ret);
630 }
631
632 /********************************************************************
633 *
634 * ltrim1 --- ltrim with set fixed as ' '
635 *
636 ********************************************************************/
637
638 Datum
ltrim1(PG_FUNCTION_ARGS)639 ltrim1(PG_FUNCTION_ARGS)
640 {
641 text *string = PG_GETARG_TEXT_PP(0);
642 text *ret;
643
644 ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
645 " ", 1,
646 true, false);
647
648 PG_RETURN_TEXT_P(ret);
649 }
650
651 /********************************************************************
652 *
653 * rtrim
654 *
655 * Syntax:
656 *
657 * text rtrim(text string, text set)
658 *
659 * Purpose:
660 *
661 * Returns string with final characters removed after the last
662 * character not in set.
663 *
664 ********************************************************************/
665
666 Datum
rtrim(PG_FUNCTION_ARGS)667 rtrim(PG_FUNCTION_ARGS)
668 {
669 text *string = PG_GETARG_TEXT_PP(0);
670 text *set = PG_GETARG_TEXT_PP(1);
671 text *ret;
672
673 ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
674 VARDATA_ANY(set), VARSIZE_ANY_EXHDR(set),
675 false, true);
676
677 PG_RETURN_TEXT_P(ret);
678 }
679
680 /********************************************************************
681 *
682 * rtrim1 --- rtrim with set fixed as ' '
683 *
684 ********************************************************************/
685
686 Datum
rtrim1(PG_FUNCTION_ARGS)687 rtrim1(PG_FUNCTION_ARGS)
688 {
689 text *string = PG_GETARG_TEXT_PP(0);
690 text *ret;
691
692 ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
693 " ", 1,
694 false, true);
695
696 PG_RETURN_TEXT_P(ret);
697 }
698
699
700 /********************************************************************
701 *
702 * translate
703 *
704 * Syntax:
705 *
706 * text translate(text string, text from, text to)
707 *
708 * Purpose:
709 *
710 * Returns string after replacing all occurrences of characters in from
711 * with the corresponding character in to. If from is longer than to,
712 * occurrences of the extra characters in from are deleted.
713 * Improved by Edwin Ramirez <ramirez@doc.mssm.edu>.
714 *
715 ********************************************************************/
716
717 Datum
translate(PG_FUNCTION_ARGS)718 translate(PG_FUNCTION_ARGS)
719 {
720 text *string = PG_GETARG_TEXT_PP(0);
721 text *from = PG_GETARG_TEXT_PP(1);
722 text *to = PG_GETARG_TEXT_PP(2);
723 text *result;
724 char *from_ptr,
725 *to_ptr;
726 char *source,
727 *target;
728 int m,
729 fromlen,
730 tolen,
731 retlen,
732 i;
733 int worst_len;
734 int len;
735 int source_len;
736 int from_index;
737
738 m = VARSIZE_ANY_EXHDR(string);
739 if (m <= 0)
740 PG_RETURN_TEXT_P(string);
741 source = VARDATA_ANY(string);
742
743 fromlen = VARSIZE_ANY_EXHDR(from);
744 from_ptr = VARDATA_ANY(from);
745 tolen = VARSIZE_ANY_EXHDR(to);
746 to_ptr = VARDATA_ANY(to);
747
748 /*
749 * The worst-case expansion is to substitute a max-length character for a
750 * single-byte character at each position of the string.
751 */
752 worst_len = pg_database_encoding_max_length() * m;
753
754 /* check for integer overflow */
755 if (worst_len / pg_database_encoding_max_length() != m)
756 ereport(ERROR,
757 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
758 errmsg("requested length too large")));
759
760 result = (text *) palloc(worst_len + VARHDRSZ);
761 target = VARDATA(result);
762 retlen = 0;
763
764 while (m > 0)
765 {
766 source_len = pg_mblen(source);
767 from_index = 0;
768
769 for (i = 0; i < fromlen; i += len)
770 {
771 len = pg_mblen(&from_ptr[i]);
772 if (len == source_len &&
773 memcmp(source, &from_ptr[i], len) == 0)
774 break;
775
776 from_index++;
777 }
778 if (i < fromlen)
779 {
780 /* substitute */
781 char *p = to_ptr;
782
783 for (i = 0; i < from_index; i++)
784 {
785 p += pg_mblen(p);
786 if (p >= (to_ptr + tolen))
787 break;
788 }
789 if (p < (to_ptr + tolen))
790 {
791 len = pg_mblen(p);
792 memcpy(target, p, len);
793 target += len;
794 retlen += len;
795 }
796
797 }
798 else
799 {
800 /* no match, so copy */
801 memcpy(target, source, source_len);
802 target += source_len;
803 retlen += source_len;
804 }
805
806 source += source_len;
807 m -= source_len;
808 }
809
810 SET_VARSIZE(result, retlen + VARHDRSZ);
811
812 /*
813 * The function result is probably much bigger than needed, if we're using
814 * a multibyte encoding, but it's not worth reallocating it; the result
815 * probably won't live long anyway.
816 */
817
818 PG_RETURN_TEXT_P(result);
819 }
820
821 /********************************************************************
822 *
823 * ascii
824 *
825 * Syntax:
826 *
827 * int ascii(text string)
828 *
829 * Purpose:
830 *
831 * Returns the decimal representation of the first character from
832 * string.
833 * If the string is empty we return 0.
834 * If the database encoding is UTF8, we return the Unicode codepoint.
835 * If the database encoding is any other multi-byte encoding, we
836 * return the value of the first byte if it is an ASCII character
837 * (range 1 .. 127), or raise an error.
838 * For all other encodings we return the value of the first byte,
839 * (range 1..255).
840 *
841 ********************************************************************/
842
843 Datum
ascii(PG_FUNCTION_ARGS)844 ascii(PG_FUNCTION_ARGS)
845 {
846 text *string = PG_GETARG_TEXT_PP(0);
847 int encoding = GetDatabaseEncoding();
848 unsigned char *data;
849
850 if (VARSIZE_ANY_EXHDR(string) <= 0)
851 PG_RETURN_INT32(0);
852
853 data = (unsigned char *) VARDATA_ANY(string);
854
855 if (encoding == PG_UTF8 && *data > 127)
856 {
857 /* return the code point for Unicode */
858
859 int result = 0,
860 tbytes = 0,
861 i;
862
863 if (*data >= 0xF0)
864 {
865 result = *data & 0x07;
866 tbytes = 3;
867 }
868 else if (*data >= 0xE0)
869 {
870 result = *data & 0x0F;
871 tbytes = 2;
872 }
873 else
874 {
875 Assert(*data > 0xC0);
876 result = *data & 0x1f;
877 tbytes = 1;
878 }
879
880 Assert(tbytes > 0);
881
882 for (i = 1; i <= tbytes; i++)
883 {
884 Assert((data[i] & 0xC0) == 0x80);
885 result = (result << 6) + (data[i] & 0x3f);
886 }
887
888 PG_RETURN_INT32(result);
889 }
890 else
891 {
892 if (pg_encoding_max_length(encoding) > 1 && *data > 127)
893 ereport(ERROR,
894 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
895 errmsg("requested character too large")));
896
897
898 PG_RETURN_INT32((int32) *data);
899 }
900 }
901
902 /********************************************************************
903 *
904 * chr
905 *
906 * Syntax:
907 *
908 * text chr(int val)
909 *
910 * Purpose:
911 *
912 * Returns the character having the binary equivalent to val.
913 *
914 * For UTF8 we treat the argumwent as a Unicode code point.
915 * For other multi-byte encodings we raise an error for arguments
916 * outside the strict ASCII range (1..127).
917 *
918 * It's important that we don't ever return a value that is not valid
919 * in the database encoding, so that this doesn't become a way for
920 * invalid data to enter the database.
921 *
922 ********************************************************************/
923
924 Datum
chr(PG_FUNCTION_ARGS)925 chr (PG_FUNCTION_ARGS)
926 {
927 uint32 cvalue = PG_GETARG_UINT32(0);
928 text *result;
929 int encoding = GetDatabaseEncoding();
930
931 if (encoding == PG_UTF8 && cvalue > 127)
932 {
933 /* for Unicode we treat the argument as a code point */
934 int bytes;
935 unsigned char *wch;
936
937 /*
938 * We only allow valid Unicode code points; per RFC3629 that stops at
939 * U+10FFFF, even though 4-byte UTF8 sequences can hold values up to
940 * U+1FFFFF.
941 */
942 if (cvalue > 0x0010ffff)
943 ereport(ERROR,
944 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
945 errmsg("requested character too large for encoding: %d",
946 cvalue)));
947
948 if (cvalue > 0xffff)
949 bytes = 4;
950 else if (cvalue > 0x07ff)
951 bytes = 3;
952 else
953 bytes = 2;
954
955 result = (text *) palloc(VARHDRSZ + bytes);
956 SET_VARSIZE(result, VARHDRSZ + bytes);
957 wch = (unsigned char *) VARDATA(result);
958
959 if (bytes == 2)
960 {
961 wch[0] = 0xC0 | ((cvalue >> 6) & 0x1F);
962 wch[1] = 0x80 | (cvalue & 0x3F);
963 }
964 else if (bytes == 3)
965 {
966 wch[0] = 0xE0 | ((cvalue >> 12) & 0x0F);
967 wch[1] = 0x80 | ((cvalue >> 6) & 0x3F);
968 wch[2] = 0x80 | (cvalue & 0x3F);
969 }
970 else
971 {
972 wch[0] = 0xF0 | ((cvalue >> 18) & 0x07);
973 wch[1] = 0x80 | ((cvalue >> 12) & 0x3F);
974 wch[2] = 0x80 | ((cvalue >> 6) & 0x3F);
975 wch[3] = 0x80 | (cvalue & 0x3F);
976 }
977
978 /*
979 * The preceding range check isn't sufficient, because UTF8 excludes
980 * Unicode "surrogate pair" codes. Make sure what we created is valid
981 * UTF8.
982 */
983 if (!pg_utf8_islegal(wch, bytes))
984 ereport(ERROR,
985 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
986 errmsg("requested character not valid for encoding: %d",
987 cvalue)));
988 }
989 else
990 {
991 bool is_mb;
992
993 /*
994 * Error out on arguments that make no sense or that we can't validly
995 * represent in the encoding.
996 */
997 if (cvalue == 0)
998 ereport(ERROR,
999 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1000 errmsg("null character not permitted")));
1001
1002 is_mb = pg_encoding_max_length(encoding) > 1;
1003
1004 if ((is_mb && (cvalue > 127)) || (!is_mb && (cvalue > 255)))
1005 ereport(ERROR,
1006 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1007 errmsg("requested character too large for encoding: %d",
1008 cvalue)));
1009
1010 result = (text *) palloc(VARHDRSZ + 1);
1011 SET_VARSIZE(result, VARHDRSZ + 1);
1012 *VARDATA(result) = (char) cvalue;
1013 }
1014
1015 PG_RETURN_TEXT_P(result);
1016 }
1017
1018 /********************************************************************
1019 *
1020 * repeat
1021 *
1022 * Syntax:
1023 *
1024 * text repeat(text string, int val)
1025 *
1026 * Purpose:
1027 *
1028 * Repeat string by val.
1029 *
1030 ********************************************************************/
1031
1032 Datum
repeat(PG_FUNCTION_ARGS)1033 repeat(PG_FUNCTION_ARGS)
1034 {
1035 text *string = PG_GETARG_TEXT_PP(0);
1036 int32 count = PG_GETARG_INT32(1);
1037 text *result;
1038 int slen,
1039 tlen;
1040 int i;
1041 char *cp,
1042 *sp;
1043
1044 if (count < 0)
1045 count = 0;
1046
1047 slen = VARSIZE_ANY_EXHDR(string);
1048 tlen = VARHDRSZ + (count * slen);
1049
1050 /* Check for integer overflow */
1051 if (slen != 0 && count != 0)
1052 {
1053 int check = count * slen;
1054 int check2 = check + VARHDRSZ;
1055
1056 if ((check / slen) != count || check2 <= check)
1057 ereport(ERROR,
1058 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1059 errmsg("requested length too large")));
1060 }
1061
1062 result = (text *) palloc(tlen);
1063
1064 SET_VARSIZE(result, tlen);
1065 cp = VARDATA(result);
1066 sp = VARDATA_ANY(string);
1067 for (i = 0; i < count; i++)
1068 {
1069 memcpy(cp, sp, slen);
1070 cp += slen;
1071 CHECK_FOR_INTERRUPTS();
1072 }
1073
1074 PG_RETURN_TEXT_P(result);
1075 }
1076