1 /*-------------------------------------------------------------------------
2  * oracle_compat.c
3  *	Oracle compatible functions.
4  *
5  * Copyright (c) 1996-2020, PostgreSQL Global Development Group
6  *
7  *	Author: Edmund Mergl <E.Mergl@bawue.de>
8  *	Multibyte enhancement: Tatsuo Ishii <ishii@postgresql.org>
9  *
10  *
11  * IDENTIFICATION
12  *	src/backend/utils/adt/oracle_compat.c
13  *
14  *-------------------------------------------------------------------------
15  */
16 #include "postgres.h"
17 
18 #include "common/int.h"
19 #include "mb/pg_wchar.h"
20 #include "miscadmin.h"
21 #include "utils/builtins.h"
22 #include "utils/formatting.h"
23 
24 static text *dotrim(const char *string, int stringlen,
25 					const char *set, int setlen,
26 					bool doltrim, bool dortrim);
27 
28 
29 /********************************************************************
30  *
31  * lower
32  *
33  * Syntax:
34  *
35  *	 text lower(text string)
36  *
37  * Purpose:
38  *
39  *	 Returns string, with all letters forced to lowercase.
40  *
41  ********************************************************************/
42 
43 Datum
lower(PG_FUNCTION_ARGS)44 lower(PG_FUNCTION_ARGS)
45 {
46 	text	   *in_string = PG_GETARG_TEXT_PP(0);
47 	char	   *out_string;
48 	text	   *result;
49 
50 	out_string = str_tolower(VARDATA_ANY(in_string),
51 							 VARSIZE_ANY_EXHDR(in_string),
52 							 PG_GET_COLLATION());
53 	result = cstring_to_text(out_string);
54 	pfree(out_string);
55 
56 	PG_RETURN_TEXT_P(result);
57 }
58 
59 
60 /********************************************************************
61  *
62  * upper
63  *
64  * Syntax:
65  *
66  *	 text upper(text string)
67  *
68  * Purpose:
69  *
70  *	 Returns string, with all letters forced to uppercase.
71  *
72  ********************************************************************/
73 
74 Datum
upper(PG_FUNCTION_ARGS)75 upper(PG_FUNCTION_ARGS)
76 {
77 	text	   *in_string = PG_GETARG_TEXT_PP(0);
78 	char	   *out_string;
79 	text	   *result;
80 
81 	out_string = str_toupper(VARDATA_ANY(in_string),
82 							 VARSIZE_ANY_EXHDR(in_string),
83 							 PG_GET_COLLATION());
84 	result = cstring_to_text(out_string);
85 	pfree(out_string);
86 
87 	PG_RETURN_TEXT_P(result);
88 }
89 
90 
91 /********************************************************************
92  *
93  * initcap
94  *
95  * Syntax:
96  *
97  *	 text initcap(text string)
98  *
99  * Purpose:
100  *
101  *	 Returns string, with first letter of each word in uppercase, all
102  *	 other letters in lowercase. A word is defined as a sequence of
103  *	 alphanumeric characters, delimited by non-alphanumeric
104  *	 characters.
105  *
106  ********************************************************************/
107 
108 Datum
initcap(PG_FUNCTION_ARGS)109 initcap(PG_FUNCTION_ARGS)
110 {
111 	text	   *in_string = PG_GETARG_TEXT_PP(0);
112 	char	   *out_string;
113 	text	   *result;
114 
115 	out_string = str_initcap(VARDATA_ANY(in_string),
116 							 VARSIZE_ANY_EXHDR(in_string),
117 							 PG_GET_COLLATION());
118 	result = cstring_to_text(out_string);
119 	pfree(out_string);
120 
121 	PG_RETURN_TEXT_P(result);
122 }
123 
124 
125 /********************************************************************
126  *
127  * lpad
128  *
129  * Syntax:
130  *
131  *	 text lpad(text string1, int4 len, text string2)
132  *
133  * Purpose:
134  *
135  *	 Returns string1, left-padded to length len with the sequence of
136  *	 characters in string2.  If len is less than the length of string1,
137  *	 instead truncate (on the right) to len.
138  *
139  ********************************************************************/
140 
141 Datum
lpad(PG_FUNCTION_ARGS)142 lpad(PG_FUNCTION_ARGS)
143 {
144 	text	   *string1 = PG_GETARG_TEXT_PP(0);
145 	int32		len = PG_GETARG_INT32(1);
146 	text	   *string2 = PG_GETARG_TEXT_PP(2);
147 	text	   *ret;
148 	char	   *ptr1,
149 			   *ptr2,
150 			   *ptr2start,
151 			   *ptr2end,
152 			   *ptr_ret;
153 	int			m,
154 				s1len,
155 				s2len;
156 
157 	int			bytelen;
158 
159 	/* Negative len is silently taken as zero */
160 	if (len < 0)
161 		len = 0;
162 
163 	s1len = VARSIZE_ANY_EXHDR(string1);
164 	if (s1len < 0)
165 		s1len = 0;				/* shouldn't happen */
166 
167 	s2len = VARSIZE_ANY_EXHDR(string2);
168 	if (s2len < 0)
169 		s2len = 0;				/* shouldn't happen */
170 
171 	s1len = pg_mbstrlen_with_len(VARDATA_ANY(string1), s1len);
172 
173 	if (s1len > len)
174 		s1len = len;			/* truncate string1 to len chars */
175 
176 	if (s2len <= 0)
177 		len = s1len;			/* nothing to pad with, so don't pad */
178 
179 	bytelen = pg_database_encoding_max_length() * len;
180 
181 	/* check for integer overflow */
182 	if (len != 0 && bytelen / pg_database_encoding_max_length() != len)
183 		ereport(ERROR,
184 				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
185 				 errmsg("requested length too large")));
186 
187 	ret = (text *) palloc(VARHDRSZ + bytelen);
188 
189 	m = len - s1len;
190 
191 	ptr2 = ptr2start = VARDATA_ANY(string2);
192 	ptr2end = ptr2 + s2len;
193 	ptr_ret = VARDATA(ret);
194 
195 	while (m--)
196 	{
197 		int			mlen = pg_mblen(ptr2);
198 
199 		memcpy(ptr_ret, ptr2, mlen);
200 		ptr_ret += mlen;
201 		ptr2 += mlen;
202 		if (ptr2 == ptr2end)	/* wrap around at end of s2 */
203 			ptr2 = ptr2start;
204 	}
205 
206 	ptr1 = VARDATA_ANY(string1);
207 
208 	while (s1len--)
209 	{
210 		int			mlen = pg_mblen(ptr1);
211 
212 		memcpy(ptr_ret, ptr1, mlen);
213 		ptr_ret += mlen;
214 		ptr1 += mlen;
215 	}
216 
217 	SET_VARSIZE(ret, ptr_ret - (char *) ret);
218 
219 	PG_RETURN_TEXT_P(ret);
220 }
221 
222 
223 /********************************************************************
224  *
225  * rpad
226  *
227  * Syntax:
228  *
229  *	 text rpad(text string1, int4 len, text string2)
230  *
231  * Purpose:
232  *
233  *	 Returns string1, right-padded to length len with the sequence of
234  *	 characters in string2.  If len is less than the length of string1,
235  *	 instead truncate (on the right) to len.
236  *
237  ********************************************************************/
238 
239 Datum
rpad(PG_FUNCTION_ARGS)240 rpad(PG_FUNCTION_ARGS)
241 {
242 	text	   *string1 = PG_GETARG_TEXT_PP(0);
243 	int32		len = PG_GETARG_INT32(1);
244 	text	   *string2 = PG_GETARG_TEXT_PP(2);
245 	text	   *ret;
246 	char	   *ptr1,
247 			   *ptr2,
248 			   *ptr2start,
249 			   *ptr2end,
250 			   *ptr_ret;
251 	int			m,
252 				s1len,
253 				s2len;
254 
255 	int			bytelen;
256 
257 	/* Negative len is silently taken as zero */
258 	if (len < 0)
259 		len = 0;
260 
261 	s1len = VARSIZE_ANY_EXHDR(string1);
262 	if (s1len < 0)
263 		s1len = 0;				/* shouldn't happen */
264 
265 	s2len = VARSIZE_ANY_EXHDR(string2);
266 	if (s2len < 0)
267 		s2len = 0;				/* shouldn't happen */
268 
269 	s1len = pg_mbstrlen_with_len(VARDATA_ANY(string1), s1len);
270 
271 	if (s1len > len)
272 		s1len = len;			/* truncate string1 to len chars */
273 
274 	if (s2len <= 0)
275 		len = s1len;			/* nothing to pad with, so don't pad */
276 
277 	bytelen = pg_database_encoding_max_length() * len;
278 
279 	/* Check for integer overflow */
280 	if (len != 0 && bytelen / pg_database_encoding_max_length() != len)
281 		ereport(ERROR,
282 				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
283 				 errmsg("requested length too large")));
284 
285 	ret = (text *) palloc(VARHDRSZ + bytelen);
286 	m = len - s1len;
287 
288 	ptr1 = VARDATA_ANY(string1);
289 	ptr_ret = VARDATA(ret);
290 
291 	while (s1len--)
292 	{
293 		int			mlen = pg_mblen(ptr1);
294 
295 		memcpy(ptr_ret, ptr1, mlen);
296 		ptr_ret += mlen;
297 		ptr1 += mlen;
298 	}
299 
300 	ptr2 = ptr2start = VARDATA_ANY(string2);
301 	ptr2end = ptr2 + s2len;
302 
303 	while (m--)
304 	{
305 		int			mlen = pg_mblen(ptr2);
306 
307 		memcpy(ptr_ret, ptr2, mlen);
308 		ptr_ret += mlen;
309 		ptr2 += mlen;
310 		if (ptr2 == ptr2end)	/* wrap around at end of s2 */
311 			ptr2 = ptr2start;
312 	}
313 
314 	SET_VARSIZE(ret, ptr_ret - (char *) ret);
315 
316 	PG_RETURN_TEXT_P(ret);
317 }
318 
319 
320 /********************************************************************
321  *
322  * btrim
323  *
324  * Syntax:
325  *
326  *	 text btrim(text string, text set)
327  *
328  * Purpose:
329  *
330  *	 Returns string with characters removed from the front and back
331  *	 up to the first character not in set.
332  *
333  ********************************************************************/
334 
335 Datum
btrim(PG_FUNCTION_ARGS)336 btrim(PG_FUNCTION_ARGS)
337 {
338 	text	   *string = PG_GETARG_TEXT_PP(0);
339 	text	   *set = PG_GETARG_TEXT_PP(1);
340 	text	   *ret;
341 
342 	ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
343 				 VARDATA_ANY(set), VARSIZE_ANY_EXHDR(set),
344 				 true, true);
345 
346 	PG_RETURN_TEXT_P(ret);
347 }
348 
349 /********************************************************************
350  *
351  * btrim1 --- btrim with set fixed as ' '
352  *
353  ********************************************************************/
354 
355 Datum
btrim1(PG_FUNCTION_ARGS)356 btrim1(PG_FUNCTION_ARGS)
357 {
358 	text	   *string = PG_GETARG_TEXT_PP(0);
359 	text	   *ret;
360 
361 	ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
362 				 " ", 1,
363 				 true, true);
364 
365 	PG_RETURN_TEXT_P(ret);
366 }
367 
368 /*
369  * Common implementation for btrim, ltrim, rtrim
370  */
371 static text *
dotrim(const char * string,int stringlen,const char * set,int setlen,bool doltrim,bool dortrim)372 dotrim(const char *string, int stringlen,
373 	   const char *set, int setlen,
374 	   bool doltrim, bool dortrim)
375 {
376 	int			i;
377 
378 	/* Nothing to do if either string or set is empty */
379 	if (stringlen > 0 && setlen > 0)
380 	{
381 		if (pg_database_encoding_max_length() > 1)
382 		{
383 			/*
384 			 * In the multibyte-encoding case, build arrays of pointers to
385 			 * character starts, so that we can avoid inefficient checks in
386 			 * the inner loops.
387 			 */
388 			const char **stringchars;
389 			const char **setchars;
390 			int		   *stringmblen;
391 			int		   *setmblen;
392 			int			stringnchars;
393 			int			setnchars;
394 			int			resultndx;
395 			int			resultnchars;
396 			const char *p;
397 			int			len;
398 			int			mblen;
399 			const char *str_pos;
400 			int			str_len;
401 
402 			stringchars = (const char **) palloc(stringlen * sizeof(char *));
403 			stringmblen = (int *) palloc(stringlen * sizeof(int));
404 			stringnchars = 0;
405 			p = string;
406 			len = stringlen;
407 			while (len > 0)
408 			{
409 				stringchars[stringnchars] = p;
410 				stringmblen[stringnchars] = mblen = pg_mblen(p);
411 				stringnchars++;
412 				p += mblen;
413 				len -= mblen;
414 			}
415 
416 			setchars = (const char **) palloc(setlen * sizeof(char *));
417 			setmblen = (int *) palloc(setlen * sizeof(int));
418 			setnchars = 0;
419 			p = set;
420 			len = setlen;
421 			while (len > 0)
422 			{
423 				setchars[setnchars] = p;
424 				setmblen[setnchars] = mblen = pg_mblen(p);
425 				setnchars++;
426 				p += mblen;
427 				len -= mblen;
428 			}
429 
430 			resultndx = 0;		/* index in stringchars[] */
431 			resultnchars = stringnchars;
432 
433 			if (doltrim)
434 			{
435 				while (resultnchars > 0)
436 				{
437 					str_pos = stringchars[resultndx];
438 					str_len = stringmblen[resultndx];
439 					for (i = 0; i < setnchars; i++)
440 					{
441 						if (str_len == setmblen[i] &&
442 							memcmp(str_pos, setchars[i], str_len) == 0)
443 							break;
444 					}
445 					if (i >= setnchars)
446 						break;	/* no match here */
447 					string += str_len;
448 					stringlen -= str_len;
449 					resultndx++;
450 					resultnchars--;
451 				}
452 			}
453 
454 			if (dortrim)
455 			{
456 				while (resultnchars > 0)
457 				{
458 					str_pos = stringchars[resultndx + resultnchars - 1];
459 					str_len = stringmblen[resultndx + resultnchars - 1];
460 					for (i = 0; i < setnchars; i++)
461 					{
462 						if (str_len == setmblen[i] &&
463 							memcmp(str_pos, setchars[i], str_len) == 0)
464 							break;
465 					}
466 					if (i >= setnchars)
467 						break;	/* no match here */
468 					stringlen -= str_len;
469 					resultnchars--;
470 				}
471 			}
472 
473 			pfree(stringchars);
474 			pfree(stringmblen);
475 			pfree(setchars);
476 			pfree(setmblen);
477 		}
478 		else
479 		{
480 			/*
481 			 * In the single-byte-encoding case, we don't need such overhead.
482 			 */
483 			if (doltrim)
484 			{
485 				while (stringlen > 0)
486 				{
487 					char		str_ch = *string;
488 
489 					for (i = 0; i < setlen; i++)
490 					{
491 						if (str_ch == set[i])
492 							break;
493 					}
494 					if (i >= setlen)
495 						break;	/* no match here */
496 					string++;
497 					stringlen--;
498 				}
499 			}
500 
501 			if (dortrim)
502 			{
503 				while (stringlen > 0)
504 				{
505 					char		str_ch = string[stringlen - 1];
506 
507 					for (i = 0; i < setlen; i++)
508 					{
509 						if (str_ch == set[i])
510 							break;
511 					}
512 					if (i >= setlen)
513 						break;	/* no match here */
514 					stringlen--;
515 				}
516 			}
517 		}
518 	}
519 
520 	/* Return selected portion of string */
521 	return cstring_to_text_with_len(string, stringlen);
522 }
523 
524 /********************************************************************
525  *
526  * byteatrim
527  *
528  * Syntax:
529  *
530  *	 bytea byteatrim(bytea string, bytea set)
531  *
532  * Purpose:
533  *
534  *	 Returns string with characters removed from the front and back
535  *	 up to the first character not in set.
536  *
537  * Cloned from btrim and modified as required.
538  ********************************************************************/
539 
540 Datum
byteatrim(PG_FUNCTION_ARGS)541 byteatrim(PG_FUNCTION_ARGS)
542 {
543 	bytea	   *string = PG_GETARG_BYTEA_PP(0);
544 	bytea	   *set = PG_GETARG_BYTEA_PP(1);
545 	bytea	   *ret;
546 	char	   *ptr,
547 			   *end,
548 			   *ptr2,
549 			   *ptr2start,
550 			   *end2;
551 	int			m,
552 				stringlen,
553 				setlen;
554 
555 	stringlen = VARSIZE_ANY_EXHDR(string);
556 	setlen = VARSIZE_ANY_EXHDR(set);
557 
558 	if (stringlen <= 0 || setlen <= 0)
559 		PG_RETURN_BYTEA_P(string);
560 
561 	m = stringlen;
562 	ptr = VARDATA_ANY(string);
563 	end = ptr + stringlen - 1;
564 	ptr2start = VARDATA_ANY(set);
565 	end2 = ptr2start + setlen - 1;
566 
567 	while (m > 0)
568 	{
569 		ptr2 = ptr2start;
570 		while (ptr2 <= end2)
571 		{
572 			if (*ptr == *ptr2)
573 				break;
574 			++ptr2;
575 		}
576 		if (ptr2 > end2)
577 			break;
578 		ptr++;
579 		m--;
580 	}
581 
582 	while (m > 0)
583 	{
584 		ptr2 = ptr2start;
585 		while (ptr2 <= end2)
586 		{
587 			if (*end == *ptr2)
588 				break;
589 			++ptr2;
590 		}
591 		if (ptr2 > end2)
592 			break;
593 		end--;
594 		m--;
595 	}
596 
597 	ret = (bytea *) palloc(VARHDRSZ + m);
598 	SET_VARSIZE(ret, VARHDRSZ + m);
599 	memcpy(VARDATA(ret), ptr, m);
600 
601 	PG_RETURN_BYTEA_P(ret);
602 }
603 
604 /********************************************************************
605  *
606  * ltrim
607  *
608  * Syntax:
609  *
610  *	 text ltrim(text string, text set)
611  *
612  * Purpose:
613  *
614  *	 Returns string with initial characters removed up to the first
615  *	 character not in set.
616  *
617  ********************************************************************/
618 
619 Datum
ltrim(PG_FUNCTION_ARGS)620 ltrim(PG_FUNCTION_ARGS)
621 {
622 	text	   *string = PG_GETARG_TEXT_PP(0);
623 	text	   *set = PG_GETARG_TEXT_PP(1);
624 	text	   *ret;
625 
626 	ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
627 				 VARDATA_ANY(set), VARSIZE_ANY_EXHDR(set),
628 				 true, false);
629 
630 	PG_RETURN_TEXT_P(ret);
631 }
632 
633 /********************************************************************
634  *
635  * ltrim1 --- ltrim with set fixed as ' '
636  *
637  ********************************************************************/
638 
639 Datum
ltrim1(PG_FUNCTION_ARGS)640 ltrim1(PG_FUNCTION_ARGS)
641 {
642 	text	   *string = PG_GETARG_TEXT_PP(0);
643 	text	   *ret;
644 
645 	ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
646 				 " ", 1,
647 				 true, false);
648 
649 	PG_RETURN_TEXT_P(ret);
650 }
651 
652 /********************************************************************
653  *
654  * rtrim
655  *
656  * Syntax:
657  *
658  *	 text rtrim(text string, text set)
659  *
660  * Purpose:
661  *
662  *	 Returns string with final characters removed after the last
663  *	 character not in set.
664  *
665  ********************************************************************/
666 
667 Datum
rtrim(PG_FUNCTION_ARGS)668 rtrim(PG_FUNCTION_ARGS)
669 {
670 	text	   *string = PG_GETARG_TEXT_PP(0);
671 	text	   *set = PG_GETARG_TEXT_PP(1);
672 	text	   *ret;
673 
674 	ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
675 				 VARDATA_ANY(set), VARSIZE_ANY_EXHDR(set),
676 				 false, true);
677 
678 	PG_RETURN_TEXT_P(ret);
679 }
680 
681 /********************************************************************
682  *
683  * rtrim1 --- rtrim with set fixed as ' '
684  *
685  ********************************************************************/
686 
687 Datum
rtrim1(PG_FUNCTION_ARGS)688 rtrim1(PG_FUNCTION_ARGS)
689 {
690 	text	   *string = PG_GETARG_TEXT_PP(0);
691 	text	   *ret;
692 
693 	ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
694 				 " ", 1,
695 				 false, true);
696 
697 	PG_RETURN_TEXT_P(ret);
698 }
699 
700 
701 /********************************************************************
702  *
703  * translate
704  *
705  * Syntax:
706  *
707  *	 text translate(text string, text from, text to)
708  *
709  * Purpose:
710  *
711  *	 Returns string after replacing all occurrences of characters in from
712  *	 with the corresponding character in to.  If from is longer than to,
713  *	 occurrences of the extra characters in from are deleted.
714  *	 Improved by Edwin Ramirez <ramirez@doc.mssm.edu>.
715  *
716  ********************************************************************/
717 
718 Datum
translate(PG_FUNCTION_ARGS)719 translate(PG_FUNCTION_ARGS)
720 {
721 	text	   *string = PG_GETARG_TEXT_PP(0);
722 	text	   *from = PG_GETARG_TEXT_PP(1);
723 	text	   *to = PG_GETARG_TEXT_PP(2);
724 	text	   *result;
725 	char	   *from_ptr,
726 			   *to_ptr;
727 	char	   *source,
728 			   *target;
729 	int			m,
730 				fromlen,
731 				tolen,
732 				retlen,
733 				i;
734 	int			worst_len;
735 	int			len;
736 	int			source_len;
737 	int			from_index;
738 
739 	m = VARSIZE_ANY_EXHDR(string);
740 	if (m <= 0)
741 		PG_RETURN_TEXT_P(string);
742 	source = VARDATA_ANY(string);
743 
744 	fromlen = VARSIZE_ANY_EXHDR(from);
745 	from_ptr = VARDATA_ANY(from);
746 	tolen = VARSIZE_ANY_EXHDR(to);
747 	to_ptr = VARDATA_ANY(to);
748 
749 	/*
750 	 * The worst-case expansion is to substitute a max-length character for a
751 	 * single-byte character at each position of the string.
752 	 */
753 	worst_len = pg_database_encoding_max_length() * m;
754 
755 	/* check for integer overflow */
756 	if (worst_len / pg_database_encoding_max_length() != m)
757 		ereport(ERROR,
758 				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
759 				 errmsg("requested length too large")));
760 
761 	result = (text *) palloc(worst_len + VARHDRSZ);
762 	target = VARDATA(result);
763 	retlen = 0;
764 
765 	while (m > 0)
766 	{
767 		source_len = pg_mblen(source);
768 		from_index = 0;
769 
770 		for (i = 0; i < fromlen; i += len)
771 		{
772 			len = pg_mblen(&from_ptr[i]);
773 			if (len == source_len &&
774 				memcmp(source, &from_ptr[i], len) == 0)
775 				break;
776 
777 			from_index++;
778 		}
779 		if (i < fromlen)
780 		{
781 			/* substitute */
782 			char	   *p = to_ptr;
783 
784 			for (i = 0; i < from_index; i++)
785 			{
786 				p += pg_mblen(p);
787 				if (p >= (to_ptr + tolen))
788 					break;
789 			}
790 			if (p < (to_ptr + tolen))
791 			{
792 				len = pg_mblen(p);
793 				memcpy(target, p, len);
794 				target += len;
795 				retlen += len;
796 			}
797 
798 		}
799 		else
800 		{
801 			/* no match, so copy */
802 			memcpy(target, source, source_len);
803 			target += source_len;
804 			retlen += source_len;
805 		}
806 
807 		source += source_len;
808 		m -= source_len;
809 	}
810 
811 	SET_VARSIZE(result, retlen + VARHDRSZ);
812 
813 	/*
814 	 * The function result is probably much bigger than needed, if we're using
815 	 * a multibyte encoding, but it's not worth reallocating it; the result
816 	 * probably won't live long anyway.
817 	 */
818 
819 	PG_RETURN_TEXT_P(result);
820 }
821 
822 /********************************************************************
823  *
824  * ascii
825  *
826  * Syntax:
827  *
828  *	 int ascii(text string)
829  *
830  * Purpose:
831  *
832  *	 Returns the decimal representation of the first character from
833  *	 string.
834  *	 If the string is empty we return 0.
835  *	 If the database encoding is UTF8, we return the Unicode codepoint.
836  *	 If the database encoding is any other multi-byte encoding, we
837  *	 return the value of the first byte if it is an ASCII character
838  *	 (range 1 .. 127), or raise an error.
839  *	 For all other encodings we return the value of the first byte,
840  *	 (range 1..255).
841  *
842  ********************************************************************/
843 
844 Datum
ascii(PG_FUNCTION_ARGS)845 ascii(PG_FUNCTION_ARGS)
846 {
847 	text	   *string = PG_GETARG_TEXT_PP(0);
848 	int			encoding = GetDatabaseEncoding();
849 	unsigned char *data;
850 
851 	if (VARSIZE_ANY_EXHDR(string) <= 0)
852 		PG_RETURN_INT32(0);
853 
854 	data = (unsigned char *) VARDATA_ANY(string);
855 
856 	if (encoding == PG_UTF8 && *data > 127)
857 	{
858 		/* return the code point for Unicode */
859 
860 		int			result = 0,
861 					tbytes = 0,
862 					i;
863 
864 		if (*data >= 0xF0)
865 		{
866 			result = *data & 0x07;
867 			tbytes = 3;
868 		}
869 		else if (*data >= 0xE0)
870 		{
871 			result = *data & 0x0F;
872 			tbytes = 2;
873 		}
874 		else
875 		{
876 			Assert(*data > 0xC0);
877 			result = *data & 0x1f;
878 			tbytes = 1;
879 		}
880 
881 		Assert(tbytes > 0);
882 
883 		for (i = 1; i <= tbytes; i++)
884 		{
885 			Assert((data[i] & 0xC0) == 0x80);
886 			result = (result << 6) + (data[i] & 0x3f);
887 		}
888 
889 		PG_RETURN_INT32(result);
890 	}
891 	else
892 	{
893 		if (pg_encoding_max_length(encoding) > 1 && *data > 127)
894 			ereport(ERROR,
895 					(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
896 					 errmsg("requested character too large")));
897 
898 
899 		PG_RETURN_INT32((int32) *data);
900 	}
901 }
902 
903 /********************************************************************
904  *
905  * chr
906  *
907  * Syntax:
908  *
909  *	 text chr(int val)
910  *
911  * Purpose:
912  *
913  *	Returns the character having the binary equivalent to val.
914  *
915  * For UTF8 we treat the argument as a Unicode code point.
916  * For other multi-byte encodings we raise an error for arguments
917  * outside the strict ASCII range (1..127).
918  *
919  * It's important that we don't ever return a value that is not valid
920  * in the database encoding, so that this doesn't become a way for
921  * invalid data to enter the database.
922  *
923  ********************************************************************/
924 
925 Datum
chr(PG_FUNCTION_ARGS)926 chr			(PG_FUNCTION_ARGS)
927 {
928 	uint32		cvalue = PG_GETARG_UINT32(0);
929 	text	   *result;
930 	int			encoding = GetDatabaseEncoding();
931 
932 	if (encoding == PG_UTF8 && cvalue > 127)
933 	{
934 		/* for Unicode we treat the argument as a code point */
935 		int			bytes;
936 		unsigned char *wch;
937 
938 		/*
939 		 * We only allow valid Unicode code points; per RFC3629 that stops at
940 		 * U+10FFFF, even though 4-byte UTF8 sequences can hold values up to
941 		 * U+1FFFFF.
942 		 */
943 		if (cvalue > 0x0010ffff)
944 			ereport(ERROR,
945 					(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
946 					 errmsg("requested character too large for encoding: %d",
947 							cvalue)));
948 
949 		if (cvalue > 0xffff)
950 			bytes = 4;
951 		else if (cvalue > 0x07ff)
952 			bytes = 3;
953 		else
954 			bytes = 2;
955 
956 		result = (text *) palloc(VARHDRSZ + bytes);
957 		SET_VARSIZE(result, VARHDRSZ + bytes);
958 		wch = (unsigned char *) VARDATA(result);
959 
960 		if (bytes == 2)
961 		{
962 			wch[0] = 0xC0 | ((cvalue >> 6) & 0x1F);
963 			wch[1] = 0x80 | (cvalue & 0x3F);
964 		}
965 		else if (bytes == 3)
966 		{
967 			wch[0] = 0xE0 | ((cvalue >> 12) & 0x0F);
968 			wch[1] = 0x80 | ((cvalue >> 6) & 0x3F);
969 			wch[2] = 0x80 | (cvalue & 0x3F);
970 		}
971 		else
972 		{
973 			wch[0] = 0xF0 | ((cvalue >> 18) & 0x07);
974 			wch[1] = 0x80 | ((cvalue >> 12) & 0x3F);
975 			wch[2] = 0x80 | ((cvalue >> 6) & 0x3F);
976 			wch[3] = 0x80 | (cvalue & 0x3F);
977 		}
978 
979 		/*
980 		 * The preceding range check isn't sufficient, because UTF8 excludes
981 		 * Unicode "surrogate pair" codes.  Make sure what we created is valid
982 		 * UTF8.
983 		 */
984 		if (!pg_utf8_islegal(wch, bytes))
985 			ereport(ERROR,
986 					(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
987 					 errmsg("requested character not valid for encoding: %d",
988 							cvalue)));
989 	}
990 	else
991 	{
992 		bool		is_mb;
993 
994 		/*
995 		 * Error out on arguments that make no sense or that we can't validly
996 		 * represent in the encoding.
997 		 */
998 		if (cvalue == 0)
999 			ereport(ERROR,
1000 					(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1001 					 errmsg("null character not permitted")));
1002 
1003 		is_mb = pg_encoding_max_length(encoding) > 1;
1004 
1005 		if ((is_mb && (cvalue > 127)) || (!is_mb && (cvalue > 255)))
1006 			ereport(ERROR,
1007 					(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1008 					 errmsg("requested character too large for encoding: %d",
1009 							cvalue)));
1010 
1011 		result = (text *) palloc(VARHDRSZ + 1);
1012 		SET_VARSIZE(result, VARHDRSZ + 1);
1013 		*VARDATA(result) = (char) cvalue;
1014 	}
1015 
1016 	PG_RETURN_TEXT_P(result);
1017 }
1018 
1019 /********************************************************************
1020  *
1021  * repeat
1022  *
1023  * Syntax:
1024  *
1025  *	 text repeat(text string, int val)
1026  *
1027  * Purpose:
1028  *
1029  *	Repeat string by val.
1030  *
1031  ********************************************************************/
1032 
1033 Datum
repeat(PG_FUNCTION_ARGS)1034 repeat(PG_FUNCTION_ARGS)
1035 {
1036 	text	   *string = PG_GETARG_TEXT_PP(0);
1037 	int32		count = PG_GETARG_INT32(1);
1038 	text	   *result;
1039 	int			slen,
1040 				tlen;
1041 	int			i;
1042 	char	   *cp,
1043 			   *sp;
1044 
1045 	if (count < 0)
1046 		count = 0;
1047 
1048 	slen = VARSIZE_ANY_EXHDR(string);
1049 
1050 	if (unlikely(pg_mul_s32_overflow(count, slen, &tlen)) ||
1051 		unlikely(pg_add_s32_overflow(tlen, VARHDRSZ, &tlen)))
1052 		ereport(ERROR,
1053 				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1054 				 errmsg("requested length too large")));
1055 
1056 	result = (text *) palloc(tlen);
1057 
1058 	SET_VARSIZE(result, tlen);
1059 	cp = VARDATA(result);
1060 	sp = VARDATA_ANY(string);
1061 	for (i = 0; i < count; i++)
1062 	{
1063 		memcpy(cp, sp, slen);
1064 		cp += slen;
1065 		CHECK_FOR_INTERRUPTS();
1066 	}
1067 
1068 	PG_RETURN_TEXT_P(result);
1069 }
1070