1 /*-------------------------------------------------------------------------
2  * oracle_compat.c
3  *	Oracle compatible functions.
4  *
5  * Copyright (c) 1996-2021, PostgreSQL Global Development Group
6  *
7  *	Author: Edmund Mergl <E.Mergl@bawue.de>
8  *	Multibyte enhancement: Tatsuo Ishii <ishii@postgresql.org>
9  *
10  *
11  * IDENTIFICATION
12  *	src/backend/utils/adt/oracle_compat.c
13  *
14  *-------------------------------------------------------------------------
15  */
16 #include "postgres.h"
17 
18 #include "common/int.h"
19 #include "mb/pg_wchar.h"
20 #include "miscadmin.h"
21 #include "utils/builtins.h"
22 #include "utils/formatting.h"
23 
/*
 * Prototypes for the file-local trimming workers.  dotrim() is the shared
 * implementation behind the text btrim/ltrim/rtrim functions; dobyteatrim()
 * plays the same role for the bytea variants.
 */
static text *dotrim(const char *string, int stringlen,
					const char *set, int setlen,
					bool doltrim, bool dortrim);
static bytea *dobyteatrim(bytea *string, bytea *set,
						  bool doltrim, bool dortrim);
29 
30 
31 /********************************************************************
32  *
33  * lower
34  *
35  * Syntax:
36  *
37  *	 text lower(text string)
38  *
39  * Purpose:
40  *
41  *	 Returns string, with all letters forced to lowercase.
42  *
43  ********************************************************************/
44 
45 Datum
lower(PG_FUNCTION_ARGS)46 lower(PG_FUNCTION_ARGS)
47 {
48 	text	   *in_string = PG_GETARG_TEXT_PP(0);
49 	char	   *out_string;
50 	text	   *result;
51 
52 	out_string = str_tolower(VARDATA_ANY(in_string),
53 							 VARSIZE_ANY_EXHDR(in_string),
54 							 PG_GET_COLLATION());
55 	result = cstring_to_text(out_string);
56 	pfree(out_string);
57 
58 	PG_RETURN_TEXT_P(result);
59 }
60 
61 
62 /********************************************************************
63  *
64  * upper
65  *
66  * Syntax:
67  *
68  *	 text upper(text string)
69  *
70  * Purpose:
71  *
72  *	 Returns string, with all letters forced to uppercase.
73  *
74  ********************************************************************/
75 
76 Datum
upper(PG_FUNCTION_ARGS)77 upper(PG_FUNCTION_ARGS)
78 {
79 	text	   *in_string = PG_GETARG_TEXT_PP(0);
80 	char	   *out_string;
81 	text	   *result;
82 
83 	out_string = str_toupper(VARDATA_ANY(in_string),
84 							 VARSIZE_ANY_EXHDR(in_string),
85 							 PG_GET_COLLATION());
86 	result = cstring_to_text(out_string);
87 	pfree(out_string);
88 
89 	PG_RETURN_TEXT_P(result);
90 }
91 
92 
93 /********************************************************************
94  *
95  * initcap
96  *
97  * Syntax:
98  *
99  *	 text initcap(text string)
100  *
101  * Purpose:
102  *
103  *	 Returns string, with first letter of each word in uppercase, all
104  *	 other letters in lowercase. A word is defined as a sequence of
105  *	 alphanumeric characters, delimited by non-alphanumeric
106  *	 characters.
107  *
108  ********************************************************************/
109 
110 Datum
initcap(PG_FUNCTION_ARGS)111 initcap(PG_FUNCTION_ARGS)
112 {
113 	text	   *in_string = PG_GETARG_TEXT_PP(0);
114 	char	   *out_string;
115 	text	   *result;
116 
117 	out_string = str_initcap(VARDATA_ANY(in_string),
118 							 VARSIZE_ANY_EXHDR(in_string),
119 							 PG_GET_COLLATION());
120 	result = cstring_to_text(out_string);
121 	pfree(out_string);
122 
123 	PG_RETURN_TEXT_P(result);
124 }
125 
126 
127 /********************************************************************
128  *
129  * lpad
130  *
131  * Syntax:
132  *
133  *	 text lpad(text string1, int4 len, text string2)
134  *
135  * Purpose:
136  *
137  *	 Returns string1, left-padded to length len with the sequence of
138  *	 characters in string2.  If len is less than the length of string1,
139  *	 instead truncate (on the right) to len.
140  *
141  ********************************************************************/
142 
143 Datum
lpad(PG_FUNCTION_ARGS)144 lpad(PG_FUNCTION_ARGS)
145 {
146 	text	   *string1 = PG_GETARG_TEXT_PP(0);
147 	int32		len = PG_GETARG_INT32(1);
148 	text	   *string2 = PG_GETARG_TEXT_PP(2);
149 	text	   *ret;
150 	char	   *ptr1,
151 			   *ptr2,
152 			   *ptr2start,
153 			   *ptr2end,
154 			   *ptr_ret;
155 	int			m,
156 				s1len,
157 				s2len;
158 
159 	int			bytelen;
160 
161 	/* Negative len is silently taken as zero */
162 	if (len < 0)
163 		len = 0;
164 
165 	s1len = VARSIZE_ANY_EXHDR(string1);
166 	if (s1len < 0)
167 		s1len = 0;				/* shouldn't happen */
168 
169 	s2len = VARSIZE_ANY_EXHDR(string2);
170 	if (s2len < 0)
171 		s2len = 0;				/* shouldn't happen */
172 
173 	s1len = pg_mbstrlen_with_len(VARDATA_ANY(string1), s1len);
174 
175 	if (s1len > len)
176 		s1len = len;			/* truncate string1 to len chars */
177 
178 	if (s2len <= 0)
179 		len = s1len;			/* nothing to pad with, so don't pad */
180 
181 	bytelen = pg_database_encoding_max_length() * len;
182 
183 	/* check for integer overflow */
184 	if (len != 0 && bytelen / pg_database_encoding_max_length() != len)
185 		ereport(ERROR,
186 				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
187 				 errmsg("requested length too large")));
188 
189 	ret = (text *) palloc(VARHDRSZ + bytelen);
190 
191 	m = len - s1len;
192 
193 	ptr2 = ptr2start = VARDATA_ANY(string2);
194 	ptr2end = ptr2 + s2len;
195 	ptr_ret = VARDATA(ret);
196 
197 	while (m--)
198 	{
199 		int			mlen = pg_mblen(ptr2);
200 
201 		memcpy(ptr_ret, ptr2, mlen);
202 		ptr_ret += mlen;
203 		ptr2 += mlen;
204 		if (ptr2 == ptr2end)	/* wrap around at end of s2 */
205 			ptr2 = ptr2start;
206 	}
207 
208 	ptr1 = VARDATA_ANY(string1);
209 
210 	while (s1len--)
211 	{
212 		int			mlen = pg_mblen(ptr1);
213 
214 		memcpy(ptr_ret, ptr1, mlen);
215 		ptr_ret += mlen;
216 		ptr1 += mlen;
217 	}
218 
219 	SET_VARSIZE(ret, ptr_ret - (char *) ret);
220 
221 	PG_RETURN_TEXT_P(ret);
222 }
223 
224 
225 /********************************************************************
226  *
227  * rpad
228  *
229  * Syntax:
230  *
231  *	 text rpad(text string1, int4 len, text string2)
232  *
233  * Purpose:
234  *
235  *	 Returns string1, right-padded to length len with the sequence of
236  *	 characters in string2.  If len is less than the length of string1,
237  *	 instead truncate (on the right) to len.
238  *
239  ********************************************************************/
240 
241 Datum
rpad(PG_FUNCTION_ARGS)242 rpad(PG_FUNCTION_ARGS)
243 {
244 	text	   *string1 = PG_GETARG_TEXT_PP(0);
245 	int32		len = PG_GETARG_INT32(1);
246 	text	   *string2 = PG_GETARG_TEXT_PP(2);
247 	text	   *ret;
248 	char	   *ptr1,
249 			   *ptr2,
250 			   *ptr2start,
251 			   *ptr2end,
252 			   *ptr_ret;
253 	int			m,
254 				s1len,
255 				s2len;
256 
257 	int			bytelen;
258 
259 	/* Negative len is silently taken as zero */
260 	if (len < 0)
261 		len = 0;
262 
263 	s1len = VARSIZE_ANY_EXHDR(string1);
264 	if (s1len < 0)
265 		s1len = 0;				/* shouldn't happen */
266 
267 	s2len = VARSIZE_ANY_EXHDR(string2);
268 	if (s2len < 0)
269 		s2len = 0;				/* shouldn't happen */
270 
271 	s1len = pg_mbstrlen_with_len(VARDATA_ANY(string1), s1len);
272 
273 	if (s1len > len)
274 		s1len = len;			/* truncate string1 to len chars */
275 
276 	if (s2len <= 0)
277 		len = s1len;			/* nothing to pad with, so don't pad */
278 
279 	bytelen = pg_database_encoding_max_length() * len;
280 
281 	/* Check for integer overflow */
282 	if (len != 0 && bytelen / pg_database_encoding_max_length() != len)
283 		ereport(ERROR,
284 				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
285 				 errmsg("requested length too large")));
286 
287 	ret = (text *) palloc(VARHDRSZ + bytelen);
288 	m = len - s1len;
289 
290 	ptr1 = VARDATA_ANY(string1);
291 	ptr_ret = VARDATA(ret);
292 
293 	while (s1len--)
294 	{
295 		int			mlen = pg_mblen(ptr1);
296 
297 		memcpy(ptr_ret, ptr1, mlen);
298 		ptr_ret += mlen;
299 		ptr1 += mlen;
300 	}
301 
302 	ptr2 = ptr2start = VARDATA_ANY(string2);
303 	ptr2end = ptr2 + s2len;
304 
305 	while (m--)
306 	{
307 		int			mlen = pg_mblen(ptr2);
308 
309 		memcpy(ptr_ret, ptr2, mlen);
310 		ptr_ret += mlen;
311 		ptr2 += mlen;
312 		if (ptr2 == ptr2end)	/* wrap around at end of s2 */
313 			ptr2 = ptr2start;
314 	}
315 
316 	SET_VARSIZE(ret, ptr_ret - (char *) ret);
317 
318 	PG_RETURN_TEXT_P(ret);
319 }
320 
321 
322 /********************************************************************
323  *
324  * btrim
325  *
326  * Syntax:
327  *
328  *	 text btrim(text string, text set)
329  *
330  * Purpose:
331  *
332  *	 Returns string with characters removed from the front and back
333  *	 up to the first character not in set.
334  *
335  ********************************************************************/
336 
337 Datum
btrim(PG_FUNCTION_ARGS)338 btrim(PG_FUNCTION_ARGS)
339 {
340 	text	   *string = PG_GETARG_TEXT_PP(0);
341 	text	   *set = PG_GETARG_TEXT_PP(1);
342 	text	   *ret;
343 
344 	ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
345 				 VARDATA_ANY(set), VARSIZE_ANY_EXHDR(set),
346 				 true, true);
347 
348 	PG_RETURN_TEXT_P(ret);
349 }
350 
351 /********************************************************************
352  *
353  * btrim1 --- btrim with set fixed as ' '
354  *
355  ********************************************************************/
356 
357 Datum
btrim1(PG_FUNCTION_ARGS)358 btrim1(PG_FUNCTION_ARGS)
359 {
360 	text	   *string = PG_GETARG_TEXT_PP(0);
361 	text	   *ret;
362 
363 	ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
364 				 " ", 1,
365 				 true, true);
366 
367 	PG_RETURN_TEXT_P(ret);
368 }
369 
370 /*
371  * Common implementation for btrim, ltrim, rtrim
372  */
373 static text *
dotrim(const char * string,int stringlen,const char * set,int setlen,bool doltrim,bool dortrim)374 dotrim(const char *string, int stringlen,
375 	   const char *set, int setlen,
376 	   bool doltrim, bool dortrim)
377 {
378 	int			i;
379 
380 	/* Nothing to do if either string or set is empty */
381 	if (stringlen > 0 && setlen > 0)
382 	{
383 		if (pg_database_encoding_max_length() > 1)
384 		{
385 			/*
386 			 * In the multibyte-encoding case, build arrays of pointers to
387 			 * character starts, so that we can avoid inefficient checks in
388 			 * the inner loops.
389 			 */
390 			const char **stringchars;
391 			const char **setchars;
392 			int		   *stringmblen;
393 			int		   *setmblen;
394 			int			stringnchars;
395 			int			setnchars;
396 			int			resultndx;
397 			int			resultnchars;
398 			const char *p;
399 			int			len;
400 			int			mblen;
401 			const char *str_pos;
402 			int			str_len;
403 
404 			stringchars = (const char **) palloc(stringlen * sizeof(char *));
405 			stringmblen = (int *) palloc(stringlen * sizeof(int));
406 			stringnchars = 0;
407 			p = string;
408 			len = stringlen;
409 			while (len > 0)
410 			{
411 				stringchars[stringnchars] = p;
412 				stringmblen[stringnchars] = mblen = pg_mblen(p);
413 				stringnchars++;
414 				p += mblen;
415 				len -= mblen;
416 			}
417 
418 			setchars = (const char **) palloc(setlen * sizeof(char *));
419 			setmblen = (int *) palloc(setlen * sizeof(int));
420 			setnchars = 0;
421 			p = set;
422 			len = setlen;
423 			while (len > 0)
424 			{
425 				setchars[setnchars] = p;
426 				setmblen[setnchars] = mblen = pg_mblen(p);
427 				setnchars++;
428 				p += mblen;
429 				len -= mblen;
430 			}
431 
432 			resultndx = 0;		/* index in stringchars[] */
433 			resultnchars = stringnchars;
434 
435 			if (doltrim)
436 			{
437 				while (resultnchars > 0)
438 				{
439 					str_pos = stringchars[resultndx];
440 					str_len = stringmblen[resultndx];
441 					for (i = 0; i < setnchars; i++)
442 					{
443 						if (str_len == setmblen[i] &&
444 							memcmp(str_pos, setchars[i], str_len) == 0)
445 							break;
446 					}
447 					if (i >= setnchars)
448 						break;	/* no match here */
449 					string += str_len;
450 					stringlen -= str_len;
451 					resultndx++;
452 					resultnchars--;
453 				}
454 			}
455 
456 			if (dortrim)
457 			{
458 				while (resultnchars > 0)
459 				{
460 					str_pos = stringchars[resultndx + resultnchars - 1];
461 					str_len = stringmblen[resultndx + resultnchars - 1];
462 					for (i = 0; i < setnchars; i++)
463 					{
464 						if (str_len == setmblen[i] &&
465 							memcmp(str_pos, setchars[i], str_len) == 0)
466 							break;
467 					}
468 					if (i >= setnchars)
469 						break;	/* no match here */
470 					stringlen -= str_len;
471 					resultnchars--;
472 				}
473 			}
474 
475 			pfree(stringchars);
476 			pfree(stringmblen);
477 			pfree(setchars);
478 			pfree(setmblen);
479 		}
480 		else
481 		{
482 			/*
483 			 * In the single-byte-encoding case, we don't need such overhead.
484 			 */
485 			if (doltrim)
486 			{
487 				while (stringlen > 0)
488 				{
489 					char		str_ch = *string;
490 
491 					for (i = 0; i < setlen; i++)
492 					{
493 						if (str_ch == set[i])
494 							break;
495 					}
496 					if (i >= setlen)
497 						break;	/* no match here */
498 					string++;
499 					stringlen--;
500 				}
501 			}
502 
503 			if (dortrim)
504 			{
505 				while (stringlen > 0)
506 				{
507 					char		str_ch = string[stringlen - 1];
508 
509 					for (i = 0; i < setlen; i++)
510 					{
511 						if (str_ch == set[i])
512 							break;
513 					}
514 					if (i >= setlen)
515 						break;	/* no match here */
516 					stringlen--;
517 				}
518 			}
519 		}
520 	}
521 
522 	/* Return selected portion of string */
523 	return cstring_to_text_with_len(string, stringlen);
524 }
525 
526 /*
527  * Common implementation for bytea versions of btrim, ltrim, rtrim
528  */
529 bytea *
dobyteatrim(bytea * string,bytea * set,bool doltrim,bool dortrim)530 dobyteatrim(bytea *string, bytea *set, bool doltrim, bool dortrim)
531 {
532 	bytea	   *ret;
533 	char	   *ptr,
534 			   *end,
535 			   *ptr2,
536 			   *ptr2start,
537 			   *end2;
538 	int			m,
539 				stringlen,
540 				setlen;
541 
542 	stringlen = VARSIZE_ANY_EXHDR(string);
543 	setlen = VARSIZE_ANY_EXHDR(set);
544 
545 	if (stringlen <= 0 || setlen <= 0)
546 		return string;
547 
548 	m = stringlen;
549 	ptr = VARDATA_ANY(string);
550 	end = ptr + stringlen - 1;
551 	ptr2start = VARDATA_ANY(set);
552 	end2 = ptr2start + setlen - 1;
553 
554 	if (doltrim)
555 	{
556 		while (m > 0)
557 		{
558 			ptr2 = ptr2start;
559 			while (ptr2 <= end2)
560 			{
561 				if (*ptr == *ptr2)
562 					break;
563 				++ptr2;
564 			}
565 			if (ptr2 > end2)
566 				break;
567 			ptr++;
568 			m--;
569 		}
570 	}
571 
572 	if (dortrim)
573 	{
574 		while (m > 0)
575 		{
576 			ptr2 = ptr2start;
577 			while (ptr2 <= end2)
578 			{
579 				if (*end == *ptr2)
580 					break;
581 				++ptr2;
582 			}
583 			if (ptr2 > end2)
584 				break;
585 			end--;
586 			m--;
587 		}
588 	}
589 
590 	ret = (bytea *) palloc(VARHDRSZ + m);
591 	SET_VARSIZE(ret, VARHDRSZ + m);
592 	memcpy(VARDATA(ret), ptr, m);
593 	return ret;
594 }
595 
596 /********************************************************************
597  *
598  * byteatrim
599  *
600  * Syntax:
601  *
602  *	 bytea byteatrim(bytea string, bytea set)
603  *
604  * Purpose:
605  *
606  *	 Returns string with characters removed from the front and back
607  *	 up to the first character not in set.
608  *
609  * Cloned from btrim and modified as required.
610  ********************************************************************/
611 
612 Datum
byteatrim(PG_FUNCTION_ARGS)613 byteatrim(PG_FUNCTION_ARGS)
614 {
615 	bytea	   *string = PG_GETARG_BYTEA_PP(0);
616 	bytea	   *set = PG_GETARG_BYTEA_PP(1);
617 	bytea	   *ret;
618 
619 	ret = dobyteatrim(string, set, true, true);
620 
621 	PG_RETURN_BYTEA_P(ret);
622 }
623 
624 /********************************************************************
625  *
626  * bytealtrim
627  *
628  * Syntax:
629  *
630  *	 bytea bytealtrim(bytea string, bytea set)
631  *
632  * Purpose:
633  *
634  *	 Returns string with initial characters removed up to the first
635  *	 character not in set.
636  *
637  ********************************************************************/
638 
639 Datum
bytealtrim(PG_FUNCTION_ARGS)640 bytealtrim(PG_FUNCTION_ARGS)
641 {
642 	bytea	   *string = PG_GETARG_BYTEA_PP(0);
643 	bytea	   *set = PG_GETARG_BYTEA_PP(1);
644 	bytea	   *ret;
645 
646 	ret = dobyteatrim(string, set, true, false);
647 
648 	PG_RETURN_BYTEA_P(ret);
649 }
650 
651 /********************************************************************
652  *
653  * byteartrim
654  *
655  * Syntax:
656  *
657  *	 bytea byteartrim(bytea string, bytea set)
658  *
659  * Purpose:
660  *
661  *	 Returns string with final characters removed after the last
662  *	 character not in set.
663  *
664  ********************************************************************/
665 
666 Datum
byteartrim(PG_FUNCTION_ARGS)667 byteartrim(PG_FUNCTION_ARGS)
668 {
669 	bytea	   *string = PG_GETARG_BYTEA_PP(0);
670 	bytea	   *set = PG_GETARG_BYTEA_PP(1);
671 	bytea	   *ret;
672 
673 	ret = dobyteatrim(string, set, false, true);
674 
675 	PG_RETURN_BYTEA_P(ret);
676 }
677 
678 /********************************************************************
679  *
680  * ltrim
681  *
682  * Syntax:
683  *
684  *	 text ltrim(text string, text set)
685  *
686  * Purpose:
687  *
688  *	 Returns string with initial characters removed up to the first
689  *	 character not in set.
690  *
691  ********************************************************************/
692 
693 Datum
ltrim(PG_FUNCTION_ARGS)694 ltrim(PG_FUNCTION_ARGS)
695 {
696 	text	   *string = PG_GETARG_TEXT_PP(0);
697 	text	   *set = PG_GETARG_TEXT_PP(1);
698 	text	   *ret;
699 
700 	ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
701 				 VARDATA_ANY(set), VARSIZE_ANY_EXHDR(set),
702 				 true, false);
703 
704 	PG_RETURN_TEXT_P(ret);
705 }
706 
707 /********************************************************************
708  *
709  * ltrim1 --- ltrim with set fixed as ' '
710  *
711  ********************************************************************/
712 
713 Datum
ltrim1(PG_FUNCTION_ARGS)714 ltrim1(PG_FUNCTION_ARGS)
715 {
716 	text	   *string = PG_GETARG_TEXT_PP(0);
717 	text	   *ret;
718 
719 	ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
720 				 " ", 1,
721 				 true, false);
722 
723 	PG_RETURN_TEXT_P(ret);
724 }
725 
726 /********************************************************************
727  *
728  * rtrim
729  *
730  * Syntax:
731  *
732  *	 text rtrim(text string, text set)
733  *
734  * Purpose:
735  *
736  *	 Returns string with final characters removed after the last
737  *	 character not in set.
738  *
739  ********************************************************************/
740 
741 Datum
rtrim(PG_FUNCTION_ARGS)742 rtrim(PG_FUNCTION_ARGS)
743 {
744 	text	   *string = PG_GETARG_TEXT_PP(0);
745 	text	   *set = PG_GETARG_TEXT_PP(1);
746 	text	   *ret;
747 
748 	ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
749 				 VARDATA_ANY(set), VARSIZE_ANY_EXHDR(set),
750 				 false, true);
751 
752 	PG_RETURN_TEXT_P(ret);
753 }
754 
755 /********************************************************************
756  *
757  * rtrim1 --- rtrim with set fixed as ' '
758  *
759  ********************************************************************/
760 
761 Datum
rtrim1(PG_FUNCTION_ARGS)762 rtrim1(PG_FUNCTION_ARGS)
763 {
764 	text	   *string = PG_GETARG_TEXT_PP(0);
765 	text	   *ret;
766 
767 	ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
768 				 " ", 1,
769 				 false, true);
770 
771 	PG_RETURN_TEXT_P(ret);
772 }
773 
774 
775 /********************************************************************
776  *
777  * translate
778  *
779  * Syntax:
780  *
781  *	 text translate(text string, text from, text to)
782  *
783  * Purpose:
784  *
785  *	 Returns string after replacing all occurrences of characters in from
786  *	 with the corresponding character in to.  If from is longer than to,
787  *	 occurrences of the extra characters in from are deleted.
788  *	 Improved by Edwin Ramirez <ramirez@doc.mssm.edu>.
789  *
790  ********************************************************************/
791 
792 Datum
translate(PG_FUNCTION_ARGS)793 translate(PG_FUNCTION_ARGS)
794 {
795 	text	   *string = PG_GETARG_TEXT_PP(0);
796 	text	   *from = PG_GETARG_TEXT_PP(1);
797 	text	   *to = PG_GETARG_TEXT_PP(2);
798 	text	   *result;
799 	char	   *from_ptr,
800 			   *to_ptr;
801 	char	   *source,
802 			   *target;
803 	int			m,
804 				fromlen,
805 				tolen,
806 				retlen,
807 				i;
808 	int			worst_len;
809 	int			len;
810 	int			source_len;
811 	int			from_index;
812 
813 	m = VARSIZE_ANY_EXHDR(string);
814 	if (m <= 0)
815 		PG_RETURN_TEXT_P(string);
816 	source = VARDATA_ANY(string);
817 
818 	fromlen = VARSIZE_ANY_EXHDR(from);
819 	from_ptr = VARDATA_ANY(from);
820 	tolen = VARSIZE_ANY_EXHDR(to);
821 	to_ptr = VARDATA_ANY(to);
822 
823 	/*
824 	 * The worst-case expansion is to substitute a max-length character for a
825 	 * single-byte character at each position of the string.
826 	 */
827 	worst_len = pg_database_encoding_max_length() * m;
828 
829 	/* check for integer overflow */
830 	if (worst_len / pg_database_encoding_max_length() != m)
831 		ereport(ERROR,
832 				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
833 				 errmsg("requested length too large")));
834 
835 	result = (text *) palloc(worst_len + VARHDRSZ);
836 	target = VARDATA(result);
837 	retlen = 0;
838 
839 	while (m > 0)
840 	{
841 		source_len = pg_mblen(source);
842 		from_index = 0;
843 
844 		for (i = 0; i < fromlen; i += len)
845 		{
846 			len = pg_mblen(&from_ptr[i]);
847 			if (len == source_len &&
848 				memcmp(source, &from_ptr[i], len) == 0)
849 				break;
850 
851 			from_index++;
852 		}
853 		if (i < fromlen)
854 		{
855 			/* substitute */
856 			char	   *p = to_ptr;
857 
858 			for (i = 0; i < from_index; i++)
859 			{
860 				p += pg_mblen(p);
861 				if (p >= (to_ptr + tolen))
862 					break;
863 			}
864 			if (p < (to_ptr + tolen))
865 			{
866 				len = pg_mblen(p);
867 				memcpy(target, p, len);
868 				target += len;
869 				retlen += len;
870 			}
871 
872 		}
873 		else
874 		{
875 			/* no match, so copy */
876 			memcpy(target, source, source_len);
877 			target += source_len;
878 			retlen += source_len;
879 		}
880 
881 		source += source_len;
882 		m -= source_len;
883 	}
884 
885 	SET_VARSIZE(result, retlen + VARHDRSZ);
886 
887 	/*
888 	 * The function result is probably much bigger than needed, if we're using
889 	 * a multibyte encoding, but it's not worth reallocating it; the result
890 	 * probably won't live long anyway.
891 	 */
892 
893 	PG_RETURN_TEXT_P(result);
894 }
895 
896 /********************************************************************
897  *
898  * ascii
899  *
900  * Syntax:
901  *
902  *	 int ascii(text string)
903  *
904  * Purpose:
905  *
906  *	 Returns the decimal representation of the first character from
907  *	 string.
908  *	 If the string is empty we return 0.
909  *	 If the database encoding is UTF8, we return the Unicode codepoint.
910  *	 If the database encoding is any other multi-byte encoding, we
911  *	 return the value of the first byte if it is an ASCII character
912  *	 (range 1 .. 127), or raise an error.
913  *	 For all other encodings we return the value of the first byte,
914  *	 (range 1..255).
915  *
916  ********************************************************************/
917 
918 Datum
ascii(PG_FUNCTION_ARGS)919 ascii(PG_FUNCTION_ARGS)
920 {
921 	text	   *string = PG_GETARG_TEXT_PP(0);
922 	int			encoding = GetDatabaseEncoding();
923 	unsigned char *data;
924 
925 	if (VARSIZE_ANY_EXHDR(string) <= 0)
926 		PG_RETURN_INT32(0);
927 
928 	data = (unsigned char *) VARDATA_ANY(string);
929 
930 	if (encoding == PG_UTF8 && *data > 127)
931 	{
932 		/* return the code point for Unicode */
933 
934 		int			result = 0,
935 					tbytes = 0,
936 					i;
937 
938 		if (*data >= 0xF0)
939 		{
940 			result = *data & 0x07;
941 			tbytes = 3;
942 		}
943 		else if (*data >= 0xE0)
944 		{
945 			result = *data & 0x0F;
946 			tbytes = 2;
947 		}
948 		else
949 		{
950 			Assert(*data > 0xC0);
951 			result = *data & 0x1f;
952 			tbytes = 1;
953 		}
954 
955 		Assert(tbytes > 0);
956 
957 		for (i = 1; i <= tbytes; i++)
958 		{
959 			Assert((data[i] & 0xC0) == 0x80);
960 			result = (result << 6) + (data[i] & 0x3f);
961 		}
962 
963 		PG_RETURN_INT32(result);
964 	}
965 	else
966 	{
967 		if (pg_encoding_max_length(encoding) > 1 && *data > 127)
968 			ereport(ERROR,
969 					(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
970 					 errmsg("requested character too large")));
971 
972 
973 		PG_RETURN_INT32((int32) *data);
974 	}
975 }
976 
977 /********************************************************************
978  *
979  * chr
980  *
981  * Syntax:
982  *
983  *	 text chr(int val)
984  *
985  * Purpose:
986  *
987  *	Returns the character having the binary equivalent to val.
988  *
989  * For UTF8 we treat the argument as a Unicode code point.
990  * For other multi-byte encodings we raise an error for arguments
991  * outside the strict ASCII range (1..127).
992  *
993  * It's important that we don't ever return a value that is not valid
994  * in the database encoding, so that this doesn't become a way for
995  * invalid data to enter the database.
996  *
997  ********************************************************************/
998 
999 Datum
chr(PG_FUNCTION_ARGS)1000 chr			(PG_FUNCTION_ARGS)
1001 {
1002 	uint32		cvalue = PG_GETARG_UINT32(0);
1003 	text	   *result;
1004 	int			encoding = GetDatabaseEncoding();
1005 
1006 	if (encoding == PG_UTF8 && cvalue > 127)
1007 	{
1008 		/* for Unicode we treat the argument as a code point */
1009 		int			bytes;
1010 		unsigned char *wch;
1011 
1012 		/*
1013 		 * We only allow valid Unicode code points; per RFC3629 that stops at
1014 		 * U+10FFFF, even though 4-byte UTF8 sequences can hold values up to
1015 		 * U+1FFFFF.
1016 		 */
1017 		if (cvalue > 0x0010ffff)
1018 			ereport(ERROR,
1019 					(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1020 					 errmsg("requested character too large for encoding: %d",
1021 							cvalue)));
1022 
1023 		if (cvalue > 0xffff)
1024 			bytes = 4;
1025 		else if (cvalue > 0x07ff)
1026 			bytes = 3;
1027 		else
1028 			bytes = 2;
1029 
1030 		result = (text *) palloc(VARHDRSZ + bytes);
1031 		SET_VARSIZE(result, VARHDRSZ + bytes);
1032 		wch = (unsigned char *) VARDATA(result);
1033 
1034 		if (bytes == 2)
1035 		{
1036 			wch[0] = 0xC0 | ((cvalue >> 6) & 0x1F);
1037 			wch[1] = 0x80 | (cvalue & 0x3F);
1038 		}
1039 		else if (bytes == 3)
1040 		{
1041 			wch[0] = 0xE0 | ((cvalue >> 12) & 0x0F);
1042 			wch[1] = 0x80 | ((cvalue >> 6) & 0x3F);
1043 			wch[2] = 0x80 | (cvalue & 0x3F);
1044 		}
1045 		else
1046 		{
1047 			wch[0] = 0xF0 | ((cvalue >> 18) & 0x07);
1048 			wch[1] = 0x80 | ((cvalue >> 12) & 0x3F);
1049 			wch[2] = 0x80 | ((cvalue >> 6) & 0x3F);
1050 			wch[3] = 0x80 | (cvalue & 0x3F);
1051 		}
1052 
1053 		/*
1054 		 * The preceding range check isn't sufficient, because UTF8 excludes
1055 		 * Unicode "surrogate pair" codes.  Make sure what we created is valid
1056 		 * UTF8.
1057 		 */
1058 		if (!pg_utf8_islegal(wch, bytes))
1059 			ereport(ERROR,
1060 					(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1061 					 errmsg("requested character not valid for encoding: %d",
1062 							cvalue)));
1063 	}
1064 	else
1065 	{
1066 		bool		is_mb;
1067 
1068 		/*
1069 		 * Error out on arguments that make no sense or that we can't validly
1070 		 * represent in the encoding.
1071 		 */
1072 		if (cvalue == 0)
1073 			ereport(ERROR,
1074 					(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1075 					 errmsg("null character not permitted")));
1076 
1077 		is_mb = pg_encoding_max_length(encoding) > 1;
1078 
1079 		if ((is_mb && (cvalue > 127)) || (!is_mb && (cvalue > 255)))
1080 			ereport(ERROR,
1081 					(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1082 					 errmsg("requested character too large for encoding: %d",
1083 							cvalue)));
1084 
1085 		result = (text *) palloc(VARHDRSZ + 1);
1086 		SET_VARSIZE(result, VARHDRSZ + 1);
1087 		*VARDATA(result) = (char) cvalue;
1088 	}
1089 
1090 	PG_RETURN_TEXT_P(result);
1091 }
1092 
1093 /********************************************************************
1094  *
1095  * repeat
1096  *
1097  * Syntax:
1098  *
1099  *	 text repeat(text string, int val)
1100  *
1101  * Purpose:
1102  *
1103  *	Repeat string by val.
1104  *
1105  ********************************************************************/
1106 
1107 Datum
repeat(PG_FUNCTION_ARGS)1108 repeat(PG_FUNCTION_ARGS)
1109 {
1110 	text	   *string = PG_GETARG_TEXT_PP(0);
1111 	int32		count = PG_GETARG_INT32(1);
1112 	text	   *result;
1113 	int			slen,
1114 				tlen;
1115 	int			i;
1116 	char	   *cp,
1117 			   *sp;
1118 
1119 	if (count < 0)
1120 		count = 0;
1121 
1122 	slen = VARSIZE_ANY_EXHDR(string);
1123 
1124 	if (unlikely(pg_mul_s32_overflow(count, slen, &tlen)) ||
1125 		unlikely(pg_add_s32_overflow(tlen, VARHDRSZ, &tlen)))
1126 		ereport(ERROR,
1127 				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1128 				 errmsg("requested length too large")));
1129 
1130 	result = (text *) palloc(tlen);
1131 
1132 	SET_VARSIZE(result, tlen);
1133 	cp = VARDATA(result);
1134 	sp = VARDATA_ANY(string);
1135 	for (i = 0; i < count; i++)
1136 	{
1137 		memcpy(cp, sp, slen);
1138 		cp += slen;
1139 		CHECK_FOR_INTERRUPTS();
1140 	}
1141 
1142 	PG_RETURN_TEXT_P(result);
1143 }
1144