1 /*-------------------------------------------------------------------------
2  * oracle_compat.c
3  *	Oracle compatible functions.
4  *
5  * Copyright (c) 1996-2016, PostgreSQL Global Development Group
6  *
7  *	Author: Edmund Mergl <E.Mergl@bawue.de>
8  *	Multibyte enhancement: Tatsuo Ishii <ishii@postgresql.org>
9  *
10  *
11  * IDENTIFICATION
12  *	src/backend/utils/adt/oracle_compat.c
13  *
14  *-------------------------------------------------------------------------
15  */
#include "postgres.h"

#include <limits.h>

#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "utils/builtins.h"
#include "utils/formatting.h"
22 
23 static text *dotrim(const char *string, int stringlen,
24 	   const char *set, int setlen,
25 	   bool doltrim, bool dortrim);
26 
27 
28 /********************************************************************
29  *
30  * lower
31  *
32  * Syntax:
33  *
34  *	 text lower(text string)
35  *
36  * Purpose:
37  *
38  *	 Returns string, with all letters forced to lowercase.
39  *
40  ********************************************************************/
41 
42 Datum
lower(PG_FUNCTION_ARGS)43 lower(PG_FUNCTION_ARGS)
44 {
45 	text	   *in_string = PG_GETARG_TEXT_PP(0);
46 	char	   *out_string;
47 	text	   *result;
48 
49 	out_string = str_tolower(VARDATA_ANY(in_string),
50 							 VARSIZE_ANY_EXHDR(in_string),
51 							 PG_GET_COLLATION());
52 	result = cstring_to_text(out_string);
53 	pfree(out_string);
54 
55 	PG_RETURN_TEXT_P(result);
56 }
57 
58 
59 /********************************************************************
60  *
61  * upper
62  *
63  * Syntax:
64  *
65  *	 text upper(text string)
66  *
67  * Purpose:
68  *
69  *	 Returns string, with all letters forced to uppercase.
70  *
71  ********************************************************************/
72 
73 Datum
upper(PG_FUNCTION_ARGS)74 upper(PG_FUNCTION_ARGS)
75 {
76 	text	   *in_string = PG_GETARG_TEXT_PP(0);
77 	char	   *out_string;
78 	text	   *result;
79 
80 	out_string = str_toupper(VARDATA_ANY(in_string),
81 							 VARSIZE_ANY_EXHDR(in_string),
82 							 PG_GET_COLLATION());
83 	result = cstring_to_text(out_string);
84 	pfree(out_string);
85 
86 	PG_RETURN_TEXT_P(result);
87 }
88 
89 
90 /********************************************************************
91  *
92  * initcap
93  *
94  * Syntax:
95  *
96  *	 text initcap(text string)
97  *
98  * Purpose:
99  *
100  *	 Returns string, with first letter of each word in uppercase, all
101  *	 other letters in lowercase. A word is defined as a sequence of
102  *	 alphanumeric characters, delimited by non-alphanumeric
103  *	 characters.
104  *
105  ********************************************************************/
106 
107 Datum
initcap(PG_FUNCTION_ARGS)108 initcap(PG_FUNCTION_ARGS)
109 {
110 	text	   *in_string = PG_GETARG_TEXT_PP(0);
111 	char	   *out_string;
112 	text	   *result;
113 
114 	out_string = str_initcap(VARDATA_ANY(in_string),
115 							 VARSIZE_ANY_EXHDR(in_string),
116 							 PG_GET_COLLATION());
117 	result = cstring_to_text(out_string);
118 	pfree(out_string);
119 
120 	PG_RETURN_TEXT_P(result);
121 }
122 
123 
124 /********************************************************************
125  *
126  * lpad
127  *
128  * Syntax:
129  *
130  *	 text lpad(text string1, int4 len, text string2)
131  *
132  * Purpose:
133  *
134  *	 Returns string1, left-padded to length len with the sequence of
135  *	 characters in string2.  If len is less than the length of string1,
136  *	 instead truncate (on the right) to len.
137  *
138  ********************************************************************/
139 
140 Datum
lpad(PG_FUNCTION_ARGS)141 lpad(PG_FUNCTION_ARGS)
142 {
143 	text	   *string1 = PG_GETARG_TEXT_PP(0);
144 	int32		len = PG_GETARG_INT32(1);
145 	text	   *string2 = PG_GETARG_TEXT_PP(2);
146 	text	   *ret;
147 	char	   *ptr1,
148 			   *ptr2,
149 			   *ptr2start,
150 			   *ptr2end,
151 			   *ptr_ret;
152 	int			m,
153 				s1len,
154 				s2len;
155 
156 	int			bytelen;
157 
158 	/* Negative len is silently taken as zero */
159 	if (len < 0)
160 		len = 0;
161 
162 	s1len = VARSIZE_ANY_EXHDR(string1);
163 	if (s1len < 0)
164 		s1len = 0;				/* shouldn't happen */
165 
166 	s2len = VARSIZE_ANY_EXHDR(string2);
167 	if (s2len < 0)
168 		s2len = 0;				/* shouldn't happen */
169 
170 	s1len = pg_mbstrlen_with_len(VARDATA_ANY(string1), s1len);
171 
172 	if (s1len > len)
173 		s1len = len;			/* truncate string1 to len chars */
174 
175 	if (s2len <= 0)
176 		len = s1len;			/* nothing to pad with, so don't pad */
177 
178 	bytelen = pg_database_encoding_max_length() * len;
179 
180 	/* check for integer overflow */
181 	if (len != 0 && bytelen / pg_database_encoding_max_length() != len)
182 		ereport(ERROR,
183 				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
184 				 errmsg("requested length too large")));
185 
186 	ret = (text *) palloc(VARHDRSZ + bytelen);
187 
188 	m = len - s1len;
189 
190 	ptr2 = ptr2start = VARDATA_ANY(string2);
191 	ptr2end = ptr2 + s2len;
192 	ptr_ret = VARDATA(ret);
193 
194 	while (m--)
195 	{
196 		int			mlen = pg_mblen(ptr2);
197 
198 		memcpy(ptr_ret, ptr2, mlen);
199 		ptr_ret += mlen;
200 		ptr2 += mlen;
201 		if (ptr2 == ptr2end)	/* wrap around at end of s2 */
202 			ptr2 = ptr2start;
203 	}
204 
205 	ptr1 = VARDATA_ANY(string1);
206 
207 	while (s1len--)
208 	{
209 		int			mlen = pg_mblen(ptr1);
210 
211 		memcpy(ptr_ret, ptr1, mlen);
212 		ptr_ret += mlen;
213 		ptr1 += mlen;
214 	}
215 
216 	SET_VARSIZE(ret, ptr_ret - (char *) ret);
217 
218 	PG_RETURN_TEXT_P(ret);
219 }
220 
221 
222 /********************************************************************
223  *
224  * rpad
225  *
226  * Syntax:
227  *
228  *	 text rpad(text string1, int4 len, text string2)
229  *
230  * Purpose:
231  *
232  *	 Returns string1, right-padded to length len with the sequence of
233  *	 characters in string2.  If len is less than the length of string1,
234  *	 instead truncate (on the right) to len.
235  *
236  ********************************************************************/
237 
238 Datum
rpad(PG_FUNCTION_ARGS)239 rpad(PG_FUNCTION_ARGS)
240 {
241 	text	   *string1 = PG_GETARG_TEXT_PP(0);
242 	int32		len = PG_GETARG_INT32(1);
243 	text	   *string2 = PG_GETARG_TEXT_PP(2);
244 	text	   *ret;
245 	char	   *ptr1,
246 			   *ptr2,
247 			   *ptr2start,
248 			   *ptr2end,
249 			   *ptr_ret;
250 	int			m,
251 				s1len,
252 				s2len;
253 
254 	int			bytelen;
255 
256 	/* Negative len is silently taken as zero */
257 	if (len < 0)
258 		len = 0;
259 
260 	s1len = VARSIZE_ANY_EXHDR(string1);
261 	if (s1len < 0)
262 		s1len = 0;				/* shouldn't happen */
263 
264 	s2len = VARSIZE_ANY_EXHDR(string2);
265 	if (s2len < 0)
266 		s2len = 0;				/* shouldn't happen */
267 
268 	s1len = pg_mbstrlen_with_len(VARDATA_ANY(string1), s1len);
269 
270 	if (s1len > len)
271 		s1len = len;			/* truncate string1 to len chars */
272 
273 	if (s2len <= 0)
274 		len = s1len;			/* nothing to pad with, so don't pad */
275 
276 	bytelen = pg_database_encoding_max_length() * len;
277 
278 	/* Check for integer overflow */
279 	if (len != 0 && bytelen / pg_database_encoding_max_length() != len)
280 		ereport(ERROR,
281 				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
282 				 errmsg("requested length too large")));
283 
284 	ret = (text *) palloc(VARHDRSZ + bytelen);
285 	m = len - s1len;
286 
287 	ptr1 = VARDATA_ANY(string1);
288 	ptr_ret = VARDATA(ret);
289 
290 	while (s1len--)
291 	{
292 		int			mlen = pg_mblen(ptr1);
293 
294 		memcpy(ptr_ret, ptr1, mlen);
295 		ptr_ret += mlen;
296 		ptr1 += mlen;
297 	}
298 
299 	ptr2 = ptr2start = VARDATA_ANY(string2);
300 	ptr2end = ptr2 + s2len;
301 
302 	while (m--)
303 	{
304 		int			mlen = pg_mblen(ptr2);
305 
306 		memcpy(ptr_ret, ptr2, mlen);
307 		ptr_ret += mlen;
308 		ptr2 += mlen;
309 		if (ptr2 == ptr2end)	/* wrap around at end of s2 */
310 			ptr2 = ptr2start;
311 	}
312 
313 	SET_VARSIZE(ret, ptr_ret - (char *) ret);
314 
315 	PG_RETURN_TEXT_P(ret);
316 }
317 
318 
319 /********************************************************************
320  *
321  * btrim
322  *
323  * Syntax:
324  *
325  *	 text btrim(text string, text set)
326  *
327  * Purpose:
328  *
329  *	 Returns string with characters removed from the front and back
330  *	 up to the first character not in set.
331  *
332  ********************************************************************/
333 
334 Datum
btrim(PG_FUNCTION_ARGS)335 btrim(PG_FUNCTION_ARGS)
336 {
337 	text	   *string = PG_GETARG_TEXT_PP(0);
338 	text	   *set = PG_GETARG_TEXT_PP(1);
339 	text	   *ret;
340 
341 	ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
342 				 VARDATA_ANY(set), VARSIZE_ANY_EXHDR(set),
343 				 true, true);
344 
345 	PG_RETURN_TEXT_P(ret);
346 }
347 
348 /********************************************************************
349  *
350  * btrim1 --- btrim with set fixed as ' '
351  *
352  ********************************************************************/
353 
354 Datum
btrim1(PG_FUNCTION_ARGS)355 btrim1(PG_FUNCTION_ARGS)
356 {
357 	text	   *string = PG_GETARG_TEXT_PP(0);
358 	text	   *ret;
359 
360 	ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
361 				 " ", 1,
362 				 true, true);
363 
364 	PG_RETURN_TEXT_P(ret);
365 }
366 
367 /*
368  * Common implementation for btrim, ltrim, rtrim
369  */
370 static text *
dotrim(const char * string,int stringlen,const char * set,int setlen,bool doltrim,bool dortrim)371 dotrim(const char *string, int stringlen,
372 	   const char *set, int setlen,
373 	   bool doltrim, bool dortrim)
374 {
375 	int			i;
376 
377 	/* Nothing to do if either string or set is empty */
378 	if (stringlen > 0 && setlen > 0)
379 	{
380 		if (pg_database_encoding_max_length() > 1)
381 		{
382 			/*
383 			 * In the multibyte-encoding case, build arrays of pointers to
384 			 * character starts, so that we can avoid inefficient checks in
385 			 * the inner loops.
386 			 */
387 			const char **stringchars;
388 			const char **setchars;
389 			int		   *stringmblen;
390 			int		   *setmblen;
391 			int			stringnchars;
392 			int			setnchars;
393 			int			resultndx;
394 			int			resultnchars;
395 			const char *p;
396 			int			len;
397 			int			mblen;
398 			const char *str_pos;
399 			int			str_len;
400 
401 			stringchars = (const char **) palloc(stringlen * sizeof(char *));
402 			stringmblen = (int *) palloc(stringlen * sizeof(int));
403 			stringnchars = 0;
404 			p = string;
405 			len = stringlen;
406 			while (len > 0)
407 			{
408 				stringchars[stringnchars] = p;
409 				stringmblen[stringnchars] = mblen = pg_mblen(p);
410 				stringnchars++;
411 				p += mblen;
412 				len -= mblen;
413 			}
414 
415 			setchars = (const char **) palloc(setlen * sizeof(char *));
416 			setmblen = (int *) palloc(setlen * sizeof(int));
417 			setnchars = 0;
418 			p = set;
419 			len = setlen;
420 			while (len > 0)
421 			{
422 				setchars[setnchars] = p;
423 				setmblen[setnchars] = mblen = pg_mblen(p);
424 				setnchars++;
425 				p += mblen;
426 				len -= mblen;
427 			}
428 
429 			resultndx = 0;		/* index in stringchars[] */
430 			resultnchars = stringnchars;
431 
432 			if (doltrim)
433 			{
434 				while (resultnchars > 0)
435 				{
436 					str_pos = stringchars[resultndx];
437 					str_len = stringmblen[resultndx];
438 					for (i = 0; i < setnchars; i++)
439 					{
440 						if (str_len == setmblen[i] &&
441 							memcmp(str_pos, setchars[i], str_len) == 0)
442 							break;
443 					}
444 					if (i >= setnchars)
445 						break;	/* no match here */
446 					string += str_len;
447 					stringlen -= str_len;
448 					resultndx++;
449 					resultnchars--;
450 				}
451 			}
452 
453 			if (dortrim)
454 			{
455 				while (resultnchars > 0)
456 				{
457 					str_pos = stringchars[resultndx + resultnchars - 1];
458 					str_len = stringmblen[resultndx + resultnchars - 1];
459 					for (i = 0; i < setnchars; i++)
460 					{
461 						if (str_len == setmblen[i] &&
462 							memcmp(str_pos, setchars[i], str_len) == 0)
463 							break;
464 					}
465 					if (i >= setnchars)
466 						break;	/* no match here */
467 					stringlen -= str_len;
468 					resultnchars--;
469 				}
470 			}
471 
472 			pfree(stringchars);
473 			pfree(stringmblen);
474 			pfree(setchars);
475 			pfree(setmblen);
476 		}
477 		else
478 		{
479 			/*
480 			 * In the single-byte-encoding case, we don't need such overhead.
481 			 */
482 			if (doltrim)
483 			{
484 				while (stringlen > 0)
485 				{
486 					char		str_ch = *string;
487 
488 					for (i = 0; i < setlen; i++)
489 					{
490 						if (str_ch == set[i])
491 							break;
492 					}
493 					if (i >= setlen)
494 						break;	/* no match here */
495 					string++;
496 					stringlen--;
497 				}
498 			}
499 
500 			if (dortrim)
501 			{
502 				while (stringlen > 0)
503 				{
504 					char		str_ch = string[stringlen - 1];
505 
506 					for (i = 0; i < setlen; i++)
507 					{
508 						if (str_ch == set[i])
509 							break;
510 					}
511 					if (i >= setlen)
512 						break;	/* no match here */
513 					stringlen--;
514 				}
515 			}
516 		}
517 	}
518 
519 	/* Return selected portion of string */
520 	return cstring_to_text_with_len(string, stringlen);
521 }
522 
523 /********************************************************************
524  *
525  * byteatrim
526  *
527  * Syntax:
528  *
529  *	 bytea byteatrim(byta string, bytea set)
530  *
531  * Purpose:
532  *
533  *	 Returns string with characters removed from the front and back
534  *	 up to the first character not in set.
535  *
536  * Cloned from btrim and modified as required.
537  ********************************************************************/
538 
539 Datum
byteatrim(PG_FUNCTION_ARGS)540 byteatrim(PG_FUNCTION_ARGS)
541 {
542 	bytea	   *string = PG_GETARG_BYTEA_PP(0);
543 	bytea	   *set = PG_GETARG_BYTEA_PP(1);
544 	bytea	   *ret;
545 	char	   *ptr,
546 			   *end,
547 			   *ptr2,
548 			   *ptr2start,
549 			   *end2;
550 	int			m,
551 				stringlen,
552 				setlen;
553 
554 	stringlen = VARSIZE_ANY_EXHDR(string);
555 	setlen = VARSIZE_ANY_EXHDR(set);
556 
557 	if (stringlen <= 0 || setlen <= 0)
558 		PG_RETURN_BYTEA_P(string);
559 
560 	m = stringlen;
561 	ptr = VARDATA_ANY(string);
562 	end = ptr + stringlen - 1;
563 	ptr2start = VARDATA_ANY(set);
564 	end2 = ptr2start + setlen - 1;
565 
566 	while (m > 0)
567 	{
568 		ptr2 = ptr2start;
569 		while (ptr2 <= end2)
570 		{
571 			if (*ptr == *ptr2)
572 				break;
573 			++ptr2;
574 		}
575 		if (ptr2 > end2)
576 			break;
577 		ptr++;
578 		m--;
579 	}
580 
581 	while (m > 0)
582 	{
583 		ptr2 = ptr2start;
584 		while (ptr2 <= end2)
585 		{
586 			if (*end == *ptr2)
587 				break;
588 			++ptr2;
589 		}
590 		if (ptr2 > end2)
591 			break;
592 		end--;
593 		m--;
594 	}
595 
596 	ret = (bytea *) palloc(VARHDRSZ + m);
597 	SET_VARSIZE(ret, VARHDRSZ + m);
598 	memcpy(VARDATA(ret), ptr, m);
599 
600 	PG_RETURN_BYTEA_P(ret);
601 }
602 
603 /********************************************************************
604  *
605  * ltrim
606  *
607  * Syntax:
608  *
609  *	 text ltrim(text string, text set)
610  *
611  * Purpose:
612  *
613  *	 Returns string with initial characters removed up to the first
614  *	 character not in set.
615  *
616  ********************************************************************/
617 
618 Datum
ltrim(PG_FUNCTION_ARGS)619 ltrim(PG_FUNCTION_ARGS)
620 {
621 	text	   *string = PG_GETARG_TEXT_PP(0);
622 	text	   *set = PG_GETARG_TEXT_PP(1);
623 	text	   *ret;
624 
625 	ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
626 				 VARDATA_ANY(set), VARSIZE_ANY_EXHDR(set),
627 				 true, false);
628 
629 	PG_RETURN_TEXT_P(ret);
630 }
631 
632 /********************************************************************
633  *
634  * ltrim1 --- ltrim with set fixed as ' '
635  *
636  ********************************************************************/
637 
638 Datum
ltrim1(PG_FUNCTION_ARGS)639 ltrim1(PG_FUNCTION_ARGS)
640 {
641 	text	   *string = PG_GETARG_TEXT_PP(0);
642 	text	   *ret;
643 
644 	ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
645 				 " ", 1,
646 				 true, false);
647 
648 	PG_RETURN_TEXT_P(ret);
649 }
650 
651 /********************************************************************
652  *
653  * rtrim
654  *
655  * Syntax:
656  *
657  *	 text rtrim(text string, text set)
658  *
659  * Purpose:
660  *
661  *	 Returns string with final characters removed after the last
662  *	 character not in set.
663  *
664  ********************************************************************/
665 
666 Datum
rtrim(PG_FUNCTION_ARGS)667 rtrim(PG_FUNCTION_ARGS)
668 {
669 	text	   *string = PG_GETARG_TEXT_PP(0);
670 	text	   *set = PG_GETARG_TEXT_PP(1);
671 	text	   *ret;
672 
673 	ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
674 				 VARDATA_ANY(set), VARSIZE_ANY_EXHDR(set),
675 				 false, true);
676 
677 	PG_RETURN_TEXT_P(ret);
678 }
679 
680 /********************************************************************
681  *
682  * rtrim1 --- rtrim with set fixed as ' '
683  *
684  ********************************************************************/
685 
686 Datum
rtrim1(PG_FUNCTION_ARGS)687 rtrim1(PG_FUNCTION_ARGS)
688 {
689 	text	   *string = PG_GETARG_TEXT_PP(0);
690 	text	   *ret;
691 
692 	ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
693 				 " ", 1,
694 				 false, true);
695 
696 	PG_RETURN_TEXT_P(ret);
697 }
698 
699 
700 /********************************************************************
701  *
702  * translate
703  *
704  * Syntax:
705  *
706  *	 text translate(text string, text from, text to)
707  *
708  * Purpose:
709  *
710  *	 Returns string after replacing all occurrences of characters in from
711  *	 with the corresponding character in to.  If from is longer than to,
712  *	 occurrences of the extra characters in from are deleted.
713  *	 Improved by Edwin Ramirez <ramirez@doc.mssm.edu>.
714  *
715  ********************************************************************/
716 
717 Datum
translate(PG_FUNCTION_ARGS)718 translate(PG_FUNCTION_ARGS)
719 {
720 	text	   *string = PG_GETARG_TEXT_PP(0);
721 	text	   *from = PG_GETARG_TEXT_PP(1);
722 	text	   *to = PG_GETARG_TEXT_PP(2);
723 	text	   *result;
724 	char	   *from_ptr,
725 			   *to_ptr;
726 	char	   *source,
727 			   *target;
728 	int			m,
729 				fromlen,
730 				tolen,
731 				retlen,
732 				i;
733 	int			worst_len;
734 	int			len;
735 	int			source_len;
736 	int			from_index;
737 
738 	m = VARSIZE_ANY_EXHDR(string);
739 	if (m <= 0)
740 		PG_RETURN_TEXT_P(string);
741 	source = VARDATA_ANY(string);
742 
743 	fromlen = VARSIZE_ANY_EXHDR(from);
744 	from_ptr = VARDATA_ANY(from);
745 	tolen = VARSIZE_ANY_EXHDR(to);
746 	to_ptr = VARDATA_ANY(to);
747 
748 	/*
749 	 * The worst-case expansion is to substitute a max-length character for a
750 	 * single-byte character at each position of the string.
751 	 */
752 	worst_len = pg_database_encoding_max_length() * m;
753 
754 	/* check for integer overflow */
755 	if (worst_len / pg_database_encoding_max_length() != m)
756 		ereport(ERROR,
757 				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
758 				 errmsg("requested length too large")));
759 
760 	result = (text *) palloc(worst_len + VARHDRSZ);
761 	target = VARDATA(result);
762 	retlen = 0;
763 
764 	while (m > 0)
765 	{
766 		source_len = pg_mblen(source);
767 		from_index = 0;
768 
769 		for (i = 0; i < fromlen; i += len)
770 		{
771 			len = pg_mblen(&from_ptr[i]);
772 			if (len == source_len &&
773 				memcmp(source, &from_ptr[i], len) == 0)
774 				break;
775 
776 			from_index++;
777 		}
778 		if (i < fromlen)
779 		{
780 			/* substitute */
781 			char	   *p = to_ptr;
782 
783 			for (i = 0; i < from_index; i++)
784 			{
785 				p += pg_mblen(p);
786 				if (p >= (to_ptr + tolen))
787 					break;
788 			}
789 			if (p < (to_ptr + tolen))
790 			{
791 				len = pg_mblen(p);
792 				memcpy(target, p, len);
793 				target += len;
794 				retlen += len;
795 			}
796 
797 		}
798 		else
799 		{
800 			/* no match, so copy */
801 			memcpy(target, source, source_len);
802 			target += source_len;
803 			retlen += source_len;
804 		}
805 
806 		source += source_len;
807 		m -= source_len;
808 	}
809 
810 	SET_VARSIZE(result, retlen + VARHDRSZ);
811 
812 	/*
813 	 * The function result is probably much bigger than needed, if we're using
814 	 * a multibyte encoding, but it's not worth reallocating it; the result
815 	 * probably won't live long anyway.
816 	 */
817 
818 	PG_RETURN_TEXT_P(result);
819 }
820 
821 /********************************************************************
822  *
823  * ascii
824  *
825  * Syntax:
826  *
827  *	 int ascii(text string)
828  *
829  * Purpose:
830  *
831  *	 Returns the decimal representation of the first character from
832  *	 string.
833  *	 If the string is empty we return 0.
834  *	 If the database encoding is UTF8, we return the Unicode codepoint.
835  *	 If the database encoding is any other multi-byte encoding, we
836  *	 return the value of the first byte if it is an ASCII character
837  *	 (range 1 .. 127), or raise an error.
838  *	 For all other encodings we return the value of the first byte,
839  *	 (range 1..255).
840  *
841  ********************************************************************/
842 
843 Datum
ascii(PG_FUNCTION_ARGS)844 ascii(PG_FUNCTION_ARGS)
845 {
846 	text	   *string = PG_GETARG_TEXT_PP(0);
847 	int			encoding = GetDatabaseEncoding();
848 	unsigned char *data;
849 
850 	if (VARSIZE_ANY_EXHDR(string) <= 0)
851 		PG_RETURN_INT32(0);
852 
853 	data = (unsigned char *) VARDATA_ANY(string);
854 
855 	if (encoding == PG_UTF8 && *data > 127)
856 	{
857 		/* return the code point for Unicode */
858 
859 		int			result = 0,
860 					tbytes = 0,
861 					i;
862 
863 		if (*data >= 0xF0)
864 		{
865 			result = *data & 0x07;
866 			tbytes = 3;
867 		}
868 		else if (*data >= 0xE0)
869 		{
870 			result = *data & 0x0F;
871 			tbytes = 2;
872 		}
873 		else
874 		{
875 			Assert(*data > 0xC0);
876 			result = *data & 0x1f;
877 			tbytes = 1;
878 		}
879 
880 		Assert(tbytes > 0);
881 
882 		for (i = 1; i <= tbytes; i++)
883 		{
884 			Assert((data[i] & 0xC0) == 0x80);
885 			result = (result << 6) + (data[i] & 0x3f);
886 		}
887 
888 		PG_RETURN_INT32(result);
889 	}
890 	else
891 	{
892 		if (pg_encoding_max_length(encoding) > 1 && *data > 127)
893 			ereport(ERROR,
894 					(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
895 					 errmsg("requested character too large")));
896 
897 
898 		PG_RETURN_INT32((int32) *data);
899 	}
900 }
901 
902 /********************************************************************
903  *
904  * chr
905  *
906  * Syntax:
907  *
908  *	 text chr(int val)
909  *
910  * Purpose:
911  *
912  *	Returns the character having the binary equivalent to val.
913  *
914  * For UTF8 we treat the argumwent as a Unicode code point.
915  * For other multi-byte encodings we raise an error for arguments
916  * outside the strict ASCII range (1..127).
917  *
918  * It's important that we don't ever return a value that is not valid
919  * in the database encoding, so that this doesn't become a way for
920  * invalid data to enter the database.
921  *
922  ********************************************************************/
923 
924 Datum
chr(PG_FUNCTION_ARGS)925 chr			(PG_FUNCTION_ARGS)
926 {
927 	uint32		cvalue = PG_GETARG_UINT32(0);
928 	text	   *result;
929 	int			encoding = GetDatabaseEncoding();
930 
931 	if (encoding == PG_UTF8 && cvalue > 127)
932 	{
933 		/* for Unicode we treat the argument as a code point */
934 		int			bytes;
935 		unsigned char *wch;
936 
937 		/*
938 		 * We only allow valid Unicode code points; per RFC3629 that stops at
939 		 * U+10FFFF, even though 4-byte UTF8 sequences can hold values up to
940 		 * U+1FFFFF.
941 		 */
942 		if (cvalue > 0x0010ffff)
943 			ereport(ERROR,
944 					(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
945 					 errmsg("requested character too large for encoding: %d",
946 							cvalue)));
947 
948 		if (cvalue > 0xffff)
949 			bytes = 4;
950 		else if (cvalue > 0x07ff)
951 			bytes = 3;
952 		else
953 			bytes = 2;
954 
955 		result = (text *) palloc(VARHDRSZ + bytes);
956 		SET_VARSIZE(result, VARHDRSZ + bytes);
957 		wch = (unsigned char *) VARDATA(result);
958 
959 		if (bytes == 2)
960 		{
961 			wch[0] = 0xC0 | ((cvalue >> 6) & 0x1F);
962 			wch[1] = 0x80 | (cvalue & 0x3F);
963 		}
964 		else if (bytes == 3)
965 		{
966 			wch[0] = 0xE0 | ((cvalue >> 12) & 0x0F);
967 			wch[1] = 0x80 | ((cvalue >> 6) & 0x3F);
968 			wch[2] = 0x80 | (cvalue & 0x3F);
969 		}
970 		else
971 		{
972 			wch[0] = 0xF0 | ((cvalue >> 18) & 0x07);
973 			wch[1] = 0x80 | ((cvalue >> 12) & 0x3F);
974 			wch[2] = 0x80 | ((cvalue >> 6) & 0x3F);
975 			wch[3] = 0x80 | (cvalue & 0x3F);
976 		}
977 
978 		/*
979 		 * The preceding range check isn't sufficient, because UTF8 excludes
980 		 * Unicode "surrogate pair" codes.  Make sure what we created is valid
981 		 * UTF8.
982 		 */
983 		if (!pg_utf8_islegal(wch, bytes))
984 			ereport(ERROR,
985 					(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
986 					 errmsg("requested character not valid for encoding: %d",
987 							cvalue)));
988 	}
989 	else
990 	{
991 		bool		is_mb;
992 
993 		/*
994 		 * Error out on arguments that make no sense or that we can't validly
995 		 * represent in the encoding.
996 		 */
997 		if (cvalue == 0)
998 			ereport(ERROR,
999 					(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1000 					 errmsg("null character not permitted")));
1001 
1002 		is_mb = pg_encoding_max_length(encoding) > 1;
1003 
1004 		if ((is_mb && (cvalue > 127)) || (!is_mb && (cvalue > 255)))
1005 			ereport(ERROR,
1006 					(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1007 					 errmsg("requested character too large for encoding: %d",
1008 							cvalue)));
1009 
1010 		result = (text *) palloc(VARHDRSZ + 1);
1011 		SET_VARSIZE(result, VARHDRSZ + 1);
1012 		*VARDATA(result) = (char) cvalue;
1013 	}
1014 
1015 	PG_RETURN_TEXT_P(result);
1016 }
1017 
1018 /********************************************************************
1019  *
1020  * repeat
1021  *
1022  * Syntax:
1023  *
1024  *	 text repeat(text string, int val)
1025  *
1026  * Purpose:
1027  *
1028  *	Repeat string by val.
1029  *
1030  ********************************************************************/
1031 
1032 Datum
repeat(PG_FUNCTION_ARGS)1033 repeat(PG_FUNCTION_ARGS)
1034 {
1035 	text	   *string = PG_GETARG_TEXT_PP(0);
1036 	int32		count = PG_GETARG_INT32(1);
1037 	text	   *result;
1038 	int			slen,
1039 				tlen;
1040 	int			i;
1041 	char	   *cp,
1042 			   *sp;
1043 
1044 	if (count < 0)
1045 		count = 0;
1046 
1047 	slen = VARSIZE_ANY_EXHDR(string);
1048 	tlen = VARHDRSZ + (count * slen);
1049 
1050 	/* Check for integer overflow */
1051 	if (slen != 0 && count != 0)
1052 	{
1053 		int			check = count * slen;
1054 		int			check2 = check + VARHDRSZ;
1055 
1056 		if ((check / slen) != count || check2 <= check)
1057 			ereport(ERROR,
1058 					(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1059 					 errmsg("requested length too large")));
1060 	}
1061 
1062 	result = (text *) palloc(tlen);
1063 
1064 	SET_VARSIZE(result, tlen);
1065 	cp = VARDATA(result);
1066 	sp = VARDATA_ANY(string);
1067 	for (i = 0; i < count; i++)
1068 	{
1069 		memcpy(cp, sp, slen);
1070 		cp += slen;
1071 		CHECK_FOR_INTERRUPTS();
1072 	}
1073 
1074 	PG_RETURN_TEXT_P(result);
1075 }
1076