1 /*------------------------------------------------------------------------- 2 * oracle_compat.c 3 * Oracle compatible functions. 4 * 5 * Copyright (c) 1996-2020, PostgreSQL Global Development Group 6 * 7 * Author: Edmund Mergl <E.Mergl@bawue.de> 8 * Multibyte enhancement: Tatsuo Ishii <ishii@postgresql.org> 9 * 10 * 11 * IDENTIFICATION 12 * src/backend/utils/adt/oracle_compat.c 13 * 14 *------------------------------------------------------------------------- 15 */ 16 #include "postgres.h" 17 18 #include "common/int.h" 19 #include "mb/pg_wchar.h" 20 #include "miscadmin.h" 21 #include "utils/builtins.h" 22 #include "utils/formatting.h" 23 24 static text *dotrim(const char *string, int stringlen, 25 const char *set, int setlen, 26 bool doltrim, bool dortrim); 27 28 29 /******************************************************************** 30 * 31 * lower 32 * 33 * Syntax: 34 * 35 * text lower(text string) 36 * 37 * Purpose: 38 * 39 * Returns string, with all letters forced to lowercase. 40 * 41 ********************************************************************/ 42 43 Datum 44 lower(PG_FUNCTION_ARGS) 45 { 46 text *in_string = PG_GETARG_TEXT_PP(0); 47 char *out_string; 48 text *result; 49 50 out_string = str_tolower(VARDATA_ANY(in_string), 51 VARSIZE_ANY_EXHDR(in_string), 52 PG_GET_COLLATION()); 53 result = cstring_to_text(out_string); 54 pfree(out_string); 55 56 PG_RETURN_TEXT_P(result); 57 } 58 59 60 /******************************************************************** 61 * 62 * upper 63 * 64 * Syntax: 65 * 66 * text upper(text string) 67 * 68 * Purpose: 69 * 70 * Returns string, with all letters forced to uppercase. 71 * 72 ********************************************************************/ 73 74 Datum 75 upper(PG_FUNCTION_ARGS) 76 { 77 text *in_string = PG_GETARG_TEXT_PP(0); 78 char *out_string; 79 text *result; 80 81 out_string = str_toupper(VARDATA_ANY(in_string), 82 VARSIZE_ANY_EXHDR(in_string), 83 PG_GET_COLLATION()); 84 result = cstring_to_text(out_string); 85 pfree(out_string); 86 87 PG_RETURN_TEXT_P(result); 88 } 89 90 91 /******************************************************************** 92 * 93 * initcap 94 * 95 * Syntax: 96 * 97 * text initcap(text string) 98 * 99 * Purpose: 100 * 101 * Returns string, with first letter of each word in uppercase, all 102 * other letters in lowercase. A word is defined as a sequence of 103 * alphanumeric characters, delimited by non-alphanumeric 104 * characters. 105 * 106 ********************************************************************/ 107 108 Datum 109 initcap(PG_FUNCTION_ARGS) 110 { 111 text *in_string = PG_GETARG_TEXT_PP(0); 112 char *out_string; 113 text *result; 114 115 out_string = str_initcap(VARDATA_ANY(in_string), 116 VARSIZE_ANY_EXHDR(in_string), 117 PG_GET_COLLATION()); 118 result = cstring_to_text(out_string); 119 pfree(out_string); 120 121 PG_RETURN_TEXT_P(result); 122 } 123 124 125 /******************************************************************** 126 * 127 * lpad 128 * 129 * Syntax: 130 * 131 * text lpad(text string1, int4 len, text string2) 132 * 133 * Purpose: 134 * 135 * Returns string1, left-padded to length len with the sequence of 136 * characters in string2. If len is less than the length of string1, 137 * instead truncate (on the right) to len. 138 * 139 ********************************************************************/ 140 141 Datum 142 lpad(PG_FUNCTION_ARGS) 143 { 144 text *string1 = PG_GETARG_TEXT_PP(0); 145 int32 len = PG_GETARG_INT32(1); 146 text *string2 = PG_GETARG_TEXT_PP(2); 147 text *ret; 148 char *ptr1, 149 *ptr2, 150 *ptr2start, 151 *ptr2end, 152 *ptr_ret; 153 int m, 154 s1len, 155 s2len; 156 157 int bytelen; 158 159 /* Negative len is silently taken as zero */ 160 if (len < 0) 161 len = 0; 162 163 s1len = VARSIZE_ANY_EXHDR(string1); 164 if (s1len < 0) 165 s1len = 0; /* shouldn't happen */ 166 167 s2len = VARSIZE_ANY_EXHDR(string2); 168 if (s2len < 0) 169 s2len = 0; /* shouldn't happen */ 170 171 s1len = pg_mbstrlen_with_len(VARDATA_ANY(string1), s1len); 172 173 if (s1len > len) 174 s1len = len; /* truncate string1 to len chars */ 175 176 if (s2len <= 0) 177 len = s1len; /* nothing to pad with, so don't pad */ 178 179 bytelen = pg_database_encoding_max_length() * len; 180 181 /* check for integer overflow */ 182 if (len != 0 && bytelen / pg_database_encoding_max_length() != len) 183 ereport(ERROR, 184 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), 185 errmsg("requested length too large"))); 186 187 ret = (text *) palloc(VARHDRSZ + bytelen); 188 189 m = len - s1len; 190 191 ptr2 = ptr2start = VARDATA_ANY(string2); 192 ptr2end = ptr2 + s2len; 193 ptr_ret = VARDATA(ret); 194 195 while (m--) 196 { 197 int mlen = pg_mblen(ptr2); 198 199 memcpy(ptr_ret, ptr2, mlen); 200 ptr_ret += mlen; 201 ptr2 += mlen; 202 if (ptr2 == ptr2end) /* wrap around at end of s2 */ 203 ptr2 = ptr2start; 204 } 205 206 ptr1 = VARDATA_ANY(string1); 207 208 while (s1len--) 209 { 210 int mlen = pg_mblen(ptr1); 211 212 memcpy(ptr_ret, ptr1, mlen); 213 ptr_ret += mlen; 214 ptr1 += mlen; 215 } 216 217 SET_VARSIZE(ret, ptr_ret - (char *) ret); 218 219 PG_RETURN_TEXT_P(ret); 220 } 221 222 223 /******************************************************************** 224 * 225 * rpad 226 * 227 * Syntax: 228 * 229 * text rpad(text string1, int4 len, text string2) 230 * 231 * Purpose: 232 * 233 * Returns string1, right-padded to length len with the sequence of 234 * characters in string2. If len is less than the length of string1, 235 * instead truncate (on the right) to len. 236 * 237 ********************************************************************/ 238 239 Datum 240 rpad(PG_FUNCTION_ARGS) 241 { 242 text *string1 = PG_GETARG_TEXT_PP(0); 243 int32 len = PG_GETARG_INT32(1); 244 text *string2 = PG_GETARG_TEXT_PP(2); 245 text *ret; 246 char *ptr1, 247 *ptr2, 248 *ptr2start, 249 *ptr2end, 250 *ptr_ret; 251 int m, 252 s1len, 253 s2len; 254 255 int bytelen; 256 257 /* Negative len is silently taken as zero */ 258 if (len < 0) 259 len = 0; 260 261 s1len = VARSIZE_ANY_EXHDR(string1); 262 if (s1len < 0) 263 s1len = 0; /* shouldn't happen */ 264 265 s2len = VARSIZE_ANY_EXHDR(string2); 266 if (s2len < 0) 267 s2len = 0; /* shouldn't happen */ 268 269 s1len = pg_mbstrlen_with_len(VARDATA_ANY(string1), s1len); 270 271 if (s1len > len) 272 s1len = len; /* truncate string1 to len chars */ 273 274 if (s2len <= 0) 275 len = s1len; /* nothing to pad with, so don't pad */ 276 277 bytelen = pg_database_encoding_max_length() * len; 278 279 /* Check for integer overflow */ 280 if (len != 0 && bytelen / pg_database_encoding_max_length() != len) 281 ereport(ERROR, 282 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), 283 errmsg("requested length too large"))); 284 285 ret = (text *) palloc(VARHDRSZ + bytelen); 286 m = len - s1len; 287 288 ptr1 = VARDATA_ANY(string1); 289 ptr_ret = VARDATA(ret); 290 291 while (s1len--) 292 { 293 int mlen = pg_mblen(ptr1); 294 295 memcpy(ptr_ret, ptr1, mlen); 296 ptr_ret += mlen; 297 ptr1 += mlen; 298 } 299 300 ptr2 = ptr2start = VARDATA_ANY(string2); 301 ptr2end = ptr2 + s2len; 302 303 while (m--) 304 { 305 int mlen = pg_mblen(ptr2); 306 307 memcpy(ptr_ret, ptr2, mlen); 308 ptr_ret += mlen; 309 ptr2 += mlen; 310 if (ptr2 == ptr2end) /* wrap around at end of s2 */ 311 ptr2 = ptr2start; 312 } 313 314 SET_VARSIZE(ret, ptr_ret - (char *) ret); 315 316 PG_RETURN_TEXT_P(ret); 317 } 318 319 320 /******************************************************************** 321 * 322 * btrim 323 * 324 * Syntax: 325 * 326 * text btrim(text string, text set) 327 * 328 * Purpose: 329 * 330 * Returns string with characters removed from the front and back 331 * up to the first character not in set. 332 * 333 ********************************************************************/ 334 335 Datum 336 btrim(PG_FUNCTION_ARGS) 337 { 338 text *string = PG_GETARG_TEXT_PP(0); 339 text *set = PG_GETARG_TEXT_PP(1); 340 text *ret; 341 342 ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string), 343 VARDATA_ANY(set), VARSIZE_ANY_EXHDR(set), 344 true, true); 345 346 PG_RETURN_TEXT_P(ret); 347 } 348 349 /******************************************************************** 350 * 351 * btrim1 --- btrim with set fixed as ' ' 352 * 353 ********************************************************************/ 354 355 Datum 356 btrim1(PG_FUNCTION_ARGS) 357 { 358 text *string = PG_GETARG_TEXT_PP(0); 359 text *ret; 360 361 ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string), 362 " ", 1, 363 true, true); 364 365 PG_RETURN_TEXT_P(ret); 366 } 367 368 /* 369 * Common implementation for btrim, ltrim, rtrim 370 */ 371 static text * 372 dotrim(const char *string, int stringlen, 373 const char *set, int setlen, 374 bool doltrim, bool dortrim) 375 { 376 int i; 377 378 /* Nothing to do if either string or set is empty */ 379 if (stringlen > 0 && setlen > 0) 380 { 381 if (pg_database_encoding_max_length() > 1) 382 { 383 /* 384 * In the multibyte-encoding case, build arrays of pointers to 385 * character starts, so that we can avoid inefficient checks in 386 * the inner loops. 387 */ 388 const char **stringchars; 389 const char **setchars; 390 int *stringmblen; 391 int *setmblen; 392 int stringnchars; 393 int setnchars; 394 int resultndx; 395 int resultnchars; 396 const char *p; 397 int len; 398 int mblen; 399 const char *str_pos; 400 int str_len; 401 402 stringchars = (const char **) palloc(stringlen * sizeof(char *)); 403 stringmblen = (int *) palloc(stringlen * sizeof(int)); 404 stringnchars = 0; 405 p = string; 406 len = stringlen; 407 while (len > 0) 408 { 409 stringchars[stringnchars] = p; 410 stringmblen[stringnchars] = mblen = pg_mblen(p); 411 stringnchars++; 412 p += mblen; 413 len -= mblen; 414 } 415 416 setchars = (const char **) palloc(setlen * sizeof(char *)); 417 setmblen = (int *) palloc(setlen * sizeof(int)); 418 setnchars = 0; 419 p = set; 420 len = setlen; 421 while (len > 0) 422 { 423 setchars[setnchars] = p; 424 setmblen[setnchars] = mblen = pg_mblen(p); 425 setnchars++; 426 p += mblen; 427 len -= mblen; 428 } 429 430 resultndx = 0; /* index in stringchars[] */ 431 resultnchars = stringnchars; 432 433 if (doltrim) 434 { 435 while (resultnchars > 0) 436 { 437 str_pos = stringchars[resultndx]; 438 str_len = stringmblen[resultndx]; 439 for (i = 0; i < setnchars; i++) 440 { 441 if (str_len == setmblen[i] && 442 memcmp(str_pos, setchars[i], str_len) == 0) 443 break; 444 } 445 if (i >= setnchars) 446 break; /* no match here */ 447 string += str_len; 448 stringlen -= str_len; 449 resultndx++; 450 resultnchars--; 451 } 452 } 453 454 if (dortrim) 455 { 456 while (resultnchars > 0) 457 { 458 str_pos = stringchars[resultndx + resultnchars - 1]; 459 str_len = stringmblen[resultndx + resultnchars - 1]; 460 for (i = 0; i < setnchars; i++) 461 { 462 if (str_len == setmblen[i] && 463 memcmp(str_pos, setchars[i], str_len) == 0) 464 break; 465 } 466 if (i >= setnchars) 467 break; /* no match here */ 468 stringlen -= str_len; 469 resultnchars--; 470 } 471 } 472 473 pfree(stringchars); 474 pfree(stringmblen); 475 pfree(setchars); 476 pfree(setmblen); 477 } 478 else 479 { 480 /* 481 * In the single-byte-encoding case, we don't need such overhead. 482 */ 483 if (doltrim) 484 { 485 while (stringlen > 0) 486 { 487 char str_ch = *string; 488 489 for (i = 0; i < setlen; i++) 490 { 491 if (str_ch == set[i]) 492 break; 493 } 494 if (i >= setlen) 495 break; /* no match here */ 496 string++; 497 stringlen--; 498 } 499 } 500 501 if (dortrim) 502 { 503 while (stringlen > 0) 504 { 505 char str_ch = string[stringlen - 1]; 506 507 for (i = 0; i < setlen; i++) 508 { 509 if (str_ch == set[i]) 510 break; 511 } 512 if (i >= setlen) 513 break; /* no match here */ 514 stringlen--; 515 } 516 } 517 } 518 } 519 520 /* Return selected portion of string */ 521 return cstring_to_text_with_len(string, stringlen); 522 } 523 524 /******************************************************************** 525 * 526 * byteatrim 527 * 528 * Syntax: 529 * 530 * bytea byteatrim(bytea string, bytea set) 531 * 532 * Purpose: 533 * 534 * Returns string with characters removed from the front and back 535 * up to the first character not in set. 536 * 537 * Cloned from btrim and modified as required. 538 ********************************************************************/ 539 540 Datum 541 byteatrim(PG_FUNCTION_ARGS) 542 { 543 bytea *string = PG_GETARG_BYTEA_PP(0); 544 bytea *set = PG_GETARG_BYTEA_PP(1); 545 bytea *ret; 546 char *ptr, 547 *end, 548 *ptr2, 549 *ptr2start, 550 *end2; 551 int m, 552 stringlen, 553 setlen; 554 555 stringlen = VARSIZE_ANY_EXHDR(string); 556 setlen = VARSIZE_ANY_EXHDR(set); 557 558 if (stringlen <= 0 || setlen <= 0) 559 PG_RETURN_BYTEA_P(string); 560 561 m = stringlen; 562 ptr = VARDATA_ANY(string); 563 end = ptr + stringlen - 1; 564 ptr2start = VARDATA_ANY(set); 565 end2 = ptr2start + setlen - 1; 566 567 while (m > 0) 568 { 569 ptr2 = ptr2start; 570 while (ptr2 <= end2) 571 { 572 if (*ptr == *ptr2) 573 break; 574 ++ptr2; 575 } 576 if (ptr2 > end2) 577 break; 578 ptr++; 579 m--; 580 } 581 582 while (m > 0) 583 { 584 ptr2 = ptr2start; 585 while (ptr2 <= end2) 586 { 587 if (*end == *ptr2) 588 break; 589 ++ptr2; 590 } 591 if (ptr2 > end2) 592 break; 593 end--; 594 m--; 595 } 596 597 ret = (bytea *) palloc(VARHDRSZ + m); 598 SET_VARSIZE(ret, VARHDRSZ + m); 599 memcpy(VARDATA(ret), ptr, m); 600 601 PG_RETURN_BYTEA_P(ret); 602 } 603 604 /******************************************************************** 605 * 606 * ltrim 607 * 608 * Syntax: 609 * 610 * text ltrim(text string, text set) 611 * 612 * Purpose: 613 * 614 * Returns string with initial characters removed up to the first 615 * character not in set. 616 * 617 ********************************************************************/ 618 619 Datum 620 ltrim(PG_FUNCTION_ARGS) 621 { 622 text *string = PG_GETARG_TEXT_PP(0); 623 text *set = PG_GETARG_TEXT_PP(1); 624 text *ret; 625 626 ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string), 627 VARDATA_ANY(set), VARSIZE_ANY_EXHDR(set), 628 true, false); 629 630 PG_RETURN_TEXT_P(ret); 631 } 632 633 /******************************************************************** 634 * 635 * ltrim1 --- ltrim with set fixed as ' ' 636 * 637 ********************************************************************/ 638 639 Datum 640 ltrim1(PG_FUNCTION_ARGS) 641 { 642 text *string = PG_GETARG_TEXT_PP(0); 643 text *ret; 644 645 ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string), 646 " ", 1, 647 true, false); 648 649 PG_RETURN_TEXT_P(ret); 650 } 651 652 /******************************************************************** 653 * 654 * rtrim 655 * 656 * Syntax: 657 * 658 * text rtrim(text string, text set) 659 * 660 * Purpose: 661 * 662 * Returns string with final characters removed after the last 663 * character not in set. 664 * 665 ********************************************************************/ 666 667 Datum 668 rtrim(PG_FUNCTION_ARGS) 669 { 670 text *string = PG_GETARG_TEXT_PP(0); 671 text *set = PG_GETARG_TEXT_PP(1); 672 text *ret; 673 674 ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string), 675 VARDATA_ANY(set), VARSIZE_ANY_EXHDR(set), 676 false, true); 677 678 PG_RETURN_TEXT_P(ret); 679 } 680 681 /******************************************************************** 682 * 683 * rtrim1 --- rtrim with set fixed as ' ' 684 * 685 ********************************************************************/ 686 687 Datum 688 rtrim1(PG_FUNCTION_ARGS) 689 { 690 text *string = PG_GETARG_TEXT_PP(0); 691 text *ret; 692 693 ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string), 694 " ", 1, 695 false, true); 696 697 PG_RETURN_TEXT_P(ret); 698 } 699 700 701 /******************************************************************** 702 * 703 * translate 704 * 705 * Syntax: 706 * 707 * text translate(text string, text from, text to) 708 * 709 * Purpose: 710 * 711 * Returns string after replacing all occurrences of characters in from 712 * with the corresponding character in to. If from is longer than to, 713 * occurrences of the extra characters in from are deleted. 714 * Improved by Edwin Ramirez <ramirez@doc.mssm.edu>. 715 * 716 ********************************************************************/ 717 718 Datum 719 translate(PG_FUNCTION_ARGS) 720 { 721 text *string = PG_GETARG_TEXT_PP(0); 722 text *from = PG_GETARG_TEXT_PP(1); 723 text *to = PG_GETARG_TEXT_PP(2); 724 text *result; 725 char *from_ptr, 726 *to_ptr; 727 char *source, 728 *target; 729 int m, 730 fromlen, 731 tolen, 732 retlen, 733 i; 734 int worst_len; 735 int len; 736 int source_len; 737 int from_index; 738 739 m = VARSIZE_ANY_EXHDR(string); 740 if (m <= 0) 741 PG_RETURN_TEXT_P(string); 742 source = VARDATA_ANY(string); 743 744 fromlen = VARSIZE_ANY_EXHDR(from); 745 from_ptr = VARDATA_ANY(from); 746 tolen = VARSIZE_ANY_EXHDR(to); 747 to_ptr = VARDATA_ANY(to); 748 749 /* 750 * The worst-case expansion is to substitute a max-length character for a 751 * single-byte character at each position of the string. 752 */ 753 worst_len = pg_database_encoding_max_length() * m; 754 755 /* check for integer overflow */ 756 if (worst_len / pg_database_encoding_max_length() != m) 757 ereport(ERROR, 758 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), 759 errmsg("requested length too large"))); 760 761 result = (text *) palloc(worst_len + VARHDRSZ); 762 target = VARDATA(result); 763 retlen = 0; 764 765 while (m > 0) 766 { 767 source_len = pg_mblen(source); 768 from_index = 0; 769 770 for (i = 0; i < fromlen; i += len) 771 { 772 len = pg_mblen(&from_ptr[i]); 773 if (len == source_len && 774 memcmp(source, &from_ptr[i], len) == 0) 775 break; 776 777 from_index++; 778 } 779 if (i < fromlen) 780 { 781 /* substitute */ 782 char *p = to_ptr; 783 784 for (i = 0; i < from_index; i++) 785 { 786 p += pg_mblen(p); 787 if (p >= (to_ptr + tolen)) 788 break; 789 } 790 if (p < (to_ptr + tolen)) 791 { 792 len = pg_mblen(p); 793 memcpy(target, p, len); 794 target += len; 795 retlen += len; 796 } 797 798 } 799 else 800 { 801 /* no match, so copy */ 802 memcpy(target, source, source_len); 803 target += source_len; 804 retlen += source_len; 805 } 806 807 source += source_len; 808 m -= source_len; 809 } 810 811 SET_VARSIZE(result, retlen + VARHDRSZ); 812 813 /* 814 * The function result is probably much bigger than needed, if we're using 815 * a multibyte encoding, but it's not worth reallocating it; the result 816 * probably won't live long anyway. 817 */ 818 819 PG_RETURN_TEXT_P(result); 820 } 821 822 /******************************************************************** 823 * 824 * ascii 825 * 826 * Syntax: 827 * 828 * int ascii(text string) 829 * 830 * Purpose: 831 * 832 * Returns the decimal representation of the first character from 833 * string. 834 * If the string is empty we return 0. 835 * If the database encoding is UTF8, we return the Unicode codepoint. 836 * If the database encoding is any other multi-byte encoding, we 837 * return the value of the first byte if it is an ASCII character 838 * (range 1 .. 127), or raise an error. 839 * For all other encodings we return the value of the first byte, 840 * (range 1..255). 841 * 842 ********************************************************************/ 843 844 Datum 845 ascii(PG_FUNCTION_ARGS) 846 { 847 text *string = PG_GETARG_TEXT_PP(0); 848 int encoding = GetDatabaseEncoding(); 849 unsigned char *data; 850 851 if (VARSIZE_ANY_EXHDR(string) <= 0) 852 PG_RETURN_INT32(0); 853 854 data = (unsigned char *) VARDATA_ANY(string); 855 856 if (encoding == PG_UTF8 && *data > 127) 857 { 858 /* return the code point for Unicode */ 859 860 int result = 0, 861 tbytes = 0, 862 i; 863 864 if (*data >= 0xF0) 865 { 866 result = *data & 0x07; 867 tbytes = 3; 868 } 869 else if (*data >= 0xE0) 870 { 871 result = *data & 0x0F; 872 tbytes = 2; 873 } 874 else 875 { 876 Assert(*data > 0xC0); 877 result = *data & 0x1f; 878 tbytes = 1; 879 } 880 881 Assert(tbytes > 0); 882 883 for (i = 1; i <= tbytes; i++) 884 { 885 Assert((data[i] & 0xC0) == 0x80); 886 result = (result << 6) + (data[i] & 0x3f); 887 } 888 889 PG_RETURN_INT32(result); 890 } 891 else 892 { 893 if (pg_encoding_max_length(encoding) > 1 && *data > 127) 894 ereport(ERROR, 895 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), 896 errmsg("requested character too large"))); 897 898 899 PG_RETURN_INT32((int32) *data); 900 } 901 } 902 903 /******************************************************************** 904 * 905 * chr 906 * 907 * Syntax: 908 * 909 * text chr(int val) 910 * 911 * Purpose: 912 * 913 * Returns the character having the binary equivalent to val. 914 * 915 * For UTF8 we treat the argument as a Unicode code point. 916 * For other multi-byte encodings we raise an error for arguments 917 * outside the strict ASCII range (1..127). 918 * 919 * It's important that we don't ever return a value that is not valid 920 * in the database encoding, so that this doesn't become a way for 921 * invalid data to enter the database. 922 * 923 ********************************************************************/ 924 925 Datum 926 chr (PG_FUNCTION_ARGS) 927 { 928 uint32 cvalue = PG_GETARG_UINT32(0); 929 text *result; 930 int encoding = GetDatabaseEncoding(); 931 932 if (encoding == PG_UTF8 && cvalue > 127) 933 { 934 /* for Unicode we treat the argument as a code point */ 935 int bytes; 936 unsigned char *wch; 937 938 /* 939 * We only allow valid Unicode code points; per RFC3629 that stops at 940 * U+10FFFF, even though 4-byte UTF8 sequences can hold values up to 941 * U+1FFFFF. 942 */ 943 if (cvalue > 0x0010ffff) 944 ereport(ERROR, 945 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), 946 errmsg("requested character too large for encoding: %d", 947 cvalue))); 948 949 if (cvalue > 0xffff) 950 bytes = 4; 951 else if (cvalue > 0x07ff) 952 bytes = 3; 953 else 954 bytes = 2; 955 956 result = (text *) palloc(VARHDRSZ + bytes); 957 SET_VARSIZE(result, VARHDRSZ + bytes); 958 wch = (unsigned char *) VARDATA(result); 959 960 if (bytes == 2) 961 { 962 wch[0] = 0xC0 | ((cvalue >> 6) & 0x1F); 963 wch[1] = 0x80 | (cvalue & 0x3F); 964 } 965 else if (bytes == 3) 966 { 967 wch[0] = 0xE0 | ((cvalue >> 12) & 0x0F); 968 wch[1] = 0x80 | ((cvalue >> 6) & 0x3F); 969 wch[2] = 0x80 | (cvalue & 0x3F); 970 } 971 else 972 { 973 wch[0] = 0xF0 | ((cvalue >> 18) & 0x07); 974 wch[1] = 0x80 | ((cvalue >> 12) & 0x3F); 975 wch[2] = 0x80 | ((cvalue >> 6) & 0x3F); 976 wch[3] = 0x80 | (cvalue & 0x3F); 977 } 978 979 /* 980 * The preceding range check isn't sufficient, because UTF8 excludes 981 * Unicode "surrogate pair" codes. Make sure what we created is valid 982 * UTF8. 983 */ 984 if (!pg_utf8_islegal(wch, bytes)) 985 ereport(ERROR, 986 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), 987 errmsg("requested character not valid for encoding: %d", 988 cvalue))); 989 } 990 else 991 { 992 bool is_mb; 993 994 /* 995 * Error out on arguments that make no sense or that we can't validly 996 * represent in the encoding. 997 */ 998 if (cvalue == 0) 999 ereport(ERROR, 1000 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), 1001 errmsg("null character not permitted"))); 1002 1003 is_mb = pg_encoding_max_length(encoding) > 1; 1004 1005 if ((is_mb && (cvalue > 127)) || (!is_mb && (cvalue > 255))) 1006 ereport(ERROR, 1007 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), 1008 errmsg("requested character too large for encoding: %d", 1009 cvalue))); 1010 1011 result = (text *) palloc(VARHDRSZ + 1); 1012 SET_VARSIZE(result, VARHDRSZ + 1); 1013 *VARDATA(result) = (char) cvalue; 1014 } 1015 1016 PG_RETURN_TEXT_P(result); 1017 } 1018 1019 /******************************************************************** 1020 * 1021 * repeat 1022 * 1023 * Syntax: 1024 * 1025 * text repeat(text string, int val) 1026 * 1027 * Purpose: 1028 * 1029 * Repeat string by val. 1030 * 1031 ********************************************************************/ 1032 1033 Datum 1034 repeat(PG_FUNCTION_ARGS) 1035 { 1036 text *string = PG_GETARG_TEXT_PP(0); 1037 int32 count = PG_GETARG_INT32(1); 1038 text *result; 1039 int slen, 1040 tlen; 1041 int i; 1042 char *cp, 1043 *sp; 1044 1045 if (count < 0) 1046 count = 0; 1047 1048 slen = VARSIZE_ANY_EXHDR(string); 1049 1050 if (unlikely(pg_mul_s32_overflow(count, slen, &tlen)) || 1051 unlikely(pg_add_s32_overflow(tlen, VARHDRSZ, &tlen))) 1052 ereport(ERROR, 1053 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), 1054 errmsg("requested length too large"))); 1055 1056 result = (text *) palloc(tlen); 1057 1058 SET_VARSIZE(result, tlen); 1059 cp = VARDATA(result); 1060 sp = VARDATA_ANY(string); 1061 for (i = 0; i < count; i++) 1062 { 1063 memcpy(cp, sp, slen); 1064 cp += slen; 1065 CHECK_FOR_INTERRUPTS(); 1066 } 1067 1068 PG_RETURN_TEXT_P(result); 1069 } 1070