1 /*------------------------------------------------------------------------- 2 * 3 * numutils.c 4 * utility functions for I/O of built-in numeric types. 5 * 6 * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group 7 * Portions Copyright (c) 1994, Regents of the University of California 8 * 9 * 10 * IDENTIFICATION 11 * src/backend/utils/adt/numutils.c 12 * 13 *------------------------------------------------------------------------- 14 */ 15 #include "postgres.h" 16 17 #include <math.h> 18 #include <limits.h> 19 #include <ctype.h> 20 21 #include "common/int.h" 22 #include "utils/builtins.h" 23 #include "port/pg_bitutils.h" 24 25 /* 26 * A table of all two-digit numbers. This is used to speed up decimal digit 27 * generation by copying pairs of digits into the final output. 28 */ 29 static const char DIGIT_TABLE[200] = 30 "00" "01" "02" "03" "04" "05" "06" "07" "08" "09" 31 "10" "11" "12" "13" "14" "15" "16" "17" "18" "19" 32 "20" "21" "22" "23" "24" "25" "26" "27" "28" "29" 33 "30" "31" "32" "33" "34" "35" "36" "37" "38" "39" 34 "40" "41" "42" "43" "44" "45" "46" "47" "48" "49" 35 "50" "51" "52" "53" "54" "55" "56" "57" "58" "59" 36 "60" "61" "62" "63" "64" "65" "66" "67" "68" "69" 37 "70" "71" "72" "73" "74" "75" "76" "77" "78" "79" 38 "80" "81" "82" "83" "84" "85" "86" "87" "88" "89" 39 "90" "91" "92" "93" "94" "95" "96" "97" "98" "99"; 40 41 /* 42 * Adapted from http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10 43 */ 44 static inline int 45 decimalLength32(const uint32 v) 46 { 47 int t; 48 static const uint32 PowersOfTen[] = { 49 1, 10, 100, 50 1000, 10000, 100000, 51 1000000, 10000000, 100000000, 52 1000000000 53 }; 54 55 /* 56 * Compute base-10 logarithm by dividing the base-2 logarithm by a 57 * good-enough approximation of the base-2 logarithm of 10 58 */ 59 t = (pg_leftmost_one_pos32(v) + 1) * 1233 / 4096; 60 return t + (v >= PowersOfTen[t]); 61 } 62 63 static inline int 64 decimalLength64(const uint64 v) 65 { 66 int t; 67 static const uint64 PowersOfTen[] = { 68 UINT64CONST(1), UINT64CONST(10), 69 UINT64CONST(100), UINT64CONST(1000), 70 UINT64CONST(10000), UINT64CONST(100000), 71 UINT64CONST(1000000), UINT64CONST(10000000), 72 UINT64CONST(100000000), UINT64CONST(1000000000), 73 UINT64CONST(10000000000), UINT64CONST(100000000000), 74 UINT64CONST(1000000000000), UINT64CONST(10000000000000), 75 UINT64CONST(100000000000000), UINT64CONST(1000000000000000), 76 UINT64CONST(10000000000000000), UINT64CONST(100000000000000000), 77 UINT64CONST(1000000000000000000), UINT64CONST(10000000000000000000) 78 }; 79 80 /* 81 * Compute base-10 logarithm by dividing the base-2 logarithm by a 82 * good-enough approximation of the base-2 logarithm of 10 83 */ 84 t = (pg_leftmost_one_pos64(v) + 1) * 1233 / 4096; 85 return t + (v >= PowersOfTen[t]); 86 } 87 88 /* 89 * pg_atoi: convert string to integer 90 * 91 * allows any number of leading or trailing whitespace characters. 92 * 93 * 'size' is the sizeof() the desired integral result (1, 2, or 4 bytes). 94 * 95 * c, if not 0, is a terminator character that may appear after the 96 * integer (plus whitespace). If 0, the string must end after the integer. 97 * 98 * Unlike plain atoi(), this will throw ereport() upon bad input format or 99 * overflow. 100 */ 101 int32 102 pg_atoi(const char *s, int size, int c) 103 { 104 long l; 105 char *badp; 106 107 /* 108 * Some versions of strtol treat the empty string as an error, but some 109 * seem not to. Make an explicit test to be sure we catch it. 110 */ 111 if (s == NULL) 112 elog(ERROR, "NULL pointer"); 113 if (*s == 0) 114 ereport(ERROR, 115 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), 116 errmsg("invalid input syntax for type %s: \"%s\"", 117 "integer", s))); 118 119 errno = 0; 120 l = strtol(s, &badp, 10); 121 122 /* We made no progress parsing the string, so bail out */ 123 if (s == badp) 124 ereport(ERROR, 125 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), 126 errmsg("invalid input syntax for type %s: \"%s\"", 127 "integer", s))); 128 129 switch (size) 130 { 131 case sizeof(int32): 132 if (errno == ERANGE 133 #if defined(HAVE_LONG_INT_64) 134 /* won't get ERANGE on these with 64-bit longs... */ 135 || l < INT_MIN || l > INT_MAX 136 #endif 137 ) 138 ereport(ERROR, 139 (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), 140 errmsg("value \"%s\" is out of range for type %s", s, 141 "integer"))); 142 break; 143 case sizeof(int16): 144 if (errno == ERANGE || l < SHRT_MIN || l > SHRT_MAX) 145 ereport(ERROR, 146 (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), 147 errmsg("value \"%s\" is out of range for type %s", s, 148 "smallint"))); 149 break; 150 case sizeof(int8): 151 if (errno == ERANGE || l < SCHAR_MIN || l > SCHAR_MAX) 152 ereport(ERROR, 153 (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), 154 errmsg("value \"%s\" is out of range for 8-bit integer", s))); 155 break; 156 default: 157 elog(ERROR, "unsupported result size: %d", size); 158 } 159 160 /* 161 * Skip any trailing whitespace; if anything but whitespace remains before 162 * the terminating character, bail out 163 */ 164 while (*badp && *badp != c && isspace((unsigned char) *badp)) 165 badp++; 166 167 if (*badp && *badp != c) 168 ereport(ERROR, 169 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), 170 errmsg("invalid input syntax for type %s: \"%s\"", 171 "integer", s))); 172 173 return (int32) l; 174 } 175 176 /* 177 * Convert input string to a signed 16 bit integer. 178 * 179 * Allows any number of leading or trailing whitespace characters. Will throw 180 * ereport() upon bad input format or overflow. 181 * 182 * NB: Accumulate input as a negative number, to deal with two's complement 183 * representation of the most negative number, which can't be represented as a 184 * positive number. 185 */ 186 int16 187 pg_strtoint16(const char *s) 188 { 189 const char *ptr = s; 190 int16 tmp = 0; 191 bool neg = false; 192 193 /* skip leading spaces */ 194 while (likely(*ptr) && isspace((unsigned char) *ptr)) 195 ptr++; 196 197 /* handle sign */ 198 if (*ptr == '-') 199 { 200 ptr++; 201 neg = true; 202 } 203 else if (*ptr == '+') 204 ptr++; 205 206 /* require at least one digit */ 207 if (unlikely(!isdigit((unsigned char) *ptr))) 208 goto invalid_syntax; 209 210 /* process digits */ 211 while (*ptr && isdigit((unsigned char) *ptr)) 212 { 213 int8 digit = (*ptr++ - '0'); 214 215 if (unlikely(pg_mul_s16_overflow(tmp, 10, &tmp)) || 216 unlikely(pg_sub_s16_overflow(tmp, digit, &tmp))) 217 goto out_of_range; 218 } 219 220 /* allow trailing whitespace, but not other trailing chars */ 221 while (*ptr != '\0' && isspace((unsigned char) *ptr)) 222 ptr++; 223 224 if (unlikely(*ptr != '\0')) 225 goto invalid_syntax; 226 227 if (!neg) 228 { 229 /* could fail if input is most negative number */ 230 if (unlikely(tmp == PG_INT16_MIN)) 231 goto out_of_range; 232 tmp = -tmp; 233 } 234 235 return tmp; 236 237 out_of_range: 238 ereport(ERROR, 239 (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), 240 errmsg("value \"%s\" is out of range for type %s", 241 s, "smallint"))); 242 243 invalid_syntax: 244 ereport(ERROR, 245 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), 246 errmsg("invalid input syntax for type %s: \"%s\"", 247 "smallint", s))); 248 249 return 0; /* keep compiler quiet */ 250 } 251 252 /* 253 * Convert input string to a signed 32 bit integer. 254 * 255 * Allows any number of leading or trailing whitespace characters. Will throw 256 * ereport() upon bad input format or overflow. 257 * 258 * NB: Accumulate input as a negative number, to deal with two's complement 259 * representation of the most negative number, which can't be represented as a 260 * positive number. 261 */ 262 int32 263 pg_strtoint32(const char *s) 264 { 265 const char *ptr = s; 266 int32 tmp = 0; 267 bool neg = false; 268 269 /* skip leading spaces */ 270 while (likely(*ptr) && isspace((unsigned char) *ptr)) 271 ptr++; 272 273 /* handle sign */ 274 if (*ptr == '-') 275 { 276 ptr++; 277 neg = true; 278 } 279 else if (*ptr == '+') 280 ptr++; 281 282 /* require at least one digit */ 283 if (unlikely(!isdigit((unsigned char) *ptr))) 284 goto invalid_syntax; 285 286 /* process digits */ 287 while (*ptr && isdigit((unsigned char) *ptr)) 288 { 289 int8 digit = (*ptr++ - '0'); 290 291 if (unlikely(pg_mul_s32_overflow(tmp, 10, &tmp)) || 292 unlikely(pg_sub_s32_overflow(tmp, digit, &tmp))) 293 goto out_of_range; 294 } 295 296 /* allow trailing whitespace, but not other trailing chars */ 297 while (*ptr != '\0' && isspace((unsigned char) *ptr)) 298 ptr++; 299 300 if (unlikely(*ptr != '\0')) 301 goto invalid_syntax; 302 303 if (!neg) 304 { 305 /* could fail if input is most negative number */ 306 if (unlikely(tmp == PG_INT32_MIN)) 307 goto out_of_range; 308 tmp = -tmp; 309 } 310 311 return tmp; 312 313 out_of_range: 314 ereport(ERROR, 315 (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), 316 errmsg("value \"%s\" is out of range for type %s", 317 s, "integer"))); 318 319 invalid_syntax: 320 ereport(ERROR, 321 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), 322 errmsg("invalid input syntax for type %s: \"%s\"", 323 "integer", s))); 324 325 return 0; /* keep compiler quiet */ 326 } 327 328 /* 329 * pg_itoa: converts a signed 16-bit integer to its string representation 330 * 331 * Caller must ensure that 'a' points to enough memory to hold the result 332 * (at least 7 bytes, counting a leading sign and trailing NUL). 333 * 334 * It doesn't seem worth implementing this separately. 335 */ 336 void 337 pg_itoa(int16 i, char *a) 338 { 339 pg_ltoa((int32) i, a); 340 } 341 342 /* 343 * pg_ultoa_n: converts an unsigned 32-bit integer to its string representation, 344 * not NUL-terminated, and returns the length of that string representation 345 * 346 * Caller must ensure that 'a' points to enough memory to hold the result (at 347 * least 10 bytes) 348 */ 349 int 350 pg_ultoa_n(uint32 value, char *a) 351 { 352 int olength, 353 i = 0; 354 355 /* Degenerate case */ 356 if (value == 0) 357 { 358 *a = '0'; 359 return 1; 360 } 361 362 olength = decimalLength32(value); 363 364 /* Compute the result string. */ 365 while (value >= 10000) 366 { 367 const uint32 c = value - 10000 * (value / 10000); 368 const uint32 c0 = (c % 100) << 1; 369 const uint32 c1 = (c / 100) << 1; 370 371 char *pos = a + olength - i; 372 373 value /= 10000; 374 375 memcpy(pos - 2, DIGIT_TABLE + c0, 2); 376 memcpy(pos - 4, DIGIT_TABLE + c1, 2); 377 i += 4; 378 } 379 if (value >= 100) 380 { 381 const uint32 c = (value % 100) << 1; 382 383 char *pos = a + olength - i; 384 385 value /= 100; 386 387 memcpy(pos - 2, DIGIT_TABLE + c, 2); 388 i += 2; 389 } 390 if (value >= 10) 391 { 392 const uint32 c = value << 1; 393 394 char *pos = a + olength - i; 395 396 memcpy(pos - 2, DIGIT_TABLE + c, 2); 397 } 398 else 399 { 400 *a = (char) ('0' + value); 401 } 402 403 return olength; 404 } 405 406 /* 407 * NUL-terminate the output of pg_ultoa_n. 408 * 409 * It is the caller's responsibility to ensure that a is at least 12 bytes long, 410 * which is enough room to hold a minus sign, a maximally long int32, and the 411 * above terminating NUL. 412 */ 413 void 414 pg_ltoa(int32 value, char *a) 415 { 416 uint32 uvalue = (uint32) value; 417 int len; 418 419 if (value < 0) 420 { 421 uvalue = (uint32) 0 - uvalue; 422 *a++ = '-'; 423 } 424 len = pg_ultoa_n(uvalue, a); 425 a[len] = '\0'; 426 } 427 428 /* 429 * Get the decimal representation, not NUL-terminated, and return the length of 430 * same. Caller must ensure that a points to at least MAXINT8LEN bytes. 431 */ 432 int 433 pg_ulltoa_n(uint64 value, char *a) 434 { 435 int olength, 436 i = 0; 437 uint32 value2; 438 439 /* Degenerate case */ 440 if (value == 0) 441 { 442 *a = '0'; 443 return 1; 444 } 445 446 olength = decimalLength64(value); 447 448 /* Compute the result string. */ 449 while (value >= 100000000) 450 { 451 const uint64 q = value / 100000000; 452 uint32 value2 = (uint32) (value - 100000000 * q); 453 454 const uint32 c = value2 % 10000; 455 const uint32 d = value2 / 10000; 456 const uint32 c0 = (c % 100) << 1; 457 const uint32 c1 = (c / 100) << 1; 458 const uint32 d0 = (d % 100) << 1; 459 const uint32 d1 = (d / 100) << 1; 460 461 char *pos = a + olength - i; 462 463 value = q; 464 465 memcpy(pos - 2, DIGIT_TABLE + c0, 2); 466 memcpy(pos - 4, DIGIT_TABLE + c1, 2); 467 memcpy(pos - 6, DIGIT_TABLE + d0, 2); 468 memcpy(pos - 8, DIGIT_TABLE + d1, 2); 469 i += 8; 470 } 471 472 /* Switch to 32-bit for speed */ 473 value2 = (uint32) value; 474 475 if (value2 >= 10000) 476 { 477 const uint32 c = value2 - 10000 * (value2 / 10000); 478 const uint32 c0 = (c % 100) << 1; 479 const uint32 c1 = (c / 100) << 1; 480 481 char *pos = a + olength - i; 482 483 value2 /= 10000; 484 485 memcpy(pos - 2, DIGIT_TABLE + c0, 2); 486 memcpy(pos - 4, DIGIT_TABLE + c1, 2); 487 i += 4; 488 } 489 if (value2 >= 100) 490 { 491 const uint32 c = (value2 % 100) << 1; 492 char *pos = a + olength - i; 493 494 value2 /= 100; 495 496 memcpy(pos - 2, DIGIT_TABLE + c, 2); 497 i += 2; 498 } 499 if (value2 >= 10) 500 { 501 const uint32 c = value2 << 1; 502 char *pos = a + olength - i; 503 504 memcpy(pos - 2, DIGIT_TABLE + c, 2); 505 } 506 else 507 *a = (char) ('0' + value2); 508 509 return olength; 510 } 511 512 /* 513 * pg_lltoa: convert a signed 64-bit integer to its string representation 514 * 515 * Caller must ensure that 'a' points to enough memory to hold the result 516 * (at least MAXINT8LEN + 1 bytes, counting a leading sign and trailing NUL). 517 */ 518 void 519 pg_lltoa(int64 value, char *a) 520 { 521 int len; 522 uint64 uvalue = value; 523 524 if (value < 0) 525 { 526 *a++ = '-'; 527 uvalue = (uint64) 0 - uvalue; 528 } 529 len = pg_ulltoa_n(uvalue, a); 530 a[len] = 0; 531 } 532 533 534 /* 535 * pg_ultostr_zeropad 536 * Converts 'value' into a decimal string representation stored at 'str'. 537 * 'minwidth' specifies the minimum width of the result; any extra space 538 * is filled up by prefixing the number with zeros. 539 * 540 * Returns the ending address of the string result (the last character written 541 * plus 1). Note that no NUL terminator is written. 542 * 543 * The intended use-case for this function is to build strings that contain 544 * multiple individual numbers, for example: 545 * 546 * str = pg_ultostr_zeropad(str, hours, 2); 547 * *str++ = ':'; 548 * str = pg_ultostr_zeropad(str, mins, 2); 549 * *str++ = ':'; 550 * str = pg_ultostr_zeropad(str, secs, 2); 551 * *str = '\0'; 552 * 553 * Note: Caller must ensure that 'str' points to enough memory to hold the 554 * result. 555 */ 556 char * 557 pg_ultostr_zeropad(char *str, uint32 value, int32 minwidth) 558 { 559 int len; 560 561 Assert(minwidth > 0); 562 563 if (value < 100 && minwidth == 2) /* Short cut for common case */ 564 { 565 memcpy(str, DIGIT_TABLE + value * 2, 2); 566 return str + 2; 567 } 568 569 len = pg_ultoa_n(value, str); 570 if (len >= minwidth) 571 return str + len; 572 573 memmove(str + minwidth - len, str, len); 574 memset(str, '0', minwidth - len); 575 return str + minwidth; 576 } 577 578 /* 579 * pg_ultostr 580 * Converts 'value' into a decimal string representation stored at 'str'. 581 * 582 * Returns the ending address of the string result (the last character written 583 * plus 1). Note that no NUL terminator is written. 584 * 585 * The intended use-case for this function is to build strings that contain 586 * multiple individual numbers, for example: 587 * 588 * str = pg_ultostr(str, a); 589 * *str++ = ' '; 590 * str = pg_ultostr(str, b); 591 * *str = '\0'; 592 * 593 * Note: Caller must ensure that 'str' points to enough memory to hold the 594 * result. 595 */ 596 char * 597 pg_ultostr(char *str, uint32 value) 598 { 599 int len = pg_ultoa_n(value, str); 600 601 return str + len; 602 } 603 604 /* 605 * pg_strtouint64 606 * Converts 'str' into an unsigned 64-bit integer. 607 * 608 * This has the identical API to strtoul(3), except that it will handle 609 * 64-bit ints even where "long" is narrower than that. 610 * 611 * For the moment it seems sufficient to assume that the platform has 612 * such a function somewhere; let's not roll our own. 613 */ 614 uint64 615 pg_strtouint64(const char *str, char **endptr, int base) 616 { 617 #ifdef _MSC_VER /* MSVC only */ 618 return _strtoui64(str, endptr, base); 619 #elif defined(HAVE_STRTOULL) && SIZEOF_LONG < 8 620 return strtoull(str, endptr, base); 621 #else 622 return strtoul(str, endptr, base); 623 #endif 624 } 625