1 /*-------------------------------------------------------------------------
2 *
3 * numutils.c
4 * utility functions for I/O of built-in numeric types.
5 *
6 * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
8 *
9 *
10 * IDENTIFICATION
11 * src/backend/utils/adt/numutils.c
12 *
13 *-------------------------------------------------------------------------
14 */
15 #include "postgres.h"
16
17 #include <math.h>
18 #include <limits.h>
19 #include <ctype.h>
20
21 #include "common/int.h"
22 #include "utils/builtins.h"
23 #include "port/pg_bitutils.h"
24
25 /*
26 * A table of all two-digit numbers. This is used to speed up decimal digit
27 * generation by copying pairs of digits into the final output.
28 */
29 static const char DIGIT_TABLE[200] =
30 "00" "01" "02" "03" "04" "05" "06" "07" "08" "09"
31 "10" "11" "12" "13" "14" "15" "16" "17" "18" "19"
32 "20" "21" "22" "23" "24" "25" "26" "27" "28" "29"
33 "30" "31" "32" "33" "34" "35" "36" "37" "38" "39"
34 "40" "41" "42" "43" "44" "45" "46" "47" "48" "49"
35 "50" "51" "52" "53" "54" "55" "56" "57" "58" "59"
36 "60" "61" "62" "63" "64" "65" "66" "67" "68" "69"
37 "70" "71" "72" "73" "74" "75" "76" "77" "78" "79"
38 "80" "81" "82" "83" "84" "85" "86" "87" "88" "89"
39 "90" "91" "92" "93" "94" "95" "96" "97" "98" "99";
40
41 /*
42 * Adapted from http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10
43 */
44 static inline int
decimalLength32(const uint32 v)45 decimalLength32(const uint32 v)
46 {
47 int t;
48 static const uint32 PowersOfTen[] = {
49 1, 10, 100,
50 1000, 10000, 100000,
51 1000000, 10000000, 100000000,
52 1000000000
53 };
54
55 /*
56 * Compute base-10 logarithm by dividing the base-2 logarithm by a
57 * good-enough approximation of the base-2 logarithm of 10
58 */
59 t = (pg_leftmost_one_pos32(v) + 1) * 1233 / 4096;
60 return t + (v >= PowersOfTen[t]);
61 }
62
63 static inline int
decimalLength64(const uint64 v)64 decimalLength64(const uint64 v)
65 {
66 int t;
67 static const uint64 PowersOfTen[] = {
68 UINT64CONST(1), UINT64CONST(10),
69 UINT64CONST(100), UINT64CONST(1000),
70 UINT64CONST(10000), UINT64CONST(100000),
71 UINT64CONST(1000000), UINT64CONST(10000000),
72 UINT64CONST(100000000), UINT64CONST(1000000000),
73 UINT64CONST(10000000000), UINT64CONST(100000000000),
74 UINT64CONST(1000000000000), UINT64CONST(10000000000000),
75 UINT64CONST(100000000000000), UINT64CONST(1000000000000000),
76 UINT64CONST(10000000000000000), UINT64CONST(100000000000000000),
77 UINT64CONST(1000000000000000000), UINT64CONST(10000000000000000000)
78 };
79
80 /*
81 * Compute base-10 logarithm by dividing the base-2 logarithm by a
82 * good-enough approximation of the base-2 logarithm of 10
83 */
84 t = (pg_leftmost_one_pos64(v) + 1) * 1233 / 4096;
85 return t + (v >= PowersOfTen[t]);
86 }
87
88 /*
89 * pg_atoi: convert string to integer
90 *
91 * allows any number of leading or trailing whitespace characters.
92 *
93 * 'size' is the sizeof() the desired integral result (1, 2, or 4 bytes).
94 *
95 * c, if not 0, is a terminator character that may appear after the
96 * integer (plus whitespace). If 0, the string must end after the integer.
97 *
98 * Unlike plain atoi(), this will throw ereport() upon bad input format or
99 * overflow.
100 */
101 int32
pg_atoi(const char * s,int size,int c)102 pg_atoi(const char *s, int size, int c)
103 {
104 long l;
105 char *badp;
106
107 /*
108 * Some versions of strtol treat the empty string as an error, but some
109 * seem not to. Make an explicit test to be sure we catch it.
110 */
111 if (s == NULL)
112 elog(ERROR, "NULL pointer");
113 if (*s == 0)
114 ereport(ERROR,
115 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
116 errmsg("invalid input syntax for type %s: \"%s\"",
117 "integer", s)));
118
119 errno = 0;
120 l = strtol(s, &badp, 10);
121
122 /* We made no progress parsing the string, so bail out */
123 if (s == badp)
124 ereport(ERROR,
125 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
126 errmsg("invalid input syntax for type %s: \"%s\"",
127 "integer", s)));
128
129 switch (size)
130 {
131 case sizeof(int32):
132 if (errno == ERANGE
133 #if defined(HAVE_LONG_INT_64)
134 /* won't get ERANGE on these with 64-bit longs... */
135 || l < INT_MIN || l > INT_MAX
136 #endif
137 )
138 ereport(ERROR,
139 (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
140 errmsg("value \"%s\" is out of range for type %s", s,
141 "integer")));
142 break;
143 case sizeof(int16):
144 if (errno == ERANGE || l < SHRT_MIN || l > SHRT_MAX)
145 ereport(ERROR,
146 (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
147 errmsg("value \"%s\" is out of range for type %s", s,
148 "smallint")));
149 break;
150 case sizeof(int8):
151 if (errno == ERANGE || l < SCHAR_MIN || l > SCHAR_MAX)
152 ereport(ERROR,
153 (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
154 errmsg("value \"%s\" is out of range for 8-bit integer", s)));
155 break;
156 default:
157 elog(ERROR, "unsupported result size: %d", size);
158 }
159
160 /*
161 * Skip any trailing whitespace; if anything but whitespace remains before
162 * the terminating character, bail out
163 */
164 while (*badp && *badp != c && isspace((unsigned char) *badp))
165 badp++;
166
167 if (*badp && *badp != c)
168 ereport(ERROR,
169 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
170 errmsg("invalid input syntax for type %s: \"%s\"",
171 "integer", s)));
172
173 return (int32) l;
174 }
175
176 /*
177 * Convert input string to a signed 16 bit integer.
178 *
179 * Allows any number of leading or trailing whitespace characters. Will throw
180 * ereport() upon bad input format or overflow.
181 *
182 * NB: Accumulate input as a negative number, to deal with two's complement
183 * representation of the most negative number, which can't be represented as a
184 * positive number.
185 */
186 int16
pg_strtoint16(const char * s)187 pg_strtoint16(const char *s)
188 {
189 const char *ptr = s;
190 int16 tmp = 0;
191 bool neg = false;
192
193 /* skip leading spaces */
194 while (likely(*ptr) && isspace((unsigned char) *ptr))
195 ptr++;
196
197 /* handle sign */
198 if (*ptr == '-')
199 {
200 ptr++;
201 neg = true;
202 }
203 else if (*ptr == '+')
204 ptr++;
205
206 /* require at least one digit */
207 if (unlikely(!isdigit((unsigned char) *ptr)))
208 goto invalid_syntax;
209
210 /* process digits */
211 while (*ptr && isdigit((unsigned char) *ptr))
212 {
213 int8 digit = (*ptr++ - '0');
214
215 if (unlikely(pg_mul_s16_overflow(tmp, 10, &tmp)) ||
216 unlikely(pg_sub_s16_overflow(tmp, digit, &tmp)))
217 goto out_of_range;
218 }
219
220 /* allow trailing whitespace, but not other trailing chars */
221 while (*ptr != '\0' && isspace((unsigned char) *ptr))
222 ptr++;
223
224 if (unlikely(*ptr != '\0'))
225 goto invalid_syntax;
226
227 if (!neg)
228 {
229 /* could fail if input is most negative number */
230 if (unlikely(tmp == PG_INT16_MIN))
231 goto out_of_range;
232 tmp = -tmp;
233 }
234
235 return tmp;
236
237 out_of_range:
238 ereport(ERROR,
239 (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
240 errmsg("value \"%s\" is out of range for type %s",
241 s, "smallint")));
242
243 invalid_syntax:
244 ereport(ERROR,
245 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
246 errmsg("invalid input syntax for type %s: \"%s\"",
247 "smallint", s)));
248
249 return 0; /* keep compiler quiet */
250 }
251
252 /*
253 * Convert input string to a signed 32 bit integer.
254 *
255 * Allows any number of leading or trailing whitespace characters. Will throw
256 * ereport() upon bad input format or overflow.
257 *
258 * NB: Accumulate input as a negative number, to deal with two's complement
259 * representation of the most negative number, which can't be represented as a
260 * positive number.
261 */
262 int32
pg_strtoint32(const char * s)263 pg_strtoint32(const char *s)
264 {
265 const char *ptr = s;
266 int32 tmp = 0;
267 bool neg = false;
268
269 /* skip leading spaces */
270 while (likely(*ptr) && isspace((unsigned char) *ptr))
271 ptr++;
272
273 /* handle sign */
274 if (*ptr == '-')
275 {
276 ptr++;
277 neg = true;
278 }
279 else if (*ptr == '+')
280 ptr++;
281
282 /* require at least one digit */
283 if (unlikely(!isdigit((unsigned char) *ptr)))
284 goto invalid_syntax;
285
286 /* process digits */
287 while (*ptr && isdigit((unsigned char) *ptr))
288 {
289 int8 digit = (*ptr++ - '0');
290
291 if (unlikely(pg_mul_s32_overflow(tmp, 10, &tmp)) ||
292 unlikely(pg_sub_s32_overflow(tmp, digit, &tmp)))
293 goto out_of_range;
294 }
295
296 /* allow trailing whitespace, but not other trailing chars */
297 while (*ptr != '\0' && isspace((unsigned char) *ptr))
298 ptr++;
299
300 if (unlikely(*ptr != '\0'))
301 goto invalid_syntax;
302
303 if (!neg)
304 {
305 /* could fail if input is most negative number */
306 if (unlikely(tmp == PG_INT32_MIN))
307 goto out_of_range;
308 tmp = -tmp;
309 }
310
311 return tmp;
312
313 out_of_range:
314 ereport(ERROR,
315 (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
316 errmsg("value \"%s\" is out of range for type %s",
317 s, "integer")));
318
319 invalid_syntax:
320 ereport(ERROR,
321 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
322 errmsg("invalid input syntax for type %s: \"%s\"",
323 "integer", s)));
324
325 return 0; /* keep compiler quiet */
326 }
327
328 /*
329 * pg_itoa: converts a signed 16-bit integer to its string representation
330 *
331 * Caller must ensure that 'a' points to enough memory to hold the result
332 * (at least 7 bytes, counting a leading sign and trailing NUL).
333 *
334 * It doesn't seem worth implementing this separately.
335 */
336 void
pg_itoa(int16 i,char * a)337 pg_itoa(int16 i, char *a)
338 {
339 pg_ltoa((int32) i, a);
340 }
341
342 /*
343 * pg_ultoa_n: converts an unsigned 32-bit integer to its string representation,
344 * not NUL-terminated, and returns the length of that string representation
345 *
346 * Caller must ensure that 'a' points to enough memory to hold the result (at
347 * least 10 bytes)
348 */
349 int
pg_ultoa_n(uint32 value,char * a)350 pg_ultoa_n(uint32 value, char *a)
351 {
352 int olength,
353 i = 0;
354
355 /* Degenerate case */
356 if (value == 0)
357 {
358 *a = '0';
359 return 1;
360 }
361
362 olength = decimalLength32(value);
363
364 /* Compute the result string. */
365 while (value >= 10000)
366 {
367 const uint32 c = value - 10000 * (value / 10000);
368 const uint32 c0 = (c % 100) << 1;
369 const uint32 c1 = (c / 100) << 1;
370
371 char *pos = a + olength - i;
372
373 value /= 10000;
374
375 memcpy(pos - 2, DIGIT_TABLE + c0, 2);
376 memcpy(pos - 4, DIGIT_TABLE + c1, 2);
377 i += 4;
378 }
379 if (value >= 100)
380 {
381 const uint32 c = (value % 100) << 1;
382
383 char *pos = a + olength - i;
384
385 value /= 100;
386
387 memcpy(pos - 2, DIGIT_TABLE + c, 2);
388 i += 2;
389 }
390 if (value >= 10)
391 {
392 const uint32 c = value << 1;
393
394 char *pos = a + olength - i;
395
396 memcpy(pos - 2, DIGIT_TABLE + c, 2);
397 }
398 else
399 {
400 *a = (char) ('0' + value);
401 }
402
403 return olength;
404 }
405
406 /*
407 * NUL-terminate the output of pg_ultoa_n.
408 *
409 * It is the caller's responsibility to ensure that a is at least 12 bytes long,
410 * which is enough room to hold a minus sign, a maximally long int32, and the
411 * above terminating NUL.
412 */
413 void
pg_ltoa(int32 value,char * a)414 pg_ltoa(int32 value, char *a)
415 {
416 uint32 uvalue = (uint32) value;
417 int len;
418
419 if (value < 0)
420 {
421 uvalue = (uint32) 0 - uvalue;
422 *a++ = '-';
423 }
424 len = pg_ultoa_n(uvalue, a);
425 a[len] = '\0';
426 }
427
428 /*
429 * Get the decimal representation, not NUL-terminated, and return the length of
430 * same. Caller must ensure that a points to at least MAXINT8LEN bytes.
431 */
432 int
pg_ulltoa_n(uint64 value,char * a)433 pg_ulltoa_n(uint64 value, char *a)
434 {
435 int olength,
436 i = 0;
437 uint32 value2;
438
439 /* Degenerate case */
440 if (value == 0)
441 {
442 *a = '0';
443 return 1;
444 }
445
446 olength = decimalLength64(value);
447
448 /* Compute the result string. */
449 while (value >= 100000000)
450 {
451 const uint64 q = value / 100000000;
452 uint32 value2 = (uint32) (value - 100000000 * q);
453
454 const uint32 c = value2 % 10000;
455 const uint32 d = value2 / 10000;
456 const uint32 c0 = (c % 100) << 1;
457 const uint32 c1 = (c / 100) << 1;
458 const uint32 d0 = (d % 100) << 1;
459 const uint32 d1 = (d / 100) << 1;
460
461 char *pos = a + olength - i;
462
463 value = q;
464
465 memcpy(pos - 2, DIGIT_TABLE + c0, 2);
466 memcpy(pos - 4, DIGIT_TABLE + c1, 2);
467 memcpy(pos - 6, DIGIT_TABLE + d0, 2);
468 memcpy(pos - 8, DIGIT_TABLE + d1, 2);
469 i += 8;
470 }
471
472 /* Switch to 32-bit for speed */
473 value2 = (uint32) value;
474
475 if (value2 >= 10000)
476 {
477 const uint32 c = value2 - 10000 * (value2 / 10000);
478 const uint32 c0 = (c % 100) << 1;
479 const uint32 c1 = (c / 100) << 1;
480
481 char *pos = a + olength - i;
482
483 value2 /= 10000;
484
485 memcpy(pos - 2, DIGIT_TABLE + c0, 2);
486 memcpy(pos - 4, DIGIT_TABLE + c1, 2);
487 i += 4;
488 }
489 if (value2 >= 100)
490 {
491 const uint32 c = (value2 % 100) << 1;
492 char *pos = a + olength - i;
493
494 value2 /= 100;
495
496 memcpy(pos - 2, DIGIT_TABLE + c, 2);
497 i += 2;
498 }
499 if (value2 >= 10)
500 {
501 const uint32 c = value2 << 1;
502 char *pos = a + olength - i;
503
504 memcpy(pos - 2, DIGIT_TABLE + c, 2);
505 }
506 else
507 *a = (char) ('0' + value2);
508
509 return olength;
510 }
511
512 /*
513 * pg_lltoa: convert a signed 64-bit integer to its string representation
514 *
515 * Caller must ensure that 'a' points to enough memory to hold the result
516 * (at least MAXINT8LEN + 1 bytes, counting a leading sign and trailing NUL).
517 */
518 void
pg_lltoa(int64 value,char * a)519 pg_lltoa(int64 value, char *a)
520 {
521 int len;
522 uint64 uvalue = value;
523
524 if (value < 0)
525 {
526 *a++ = '-';
527 uvalue = (uint64) 0 - uvalue;
528 }
529 len = pg_ulltoa_n(uvalue, a);
530 a[len] = 0;
531 }
532
533
534 /*
535 * pg_ultostr_zeropad
536 * Converts 'value' into a decimal string representation stored at 'str'.
537 * 'minwidth' specifies the minimum width of the result; any extra space
538 * is filled up by prefixing the number with zeros.
539 *
540 * Returns the ending address of the string result (the last character written
541 * plus 1). Note that no NUL terminator is written.
542 *
543 * The intended use-case for this function is to build strings that contain
544 * multiple individual numbers, for example:
545 *
546 * str = pg_ultostr_zeropad(str, hours, 2);
547 * *str++ = ':';
548 * str = pg_ultostr_zeropad(str, mins, 2);
549 * *str++ = ':';
550 * str = pg_ultostr_zeropad(str, secs, 2);
551 * *str = '\0';
552 *
553 * Note: Caller must ensure that 'str' points to enough memory to hold the
554 * result.
555 */
556 char *
pg_ultostr_zeropad(char * str,uint32 value,int32 minwidth)557 pg_ultostr_zeropad(char *str, uint32 value, int32 minwidth)
558 {
559 int len;
560
561 Assert(minwidth > 0);
562
563 if (value < 100 && minwidth == 2) /* Short cut for common case */
564 {
565 memcpy(str, DIGIT_TABLE + value * 2, 2);
566 return str + 2;
567 }
568
569 len = pg_ultoa_n(value, str);
570 if (len >= minwidth)
571 return str + len;
572
573 memmove(str + minwidth - len, str, len);
574 memset(str, '0', minwidth - len);
575 return str + minwidth;
576 }
577
578 /*
579 * pg_ultostr
580 * Converts 'value' into a decimal string representation stored at 'str'.
581 *
582 * Returns the ending address of the string result (the last character written
583 * plus 1). Note that no NUL terminator is written.
584 *
585 * The intended use-case for this function is to build strings that contain
586 * multiple individual numbers, for example:
587 *
588 * str = pg_ultostr(str, a);
589 * *str++ = ' ';
590 * str = pg_ultostr(str, b);
591 * *str = '\0';
592 *
593 * Note: Caller must ensure that 'str' points to enough memory to hold the
594 * result.
595 */
596 char *
pg_ultostr(char * str,uint32 value)597 pg_ultostr(char *str, uint32 value)
598 {
599 int len = pg_ultoa_n(value, str);
600
601 return str + len;
602 }
603
604 /*
605 * pg_strtouint64
606 * Converts 'str' into an unsigned 64-bit integer.
607 *
608 * This has the identical API to strtoul(3), except that it will handle
609 * 64-bit ints even where "long" is narrower than that.
610 *
611 * For the moment it seems sufficient to assume that the platform has
612 * such a function somewhere; let's not roll our own.
613 */
614 uint64
pg_strtouint64(const char * str,char ** endptr,int base)615 pg_strtouint64(const char *str, char **endptr, int base)
616 {
617 #ifdef _MSC_VER /* MSVC only */
618 return _strtoui64(str, endptr, base);
619 #elif defined(HAVE_STRTOULL) && SIZEOF_LONG < 8
620 return strtoull(str, endptr, base);
621 #else
622 return strtoul(str, endptr, base);
623 #endif
624 }
625