1 /*-------------------------------------------------------------------------
2  *
3  * numutils.c
4  *	  utility functions for I/O of built-in numeric types.
5  *
6  * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *	  src/backend/utils/adt/numutils.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16 
17 #include <math.h>
18 #include <limits.h>
19 #include <ctype.h>
20 
21 #include "common/int.h"
22 #include "utils/builtins.h"
23 #include "port/pg_bitutils.h"
24 
25 /*
26  * A table of all two-digit numbers. This is used to speed up decimal digit
27  * generation by copying pairs of digits into the final output.
28  */
29 static const char DIGIT_TABLE[200] =
30 "00" "01" "02" "03" "04" "05" "06" "07" "08" "09"
31 "10" "11" "12" "13" "14" "15" "16" "17" "18" "19"
32 "20" "21" "22" "23" "24" "25" "26" "27" "28" "29"
33 "30" "31" "32" "33" "34" "35" "36" "37" "38" "39"
34 "40" "41" "42" "43" "44" "45" "46" "47" "48" "49"
35 "50" "51" "52" "53" "54" "55" "56" "57" "58" "59"
36 "60" "61" "62" "63" "64" "65" "66" "67" "68" "69"
37 "70" "71" "72" "73" "74" "75" "76" "77" "78" "79"
38 "80" "81" "82" "83" "84" "85" "86" "87" "88" "89"
39 "90" "91" "92" "93" "94" "95" "96" "97" "98" "99";
40 
41 /*
42  * Adapted from http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10
43  */
44 static inline int
decimalLength32(const uint32 v)45 decimalLength32(const uint32 v)
46 {
47 	int			t;
48 	static const uint32 PowersOfTen[] = {
49 		1, 10, 100,
50 		1000, 10000, 100000,
51 		1000000, 10000000, 100000000,
52 		1000000000
53 	};
54 
55 	/*
56 	 * Compute base-10 logarithm by dividing the base-2 logarithm by a
57 	 * good-enough approximation of the base-2 logarithm of 10
58 	 */
59 	t = (pg_leftmost_one_pos32(v) + 1) * 1233 / 4096;
60 	return t + (v >= PowersOfTen[t]);
61 }
62 
63 static inline int
decimalLength64(const uint64 v)64 decimalLength64(const uint64 v)
65 {
66 	int			t;
67 	static const uint64 PowersOfTen[] = {
68 		UINT64CONST(1), UINT64CONST(10),
69 		UINT64CONST(100), UINT64CONST(1000),
70 		UINT64CONST(10000), UINT64CONST(100000),
71 		UINT64CONST(1000000), UINT64CONST(10000000),
72 		UINT64CONST(100000000), UINT64CONST(1000000000),
73 		UINT64CONST(10000000000), UINT64CONST(100000000000),
74 		UINT64CONST(1000000000000), UINT64CONST(10000000000000),
75 		UINT64CONST(100000000000000), UINT64CONST(1000000000000000),
76 		UINT64CONST(10000000000000000), UINT64CONST(100000000000000000),
77 		UINT64CONST(1000000000000000000), UINT64CONST(10000000000000000000)
78 	};
79 
80 	/*
81 	 * Compute base-10 logarithm by dividing the base-2 logarithm by a
82 	 * good-enough approximation of the base-2 logarithm of 10
83 	 */
84 	t = (pg_leftmost_one_pos64(v) + 1) * 1233 / 4096;
85 	return t + (v >= PowersOfTen[t]);
86 }
87 
88 /*
89  * pg_atoi: convert string to integer
90  *
91  * allows any number of leading or trailing whitespace characters.
92  *
93  * 'size' is the sizeof() the desired integral result (1, 2, or 4 bytes).
94  *
95  * c, if not 0, is a terminator character that may appear after the
96  * integer (plus whitespace).  If 0, the string must end after the integer.
97  *
98  * Unlike plain atoi(), this will throw ereport() upon bad input format or
99  * overflow.
100  */
101 int32
pg_atoi(const char * s,int size,int c)102 pg_atoi(const char *s, int size, int c)
103 {
104 	long		l;
105 	char	   *badp;
106 
107 	/*
108 	 * Some versions of strtol treat the empty string as an error, but some
109 	 * seem not to.  Make an explicit test to be sure we catch it.
110 	 */
111 	if (s == NULL)
112 		elog(ERROR, "NULL pointer");
113 	if (*s == 0)
114 		ereport(ERROR,
115 				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
116 				 errmsg("invalid input syntax for type %s: \"%s\"",
117 						"integer", s)));
118 
119 	errno = 0;
120 	l = strtol(s, &badp, 10);
121 
122 	/* We made no progress parsing the string, so bail out */
123 	if (s == badp)
124 		ereport(ERROR,
125 				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
126 				 errmsg("invalid input syntax for type %s: \"%s\"",
127 						"integer", s)));
128 
129 	switch (size)
130 	{
131 		case sizeof(int32):
132 			if (errno == ERANGE
133 #if defined(HAVE_LONG_INT_64)
134 			/* won't get ERANGE on these with 64-bit longs... */
135 				|| l < INT_MIN || l > INT_MAX
136 #endif
137 				)
138 				ereport(ERROR,
139 						(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
140 						 errmsg("value \"%s\" is out of range for type %s", s,
141 								"integer")));
142 			break;
143 		case sizeof(int16):
144 			if (errno == ERANGE || l < SHRT_MIN || l > SHRT_MAX)
145 				ereport(ERROR,
146 						(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
147 						 errmsg("value \"%s\" is out of range for type %s", s,
148 								"smallint")));
149 			break;
150 		case sizeof(int8):
151 			if (errno == ERANGE || l < SCHAR_MIN || l > SCHAR_MAX)
152 				ereport(ERROR,
153 						(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
154 						 errmsg("value \"%s\" is out of range for 8-bit integer", s)));
155 			break;
156 		default:
157 			elog(ERROR, "unsupported result size: %d", size);
158 	}
159 
160 	/*
161 	 * Skip any trailing whitespace; if anything but whitespace remains before
162 	 * the terminating character, bail out
163 	 */
164 	while (*badp && *badp != c && isspace((unsigned char) *badp))
165 		badp++;
166 
167 	if (*badp && *badp != c)
168 		ereport(ERROR,
169 				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
170 				 errmsg("invalid input syntax for type %s: \"%s\"",
171 						"integer", s)));
172 
173 	return (int32) l;
174 }
175 
176 /*
177  * Convert input string to a signed 16 bit integer.
178  *
179  * Allows any number of leading or trailing whitespace characters. Will throw
180  * ereport() upon bad input format or overflow.
181  *
182  * NB: Accumulate input as a negative number, to deal with two's complement
183  * representation of the most negative number, which can't be represented as a
184  * positive number.
185  */
186 int16
pg_strtoint16(const char * s)187 pg_strtoint16(const char *s)
188 {
189 	const char *ptr = s;
190 	int16		tmp = 0;
191 	bool		neg = false;
192 
193 	/* skip leading spaces */
194 	while (likely(*ptr) && isspace((unsigned char) *ptr))
195 		ptr++;
196 
197 	/* handle sign */
198 	if (*ptr == '-')
199 	{
200 		ptr++;
201 		neg = true;
202 	}
203 	else if (*ptr == '+')
204 		ptr++;
205 
206 	/* require at least one digit */
207 	if (unlikely(!isdigit((unsigned char) *ptr)))
208 		goto invalid_syntax;
209 
210 	/* process digits */
211 	while (*ptr && isdigit((unsigned char) *ptr))
212 	{
213 		int8		digit = (*ptr++ - '0');
214 
215 		if (unlikely(pg_mul_s16_overflow(tmp, 10, &tmp)) ||
216 			unlikely(pg_sub_s16_overflow(tmp, digit, &tmp)))
217 			goto out_of_range;
218 	}
219 
220 	/* allow trailing whitespace, but not other trailing chars */
221 	while (*ptr != '\0' && isspace((unsigned char) *ptr))
222 		ptr++;
223 
224 	if (unlikely(*ptr != '\0'))
225 		goto invalid_syntax;
226 
227 	if (!neg)
228 	{
229 		/* could fail if input is most negative number */
230 		if (unlikely(tmp == PG_INT16_MIN))
231 			goto out_of_range;
232 		tmp = -tmp;
233 	}
234 
235 	return tmp;
236 
237 out_of_range:
238 	ereport(ERROR,
239 			(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
240 			 errmsg("value \"%s\" is out of range for type %s",
241 					s, "smallint")));
242 
243 invalid_syntax:
244 	ereport(ERROR,
245 			(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
246 			 errmsg("invalid input syntax for type %s: \"%s\"",
247 					"smallint", s)));
248 
249 	return 0;					/* keep compiler quiet */
250 }
251 
252 /*
253  * Convert input string to a signed 32 bit integer.
254  *
255  * Allows any number of leading or trailing whitespace characters. Will throw
256  * ereport() upon bad input format or overflow.
257  *
258  * NB: Accumulate input as a negative number, to deal with two's complement
259  * representation of the most negative number, which can't be represented as a
260  * positive number.
261  */
262 int32
pg_strtoint32(const char * s)263 pg_strtoint32(const char *s)
264 {
265 	const char *ptr = s;
266 	int32		tmp = 0;
267 	bool		neg = false;
268 
269 	/* skip leading spaces */
270 	while (likely(*ptr) && isspace((unsigned char) *ptr))
271 		ptr++;
272 
273 	/* handle sign */
274 	if (*ptr == '-')
275 	{
276 		ptr++;
277 		neg = true;
278 	}
279 	else if (*ptr == '+')
280 		ptr++;
281 
282 	/* require at least one digit */
283 	if (unlikely(!isdigit((unsigned char) *ptr)))
284 		goto invalid_syntax;
285 
286 	/* process digits */
287 	while (*ptr && isdigit((unsigned char) *ptr))
288 	{
289 		int8		digit = (*ptr++ - '0');
290 
291 		if (unlikely(pg_mul_s32_overflow(tmp, 10, &tmp)) ||
292 			unlikely(pg_sub_s32_overflow(tmp, digit, &tmp)))
293 			goto out_of_range;
294 	}
295 
296 	/* allow trailing whitespace, but not other trailing chars */
297 	while (*ptr != '\0' && isspace((unsigned char) *ptr))
298 		ptr++;
299 
300 	if (unlikely(*ptr != '\0'))
301 		goto invalid_syntax;
302 
303 	if (!neg)
304 	{
305 		/* could fail if input is most negative number */
306 		if (unlikely(tmp == PG_INT32_MIN))
307 			goto out_of_range;
308 		tmp = -tmp;
309 	}
310 
311 	return tmp;
312 
313 out_of_range:
314 	ereport(ERROR,
315 			(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
316 			 errmsg("value \"%s\" is out of range for type %s",
317 					s, "integer")));
318 
319 invalid_syntax:
320 	ereport(ERROR,
321 			(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
322 			 errmsg("invalid input syntax for type %s: \"%s\"",
323 					"integer", s)));
324 
325 	return 0;					/* keep compiler quiet */
326 }
327 
328 /*
329  * pg_itoa: converts a signed 16-bit integer to its string representation
330  *
331  * Caller must ensure that 'a' points to enough memory to hold the result
332  * (at least 7 bytes, counting a leading sign and trailing NUL).
333  *
334  * It doesn't seem worth implementing this separately.
335  */
336 void
pg_itoa(int16 i,char * a)337 pg_itoa(int16 i, char *a)
338 {
339 	pg_ltoa((int32) i, a);
340 }
341 
342 /*
343  * pg_ultoa_n: converts an unsigned 32-bit integer to its string representation,
344  * not NUL-terminated, and returns the length of that string representation
345  *
346  * Caller must ensure that 'a' points to enough memory to hold the result (at
347  * least 10 bytes)
348  */
349 int
pg_ultoa_n(uint32 value,char * a)350 pg_ultoa_n(uint32 value, char *a)
351 {
352 	int			olength,
353 				i = 0;
354 
355 	/* Degenerate case */
356 	if (value == 0)
357 	{
358 		*a = '0';
359 		return 1;
360 	}
361 
362 	olength = decimalLength32(value);
363 
364 	/* Compute the result string. */
365 	while (value >= 10000)
366 	{
367 		const uint32 c = value - 10000 * (value / 10000);
368 		const uint32 c0 = (c % 100) << 1;
369 		const uint32 c1 = (c / 100) << 1;
370 
371 		char	   *pos = a + olength - i;
372 
373 		value /= 10000;
374 
375 		memcpy(pos - 2, DIGIT_TABLE + c0, 2);
376 		memcpy(pos - 4, DIGIT_TABLE + c1, 2);
377 		i += 4;
378 	}
379 	if (value >= 100)
380 	{
381 		const uint32 c = (value % 100) << 1;
382 
383 		char	   *pos = a + olength - i;
384 
385 		value /= 100;
386 
387 		memcpy(pos - 2, DIGIT_TABLE + c, 2);
388 		i += 2;
389 	}
390 	if (value >= 10)
391 	{
392 		const uint32 c = value << 1;
393 
394 		char	   *pos = a + olength - i;
395 
396 		memcpy(pos - 2, DIGIT_TABLE + c, 2);
397 	}
398 	else
399 	{
400 		*a = (char) ('0' + value);
401 	}
402 
403 	return olength;
404 }
405 
406 /*
407  * NUL-terminate the output of pg_ultoa_n.
408  *
409  * It is the caller's responsibility to ensure that a is at least 12 bytes long,
410  * which is enough room to hold a minus sign, a maximally long int32, and the
411  * above terminating NUL.
412  */
413 void
pg_ltoa(int32 value,char * a)414 pg_ltoa(int32 value, char *a)
415 {
416 	uint32		uvalue = (uint32) value;
417 	int			len;
418 
419 	if (value < 0)
420 	{
421 		uvalue = (uint32) 0 - uvalue;
422 		*a++ = '-';
423 	}
424 	len = pg_ultoa_n(uvalue, a);
425 	a[len] = '\0';
426 }
427 
428 /*
429  * Get the decimal representation, not NUL-terminated, and return the length of
430  * same.  Caller must ensure that a points to at least MAXINT8LEN bytes.
431  */
432 int
pg_ulltoa_n(uint64 value,char * a)433 pg_ulltoa_n(uint64 value, char *a)
434 {
435 	int			olength,
436 				i = 0;
437 	uint32		value2;
438 
439 	/* Degenerate case */
440 	if (value == 0)
441 	{
442 		*a = '0';
443 		return 1;
444 	}
445 
446 	olength = decimalLength64(value);
447 
448 	/* Compute the result string. */
449 	while (value >= 100000000)
450 	{
451 		const uint64 q = value / 100000000;
452 		uint32		value2 = (uint32) (value - 100000000 * q);
453 
454 		const uint32 c = value2 % 10000;
455 		const uint32 d = value2 / 10000;
456 		const uint32 c0 = (c % 100) << 1;
457 		const uint32 c1 = (c / 100) << 1;
458 		const uint32 d0 = (d % 100) << 1;
459 		const uint32 d1 = (d / 100) << 1;
460 
461 		char	   *pos = a + olength - i;
462 
463 		value = q;
464 
465 		memcpy(pos - 2, DIGIT_TABLE + c0, 2);
466 		memcpy(pos - 4, DIGIT_TABLE + c1, 2);
467 		memcpy(pos - 6, DIGIT_TABLE + d0, 2);
468 		memcpy(pos - 8, DIGIT_TABLE + d1, 2);
469 		i += 8;
470 	}
471 
472 	/* Switch to 32-bit for speed */
473 	value2 = (uint32) value;
474 
475 	if (value2 >= 10000)
476 	{
477 		const uint32 c = value2 - 10000 * (value2 / 10000);
478 		const uint32 c0 = (c % 100) << 1;
479 		const uint32 c1 = (c / 100) << 1;
480 
481 		char	   *pos = a + olength - i;
482 
483 		value2 /= 10000;
484 
485 		memcpy(pos - 2, DIGIT_TABLE + c0, 2);
486 		memcpy(pos - 4, DIGIT_TABLE + c1, 2);
487 		i += 4;
488 	}
489 	if (value2 >= 100)
490 	{
491 		const uint32 c = (value2 % 100) << 1;
492 		char	   *pos = a + olength - i;
493 
494 		value2 /= 100;
495 
496 		memcpy(pos - 2, DIGIT_TABLE + c, 2);
497 		i += 2;
498 	}
499 	if (value2 >= 10)
500 	{
501 		const uint32 c = value2 << 1;
502 		char	   *pos = a + olength - i;
503 
504 		memcpy(pos - 2, DIGIT_TABLE + c, 2);
505 	}
506 	else
507 		*a = (char) ('0' + value2);
508 
509 	return olength;
510 }
511 
512 /*
513  * pg_lltoa: convert a signed 64-bit integer to its string representation
514  *
515  * Caller must ensure that 'a' points to enough memory to hold the result
516  * (at least MAXINT8LEN + 1 bytes, counting a leading sign and trailing NUL).
517  */
518 void
pg_lltoa(int64 value,char * a)519 pg_lltoa(int64 value, char *a)
520 {
521 	int			len;
522 	uint64		uvalue = value;
523 
524 	if (value < 0)
525 	{
526 		*a++ = '-';
527 		uvalue = (uint64) 0 - uvalue;
528 	}
529 	len = pg_ulltoa_n(uvalue, a);
530 	a[len] = 0;
531 }
532 
533 
534 /*
535  * pg_ultostr_zeropad
536  *		Converts 'value' into a decimal string representation stored at 'str'.
537  *		'minwidth' specifies the minimum width of the result; any extra space
538  *		is filled up by prefixing the number with zeros.
539  *
540  * Returns the ending address of the string result (the last character written
541  * plus 1).  Note that no NUL terminator is written.
542  *
543  * The intended use-case for this function is to build strings that contain
544  * multiple individual numbers, for example:
545  *
546  *	str = pg_ultostr_zeropad(str, hours, 2);
547  *	*str++ = ':';
548  *	str = pg_ultostr_zeropad(str, mins, 2);
549  *	*str++ = ':';
550  *	str = pg_ultostr_zeropad(str, secs, 2);
551  *	*str = '\0';
552  *
553  * Note: Caller must ensure that 'str' points to enough memory to hold the
554  * result.
555  */
556 char *
pg_ultostr_zeropad(char * str,uint32 value,int32 minwidth)557 pg_ultostr_zeropad(char *str, uint32 value, int32 minwidth)
558 {
559 	int			len;
560 
561 	Assert(minwidth > 0);
562 
563 	if (value < 100 && minwidth == 2)	/* Short cut for common case */
564 	{
565 		memcpy(str, DIGIT_TABLE + value * 2, 2);
566 		return str + 2;
567 	}
568 
569 	len = pg_ultoa_n(value, str);
570 	if (len >= minwidth)
571 		return str + len;
572 
573 	memmove(str + minwidth - len, str, len);
574 	memset(str, '0', minwidth - len);
575 	return str + minwidth;
576 }
577 
578 /*
579  * pg_ultostr
580  *		Converts 'value' into a decimal string representation stored at 'str'.
581  *
582  * Returns the ending address of the string result (the last character written
583  * plus 1).  Note that no NUL terminator is written.
584  *
585  * The intended use-case for this function is to build strings that contain
586  * multiple individual numbers, for example:
587  *
588  *	str = pg_ultostr(str, a);
589  *	*str++ = ' ';
590  *	str = pg_ultostr(str, b);
591  *	*str = '\0';
592  *
593  * Note: Caller must ensure that 'str' points to enough memory to hold the
594  * result.
595  */
596 char *
pg_ultostr(char * str,uint32 value)597 pg_ultostr(char *str, uint32 value)
598 {
599 	int			len = pg_ultoa_n(value, str);
600 
601 	return str + len;
602 }
603 
604 /*
605  * pg_strtouint64
606  *		Converts 'str' into an unsigned 64-bit integer.
607  *
608  * This has the identical API to strtoul(3), except that it will handle
609  * 64-bit ints even where "long" is narrower than that.
610  *
611  * For the moment it seems sufficient to assume that the platform has
612  * such a function somewhere; let's not roll our own.
613  */
614 uint64
pg_strtouint64(const char * str,char ** endptr,int base)615 pg_strtouint64(const char *str, char **endptr, int base)
616 {
617 #ifdef _MSC_VER					/* MSVC only */
618 	return _strtoui64(str, endptr, base);
619 #elif defined(HAVE_STRTOULL) && SIZEOF_LONG < 8
620 	return strtoull(str, endptr, base);
621 #else
622 	return strtoul(str, endptr, base);
623 #endif
624 }
625