1
2 /*
3 * Copyright 2015 MongoDB, Inc.
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 #include <stdlib.h>
19 #include <stdint.h>
20 #include <string.h>
21 #include <ctype.h>
22
23 #include "bson-decimal128.h"
24 #include "bson-types.h"
25 #include "bson-macros.h"
26 #include "bson-string.h"
27
28
29 #define BSON_DECIMAL128_EXPONENT_MAX 6111
30 #define BSON_DECIMAL128_EXPONENT_MIN -6176
31 #define BSON_DECIMAL128_EXPONENT_BIAS 6176
32 #define BSON_DECIMAL128_MAX_DIGITS 34
33
34 #define BSON_DECIMAL128_SET_NAN(dec) \
35 do { (dec).high = 0x7c00000000000000ull; (dec).low = 0; } while (0);
36 #define BSON_DECIMAL128_SET_INF(dec, isneg) \
37 do { \
38 (dec).high = 0x7800000000000000ull + 0x8000000000000000ull * (isneg); \
39 (dec).low = 0; \
40 } while (0);
41
42 /**
43 * _bson_uint128_t:
44 *
45 * This struct represents a 128 bit integer.
46 */
47 typedef struct
48 {
49 uint32_t parts[4]; /* 32-bit words stored high to low. */
50 } _bson_uint128_t;
51
52
53 /**
54 *------------------------------------------------------------------------------
55 *
56 * _bson_uint128_divide1B --
57 *
58 * This function divides a #_bson_uint128_t by 1000000000 (1 billion) and
59 * computes the quotient and remainder.
60 *
61 * The remainder will contain 9 decimal digits for conversion to string.
62 *
63 * @value The #_bson_uint128_t operand.
64 * @quotient A pointer to store the #_bson_uint128_t quotient.
65 * @rem A pointer to store the #uint64_t remainder.
66 *
67 * Returns:
68 * The quotient at @quotient and the remainder at @rem.
69 *
70 * Side effects:
71 * None.
72 *
73 *------------------------------------------------------------------------------
74 */
75 static void
_bson_uint128_divide1B(_bson_uint128_t value,_bson_uint128_t * quotient,uint32_t * rem)76 _bson_uint128_divide1B (_bson_uint128_t value, /* IN */
77 _bson_uint128_t *quotient, /* OUT */
78 uint32_t *rem) /* OUT */
79 {
80 const uint32_t DIVISOR = 1000 * 1000 * 1000;
81 uint64_t _rem = 0;
82 int i = 0;
83
84 if (!value.parts[0] && !value.parts[1] &&
85 !value.parts[2] && !value.parts[3]) {
86 *quotient = value;
87 *rem = 0;
88 return;
89 }
90
91
92 for (i = 0; i <= 3; i++) {
93 _rem <<= 32; /* Adjust remainder to match value of next dividend */
94 _rem += value.parts[i]; /* Add the divided to _rem */
95 value.parts[i] = (uint32_t)(_rem / DIVISOR);
96 _rem %= DIVISOR; /* Store the remainder */
97 }
98
99 *quotient = value;
100 *rem = _rem;
101 }
102
103
104 /**
105 *------------------------------------------------------------------------------
106 *
107 * bson_decimal128_to_string --
108 *
109 * This function converts a BID formatted decimal128 value to string,
110 * accepting a &bson_decimal128_t as @dec. The string is stored at @str.
111 *
112 * @dec : The BID formatted decimal to convert.
113 * @str : The output decimal128 string. At least %BSON_DECIMAL128_STRING characters.
114 *
115 * Returns:
116 * None.
117 *
118 * Side effects:
119 * None.
120 *
121 *------------------------------------------------------------------------------
122 */
123 void
bson_decimal128_to_string(const bson_decimal128_t * dec,char * str)124 bson_decimal128_to_string (const bson_decimal128_t *dec, /* IN */
125 char *str) /* OUT */
126 {
127 uint32_t COMBINATION_MASK = 0x1f; /* Extract least significant 5 bits */
128 uint32_t EXPONENT_MASK = 0x3fff; /* Extract least significant 14 bits */
129 uint32_t COMBINATION_INFINITY = 30; /* Value of combination field for Inf */
130 uint32_t COMBINATION_NAN = 31; /* Value of combination field for NaN */
131 uint32_t EXPONENT_BIAS = 6176; /* decimal128 exponent bias */
132
133 char *str_out = str; /* output pointer in string */
134 char significand_str[35]; /* decoded significand digits */
135
136
137 /* Note: bits in this routine are referred to starting at 0, */
138 /* from the sign bit, towards the coefficient. */
139 uint32_t high; /* bits 0 - 31 */
140 uint32_t midh; /* bits 32 - 63 */
141 uint32_t midl; /* bits 64 - 95 */
142 uint32_t low; /* bits 96 - 127 */
143 uint32_t combination; /* bits 1 - 5 */
144 uint32_t biased_exponent; /* decoded biased exponent (14 bits) */
145 uint32_t significand_digits = 0; /* the number of significand digits */
146 uint32_t significand[36] = { 0 }; /* the base-10 digits in the significand */
147 uint32_t *significand_read = significand; /* read pointer into significand */
148 int32_t exponent; /* unbiased exponent */
149 int32_t scientific_exponent; /* the exponent if scientific notation is
150 * used */
151 bool is_zero = false; /* true if the number is zero */
152
153 uint8_t significand_msb; /* the most signifcant significand bits (50-46) */
154 _bson_uint128_t significand128; /* temporary storage for significand decoding */
155 size_t i; /* indexing variables */
156 int j, k;
157
158 memset (significand_str, 0, sizeof (significand_str));
159
160 if ((int64_t)dec->high < 0) { /* negative */
161 *(str_out++) = '-';
162 }
163
164 low = (uint32_t)dec->low,
165 midl = (uint32_t)(dec->low >> 32),
166 midh = (uint32_t)dec->high,
167 high = (uint32_t)(dec->high >> 32);
168
169 /* Decode combination field and exponent */
170 combination = (high >> 26) & COMBINATION_MASK;
171
172 if (BSON_UNLIKELY ((combination >> 3) == 3)) {
173 /* Check for 'special' values */
174 if (combination == COMBINATION_INFINITY) { /* Infinity */
175 strcpy (str_out, "Inf");
176 return;
177 } else if (combination == COMBINATION_NAN) { /* NaN */
178 /* str, not str_out, to erase the sign */
179 strcpy (str, "NaN");
180 /* we don't care about the NaN payload. */
181 return;
182 } else {
183 biased_exponent = (high >> 15) & EXPONENT_MASK;
184 significand_msb = 0x8 + ((high >> 14) & 0x1);
185 }
186 } else {
187 significand_msb = (high >> 14) & 0x7;
188 biased_exponent = (high >> 17) & EXPONENT_MASK;
189 }
190
191 exponent = biased_exponent - EXPONENT_BIAS;
192 /* Create string of significand digits */
193
194 /* Convert the 114-bit binary number represented by */
195 /* (high, midh, midl, low) to at most 34 decimal */
196 /* digits through modulo and division. */
197 significand128.parts[0] = (high & 0x3fff) + ((significand_msb & 0xf) << 14);
198 significand128.parts[1] = midh;
199 significand128.parts[2] = midl;
200 significand128.parts[3] = low;
201
202 if (significand128.parts[0] == 0 && significand128.parts[1] == 0 &&
203 significand128.parts[2] == 0 && significand128.parts[3] == 0) {
204 is_zero = true;
205 } else if (significand128.parts[0] >= (1 << 17)) {
206 /* The significand is non-canonical or zero.
207 * In order to preserve compatability with the densely packed decimal
208 * format, the maximum value for the significand of decimal128 is
209 * 1e34 - 1. If the value is greater than 1e34 - 1, the IEEE 754
210 * standard dictates that the significand is interpreted as zero.
211 */
212 is_zero = true;
213 } else {
214 for (k = 3; k >= 0; k--) {
215 uint32_t least_digits = 0;
216 _bson_uint128_divide1B (significand128, &significand128,
217 &least_digits);
218
219 /* We now have the 9 least significant digits (in base 2). */
220 /* Convert and output to string. */
221 if (!least_digits) { continue; }
222
223 for (j = 8; j >= 0; j--) {
224 significand[k * 9 + j] = least_digits % 10;
225 least_digits /= 10;
226 }
227 }
228 }
229
230 /* Output format options: */
231 /* Scientific - [-]d.dddE(+/-)dd or [-]dE(+/-)dd */
232 /* Regular - ddd.ddd */
233
234 if (is_zero) {
235 significand_digits = 1;
236 *significand_read = 0;
237 } else {
238 significand_digits = 36;
239 while (!(*significand_read)) {
240 significand_digits--;
241 significand_read++;
242 }
243 }
244
245 scientific_exponent = significand_digits - 1 + exponent;
246
247 /* The scientific exponent checks are dictated by the string conversion
248 * specification and are somewhat arbitrary cutoffs.
249 *
250 * We must check exponent > 0, because if this is the case, the number
251 * has trailing zeros. However, we *cannot* output these trailing zeros,
252 * because doing so would change the precision of the value, and would
253 * change stored data if the string converted number is round tripped.
254 */
255 if (scientific_exponent < -6 || exponent > 0) {
256 /* Scientific format */
257 *(str_out++) = *(significand_read++) + '0';
258 significand_digits--;
259
260 if (significand_digits) {
261 *(str_out++) = '.';
262 }
263
264 for (i = 0; i < significand_digits; i++) {
265 *(str_out++) = *(significand_read++) + '0';
266 }
267 /* Exponent */
268 *(str_out++) = 'E';
269 bson_snprintf (str_out, 6, "%+d", scientific_exponent);
270 } else {
271 /* Regular format with no decimal place */
272 if (exponent >= 0) {
273 for (i = 0; i < significand_digits; i++) {
274 *(str_out++) = *(significand_read++) + '0';
275 }
276 *str_out = '\0';
277 } else {
278 int32_t radix_position = significand_digits + exponent;
279
280 if (radix_position > 0) { /* non-zero digits before radix */
281 for (i = 0; i < radix_position; i++) {
282 *(str_out++) = *(significand_read++) + '0';
283 }
284 } else { /* leading zero before radix point */
285 *(str_out++) = '0';
286 }
287
288 *(str_out++) = '.';
289 while (radix_position++ < 0) { /* add leading zeros after radix */
290 *(str_out++) = '0';
291 }
292
293 for (i = 0; i < significand_digits - BSON_MAX (radix_position - 1, 0);
294 i++) {
295 *(str_out++) = *(significand_read++) + '0';
296 }
297 *str_out = '\0';
298 }
299 }
300 }
301
302
303 typedef struct
304 {
305 uint64_t high,
306 low;
307 } _bson_uint128_6464_t;
308
309
310 /**
311 *-------------------------------------------------------------------------
312 *
313 * mul64x64 --
314 *
315 * This function multiplies two &uint64_t into a &_bson_uint128_6464_t.
316 *
317 * Returns:
318 * The product of @left and @right.
319 *
320 * Side Effects:
321 * None.
322 *
323 *-------------------------------------------------------------------------
324 */
325 static void
_mul_64x64(uint64_t left,uint64_t right,_bson_uint128_6464_t * product)326 _mul_64x64 (uint64_t left, /* IN */
327 uint64_t right, /* IN */
328 _bson_uint128_6464_t *product) /* OUT */
329 {
330 uint64_t left_high, left_low,
331 right_high, right_low,
332 product_high, product_mid, product_mid2, product_low;
333 _bson_uint128_6464_t rt = { 0 };
334
335 if (!left && !right) {
336 *product = rt;
337 return;
338 }
339
340 left_high = left >> 32;
341 left_low = (uint32_t)left;
342 right_high = right >> 32;
343 right_low = (uint32_t)right;
344
345 product_high = left_high * right_high;
346 product_mid = left_high * right_low;
347 product_mid2 = left_low * right_high;
348 product_low = left_low * right_low;
349
350 product_high += product_mid >> 32;
351 product_mid = (uint32_t)product_mid + product_mid2 + (product_low >> 32);
352
353 product_high = product_high + (product_mid >> 32);
354 product_low = (product_mid << 32) + (uint32_t)product_low;
355
356 rt.high = product_high;
357 rt.low = product_low;
358 *product = rt;
359 }
360
361 /**
362 *------------------------------------------------------------------------------
363 *
364 * _dec128_tolower --
365 *
366 * This function converts the ASCII character @c to lowercase. It is locale
367 * insensitive (unlike the stdlib tolower).
368 *
369 * Returns:
370 * The lowercased character.
371 */
372 char
_dec128_tolower(char c)373 _dec128_tolower (char c)
374 {
375 if (isupper (c)) {
376 c += 32;
377 }
378
379 return c;
380 }
381
382 /**
383 *------------------------------------------------------------------------------
384 *
385 * _dec128_istreq --
386 *
387 * This function compares the null-terminated *ASCII* strings @a and @b
388 * for case-insensitive equality.
389 *
390 * Returns:
391 * true if the strings are equal, false otherwise.
392 */
393 bool
_dec128_istreq(const char * a,const char * b)394 _dec128_istreq (const char *a, /* IN */
395 const char *b /* IN */)
396 {
397 while (*a != '\0' || *b != '\0') {
398 /* strings are different lengths. */
399 if (*a == '\0' || *b == '\0') {
400 return false;
401 }
402
403 if (_dec128_tolower (*a) != _dec128_tolower (*b)) {
404 return false;
405 }
406
407 a++;
408 b++;
409 }
410
411 return true;
412 }
413
414 /**
415 *------------------------------------------------------------------------------
416 *
417 * bson_decimal128_from_string --
418 *
419 * This function converts @string in the format [+-]ddd[.]ddd[E][+-]dddd to
420 * decimal128. Out of range values are converted to +/-Infinity. Invalid
421 * strings are converted to NaN.
422 *
423 * If more digits are provided than the available precision allows,
424 * round to the nearest expressable decimal128 with ties going to even will
425 * occur.
426 *
427 * Note: @string must be ASCII only!
428 *
429 * Returns:
430 * true on success, or false on failure. @dec will be NaN if @str was invalid
431 * The &bson_decimal128_t converted from @string at @dec.
432 *
433 * Side effects:
434 * None.
435 *
436 *------------------------------------------------------------------------------
437 */
438 bool
bson_decimal128_from_string(const char * string,bson_decimal128_t * dec)439 bson_decimal128_from_string (const char *string, /* IN */
440 bson_decimal128_t *dec) /* OUT */
441 {
442 _bson_uint128_6464_t significand = { 0 };
443
444 const char *str_read = string; /* Read pointer for consuming str. */
445
446 /* Parsing state tracking */
447 bool is_negative = false;
448 bool saw_radix = false;
449 bool includes_sign = false; /* True if the input string contains a sign. */
450 bool found_nonzero = false;
451
452 size_t significant_digits = 0; /* Total number of significant digits
453 * (no leading or trailing zero) */
454 size_t ndigits_read = 0; /* Total number of significand digits read */
455 size_t ndigits = 0; /* Total number of digits (no leading zeros) */
456 size_t radix_position = 0; /* The number of the digits after radix */
457 size_t first_nonzero = 0; /* The index of the first non-zero in *str* */
458
459 uint16_t digits[BSON_DECIMAL128_MAX_DIGITS] = { 0 };
460 uint16_t ndigits_stored = 0; /* The number of digits in digits */
461 uint16_t *digits_insert = digits; /* Insertion pointer for digits */
462 size_t first_digit = 0; /* The index of the first non-zero digit */
463 size_t last_digit = 0; /* The index of the last digit */
464
465 int32_t exponent = 0;
466 size_t i = 0; /* loop index over array */
467 uint64_t significand_high = 0; /* The high 17 digits of the significand */
468 uint64_t significand_low = 0; /* The low 17 digits of the significand */
469 uint16_t biased_exponent = 0; /* The biased exponent */
470
471 BSON_ASSERT (dec);
472 dec->high = 0;
473 dec->low = 0;
474
475 if (*str_read == '+' || *str_read == '-') {
476 is_negative = *(str_read++) == '-';
477 includes_sign = true;
478 }
479
480 /* Check for Infinity or NaN */
481 if (!isdigit (*str_read) && *str_read != '.') {
482 if (_dec128_istreq (str_read, "inf") ||
483 _dec128_istreq (str_read, "infinity")) {
484 BSON_DECIMAL128_SET_INF (*dec, is_negative);
485 return true;
486 } else if (_dec128_istreq (str_read, "nan")) {
487 BSON_DECIMAL128_SET_NAN (*dec);
488 return true;
489 }
490
491 BSON_DECIMAL128_SET_NAN (*dec);
492 return false;
493 }
494
495 /* Read digits */
496 while (isdigit (*str_read) || *str_read == '.') {
497 if (*str_read == '.') {
498 if (saw_radix) {
499 BSON_DECIMAL128_SET_NAN (*dec);
500 return false;
501 }
502
503 saw_radix = true;
504 str_read++;
505 continue;
506 }
507
508 if (ndigits_stored < 34) {
509 if (*str_read != '0' || found_nonzero) {
510 if (!found_nonzero) {
511 first_nonzero = ndigits_read;
512 }
513
514 found_nonzero = true;
515 *(digits_insert++) = *(str_read) - '0'; /* Only store 34 digits */
516 ndigits_stored++;
517 }
518 }
519
520 if (found_nonzero) {
521 ndigits++;
522 }
523
524 if (saw_radix) {
525 radix_position++;
526 }
527
528 ndigits_read++;
529 str_read++;
530 }
531
532 if (saw_radix && !ndigits_read) {
533 BSON_DECIMAL128_SET_NAN (*dec);
534 return false;
535 }
536
537 /* Read exponent if exists */
538 if (*str_read == 'e' || *str_read == 'E') {
539 int nread = 0;
540 #ifdef _MSC_VER
541 # define SSCANF sscanf_s
542 #else
543 # define SSCANF sscanf
544 #endif
545 int read_exponent = SSCANF (++str_read, "%d%n", &exponent, &nread);
546 str_read += nread;
547
548 if (!read_exponent || nread == 0) {
549 BSON_DECIMAL128_SET_NAN (*dec);
550 return false;
551 }
552
553 #undef SSCANF
554 }
555
556 if (*str_read) {
557 BSON_DECIMAL128_SET_NAN (*dec);
558 return false;
559 }
560
561 /* Done reading input. */
562 /* Find first non-zero digit in digits */
563 first_digit = 0;
564
565 if (!ndigits_stored) { /* value is zero */
566 first_digit = 0;
567 last_digit = 0;
568 digits[0] = 0;
569 ndigits = 1;
570 ndigits_stored = 1;
571 significant_digits = 0;
572 } else {
573 last_digit = ndigits_stored - 1;
574 significant_digits = ndigits;
575 /* Mark trailing zeros as non-significant */
576 while (string[first_nonzero + significant_digits - 1 +
577 includes_sign + saw_radix] == '0') {
578 significant_digits--;
579 }
580 }
581
582
583 /* Normalization of exponent */
584 /* Correct exponent based on radix position, and shift significand as needed */
585 /* to represent user input */
586
587 /* Overflow prevention */
588 if (exponent <= radix_position && radix_position - exponent > (1 << 14)) {
589 exponent = BSON_DECIMAL128_EXPONENT_MIN;
590 } else {
591 exponent -= radix_position;
592 }
593
594 /* Attempt to normalize the exponent */
595 while (exponent > BSON_DECIMAL128_EXPONENT_MAX) {
596 /* Shift exponent to significand and decrease */
597 last_digit++;
598
599 if (last_digit - first_digit > BSON_DECIMAL128_MAX_DIGITS) {
600 /* The exponent is too great to shift into the significand. */
601 if (significant_digits == 0) {
602 /* Value is zero, we are allowed to clamp the exponent. */
603 exponent = BSON_DECIMAL128_EXPONENT_MAX;
604 break;
605 }
606
607 /* Overflow is not permitted, error. */
608 BSON_DECIMAL128_SET_NAN (*dec);
609 return false;
610 }
611
612 exponent--;
613 }
614
615 while (exponent < BSON_DECIMAL128_EXPONENT_MIN ||
616 ndigits_stored < ndigits) {
617 /* Shift last digit */
618 if (last_digit == 0) {
619 /* underflow is not allowed, but zero clamping is */
620 if (significant_digits == 0) {
621 exponent = BSON_DECIMAL128_EXPONENT_MIN;
622 break;
623 }
624
625 BSON_DECIMAL128_SET_NAN (*dec);
626 return false;
627 }
628
629 if (ndigits_stored < ndigits) {
630 if (string[ndigits - 1 + includes_sign + saw_radix] - '0' != 0 &&
631 significant_digits != 0) {
632 BSON_DECIMAL128_SET_NAN (*dec);
633 return false;
634 }
635
636 ndigits--; /* adjust to match digits not stored */
637 } else {
638 if (digits[last_digit] != 0) {
639 /* Inexact rounding is not allowed. */
640 BSON_DECIMAL128_SET_NAN (*dec);
641 return false;
642 }
643
644
645 last_digit--; /* adjust to round */
646 }
647
648 if (exponent < BSON_DECIMAL128_EXPONENT_MAX) {
649 exponent++;
650 } else {
651 BSON_DECIMAL128_SET_NAN (*dec);
652 return false;
653 }
654 }
655
656 /* Round */
657 /* We've normalized the exponent, but might still need to round. */
658 if (last_digit - first_digit + 1 < significant_digits) {
659 size_t end_of_string = ndigits_read + includes_sign + saw_radix;
660 uint8_t round_digit;
661 uint8_t round_bit = 0;
662
663 /* There are non-zero digits after last_digit that need rounding. */
664 /* We round to nearest, ties to even */
665 round_digit =
666 string[first_nonzero + last_digit + includes_sign + saw_radix + 1] -
667 '0';
668
669 if (round_digit != 0) {
670 /* Inexact (non-zero) rounding is not allowed */
671 BSON_DECIMAL128_SET_NAN (*dec);
672 return false;
673 }
674 }
675
676 /* Encode significand */
677 significand_high = 0, /* The high 17 digits of the significand */
678 significand_low = 0; /* The low 17 digits of the significand */
679
680 if (significant_digits == 0) { /* read a zero */
681 significand_high = 0;
682 significand_low = 0;
683 } else if (last_digit - first_digit < 17) {
684 int d_idx = first_digit;
685 significand_low = digits[d_idx++];
686
687 for (; d_idx <= last_digit; d_idx++) {
688 significand_low *= 10;
689 significand_low += digits[d_idx];
690 significand_high = 0;
691 }
692 } else {
693 int d_idx = first_digit;
694 significand_high = digits[d_idx++];
695
696 for (; d_idx <= last_digit - 17; d_idx++) {
697 significand_high *= 10;
698 significand_high += digits[d_idx];
699 }
700
701 significand_low = digits[d_idx++];
702
703 for (; d_idx <= last_digit; d_idx++) {
704 significand_low *= 10;
705 significand_low += digits[d_idx];
706 }
707 }
708
709 _mul_64x64 (significand_high,
710 100000000000000000ull,
711 &significand);
712 significand.low += significand_low;
713
714 if (significand.low < significand_low) {
715 significand.high += 1;
716 }
717
718
719 biased_exponent = (exponent + (int16_t)BSON_DECIMAL128_EXPONENT_BIAS);
720
721 /* Encode combination, exponent, and significand. */
722 if ((significand.high >> 49) & 1) {
723 /* Encode '11' into bits 1 to 3 */
724 dec->high |= (0x3ull << 61);
725 dec->high |= (biased_exponent & 0x3fffull) << 47;
726 dec->high |= significand.high & 0x7fffffffffffull;
727 } else {
728 dec->high |= (biased_exponent & 0x3fffull) << 49;
729 dec->high |= significand.high & 0x1ffffffffffffull;
730 }
731
732 dec->low = significand.low;
733
734 /* Encode sign */
735 if (is_negative) {
736 dec->high |= 0x8000000000000000ull;
737 }
738
739 return true;
740 }
741