1 // Tencent is pleased to support the open source community by making RapidJSON available.
2 //
3 // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
4 //
5 // Licensed under the MIT License (the "License"); you may not use this file except
6 // in compliance with the License. You may obtain a copy of the License at
7 //
8 // http://opensource.org/licenses/MIT
9 //
10 // Unless required by applicable law or agreed to in writing, software distributed
11 // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12 // CONDITIONS OF ANY KIND, either express or implied. See the License for the
13 // specific language governing permissions and limitations under the License.
14 
15 // This is a C++ header-only implementation of Grisu2 algorithm from the publication:
16 // Loitsch, Florian. "Printing floating-point numbers quickly and accurately with
17 // integers." ACM Sigplan Notices 45.6 (2010): 233-243.
18 
19 #ifndef RAPIDJSON_DIYFP_H_
20 #define RAPIDJSON_DIYFP_H_
21 
22 #include "../rapidjson.h"
23 
24 #if defined(_MSC_VER) && defined(_M_AMD64)
25 #include <intrin.h>
26 #pragma intrinsic(_BitScanReverse64)
27 #pragma intrinsic(_umul128)
28 #endif
29 
30 RAPIDJSON_NAMESPACE_BEGIN
31 namespace internal {
32 
33 #ifdef __GNUC__
34 RAPIDJSON_DIAG_PUSH
35 RAPIDJSON_DIAG_OFF(effc++)
36 #endif
37 
38 #ifdef __clang__
39 RAPIDJSON_DIAG_PUSH
40 RAPIDJSON_DIAG_OFF(padded)
41 #endif
42 
43 struct DiyFp {
DiyFpDiyFp44     DiyFp() : f(), e() {}
45 
DiyFpDiyFp46     DiyFp(uint64_t fp, int exp) : f(fp), e(exp) {}
47 
DiyFpDiyFp48     explicit DiyFp(double d) {
49         union {
50             double d;
51             uint64_t u64;
52         } u = { d };
53 
54         int biased_e = static_cast<int>((u.u64 & kDpExponentMask) >> kDpSignificandSize);
55         uint64_t significand = (u.u64 & kDpSignificandMask);
56         if (biased_e != 0) {
57             f = significand + kDpHiddenBit;
58             e = biased_e - kDpExponentBias;
59         }
60         else {
61             f = significand;
62             e = kDpMinExponent + 1;
63         }
64     }
65 
66     DiyFp operator-(const DiyFp& rhs) const {
67         return DiyFp(f - rhs.f, e);
68     }
69 
70     DiyFp operator*(const DiyFp& rhs) const {
71 #if defined(_MSC_VER) && defined(_M_AMD64)
72         uint64_t h;
73         uint64_t l = _umul128(f, rhs.f, &h);
74         if (l & (uint64_t(1) << 63)) // rounding
75             h++;
76         return DiyFp(h, e + rhs.e + 64);
77 #elif (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) && defined(__x86_64__)
78         __extension__ typedef unsigned __int128 uint128;
79         uint128 p = static_cast<uint128>(f) * static_cast<uint128>(rhs.f);
80         uint64_t h = static_cast<uint64_t>(p >> 64);
81         uint64_t l = static_cast<uint64_t>(p);
82         if (l & (uint64_t(1) << 63)) // rounding
83             h++;
84         return DiyFp(h, e + rhs.e + 64);
85 #else
86         const uint64_t M32 = 0xFFFFFFFF;
87         const uint64_t a = f >> 32;
88         const uint64_t b = f & M32;
89         const uint64_t c = rhs.f >> 32;
90         const uint64_t d = rhs.f & M32;
91         const uint64_t ac = a * c;
92         const uint64_t bc = b * c;
93         const uint64_t ad = a * d;
94         const uint64_t bd = b * d;
95         uint64_t tmp = (bd >> 32) + (ad & M32) + (bc & M32);
96         tmp += 1U << 31;  /// mult_round
97         return DiyFp(ac + (ad >> 32) + (bc >> 32) + (tmp >> 32), e + rhs.e + 64);
98 #endif
99     }
100 
NormalizeDiyFp101     DiyFp Normalize() const {
102 #if defined(_MSC_VER) && defined(_M_AMD64)
103         unsigned long index;
104         _BitScanReverse64(&index, f);
105         return DiyFp(f << (63 - index), e - (63 - index));
106 #elif defined(__GNUC__) && __GNUC__ >= 4
107         int s = __builtin_clzll(f);
108         return DiyFp(f << s, e - s);
109 #else
110         DiyFp res = *this;
111         while (!(res.f & (static_cast<uint64_t>(1) << 63))) {
112             res.f <<= 1;
113             res.e--;
114         }
115         return res;
116 #endif
117     }
118 
NormalizeBoundaryDiyFp119     DiyFp NormalizeBoundary() const {
120         DiyFp res = *this;
121         while (!(res.f & (kDpHiddenBit << 1))) {
122             res.f <<= 1;
123             res.e--;
124         }
125         res.f <<= (kDiySignificandSize - kDpSignificandSize - 2);
126         res.e = res.e - (kDiySignificandSize - kDpSignificandSize - 2);
127         return res;
128     }
129 
NormalizedBoundariesDiyFp130     void NormalizedBoundaries(DiyFp* minus, DiyFp* plus) const {
131         DiyFp pl = DiyFp((f << 1) + 1, e - 1).NormalizeBoundary();
132         DiyFp mi = (f == kDpHiddenBit) ? DiyFp((f << 2) - 1, e - 2) : DiyFp((f << 1) - 1, e - 1);
133         mi.f <<= mi.e - pl.e;
134         mi.e = pl.e;
135         *plus = pl;
136         *minus = mi;
137     }
138 
ToDoubleDiyFp139     double ToDouble() const {
140         union {
141             double d;
142             uint64_t u64;
143         }u;
144         const uint64_t be = (e == kDpDenormalExponent && (f & kDpHiddenBit) == 0) ? 0 :
145             static_cast<uint64_t>(e + kDpExponentBias);
146         u.u64 = (f & kDpSignificandMask) | (be << kDpSignificandSize);
147         return u.d;
148     }
149 
150     static const int kDiySignificandSize = 64;
151     static const int kDpSignificandSize = 52;
152     static const int kDpExponentBias = 0x3FF + kDpSignificandSize;
153     static const int kDpMaxExponent = 0x7FF - kDpExponentBias;
154     static const int kDpMinExponent = -kDpExponentBias;
155     static const int kDpDenormalExponent = -kDpExponentBias + 1;
156     static const uint64_t kDpExponentMask = RAPIDJSON_UINT64_C2(0x7FF00000, 0x00000000);
157     static const uint64_t kDpSignificandMask = RAPIDJSON_UINT64_C2(0x000FFFFF, 0xFFFFFFFF);
158     static const uint64_t kDpHiddenBit = RAPIDJSON_UINT64_C2(0x00100000, 0x00000000);
159 
160     uint64_t f;
161     int e;
162 };
163 
GetCachedPowerByIndex(size_t index)164 inline DiyFp GetCachedPowerByIndex(size_t index) {
165     // 10^-348, 10^-340, ..., 10^340
166     static const uint64_t kCachedPowers_F[] = {
167         RAPIDJSON_UINT64_C2(0xfa8fd5a0, 0x081c0288), RAPIDJSON_UINT64_C2(0xbaaee17f, 0xa23ebf76),
168         RAPIDJSON_UINT64_C2(0x8b16fb20, 0x3055ac76), RAPIDJSON_UINT64_C2(0xcf42894a, 0x5dce35ea),
169         RAPIDJSON_UINT64_C2(0x9a6bb0aa, 0x55653b2d), RAPIDJSON_UINT64_C2(0xe61acf03, 0x3d1a45df),
170         RAPIDJSON_UINT64_C2(0xab70fe17, 0xc79ac6ca), RAPIDJSON_UINT64_C2(0xff77b1fc, 0xbebcdc4f),
171         RAPIDJSON_UINT64_C2(0xbe5691ef, 0x416bd60c), RAPIDJSON_UINT64_C2(0x8dd01fad, 0x907ffc3c),
172         RAPIDJSON_UINT64_C2(0xd3515c28, 0x31559a83), RAPIDJSON_UINT64_C2(0x9d71ac8f, 0xada6c9b5),
173         RAPIDJSON_UINT64_C2(0xea9c2277, 0x23ee8bcb), RAPIDJSON_UINT64_C2(0xaecc4991, 0x4078536d),
174         RAPIDJSON_UINT64_C2(0x823c1279, 0x5db6ce57), RAPIDJSON_UINT64_C2(0xc2109436, 0x4dfb5637),
175         RAPIDJSON_UINT64_C2(0x9096ea6f, 0x3848984f), RAPIDJSON_UINT64_C2(0xd77485cb, 0x25823ac7),
176         RAPIDJSON_UINT64_C2(0xa086cfcd, 0x97bf97f4), RAPIDJSON_UINT64_C2(0xef340a98, 0x172aace5),
177         RAPIDJSON_UINT64_C2(0xb23867fb, 0x2a35b28e), RAPIDJSON_UINT64_C2(0x84c8d4df, 0xd2c63f3b),
178         RAPIDJSON_UINT64_C2(0xc5dd4427, 0x1ad3cdba), RAPIDJSON_UINT64_C2(0x936b9fce, 0xbb25c996),
179         RAPIDJSON_UINT64_C2(0xdbac6c24, 0x7d62a584), RAPIDJSON_UINT64_C2(0xa3ab6658, 0x0d5fdaf6),
180         RAPIDJSON_UINT64_C2(0xf3e2f893, 0xdec3f126), RAPIDJSON_UINT64_C2(0xb5b5ada8, 0xaaff80b8),
181         RAPIDJSON_UINT64_C2(0x87625f05, 0x6c7c4a8b), RAPIDJSON_UINT64_C2(0xc9bcff60, 0x34c13053),
182         RAPIDJSON_UINT64_C2(0x964e858c, 0x91ba2655), RAPIDJSON_UINT64_C2(0xdff97724, 0x70297ebd),
183         RAPIDJSON_UINT64_C2(0xa6dfbd9f, 0xb8e5b88f), RAPIDJSON_UINT64_C2(0xf8a95fcf, 0x88747d94),
184         RAPIDJSON_UINT64_C2(0xb9447093, 0x8fa89bcf), RAPIDJSON_UINT64_C2(0x8a08f0f8, 0xbf0f156b),
185         RAPIDJSON_UINT64_C2(0xcdb02555, 0x653131b6), RAPIDJSON_UINT64_C2(0x993fe2c6, 0xd07b7fac),
186         RAPIDJSON_UINT64_C2(0xe45c10c4, 0x2a2b3b06), RAPIDJSON_UINT64_C2(0xaa242499, 0x697392d3),
187         RAPIDJSON_UINT64_C2(0xfd87b5f2, 0x8300ca0e), RAPIDJSON_UINT64_C2(0xbce50864, 0x92111aeb),
188         RAPIDJSON_UINT64_C2(0x8cbccc09, 0x6f5088cc), RAPIDJSON_UINT64_C2(0xd1b71758, 0xe219652c),
189         RAPIDJSON_UINT64_C2(0x9c400000, 0x00000000), RAPIDJSON_UINT64_C2(0xe8d4a510, 0x00000000),
190         RAPIDJSON_UINT64_C2(0xad78ebc5, 0xac620000), RAPIDJSON_UINT64_C2(0x813f3978, 0xf8940984),
191         RAPIDJSON_UINT64_C2(0xc097ce7b, 0xc90715b3), RAPIDJSON_UINT64_C2(0x8f7e32ce, 0x7bea5c70),
192         RAPIDJSON_UINT64_C2(0xd5d238a4, 0xabe98068), RAPIDJSON_UINT64_C2(0x9f4f2726, 0x179a2245),
193         RAPIDJSON_UINT64_C2(0xed63a231, 0xd4c4fb27), RAPIDJSON_UINT64_C2(0xb0de6538, 0x8cc8ada8),
194         RAPIDJSON_UINT64_C2(0x83c7088e, 0x1aab65db), RAPIDJSON_UINT64_C2(0xc45d1df9, 0x42711d9a),
195         RAPIDJSON_UINT64_C2(0x924d692c, 0xa61be758), RAPIDJSON_UINT64_C2(0xda01ee64, 0x1a708dea),
196         RAPIDJSON_UINT64_C2(0xa26da399, 0x9aef774a), RAPIDJSON_UINT64_C2(0xf209787b, 0xb47d6b85),
197         RAPIDJSON_UINT64_C2(0xb454e4a1, 0x79dd1877), RAPIDJSON_UINT64_C2(0x865b8692, 0x5b9bc5c2),
198         RAPIDJSON_UINT64_C2(0xc83553c5, 0xc8965d3d), RAPIDJSON_UINT64_C2(0x952ab45c, 0xfa97a0b3),
199         RAPIDJSON_UINT64_C2(0xde469fbd, 0x99a05fe3), RAPIDJSON_UINT64_C2(0xa59bc234, 0xdb398c25),
200         RAPIDJSON_UINT64_C2(0xf6c69a72, 0xa3989f5c), RAPIDJSON_UINT64_C2(0xb7dcbf53, 0x54e9bece),
201         RAPIDJSON_UINT64_C2(0x88fcf317, 0xf22241e2), RAPIDJSON_UINT64_C2(0xcc20ce9b, 0xd35c78a5),
202         RAPIDJSON_UINT64_C2(0x98165af3, 0x7b2153df), RAPIDJSON_UINT64_C2(0xe2a0b5dc, 0x971f303a),
203         RAPIDJSON_UINT64_C2(0xa8d9d153, 0x5ce3b396), RAPIDJSON_UINT64_C2(0xfb9b7cd9, 0xa4a7443c),
204         RAPIDJSON_UINT64_C2(0xbb764c4c, 0xa7a44410), RAPIDJSON_UINT64_C2(0x8bab8eef, 0xb6409c1a),
205         RAPIDJSON_UINT64_C2(0xd01fef10, 0xa657842c), RAPIDJSON_UINT64_C2(0x9b10a4e5, 0xe9913129),
206         RAPIDJSON_UINT64_C2(0xe7109bfb, 0xa19c0c9d), RAPIDJSON_UINT64_C2(0xac2820d9, 0x623bf429),
207         RAPIDJSON_UINT64_C2(0x80444b5e, 0x7aa7cf85), RAPIDJSON_UINT64_C2(0xbf21e440, 0x03acdd2d),
208         RAPIDJSON_UINT64_C2(0x8e679c2f, 0x5e44ff8f), RAPIDJSON_UINT64_C2(0xd433179d, 0x9c8cb841),
209         RAPIDJSON_UINT64_C2(0x9e19db92, 0xb4e31ba9), RAPIDJSON_UINT64_C2(0xeb96bf6e, 0xbadf77d9),
210         RAPIDJSON_UINT64_C2(0xaf87023b, 0x9bf0ee6b)
211     };
212     static const int16_t kCachedPowers_E[] = {
213         -1220, -1193, -1166, -1140, -1113, -1087, -1060, -1034, -1007,  -980,
214         -954,  -927,  -901,  -874,  -847,  -821,  -794,  -768,  -741,  -715,
215         -688,  -661,  -635,  -608,  -582,  -555,  -529,  -502,  -475,  -449,
216         -422,  -396,  -369,  -343,  -316,  -289,  -263,  -236,  -210,  -183,
217         -157,  -130,  -103,   -77,   -50,   -24,     3,    30,    56,    83,
218         109,   136,   162,   189,   216,   242,   269,   295,   322,   348,
219         375,   402,   428,   455,   481,   508,   534,   561,   588,   614,
220         641,   667,   694,   720,   747,   774,   800,   827,   853,   880,
221         907,   933,   960,   986,  1013,  1039,  1066
222     };
223     return DiyFp(kCachedPowers_F[index], kCachedPowers_E[index]);
224 }
225 
GetCachedPower(int e,int * K)226 inline DiyFp GetCachedPower(int e, int* K) {
227 
228     //int k = static_cast<int>(ceil((-61 - e) * 0.30102999566398114)) + 374;
229     double dk = (-61 - e) * 0.30102999566398114 + 347;  // dk must be positive, so can do ceiling in positive
230     int k = static_cast<int>(dk);
231     if (dk - k > 0.0)
232         k++;
233 
234     unsigned index = static_cast<unsigned>((k >> 3) + 1);
235     *K = -(-348 + static_cast<int>(index << 3));    // decimal exponent no need lookup table
236 
237     return GetCachedPowerByIndex(index);
238 }
239 
GetCachedPower10(int exp,int * outExp)240 inline DiyFp GetCachedPower10(int exp, int *outExp) {
241      unsigned index = (static_cast<unsigned>(exp) + 348u) / 8u;
242      *outExp = -348 + static_cast<int>(index) * 8;
243      return GetCachedPowerByIndex(index);
244  }
245 
246 #ifdef __GNUC__
247 RAPIDJSON_DIAG_POP
248 #endif
249 
250 #ifdef __clang__
251 RAPIDJSON_DIAG_POP
252 RAPIDJSON_DIAG_OFF(padded)
253 #endif
254 
255 } // namespace internal
256 RAPIDJSON_NAMESPACE_END
257 
258 #endif // RAPIDJSON_DIYFP_H_
259