1 // Tencent is pleased to support the open source community by making RapidJSON available.
2 //
3 // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
4 //
5 // Licensed under the MIT License (the "License"); you may not use this file except
6 // in compliance with the License. You may obtain a copy of the License at
7 //
8 // http://opensource.org/licenses/MIT
9 //
10 // Unless required by applicable law or agreed to in writing, software distributed
11 // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12 // CONDITIONS OF ANY KIND, either express or implied. See the License for the
13 // specific language governing permissions and limitations under the License.
14
15 // This is a C++ header-only implementation of Grisu2 algorithm from the publication:
16 // Loitsch, Florian. "Printing floating-point numbers quickly and accurately with
17 // integers." ACM Sigplan Notices 45.6 (2010): 233-243.
18
19 #ifndef RAPIDJSON_DIYFP_H_
20 #define RAPIDJSON_DIYFP_H_
21
22 #include "../rapidjson.h"
23
24 #if defined(_MSC_VER) && defined(_M_AMD64)
25 #include <intrin.h>
26 #pragma intrinsic(_BitScanReverse64)
27 #pragma intrinsic(_umul128)
28 #endif
29
30 RAPIDJSON_NAMESPACE_BEGIN
31 namespace internal {
32
33 #ifdef __GNUC__
34 RAPIDJSON_DIAG_PUSH
35 RAPIDJSON_DIAG_OFF(effc++)
36 #endif
37
38 #ifdef __clang__
39 RAPIDJSON_DIAG_PUSH
40 RAPIDJSON_DIAG_OFF(padded)
41 #endif
42
43 struct DiyFp {
DiyFpDiyFp44 DiyFp() : f(), e() {}
45
DiyFpDiyFp46 DiyFp(uint64_t fp, int exp) : f(fp), e(exp) {}
47
DiyFpDiyFp48 explicit DiyFp(double d) {
49 union {
50 double d;
51 uint64_t u64;
52 } u = { d };
53
54 int biased_e = static_cast<int>((u.u64 & kDpExponentMask) >> kDpSignificandSize);
55 uint64_t significand = (u.u64 & kDpSignificandMask);
56 if (biased_e != 0) {
57 f = significand + kDpHiddenBit;
58 e = biased_e - kDpExponentBias;
59 }
60 else {
61 f = significand;
62 e = kDpMinExponent + 1;
63 }
64 }
65
66 DiyFp operator-(const DiyFp& rhs) const {
67 return DiyFp(f - rhs.f, e);
68 }
69
70 DiyFp operator*(const DiyFp& rhs) const {
71 #if defined(_MSC_VER) && defined(_M_AMD64)
72 uint64_t h;
73 uint64_t l = _umul128(f, rhs.f, &h);
74 if (l & (uint64_t(1) << 63)) // rounding
75 h++;
76 return DiyFp(h, e + rhs.e + 64);
77 #elif (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) && defined(__x86_64__)
78 __extension__ typedef unsigned __int128 uint128;
79 uint128 p = static_cast<uint128>(f) * static_cast<uint128>(rhs.f);
80 uint64_t h = static_cast<uint64_t>(p >> 64);
81 uint64_t l = static_cast<uint64_t>(p);
82 if (l & (uint64_t(1) << 63)) // rounding
83 h++;
84 return DiyFp(h, e + rhs.e + 64);
85 #else
86 const uint64_t M32 = 0xFFFFFFFF;
87 const uint64_t a = f >> 32;
88 const uint64_t b = f & M32;
89 const uint64_t c = rhs.f >> 32;
90 const uint64_t d = rhs.f & M32;
91 const uint64_t ac = a * c;
92 const uint64_t bc = b * c;
93 const uint64_t ad = a * d;
94 const uint64_t bd = b * d;
95 uint64_t tmp = (bd >> 32) + (ad & M32) + (bc & M32);
96 tmp += 1U << 31; /// mult_round
97 return DiyFp(ac + (ad >> 32) + (bc >> 32) + (tmp >> 32), e + rhs.e + 64);
98 #endif
99 }
100
NormalizeDiyFp101 DiyFp Normalize() const {
102 #if defined(_MSC_VER) && defined(_M_AMD64)
103 unsigned long index;
104 _BitScanReverse64(&index, f);
105 return DiyFp(f << (63 - index), e - (63 - index));
106 #elif defined(__GNUC__) && __GNUC__ >= 4
107 int s = __builtin_clzll(f);
108 return DiyFp(f << s, e - s);
109 #else
110 DiyFp res = *this;
111 while (!(res.f & (static_cast<uint64_t>(1) << 63))) {
112 res.f <<= 1;
113 res.e--;
114 }
115 return res;
116 #endif
117 }
118
NormalizeBoundaryDiyFp119 DiyFp NormalizeBoundary() const {
120 DiyFp res = *this;
121 while (!(res.f & (kDpHiddenBit << 1))) {
122 res.f <<= 1;
123 res.e--;
124 }
125 res.f <<= (kDiySignificandSize - kDpSignificandSize - 2);
126 res.e = res.e - (kDiySignificandSize - kDpSignificandSize - 2);
127 return res;
128 }
129
NormalizedBoundariesDiyFp130 void NormalizedBoundaries(DiyFp* minus, DiyFp* plus) const {
131 DiyFp pl = DiyFp((f << 1) + 1, e - 1).NormalizeBoundary();
132 DiyFp mi = (f == kDpHiddenBit) ? DiyFp((f << 2) - 1, e - 2) : DiyFp((f << 1) - 1, e - 1);
133 mi.f <<= mi.e - pl.e;
134 mi.e = pl.e;
135 *plus = pl;
136 *minus = mi;
137 }
138
ToDoubleDiyFp139 double ToDouble() const {
140 union {
141 double d;
142 uint64_t u64;
143 }u;
144 const uint64_t be = (e == kDpDenormalExponent && (f & kDpHiddenBit) == 0) ? 0 :
145 static_cast<uint64_t>(e + kDpExponentBias);
146 u.u64 = (f & kDpSignificandMask) | (be << kDpSignificandSize);
147 return u.d;
148 }
149
150 static const int kDiySignificandSize = 64;
151 static const int kDpSignificandSize = 52;
152 static const int kDpExponentBias = 0x3FF + kDpSignificandSize;
153 static const int kDpMaxExponent = 0x7FF - kDpExponentBias;
154 static const int kDpMinExponent = -kDpExponentBias;
155 static const int kDpDenormalExponent = -kDpExponentBias + 1;
156 static const uint64_t kDpExponentMask = RAPIDJSON_UINT64_C2(0x7FF00000, 0x00000000);
157 static const uint64_t kDpSignificandMask = RAPIDJSON_UINT64_C2(0x000FFFFF, 0xFFFFFFFF);
158 static const uint64_t kDpHiddenBit = RAPIDJSON_UINT64_C2(0x00100000, 0x00000000);
159
160 uint64_t f;
161 int e;
162 };
163
GetCachedPowerByIndex(size_t index)164 inline DiyFp GetCachedPowerByIndex(size_t index) {
165 // 10^-348, 10^-340, ..., 10^340
166 static const uint64_t kCachedPowers_F[] = {
167 RAPIDJSON_UINT64_C2(0xfa8fd5a0, 0x081c0288), RAPIDJSON_UINT64_C2(0xbaaee17f, 0xa23ebf76),
168 RAPIDJSON_UINT64_C2(0x8b16fb20, 0x3055ac76), RAPIDJSON_UINT64_C2(0xcf42894a, 0x5dce35ea),
169 RAPIDJSON_UINT64_C2(0x9a6bb0aa, 0x55653b2d), RAPIDJSON_UINT64_C2(0xe61acf03, 0x3d1a45df),
170 RAPIDJSON_UINT64_C2(0xab70fe17, 0xc79ac6ca), RAPIDJSON_UINT64_C2(0xff77b1fc, 0xbebcdc4f),
171 RAPIDJSON_UINT64_C2(0xbe5691ef, 0x416bd60c), RAPIDJSON_UINT64_C2(0x8dd01fad, 0x907ffc3c),
172 RAPIDJSON_UINT64_C2(0xd3515c28, 0x31559a83), RAPIDJSON_UINT64_C2(0x9d71ac8f, 0xada6c9b5),
173 RAPIDJSON_UINT64_C2(0xea9c2277, 0x23ee8bcb), RAPIDJSON_UINT64_C2(0xaecc4991, 0x4078536d),
174 RAPIDJSON_UINT64_C2(0x823c1279, 0x5db6ce57), RAPIDJSON_UINT64_C2(0xc2109436, 0x4dfb5637),
175 RAPIDJSON_UINT64_C2(0x9096ea6f, 0x3848984f), RAPIDJSON_UINT64_C2(0xd77485cb, 0x25823ac7),
176 RAPIDJSON_UINT64_C2(0xa086cfcd, 0x97bf97f4), RAPIDJSON_UINT64_C2(0xef340a98, 0x172aace5),
177 RAPIDJSON_UINT64_C2(0xb23867fb, 0x2a35b28e), RAPIDJSON_UINT64_C2(0x84c8d4df, 0xd2c63f3b),
178 RAPIDJSON_UINT64_C2(0xc5dd4427, 0x1ad3cdba), RAPIDJSON_UINT64_C2(0x936b9fce, 0xbb25c996),
179 RAPIDJSON_UINT64_C2(0xdbac6c24, 0x7d62a584), RAPIDJSON_UINT64_C2(0xa3ab6658, 0x0d5fdaf6),
180 RAPIDJSON_UINT64_C2(0xf3e2f893, 0xdec3f126), RAPIDJSON_UINT64_C2(0xb5b5ada8, 0xaaff80b8),
181 RAPIDJSON_UINT64_C2(0x87625f05, 0x6c7c4a8b), RAPIDJSON_UINT64_C2(0xc9bcff60, 0x34c13053),
182 RAPIDJSON_UINT64_C2(0x964e858c, 0x91ba2655), RAPIDJSON_UINT64_C2(0xdff97724, 0x70297ebd),
183 RAPIDJSON_UINT64_C2(0xa6dfbd9f, 0xb8e5b88f), RAPIDJSON_UINT64_C2(0xf8a95fcf, 0x88747d94),
184 RAPIDJSON_UINT64_C2(0xb9447093, 0x8fa89bcf), RAPIDJSON_UINT64_C2(0x8a08f0f8, 0xbf0f156b),
185 RAPIDJSON_UINT64_C2(0xcdb02555, 0x653131b6), RAPIDJSON_UINT64_C2(0x993fe2c6, 0xd07b7fac),
186 RAPIDJSON_UINT64_C2(0xe45c10c4, 0x2a2b3b06), RAPIDJSON_UINT64_C2(0xaa242499, 0x697392d3),
187 RAPIDJSON_UINT64_C2(0xfd87b5f2, 0x8300ca0e), RAPIDJSON_UINT64_C2(0xbce50864, 0x92111aeb),
188 RAPIDJSON_UINT64_C2(0x8cbccc09, 0x6f5088cc), RAPIDJSON_UINT64_C2(0xd1b71758, 0xe219652c),
189 RAPIDJSON_UINT64_C2(0x9c400000, 0x00000000), RAPIDJSON_UINT64_C2(0xe8d4a510, 0x00000000),
190 RAPIDJSON_UINT64_C2(0xad78ebc5, 0xac620000), RAPIDJSON_UINT64_C2(0x813f3978, 0xf8940984),
191 RAPIDJSON_UINT64_C2(0xc097ce7b, 0xc90715b3), RAPIDJSON_UINT64_C2(0x8f7e32ce, 0x7bea5c70),
192 RAPIDJSON_UINT64_C2(0xd5d238a4, 0xabe98068), RAPIDJSON_UINT64_C2(0x9f4f2726, 0x179a2245),
193 RAPIDJSON_UINT64_C2(0xed63a231, 0xd4c4fb27), RAPIDJSON_UINT64_C2(0xb0de6538, 0x8cc8ada8),
194 RAPIDJSON_UINT64_C2(0x83c7088e, 0x1aab65db), RAPIDJSON_UINT64_C2(0xc45d1df9, 0x42711d9a),
195 RAPIDJSON_UINT64_C2(0x924d692c, 0xa61be758), RAPIDJSON_UINT64_C2(0xda01ee64, 0x1a708dea),
196 RAPIDJSON_UINT64_C2(0xa26da399, 0x9aef774a), RAPIDJSON_UINT64_C2(0xf209787b, 0xb47d6b85),
197 RAPIDJSON_UINT64_C2(0xb454e4a1, 0x79dd1877), RAPIDJSON_UINT64_C2(0x865b8692, 0x5b9bc5c2),
198 RAPIDJSON_UINT64_C2(0xc83553c5, 0xc8965d3d), RAPIDJSON_UINT64_C2(0x952ab45c, 0xfa97a0b3),
199 RAPIDJSON_UINT64_C2(0xde469fbd, 0x99a05fe3), RAPIDJSON_UINT64_C2(0xa59bc234, 0xdb398c25),
200 RAPIDJSON_UINT64_C2(0xf6c69a72, 0xa3989f5c), RAPIDJSON_UINT64_C2(0xb7dcbf53, 0x54e9bece),
201 RAPIDJSON_UINT64_C2(0x88fcf317, 0xf22241e2), RAPIDJSON_UINT64_C2(0xcc20ce9b, 0xd35c78a5),
202 RAPIDJSON_UINT64_C2(0x98165af3, 0x7b2153df), RAPIDJSON_UINT64_C2(0xe2a0b5dc, 0x971f303a),
203 RAPIDJSON_UINT64_C2(0xa8d9d153, 0x5ce3b396), RAPIDJSON_UINT64_C2(0xfb9b7cd9, 0xa4a7443c),
204 RAPIDJSON_UINT64_C2(0xbb764c4c, 0xa7a44410), RAPIDJSON_UINT64_C2(0x8bab8eef, 0xb6409c1a),
205 RAPIDJSON_UINT64_C2(0xd01fef10, 0xa657842c), RAPIDJSON_UINT64_C2(0x9b10a4e5, 0xe9913129),
206 RAPIDJSON_UINT64_C2(0xe7109bfb, 0xa19c0c9d), RAPIDJSON_UINT64_C2(0xac2820d9, 0x623bf429),
207 RAPIDJSON_UINT64_C2(0x80444b5e, 0x7aa7cf85), RAPIDJSON_UINT64_C2(0xbf21e440, 0x03acdd2d),
208 RAPIDJSON_UINT64_C2(0x8e679c2f, 0x5e44ff8f), RAPIDJSON_UINT64_C2(0xd433179d, 0x9c8cb841),
209 RAPIDJSON_UINT64_C2(0x9e19db92, 0xb4e31ba9), RAPIDJSON_UINT64_C2(0xeb96bf6e, 0xbadf77d9),
210 RAPIDJSON_UINT64_C2(0xaf87023b, 0x9bf0ee6b)
211 };
212 static const int16_t kCachedPowers_E[] = {
213 -1220, -1193, -1166, -1140, -1113, -1087, -1060, -1034, -1007, -980,
214 -954, -927, -901, -874, -847, -821, -794, -768, -741, -715,
215 -688, -661, -635, -608, -582, -555, -529, -502, -475, -449,
216 -422, -396, -369, -343, -316, -289, -263, -236, -210, -183,
217 -157, -130, -103, -77, -50, -24, 3, 30, 56, 83,
218 109, 136, 162, 189, 216, 242, 269, 295, 322, 348,
219 375, 402, 428, 455, 481, 508, 534, 561, 588, 614,
220 641, 667, 694, 720, 747, 774, 800, 827, 853, 880,
221 907, 933, 960, 986, 1013, 1039, 1066
222 };
223 return DiyFp(kCachedPowers_F[index], kCachedPowers_E[index]);
224 }
225
GetCachedPower(int e,int * K)226 inline DiyFp GetCachedPower(int e, int* K) {
227
228 //int k = static_cast<int>(ceil((-61 - e) * 0.30102999566398114)) + 374;
229 double dk = (-61 - e) * 0.30102999566398114 + 347; // dk must be positive, so can do ceiling in positive
230 int k = static_cast<int>(dk);
231 if (dk - k > 0.0)
232 k++;
233
234 unsigned index = static_cast<unsigned>((k >> 3) + 1);
235 *K = -(-348 + static_cast<int>(index << 3)); // decimal exponent no need lookup table
236
237 return GetCachedPowerByIndex(index);
238 }
239
GetCachedPower10(int exp,int * outExp)240 inline DiyFp GetCachedPower10(int exp, int *outExp) {
241 unsigned index = (static_cast<unsigned>(exp) + 348u) / 8u;
242 *outExp = -348 + static_cast<int>(index) * 8;
243 return GetCachedPowerByIndex(index);
244 }
245
246 #ifdef __GNUC__
247 RAPIDJSON_DIAG_POP
248 #endif
249
250 #ifdef __clang__
251 RAPIDJSON_DIAG_POP
252 RAPIDJSON_DIAG_OFF(padded)
253 #endif
254
255 } // namespace internal
256 RAPIDJSON_NAMESPACE_END
257
258 #endif // RAPIDJSON_DIYFP_H_
259