xref: /reactos/sdk/lib/ucrt/convert/atoldbl.cpp (revision 04e0dc4a)
1 //
2 // atoldbl.cpp
3 //
4 //      Copyright (c) Microsoft Corporation. All rights reserved.
5 //
6 // The _atoldbl and _atoldbl_l functions, which convert a string representation
7 // of a floating point number into a 10-byte _LDOUBLE object.
8 //
9 #define _ALLOW_OLD_VALIDATE_MACROS
10 #include <corecrt_internal.h>
11 #include <corecrt_internal_fltintrn.h>
12 #include <corecrt_internal_strtox.h>
13 #include <float.h>
14 #include <locale.h>
15 #include <math.h>
16 #include <stdlib.h>
17 #include <string.h>
18 
19 
20 
// Byte pointer to the 12-byte storage (the ld12 member) of an _LDBL12.
#define PTR_12(x) ((uint8_t*)(&(x)->ld12))

// Most significant bit of a 16-bit and of a 32-bit unsigned value.
#define MSB_USHORT  ((uint16_t)     0x8000)
#define MSB_ULONG   ((uint32_t) 0x80000000)

#define TMAX10  5200       /* maximum temporary decimal exponent */
#define TMIN10 (-5200)     /* minimum temporary decimal exponent */
#define LD_MAX_EXP_LEN   4 /* maximum number of decimal exponent digits */
#define LD_MAX_MAN_LEN  24 /* maximum length of mantissa (decimal)*/
#define LD_MAX_MAN_LEN1 25 /* MAX_MAN_LEN+1 */

#define LD_BIAS   0x3fff  /* exponent bias for long double */
#define LD_BIASM1 0x3ffe  /* LD_BIAS - 1 */
#define LD_MAXEXP 0x7fff  /* maximum biased exponent */

#define D_BIAS   0x3ff  /* exponent bias for double */
#define D_BIASM1 0x3fe  /* D_BIAS - 1 */
#define D_MAXEXP 0x7ff  /* maximum biased exponent */

// Macros for manipulation of a 12-byte long double number (an ordinary 10-byte
// long double plus two extra bytes of mantissa).
// byte layout:
//
//              +-----+--------+--------+-------+
//              |XT(2)|MANLO(4)|MANHI(4)|EXP(2) |
//              +-----+--------+--------+-------+
//              |<-UL_LO->|<-UL_MED->|<-UL_HI ->|
//                  (4)       (4)        (4)
#define ALIGN(x) ((unsigned long _UNALIGNED*)(x))

// Field views: exponent word, high/low mantissa dwords, and the two extra
// (least significant) mantissa bytes.  MANHI and MANLO start at offsets that
// are not multiples of four, hence the _UNALIGNED qualifier.
#define U_EXP_12(p)    ((uint16_t           *)(PTR_12(p) + 10))
#define UL_MANHI_12(p) ((uint32_t _UNALIGNED*)(PTR_12(p) +  6))
#define UL_MANLO_12(p) ((uint32_t _UNALIGNED*)(PTR_12(p) +  2))
#define U_XT_12(p)     ((uint16_t           *)(PTR_12(p)     ))

// Pointers to the four low, mid, and high order bytes of the extended mantissa
#define UL_LO_12(p)  ((uint32_t*)(PTR_12(p)    ))
#define UL_MED_12(p) ((uint32_t*)(PTR_12(p) + 4))
#define UL_HI_12(p)  ((uint32_t*)(PTR_12(p) + 8))

// Pointers to the uint8_t, uint16_t, and uint32_t of order i (LSB = 0; MSB = 9)
#define UCHAR_12(p, i)   ((uint8_t *)(          PTR_12(p) + (i)))
#define USHORT_12(p, i)  ((uint16_t*)((uint8_t*)PTR_12(p) + (i)))
#define ULONG_12(p, i)   ((uint32_t*)((uint8_t*)PTR_12(p) + (i)))
#define TEN_BYTE_PART(p) ((uint8_t *)(          PTR_12(p) +  2 ))

// Manipulation of a 10-byte long double number
#define U_EXP_LD(p)    ((uint16_t*)(_PTR_LD(p) + 8))
#define UL_MANHI_LD(p) ((uint32_t*)(_PTR_LD(p) + 4))
#define UL_MANLO_LD(p) ((uint32_t*)(_PTR_LD(p)    ))

// Manipulation of a 64bit IEEE double
#define U_SHORT4_D(p) ((uint16_t*)(p) + 3)
#define UL_HI_D(p)    ((uint32_t*)(p) + 1)
#define UL_LO_D(p)    ((uint32_t*)(p)    )

// The PUT_* macros expand to several assignments.  Each one is wrapped in
// do { ... } while (0) so that it behaves as a single statement, including
// when it is the body of an unbraced if/else.

// Stores a signed infinity: exponent 0x7fff (plus the sign bit when sign is
// non-zero), most significant mantissa bit set, all other mantissa bits clear.
#define PUT_INF_12(p, sign)                               \
    do                                                    \
    {                                                     \
        *UL_HI_12 (p) = (sign) ? 0xffff8000 : 0x7fff8000; \
        *UL_MED_12(p) = 0;                                \
        *UL_LO_12 (p) = 0;                                \
    } while (0)

// Clears all twelve bytes (positive zero).
#define PUT_ZERO_12(p)     \
    do                     \
    {                      \
        *UL_HI_12 (p) = 0; \
        *UL_MED_12(p) = 0; \
        *UL_LO_12 (p) = 0; \
    } while (0)

// True when every bit other than the sign bit is clear.
#define ISZERO_12(p) \
    ((*UL_HI_12 (p) & 0x7fffffff) == 0 && \
      *UL_MED_12(p)               == 0 && \
      *UL_LO_12 (p)               == 0)

// Stores a signed infinity into a 10-byte long double.  The high mantissa
// dword must have its most significant bit set (0x80000000) to match the
// pattern PUT_INF_12 writes; the earlier value 0x8000 set bit 15 instead.
#define PUT_INF_LD(p, sign)                         \
    do                                              \
    {                                               \
        *U_EXP_LD   (p) = (sign) ? 0xffff : 0x7fff; \
        *UL_MANHI_LD(p) = 0x80000000;               \
        *UL_MANLO_LD(p) = 0;                        \
    } while (0)

// Clears all ten bytes (positive zero).
#define PUT_ZERO_LD(p)       \
    do                       \
    {                        \
        *U_EXP_LD   (p) = 0; \
        *UL_MANHI_LD(p) = 0; \
        *UL_MANLO_LD(p) = 0; \
    } while (0)

// True when every bit other than the sign bit is clear.
#define ISZERO_LD(p)                    \
    ((*U_EXP_LD   (p) & 0x7fff) == 0 && \
      *UL_MANHI_LD(p)           == 0 && \
      *UL_MANLO_LD(p)           == 0)
106 
107 
108 
// Positive powers of ten in the 12-byte _LDBL12 layout.  Each initializer
// lists the bytes in storage order: 2 extension bytes, 4 low mantissa bytes,
// 4 high mantissa bytes, 2 exponent bytes (see the layout diagram near the
// field macros).
//
// The per-entry comments give the decimal exponent.  Entries come in runs of
// seven per base-8 digit position k: 10^(d * 8^k) for d = 1..7, with only
// d = 1 present for the last position (10^4096).  multiply_ten_pow_ld12
// selects one entry per non-zero base-8 digit of the exponent.
109 static _LDBL12 const ld12_pow10_positive[] =
110 {
111     /*P0001*/ {{0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xA0,0x02,0x40}},
112     /*P0002*/ {{0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xC8,0x05,0x40}},
113     /*P0003*/ {{0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xFA,0x08,0x40}},
114     /*P0004*/ {{0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x40,0x9C,0x0C,0x40}},
115     /*P0005*/ {{0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x50,0xC3,0x0F,0x40}},
116     /*P0006*/ {{0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x24,0xF4,0x12,0x40}},
117     /*P0007*/ {{0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x80,0x96,0x98,0x16,0x40}},
118     /*P0008*/ {{0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x20,0xBC,0xBE,0x19,0x40}},
119     /*P0016*/ {{0x00,0x00, 0x00,0x00,0x00,0x04,0xBF,0xC9,0x1B,0x8E,0x34,0x40}},
120     /*P0024*/ {{0x00,0x00, 0x00,0xA1,0xED,0xCC,0xCE,0x1B,0xC2,0xD3,0x4E,0x40}},
121     /*P0032*/ {{0x20,0xF0, 0x9E,0xB5,0x70,0x2B,0xA8,0xAD,0xC5,0x9D,0x69,0x40}},
122     /*P0040*/ {{0xD0,0x5D, 0xFD,0x25,0xE5,0x1A,0x8E,0x4F,0x19,0xEB,0x83,0x40}},
123     /*P0048*/ {{0x71,0x96, 0xD7,0x95,0x43,0x0E,0x05,0x8D,0x29,0xAF,0x9E,0x40}},
124     /*P0056*/ {{0xF9,0xBF, 0xA0,0x44,0xED,0x81,0x12,0x8F,0x81,0x82,0xB9,0x40}},
125     /*P0064*/ {{0xBF,0x3C, 0xD5,0xA6,0xCF,0xFF,0x49,0x1F,0x78,0xC2,0xD3,0x40}},
126     /*P0128*/ {{0x6F,0xC6, 0xE0,0x8C,0xE9,0x80,0xC9,0x47,0xBA,0x93,0xA8,0x41}},
127     /*P0192*/ {{0xBC,0x85, 0x6B,0x55,0x27,0x39,0x8D,0xF7,0x70,0xE0,0x7C,0x42}},
128     /*P0256*/ {{0xBC,0xDD, 0x8E,0xDE,0xF9,0x9D,0xFB,0xEB,0x7E,0xAA,0x51,0x43}},
129     /*P0320*/ {{0xA1,0xE6, 0x76,0xE3,0xCC,0xF2,0x29,0x2F,0x84,0x81,0x26,0x44}},
130     /*P0384*/ {{0x28,0x10, 0x17,0xAA,0xF8,0xAE,0x10,0xE3,0xC5,0xC4,0xFA,0x44}},
131     /*P0448*/ {{0xEB,0xA7, 0xD4,0xF3,0xF7,0xEB,0xE1,0x4A,0x7A,0x95,0xCF,0x45}},
132     /*P0512*/ {{0x65,0xCC, 0xC7,0x91,0x0E,0xA6,0xAE,0xA0,0x19,0xE3,0xA3,0x46}},
133     /*P1024*/ {{0x0D,0x65, 0x17,0x0C,0x75,0x81,0x86,0x75,0x76,0xC9,0x48,0x4D}},
134     /*P1536*/ {{0x58,0x42, 0xE4,0xA7,0x93,0x39,0x3B,0x35,0xB8,0xB2,0xED,0x53}},
135     /*P2048*/ {{0x4D,0xA7, 0xE5,0x5D,0x3D,0xC5,0x5D,0x3B,0x8B,0x9E,0x92,0x5A}},
136     /*P2560*/ {{0xFF,0x5D, 0xA6,0xF0,0xA1,0x20,0xC0,0x54,0xA5,0x8C,0x37,0x61}},
137     /*P3072*/ {{0xD1,0xFD, 0x8B,0x5A,0x8B,0xD8,0x25,0x5D,0x89,0xF9,0xDB,0x67}},
138     /*P3584*/ {{0xAA,0x95, 0xF8,0xF3,0x27,0xBF,0xA2,0xC8,0x5D,0xDD,0x80,0x6E}},
139     /*P4096*/ {{0x4C,0xC9, 0x9B,0x97,0x20,0x8A,0x02,0x52,0x60,0xC4,0x25,0x75}}
140 };
141 
// Negative powers of ten in the 12-byte _LDBL12 layout, entry-for-entry
// parallel to ld12_pow10_positive: the per-entry comment N<e> marks the value
// 10^-e.  Bytes are listed in storage order (2 extension bytes, 4 low mantissa
// bytes, 4 high mantissa bytes, 2 exponent bytes).  multiply_ten_pow_ld12
// uses this table when the requested decimal exponent is negative.
142 static _LDBL12 const ld12_pow10_negative[] =
143 {
144     /*N0001*/ {{0xCD,0xCC, 0xCD,0xCC,0xCC,0xCC,0xCC,0xCC,0xCC,0xCC,0xFB,0x3F}},
145     /*N0002*/ {{0x71,0x3D, 0x0A,0xD7,0xA3,0x70,0x3D,0x0A,0xD7,0xA3,0xF8,0x3F}},
146     /*N0003*/ {{0x5A,0x64, 0x3B,0xDF,0x4F,0x8D,0x97,0x6E,0x12,0x83,0xF5,0x3F}},
147     /*N0004*/ {{0xC3,0xD3, 0x2C,0x65,0x19,0xE2,0x58,0x17,0xB7,0xD1,0xF1,0x3F}},
148     /*N0005*/ {{0xD0,0x0F, 0x23,0x84,0x47,0x1B,0x47,0xAC,0xC5,0xA7,0xEE,0x3F}},
149     /*N0006*/ {{0x40,0xA6, 0xB6,0x69,0x6C,0xAF,0x05,0xBD,0x37,0x86,0xEB,0x3F}},
150     /*N0007*/ {{0x33,0x3D, 0xBC,0x42,0x7A,0xE5,0xD5,0x94,0xBF,0xD6,0xE7,0x3F}},
151     /*N0008*/ {{0xC2,0xFD, 0xFD,0xCE,0x61,0x84,0x11,0x77,0xCC,0xAB,0xE4,0x3F}},
152     /*N0016*/ {{0x2F,0x4C, 0x5B,0xE1,0x4D,0xC4,0xBE,0x94,0x95,0xE6,0xC9,0x3F}},
153     /*N0024*/ {{0x92,0xC4, 0x53,0x3B,0x75,0x44,0xCD,0x14,0xBE,0x9A,0xAF,0x3F}},
154     /*N0032*/ {{0xDE,0x67, 0xBA,0x94,0x39,0x45,0xAD,0x1E,0xB1,0xCF,0x94,0x3F}},
155     /*N0040*/ {{0x24,0x23, 0xC6,0xE2,0xBC,0xBA,0x3B,0x31,0x61,0x8B,0x7A,0x3F}},
156     /*N0048*/ {{0x61,0x55, 0x59,0xC1,0x7E,0xB1,0x53,0x7C,0x12,0xBB,0x5F,0x3F}},
157     /*N0056*/ {{0xD7,0xEE, 0x2F,0x8D,0x06,0xBE,0x92,0x85,0x15,0xFB,0x44,0x3F}},
158     /*N0064*/ {{0x24,0x3F, 0xA5,0xE9,0x39,0xA5,0x27,0xEA,0x7F,0xA8,0x2A,0x3F}},
159     /*N0128*/ {{0x7D,0xAC, 0xA1,0xE4,0xBC,0x64,0x7C,0x46,0xD0,0xDD,0x55,0x3E}},
160     /*N0192*/ {{0x63,0x7B, 0x06,0xCC,0x23,0x54,0x77,0x83,0xFF,0x91,0x81,0x3D}},
161     /*N0256*/ {{0x91,0xFA, 0x3A,0x19,0x7A,0x63,0x25,0x43,0x31,0xC0,0xAC,0x3C}},
162     /*N0320*/ {{0x21,0x89, 0xD1,0x38,0x82,0x47,0x97,0xB8,0x00,0xFD,0xD7,0x3B}},
163     /*N0384*/ {{0xDC,0x88, 0x58,0x08,0x1B,0xB1,0xE8,0xE3,0x86,0xA6,0x03,0x3B}},
164     /*N0448*/ {{0xC6,0x84, 0x45,0x42,0x07,0xB6,0x99,0x75,0x37,0xDB,0x2E,0x3A}},
165     /*N0512*/ {{0x33,0x71, 0x1C,0xD2,0x23,0xDB,0x32,0xEE,0x49,0x90,0x5A,0x39}},
166     /*N1024*/ {{0xA6,0x87, 0xBE,0xC0,0x57,0xDA,0xA5,0x82,0xA6,0xA2,0xB5,0x32}},
167     /*N1536*/ {{0xE2,0x68, 0xB2,0x11,0xA7,0x52,0x9F,0x44,0x59,0xB7,0x10,0x2C}},
168     /*N2048*/ {{0x25,0x49, 0xE4,0x2D,0x36,0x34,0x4F,0x53,0xAE,0xCE,0x6B,0x25}},
169     /*N2560*/ {{0x8F,0x59, 0x04,0xA4,0xC0,0xDE,0xC2,0x7D,0xFB,0xE8,0xC6,0x1E}},
170     /*N3072*/ {{0x9E,0xE7, 0x88,0x5A,0x57,0x91,0x3C,0xBF,0x50,0x83,0x22,0x18}},
171     /*N3584*/ {{0x4E,0x4B, 0x65,0x62,0xFD,0x83,0x8F,0xAF,0x06,0x94,0x7D,0x11}},
172     /*N4096*/ {{0xE4,0x2D, 0xDE,0x9F,0xCE,0xD2,0xC8,0x04,0xDD,0xA6,0xD8,0x0A}}
173 };
174 
175 
176 
// Computes x + y modulo 2^32 and writes the result through sum.
//
// Returns true when the addition wrapped around (a carry out of the top bit
// occurred) and false when the true sum fits in 32 bits.
static __forceinline bool __cdecl add_uint32_carry(uint32_t const x, uint32_t const y, uint32_t* const sum) throw()
{
    uint32_t const total = x + y;

    // A wrapped sum is smaller than at least one of its operands.
    bool const wrapped = !(total >= x && total >= y);

    *sum = total;
    return wrapped;
}
187 
188 // Adds *x and *y as 12-byte integers, storing the result in *x.  Overflow is ignored.
add_ld12(_LDBL12 * const x,_LDBL12 const * const y)189 static __forceinline void __cdecl add_ld12(_LDBL12* const x, _LDBL12 const* const y) throw()
190 {
191     if (add_uint32_carry(*UL_LO_12(x), *UL_LO_12(y), UL_LO_12(x)))
192     {
193         if (add_uint32_carry(*UL_MED_12(x), 1, UL_MED_12(x)))
194         {
195             ++*UL_HI_12(x);
196         }
197     }
198 
199     if (add_uint32_carry(*UL_MED_12(x), *UL_MED_12(y), UL_MED_12(x)))
200     {
201         ++*UL_HI_12(x);
202     }
203 
204     // Ignore next carry -- assume no overflow will occur
205     add_uint32_carry(*UL_HI_12(x), *UL_HI_12(y), UL_HI_12(x));
206 }
207 
208 // Shifts *p N bits to the left.  The number is shifted as a 12-byte integer.
209 template <uint32_t N>
shl_ld12(_LDBL12 * const p)210 static __forceinline void __cdecl shl_ld12(_LDBL12* const p) throw()
211 {
212     uint32_t const total_bits{sizeof(uint32_t) * CHAR_BIT};
213     uint32_t const msb_bits{N};
214     uint32_t const lsb_bits{total_bits - N};
215 
216     static_assert(msb_bits <= total_bits, "shift too large");
217 
218     uint32_t const lsb_mask{(1 << (lsb_bits - 1)) - 1};
219     uint32_t const msb_mask{static_cast<uint32_t>(-1) ^ lsb_mask};
220 
221     uint32_t const lo_carry {(*UL_LO_12 (p) & msb_mask) >> lsb_bits};
222     uint32_t const med_carry{(*UL_MED_12(p) & msb_mask) >> lsb_bits};
223 
224     *UL_LO_12 (p) = (*UL_LO_12 (p) << msb_bits);
225     *UL_MED_12(p) = (*UL_MED_12(p) << msb_bits) | lo_carry;
226     *UL_HI_12 (p) = (*UL_HI_12 (p) << msb_bits) | med_carry;
227 }
228 
229 // Shifts *p one bit to the right.  The number is shifted as a 12-byte integer.
shr_ld12(_LDBL12 * const p)230 static __forceinline void __cdecl shr_ld12(_LDBL12* const p) throw()
231 {
232     uint32_t const c2 = *UL_HI_12 (p) & 0x1 ? MSB_ULONG : 0;
233     uint32_t const c1 = *UL_MED_12(p) & 0x1 ? MSB_ULONG : 0;
234 
235     *UL_HI_12 (p) >>= 1;
236     *UL_MED_12(p) = *UL_MED_12(p) >> 1 | c2;
237     *UL_LO_12 (p) = *UL_LO_12 (p) >> 1 | c1;
238 }
239 
// multiply_ld12 -- in-place product of two values in the 12-byte format.
//
//   px : first operand; overwritten with the result.
//   py : second operand; only read.
//
// The result sign is the XOR of the operand sign bits.  The working exponent
// starts as the sum of the two 15-bit biased exponents and later has
// LD_BIASM1 subtracted.  An infinity is stored when either exponent field is
// LD_MAXEXP or the exponent overflows; zero is stored when the exponent sum
// is too small or when either operand is zero.  Otherwise the 16-bit words of
// the two 10-byte mantissas are multiplied pairwise into a 12-byte
// accumulator, which is normalized, rounded, and written back through px
// together with the exponent and sign.
240 // Multiplies *px and *py, storing the result in *px.
multiply_ld12(_LDBL12 * const px,_LDBL12 const * const py)241 static __forceinline void __cdecl multiply_ld12(_LDBL12* const px, _LDBL12 const* const py) throw()
242 {
243     _LDBL12 tempman; // This is actually a 12-byte mantissa, not a 12-byte long double
244     *UL_LO_12 (&tempman) = 0;
245     *UL_MED_12(&tempman) = 0;
246     *UL_HI_12 (&tempman) = 0;
247 
248     uint16_t expx = *U_EXP_12(px);
249     uint16_t expy = *U_EXP_12(py);
250 
        // Capture the result sign, then strip the sign bits so that expx and
        // expy hold only the biased exponents.
251     uint16_t const sign = (expx ^ expy) & static_cast<uint16_t>(0x8000);
252     expx &= 0x7fff;
253     expy &= 0x7fff;
254     uint16_t expsum = expx + expy;
255 
256     if (expx   >= LD_MAXEXP ||
257         expy   >= LD_MAXEXP ||
258         expsum >  LD_MAXEXP + LD_BIASM1)
259     {
260         // Overflow to infinity
261         PUT_INF_12(px, sign);
262         return;
263     }
264 
265     if (expsum <= LD_BIASM1 - 63)
266     {
267         // Underflow to zero
268         PUT_ZERO_12(px);
269         return;
270     }
271 
272     if (expx == 0)
273     {
274         // If this is a denormal temp real then the mantissa was shifted right
275         // once to set bit 63 to zero.
276         ++expsum; // Correct for this
277 
278         if (ISZERO_12(px))
279         {
280             // Put positive sign:
281             *U_EXP_12(px) = 0;
282             return;
283         }
284     }
285 
286     if (expy == 0)
287     {
288         ++expsum; // Because arg2 is denormal
289         if (ISZERO_12(py))
290         {
291             PUT_ZERO_12(px);
292             return;
293         }
294     }
295 
        // Partial products.  Each mantissa is viewed as five 16-bit words at
        // byte offsets 0, 2, 4, 6, 8.  For pass i the inner loop pairs the
        // px word at offset poffs (starting at 2*i, moving up) with the py
        // word at offset qoffs (starting at 8, moving down), so poffs + qoffs
        // stays at 8 + 2*i.  Every 32-bit product of that pass is added into
        // the accumulator at byte offset roffs == 2*i.  Word pairs whose
        // offsets sum to less than 8 are never multiplied.
296     int roffs = 0;
297     for (int i = 0; i < 5; ++i)
298     {
299         int poffs = i << 1;
300         int qoffs = 8;
301         for (int j = 5 - i; j > 0; --j)
302         {
303             bool carry = false;
304 
305             uint16_t* const p = USHORT_12(px, poffs);
306             uint16_t* const q = USHORT_12(py, qoffs);
307             uint32_t* const r = ULONG_12(&tempman, roffs);
308             uint32_t  const prod = static_cast<uint32_t>(*p) * static_cast<uint32_t>(*q);
309 
                // For odd i the accumulator offset (2*i) is not a multiple of
                // four; on these targets the 32-bit slot is then read and
                // written through the _UNALIGNED-qualified ALIGN() pointer.
310             #if defined _M_X64 || defined _M_ARM
311             // handle misalignment problems
312             if (i & 0x1) // i is odd
313             {
314                 uint32_t sum = 0;
315                 carry = add_uint32_carry(*ALIGN(r), prod, &sum);
316                 *ALIGN(r) = sum;
317             }
318             else // i is even
319             {
320                 carry = add_uint32_carry(*r, prod, r);
321             }
322             #else
323             carry = add_uint32_carry(*r, prod, r);
324             #endif
325 
                // A carry out of the 32-bit slot is added to the 16-bit word
                // immediately above it.
326             if (carry)
327             {
328                 // roffs should be less than 8 in this case
329                 ++*USHORT_12(&tempman, roffs + 4);
330             }
331 
332             poffs += 2;
333             qoffs -= 2;
334         }
335 
336         roffs += 2;
337     }
338 
        // Remove one bias so that expsum is a biased exponent again.  It is
        // compared as a signed 16-bit value below because it may now be
        // zero or negative.
339     expsum -= LD_BIASM1;
340 
        // Shift left until the top bit of the accumulator is set, or until
        // the exponent would drop to zero.
341     // Normalize
342     while (static_cast<int16_t>(expsum) > 0 && (*UL_HI_12(&tempman) & MSB_ULONG) == 0)
343     {
344          shl_ld12<1>(&tempman);
345          expsum--;
346     }
347 
        // Exponent at or below zero: shift the accumulator right until the
        // exponent reaches zero.  If any 1 bit is shifted out of the lowest
        // position, the lowest bit is set afterwards (sticky bit) so the
        // rounding step still sees that the value was inexact.
348     if (static_cast<int16_t>(expsum) <= 0)
349     {
350         bool sticky = false;
351 
352         expsum--;
353         while (static_cast<int16_t>(expsum) < 0)
354         {
355             if (*U_XT_12(&tempman) & 0x1)
356                 sticky = true;
357 
358             shr_ld12(&tempman);
359             expsum++;
360         }
361 
362         if (sticky)
363         {
364             *U_XT_12(&tempman) |= 0x1;
365         }
366     }
367 
        // The lowest 16 bits of the accumulator are not copied to the result.
        // Round up when they are above one half (> 0x8000), or exactly one
        // half with the lowest kept bit set (low 17 bits == 0x18000).  The
        // increment is applied to the bytes above those 16 bits and ripples
        // upward one field at a time.
368     if (*U_XT_12(&tempman) > 0x8000 || (*UL_LO_12(&tempman) & 0x1ffff) == 0x18000)
369     {
370         // Round up:
371         if (*UL_MANLO_12(&tempman) == UINT32_MAX)
372         {
373             *UL_MANLO_12(&tempman) = 0;
374 
375             if (*UL_MANHI_12(&tempman) == UINT32_MAX)
376             {
377                 *UL_MANHI_12(&tempman) = 0;
378 
379                 if (*U_EXP_12(&tempman) == UINT16_MAX)
380                 {
381                     // 12-byte mantissa overflow:
382                     *U_EXP_12(&tempman) = MSB_USHORT;
383                     ++expsum;
384                 }
385                 else
386                 {
387                     ++*U_EXP_12(&tempman);
388                 }
389             }
390             else
391             {
392                 ++*UL_MANHI_12(&tempman);
393             }
394         }
395         else
396         {
397             ++*UL_MANLO_12(&tempman);
398         }
399     }
400 
401 
402     // Check for exponent overflow:
403     if (expsum >= 0x7fff)
404     {
405         PUT_INF_12(px, sign);
406         return;
407     }
408 
        // The upper ten bytes of the accumulator (byte offsets 2..11) become
        // the ten mantissa bytes of the result; the exponent word receives
        // the computed exponent combined with the sign bit.
409     // Put result in px:
410     *U_XT_12    (px) = *USHORT_12(&tempman, 2);
411     *UL_MANLO_12(px) = *UL_MED_12(&tempman);
412     *UL_MANHI_12(px) = *UL_HI_12 (&tempman);
413     *U_EXP_12   (px) = expsum | sign;
414 }
415 
416 // Multiplies *pld12 by 10^pow.
multiply_ten_pow_ld12(_LDBL12 * const pld12,int pow)417 static __forceinline void __cdecl multiply_ten_pow_ld12(_LDBL12* const pld12, int pow) throw()
418 {
419     if (pow == 0)
420         return;
421 
422     _LDBL12 const* pow_10p = ld12_pow10_positive - 8;
423     if (pow < 0)
424     {
425         pow     = -pow;
426         pow_10p = ld12_pow10_negative-8;
427     }
428 
429     while (pow != 0)
430     {
431         pow_10p += 7;
432         int const last3 = pow & 0x7; // The three least significant bits of pow
433         pow >>= 3;
434 
435         if (last3 == 0)
436             continue;
437 
438         _LDBL12 const* py = pow_10p + last3;
439 
440         _LDBL12 unround;
441 
442         // Do an exact 12byte multiplication:
443         if (*U_XT_12(py) >= 0x8000)
444         {
445             // Copy number:
446             unround = *py;
447 
448             // Unround adjacent byte:
449             --*UL_MANLO_12(&unround);
450 
451             // Point to new operand:
452             py = &unround;
453         }
454 
455         multiply_ld12(pld12, py);
456     }
457 }
458 
459 // Multiplies *ld12 by 2^power.
multiply_two_pow_ld12(_LDBL12 * const ld12,int const power)460 static __forceinline void __cdecl multiply_two_pow_ld12(_LDBL12* const ld12, int const power) throw()
461 {
462     _LDBL12 multiplicand{};
463     *U_XT_12    (&multiplicand) = 0;
464     *UL_MANLO_12(&multiplicand) = 0;
465     *UL_MANHI_12(&multiplicand) = (1u << (sizeof(uint32_t) * CHAR_BIT - 1));
466     *U_EXP_12   (&multiplicand) = static_cast<uint16_t>(power + LD_BIAS);
467 
468     multiply_ld12(ld12, &multiplicand);
469 }
470 
471 
472 // These multiply a 12-byte integer stored in an _LDBL12 by N.  N must be 10 or 16.
473 template <uint32_t N>
474 static __forceinline void __cdecl multiply_ld12_by(_LDBL12*) throw();
475 
476 template <>
multiply_ld12_by(_LDBL12 * const ld12)477 __forceinline void __cdecl multiply_ld12_by<10>(_LDBL12* const ld12) throw()
478 {
479     _LDBL12 const original_ld12 = *ld12;
480     shl_ld12<2>(ld12);
481     add_ld12(ld12, &original_ld12);
482     shl_ld12<1>(ld12);
483 }
484 
485 template <>
multiply_ld12_by(_LDBL12 * const ld12)486 __forceinline void __cdecl multiply_ld12_by<16>(_LDBL12* const ld12) throw()
487 {
488     shl_ld12<4>(ld12);
489 }
490 
491 // Converts a mantissa into an _LDBL12.  The mantissa to be converted must be
492 // represented as an array of BCD digits, one per byte, read from the byte range
493 // [mantissa, mantissa + mantissa_count).
494 template <uint32_t Base>
convert_mantissa_to_ld12(uint8_t const * const mantissa,size_t const mantissa_count,_LDBL12 * const ld12)495 static __forceinline void __cdecl convert_mantissa_to_ld12(
496     uint8_t const* const mantissa,
497     size_t         const mantissa_count,
498     _LDBL12*       const ld12
499     ) throw()
500 {
501     *UL_LO_12 (ld12) = 0;
502     *UL_MED_12(ld12) = 0;
503     *UL_HI_12 (ld12) = 0;
504 
505     uint8_t const* const mantissa_last = mantissa + mantissa_count;
506     for (uint8_t const* it = mantissa; it != mantissa_last; ++it)
507     {
508         multiply_ld12_by<Base>(ld12);
509 
510         // Add the new digit into the mantissa:
511         _LDBL12 digit_ld12{};
512         *UL_LO_12 (&digit_ld12) = *it;
513         *UL_MED_12(&digit_ld12) = 0;
514         *UL_HI_12 (&digit_ld12) = 0;
515         add_ld12(ld12, &digit_ld12);
516     }
517 
518     uint16_t expn = LD_BIASM1 + 80;
519 
520     // Normalize mantissa.  First shift word-by-word:
521     while (*UL_HI_12(ld12) == 0)
522     {
523         *UL_HI_12 (ld12) = *UL_MED_12(ld12) >> 16;
524         *UL_MED_12(ld12) = *UL_MED_12(ld12) << 16 | *UL_LO_12(ld12) >> 16;
525         *UL_LO_12 (ld12) <<= 16;
526         expn -= 16;
527     }
528 
529     while ((*UL_HI_12(ld12) & MSB_USHORT) == 0)
530     {
531         shl_ld12<1>(ld12);
532         --expn;
533     }
534 
535     *U_EXP_12(ld12) = expn;
536 }
537 
538 namespace __crt_strtox {
539 
assemble_floating_point_zero(bool const is_negative,_LDBL12 & result)540 void __cdecl assemble_floating_point_zero(bool const is_negative, _LDBL12& result) throw()
541 {
542     uint16_t const sign_bit{static_cast<uint16_t>(is_negative ? MSB_USHORT : 0x0000)};
543 
544     // Zero is all zero bits with an optional sign bit:
545     *U_XT_12    (&result) = 0;
546     *UL_MANLO_12(&result) = 0;
547     *UL_MANHI_12(&result) = 0;
548     *U_EXP_12   (&result) = sign_bit;
549 }
550 
assemble_floating_point_infinity(bool const is_negative,_LDBL12 & result)551 void __cdecl assemble_floating_point_infinity(bool const is_negative, _LDBL12& result) throw()
552 {
553     uint16_t const sign_bit{static_cast<uint16_t>(is_negative ? MSB_USHORT : 0x0000)};
554 
555     // Infinity has an all-zero mantissa and an all-one exponent
556     *U_XT_12    (&result) = 0;
557     *UL_MANLO_12(&result) = 0;
558     *UL_MANHI_12(&result) = 0;
559     *U_EXP_12   (&result) = static_cast<uint16_t>(LD_MAXEXP) | sign_bit;
560 }
561 
assemble_floating_point_qnan(bool const is_negative,_LDBL12 & result)562 void __cdecl assemble_floating_point_qnan(bool const is_negative, _LDBL12& result) throw()
563 {
564     uint16_t const sign_bit{static_cast<uint16_t>(is_negative ? MSB_USHORT : 0x0000)};
565 
566     *U_XT_12    (&result) = 0xffff;
567     *UL_MANLO_12(&result) = 0xffffffff;
568     *UL_MANHI_12(&result) = 0xffffffff;
569     *U_EXP_12   (&result) = static_cast<uint16_t>(LD_MAXEXP) | sign_bit;
570 }
571 
assemble_floating_point_snan(bool const is_negative,_LDBL12 & result)572 void __cdecl assemble_floating_point_snan(bool const is_negative, _LDBL12& result) throw()
573 {
574     uint16_t const sign_bit{static_cast<uint16_t>(is_negative ? MSB_USHORT : 0x0000)};
575 
576     *U_XT_12    (&result) = 0xffff;
577     *UL_MANLO_12(&result) = 0xffffffff;
578     *UL_MANHI_12(&result) = 0xbfffffff;
579     *U_EXP_12   (&result) = static_cast<uint16_t>(LD_MAXEXP) | sign_bit;
580 }
581 
assemble_floating_point_ind(_LDBL12 & result)582 void __cdecl assemble_floating_point_ind(_LDBL12& result) throw()
583 {
584     uint16_t const sign_bit{static_cast<uint16_t>(MSB_USHORT)};
585 
586     *U_XT_12    (&result) = 0x0000;
587     *UL_MANLO_12(&result) = 0x00000000;
588     *UL_MANHI_12(&result) = 0xc0000000;
589     *U_EXP_12   (&result) = static_cast<uint16_t>(LD_MAXEXP) | sign_bit;
590 }
591 
common_convert_to_ldbl12(floating_point_string const & immutable_data,bool const is_hexadecimal,_LDBL12 & result)592 static SLD_STATUS __cdecl common_convert_to_ldbl12(
593     floating_point_string const& immutable_data,
594     bool                  const  is_hexadecimal,
595     _LDBL12                    & result
596     ) throw()
597 {
598     floating_point_string data = immutable_data;
599 
600     // Cap the number of digits to LD_MAX_MAN_LEN, and round the last digit:
601     if (data._mantissa_count > LD_MAX_MAN_LEN)
602     {
603         if (data._mantissa[LD_MAX_MAN_LEN] >= (is_hexadecimal ? 8 : 5))
604         {
605             ++data._mantissa[LD_MAX_MAN_LEN - 1];
606         }
607 
608         data._mantissa_count = LD_MAX_MAN_LEN;
609     }
610 
611     // The input exponent is an adjustment from the left (so 12.3456 is represented
612     // as a mantiss a of 123456 with an exponent of 2), but the legacy functions
613     // used here expect an adjustment from the right (so 12.3456 is represented
614     // with an exponent of -4).
615     int const exponent_adjustment_multiplier = is_hexadecimal ? 4 : 1;
616     data._exponent -= data._mantissa_count * exponent_adjustment_multiplier;
617 
618     if (is_hexadecimal)
619     {
620         convert_mantissa_to_ld12<16>(data._mantissa, data._mantissa_count, &result);
621         multiply_two_pow_ld12(&result, data._exponent);
622     }
623     else
624     {
625         convert_mantissa_to_ld12<10>(data._mantissa, data._mantissa_count, &result);
626         multiply_ten_pow_ld12(&result, data._exponent);
627     }
628 
629     if (data._is_negative)
630     {
631         *U_EXP_12(&result) |= 0x8000;
632     }
633 
634     // If the combination of the mantissa and the exponent produced an infinity,
635     // we've overflowed the range of the _LDBL12.
636     if ((*U_EXP_12(&result) & LD_MAXEXP) == LD_MAXEXP)
637     {
638         return SLD_OVERFLOW;
639     }
640 
641     return SLD_OK;
642 }
643 
convert_decimal_string_to_floating_type(floating_point_string const & data,_LDBL12 & result)644 SLD_STATUS __cdecl convert_decimal_string_to_floating_type(
645     floating_point_string const& data,
646     _LDBL12                    & result
647     ) throw()
648 {
649     return common_convert_to_ldbl12(data, false, result);
650 }
651 
convert_hexadecimal_string_to_floating_type(floating_point_string const & data,_LDBL12 & result)652 SLD_STATUS __cdecl convert_hexadecimal_string_to_floating_type(
653     floating_point_string const& data,
654     _LDBL12                    & result
655     ) throw()
656 {
657     return common_convert_to_ldbl12(data, true, result);
658 }
659 
660 } // namespace __crt_strtox
661 
662 using namespace __crt_strtox;
663 
664 
665 
transform_into_return_value(SLD_STATUS const status)666 static int __cdecl transform_into_return_value(SLD_STATUS const status) throw()
667 {
668     switch (status)
669     {
670     case SLD_OVERFLOW:  return _OVERFLOW;
671     case SLD_UNDERFLOW: return _UNDERFLOW;
672     default:            return 0;
673     }
674 }
675 
// Number of 32-bit words in the internal mantissa representation.
#define INTRNMAN_LEN 3

// Internal mantissa representation for the string conversion routines: an
// array of INTRNMAN_LEN 32-bit words, most significant word first.
typedef uint32_t* mantissa_t;


// Tests whether every mantissa bit that follows bit nbit is zero.
//
// Bits are numbered from the most significant end: bit 0 is the top bit of
// mantissa[0], bit 32 is the top bit of mantissa[1], and so on.  Bit nbit
// itself is not examined; only the less significant bits of its word and all
// following words are.
//
// Returns true when all of those bits are zero and false otherwise.
static __forceinline bool __cdecl mantissa_has_zero_tail(mantissa_t const mantissa, int const nbit) throw()
{
    int const first_word = nbit / 32;

    // Number of bits in first_word that lie below bit nbit.
    int const bits_below = 31 - nbit % 32;

    uint32_t const tail_mask = ~(UINT32_MAX << bits_below);
    if ((mantissa[first_word] & tail_mask) != 0)
        return false;

    for (int word = first_word + 1; word < INTRNMAN_LEN; ++word)
    {
        if (mantissa[word] != 0)
            return false;
    }

    return true;
}
716 
717 
718 
719 // Increments a mantissa.  The nbit argument specifies the end of the part to
720 // be incremented.  Returns true if overflow occurs; false otherwise.
increment_mantissa(mantissa_t const mantissa,int const nbit)721 static __forceinline bool __cdecl increment_mantissa(mantissa_t const mantissa, int const nbit) throw()
722 {
723     int nl = nbit / 32;
724     int const nb = 31 - nbit % 32;
725 
726     //
727     //    |<--- part to be incremented -->|
728     //
729     //    ---------------------------------
730     //    |...          |            |   ...      |
731     //    ---------------------------------
732     //    ^          ^        ^
733     //    |          |        |<--nb-->
734     //    man          nl        nbit
735     //
736 
737     uint32_t const one = static_cast<uint32_t>(1) << nb;
738 
739     bool carry = add_uint32_carry(mantissa[nl], one, &mantissa[nl]);
740 
741     --nl;
742 
743     for (; nl >= 0 && carry; --nl)
744     {
745         carry = add_uint32_carry(mantissa[nl], 1, &mantissa[nl]);
746     }
747 
748     return carry;
749 }
750 
751 // Rounds a mantissa to the given precision.  Returns true if overflow occurs;
752 // returns false otherwise.
round_mantissa(mantissa_t const mantissa,int const precision)753 static __forceinline bool __cdecl round_mantissa(mantissa_t const mantissa, int const precision) throw()
754 {
755     // The order of the n'th bit is n-1, since the first bit is bit 0
756     // therefore decrement precision to get the order of the last bit
757     // to be kept
758 
759     int const nbit = precision - 1;
760 
761     int const rndbit = nbit + 1;
762 
763     int const nl = rndbit / 32;
764     int const nb = 31 - rndbit % 32;
765 
766     // Get value of round bit
767     uint32_t const rndmask = static_cast<uint32_t>(1) << nb;
768 
769     bool retval = false;
770     if ((mantissa[nl] & rndmask) && !mantissa_has_zero_tail(mantissa, rndbit))
771     {
772         // round up
773         retval = increment_mantissa(mantissa, nbit);
774     }
775 
776     // Fill rest of mantissa with zeroes
777     mantissa[nl] &= UINT32_MAX << nb;
778     for (int i = nl + 1; i < INTRNMAN_LEN; ++i)
779     {
780         mantissa[i] = 0;
781     }
782 
783     return retval;
784 }
785 
convert_ld12_to_ldouble(_LDBL12 const * const pld12,_LDOUBLE * const result)786 static void __cdecl convert_ld12_to_ldouble(
787     _LDBL12 const* const pld12,
788     _LDOUBLE*      const result
789     ) throw()
790 {
791     // This implementation is based on the fact that the _LDBL12 format is
792     // identical to the long double and has 2 extra bytes of mantissa
793     uint16_t       exponent = *U_EXP_12(pld12) & static_cast<uint16_t>(0x7fff);
794     uint16_t const sign     = *U_EXP_12(pld12) & static_cast<uint16_t>(0x8000);
795 
796     uint32_t mantissa[] =
797     {
798         *UL_MANHI_12(pld12),
799         *UL_MANLO_12(pld12),
800         uint32_t ((*U_XT_12(pld12)) << 16)
801     };
802 
803     if (round_mantissa(mantissa, 64))
804     {
805         // The MSB of the mantissa is explicit and should be 1
806         // since we had a carry, the mantissa is now 0.
807         mantissa[0] = MSB_ULONG;
808         ++exponent;
809     }
810 
811     *UL_MANHI_LD(result) = mantissa[0];
812     *UL_MANLO_LD(result) = mantissa[1];
813     *U_EXP_LD   (result) = sign | exponent;
814 }
815 
_atoldbl_l(_LDOUBLE * const result,char * const string,_locale_t const locale)816 extern "C" int __cdecl _atoldbl_l(_LDOUBLE* const result, char* const string, _locale_t const locale)
817 {
818     _LocaleUpdate locale_update(locale);
819 
820     _LDBL12 intermediate_result{};
821     SLD_STATUS const conversion_status = parse_floating_point(
822         locale_update.GetLocaleT(),
823         make_c_string_character_source(string, nullptr),
824         &intermediate_result);
825 
826     convert_ld12_to_ldouble(&intermediate_result, result);
827     return transform_into_return_value(conversion_status);
828 }
829 
_atoldbl(_LDOUBLE * const result,char * const string)830 extern "C" int __cdecl _atoldbl(_LDOUBLE* const result, char* const string)
831 {
832     return _atoldbl_l(result, string, nullptr);
833 }
834