1 /* Copyright (C) 2007-2020 Free Software Foundation, Inc.
2 
3 This file is part of GCC.
4 
5 GCC is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License as published by the Free
7 Software Foundation; either version 3, or (at your option) any later
8 version.
9 
10 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
11 WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
13 for more details.
14 
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
18 
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
22 <http://www.gnu.org/licenses/>.  */
23 
24 #define BID_128RES
25 #include "bid_internal.h"
26 #include "bid_sqrt_macros.h"
27 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
28 #include <fenv.h>
29 
30 #define FE_ALL_FLAGS FE_INVALID|FE_DIVBYZERO|FE_OVERFLOW|FE_UNDERFLOW|FE_INEXACT
31 #endif
32 
33 BID128_FUNCTION_ARG1 (bid128_sqrt, x)
34 
35      UINT256 M256, C256, C4, C8;
36      UINT128 CX, CX1, CX2, A10, S2, T128, TP128, CS, CSM, res;
37      UINT64 sign_x, Carry;
38      SINT64 D;
39      int_float fx, f64;
40      int exponent_x, bin_expon_cx;
41      int digits, scale, exponent_q;
42 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
43      fexcept_t binaryflags = 0;
44 #endif
45 
46   // unpack arguments, check for NaN or Infinity
47 if (!unpack_BID128_value (&sign_x, &exponent_x, &CX, x)) {
48 res.w[1] = CX.w[1];
49 res.w[0] = CX.w[0];
50     // NaN ?
51 if ((x.w[1] & 0x7c00000000000000ull) == 0x7c00000000000000ull) {
52 #ifdef SET_STATUS_FLAGS
53   if ((x.w[1] & 0x7e00000000000000ull) == 0x7e00000000000000ull)	// sNaN
54     __set_status_flags (pfpsf, INVALID_EXCEPTION);
55 #endif
56   res.w[1] = CX.w[1] & QUIET_MASK64;
57   BID_RETURN (res);
58 }
59     // x is Infinity?
60 if ((x.w[1] & 0x7800000000000000ull) == 0x7800000000000000ull) {
61   res.w[1] = CX.w[1];
62   if (sign_x) {
63     // -Inf, return NaN
64     res.w[1] = 0x7c00000000000000ull;
65 #ifdef SET_STATUS_FLAGS
66     __set_status_flags (pfpsf, INVALID_EXCEPTION);
67 #endif
68   }
69   BID_RETURN (res);
70 }
71     // x is 0 otherwise
72 
73 res.w[1] =
74   sign_x |
75   ((((UINT64) (exponent_x + DECIMAL_EXPONENT_BIAS_128)) >> 1) << 49);
76 res.w[0] = 0;
77 BID_RETURN (res);
78 }
79 if (sign_x) {
80   res.w[1] = 0x7c00000000000000ull;
81   res.w[0] = 0;
82 #ifdef SET_STATUS_FLAGS
83   __set_status_flags (pfpsf, INVALID_EXCEPTION);
84 #endif
85   BID_RETURN (res);
86 }
87 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
88 (void) fegetexceptflag (&binaryflags, FE_ALL_FLAGS);
89 #endif
90   // 2^64
91 f64.i = 0x5f800000;
92 
93   // fx ~ CX
94 fx.d = (float) CX.w[1] * f64.d + (float) CX.w[0];
95 bin_expon_cx = ((fx.i >> 23) & 0xff) - 0x7f;
96 digits = estimate_decimal_digits[bin_expon_cx];
97 
98 A10 = CX;
99 if (exponent_x & 1) {
100   A10.w[1] = (CX.w[1] << 3) | (CX.w[0] >> 61);
101   A10.w[0] = CX.w[0] << 3;
102   CX2.w[1] = (CX.w[1] << 1) | (CX.w[0] >> 63);
103   CX2.w[0] = CX.w[0] << 1;
104   __add_128_128 (A10, A10, CX2);
105 }
106 
107 CS.w[0] = short_sqrt128 (A10);
108 CS.w[1] = 0;
109   // check for exact result
110 if (CS.w[0] * CS.w[0] == A10.w[0]) {
111   __mul_64x64_to_128_fast (S2, CS.w[0], CS.w[0]);
112   if (S2.w[1] == A10.w[1])	// && S2.w[0]==A10.w[0])
113   {
114     get_BID128_very_fast (&res, 0,
115 			  (exponent_x +
116 			   DECIMAL_EXPONENT_BIAS_128) >> 1, CS);
117 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
118     (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
119 #endif
120     BID_RETURN (res);
121   }
122 }
123   // get number of digits in CX
124 D = CX.w[1] - power10_index_binexp_128[bin_expon_cx].w[1];
125 if (D > 0
126     || (!D && CX.w[0] >= power10_index_binexp_128[bin_expon_cx].w[0]))
127   digits++;
128 
129   // if exponent is odd, scale coefficient by 10
130 scale = 67 - digits;
131 exponent_q = exponent_x - scale;
132 scale += (exponent_q & 1);	// exp. bias is even
133 
134 if (scale > 38) {
135   T128 = power10_table_128[scale - 37];
136   __mul_128x128_low (CX1, CX, T128);
137 
138   TP128 = power10_table_128[37];
139   __mul_128x128_to_256 (C256, CX1, TP128);
140 } else {
141   T128 = power10_table_128[scale];
142   __mul_128x128_to_256 (C256, CX, T128);
143 }
144 
145 
146   // 4*C256
147 C4.w[3] = (C256.w[3] << 2) | (C256.w[2] >> 62);
148 C4.w[2] = (C256.w[2] << 2) | (C256.w[1] >> 62);
149 C4.w[1] = (C256.w[1] << 2) | (C256.w[0] >> 62);
150 C4.w[0] = C256.w[0] << 2;
151 
152 long_sqrt128 (&CS, C256);
153 
154 #ifndef IEEE_ROUND_NEAREST
155 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
156 if (!((rnd_mode) & 3)) {
157 #endif
158 #endif
159   // compare to midpoints
160   CSM.w[1] = (CS.w[1] << 1) | (CS.w[0] >> 63);
161   CSM.w[0] = (CS.w[0] + CS.w[0]) | 1;
162   // CSM^2
163   //__mul_128x128_to_256(M256, CSM, CSM);
164   __sqr128_to_256 (M256, CSM);
165 
166   if (C4.w[3] > M256.w[3]
167       || (C4.w[3] == M256.w[3]
168 	  && (C4.w[2] > M256.w[2]
169 	      || (C4.w[2] == M256.w[2]
170 		  && (C4.w[1] > M256.w[1]
171 		      || (C4.w[1] == M256.w[1]
172 			  && C4.w[0] > M256.w[0])))))) {
173     // round up
174     CS.w[0]++;
175     if (!CS.w[0])
176       CS.w[1]++;
177   } else {
178     C8.w[1] = (CS.w[1] << 3) | (CS.w[0] >> 61);
179     C8.w[0] = CS.w[0] << 3;
180     // M256 - 8*CSM
181     __sub_borrow_out (M256.w[0], Carry, M256.w[0], C8.w[0]);
182     __sub_borrow_in_out (M256.w[1], Carry, M256.w[1], C8.w[1], Carry);
183     __sub_borrow_in_out (M256.w[2], Carry, M256.w[2], 0, Carry);
184     M256.w[3] = M256.w[3] - Carry;
185 
186     // if CSM' > C256, round up
187     if (M256.w[3] > C4.w[3]
188 	|| (M256.w[3] == C4.w[3]
189 	    && (M256.w[2] > C4.w[2]
190 		|| (M256.w[2] == C4.w[2]
191 		    && (M256.w[1] > C4.w[1]
192 			|| (M256.w[1] == C4.w[1]
193 			    && M256.w[0] > C4.w[0])))))) {
194       // round down
195       if (!CS.w[0])
196 	CS.w[1]--;
197       CS.w[0]--;
198     }
199   }
200 #ifndef IEEE_ROUND_NEAREST
201 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
202 } else {
203   __sqr128_to_256 (M256, CS);
204   C8.w[1] = (CS.w[1] << 1) | (CS.w[0] >> 63);
205   C8.w[0] = CS.w[0] << 1;
206   if (M256.w[3] > C256.w[3]
207       || (M256.w[3] == C256.w[3]
208 	  && (M256.w[2] > C256.w[2]
209 	      || (M256.w[2] == C256.w[2]
210 		  && (M256.w[1] > C256.w[1]
211 		      || (M256.w[1] == C256.w[1]
212 			  && M256.w[0] > C256.w[0])))))) {
213     __sub_borrow_out (M256.w[0], Carry, M256.w[0], C8.w[0]);
214     __sub_borrow_in_out (M256.w[1], Carry, M256.w[1], C8.w[1], Carry);
215     __sub_borrow_in_out (M256.w[2], Carry, M256.w[2], 0, Carry);
216     M256.w[3] = M256.w[3] - Carry;
217     M256.w[0]++;
218     if (!M256.w[0]) {
219       M256.w[1]++;
220       if (!M256.w[1]) {
221 	M256.w[2]++;
222 	if (!M256.w[2])
223 	  M256.w[3]++;
224       }
225     }
226 
227     if (!CS.w[0])
228       CS.w[1]--;
229     CS.w[0]--;
230 
231     if (M256.w[3] > C256.w[3]
232 	|| (M256.w[3] == C256.w[3]
233 	    && (M256.w[2] > C256.w[2]
234 		|| (M256.w[2] == C256.w[2]
235 		    && (M256.w[1] > C256.w[1]
236 			|| (M256.w[1] == C256.w[1]
237 			    && M256.w[0] > C256.w[0])))))) {
238 
239       if (!CS.w[0])
240 	CS.w[1]--;
241       CS.w[0]--;
242     }
243   }
244 
245   else {
246     __add_carry_out (M256.w[0], Carry, M256.w[0], C8.w[0]);
247     __add_carry_in_out (M256.w[1], Carry, M256.w[1], C8.w[1], Carry);
248     __add_carry_in_out (M256.w[2], Carry, M256.w[2], 0, Carry);
249     M256.w[3] = M256.w[3] + Carry;
250     M256.w[0]++;
251     if (!M256.w[0]) {
252       M256.w[1]++;
253       if (!M256.w[1]) {
254 	M256.w[2]++;
255 	if (!M256.w[2])
256 	  M256.w[3]++;
257       }
258     }
259     if (M256.w[3] < C256.w[3]
260 	|| (M256.w[3] == C256.w[3]
261 	    && (M256.w[2] < C256.w[2]
262 		|| (M256.w[2] == C256.w[2]
263 		    && (M256.w[1] < C256.w[1]
264 			|| (M256.w[1] == C256.w[1]
265 			    && M256.w[0] <= C256.w[0])))))) {
266 
267       CS.w[0]++;
268       if (!CS.w[0])
269 	CS.w[1]++;
270     }
271   }
272   // RU?
273   if ((rnd_mode) == ROUNDING_UP) {
274     CS.w[0]++;
275     if (!CS.w[0])
276       CS.w[1]++;
277   }
278 
279 }
280 #endif
281 #endif
282 
283 #ifdef SET_STATUS_FLAGS
284 __set_status_flags (pfpsf, INEXACT_EXCEPTION);
285 #endif
286 get_BID128_fast (&res, 0,
287 		 (exponent_q + DECIMAL_EXPONENT_BIAS_128) >> 1, CS);
288 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
289 (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
290 #endif
291 BID_RETURN (res);
292 }
293 
294 
295 
296 BID128_FUNCTION_ARGTYPE1 (bid128d_sqrt, UINT64, x)
297 
298      UINT256 M256, C256, C4, C8;
299      UINT128 CX, CX1, CX2, A10, S2, T128, TP128, CS, CSM, res;
300      UINT64 sign_x, Carry;
301      SINT64 D;
302      int_float fx, f64;
303      int exponent_x, bin_expon_cx;
304      int digits, scale, exponent_q;
305 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
306      fexcept_t binaryflags = 0;
307 #endif
308 
309 	// unpack arguments, check for NaN or Infinity
310    // unpack arguments, check for NaN or Infinity
311 CX.w[1] = 0;
312 if (!unpack_BID64 (&sign_x, &exponent_x, &CX.w[0], x)) {
313 res.w[1] = CX.w[0];
314 res.w[0] = 0;
315 	   // NaN ?
316 if ((x & 0x7c00000000000000ull) == 0x7c00000000000000ull) {
317 #ifdef SET_STATUS_FLAGS
318   if ((x & SNAN_MASK64) == SNAN_MASK64)	// sNaN
319     __set_status_flags (pfpsf, INVALID_EXCEPTION);
320 #endif
321   res.w[0] = (CX.w[0] & 0x0003ffffffffffffull);
322   __mul_64x64_to_128 (res, res.w[0], power10_table_128[18].w[0]);
323   res.w[1] |= ((CX.w[0]) & 0xfc00000000000000ull);
324   BID_RETURN (res);
325 }
326 	   // x is Infinity?
327 if ((x & 0x7800000000000000ull) == 0x7800000000000000ull) {
328   if (sign_x) {
329     // -Inf, return NaN
330     res.w[1] = 0x7c00000000000000ull;
331 #ifdef SET_STATUS_FLAGS
332     __set_status_flags (pfpsf, INVALID_EXCEPTION);
333 #endif
334   }
335   BID_RETURN (res);
336 }
337 	   // x is 0 otherwise
338 
339 exponent_x =
340   exponent_x - DECIMAL_EXPONENT_BIAS + DECIMAL_EXPONENT_BIAS_128;
341 res.w[1] =
342   sign_x | ((((UINT64) (exponent_x + DECIMAL_EXPONENT_BIAS_128)) >> 1)
343 	    << 49);
344 res.w[0] = 0;
345 BID_RETURN (res);
346 }
347 if (sign_x) {
348   res.w[1] = 0x7c00000000000000ull;
349   res.w[0] = 0;
350 #ifdef SET_STATUS_FLAGS
351   __set_status_flags (pfpsf, INVALID_EXCEPTION);
352 #endif
353   BID_RETURN (res);
354 }
355 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
356 (void) fegetexceptflag (&binaryflags, FE_ALL_FLAGS);
357 #endif
358 exponent_x =
359   exponent_x - DECIMAL_EXPONENT_BIAS + DECIMAL_EXPONENT_BIAS_128;
360 
361 	   // 2^64
362 f64.i = 0x5f800000;
363 
364 	   // fx ~ CX
365 fx.d = (float) CX.w[1] * f64.d + (float) CX.w[0];
366 bin_expon_cx = ((fx.i >> 23) & 0xff) - 0x7f;
367 digits = estimate_decimal_digits[bin_expon_cx];
368 
369 A10 = CX;
370 if (exponent_x & 1) {
371   A10.w[1] = (CX.w[1] << 3) | (CX.w[0] >> 61);
372   A10.w[0] = CX.w[0] << 3;
373   CX2.w[1] = (CX.w[1] << 1) | (CX.w[0] >> 63);
374   CX2.w[0] = CX.w[0] << 1;
375   __add_128_128 (A10, A10, CX2);
376 }
377 
378 CS.w[0] = short_sqrt128 (A10);
379 CS.w[1] = 0;
380 	   // check for exact result
381 if (CS.w[0] * CS.w[0] == A10.w[0]) {
382   __mul_64x64_to_128_fast (S2, CS.w[0], CS.w[0]);
383   if (S2.w[1] == A10.w[1]) {
384     get_BID128_very_fast (&res, 0,
385 			  (exponent_x + DECIMAL_EXPONENT_BIAS_128) >> 1,
386 			  CS);
387 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
388     (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
389 #endif
390     BID_RETURN (res);
391   }
392 }
393 	   // get number of digits in CX
394 D = CX.w[1] - power10_index_binexp_128[bin_expon_cx].w[1];
395 if (D > 0
396     || (!D && CX.w[0] >= power10_index_binexp_128[bin_expon_cx].w[0]))
397   digits++;
398 
399 		// if exponent is odd, scale coefficient by 10
400 scale = 67 - digits;
401 exponent_q = exponent_x - scale;
402 scale += (exponent_q & 1);	// exp. bias is even
403 
404 if (scale > 38) {
405   T128 = power10_table_128[scale - 37];
406   __mul_128x128_low (CX1, CX, T128);
407 
408   TP128 = power10_table_128[37];
409   __mul_128x128_to_256 (C256, CX1, TP128);
410 } else {
411   T128 = power10_table_128[scale];
412   __mul_128x128_to_256 (C256, CX, T128);
413 }
414 
415 
416 	   // 4*C256
417 C4.w[3] = (C256.w[3] << 2) | (C256.w[2] >> 62);
418 C4.w[2] = (C256.w[2] << 2) | (C256.w[1] >> 62);
419 C4.w[1] = (C256.w[1] << 2) | (C256.w[0] >> 62);
420 C4.w[0] = C256.w[0] << 2;
421 
422 long_sqrt128 (&CS, C256);
423 
424 #ifndef IEEE_ROUND_NEAREST
425 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
426 if (!((rnd_mode) & 3)) {
427 #endif
428 #endif
429   // compare to midpoints
430   CSM.w[1] = (CS.w[1] << 1) | (CS.w[0] >> 63);
431   CSM.w[0] = (CS.w[0] + CS.w[0]) | 1;
432   // CSM^2
433   //__mul_128x128_to_256(M256, CSM, CSM);
434   __sqr128_to_256 (M256, CSM);
435 
436   if (C4.w[3] > M256.w[3]
437       || (C4.w[3] == M256.w[3]
438 	  && (C4.w[2] > M256.w[2]
439 	      || (C4.w[2] == M256.w[2]
440 		  && (C4.w[1] > M256.w[1]
441 		      || (C4.w[1] == M256.w[1]
442 			  && C4.w[0] > M256.w[0])))))) {
443     // round up
444     CS.w[0]++;
445     if (!CS.w[0])
446       CS.w[1]++;
447   } else {
448     C8.w[1] = (CS.w[1] << 3) | (CS.w[0] >> 61);
449     C8.w[0] = CS.w[0] << 3;
450     // M256 - 8*CSM
451     __sub_borrow_out (M256.w[0], Carry, M256.w[0], C8.w[0]);
452     __sub_borrow_in_out (M256.w[1], Carry, M256.w[1], C8.w[1], Carry);
453     __sub_borrow_in_out (M256.w[2], Carry, M256.w[2], 0, Carry);
454     M256.w[3] = M256.w[3] - Carry;
455 
456     // if CSM' > C256, round up
457     if (M256.w[3] > C4.w[3]
458 	|| (M256.w[3] == C4.w[3]
459 	    && (M256.w[2] > C4.w[2]
460 		|| (M256.w[2] == C4.w[2]
461 		    && (M256.w[1] > C4.w[1]
462 			|| (M256.w[1] == C4.w[1]
463 			    && M256.w[0] > C4.w[0])))))) {
464       // round down
465       if (!CS.w[0])
466 	CS.w[1]--;
467       CS.w[0]--;
468     }
469   }
470 #ifndef IEEE_ROUND_NEAREST
471 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
472 } else {
473   __sqr128_to_256 (M256, CS);
474   C8.w[1] = (CS.w[1] << 1) | (CS.w[0] >> 63);
475   C8.w[0] = CS.w[0] << 1;
476   if (M256.w[3] > C256.w[3]
477       || (M256.w[3] == C256.w[3]
478 	  && (M256.w[2] > C256.w[2]
479 	      || (M256.w[2] == C256.w[2]
480 		  && (M256.w[1] > C256.w[1]
481 		      || (M256.w[1] == C256.w[1]
482 			  && M256.w[0] > C256.w[0])))))) {
483     __sub_borrow_out (M256.w[0], Carry, M256.w[0], C8.w[0]);
484     __sub_borrow_in_out (M256.w[1], Carry, M256.w[1], C8.w[1], Carry);
485     __sub_borrow_in_out (M256.w[2], Carry, M256.w[2], 0, Carry);
486     M256.w[3] = M256.w[3] - Carry;
487     M256.w[0]++;
488     if (!M256.w[0]) {
489       M256.w[1]++;
490       if (!M256.w[1]) {
491 	M256.w[2]++;
492 	if (!M256.w[2])
493 	  M256.w[3]++;
494       }
495     }
496 
497     if (!CS.w[0])
498       CS.w[1]--;
499     CS.w[0]--;
500 
501     if (M256.w[3] > C256.w[3]
502 	|| (M256.w[3] == C256.w[3]
503 	    && (M256.w[2] > C256.w[2]
504 		|| (M256.w[2] == C256.w[2]
505 		    && (M256.w[1] > C256.w[1]
506 			|| (M256.w[1] == C256.w[1]
507 			    && M256.w[0] > C256.w[0])))))) {
508 
509       if (!CS.w[0])
510 	CS.w[1]--;
511       CS.w[0]--;
512     }
513   }
514 
515   else {
516     __add_carry_out (M256.w[0], Carry, M256.w[0], C8.w[0]);
517     __add_carry_in_out (M256.w[1], Carry, M256.w[1], C8.w[1], Carry);
518     __add_carry_in_out (M256.w[2], Carry, M256.w[2], 0, Carry);
519     M256.w[3] = M256.w[3] + Carry;
520     M256.w[0]++;
521     if (!M256.w[0]) {
522       M256.w[1]++;
523       if (!M256.w[1]) {
524 	M256.w[2]++;
525 	if (!M256.w[2])
526 	  M256.w[3]++;
527       }
528     }
529     if (M256.w[3] < C256.w[3]
530 	|| (M256.w[3] == C256.w[3]
531 	    && (M256.w[2] < C256.w[2]
532 		|| (M256.w[2] == C256.w[2]
533 		    && (M256.w[1] < C256.w[1]
534 			|| (M256.w[1] == C256.w[1]
535 			    && M256.w[0] <= C256.w[0])))))) {
536 
537       CS.w[0]++;
538       if (!CS.w[0])
539 	CS.w[1]++;
540     }
541   }
542   // RU?
543   if ((rnd_mode) == ROUNDING_UP) {
544     CS.w[0]++;
545     if (!CS.w[0])
546       CS.w[1]++;
547   }
548 
549 }
550 #endif
551 #endif
552 
553 #ifdef SET_STATUS_FLAGS
554 __set_status_flags (pfpsf, INEXACT_EXCEPTION);
555 #endif
556 get_BID128_fast (&res, 0, (exponent_q + DECIMAL_EXPONENT_BIAS_128) >> 1,
557 		 CS);
558 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
559 (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
560 #endif
561 BID_RETURN (res);
562 
563 
564 }
565