1 
2 /*============================================================================
3 
4 This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
5 Package, Release 3e, by John R. Hauser.
6 
7 Copyright 2011, 2012, 2013, 2014, 2017 The Regents of the University of
8 California.  All rights reserved.
9 
10 Redistribution and use in source and binary forms, with or without
11 modification, are permitted provided that the following conditions are met:
12 
13  1. Redistributions of source code must retain the above copyright notice,
14     this list of conditions, and the following disclaimer.
15 
16  2. Redistributions in binary form must reproduce the above copyright notice,
17     this list of conditions, and the following disclaimer in the documentation
18     and/or other materials provided with the distribution.
19 
20  3. Neither the name of the University nor the names of its contributors may
21     be used to endorse or promote products derived from this software without
22     specific prior written permission.
23 
24 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
25 EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
26 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
27 DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
28 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
30 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
31 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
32 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 
35 =============================================================================*/
36 
37 #include <stdbool.h>
38 #include <stdint.h>
39 #include "platform.h"
40 #include "internals.h"
41 #include "specialize.h"
42 #include "softfloat.h"
43 
44 #ifdef SOFTFLOAT_FAST_INT64
45 
/*----------------------------------------------------------------------------
| Stores at the location pointed to by `zPtr' the square root of the 128-bit
| floating-point value pointed to by `aPtr'.  In this build configuration
| (SOFTFLOAT_FAST_INT64 defined) all of the work is delegated to the
| pass-by-value routine `f128_sqrt'.
*----------------------------------------------------------------------------*/
void f128M_sqrt( const float128_t *aPtr, float128_t *zPtr )
{
    float128_t root;

    /* The operand is read in full before the destination is written. */
    root = f128_sqrt( *aPtr );
    *zPtr = root;

}
52 
53 #else
54 
/*----------------------------------------------------------------------------
| Computes the square root of the 128-bit floating-point value pointed to by
| `aPtr' and stores the result at the location pointed to by `zPtr'.  This
| variant is compiled when SOFTFLOAT_FAST_INT64 is not defined; it accesses
| both operands as arrays of four 32-bit words.
|   Special cases, in the order they are tested:  when the exponent field is
| 0x7FFF, an input with any fraction bit set is handed to
| `softfloat_propagateNaNF128M', an input with the sign bit clear is copied
| to the result unchanged, and an input with the sign bit set is handed to
| `softfloat_invalidF128M'.  Otherwise, an input for which
| `softfloat_shiftNormSigF128M' returns -128 is copied to the result
| unchanged regardless of sign, and any remaining input with the sign bit set
| is handed to `softfloat_invalidF128M'.
|   All other inputs go through an initial 32-bit root estimate followed by
| three refinement steps, and the result is rounded and packed by
| `softfloat_roundPackMToF128M'.
*----------------------------------------------------------------------------*/
void f128M_sqrt( const float128_t *aPtr, float128_t *zPtr )
{
    const uint32_t *aWPtr;
    uint32_t *zWPtr;
    uint32_t uiA96;
    bool signA;
    int32_t rawExpA;
    uint32_t rem[6];
    int32_t expA, expZ;
    uint64_t rem64;
    uint32_t sig32A, recipSqrt32, sig32Z, qs[3], q;
    uint64_t sig64Z;
    uint32_t term[5];
    uint64_t x64;
    uint32_t y[5], rem32;

    /*------------------------------------------------------------------------
    | View the operand and the result as arrays of 32-bit words.
    *------------------------------------------------------------------------*/
    aWPtr = (const uint32_t *) aPtr;
    zWPtr = (uint32_t *) zPtr;
    /*------------------------------------------------------------------------
    | Fetch the most-significant word and extract the sign and the raw
    | exponent field from it.
    *------------------------------------------------------------------------*/
    uiA96 = aWPtr[indexWordHi( 4 )];
    signA = signF128UI96( uiA96 );
    rawExpA  = expF128UI96( uiA96 );
    /*------------------------------------------------------------------------
    | Exponent field all ones:  any nonzero fraction bit in any of the four
    | words selects the NaN path.  Otherwise the value is copied through when
    | the sign bit is clear and is treated as an invalid operand when it is
    | set.
    *------------------------------------------------------------------------*/
    if ( rawExpA == 0x7FFF ) {
        if (
            fracF128UI96( uiA96 )
                || (aWPtr[indexWord( 4, 2 )] | aWPtr[indexWord( 4, 1 )]
                        | aWPtr[indexWord( 4, 0 )])
        ) {
            softfloat_propagateNaNF128M( aWPtr, 0, zWPtr );
            return;
        }
        if ( ! signA ) goto copyA;
        goto invalid;
    }
    /*------------------------------------------------------------------------
    | Load the significand into `rem', shifted by 13 bits, or by 12 when the
    | raw exponent is odd.  (NOTE(review):  the helper presumably normalizes
    | subnormal inputs and returns the adjusted exponent, with -128 signalling
    | a zero significand -- confirm against its definition.)  The -128 case is
    | tested before the sign, so it is copied through for either sign; any
    | other input with the sign bit set is an invalid operand.
    *------------------------------------------------------------------------*/
    expA = softfloat_shiftNormSigF128M( aWPtr, 13 - (rawExpA & 1), rem );
    if ( expA == -128 ) goto copyA;
    if ( signA ) goto invalid;
    /*------------------------------------------------------------------------
    | (`sig32Z' is guaranteed to be a lower bound on the square root of
    | `sig32A', which makes `sig32Z' also a lower bound on the square root of
    | `sigA'.)
    | The result exponent is derived from half of the unbiased input exponent;
    | from here on `expA' holds only the parity of the input exponent.
    | `rem64' is the top 64 bits of `rem'.  When the parity is odd and the raw
    | exponent field was zero, `rem' and `rem64' are first shifted right by
    | one bit.  `sig32A' is taken from the high bits of `rem64', multiplied by
    | an approximate reciprocal square root to give the first 32-bit root
    | estimate `sig32Z' (saved as `qs[2]'), and the square of that estimate is
    | subtracted from the top two words of the remainder.
    *------------------------------------------------------------------------*/
    expZ = ((expA - 0x3FFF)>>1) + 0x3FFE;
    expA &= 1;
    rem64 = (uint64_t) rem[indexWord( 4, 3 )]<<32 | rem[indexWord( 4, 2 )];
    if ( expA ) {
        if ( ! rawExpA ) {
            softfloat_shortShiftRight128M( rem, 1, rem );
            rem64 >>= 1;
        }
        sig32A = rem64>>29;
    } else {
        sig32A = rem64>>30;
    }
    recipSqrt32 = softfloat_approxRecipSqrt32_1( expA, sig32A );
    sig32Z = ((uint64_t) sig32A * recipSqrt32)>>32;
    if ( expA ) sig32Z >>= 1;
    qs[2] = sig32Z;
    rem64 -= (uint64_t) sig32Z * sig32Z;
    rem[indexWord( 4, 3 )] = rem64>>32;
    rem[indexWord( 4, 2 )] = rem64;
    /*------------------------------------------------------------------------
    | Second digit:  estimate `q' from the current remainder and the
    | reciprocal square root, and extend the root estimate to 64 bits in
    | `sig64Z'.  The outermost words of `term' stay zero for the first
    | refinement loop.
    *------------------------------------------------------------------------*/
    q = ((uint32_t) (rem64>>2) * (uint64_t) recipSqrt32)>>32;
    sig64Z = ((uint64_t) sig32Z<<32) + ((uint64_t) q<<3);
    term[indexWord( 4, 3 )] = 0;
    term[indexWord( 4, 0 )] = 0;
    /*------------------------------------------------------------------------
    | (Repeating this loop is a rare occurrence.)
    | Each pass writes a trial remainder into `y'.  If the top bit of its
    | most-significant word is set, `q' is decremented, `sig64Z' is reduced to
    | match, and the step is retried from the unchanged `rem'.
    *------------------------------------------------------------------------*/
    for (;;) {
        x64 = ((uint64_t) sig32Z<<32) + sig64Z;
        term[indexWord( 4, 2 )] = x64>>32;
        term[indexWord( 4, 1 )] = x64;
        softfloat_remStep128MBy32( rem, 29, term, q, y );
        rem32 = y[indexWord( 4, 3 )];
        if ( ! (rem32 & 0x80000000) ) break;
        --q;
        sig64Z -= 1<<3;
    }
    qs[1] = q;
    rem64 = (uint64_t) rem32<<32 | y[indexWord( 4, 2 )];
    /*------------------------------------------------------------------------
    | Third digit:  the 32-bit truncation of `rem64>>2' discards bits 34 and
    | above of `rem64', so `recipSqrt32' is added to the estimate when any of
    | those bits is set.
    *------------------------------------------------------------------------*/
    q = ((uint32_t) (rem64>>2) * (uint64_t) recipSqrt32)>>32;
    if ( rem64>>34 ) q += recipSqrt32;
    sig64Z <<= 1;
    /*------------------------------------------------------------------------
    | (Repeating this loop is a rare occurrence.)
    | Same retry scheme as the previous loop, now reading the remainder from
    | `y' and writing the new remainder into the upper four words of the
    | six-word `rem' array.
    *------------------------------------------------------------------------*/
    for (;;) {
        x64 = sig64Z + (q>>26);
        term[indexWord( 4, 2 )] = x64>>32;
        term[indexWord( 4, 1 )] = x64;
        term[indexWord( 4, 0 )] = q<<6;
        softfloat_remStep128MBy32(
            y, 29, term, q, &rem[indexMultiwordHi( 6, 4 )] );
        rem32 = rem[indexWordHi( 6 )];
        if ( ! (rem32 & 0x80000000) ) break;
        --q;
    }
    qs[0] = q;
    rem64 = (uint64_t) rem32<<32 | rem[indexWord( 6, 4 )];
    /*------------------------------------------------------------------------
    | Final digit, estimated with 2 added.  (NOTE(review):  the bias
    | presumably keeps the estimate from falling below the true digit --
    | confirm.)  The four digits are then merged into the five-word result
    | `y'.  Successive digits are positioned with left shifts of 27, 24, 21,
    | and 18, so adjacent digits overlap and carries are propagated upward
    | through `x64>>32'.
    *------------------------------------------------------------------------*/
    q = (((uint32_t) (rem64>>2) * (uint64_t) recipSqrt32)>>32) + 2;
    if ( rem64>>34 ) q += recipSqrt32;
    x64 = (uint64_t) q<<27;
    y[indexWord( 5, 0 )] = x64;
    x64 = ((uint64_t) qs[0]<<24) + (x64>>32);
    y[indexWord( 5, 1 )] = x64;
    x64 = ((uint64_t) qs[1]<<21) + (x64>>32);
    y[indexWord( 5, 2 )] = x64;
    x64 = ((uint64_t) qs[2]<<18) + (x64>>32);
    y[indexWord( 5, 3 )] = x64;
    y[indexWord( 5, 4 )] = x64>>32;
    /*------------------------------------------------------------------------
    | Only when the low four bits of the final digit are 0, 1, or 2 is an
    | exact remainder computed.  (NOTE(review):  presumably these are the
    | cases where the estimate is too close to a boundary for the rounding
    | step to trust -- confirm.)  The low two bits of `q' are cleared and the
    | low word of `y' is rewritten to match.  A negative remainder causes
    | `softfloat_sub1X160M' to be applied to `y'; a nonzero nonnegative
    | remainder sets the lowest bit of `y'.
    *------------------------------------------------------------------------*/
    if ( (q & 0xF) <= 2 ) {
        q &= ~3;
        y[indexWordLo( 5 )] = q<<27;
        term[indexWord( 5, 4 )] = 0;
        term[indexWord( 5, 3 )] = 0;
        term[indexWord( 5, 2 )] = 0;
        term[indexWord( 5, 1 )] = q>>6;
        term[indexWord( 5, 0 )] = q<<26;
        softfloat_sub160M( y, term, term );
        rem[indexWord( 6, 1 )] = 0;
        rem[indexWord( 6, 0 )] = 0;
        softfloat_remStep160MBy32(
            &rem[indexMultiwordLo( 6, 5 )],
            14,
            term,
            q,
            &rem[indexMultiwordLo( 6, 5 )]
        );
        rem32 = rem[indexWord( 6, 4 )];
        if ( rem32 & 0x80000000 ) {
            softfloat_sub1X160M( y );
        } else {
            if (
                rem32 || rem[indexWord( 6, 0 )] || rem[indexWord( 6, 1 )]
                    || (rem[indexWord( 6, 3 )] | rem[indexWord( 6, 2 )])
            ) {
                y[indexWordLo( 5 )] |= 1;
            }
        }
    }
    /*------------------------------------------------------------------------
    | Round and pack the result; the sign argument is always 0 on this path.
    *------------------------------------------------------------------------*/
    softfloat_roundPackMToF128M( 0, expZ, y, zWPtr );
    return;
    /*------------------------------------------------------------------------
    | Invalid operand:  the result is produced by `softfloat_invalidF128M'.
    *------------------------------------------------------------------------*/
 invalid:
    softfloat_invalidF128M( zWPtr );
    return;
    /*------------------------------------------------------------------------
    | The result is the operand itself, copied word by word.
    *------------------------------------------------------------------------*/
 copyA:
    zWPtr[indexWordHi( 4 )] = uiA96;
    zWPtr[indexWord( 4, 2 )] = aWPtr[indexWord( 4, 2 )];
    zWPtr[indexWord( 4, 1 )] = aWPtr[indexWord( 4, 1 )];
    zWPtr[indexWord( 4, 0 )] = aWPtr[indexWord( 4, 0 )];

}
226 
227 #endif
228 
229