1 /*
2 * Copyright (c) 2016, Intel Corporation.
3 * Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved.
4 * Intel Math Library (LIBM) Source Code
5 *
6 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
7 *
8 * This code is free software; you can redistribute it and/or modify it
9 * under the terms of the GNU General Public License version 2 only, as
10 * published by the Free Software Foundation.
11 *
12 * This code is distributed in the hope that it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 * version 2 for more details (a copy is included in the LICENSE file that
16 * accompanied this code).
17 *
18 * You should have received a copy of the GNU General Public License version
19 * 2 along with this work; if not, write to the Free Software Foundation,
20 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
21 *
22 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
23 * or visit www.oracle.com if you need additional information or have any
24 * questions.
25 *
26 */
27
28 #include "precompiled.hpp"
29 #include "asm/assembler.hpp"
30 #include "asm/assembler.inline.hpp"
31 #include "macroAssembler_x86.hpp"
32 #include "runtime/stubRoutines.hpp"
33 #include "utilities/globalDefinitions.hpp"
34
35 /******************************************************************************/
36 // ALGORITHM DESCRIPTION - EXP()
37 // ---------------------
38 //
39 // Description:
40 // Let K = 64 (table size).
41 // e^x = 2^(x/log(2)) = 2^n * T[j] * (1 + P(y))
42 //
43 // where
44 // x = m*log(2)/K + y, y in [-log(2)/K..log(2)/K]
45 // m = n*K + j, m,n,j - signed integer, j in [-K/2..K/2]
46 // The values of 2^(j/K) are tabulated as
47 // T[j] = T_hi[j] * (1 + T_lo[j]).
48 //
49 // P(y) is a minimax polynomial approximation of exp(y)-1 on the small
50 // interval [-log(2)/K..log(2)/K] (coefficients were calculated with Maple V).
51 //
52 // To avoid problems with arithmetic overflow and underflow, the value of
53 // 2^n is safely computed as 2^n1 * 2^n2 (with n = n1 + n2),
54 // where n1 is in [-BIAS/2..BIAS/2]
55 // and BIAS is the value of the exponent bias.
56 //
57 // Special cases:
58 // exp(NaN) = NaN
59 // exp(+INF) = +INF
60 // exp(-INF) = 0
61 // exp(x) = 1 for subnormals
62 // for finite argument, only exp(0)=1 is exact
63 // For IEEE double
64 // if x > 709.782712893383973096 then exp(x) overflow
65 // if x < -745.133219101941108420 then exp(x) underflow
66 //
67 /******************************************************************************/
68
69 #ifdef _LP64
70 // The 64 bit code is at most SSE2 compliant
// Packed double-precision constants for the 64-bit fast_exp(), addressed by
// byte offset from the start of the array. Each double is stored as its low
// 32-bit word followed by its high 32-bit word.
//   +0  : 0x40571547652b82fe in both lanes, ~ K/log(2) with K = 64
//   +16 : 0x3f862e42fefa0000 in both lanes, ~ high part of log(2)/K
//   +32 : 0x3d1cf79abc9e3b3a in both lanes, ~ low part of log(2)/K
//   +48 : 0x3fdffffffffffffe in both lanes, ~ 0.5
//   +64 : polynomial coefficients, ~ 1/720 and ~ 1/24
//   +80 : polynomial coefficients, ~ 1/120 and ~ 1/6
// The layout must not change: fast_exp() loads 16-byte rows at these offsets.
71 ATTRIBUTE_ALIGNED(16) juint _cv[] =
72 {
73 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL, 0xfefa0000UL,
74 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL, 0xbc9e3b3aUL, 0x3d1cf79aUL,
75 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL,
76 0x3fdfffffUL, 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL,
77 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL
78 };
79
// 0x4338000000000000 (1.5 * 2^52) in both lanes. fast_exp() adds this value and
// then subtracts it again, which leaves the operand rounded to an integer and
// exposes that integer in the low bits of the intermediate sum.
80 ATTRIBUTE_ALIGNED(16) juint _shifter[] =
81 {
82 0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL
83 };
84
// Per-lane 64-bit mask 0x00000000ffffffc0. fast_exp() ANDs it with the shifted
// sum, keeping the low 32 bits except the bottom six (the table index bits).
85 ATTRIBUTE_ALIGNED(16) juint _mmask[] =
86 {
87 0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL
88 };
89
// Per-lane 64-bit value 0xffc0 (= 1023 * 64). fast_exp() adds it to the masked
// integer and shifts the sum left by 46 bits, so the biased exponent lands in
// the exponent field of a double.
90 ATTRIBUTE_ALIGNED(16) juint _bias[] =
91 {
92 0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL
93 };
94
// Lookup table for the 64-bit fast_exp(): 64 entries of 16 bytes (four juint
// words each), indexed by (m & 63) << 4.
// Within an entry, the low 8 bytes are a double that fast_exp() adds to the
// reduced argument (a small correction term). The high 8 bytes hold fraction
// bits with a zero exponent field; fast_exp() ORs the computed exponent bits
// into them. Judging by the values, entry j carries the fraction of 2^(j/64).
// Entry 0 is all zeros.
95 ATTRIBUTE_ALIGNED(16) juint _Tbl_addr[] =
96 {
97 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x0e03754dUL,
98 0x3cad7bbfUL, 0x3e778060UL, 0x00002c9aUL, 0x3567f613UL, 0x3c8cd252UL,
99 0xd3158574UL, 0x000059b0UL, 0x61e6c861UL, 0x3c60f74eUL, 0x18759bc8UL,
100 0x00008745UL, 0x5d837b6cUL, 0x3c979aa6UL, 0x6cf9890fUL, 0x0000b558UL,
101 0x702f9cd1UL, 0x3c3ebe3dUL, 0x32d3d1a2UL, 0x0000e3ecUL, 0x1e63bcd8UL,
102 0x3ca3516eUL, 0xd0125b50UL, 0x00011301UL, 0x26f0387bUL, 0x3ca4c554UL,
103 0xaea92ddfUL, 0x0001429aUL, 0x62523fb6UL, 0x3ca95153UL, 0x3c7d517aUL,
104 0x000172b8UL, 0x3f1353bfUL, 0x3c8b898cUL, 0xeb6fcb75UL, 0x0001a35bUL,
105 0x3e3a2f5fUL, 0x3c9aecf7UL, 0x3168b9aaUL, 0x0001d487UL, 0x44a6c38dUL,
106 0x3c8a6f41UL, 0x88628cd6UL, 0x0002063bUL, 0xe3a8a894UL, 0x3c968efdUL,
107 0x6e756238UL, 0x0002387aUL, 0x981fe7f2UL, 0x3c80472bUL, 0x65e27cddUL,
108 0x00026b45UL, 0x6d09ab31UL, 0x3c82f7e1UL, 0xf51fdee1UL, 0x00029e9dUL,
109 0x720c0ab3UL, 0x3c8b3782UL, 0xa6e4030bUL, 0x0002d285UL, 0x4db0abb6UL,
110 0x3c834d75UL, 0x0a31b715UL, 0x000306feUL, 0x5dd3f84aUL, 0x3c8fdd39UL,
111 0xb26416ffUL, 0x00033c08UL, 0xcc187d29UL, 0x3ca12f8cUL, 0x373aa9caUL,
112 0x000371a7UL, 0x738b5e8bUL, 0x3ca7d229UL, 0x34e59ff6UL, 0x0003a7dbUL,
113 0xa72a4c6dUL, 0x3c859f48UL, 0x4c123422UL, 0x0003dea6UL, 0x259d9205UL,
114 0x3ca8b846UL, 0x21f72e29UL, 0x0004160aUL, 0x60c2ac12UL, 0x3c4363edUL,
115 0x6061892dUL, 0x00044e08UL, 0xdaa10379UL, 0x3c6ecce1UL, 0xb5c13cd0UL,
116 0x000486a2UL, 0xbb7aafb0UL, 0x3c7690ceUL, 0xd5362a27UL, 0x0004bfdaUL,
117 0x9b282a09UL, 0x3ca083ccUL, 0x769d2ca6UL, 0x0004f9b2UL, 0xc1aae707UL,
118 0x3ca509b0UL, 0x569d4f81UL, 0x0005342bUL, 0x18fdd78eUL, 0x3c933505UL,
119 0x36b527daUL, 0x00056f47UL, 0xe21c5409UL, 0x3c9063e1UL, 0xdd485429UL,
120 0x0005ab07UL, 0x2b64c035UL, 0x3c9432e6UL, 0x15ad2148UL, 0x0005e76fUL,
121 0x99f08c0aUL, 0x3ca01284UL, 0xb03a5584UL, 0x0006247eUL, 0x0073dc06UL,
122 0x3c99f087UL, 0x82552224UL, 0x00066238UL, 0x0da05571UL, 0x3c998d4dUL,
123 0x667f3bccUL, 0x0006a09eUL, 0x86ce4786UL, 0x3ca52bb9UL, 0x3c651a2eUL,
124 0x0006dfb2UL, 0x206f0dabUL, 0x3ca32092UL, 0xe8ec5f73UL, 0x00071f75UL,
125 0x8e17a7a6UL, 0x3ca06122UL, 0x564267c8UL, 0x00075febUL, 0x461e9f86UL,
126 0x3ca244acUL, 0x73eb0186UL, 0x0007a114UL, 0xabd66c55UL, 0x3c65ebe1UL,
127 0x36cf4e62UL, 0x0007e2f3UL, 0xbbff67d0UL, 0x3c96fe9fUL, 0x994cce12UL,
128 0x00082589UL, 0x14c801dfUL, 0x3c951f14UL, 0x9b4492ecUL, 0x000868d9UL,
129 0xc1f0eab4UL, 0x3c8db72fUL, 0x422aa0dbUL, 0x0008ace5UL, 0x59f35f44UL,
130 0x3c7bf683UL, 0x99157736UL, 0x0008f1aeUL, 0x9c06283cUL, 0x3ca360baUL,
131 0xb0cdc5e4UL, 0x00093737UL, 0x20f962aaUL, 0x3c95e8d1UL, 0x9fde4e4fUL,
132 0x00097d82UL, 0x2b91ce27UL, 0x3c71affcUL, 0x82a3f090UL, 0x0009c491UL,
133 0x589a2ebdUL, 0x3c9b6d34UL, 0x7b5de564UL, 0x000a0c66UL, 0x9ab89880UL,
134 0x3c95277cUL, 0xb23e255cUL, 0x000a5503UL, 0x6e735ab3UL, 0x3c846984UL,
135 0x5579fdbfUL, 0x000a9e6bUL, 0x92cb3387UL, 0x3c8c1a77UL, 0x995ad3adUL,
136 0x000ae89fUL, 0xdc2d1d96UL, 0x3ca22466UL, 0xb84f15faUL, 0x000b33a2UL,
137 0xb19505aeUL, 0x3ca1112eUL, 0xf2fb5e46UL, 0x000b7f76UL, 0x0a5fddcdUL,
138 0x3c74ffd7UL, 0x904bc1d2UL, 0x000bcc1eUL, 0x30af0cb3UL, 0x3c736eaeUL,
139 0xdd85529cUL, 0x000c199bUL, 0xd10959acUL, 0x3c84e08fUL, 0x2e57d14bUL,
140 0x000c67f1UL, 0x6c921968UL, 0x3c676b2cUL, 0xdcef9069UL, 0x000cb720UL,
141 0x36df99b3UL, 0x3c937009UL, 0x4a07897bUL, 0x000d072dUL, 0xa63d07a7UL,
142 0x3c74a385UL, 0xdcfba487UL, 0x000d5818UL, 0xd5c192acUL, 0x3c8e5a50UL,
143 0x03db3285UL, 0x000da9e6UL, 0x1c4a9792UL, 0x3c98bb73UL, 0x337b9b5eUL,
144 0x000dfc97UL, 0x603a88d3UL, 0x3c74b604UL, 0xe78b3ff6UL, 0x000e502eUL,
145 0x92094926UL, 0x3c916f27UL, 0xa2a490d9UL, 0x000ea4afUL, 0x41aa2008UL,
146 0x3c8ec3bcUL, 0xee615a27UL, 0x000efa1bUL, 0x31d185eeUL, 0x3c8a64a9UL,
147 0x5b6e4540UL, 0x000f5076UL, 0x4d91cd9dUL, 0x3c77893bUL, 0x819e90d8UL,
148 0x000fa7c1UL
149 };
150
// 128 bits of ones. In fast_exp()'s out-of-range path this is shifted left by a
// run-time count to form a mask that clears low-order bits.
151 ATTRIBUTE_ALIGNED(16) juint _ALLONES[] =
152 {
153 0xffffffffUL, 0xffffffffUL, 0xffffffffUL, 0xffffffffUL
154 };
155
// 0x3ff0000000000000 in both lanes: the bit pattern of 1.0, i.e. the exponent
// bias in the exponent field. fast_exp() adds it (paddd) to a raw exponent to
// turn it into a power-of-two scale factor.
156 ATTRIBUTE_ALIGNED(16) juint _ebias[] =
157 {
158 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x3ff00000UL
159 };
160
// 0x7fefffffffffffff: the largest finite double. fast_exp() squares it to
// produce the overflow result for large positive arguments.
161 ATTRIBUTE_ALIGNED(4) juint _XMAX[] =
162 {
163 0xffffffffUL, 0x7fefffffUL
164 };
165
// 0x0010000000000000: the smallest positive normal double. fast_exp() squares
// it to produce the underflow result for large negative arguments.
166 ATTRIBUTE_ALIGNED(4) juint _XMIN[] =
167 {
168 0x00000000UL, 0x00100000UL
169 };
170
// 0x7ff0000000000000: +infinity, the result fast_exp() returns for +INF input.
171 ATTRIBUTE_ALIGNED(4) juint _INF[] =
172 {
173 0x00000000UL, 0x7ff00000UL
174 };
175
// +0.0, the result fast_exp() returns for -INF input.
176 ATTRIBUTE_ALIGNED(4) juint _ZERO[] =
177 {
178 0x00000000UL, 0x00000000UL
179 };
180
// 0x3ff0000000000000: 1.0. fast_exp() returns 1.0 + x for arguments whose
// magnitude is below the main-path range.
181 ATTRIBUTE_ALIGNED(4) juint _ONE_val[] =
182 {
183 0x00000000UL, 0x3ff00000UL
184 };
185
186
187 // Registers:
188 // input: xmm0
189 // scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
190 // rax, rdx, rcx, tmp - r11
191
192 // Code generated by Intel C compiler for LIBM library
193
// Emits the SSE2 instruction sequence that computes exp(x) on 64-bit x86.
//
// Register contract, as used by the emitted code:
//   xmm0            - input x on entry, result on exit
//   xmm1..xmm7      - scratch
//   eax, ecx, edx   - scratch general-purpose registers
//   tmp             - scratch, holds the address of _Tbl_addr
// tmp, eax, ecx and edx must be four different registers (asserted below).
//
// The sequence reserves 24 bytes of stack on entry and releases them at B1_5.
// It does not emit a return instruction.
// NOTE(review): the order of the emitted instructions is significant, because
// registers are reused aggressively; do not reorder.
fast_exp(XMMRegister xmm0,XMMRegister xmm1,XMMRegister xmm2,XMMRegister xmm3,XMMRegister xmm4,XMMRegister xmm5,XMMRegister xmm6,XMMRegister xmm7,Register eax,Register ecx,Register edx,Register tmp)194 void MacroAssembler::fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) {
195 Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
196 Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
197 Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2, L_2TAG_PACKET_11_0_2;
198 Label L_2TAG_PACKET_12_0_2, B1_3, B1_5, start;
199
200 assert_different_registers(tmp, eax, ecx, edx);
// Addresses of the constant tables defined above.
201 address cv = (address)_cv;
202 address Shifter = (address)_shifter;
203 address mmask = (address)_mmask;
204 address bias = (address)_bias;
205 address Tbl_addr = (address)_Tbl_addr;
206 address ALLONES = (address)_ALLONES;
207 address ebias = (address)_ebias;
208 address XMAX = (address)_XMAX;
209 address XMIN = (address)_XMIN;
210 address INF = (address)_INF;
211 address ZERO = (address)_ZERO;
212 address ONE_val = (address)_ONE_val;
213
// `start` and B1_3 are bound but never used as jump targets in this function.
214 bind(start);
// Reserve 24 bytes of stack: [rsp+0] receives a status code on some paths,
// [rsp+8] holds the saved input x, [rsp+16] is a result spill slot.
215 subq(rsp, 24);
216 movsd(Address(rsp, 8), xmm0);
// Duplicate x into both lanes so the packed operations work on (x, x).
217 unpcklpd(xmm0, xmm0);
218 movdqu(xmm1, ExternalAddress(cv)); // 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL
219 movdqu(xmm6, ExternalAddress(Shifter)); // 0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL
220 movdqu(xmm2, ExternalAddress(16 + cv)); // 0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL
221 movdqu(xmm3, ExternalAddress(32 + cv)); // 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL
// Range check on the top 16 bits of x with the sign bit cleared (e).
// (16527 - e) | (e - 15504) has its sign bit set exactly when e lies outside
// [15504, 16527] (0x3c90..0x408f); the unsigned compare against INT_MIN then
// sends such arguments to the special-case path at L_2TAG_PACKET_0_0_2.
222 pextrw(eax, xmm0, 3);
223 andl(eax, 32767);
224 movl(edx, 16527);
225 subl(edx, eax);
226 subl(eax, 15504);
227 orl(edx, eax);
228 cmpl(edx, INT_MIN);
229 jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);
// Main path. xmm1 = x * K/log(2); adding and subtracting the shifter rounds it
// to an integer m, and xmm7 keeps the un-subtracted sum, whose low bits hold m.
230 mulpd(xmm1, xmm0);
231 addpd(xmm1, xmm6);
232 movapd(xmm7, xmm1);
233 subpd(xmm1, xmm6);
// Argument reduction: y = x - m*hi(log(2)/K) - m*lo(log(2)/K); the low part is
// subtracted further down.
234 mulpd(xmm2, xmm1);
235 movdqu(xmm4, ExternalAddress(64 + cv)); // 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL
236 mulpd(xmm3, xmm1);
237 movdqu(xmm5, ExternalAddress(80 + cv)); // 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL
238 subpd(xmm0, xmm2);
// eax = m (low 32 bits of the shifted sum). ecx = (m & 63) * 16 is the byte
// offset of the table entry; eax = edx = m >> 6 (arithmetic) is the exponent n.
239 movdl(eax, xmm7);
240 movl(ecx, eax);
241 andl(ecx, 63);
242 shll(ecx, 4);
243 sarl(eax, 6);
244 movl(edx, eax);
// Build the exponent bits of 2^n in xmm7: clear the six index bits, add
// 1023*64, then shift left by 46 so that (n + 1023) lands at bit 52.
245 movdqu(xmm6, ExternalAddress(mmask)); // 0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL
246 pand(xmm7, xmm6);
247 movdqu(xmm6, ExternalAddress(bias)); // 0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL
248 paddq(xmm7, xmm6);
249 psllq(xmm7, 46);
250 subpd(xmm0, xmm3);
// Load the 16-byte table entry for index j = m & 63.
251 lea(tmp, ExternalAddress(Tbl_addr));
252 movdqu(xmm2, Address(ecx, tmp));
// Polynomial evaluation in y, using the coefficient pairs in xmm4/xmm5 and the
// ~0.5 constant at cv+48. The low half of the table entry is added into the sum.
253 mulpd(xmm4, xmm0);
254 movapd(xmm6, xmm0);
255 movapd(xmm1, xmm0);
256 mulpd(xmm6, xmm6);
257 mulpd(xmm0, xmm6);
258 addpd(xmm5, xmm4);
259 mulsd(xmm0, xmm6);
260 mulpd(xmm6, ExternalAddress(48 + cv)); // 0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL, 0x3fdfffffUL
261 addsd(xmm1, xmm2);
// Move the high half of the table entry into the low lane and OR in the
// exponent bits: xmm2 becomes the scale factor T.
262 unpckhpd(xmm2, xmm2);
263 mulpd(xmm0, xmm5);
264 addsd(xmm1, xmm0);
265 por(xmm2, xmm7);
266 unpckhpd(xmm0, xmm0);
267 addsd(xmm0, xmm1);
268 addsd(xmm0, xmm6);
// Unsigned check of n + 894 against 1916: n outside [-894, 1022] takes the
// careful scaling path, where the result may overflow or become subnormal.
269 addl(edx, 894);
270 cmpl(edx, 1916);
271 jcc(Assembler::above, L_2TAG_PACKET_1_0_2);
// Common case: result = p * T + T.
272 mulsd(xmm0, xmm2);
273 addsd(xmm0, xmm2);
274 jmp(B1_5);
275
// Out-of-range exponent n (still in eax).
276 bind(L_2TAG_PACKET_1_0_2);
277 xorpd(xmm3, xmm3);
278 movdqu(xmm4, ExternalAddress(ALLONES)); // 0xffffffffUL, 0xffffffffUL, 0xffffffffUL, 0xffffffffUL
// edx = -1022 - n; ALLONES shifted left by that count is used as a mask below.
279 movl(edx, -1022);
280 subl(edx, eax);
281 movdl(xmm5, edx);
282 psllq(xmm4, xmm5);
// ecx keeps n. eax = n >> 1 is placed in the exponent field of xmm3 and
// subtracted from T's bit pattern; adding ebias to xmm3 later turns it into
// the factor that the result is multiplied by at the end.
283 movl(ecx, eax);
284 sarl(eax, 1);
285 pinsrw(xmm3, eax, 3);
286 movdqu(xmm6, ExternalAddress(ebias)); // 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x3ff00000UL
287 psllq(xmm3, 4);
288 psubd(xmm2, xmm3);
289 mulsd(xmm0, xmm2);
// -1022 - n > 52: handled separately at L_2TAG_PACKET_2_0_2.
290 cmpl(edx, 52);
291 jcc(Assembler::greater, L_2TAG_PACKET_2_0_2);
// Split the rescaled T into a masked part (xmm4) and the remainder (xmm2).
292 pand(xmm4, xmm2);
293 paddd(xmm3, xmm6);
294 subsd(xmm2, xmm4);
295 addsd(xmm0, xmm2);
// n >= 1023: possible overflow, handled at L_2TAG_PACKET_3_0_2.
296 cmpl(ecx, 1023);
297 jcc(Assembler::greaterEqual, L_2TAG_PACKET_3_0_2);
// Combine the sign bit of the partial sum with edx; zero selects the simple
// finish at L_2TAG_PACKET_4_0_2.
298 pextrw(ecx, xmm0, 3);
299 andl(ecx, 32768);
300 orl(edx, ecx);
301 cmpl(edx, 0);
302 jcc(Assembler::equal, L_2TAG_PACKET_4_0_2);
303 movapd(xmm6, xmm0);
304 addsd(xmm0, xmm4);
305 mulsd(xmm0, xmm3);
// A zero exponent field (mask 0x7ff0) means the result is zero or subnormal.
306 pextrw(ecx, xmm0, 3);
307 andl(ecx, 32752);
308 cmpl(ecx, 0);
309 jcc(Assembler::equal, L_2TAG_PACKET_5_0_2);
310 jmp(B1_5);
311
// Result with a zero exponent field: rebuild it from the saved parts using
// integer operations, then record status 15.
312 bind(L_2TAG_PACKET_5_0_2);
313 mulsd(xmm6, xmm3);
314 mulsd(xmm4, xmm3);
315 movdqu(xmm0, xmm6);
316 pxor(xmm6, xmm4);
317 psrad(xmm6, 31);
318 pshufd(xmm6, xmm6, 85);
319 psllq(xmm0, 1);
320 psrlq(xmm0, 1);
321 pxor(xmm0, xmm6);
322 psrlq(xmm6, 63);
323 paddq(xmm0, xmm6);
324 paddq(xmm0, xmm4);
// NOTE(review): the value stored at [rsp+0] is never read back in this
// function; presumably a leftover error code from the original libm source.
325 movl(Address(rsp, 0), 15);
326 jmp(L_2TAG_PACKET_6_0_2);
327
328 bind(L_2TAG_PACKET_4_0_2);
329 addsd(xmm0, xmm4);
330 mulsd(xmm0, xmm3);
331 jmp(B1_5);
332
// n >= 1023: finish the computation; if the exponent field is all ones
// (0x7ff0) the result overflowed, so record status 14.
333 bind(L_2TAG_PACKET_3_0_2);
334 addsd(xmm0, xmm4);
335 mulsd(xmm0, xmm3);
336 pextrw(ecx, xmm0, 3);
337 andl(ecx, 32752);
338 cmpl(ecx, 32752);
339 jcc(Assembler::aboveEqual, L_2TAG_PACKET_7_0_2);
340 jmp(B1_5);
341
// -1022 - n > 52: finish the computation and record status 15.
342 bind(L_2TAG_PACKET_2_0_2);
343 paddd(xmm3, xmm6);
344 addpd(xmm0, xmm2);
345 mulsd(xmm0, xmm3);
346 movl(Address(rsp, 0), 15);
347 jmp(L_2TAG_PACKET_6_0_2);
348
// Large-magnitude argument: eax = high word of |x|, at least 0x40900000.
349 bind(L_2TAG_PACKET_8_0_2);
// High word >= 0x7ff00000: x is infinite or NaN.
350 cmpl(eax, 2146435072);
351 jcc(Assembler::aboveEqual, L_2TAG_PACKET_9_0_2);
// Reload the signed high word; sign bit set means x is negative (underflow).
352 movl(eax, Address(rsp, 12));
353 cmpl(eax, INT_MIN);
354 jcc(Assembler::aboveEqual, L_2TAG_PACKET_10_0_2);
// Large positive x: XMAX * XMAX yields the overflow result.
355 movsd(xmm0, ExternalAddress(XMAX)); // 0xffffffffUL, 0x7fefffffUL
356 mulsd(xmm0, xmm0);
357
// Overflow: record status 14 (falls through from above).
358 bind(L_2TAG_PACKET_7_0_2);
359 movl(Address(rsp, 0), 14);
360 jmp(L_2TAG_PACKET_6_0_2);
361
// Large negative x: XMIN * XMIN yields the underflow result; record status 15.
362 bind(L_2TAG_PACKET_10_0_2);
363 movsd(xmm0, ExternalAddress(XMIN)); // 0x00000000UL, 0x00100000UL
364 mulsd(xmm0, xmm0);
365 movl(Address(rsp, 0), 15);
366 jmp(L_2TAG_PACKET_6_0_2);
367
// x is infinite or NaN. A high word above 0x7ff00000 or a non-zero low word
// means NaN.
368 bind(L_2TAG_PACKET_9_0_2);
369 movl(edx, Address(rsp, 8));
370 cmpl(eax, 2146435072);
371 jcc(Assembler::above, L_2TAG_PACKET_11_0_2);
372 cmpl(edx, 0);
373 jcc(Assembler::notEqual, L_2TAG_PACKET_11_0_2);
// Infinite: a signed high word equal to 0x7ff00000 is +INF, otherwise -INF.
374 movl(eax, Address(rsp, 12));
375 cmpl(eax, 2146435072);
376 jcc(Assembler::notEqual, L_2TAG_PACKET_12_0_2);
// exp(+INF) = +INF
377 movsd(xmm0, ExternalAddress(INF)); // 0x00000000UL, 0x7ff00000UL
378 jmp(B1_5);
379
// exp(-INF) = 0
380 bind(L_2TAG_PACKET_12_0_2);
381 movsd(xmm0, ExternalAddress(ZERO)); // 0x00000000UL, 0x00000000UL
382 jmp(B1_5);
383
// exp(NaN): return x + x.
384 bind(L_2TAG_PACKET_11_0_2);
385 movsd(xmm0, Address(rsp, 8));
386 addsd(xmm0, xmm0);
387 jmp(B1_5);
388
// Argument outside the main-path range. The high word of |x| at or above
// 0x40900000 is the large-magnitude case; otherwise |x| is tiny and the
// result is 1.0 + x.
389 bind(L_2TAG_PACKET_0_0_2);
390 movl(eax, Address(rsp, 12));
391 andl(eax, 2147483647);
392 cmpl(eax, 1083179008);
393 jcc(Assembler::aboveEqual, L_2TAG_PACKET_8_0_2);
394 movsd(Address(rsp, 8), xmm0);
395 addsd(xmm0, ExternalAddress(ONE_val)); // 0x00000000UL, 0x3ff00000UL
396 jmp(B1_5);
397
// Status paths: spill the result to [rsp+16] and reload it.
398 bind(L_2TAG_PACKET_6_0_2);
399 movq(Address(rsp, 16), xmm0);
400
401 bind(B1_3);
402 movq(xmm0, Address(rsp, 16));
403
// Common exit: release the 24 bytes reserved on entry. Result is in xmm0.
404 bind(B1_5);
405 addq(rsp, 24);
406 }
407 #else
408 // The 32 bit code is at most SSE2 compliant
// Single constant table for the 32-bit fast_exp(), addressed by byte offset
// from its start. Layout, as used by that function:
//   +0    : 0xfff0000000000000 in both lanes (sign/exponent mask, used with pandn)
//   +16   : 0x00000000ffffffc0 in both lanes (clears the six index bits)
//   +32   : 0xffc0 in both lanes (1023 * 64, the exponent bias before shifting)
//   +48   : 0x4338000000000000 in both lanes (rounding shifter)
//   +64   : ~ 64/log(2) in both lanes
//   +80   : ~ high part of log(2)/64 in both lanes
//   +96   : ~ low part of log(2)/64 in both lanes
//   +112  : ~ 0.5 in both lanes
//   +128, +144 : polynomial coefficient pairs
//   +160  : 64 table entries of 16 bytes, indexed by (m & 63) << 4
//   +1184 : 1.0
//   +1192 : +infinity
//   +1200 : 0.0
//   +1208 : largest finite double
//   +1216 : smallest positive normal double
// The layout must not change: the offsets are hard-coded in fast_exp().
409 ATTRIBUTE_ALIGNED(16) juint _static_const_table[] =
410 {
411 0x00000000UL, 0xfff00000UL, 0x00000000UL, 0xfff00000UL, 0xffffffc0UL,
412 0x00000000UL, 0xffffffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL,
413 0x0000ffc0UL, 0x00000000UL, 0x00000000UL, 0x43380000UL, 0x00000000UL,
414 0x43380000UL, 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL,
415 0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL, 0xbc9e3b3aUL,
416 0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xfffffffeUL, 0x3fdfffffUL,
417 0xfffffffeUL, 0x3fdfffffUL, 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL,
418 0x3fa55555UL, 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL,
419 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x0e03754dUL,
420 0x3cad7bbfUL, 0x3e778060UL, 0x00002c9aUL, 0x3567f613UL, 0x3c8cd252UL,
421 0xd3158574UL, 0x000059b0UL, 0x61e6c861UL, 0x3c60f74eUL, 0x18759bc8UL,
422 0x00008745UL, 0x5d837b6cUL, 0x3c979aa6UL, 0x6cf9890fUL, 0x0000b558UL,
423 0x702f9cd1UL, 0x3c3ebe3dUL, 0x32d3d1a2UL, 0x0000e3ecUL, 0x1e63bcd8UL,
424 0x3ca3516eUL, 0xd0125b50UL, 0x00011301UL, 0x26f0387bUL, 0x3ca4c554UL,
425 0xaea92ddfUL, 0x0001429aUL, 0x62523fb6UL, 0x3ca95153UL, 0x3c7d517aUL,
426 0x000172b8UL, 0x3f1353bfUL, 0x3c8b898cUL, 0xeb6fcb75UL, 0x0001a35bUL,
427 0x3e3a2f5fUL, 0x3c9aecf7UL, 0x3168b9aaUL, 0x0001d487UL, 0x44a6c38dUL,
428 0x3c8a6f41UL, 0x88628cd6UL, 0x0002063bUL, 0xe3a8a894UL, 0x3c968efdUL,
429 0x6e756238UL, 0x0002387aUL, 0x981fe7f2UL, 0x3c80472bUL, 0x65e27cddUL,
430 0x00026b45UL, 0x6d09ab31UL, 0x3c82f7e1UL, 0xf51fdee1UL, 0x00029e9dUL,
431 0x720c0ab3UL, 0x3c8b3782UL, 0xa6e4030bUL, 0x0002d285UL, 0x4db0abb6UL,
432 0x3c834d75UL, 0x0a31b715UL, 0x000306feUL, 0x5dd3f84aUL, 0x3c8fdd39UL,
433 0xb26416ffUL, 0x00033c08UL, 0xcc187d29UL, 0x3ca12f8cUL, 0x373aa9caUL,
434 0x000371a7UL, 0x738b5e8bUL, 0x3ca7d229UL, 0x34e59ff6UL, 0x0003a7dbUL,
435 0xa72a4c6dUL, 0x3c859f48UL, 0x4c123422UL, 0x0003dea6UL, 0x259d9205UL,
436 0x3ca8b846UL, 0x21f72e29UL, 0x0004160aUL, 0x60c2ac12UL, 0x3c4363edUL,
437 0x6061892dUL, 0x00044e08UL, 0xdaa10379UL, 0x3c6ecce1UL, 0xb5c13cd0UL,
438 0x000486a2UL, 0xbb7aafb0UL, 0x3c7690ceUL, 0xd5362a27UL, 0x0004bfdaUL,
439 0x9b282a09UL, 0x3ca083ccUL, 0x769d2ca6UL, 0x0004f9b2UL, 0xc1aae707UL,
440 0x3ca509b0UL, 0x569d4f81UL, 0x0005342bUL, 0x18fdd78eUL, 0x3c933505UL,
441 0x36b527daUL, 0x00056f47UL, 0xe21c5409UL, 0x3c9063e1UL, 0xdd485429UL,
442 0x0005ab07UL, 0x2b64c035UL, 0x3c9432e6UL, 0x15ad2148UL, 0x0005e76fUL,
443 0x99f08c0aUL, 0x3ca01284UL, 0xb03a5584UL, 0x0006247eUL, 0x0073dc06UL,
444 0x3c99f087UL, 0x82552224UL, 0x00066238UL, 0x0da05571UL, 0x3c998d4dUL,
445 0x667f3bccUL, 0x0006a09eUL, 0x86ce4786UL, 0x3ca52bb9UL, 0x3c651a2eUL,
446 0x0006dfb2UL, 0x206f0dabUL, 0x3ca32092UL, 0xe8ec5f73UL, 0x00071f75UL,
447 0x8e17a7a6UL, 0x3ca06122UL, 0x564267c8UL, 0x00075febUL, 0x461e9f86UL,
448 0x3ca244acUL, 0x73eb0186UL, 0x0007a114UL, 0xabd66c55UL, 0x3c65ebe1UL,
449 0x36cf4e62UL, 0x0007e2f3UL, 0xbbff67d0UL, 0x3c96fe9fUL, 0x994cce12UL,
450 0x00082589UL, 0x14c801dfUL, 0x3c951f14UL, 0x9b4492ecUL, 0x000868d9UL,
451 0xc1f0eab4UL, 0x3c8db72fUL, 0x422aa0dbUL, 0x0008ace5UL, 0x59f35f44UL,
452 0x3c7bf683UL, 0x99157736UL, 0x0008f1aeUL, 0x9c06283cUL, 0x3ca360baUL,
453 0xb0cdc5e4UL, 0x00093737UL, 0x20f962aaUL, 0x3c95e8d1UL, 0x9fde4e4fUL,
454 0x00097d82UL, 0x2b91ce27UL, 0x3c71affcUL, 0x82a3f090UL, 0x0009c491UL,
455 0x589a2ebdUL, 0x3c9b6d34UL, 0x7b5de564UL, 0x000a0c66UL, 0x9ab89880UL,
456 0x3c95277cUL, 0xb23e255cUL, 0x000a5503UL, 0x6e735ab3UL, 0x3c846984UL,
457 0x5579fdbfUL, 0x000a9e6bUL, 0x92cb3387UL, 0x3c8c1a77UL, 0x995ad3adUL,
458 0x000ae89fUL, 0xdc2d1d96UL, 0x3ca22466UL, 0xb84f15faUL, 0x000b33a2UL,
459 0xb19505aeUL, 0x3ca1112eUL, 0xf2fb5e46UL, 0x000b7f76UL, 0x0a5fddcdUL,
460 0x3c74ffd7UL, 0x904bc1d2UL, 0x000bcc1eUL, 0x30af0cb3UL, 0x3c736eaeUL,
461 0xdd85529cUL, 0x000c199bUL, 0xd10959acUL, 0x3c84e08fUL, 0x2e57d14bUL,
462 0x000c67f1UL, 0x6c921968UL, 0x3c676b2cUL, 0xdcef9069UL, 0x000cb720UL,
463 0x36df99b3UL, 0x3c937009UL, 0x4a07897bUL, 0x000d072dUL, 0xa63d07a7UL,
464 0x3c74a385UL, 0xdcfba487UL, 0x000d5818UL, 0xd5c192acUL, 0x3c8e5a50UL,
465 0x03db3285UL, 0x000da9e6UL, 0x1c4a9792UL, 0x3c98bb73UL, 0x337b9b5eUL,
466 0x000dfc97UL, 0x603a88d3UL, 0x3c74b604UL, 0xe78b3ff6UL, 0x000e502eUL,
467 0x92094926UL, 0x3c916f27UL, 0xa2a490d9UL, 0x000ea4afUL, 0x41aa2008UL,
468 0x3c8ec3bcUL, 0xee615a27UL, 0x000efa1bUL, 0x31d185eeUL, 0x3c8a64a9UL,
469 0x5b6e4540UL, 0x000f5076UL, 0x4d91cd9dUL, 0x3c77893bUL, 0x819e90d8UL,
470 0x000fa7c1UL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x7ff00000UL,
471 0x00000000UL, 0x00000000UL, 0xffffffffUL, 0x7fefffffUL, 0x00000000UL,
472 0x00100000UL
473 };
474
475 //registers,
476 // input: (rbp + 8)
477 // scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
478 // rax, rdx, rcx, rbx (tmp)
479
480 // Code generated by Intel C compiler for LIBM library
481
// Emits the instruction sequence that computes exp(x) on 32-bit x86, using
// SSE2 for the main path and the x87 FPU for the out-of-range scaling path.
//
// As used by the emitted code:
//   input           - the double at [rsp+128] after the 120-byte stack
//                     adjustment below
//   result          - pushed onto the x87 register stack (fld_d) on every path
//   xmm0..xmm7      - scratch
//   eax, ecx, edx   - scratch general-purpose registers
//   tmp             - saved to [rsp+64], used as the base address of
//                     _static_const_table, and restored before the end
// tmp, eax, ecx and edx must be four different registers (asserted below).
//
// NOTE(review): the sequence subtracts 120 from rsp and never adds it back, and
// it emits no return instruction; presumably the caller's frame teardown
// restores rsp. Verify against the stub generator.
// NOTE(review): the order of the emitted instructions is significant, because
// registers are reused aggressively; do not reorder.
fast_exp(XMMRegister xmm0,XMMRegister xmm1,XMMRegister xmm2,XMMRegister xmm3,XMMRegister xmm4,XMMRegister xmm5,XMMRegister xmm6,XMMRegister xmm7,Register eax,Register ecx,Register edx,Register tmp)482 void MacroAssembler::fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) {
483 Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
484 Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
485 Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2, L_2TAG_PACKET_11_0_2;
486 Label L_2TAG_PACKET_12_0_2, start;
487
488 assert_different_registers(tmp, eax, ecx, edx);
489 address static_const_table = (address)_static_const_table;
490
// `start` is bound but never used as a jump target; L_2TAG_PACKET_12_0_2 is
// declared but never bound or referenced in this function.
491 bind(start);
// Reserve 120 bytes of stack, save tmp, and point tmp at the constant table.
492 subl(rsp, 120);
493 movl(Address(rsp, 64), tmp);
494 lea(tmp, ExternalAddress(static_const_table));
// Load x and duplicate it into both lanes.
495 movsd(xmm0, Address(rsp, 128));
496 unpcklpd(xmm0, xmm0);
497 movdqu(xmm1, Address(tmp, 64)); // 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL
498 movdqu(xmm6, Address(tmp, 48)); // 0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL
499 movdqu(xmm2, Address(tmp, 80)); // 0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL
500 movdqu(xmm3, Address(tmp, 96)); // 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL
// Range check on the top 16 bits of x with the sign bit cleared (e).
// (16527 - e) | (e - 15504) has its sign bit set exactly when e lies outside
// [15504, 16527]; such arguments go to the special-case path.
501 pextrw(eax, xmm0, 3);
502 andl(eax, 32767);
503 movl(edx, 16527);
504 subl(edx, eax);
505 subl(eax, 15504);
506 orl(edx, eax);
507 cmpl(edx, INT_MIN);
508 jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);
// Main path. xmm1 = x * K/log(2), rounded to an integer m by adding and
// subtracting the shifter; xmm7 keeps the sum whose low bits hold m.
509 mulpd(xmm1, xmm0);
510 addpd(xmm1, xmm6);
511 movapd(xmm7, xmm1);
512 subpd(xmm1, xmm6);
// Argument reduction: y = x - m*hi(log(2)/K) - m*lo(log(2)/K).
513 mulpd(xmm2, xmm1);
514 movdqu(xmm4, Address(tmp, 128)); // 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL
515 mulpd(xmm3, xmm1);
516 movdqu(xmm5, Address(tmp, 144)); // 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL
517 subpd(xmm0, xmm2);
// eax = m. ecx = (m & 63) * 16 is the table byte offset; eax = edx = m >> 6
// (arithmetic) is the exponent n.
518 movdl(eax, xmm7);
519 movl(ecx, eax);
520 andl(ecx, 63);
521 shll(ecx, 4);
522 sarl(eax, 6);
523 movl(edx, eax);
// Build the exponent bits of 2^n in xmm7.
524 movdqu(xmm6, Address(tmp, 16)); // 0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL
525 pand(xmm7, xmm6);
526 movdqu(xmm6, Address(tmp, 32)); // 0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL
527 paddq(xmm7, xmm6);
528 psllq(xmm7, 46);
529 subpd(xmm0, xmm3);
// Load the 16-byte table entry; the table starts at byte offset 160.
530 movdqu(xmm2, Address(tmp, ecx, Address::times_1, 160));
// Polynomial evaluation in y, combined with the low half of the table entry.
531 mulpd(xmm4, xmm0);
532 movapd(xmm6, xmm0);
533 movapd(xmm1, xmm0);
534 mulpd(xmm6, xmm6);
535 mulpd(xmm0, xmm6);
536 addpd(xmm5, xmm4);
537 mulsd(xmm0, xmm6);
538 mulpd(xmm6, Address(tmp, 112)); // 0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL, 0x3fdfffffUL
539 addsd(xmm1, xmm2);
540 unpckhpd(xmm2, xmm2);
541 mulpd(xmm0, xmm5);
542 addsd(xmm1, xmm0);
// xmm2 = high half of the table entry | exponent bits: the scale factor T.
543 por(xmm2, xmm7);
544 unpckhpd(xmm0, xmm0);
545 addsd(xmm0, xmm1);
546 addsd(xmm0, xmm6);
// Unsigned check of n + 894 against 1916: n outside [-894, 1022] takes the
// x87 scaling path.
547 addl(edx, 894);
548 cmpl(edx, 1916);
549 jcc(Assembler::above, L_2TAG_PACKET_1_0_2);
// Common case: result = p * T + T.
550 mulsd(xmm0, xmm2);
551 addsd(xmm0, xmm2);
552 jmp(L_2TAG_PACKET_2_0_2);
553
// Out-of-range exponent n (still in eax): finish the computation on the x87.
554 bind(L_2TAG_PACKET_1_0_2);
// Save the x87 control word at [rsp+24] and load a copy with bits 8-9 set
// (768 = 0x300, the precision-control field).
555 fnstcw(Address(rsp, 24));
556 movzwl(edx, Address(rsp, 24));
557 orl(edx, 768);
558 movw(Address(rsp, 28), edx);
559 fldcw(Address(rsp, 28));
// Split n into eax = n >> 1 and edx = n - (n >> 1).
560 movl(edx, eax);
561 sarl(eax, 1);
562 subl(edx, eax);
// xmm6 = T with its sign and exponent bits replaced by the biased exponent
// (n >> 1) + 1023.
563 movdqu(xmm6, Address(tmp, 0)); // 0x00000000UL, 0xfff00000UL, 0x00000000UL, 0xfff00000UL
564 pandn(xmm6, xmm2);
565 addl(eax, 1023);
566 movdl(xmm3, eax);
567 psllq(xmm3, 52);
568 por(xmm6, xmm3);
// xmm4 = 2^(n - (n >> 1)) built directly from its biased exponent.
569 addl(edx, 1023);
570 movdl(xmm4, edx);
571 psllq(xmm4, 52);
// x87: push p, push T', compute p*T' + T', multiply by the second scale factor
// and store the result back as a double at [rsp+8].
572 movsd(Address(rsp, 8), xmm0);
573 fld_d(Address(rsp, 8));
574 movsd(Address(rsp, 16), xmm6);
575 fld_d(Address(rsp, 16));
576 fmula(1);
577 faddp(1);
578 movsd(Address(rsp, 8), xmm4);
579 fld_d(Address(rsp, 8));
580 fmulp(1);
581 fstp_d(Address(rsp, 8));
582 movsd(xmm0, Address(rsp, 8));
// Restore the caller's x87 control word.
583 fldcw(Address(rsp, 24));
// Inspect the exponent field of the result (mask 0x7ff0): all ones means
// overflow, zero means the result is zero or subnormal.
584 pextrw(ecx, xmm0, 3);
585 andl(ecx, 32752);
586 cmpl(ecx, 32752);
587 jcc(Assembler::aboveEqual, L_2TAG_PACKET_3_0_2);
588 cmpl(ecx, 0);
589 jcc(Assembler::equal, L_2TAG_PACKET_4_0_2);
590 jmp(L_2TAG_PACKET_2_0_2);
// NOTE(review): the instructions from here to the next bind() follow an
// unconditional jmp and no label is bound in between, so they are emitted but
// can never execute.
591 cmpl(ecx, INT_MIN);
592 jcc(Assembler::below, L_2TAG_PACKET_3_0_2);
593 cmpl(ecx, -1064950997);
594 jcc(Assembler::below, L_2TAG_PACKET_2_0_2);
595 jcc(Assembler::above, L_2TAG_PACKET_4_0_2);
596 movl(edx, Address(rsp, 128));
597 cmpl(edx, -17155601);
598 jcc(Assembler::below, L_2TAG_PACKET_2_0_2);
599 jmp(L_2TAG_PACKET_4_0_2);
600
// Overflow: status code 14 in edx.
// NOTE(review): edx is not read again in this function after being set here.
601 bind(L_2TAG_PACKET_3_0_2);
602 movl(edx, 14);
603 jmp(L_2TAG_PACKET_5_0_2);
604
// Zero/subnormal result: status code 15 in edx.
605 bind(L_2TAG_PACKET_4_0_2);
606 movl(edx, 15);
607
// Status exit: spill the result to [rsp+0], reload the original argument into
// xmm0, and push the result onto the x87 stack.
608 bind(L_2TAG_PACKET_5_0_2);
609 movsd(Address(rsp, 0), xmm0);
610 movsd(xmm0, Address(rsp, 128));
611 fld_d(Address(rsp, 0));
612 jmp(L_2TAG_PACKET_6_0_2);
613
// Large-magnitude argument: eax = high word of |x|, at least 0x40900000.
614 bind(L_2TAG_PACKET_7_0_2);
// High word >= 0x7ff00000: x is infinite or NaN.
615 cmpl(eax, 2146435072);
616 jcc(Assembler::aboveEqual, L_2TAG_PACKET_8_0_2);
// Reload the signed high word; sign bit set means x is negative (underflow).
617 movl(eax, Address(rsp, 132));
618 cmpl(eax, INT_MIN);
619 jcc(Assembler::aboveEqual, L_2TAG_PACKET_9_0_2);
// Large positive x: square the largest finite double to produce overflow.
620 movsd(xmm0, Address(tmp, 1208)); // 0xffffffffUL, 0x7fefffffUL
621 mulsd(xmm0, xmm0);
622 movl(edx, 14);
623 jmp(L_2TAG_PACKET_5_0_2);
624
// Large negative x: square the smallest normal double (table offset 1216,
// 0x0010000000000000) to produce underflow.
625 bind(L_2TAG_PACKET_9_0_2);
626 movsd(xmm0, Address(tmp, 1216));
627 mulsd(xmm0, xmm0);
628 movl(edx, 15);
629 jmp(L_2TAG_PACKET_5_0_2);
630
// x is infinite or NaN. A high word above 0x7ff00000 or a non-zero low word
// means NaN.
631 bind(L_2TAG_PACKET_8_0_2);
632 movl(edx, Address(rsp, 128));
633 cmpl(eax, 2146435072);
634 jcc(Assembler::above, L_2TAG_PACKET_10_0_2);
635 cmpl(edx, 0);
636 jcc(Assembler::notEqual, L_2TAG_PACKET_10_0_2);
// Infinite: a signed high word equal to 0x7ff00000 is +INF, otherwise -INF.
637 movl(eax, Address(rsp, 132));
638 cmpl(eax, 2146435072);
639 jcc(Assembler::notEqual, L_2TAG_PACKET_11_0_2);
// exp(+INF) = +INF
640 movsd(xmm0, Address(tmp, 1192)); // 0x00000000UL, 0x7ff00000UL
641 jmp(L_2TAG_PACKET_2_0_2);
642
// exp(-INF) = 0
643 bind(L_2TAG_PACKET_11_0_2);
644 movsd(xmm0, Address(tmp, 1200)); // 0x00000000UL, 0x00000000UL
645 jmp(L_2TAG_PACKET_2_0_2);
646
// exp(NaN): return x + x.
647 bind(L_2TAG_PACKET_10_0_2);
648 movsd(xmm0, Address(rsp, 128));
649 addsd(xmm0, xmm0);
650 jmp(L_2TAG_PACKET_2_0_2);
651
// Argument outside the main-path range. The high word of |x| at or above
// 0x40900000 is the large-magnitude case; otherwise |x| is tiny and the
// result is 1.0 + x.
652 bind(L_2TAG_PACKET_0_0_2);
653 movl(eax, Address(rsp, 132));
654 andl(eax, 2147483647);
655 cmpl(eax, 1083179008);
656 jcc(Assembler::aboveEqual, L_2TAG_PACKET_7_0_2);
657 movsd(xmm0, Address(rsp, 128));
658 addsd(xmm0, Address(tmp, 1184)); // 0x00000000UL, 0x3ff00000UL
659 jmp(L_2TAG_PACKET_2_0_2);
660
// Normal exit: move the SSE result onto the x87 stack through [rsp+48].
661 bind(L_2TAG_PACKET_2_0_2);
662 movsd(Address(rsp, 48), xmm0);
663 fld_d(Address(rsp, 48));
664
// Common exit: restore tmp from the slot it was saved to on entry.
665 bind(L_2TAG_PACKET_6_0_2);
666 movl(tmp, Address(rsp, 64));
667 }
668 #endif
669