/*
 * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2016, Intel Corporation. All rights reserved.
 * Intel Math Library (LIBM) Source Code
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
25 
26 
27 package org.graalvm.compiler.lir.amd64;
28 
29 import static jdk.vm.ci.amd64.AMD64.r11;
30 import static jdk.vm.ci.amd64.AMD64.r8;
31 import static jdk.vm.ci.amd64.AMD64.rax;
32 import static jdk.vm.ci.amd64.AMD64.rcx;
33 import static jdk.vm.ci.amd64.AMD64.rdx;
34 import static jdk.vm.ci.amd64.AMD64.rsp;
35 import static jdk.vm.ci.amd64.AMD64.xmm0;
36 import static jdk.vm.ci.amd64.AMD64.xmm1;
37 import static jdk.vm.ci.amd64.AMD64.xmm2;
38 import static jdk.vm.ci.amd64.AMD64.xmm3;
39 import static jdk.vm.ci.amd64.AMD64.xmm4;
40 import static jdk.vm.ci.amd64.AMD64.xmm5;
41 import static jdk.vm.ci.amd64.AMD64.xmm6;
42 import static jdk.vm.ci.amd64.AMD64.xmm7;
43 import static org.graalvm.compiler.lir.amd64.AMD64HotSpotHelper.pointerConstant;
44 import static org.graalvm.compiler.lir.amd64.AMD64HotSpotHelper.recordExternalAddress;
45 
46 import org.graalvm.compiler.asm.Label;
47 import org.graalvm.compiler.asm.amd64.AMD64Address;
48 import org.graalvm.compiler.asm.amd64.AMD64Assembler;
49 import org.graalvm.compiler.asm.amd64.AMD64Assembler.ConditionFlag;
50 import org.graalvm.compiler.asm.amd64.AMD64MacroAssembler;
51 import org.graalvm.compiler.lir.LIRInstructionClass;
52 import org.graalvm.compiler.lir.asm.ArrayDataPointerConstant;
53 import org.graalvm.compiler.lir.asm.CompilationResultBuilder;
54 
55 import jdk.vm.ci.amd64.AMD64;
56 
57 /**
58  * <pre>
59  *                     ALGORITHM DESCRIPTION - LOG()
60  *                     ---------------------
61  *
62  *    x=2^k * mx, mx in [1,2)
63  *
64  *    Get B~1/mx based on the output of rcpss instruction (B0)
65  *    B = int((B0*2^7+0.5))/2^7
66  *
67  *    Reduced argument: r=B*mx-1.0 (computed accurately in high and low parts)
68  *
69  *    Result:  k*log(2) - log(B) + p(r) if |x-1| >= small value (2^-6)  and
70  *             p(r) is a degree 7 polynomial
71  *             -log(B) read from data table (high, low parts)
72  *             Result is formed from high and low parts.
73  *
74  * Special cases:
75  *  log(NaN) = quiet NaN, and raise invalid exception
76  *  log(+INF) = that INF
77  *  log(0) = -INF with divide-by-zero exception raised
78  *  log(1) = +0
79  *  log(x) = NaN with invalid exception raised if x < -0, including -INF
80  * </pre>
81  */
82 public final class AMD64MathLogOp extends AMD64MathIntrinsicUnaryOp {
83 
84     public static final LIRInstructionClass<AMD64MathLogOp> TYPE = LIRInstructionClass.create(AMD64MathLogOp.class);
85 
AMD64MathLogOp()86     public AMD64MathLogOp() {
87         super(TYPE, /* GPR */ rax, rcx, rdx, r8, r11,
88                         /* XMM */ xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);
89     }
90 
91     private ArrayDataPointerConstant lTbl = pointerConstant(16, new int[]{
92             // @formatter:off
93             0xfefa3800, 0x3fe62e42, 0x93c76730, 0x3d2ef357, 0xaa241800,
94             0x3fe5ee82, 0x0cda46be, 0x3d220238, 0x5c364800, 0x3fe5af40,
95             0xac10c9fb, 0x3d2dfa63, 0x26bb8c00, 0x3fe5707a, 0xff3303dd,
96             0x3d09980b, 0x26867800, 0x3fe5322e, 0x5d257531, 0x3d05ccc4,
97             0x835a5000, 0x3fe4f45a, 0x6d93b8fb, 0xbd2e6c51, 0x6f970c00,
98             0x3fe4b6fd, 0xed4c541c, 0x3cef7115, 0x27e8a400, 0x3fe47a15,
99             0xf94d60aa, 0xbd22cb6a, 0xf2f92400, 0x3fe43d9f, 0x481051f7,
100             0xbcfd984f, 0x2125cc00, 0x3fe4019c, 0x30f0c74c, 0xbd26ce79,
101             0x0c36c000, 0x3fe3c608, 0x7cfe13c2, 0xbd02b736, 0x17197800,
102             0x3fe38ae2, 0xbb5569a4, 0xbd218b7a, 0xad9d8c00, 0x3fe35028,
103             0x9527e6ac, 0x3d10b83f, 0x44340800, 0x3fe315da, 0xc5a0ed9c,
104             0xbd274e93, 0x57b0e000, 0x3fe2dbf5, 0x07b9dc11, 0xbd17a6e5,
105             0x6d0ec000, 0x3fe2a278, 0xe797882d, 0x3d206d2b, 0x1134dc00,
106             0x3fe26962, 0x05226250, 0xbd0b61f1, 0xd8bebc00, 0x3fe230b0,
107             0x6e48667b, 0x3d12fc06, 0x5fc61800, 0x3fe1f863, 0xc9fe81d3,
108             0xbd2a7242, 0x49ae6000, 0x3fe1c078, 0xed70e667, 0x3cccacde,
109             0x40f23c00, 0x3fe188ee, 0xf8ab4650, 0x3d14cc4e, 0xf6f29800,
110             0x3fe151c3, 0xa293ae49, 0xbd2edd97, 0x23c75c00, 0x3fe11af8,
111             0xbb9ddcb2, 0xbd258647, 0x8611cc00, 0x3fe0e489, 0x07801742,
112             0x3d1c2998, 0xe2d05400, 0x3fe0ae76, 0x887e7e27, 0x3d1f486b,
113             0x0533c400, 0x3fe078bf, 0x41edf5fd, 0x3d268122, 0xbe760400,
114             0x3fe04360, 0xe79539e0, 0xbd04c45f, 0xe5b20800, 0x3fe00e5a,
115             0xb1727b1c, 0xbd053ba3, 0xaf7a4800, 0x3fdfb358, 0x3c164935,
116             0x3d0085fa, 0xee031800, 0x3fdf4aa7, 0x6f014a8b, 0x3d12cde5,
117             0x56b41000, 0x3fdee2a1, 0x5a470251, 0x3d2f27f4, 0xc3ddb000,
118             0x3fde7b42, 0x5372bd08, 0xbd246550, 0x1a272800, 0x3fde148a,
119             0x07322938, 0xbd1326b2, 0x484c9800, 0x3fddae75, 0x60dc616a,
120             0xbd1ea42d, 0x46def800, 0x3fdd4902, 0xe9a767a8, 0x3d235baf,
121             0x18064800, 0x3fdce42f, 0x3ec7a6b0, 0xbd0797c3, 0xc7455800,
122             0x3fdc7ff9, 0xc15249ae, 0xbd29b6dd, 0x693fa000, 0x3fdc1c60,
123             0x7fe8e180, 0x3d2cec80, 0x1b80e000, 0x3fdbb961, 0xf40a666d,
124             0x3d27d85b, 0x04462800, 0x3fdb56fa, 0x2d841995, 0x3d109525,
125             0x5248d000, 0x3fdaf529, 0x52774458, 0xbd217cc5, 0x3c8ad800,
126             0x3fda93ed, 0xbea77a5d, 0x3d1e36f2, 0x0224f800, 0x3fda3344,
127             0x7f9d79f5, 0x3d23c645, 0xea15f000, 0x3fd9d32b, 0x10d0c0b0,
128             0xbd26279e, 0x43135800, 0x3fd973a3, 0xa502d9f0, 0xbd152313,
129             0x635bf800, 0x3fd914a8, 0x2ee6307d, 0xbd1766b5, 0xa88b3000,
130             0x3fd8b639, 0xe5e70470, 0xbd205ae1, 0x776dc800, 0x3fd85855,
131             0x3333778a, 0x3d2fd56f, 0x3bd81800, 0x3fd7fafa, 0xc812566a,
132             0xbd272090, 0x687cf800, 0x3fd79e26, 0x2efd1778, 0x3d29ec7d,
133             0x76c67800, 0x3fd741d8, 0x49dc60b3, 0x3d2d8b09, 0xe6af1800,
134             0x3fd6e60e, 0x7c222d87, 0x3d172165, 0x3e9c6800, 0x3fd68ac8,
135             0x2756eba0, 0x3d20a0d3, 0x0b3ab000, 0x3fd63003, 0xe731ae00,
136             0xbd2db623, 0xdf596000, 0x3fd5d5bd, 0x08a465dc, 0xbd0a0b2a,
137             0x53c8d000, 0x3fd57bf7, 0xee5d40ef, 0x3d1faded, 0x0738a000,
138             0x3fd522ae, 0x8164c759, 0x3d2ebe70, 0x9e173000, 0x3fd4c9e0,
139             0x1b0ad8a4, 0xbd2e2089, 0xc271c800, 0x3fd4718d, 0x0967d675,
140             0xbd2f27ce, 0x23d5e800, 0x3fd419b4, 0xec90e09d, 0x3d08e436,
141             0x77333000, 0x3fd3c252, 0xb606bd5c, 0x3d183b54, 0x76be1000,
142             0x3fd36b67, 0xb0f177c8, 0x3d116ecd, 0xe1d36000, 0x3fd314f1,
143             0xd3213cb8, 0xbd28e27a, 0x7cdc9000, 0x3fd2bef0, 0x4a5004f4,
144             0x3d2a9cfa, 0x1134d800, 0x3fd26962, 0xdf5bb3b6, 0x3d2c93c1,
145             0x6d0eb800, 0x3fd21445, 0xba46baea, 0x3d0a87de, 0x635a6800,
146             0x3fd1bf99, 0x5147bdb7, 0x3d2ca6ed, 0xcbacf800, 0x3fd16b5c,
147             0xf7a51681, 0x3d2b9acd, 0x8227e800, 0x3fd1178e, 0x63a5f01c,
148             0xbd2c210e, 0x67616000, 0x3fd0c42d, 0x163ceae9, 0x3d27188b,
149             0x604d5800, 0x3fd07138, 0x16ed4e91, 0x3cf89cdb, 0x5626c800,
150             0x3fd01eae, 0x1485e94a, 0xbd16f08c, 0x6cb3b000, 0x3fcf991c,
151             0xca0cdf30, 0x3d1bcbec, 0xe4dd0000, 0x3fcef5ad, 0x65bb8e11,
152             0xbcca2115, 0xffe71000, 0x3fce530e, 0x6041f430, 0x3cc21227,
153             0xb0d49000, 0x3fcdb13d, 0xf715b035, 0xbd2aff2a, 0xf2656000,
154             0x3fcd1037, 0x75b6f6e4, 0xbd084a7e, 0xc6f01000, 0x3fcc6ffb,
155             0xc5962bd2, 0xbcf1ec72, 0x383be000, 0x3fcbd087, 0x595412b6,
156             0xbd2d4bc4, 0x575bd000, 0x3fcb31d8, 0x4eace1aa, 0xbd0c358d,
157             0x3c8ae000, 0x3fca93ed, 0x50562169, 0xbd287243, 0x07089000,
158             0x3fc9f6c4, 0x6865817a, 0x3d29904d, 0xdcf70000, 0x3fc95a5a,
159             0x58a0ff6f, 0x3d07f228, 0xeb390000, 0x3fc8beaf, 0xaae92cd1,
160             0xbd073d54, 0x6551a000, 0x3fc823c1, 0x9a631e83, 0x3d1e0ddb,
161             0x85445000, 0x3fc7898d, 0x70914305, 0xbd1c6610, 0x8b757000,
162             0x3fc6f012, 0xe59c21e1, 0xbd25118d, 0xbe8c1000, 0x3fc6574e,
163             0x2c3c2e78, 0x3d19cf8b, 0x6b544000, 0x3fc5bf40, 0xeb68981c,
164             0xbd127023, 0xe4a1b000, 0x3fc527e5, 0xe5697dc7, 0x3d2633e8,
165             0x8333b000, 0x3fc4913d, 0x54fdb678, 0x3d258379, 0xa5993000,
166             0x3fc3fb45, 0x7e6a354d, 0xbd2cd1d8, 0xb0159000, 0x3fc365fc,
167             0x234b7289, 0x3cc62fa8, 0x0c868000, 0x3fc2d161, 0xcb81b4a1,
168             0x3d039d6c, 0x2a49c000, 0x3fc23d71, 0x8fd3df5c, 0x3d100d23,
169             0x7e23f000, 0x3fc1aa2b, 0x44389934, 0x3d2ca78e, 0x8227e000,
170             0x3fc1178e, 0xce2d07f2, 0x3d21ef78, 0xb59e4000, 0x3fc08598,
171             0x7009902c, 0xbd27e5dd, 0x39dbe000, 0x3fbfe891, 0x4fa10afd,
172             0xbd2534d6, 0x830a2000, 0x3fbec739, 0xafe645e0, 0xbd2dc068,
173             0x63844000, 0x3fbda727, 0x1fa71733, 0x3d1a8940, 0x01bc4000,
174             0x3fbc8858, 0xc65aacd3, 0x3d2646d1, 0x8dad6000, 0x3fbb6ac8,
175             0x2bf768e5, 0xbd139080, 0x40b1c000, 0x3fba4e76, 0xb94407c8,
176             0xbd0e42b6, 0x5d594000, 0x3fb9335e, 0x3abd47da, 0x3d23115c,
177             0x2f40e000, 0x3fb8197e, 0xf96ffdf7, 0x3d0f80dc, 0x0aeac000,
178             0x3fb700d3, 0xa99ded32, 0x3cec1e8d, 0x4d97a000, 0x3fb5e95a,
179             0x3c5d1d1e, 0xbd2c6906, 0x5d208000, 0x3fb4d311, 0x82f4e1ef,
180             0xbcf53a25, 0xa7d1e000, 0x3fb3bdf5, 0xa5db4ed7, 0x3d2cc85e,
181             0xa4472000, 0x3fb2aa04, 0xae9c697d, 0xbd20b6e8, 0xd1466000,
182             0x3fb1973b, 0x560d9e9b, 0xbd25325d, 0xb59e4000, 0x3fb08598,
183             0x7009902c, 0xbd17e5dd, 0xc006c000, 0x3faeea31, 0x4fc93b7b,
184             0xbd0e113e, 0xcdddc000, 0x3faccb73, 0x47d82807, 0xbd1a68f2,
185             0xd0fb0000, 0x3faaaef2, 0x353bb42e, 0x3d20fc1a, 0x149fc000,
186             0x3fa894aa, 0xd05a267d, 0xbd197995, 0xf2d4c000, 0x3fa67c94,
187             0xec19afa2, 0xbd029efb, 0xd42e0000, 0x3fa466ae, 0x75bdfd28,
188             0xbd2c1673, 0x2f8d0000, 0x3fa252f3, 0xe021b67b, 0x3d283e9a,
189             0x89e74000, 0x3fa0415d, 0x5cf1d753, 0x3d0111c0, 0xec148000,
190             0x3f9c63d2, 0x3f9eb2f3, 0x3d2578c6, 0x28c90000, 0x3f984925,
191             0x325a0c34, 0xbd2aa0ba, 0x25980000, 0x3f9432a9, 0x928637fe,
192             0x3d098139, 0x58938000, 0x3f902056, 0x06e2f7d2, 0xbd23dc5b,
193             0xa3890000, 0x3f882448, 0xda74f640, 0xbd275577, 0x75890000,
194             0x3f801015, 0x999d2be8, 0xbd10c76b, 0x59580000, 0x3f700805,
195             0xcb31c67b, 0x3d2166af, 0x00000000, 0x00000000, 0x00000000,
196             0x80000000
197             // @formatter:on
198     });
199 
200     private ArrayDataPointerConstant log2 = pointerConstant(8, new int[]{
201             // @formatter:off
202             0xfefa3800, 0x3fa62e42,
203     });
204     private ArrayDataPointerConstant log28 = pointerConstant(8, new int[]{
205             0x93c76730, 0x3ceef357
206             // @formatter:on
207     });
208 
209     private ArrayDataPointerConstant coeff = pointerConstant(16, new int[]{
210             // @formatter:off
211             0x92492492, 0x3fc24924, 0x00000000, 0xbfd00000,
212     });
213     private ArrayDataPointerConstant coeff16 = pointerConstant(16, new int[]{
214             0x3d6fb175, 0xbfc5555e, 0x55555555, 0x3fd55555,
215     });
216     private ArrayDataPointerConstant coeff32 = pointerConstant(16, new int[]{
217             0x9999999a, 0x3fc99999, 0x00000000, 0xbfe00000
218             // @formatter:on
219     });
220 
221     @Override
emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm)222     public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
223         // registers,
224         // input: xmm0
225         // scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
226         // rax, rdx, rcx, r8, r11
227         Label block0 = new Label();
228         Label block1 = new Label();
229         Label block2 = new Label();
230         Label block3 = new Label();
231         Label block4 = new Label();
232         Label block5 = new Label();
233         Label block6 = new Label();
234         Label block7 = new Label();
235         Label block8 = new Label();
236         Label block9 = new Label();
237 
238         masm.subq(rsp, 24);
239         masm.movsd(new AMD64Address(rsp, 0), xmm0);
240         masm.movq(rax, 0x3ff0000000000000L);
241         masm.movdq(xmm2, rax);
242         masm.movq(rdx, 0x77f0000000000000L);
243         masm.movdq(xmm3, rdx);
244         masm.movl(rcx, 32768);
245         masm.movdl(xmm4, rcx);
246         masm.movq(r8, 0xffffe00000000000L);
247         masm.movdq(xmm5, r8);
248         masm.movdqu(xmm1, xmm0);
249         masm.pextrw(rax, xmm0, 3);
250         masm.por(xmm0, xmm2);
251         masm.movl(rcx, 16352);
252         masm.psrlq(xmm0, 27);
253         masm.leaq(r11, recordExternalAddress(crb, lTbl));
254         masm.psrld(xmm0, 2);
255         masm.rcpps(xmm0, xmm0);
256         masm.psllq(xmm1, 12);
257         masm.pshufd(xmm6, xmm5, 228);
258         masm.psrlq(xmm1, 12);
259         masm.subl(rax, 16);
260         masm.cmplAndJcc(rax, 32736, ConditionFlag.AboveEqual, block0, false);
261 
262         masm.bind(block1);
263         masm.paddd(xmm0, xmm4);
264         masm.por(xmm1, xmm3);
265         masm.movdl(rdx, xmm0);
266         masm.psllq(xmm0, 29);
267         masm.pand(xmm5, xmm1);
268         masm.pand(xmm0, xmm6);
269         masm.subsd(xmm1, xmm5);
270         masm.mulpd(xmm5, xmm0);
271         masm.andl(rax, 32752);
272         masm.subl(rax, rcx);
273         masm.cvtsi2sdl(xmm7, rax);
274         masm.mulsd(xmm1, xmm0);
275         masm.movq(xmm6, recordExternalAddress(crb, log2));             // 0xfefa3800, 0x3fa62e42
276         masm.movdqu(xmm3, recordExternalAddress(crb, coeff));          // 0x92492492, 0x3fc24924,
277                                                                        // 0x00000000, 0xbfd00000
278         masm.subsd(xmm5, xmm2);
279         masm.andl(rdx, 16711680);
280         masm.shrl(rdx, 12);
281         masm.movdqu(xmm0, new AMD64Address(r11, rdx, AMD64Address.Scale.Times1));
282         masm.movdqu(xmm4, recordExternalAddress(crb, coeff16));        // 0x3d6fb175, 0xbfc5555e,
283                                                                        // 0x55555555, 0x3fd55555
284         masm.addsd(xmm1, xmm5);
285         masm.movdqu(xmm2, recordExternalAddress(crb, coeff32));        // 0x9999999a, 0x3fc99999,
286                                                                        // 0x00000000, 0xbfe00000
287         masm.mulsd(xmm6, xmm7);
288         if (masm.supports(AMD64.CPUFeature.SSE3)) {
289             masm.movddup(xmm5, xmm1);
290         } else {
291             masm.movdqu(xmm5, xmm1);
292             masm.movlhps(xmm5, xmm5);
293         }
294         masm.mulsd(xmm7, recordExternalAddress(crb, log28));           // 0x93c76730, 0x3ceef357
295         masm.mulsd(xmm3, xmm1);
296         masm.addsd(xmm0, xmm6);
297         masm.mulpd(xmm4, xmm5);
298         masm.mulpd(xmm5, xmm5);
299         if (masm.supports(AMD64.CPUFeature.SSE3)) {
300             masm.movddup(xmm6, xmm0);
301         } else {
302             masm.movdqu(xmm6, xmm0);
303             masm.movlhps(xmm6, xmm6);
304         }
305         masm.addsd(xmm0, xmm1);
306         masm.addpd(xmm4, xmm2);
307         masm.mulpd(xmm3, xmm5);
308         masm.subsd(xmm6, xmm0);
309         masm.mulsd(xmm4, xmm1);
310         masm.pshufd(xmm2, xmm0, 238);
311         masm.addsd(xmm1, xmm6);
312         masm.mulsd(xmm5, xmm5);
313         masm.addsd(xmm7, xmm2);
314         masm.addpd(xmm4, xmm3);
315         masm.addsd(xmm1, xmm7);
316         masm.mulpd(xmm4, xmm5);
317         masm.addsd(xmm1, xmm4);
318         masm.pshufd(xmm5, xmm4, 238);
319         masm.addsd(xmm1, xmm5);
320         masm.addsd(xmm0, xmm1);
321         masm.jmp(block9);
322 
323         masm.bind(block0);
324         masm.movq(xmm0, new AMD64Address(rsp, 0));
325         masm.movq(xmm1, new AMD64Address(rsp, 0));
326         masm.addl(rax, 16);
327         masm.cmplAndJcc(rax, 32768, ConditionFlag.AboveEqual, block2, false);
328         masm.cmplAndJcc(rax, 16, ConditionFlag.Below, block3, false);
329 
330         masm.bind(block4);
331         masm.addsd(xmm0, xmm0);
332         masm.jmp(block9);
333 
334         masm.bind(block5);
335         masm.jcc(AMD64Assembler.ConditionFlag.Above, block4);
336         masm.cmplAndJcc(rdx, 0, ConditionFlag.Above, block4, false);
337         masm.jmp(block6);
338 
339         masm.bind(block3);
340         masm.xorpd(xmm1, xmm1);
341         masm.addsd(xmm1, xmm0);
342         masm.movdl(rdx, xmm1);
343         masm.psrlq(xmm1, 32);
344         masm.movdl(rcx, xmm1);
345         masm.orl(rdx, rcx);
346         masm.cmplAndJcc(rdx, 0, ConditionFlag.Equal, block7, false);
347         masm.xorpd(xmm1, xmm1);
348         masm.movl(rax, 18416);
349         masm.pinsrw(xmm1, rax, 3);
350         masm.mulsd(xmm0, xmm1);
351         masm.movdqu(xmm1, xmm0);
352         masm.pextrw(rax, xmm0, 3);
353         masm.por(xmm0, xmm2);
354         masm.psrlq(xmm0, 27);
355         masm.movl(rcx, 18416);
356         masm.psrld(xmm0, 2);
357         masm.rcpps(xmm0, xmm0);
358         masm.psllq(xmm1, 12);
359         masm.pshufd(xmm6, xmm5, 228);
360         masm.psrlq(xmm1, 12);
361         masm.jmp(block1);
362 
363         masm.bind(block2);
364         masm.movdl(rdx, xmm1);
365         masm.psrlq(xmm1, 32);
366         masm.movdl(rcx, xmm1);
367         masm.addl(rcx, rcx);
368         masm.cmplAndJcc(rcx, -2097152, ConditionFlag.AboveEqual, block5, false);
369         masm.orl(rdx, rcx);
370         masm.cmplAndJcc(rdx, 0, ConditionFlag.Equal, block7, false);
371 
372         masm.bind(block6);
373         masm.xorpd(xmm1, xmm1);
374         masm.xorpd(xmm0, xmm0);
375         masm.movl(rax, 32752);
376         masm.pinsrw(xmm1, rax, 3);
377         masm.mulsd(xmm0, xmm1);
378         masm.movl(new AMD64Address(rsp, 16), 3);
379         masm.jmp(block8);
380         masm.bind(block7);
381         masm.xorpd(xmm1, xmm1);
382         masm.xorpd(xmm0, xmm0);
383         masm.movl(rax, 49136);
384         masm.pinsrw(xmm0, rax, 3);
385         masm.divsd(xmm0, xmm1);
386         masm.movl(new AMD64Address(rsp, 16), 2);
387 
388         masm.bind(block8);
389         masm.movq(new AMD64Address(rsp, 8), xmm0);
390 
391         masm.movq(xmm0, new AMD64Address(rsp, 8));
392 
393         masm.bind(block9);
394         masm.addq(rsp, 24);
395     }
396 }
397