1 /* 2 * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved. 3 * Copyright (c) 2016, Intel Corporation. All rights reserved. 4 * Intel Math Library (LIBM) Source Code 5 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 6 * 7 * This code is free software; you can redistribute it and/or modify it 8 * under the terms of the GNU General Public License version 2 only, as 9 * published by the Free Software Foundation. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 27 package org.graalvm.compiler.lir.amd64; 28 29 import static jdk.vm.ci.amd64.AMD64.r11; 30 import static jdk.vm.ci.amd64.AMD64.r8; 31 import static jdk.vm.ci.amd64.AMD64.rax; 32 import static jdk.vm.ci.amd64.AMD64.rcx; 33 import static jdk.vm.ci.amd64.AMD64.rdx; 34 import static jdk.vm.ci.amd64.AMD64.rsp; 35 import static jdk.vm.ci.amd64.AMD64.xmm0; 36 import static jdk.vm.ci.amd64.AMD64.xmm1; 37 import static jdk.vm.ci.amd64.AMD64.xmm2; 38 import static jdk.vm.ci.amd64.AMD64.xmm3; 39 import static jdk.vm.ci.amd64.AMD64.xmm4; 40 import static jdk.vm.ci.amd64.AMD64.xmm5; 41 import static jdk.vm.ci.amd64.AMD64.xmm6; 42 import static jdk.vm.ci.amd64.AMD64.xmm7; 43 import static org.graalvm.compiler.lir.amd64.AMD64HotSpotHelper.pointerConstant; 44 import static org.graalvm.compiler.lir.amd64.AMD64HotSpotHelper.recordExternalAddress; 45 46 import org.graalvm.compiler.asm.Label; 47 import org.graalvm.compiler.asm.amd64.AMD64Address; 48 import org.graalvm.compiler.asm.amd64.AMD64Assembler; 49 import org.graalvm.compiler.asm.amd64.AMD64Assembler.ConditionFlag; 50 import org.graalvm.compiler.asm.amd64.AMD64MacroAssembler; 51 import org.graalvm.compiler.lir.LIRInstructionClass; 52 import org.graalvm.compiler.lir.asm.ArrayDataPointerConstant; 53 import org.graalvm.compiler.lir.asm.CompilationResultBuilder; 54 55 import jdk.vm.ci.amd64.AMD64; 56 57 /** 58 * <pre> 59 * ALGORITHM DESCRIPTION - LOG() 60 * --------------------- 61 * 62 * x=2^k * mx, mx in [1,2) 63 * 64 * Get B~1/mx based on the output of rcpss instruction (B0) 65 * B = int((B0*2^7+0.5))/2^7 66 * 67 * Reduced argument: r=B*mx-1.0 (computed accurately in high and low parts) 68 * 69 * Result: k*log(2) - log(B) + p(r) if |x-1| >= small value (2^-6) and 70 * p(r) is a degree 7 polynomial 71 * -log(B) read from data table (high, low parts) 72 * Result is formed from high and low parts. 73 * 74 * Special cases: 75 * log(NaN) = quiet NaN, and raise invalid exception 76 * log(+INF) = that INF 77 * log(0) = -INF with divide-by-zero exception raised 78 * log(1) = +0 79 * log(x) = NaN with invalid exception raised if x < -0, including -INF 80 * </pre> 81 */ 82 public final class AMD64MathLogOp extends AMD64MathIntrinsicUnaryOp { 83 84 public static final LIRInstructionClass<AMD64MathLogOp> TYPE = LIRInstructionClass.create(AMD64MathLogOp.class); 85 AMD64MathLogOp()86 public AMD64MathLogOp() { 87 super(TYPE, /* GPR */ rax, rcx, rdx, r8, r11, 88 /* XMM */ xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7); 89 } 90 91 private ArrayDataPointerConstant lTbl = pointerConstant(16, new int[]{ 92 // @formatter:off 93 0xfefa3800, 0x3fe62e42, 0x93c76730, 0x3d2ef357, 0xaa241800, 94 0x3fe5ee82, 0x0cda46be, 0x3d220238, 0x5c364800, 0x3fe5af40, 95 0xac10c9fb, 0x3d2dfa63, 0x26bb8c00, 0x3fe5707a, 0xff3303dd, 96 0x3d09980b, 0x26867800, 0x3fe5322e, 0x5d257531, 0x3d05ccc4, 97 0x835a5000, 0x3fe4f45a, 0x6d93b8fb, 0xbd2e6c51, 0x6f970c00, 98 0x3fe4b6fd, 0xed4c541c, 0x3cef7115, 0x27e8a400, 0x3fe47a15, 99 0xf94d60aa, 0xbd22cb6a, 0xf2f92400, 0x3fe43d9f, 0x481051f7, 100 0xbcfd984f, 0x2125cc00, 0x3fe4019c, 0x30f0c74c, 0xbd26ce79, 101 0x0c36c000, 0x3fe3c608, 0x7cfe13c2, 0xbd02b736, 0x17197800, 102 0x3fe38ae2, 0xbb5569a4, 0xbd218b7a, 0xad9d8c00, 0x3fe35028, 103 0x9527e6ac, 0x3d10b83f, 0x44340800, 0x3fe315da, 0xc5a0ed9c, 104 0xbd274e93, 0x57b0e000, 0x3fe2dbf5, 0x07b9dc11, 0xbd17a6e5, 105 0x6d0ec000, 0x3fe2a278, 0xe797882d, 0x3d206d2b, 0x1134dc00, 106 0x3fe26962, 0x05226250, 0xbd0b61f1, 0xd8bebc00, 0x3fe230b0, 107 0x6e48667b, 0x3d12fc06, 0x5fc61800, 0x3fe1f863, 0xc9fe81d3, 108 0xbd2a7242, 0x49ae6000, 0x3fe1c078, 0xed70e667, 0x3cccacde, 109 0x40f23c00, 0x3fe188ee, 0xf8ab4650, 0x3d14cc4e, 0xf6f29800, 110 0x3fe151c3, 0xa293ae49, 0xbd2edd97, 0x23c75c00, 0x3fe11af8, 111 0xbb9ddcb2, 0xbd258647, 0x8611cc00, 0x3fe0e489, 0x07801742, 112 0x3d1c2998, 0xe2d05400, 0x3fe0ae76, 0x887e7e27, 0x3d1f486b, 113 0x0533c400, 0x3fe078bf, 0x41edf5fd, 0x3d268122, 0xbe760400, 114 0x3fe04360, 0xe79539e0, 0xbd04c45f, 0xe5b20800, 0x3fe00e5a, 115 0xb1727b1c, 0xbd053ba3, 0xaf7a4800, 0x3fdfb358, 0x3c164935, 116 0x3d0085fa, 0xee031800, 0x3fdf4aa7, 0x6f014a8b, 0x3d12cde5, 117 0x56b41000, 0x3fdee2a1, 0x5a470251, 0x3d2f27f4, 0xc3ddb000, 118 0x3fde7b42, 0x5372bd08, 0xbd246550, 0x1a272800, 0x3fde148a, 119 0x07322938, 0xbd1326b2, 0x484c9800, 0x3fddae75, 0x60dc616a, 120 0xbd1ea42d, 0x46def800, 0x3fdd4902, 0xe9a767a8, 0x3d235baf, 121 0x18064800, 0x3fdce42f, 0x3ec7a6b0, 0xbd0797c3, 0xc7455800, 122 0x3fdc7ff9, 0xc15249ae, 0xbd29b6dd, 0x693fa000, 0x3fdc1c60, 123 0x7fe8e180, 0x3d2cec80, 0x1b80e000, 0x3fdbb961, 0xf40a666d, 124 0x3d27d85b, 0x04462800, 0x3fdb56fa, 0x2d841995, 0x3d109525, 125 0x5248d000, 0x3fdaf529, 0x52774458, 0xbd217cc5, 0x3c8ad800, 126 0x3fda93ed, 0xbea77a5d, 0x3d1e36f2, 0x0224f800, 0x3fda3344, 127 0x7f9d79f5, 0x3d23c645, 0xea15f000, 0x3fd9d32b, 0x10d0c0b0, 128 0xbd26279e, 0x43135800, 0x3fd973a3, 0xa502d9f0, 0xbd152313, 129 0x635bf800, 0x3fd914a8, 0x2ee6307d, 0xbd1766b5, 0xa88b3000, 130 0x3fd8b639, 0xe5e70470, 0xbd205ae1, 0x776dc800, 0x3fd85855, 131 0x3333778a, 0x3d2fd56f, 0x3bd81800, 0x3fd7fafa, 0xc812566a, 132 0xbd272090, 0x687cf800, 0x3fd79e26, 0x2efd1778, 0x3d29ec7d, 133 0x76c67800, 0x3fd741d8, 0x49dc60b3, 0x3d2d8b09, 0xe6af1800, 134 0x3fd6e60e, 0x7c222d87, 0x3d172165, 0x3e9c6800, 0x3fd68ac8, 135 0x2756eba0, 0x3d20a0d3, 0x0b3ab000, 0x3fd63003, 0xe731ae00, 136 0xbd2db623, 0xdf596000, 0x3fd5d5bd, 0x08a465dc, 0xbd0a0b2a, 137 0x53c8d000, 0x3fd57bf7, 0xee5d40ef, 0x3d1faded, 0x0738a000, 138 0x3fd522ae, 0x8164c759, 0x3d2ebe70, 0x9e173000, 0x3fd4c9e0, 139 0x1b0ad8a4, 0xbd2e2089, 0xc271c800, 0x3fd4718d, 0x0967d675, 140 0xbd2f27ce, 0x23d5e800, 0x3fd419b4, 0xec90e09d, 0x3d08e436, 141 0x77333000, 0x3fd3c252, 0xb606bd5c, 0x3d183b54, 0x76be1000, 142 0x3fd36b67, 0xb0f177c8, 0x3d116ecd, 0xe1d36000, 0x3fd314f1, 143 0xd3213cb8, 0xbd28e27a, 0x7cdc9000, 0x3fd2bef0, 0x4a5004f4, 144 0x3d2a9cfa, 0x1134d800, 0x3fd26962, 0xdf5bb3b6, 0x3d2c93c1, 145 0x6d0eb800, 0x3fd21445, 0xba46baea, 0x3d0a87de, 0x635a6800, 146 0x3fd1bf99, 0x5147bdb7, 0x3d2ca6ed, 0xcbacf800, 0x3fd16b5c, 147 0xf7a51681, 0x3d2b9acd, 0x8227e800, 0x3fd1178e, 0x63a5f01c, 148 0xbd2c210e, 0x67616000, 0x3fd0c42d, 0x163ceae9, 0x3d27188b, 149 0x604d5800, 0x3fd07138, 0x16ed4e91, 0x3cf89cdb, 0x5626c800, 150 0x3fd01eae, 0x1485e94a, 0xbd16f08c, 0x6cb3b000, 0x3fcf991c, 151 0xca0cdf30, 0x3d1bcbec, 0xe4dd0000, 0x3fcef5ad, 0x65bb8e11, 152 0xbcca2115, 0xffe71000, 0x3fce530e, 0x6041f430, 0x3cc21227, 153 0xb0d49000, 0x3fcdb13d, 0xf715b035, 0xbd2aff2a, 0xf2656000, 154 0x3fcd1037, 0x75b6f6e4, 0xbd084a7e, 0xc6f01000, 0x3fcc6ffb, 155 0xc5962bd2, 0xbcf1ec72, 0x383be000, 0x3fcbd087, 0x595412b6, 156 0xbd2d4bc4, 0x575bd000, 0x3fcb31d8, 0x4eace1aa, 0xbd0c358d, 157 0x3c8ae000, 0x3fca93ed, 0x50562169, 0xbd287243, 0x07089000, 158 0x3fc9f6c4, 0x6865817a, 0x3d29904d, 0xdcf70000, 0x3fc95a5a, 159 0x58a0ff6f, 0x3d07f228, 0xeb390000, 0x3fc8beaf, 0xaae92cd1, 160 0xbd073d54, 0x6551a000, 0x3fc823c1, 0x9a631e83, 0x3d1e0ddb, 161 0x85445000, 0x3fc7898d, 0x70914305, 0xbd1c6610, 0x8b757000, 162 0x3fc6f012, 0xe59c21e1, 0xbd25118d, 0xbe8c1000, 0x3fc6574e, 163 0x2c3c2e78, 0x3d19cf8b, 0x6b544000, 0x3fc5bf40, 0xeb68981c, 164 0xbd127023, 0xe4a1b000, 0x3fc527e5, 0xe5697dc7, 0x3d2633e8, 165 0x8333b000, 0x3fc4913d, 0x54fdb678, 0x3d258379, 0xa5993000, 166 0x3fc3fb45, 0x7e6a354d, 0xbd2cd1d8, 0xb0159000, 0x3fc365fc, 167 0x234b7289, 0x3cc62fa8, 0x0c868000, 0x3fc2d161, 0xcb81b4a1, 168 0x3d039d6c, 0x2a49c000, 0x3fc23d71, 0x8fd3df5c, 0x3d100d23, 169 0x7e23f000, 0x3fc1aa2b, 0x44389934, 0x3d2ca78e, 0x8227e000, 170 0x3fc1178e, 0xce2d07f2, 0x3d21ef78, 0xb59e4000, 0x3fc08598, 171 0x7009902c, 0xbd27e5dd, 0x39dbe000, 0x3fbfe891, 0x4fa10afd, 172 0xbd2534d6, 0x830a2000, 0x3fbec739, 0xafe645e0, 0xbd2dc068, 173 0x63844000, 0x3fbda727, 0x1fa71733, 0x3d1a8940, 0x01bc4000, 174 0x3fbc8858, 0xc65aacd3, 0x3d2646d1, 0x8dad6000, 0x3fbb6ac8, 175 0x2bf768e5, 0xbd139080, 0x40b1c000, 0x3fba4e76, 0xb94407c8, 176 0xbd0e42b6, 0x5d594000, 0x3fb9335e, 0x3abd47da, 0x3d23115c, 177 0x2f40e000, 0x3fb8197e, 0xf96ffdf7, 0x3d0f80dc, 0x0aeac000, 178 0x3fb700d3, 0xa99ded32, 0x3cec1e8d, 0x4d97a000, 0x3fb5e95a, 179 0x3c5d1d1e, 0xbd2c6906, 0x5d208000, 0x3fb4d311, 0x82f4e1ef, 180 0xbcf53a25, 0xa7d1e000, 0x3fb3bdf5, 0xa5db4ed7, 0x3d2cc85e, 181 0xa4472000, 0x3fb2aa04, 0xae9c697d, 0xbd20b6e8, 0xd1466000, 182 0x3fb1973b, 0x560d9e9b, 0xbd25325d, 0xb59e4000, 0x3fb08598, 183 0x7009902c, 0xbd17e5dd, 0xc006c000, 0x3faeea31, 0x4fc93b7b, 184 0xbd0e113e, 0xcdddc000, 0x3faccb73, 0x47d82807, 0xbd1a68f2, 185 0xd0fb0000, 0x3faaaef2, 0x353bb42e, 0x3d20fc1a, 0x149fc000, 186 0x3fa894aa, 0xd05a267d, 0xbd197995, 0xf2d4c000, 0x3fa67c94, 187 0xec19afa2, 0xbd029efb, 0xd42e0000, 0x3fa466ae, 0x75bdfd28, 188 0xbd2c1673, 0x2f8d0000, 0x3fa252f3, 0xe021b67b, 0x3d283e9a, 189 0x89e74000, 0x3fa0415d, 0x5cf1d753, 0x3d0111c0, 0xec148000, 190 0x3f9c63d2, 0x3f9eb2f3, 0x3d2578c6, 0x28c90000, 0x3f984925, 191 0x325a0c34, 0xbd2aa0ba, 0x25980000, 0x3f9432a9, 0x928637fe, 192 0x3d098139, 0x58938000, 0x3f902056, 0x06e2f7d2, 0xbd23dc5b, 193 0xa3890000, 0x3f882448, 0xda74f640, 0xbd275577, 0x75890000, 194 0x3f801015, 0x999d2be8, 0xbd10c76b, 0x59580000, 0x3f700805, 195 0xcb31c67b, 0x3d2166af, 0x00000000, 0x00000000, 0x00000000, 196 0x80000000 197 // @formatter:on 198 }); 199 200 private ArrayDataPointerConstant log2 = pointerConstant(8, new int[]{ 201 // @formatter:off 202 0xfefa3800, 0x3fa62e42, 203 }); 204 private ArrayDataPointerConstant log28 = pointerConstant(8, new int[]{ 205 0x93c76730, 0x3ceef357 206 // @formatter:on 207 }); 208 209 private ArrayDataPointerConstant coeff = pointerConstant(16, new int[]{ 210 // @formatter:off 211 0x92492492, 0x3fc24924, 0x00000000, 0xbfd00000, 212 }); 213 private ArrayDataPointerConstant coeff16 = pointerConstant(16, new int[]{ 214 0x3d6fb175, 0xbfc5555e, 0x55555555, 0x3fd55555, 215 }); 216 private ArrayDataPointerConstant coeff32 = pointerConstant(16, new int[]{ 217 0x9999999a, 0x3fc99999, 0x00000000, 0xbfe00000 218 // @formatter:on 219 }); 220 221 @Override emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm)222 public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) { 223 // registers, 224 // input: xmm0 225 // scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7 226 // rax, rdx, rcx, r8, r11 227 Label block0 = new Label(); 228 Label block1 = new Label(); 229 Label block2 = new Label(); 230 Label block3 = new Label(); 231 Label block4 = new Label(); 232 Label block5 = new Label(); 233 Label block6 = new Label(); 234 Label block7 = new Label(); 235 Label block8 = new Label(); 236 Label block9 = new Label(); 237 238 masm.subq(rsp, 24); 239 masm.movsd(new AMD64Address(rsp, 0), xmm0); 240 masm.movq(rax, 0x3ff0000000000000L); 241 masm.movdq(xmm2, rax); 242 masm.movq(rdx, 0x77f0000000000000L); 243 masm.movdq(xmm3, rdx); 244 masm.movl(rcx, 32768); 245 masm.movdl(xmm4, rcx); 246 masm.movq(r8, 0xffffe00000000000L); 247 masm.movdq(xmm5, r8); 248 masm.movdqu(xmm1, xmm0); 249 masm.pextrw(rax, xmm0, 3); 250 masm.por(xmm0, xmm2); 251 masm.movl(rcx, 16352); 252 masm.psrlq(xmm0, 27); 253 masm.leaq(r11, recordExternalAddress(crb, lTbl)); 254 masm.psrld(xmm0, 2); 255 masm.rcpps(xmm0, xmm0); 256 masm.psllq(xmm1, 12); 257 masm.pshufd(xmm6, xmm5, 228); 258 masm.psrlq(xmm1, 12); 259 masm.subl(rax, 16); 260 masm.cmplAndJcc(rax, 32736, ConditionFlag.AboveEqual, block0, false); 261 262 masm.bind(block1); 263 masm.paddd(xmm0, xmm4); 264 masm.por(xmm1, xmm3); 265 masm.movdl(rdx, xmm0); 266 masm.psllq(xmm0, 29); 267 masm.pand(xmm5, xmm1); 268 masm.pand(xmm0, xmm6); 269 masm.subsd(xmm1, xmm5); 270 masm.mulpd(xmm5, xmm0); 271 masm.andl(rax, 32752); 272 masm.subl(rax, rcx); 273 masm.cvtsi2sdl(xmm7, rax); 274 masm.mulsd(xmm1, xmm0); 275 masm.movq(xmm6, recordExternalAddress(crb, log2)); // 0xfefa3800, 0x3fa62e42 276 masm.movdqu(xmm3, recordExternalAddress(crb, coeff)); // 0x92492492, 0x3fc24924, 277 // 0x00000000, 0xbfd00000 278 masm.subsd(xmm5, xmm2); 279 masm.andl(rdx, 16711680); 280 masm.shrl(rdx, 12); 281 masm.movdqu(xmm0, new AMD64Address(r11, rdx, AMD64Address.Scale.Times1)); 282 masm.movdqu(xmm4, recordExternalAddress(crb, coeff16)); // 0x3d6fb175, 0xbfc5555e, 283 // 0x55555555, 0x3fd55555 284 masm.addsd(xmm1, xmm5); 285 masm.movdqu(xmm2, recordExternalAddress(crb, coeff32)); // 0x9999999a, 0x3fc99999, 286 // 0x00000000, 0xbfe00000 287 masm.mulsd(xmm6, xmm7); 288 if (masm.supports(AMD64.CPUFeature.SSE3)) { 289 masm.movddup(xmm5, xmm1); 290 } else { 291 masm.movdqu(xmm5, xmm1); 292 masm.movlhps(xmm5, xmm5); 293 } 294 masm.mulsd(xmm7, recordExternalAddress(crb, log28)); // 0x93c76730, 0x3ceef357 295 masm.mulsd(xmm3, xmm1); 296 masm.addsd(xmm0, xmm6); 297 masm.mulpd(xmm4, xmm5); 298 masm.mulpd(xmm5, xmm5); 299 if (masm.supports(AMD64.CPUFeature.SSE3)) { 300 masm.movddup(xmm6, xmm0); 301 } else { 302 masm.movdqu(xmm6, xmm0); 303 masm.movlhps(xmm6, xmm6); 304 } 305 masm.addsd(xmm0, xmm1); 306 masm.addpd(xmm4, xmm2); 307 masm.mulpd(xmm3, xmm5); 308 masm.subsd(xmm6, xmm0); 309 masm.mulsd(xmm4, xmm1); 310 masm.pshufd(xmm2, xmm0, 238); 311 masm.addsd(xmm1, xmm6); 312 masm.mulsd(xmm5, xmm5); 313 masm.addsd(xmm7, xmm2); 314 masm.addpd(xmm4, xmm3); 315 masm.addsd(xmm1, xmm7); 316 masm.mulpd(xmm4, xmm5); 317 masm.addsd(xmm1, xmm4); 318 masm.pshufd(xmm5, xmm4, 238); 319 masm.addsd(xmm1, xmm5); 320 masm.addsd(xmm0, xmm1); 321 masm.jmp(block9); 322 323 masm.bind(block0); 324 masm.movq(xmm0, new AMD64Address(rsp, 0)); 325 masm.movq(xmm1, new AMD64Address(rsp, 0)); 326 masm.addl(rax, 16); 327 masm.cmplAndJcc(rax, 32768, ConditionFlag.AboveEqual, block2, false); 328 masm.cmplAndJcc(rax, 16, ConditionFlag.Below, block3, false); 329 330 masm.bind(block4); 331 masm.addsd(xmm0, xmm0); 332 masm.jmp(block9); 333 334 masm.bind(block5); 335 masm.jcc(AMD64Assembler.ConditionFlag.Above, block4); 336 masm.cmplAndJcc(rdx, 0, ConditionFlag.Above, block4, false); 337 masm.jmp(block6); 338 339 masm.bind(block3); 340 masm.xorpd(xmm1, xmm1); 341 masm.addsd(xmm1, xmm0); 342 masm.movdl(rdx, xmm1); 343 masm.psrlq(xmm1, 32); 344 masm.movdl(rcx, xmm1); 345 masm.orl(rdx, rcx); 346 masm.cmplAndJcc(rdx, 0, ConditionFlag.Equal, block7, false); 347 masm.xorpd(xmm1, xmm1); 348 masm.movl(rax, 18416); 349 masm.pinsrw(xmm1, rax, 3); 350 masm.mulsd(xmm0, xmm1); 351 masm.movdqu(xmm1, xmm0); 352 masm.pextrw(rax, xmm0, 3); 353 masm.por(xmm0, xmm2); 354 masm.psrlq(xmm0, 27); 355 masm.movl(rcx, 18416); 356 masm.psrld(xmm0, 2); 357 masm.rcpps(xmm0, xmm0); 358 masm.psllq(xmm1, 12); 359 masm.pshufd(xmm6, xmm5, 228); 360 masm.psrlq(xmm1, 12); 361 masm.jmp(block1); 362 363 masm.bind(block2); 364 masm.movdl(rdx, xmm1); 365 masm.psrlq(xmm1, 32); 366 masm.movdl(rcx, xmm1); 367 masm.addl(rcx, rcx); 368 masm.cmplAndJcc(rcx, -2097152, ConditionFlag.AboveEqual, block5, false); 369 masm.orl(rdx, rcx); 370 masm.cmplAndJcc(rdx, 0, ConditionFlag.Equal, block7, false); 371 372 masm.bind(block6); 373 masm.xorpd(xmm1, xmm1); 374 masm.xorpd(xmm0, xmm0); 375 masm.movl(rax, 32752); 376 masm.pinsrw(xmm1, rax, 3); 377 masm.mulsd(xmm0, xmm1); 378 masm.movl(new AMD64Address(rsp, 16), 3); 379 masm.jmp(block8); 380 masm.bind(block7); 381 masm.xorpd(xmm1, xmm1); 382 masm.xorpd(xmm0, xmm0); 383 masm.movl(rax, 49136); 384 masm.pinsrw(xmm0, rax, 3); 385 masm.divsd(xmm0, xmm1); 386 masm.movl(new AMD64Address(rsp, 16), 2); 387 388 masm.bind(block8); 389 masm.movq(new AMD64Address(rsp, 8), xmm0); 390 391 masm.movq(xmm0, new AMD64Address(rsp, 8)); 392 393 masm.bind(block9); 394 masm.addq(rsp, 24); 395 } 396 } 397