;
; MIT License
; -----------
;
; Copyright (c) 2002-2019 Advanced Micro Devices, Inc.
;
; Permission is hereby granted, free of charge, to any person obtaining a copy
; of this Software and associated documentation files (the "Software"), to deal
; in the Software without restriction, including without limitation the rights
; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
; copies of the Software, and to permit persons to whom the Software is
; furnished to do so, subject to the following conditions:
;
; The above copyright notice and this permission notice shall be included in
; all copies or substantial portions of the Software.
;
; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
; THE SOFTWARE.
;
; pow.asm
;
; An implementation of the pow libm function.
284afb647cSTimo Kreuzer; 294afb647cSTimo Kreuzer; Prototype: 304afb647cSTimo Kreuzer; 314afb647cSTimo Kreuzer; double pow(double x, double y); 324afb647cSTimo Kreuzer; 334afb647cSTimo Kreuzer 344afb647cSTimo Kreuzer; 354afb647cSTimo Kreuzer; Algorithm: 364afb647cSTimo Kreuzer; x^y = e^(y*ln(x)) 374afb647cSTimo Kreuzer; 384afb647cSTimo Kreuzer; Look in exp, log for the respective algorithms 394afb647cSTimo Kreuzer; 404afb647cSTimo Kreuzer 414afb647cSTimo Kreuzer.const 424afb647cSTimo Kreuzer 434afb647cSTimo KreuzerALIGN 16 444afb647cSTimo Kreuzer 454afb647cSTimo Kreuzer; these codes and the ones in the corresponding .c file have to match 464afb647cSTimo Kreuzer__flag_x_one_y_snan DD 00000001 474afb647cSTimo Kreuzer__flag_x_zero_z_inf DD 00000002 484afb647cSTimo Kreuzer__flag_x_nan DD 00000003 494afb647cSTimo Kreuzer__flag_y_nan DD 00000004 504afb647cSTimo Kreuzer__flag_x_nan_y_nan DD 00000005 514afb647cSTimo Kreuzer__flag_x_neg_y_notint DD 00000006 524afb647cSTimo Kreuzer__flag_z_zero DD 00000007 534afb647cSTimo Kreuzer__flag_z_denormal DD 00000008 544afb647cSTimo Kreuzer__flag_z_inf DD 00000009 554afb647cSTimo Kreuzer 564afb647cSTimo KreuzerALIGN 16 574afb647cSTimo Kreuzer 584afb647cSTimo Kreuzer__ay_max_bound DQ 43e0000000000000h 594afb647cSTimo Kreuzer__ay_min_bound DQ 3c00000000000000h 604afb647cSTimo Kreuzer__sign_mask DQ 8000000000000000h 614afb647cSTimo Kreuzer__sign_and_exp_mask DQ 0fff0000000000000h 624afb647cSTimo Kreuzer__exp_mask DQ 7ff0000000000000h 634afb647cSTimo Kreuzer__neg_inf DQ 0fff0000000000000h 644afb647cSTimo Kreuzer__pos_inf DQ 7ff0000000000000h 654afb647cSTimo Kreuzer__pos_one DQ 3ff0000000000000h 664afb647cSTimo Kreuzer__pos_zero DQ 0000000000000000h 674afb647cSTimo Kreuzer__exp_mant_mask DQ 7fffffffffffffffh 684afb647cSTimo Kreuzer__mant_mask DQ 000fffffffffffffh 694afb647cSTimo Kreuzer__ind_pattern DQ 0fff8000000000000h 704afb647cSTimo Kreuzer 714afb647cSTimo Kreuzer 724afb647cSTimo Kreuzer__neg_qnan DQ 0fff8000000000000h 734afb647cSTimo 
Kreuzer__qnan DQ 7ff8000000000000h 744afb647cSTimo Kreuzer__qnan_set DQ 0008000000000000h 754afb647cSTimo Kreuzer 764afb647cSTimo Kreuzer__neg_one DQ 0bff0000000000000h 774afb647cSTimo Kreuzer__neg_zero DQ 8000000000000000h 784afb647cSTimo Kreuzer 794afb647cSTimo Kreuzer__exp_shift DQ 0000000000000034h ; 52 804afb647cSTimo Kreuzer__exp_bias DQ 00000000000003ffh ; 1023 814afb647cSTimo Kreuzer__exp_bias_m1 DQ 00000000000003feh ; 1022 824afb647cSTimo Kreuzer 834afb647cSTimo Kreuzer__yexp_53 DQ 0000000000000035h ; 53 844afb647cSTimo Kreuzer__mant_full DQ 000fffffffffffffh 854afb647cSTimo Kreuzer__1_before_mant DQ 0010000000000000h 864afb647cSTimo Kreuzer 874afb647cSTimo Kreuzer__mask_mant_all8 DQ 000ff00000000000h 884afb647cSTimo Kreuzer__mask_mant9 DQ 0000080000000000h 894afb647cSTimo Kreuzer 904afb647cSTimo Kreuzer 914afb647cSTimo Kreuzer 924afb647cSTimo KreuzerALIGN 16 934afb647cSTimo Kreuzer__real_fffffffff8000000 DQ 0fffffffff8000000h 944afb647cSTimo Kreuzer DQ 0fffffffff8000000h 954afb647cSTimo Kreuzer 964afb647cSTimo Kreuzer__mask_8000000000000000 DQ 8000000000000000h 974afb647cSTimo Kreuzer DQ 8000000000000000h 984afb647cSTimo Kreuzer 994afb647cSTimo Kreuzer__real_4090040000000000 DQ 4090040000000000h 1004afb647cSTimo Kreuzer DQ 4090040000000000h 1014afb647cSTimo Kreuzer 1024afb647cSTimo Kreuzer__real_C090C80000000000 DQ 0C090C80000000000h 1034afb647cSTimo Kreuzer DQ 0C090C80000000000h 1044afb647cSTimo Kreuzer 1054afb647cSTimo Kreuzer;--------------------- 1064afb647cSTimo Kreuzer; log data 1074afb647cSTimo Kreuzer;--------------------- 1084afb647cSTimo Kreuzer 1094afb647cSTimo KreuzerALIGN 16 1104afb647cSTimo Kreuzer 1114afb647cSTimo Kreuzer__real_ninf DQ 0fff0000000000000h ; -inf 1124afb647cSTimo Kreuzer DQ 0000000000000000h 1134afb647cSTimo Kreuzer__real_inf DQ 7ff0000000000000h ; +inf 1144afb647cSTimo Kreuzer DQ 0000000000000000h 1154afb647cSTimo Kreuzer__real_nan DQ 7ff8000000000000h ; NaN 1164afb647cSTimo Kreuzer DQ 0000000000000000h 1174afb647cSTimo 
Kreuzer__real_mant DQ 000FFFFFFFFFFFFFh ; mantissa bits 1184afb647cSTimo Kreuzer DQ 0000000000000000h 1194afb647cSTimo Kreuzer__mask_1023 DQ 00000000000003ffh 1204afb647cSTimo Kreuzer DQ 0000000000000000h 1214afb647cSTimo Kreuzer__mask_001 DQ 0000000000000001h 1224afb647cSTimo Kreuzer DQ 0000000000000000h 1234afb647cSTimo Kreuzer 1244afb647cSTimo Kreuzer__real_log2_lead DQ 3fe62e42e0000000h ; log2_lead 6.93147122859954833984e-01 1254afb647cSTimo Kreuzer DQ 0000000000000000h 1264afb647cSTimo Kreuzer__real_log2_tail DQ 3e6efa39ef35793ch ; log2_tail 5.76999904754328540596e-08 1274afb647cSTimo Kreuzer DQ 0000000000000000h 1284afb647cSTimo Kreuzer 1294afb647cSTimo Kreuzer__real_two DQ 4000000000000000h ; 2 1304afb647cSTimo Kreuzer DQ 0000000000000000h 1314afb647cSTimo Kreuzer 1324afb647cSTimo Kreuzer__real_one DQ 3ff0000000000000h ; 1 1334afb647cSTimo Kreuzer DQ 0000000000000000h 1344afb647cSTimo Kreuzer 1354afb647cSTimo Kreuzer__real_half DQ 3fe0000000000000h ; 1/2 1364afb647cSTimo Kreuzer DQ 0000000000000000h 1374afb647cSTimo Kreuzer 1384afb647cSTimo Kreuzer__mask_100 DQ 0000000000000100h 1394afb647cSTimo Kreuzer DQ 0000000000000000h 1404afb647cSTimo Kreuzer 1414afb647cSTimo Kreuzer__real_1_over_2 DQ 3fe0000000000000h 1424afb647cSTimo Kreuzer DQ 0000000000000000h 1434afb647cSTimo Kreuzer__real_1_over_3 DQ 3fd5555555555555h 1444afb647cSTimo Kreuzer DQ 0000000000000000h 1454afb647cSTimo Kreuzer__real_1_over_4 DQ 3fd0000000000000h 1464afb647cSTimo Kreuzer DQ 0000000000000000h 1474afb647cSTimo Kreuzer__real_1_over_5 DQ 3fc999999999999ah 1484afb647cSTimo Kreuzer DQ 0000000000000000h 1494afb647cSTimo Kreuzer__real_1_over_6 DQ 3fc5555555555555h 1504afb647cSTimo Kreuzer DQ 0000000000000000h 1514afb647cSTimo Kreuzer__real_1_over_7 DQ 3fc2492492492494h 1524afb647cSTimo Kreuzer DQ 0000000000000000h 1534afb647cSTimo Kreuzer 1544afb647cSTimo Kreuzer__mask_1023_f DQ 0c08ff80000000000h 1554afb647cSTimo Kreuzer DQ 0000000000000000h 1564afb647cSTimo Kreuzer 1574afb647cSTimo 
Kreuzer__mask_2045 DQ 00000000000007fdh 1584afb647cSTimo Kreuzer DQ 0000000000000000h 1594afb647cSTimo Kreuzer 1604afb647cSTimo Kreuzer__real_threshold DQ 3fc0000000000000h ; 0.125 1614afb647cSTimo Kreuzer DQ 3fc0000000000000h 1624afb647cSTimo Kreuzer 1634afb647cSTimo Kreuzer__real_notsign DQ 7ffFFFFFFFFFFFFFh ; ^sign bit 1644afb647cSTimo Kreuzer DQ 0000000000000000h 1654afb647cSTimo Kreuzer 1664afb647cSTimo Kreuzer 1674afb647cSTimo KreuzerEXTRN __log_256_lead:QWORD 1684afb647cSTimo KreuzerEXTRN __log_256_tail:QWORD 1694afb647cSTimo KreuzerEXTRN __use_fma3_lib:DWORD 1704afb647cSTimo Kreuzer 1714afb647cSTimo Kreuzer; This table differs from the tables in log_256_lead_tail_table.asm: 1724afb647cSTimo Kreuzer; the heads have fewer significant bits (hence the tails also differ). 1734afb647cSTimo KreuzerALIGN 16 1744afb647cSTimo Kreuzer__log_F_inv_head DQ 4000000000000000h 1754afb647cSTimo Kreuzer DQ 3fffe00000000000h 1764afb647cSTimo Kreuzer DQ 3fffc00000000000h 1774afb647cSTimo Kreuzer DQ 3fffa00000000000h 1784afb647cSTimo Kreuzer DQ 3fff800000000000h 1794afb647cSTimo Kreuzer DQ 3fff600000000000h 1804afb647cSTimo Kreuzer DQ 3fff400000000000h 1814afb647cSTimo Kreuzer DQ 3fff200000000000h 1824afb647cSTimo Kreuzer DQ 3fff000000000000h 1834afb647cSTimo Kreuzer DQ 3ffee00000000000h 1844afb647cSTimo Kreuzer DQ 3ffec00000000000h 1854afb647cSTimo Kreuzer DQ 3ffea00000000000h 1864afb647cSTimo Kreuzer DQ 3ffe900000000000h 1874afb647cSTimo Kreuzer DQ 3ffe700000000000h 1884afb647cSTimo Kreuzer DQ 3ffe500000000000h 1894afb647cSTimo Kreuzer DQ 3ffe300000000000h 1904afb647cSTimo Kreuzer DQ 3ffe100000000000h 1914afb647cSTimo Kreuzer DQ 3ffe000000000000h 1924afb647cSTimo Kreuzer DQ 3ffde00000000000h 1934afb647cSTimo Kreuzer DQ 3ffdc00000000000h 1944afb647cSTimo Kreuzer DQ 3ffda00000000000h 1954afb647cSTimo Kreuzer DQ 3ffd900000000000h 1964afb647cSTimo Kreuzer DQ 3ffd700000000000h 1974afb647cSTimo Kreuzer DQ 3ffd500000000000h 1984afb647cSTimo Kreuzer DQ 3ffd400000000000h 
1994afb647cSTimo Kreuzer DQ 3ffd200000000000h 2004afb647cSTimo Kreuzer DQ 3ffd000000000000h 2014afb647cSTimo Kreuzer DQ 3ffcf00000000000h 2024afb647cSTimo Kreuzer DQ 3ffcd00000000000h 2034afb647cSTimo Kreuzer DQ 3ffcb00000000000h 2044afb647cSTimo Kreuzer DQ 3ffca00000000000h 2054afb647cSTimo Kreuzer DQ 3ffc800000000000h 2064afb647cSTimo Kreuzer DQ 3ffc700000000000h 2074afb647cSTimo Kreuzer DQ 3ffc500000000000h 2084afb647cSTimo Kreuzer DQ 3ffc300000000000h 2094afb647cSTimo Kreuzer DQ 3ffc200000000000h 2104afb647cSTimo Kreuzer DQ 3ffc000000000000h 2114afb647cSTimo Kreuzer DQ 3ffbf00000000000h 2124afb647cSTimo Kreuzer DQ 3ffbd00000000000h 2134afb647cSTimo Kreuzer DQ 3ffbc00000000000h 2144afb647cSTimo Kreuzer DQ 3ffba00000000000h 2154afb647cSTimo Kreuzer DQ 3ffb900000000000h 2164afb647cSTimo Kreuzer DQ 3ffb700000000000h 2174afb647cSTimo Kreuzer DQ 3ffb600000000000h 2184afb647cSTimo Kreuzer DQ 3ffb400000000000h 2194afb647cSTimo Kreuzer DQ 3ffb300000000000h 2204afb647cSTimo Kreuzer DQ 3ffb200000000000h 2214afb647cSTimo Kreuzer DQ 3ffb000000000000h 2224afb647cSTimo Kreuzer DQ 3ffaf00000000000h 2234afb647cSTimo Kreuzer DQ 3ffad00000000000h 2244afb647cSTimo Kreuzer DQ 3ffac00000000000h 2254afb647cSTimo Kreuzer DQ 3ffaa00000000000h 2264afb647cSTimo Kreuzer DQ 3ffa900000000000h 2274afb647cSTimo Kreuzer DQ 3ffa800000000000h 2284afb647cSTimo Kreuzer DQ 3ffa600000000000h 2294afb647cSTimo Kreuzer DQ 3ffa500000000000h 2304afb647cSTimo Kreuzer DQ 3ffa400000000000h 2314afb647cSTimo Kreuzer DQ 3ffa200000000000h 2324afb647cSTimo Kreuzer DQ 3ffa100000000000h 2334afb647cSTimo Kreuzer DQ 3ffa000000000000h 2344afb647cSTimo Kreuzer DQ 3ff9e00000000000h 2354afb647cSTimo Kreuzer DQ 3ff9d00000000000h 2364afb647cSTimo Kreuzer DQ 3ff9c00000000000h 2374afb647cSTimo Kreuzer DQ 3ff9a00000000000h 2384afb647cSTimo Kreuzer DQ 3ff9900000000000h 2394afb647cSTimo Kreuzer DQ 3ff9800000000000h 2404afb647cSTimo Kreuzer DQ 3ff9700000000000h 2414afb647cSTimo Kreuzer DQ 3ff9500000000000h 2424afb647cSTimo 
Kreuzer DQ 3ff9400000000000h 2434afb647cSTimo Kreuzer DQ 3ff9300000000000h 2444afb647cSTimo Kreuzer DQ 3ff9200000000000h 2454afb647cSTimo Kreuzer DQ 3ff9000000000000h 2464afb647cSTimo Kreuzer DQ 3ff8f00000000000h 2474afb647cSTimo Kreuzer DQ 3ff8e00000000000h 2484afb647cSTimo Kreuzer DQ 3ff8d00000000000h 2494afb647cSTimo Kreuzer DQ 3ff8b00000000000h 2504afb647cSTimo Kreuzer DQ 3ff8a00000000000h 2514afb647cSTimo Kreuzer DQ 3ff8900000000000h 2524afb647cSTimo Kreuzer DQ 3ff8800000000000h 2534afb647cSTimo Kreuzer DQ 3ff8700000000000h 2544afb647cSTimo Kreuzer DQ 3ff8600000000000h 2554afb647cSTimo Kreuzer DQ 3ff8400000000000h 2564afb647cSTimo Kreuzer DQ 3ff8300000000000h 2574afb647cSTimo Kreuzer DQ 3ff8200000000000h 2584afb647cSTimo Kreuzer DQ 3ff8100000000000h 2594afb647cSTimo Kreuzer DQ 3ff8000000000000h 2604afb647cSTimo Kreuzer DQ 3ff7f00000000000h 2614afb647cSTimo Kreuzer DQ 3ff7e00000000000h 2624afb647cSTimo Kreuzer DQ 3ff7d00000000000h 2634afb647cSTimo Kreuzer DQ 3ff7b00000000000h 2644afb647cSTimo Kreuzer DQ 3ff7a00000000000h 2654afb647cSTimo Kreuzer DQ 3ff7900000000000h 2664afb647cSTimo Kreuzer DQ 3ff7800000000000h 2674afb647cSTimo Kreuzer DQ 3ff7700000000000h 2684afb647cSTimo Kreuzer DQ 3ff7600000000000h 2694afb647cSTimo Kreuzer DQ 3ff7500000000000h 2704afb647cSTimo Kreuzer DQ 3ff7400000000000h 2714afb647cSTimo Kreuzer DQ 3ff7300000000000h 2724afb647cSTimo Kreuzer DQ 3ff7200000000000h 2734afb647cSTimo Kreuzer DQ 3ff7100000000000h 2744afb647cSTimo Kreuzer DQ 3ff7000000000000h 2754afb647cSTimo Kreuzer DQ 3ff6f00000000000h 2764afb647cSTimo Kreuzer DQ 3ff6e00000000000h 2774afb647cSTimo Kreuzer DQ 3ff6d00000000000h 2784afb647cSTimo Kreuzer DQ 3ff6c00000000000h 2794afb647cSTimo Kreuzer DQ 3ff6b00000000000h 2804afb647cSTimo Kreuzer DQ 3ff6a00000000000h 2814afb647cSTimo Kreuzer DQ 3ff6900000000000h 2824afb647cSTimo Kreuzer DQ 3ff6800000000000h 2834afb647cSTimo Kreuzer DQ 3ff6700000000000h 2844afb647cSTimo Kreuzer DQ 3ff6600000000000h 2854afb647cSTimo Kreuzer DQ 
3ff6500000000000h 2864afb647cSTimo Kreuzer DQ 3ff6400000000000h 2874afb647cSTimo Kreuzer DQ 3ff6300000000000h 2884afb647cSTimo Kreuzer DQ 3ff6200000000000h 2894afb647cSTimo Kreuzer DQ 3ff6100000000000h 2904afb647cSTimo Kreuzer DQ 3ff6000000000000h 2914afb647cSTimo Kreuzer DQ 3ff5f00000000000h 2924afb647cSTimo Kreuzer DQ 3ff5e00000000000h 2934afb647cSTimo Kreuzer DQ 3ff5d00000000000h 2944afb647cSTimo Kreuzer DQ 3ff5c00000000000h 2954afb647cSTimo Kreuzer DQ 3ff5b00000000000h 2964afb647cSTimo Kreuzer DQ 3ff5a00000000000h 2974afb647cSTimo Kreuzer DQ 3ff5900000000000h 2984afb647cSTimo Kreuzer DQ 3ff5800000000000h 2994afb647cSTimo Kreuzer DQ 3ff5800000000000h 3004afb647cSTimo Kreuzer DQ 3ff5700000000000h 3014afb647cSTimo Kreuzer DQ 3ff5600000000000h 3024afb647cSTimo Kreuzer DQ 3ff5500000000000h 3034afb647cSTimo Kreuzer DQ 3ff5400000000000h 3044afb647cSTimo Kreuzer DQ 3ff5300000000000h 3054afb647cSTimo Kreuzer DQ 3ff5200000000000h 3064afb647cSTimo Kreuzer DQ 3ff5100000000000h 3074afb647cSTimo Kreuzer DQ 3ff5000000000000h 3084afb647cSTimo Kreuzer DQ 3ff5000000000000h 3094afb647cSTimo Kreuzer DQ 3ff4f00000000000h 3104afb647cSTimo Kreuzer DQ 3ff4e00000000000h 3114afb647cSTimo Kreuzer DQ 3ff4d00000000000h 3124afb647cSTimo Kreuzer DQ 3ff4c00000000000h 3134afb647cSTimo Kreuzer DQ 3ff4b00000000000h 3144afb647cSTimo Kreuzer DQ 3ff4a00000000000h 3154afb647cSTimo Kreuzer DQ 3ff4a00000000000h 3164afb647cSTimo Kreuzer DQ 3ff4900000000000h 3174afb647cSTimo Kreuzer DQ 3ff4800000000000h 3184afb647cSTimo Kreuzer DQ 3ff4700000000000h 3194afb647cSTimo Kreuzer DQ 3ff4600000000000h 3204afb647cSTimo Kreuzer DQ 3ff4600000000000h 3214afb647cSTimo Kreuzer DQ 3ff4500000000000h 3224afb647cSTimo Kreuzer DQ 3ff4400000000000h 3234afb647cSTimo Kreuzer DQ 3ff4300000000000h 3244afb647cSTimo Kreuzer DQ 3ff4200000000000h 3254afb647cSTimo Kreuzer DQ 3ff4200000000000h 3264afb647cSTimo Kreuzer DQ 3ff4100000000000h 3274afb647cSTimo Kreuzer DQ 3ff4000000000000h 3284afb647cSTimo Kreuzer DQ 3ff3f00000000000h 
3294afb647cSTimo Kreuzer DQ 3ff3e00000000000h 3304afb647cSTimo Kreuzer DQ 3ff3e00000000000h 3314afb647cSTimo Kreuzer DQ 3ff3d00000000000h 3324afb647cSTimo Kreuzer DQ 3ff3c00000000000h 3334afb647cSTimo Kreuzer DQ 3ff3b00000000000h 3344afb647cSTimo Kreuzer DQ 3ff3b00000000000h 3354afb647cSTimo Kreuzer DQ 3ff3a00000000000h 3364afb647cSTimo Kreuzer DQ 3ff3900000000000h 3374afb647cSTimo Kreuzer DQ 3ff3800000000000h 3384afb647cSTimo Kreuzer DQ 3ff3800000000000h 3394afb647cSTimo Kreuzer DQ 3ff3700000000000h 3404afb647cSTimo Kreuzer DQ 3ff3600000000000h 3414afb647cSTimo Kreuzer DQ 3ff3500000000000h 3424afb647cSTimo Kreuzer DQ 3ff3500000000000h 3434afb647cSTimo Kreuzer DQ 3ff3400000000000h 3444afb647cSTimo Kreuzer DQ 3ff3300000000000h 3454afb647cSTimo Kreuzer DQ 3ff3200000000000h 3464afb647cSTimo Kreuzer DQ 3ff3200000000000h 3474afb647cSTimo Kreuzer DQ 3ff3100000000000h 3484afb647cSTimo Kreuzer DQ 3ff3000000000000h 3494afb647cSTimo Kreuzer DQ 3ff3000000000000h 3504afb647cSTimo Kreuzer DQ 3ff2f00000000000h 3514afb647cSTimo Kreuzer DQ 3ff2e00000000000h 3524afb647cSTimo Kreuzer DQ 3ff2e00000000000h 3534afb647cSTimo Kreuzer DQ 3ff2d00000000000h 3544afb647cSTimo Kreuzer DQ 3ff2c00000000000h 3554afb647cSTimo Kreuzer DQ 3ff2b00000000000h 3564afb647cSTimo Kreuzer DQ 3ff2b00000000000h 3574afb647cSTimo Kreuzer DQ 3ff2a00000000000h 3584afb647cSTimo Kreuzer DQ 3ff2900000000000h 3594afb647cSTimo Kreuzer DQ 3ff2900000000000h 3604afb647cSTimo Kreuzer DQ 3ff2800000000000h 3614afb647cSTimo Kreuzer DQ 3ff2700000000000h 3624afb647cSTimo Kreuzer DQ 3ff2700000000000h 3634afb647cSTimo Kreuzer DQ 3ff2600000000000h 3644afb647cSTimo Kreuzer DQ 3ff2500000000000h 3654afb647cSTimo Kreuzer DQ 3ff2500000000000h 3664afb647cSTimo Kreuzer DQ 3ff2400000000000h 3674afb647cSTimo Kreuzer DQ 3ff2300000000000h 3684afb647cSTimo Kreuzer DQ 3ff2300000000000h 3694afb647cSTimo Kreuzer DQ 3ff2200000000000h 3704afb647cSTimo Kreuzer DQ 3ff2100000000000h 3714afb647cSTimo Kreuzer DQ 3ff2100000000000h 3724afb647cSTimo 
Kreuzer DQ 3ff2000000000000h 3734afb647cSTimo Kreuzer DQ 3ff2000000000000h 3744afb647cSTimo Kreuzer DQ 3ff1f00000000000h 3754afb647cSTimo Kreuzer DQ 3ff1e00000000000h 3764afb647cSTimo Kreuzer DQ 3ff1e00000000000h 3774afb647cSTimo Kreuzer DQ 3ff1d00000000000h 3784afb647cSTimo Kreuzer DQ 3ff1c00000000000h 3794afb647cSTimo Kreuzer DQ 3ff1c00000000000h 3804afb647cSTimo Kreuzer DQ 3ff1b00000000000h 3814afb647cSTimo Kreuzer DQ 3ff1b00000000000h 3824afb647cSTimo Kreuzer DQ 3ff1a00000000000h 3834afb647cSTimo Kreuzer DQ 3ff1900000000000h 3844afb647cSTimo Kreuzer DQ 3ff1900000000000h 3854afb647cSTimo Kreuzer DQ 3ff1800000000000h 3864afb647cSTimo Kreuzer DQ 3ff1800000000000h 3874afb647cSTimo Kreuzer DQ 3ff1700000000000h 3884afb647cSTimo Kreuzer DQ 3ff1600000000000h 3894afb647cSTimo Kreuzer DQ 3ff1600000000000h 3904afb647cSTimo Kreuzer DQ 3ff1500000000000h 3914afb647cSTimo Kreuzer DQ 3ff1500000000000h 3924afb647cSTimo Kreuzer DQ 3ff1400000000000h 3934afb647cSTimo Kreuzer DQ 3ff1300000000000h 3944afb647cSTimo Kreuzer DQ 3ff1300000000000h 3954afb647cSTimo Kreuzer DQ 3ff1200000000000h 3964afb647cSTimo Kreuzer DQ 3ff1200000000000h 3974afb647cSTimo Kreuzer DQ 3ff1100000000000h 3984afb647cSTimo Kreuzer DQ 3ff1100000000000h 3994afb647cSTimo Kreuzer DQ 3ff1000000000000h 4004afb647cSTimo Kreuzer DQ 3ff0f00000000000h 4014afb647cSTimo Kreuzer DQ 3ff0f00000000000h 4024afb647cSTimo Kreuzer DQ 3ff0e00000000000h 4034afb647cSTimo Kreuzer DQ 3ff0e00000000000h 4044afb647cSTimo Kreuzer DQ 3ff0d00000000000h 4054afb647cSTimo Kreuzer DQ 3ff0d00000000000h 4064afb647cSTimo Kreuzer DQ 3ff0c00000000000h 4074afb647cSTimo Kreuzer DQ 3ff0c00000000000h 4084afb647cSTimo Kreuzer DQ 3ff0b00000000000h 4094afb647cSTimo Kreuzer DQ 3ff0a00000000000h 4104afb647cSTimo Kreuzer DQ 3ff0a00000000000h 4114afb647cSTimo Kreuzer DQ 3ff0900000000000h 4124afb647cSTimo Kreuzer DQ 3ff0900000000000h 4134afb647cSTimo Kreuzer DQ 3ff0800000000000h 4144afb647cSTimo Kreuzer DQ 3ff0800000000000h 4154afb647cSTimo Kreuzer DQ 
3ff0700000000000h 4164afb647cSTimo Kreuzer DQ 3ff0700000000000h 4174afb647cSTimo Kreuzer DQ 3ff0600000000000h 4184afb647cSTimo Kreuzer DQ 3ff0600000000000h 4194afb647cSTimo Kreuzer DQ 3ff0500000000000h 4204afb647cSTimo Kreuzer DQ 3ff0500000000000h 4214afb647cSTimo Kreuzer DQ 3ff0400000000000h 4224afb647cSTimo Kreuzer DQ 3ff0400000000000h 4234afb647cSTimo Kreuzer DQ 3ff0300000000000h 4244afb647cSTimo Kreuzer DQ 3ff0300000000000h 4254afb647cSTimo Kreuzer DQ 3ff0200000000000h 4264afb647cSTimo Kreuzer DQ 3ff0200000000000h 4274afb647cSTimo Kreuzer DQ 3ff0100000000000h 4284afb647cSTimo Kreuzer DQ 3ff0100000000000h 4294afb647cSTimo Kreuzer DQ 3ff0000000000000h 4304afb647cSTimo Kreuzer DQ 3ff0000000000000h 4314afb647cSTimo Kreuzer 4324afb647cSTimo KreuzerALIGN 16 4334afb647cSTimo Kreuzer__log_F_inv_tail DQ 0000000000000000h 4344afb647cSTimo Kreuzer DQ 3effe01fe01fe020h 4354afb647cSTimo Kreuzer DQ 3f1fc07f01fc07f0h 4364afb647cSTimo Kreuzer DQ 3f31caa01fa11caah 4374afb647cSTimo Kreuzer DQ 3f3f81f81f81f820h 4384afb647cSTimo Kreuzer DQ 3f48856506ddaba6h 4394afb647cSTimo Kreuzer DQ 3f5196792909c560h 4404afb647cSTimo Kreuzer DQ 3f57d9108c2ad433h 4414afb647cSTimo Kreuzer DQ 3f5f07c1f07c1f08h 4424afb647cSTimo Kreuzer DQ 3f638ff08b1c03ddh 4434afb647cSTimo Kreuzer DQ 3f680f6603d980f6h 4444afb647cSTimo Kreuzer DQ 3f6d00f57403d5d0h 4454afb647cSTimo Kreuzer DQ 3f331abf0b7672a0h 4464afb647cSTimo Kreuzer DQ 3f506a965d43919bh 4474afb647cSTimo Kreuzer DQ 3f5ceb240795ceb2h 4484afb647cSTimo Kreuzer DQ 3f6522f3b834e67fh 4494afb647cSTimo Kreuzer DQ 3f6c3c3c3c3c3c3ch 4504afb647cSTimo Kreuzer DQ 3f3e01e01e01e01eh 4514afb647cSTimo Kreuzer DQ 3f575b8fe21a291ch 4524afb647cSTimo Kreuzer DQ 3f6403b9403b9404h 4534afb647cSTimo Kreuzer DQ 3f6cc0ed7303b5cch 4544afb647cSTimo Kreuzer DQ 3f479118f3fc4da2h 4554afb647cSTimo Kreuzer DQ 3f5ed952e0b0ce46h 4564afb647cSTimo Kreuzer DQ 3f695900eae56404h 4574afb647cSTimo Kreuzer DQ 3f3d41d41d41d41dh 4584afb647cSTimo Kreuzer DQ 3f5cb28ff16c69aeh 4594afb647cSTimo 
Kreuzer DQ 3f696b1edd80e866h 4604afb647cSTimo Kreuzer DQ 3f4372e225fe30d9h 4614afb647cSTimo Kreuzer DQ 3f60ad12073615a2h 4624afb647cSTimo Kreuzer DQ 3f6cdb2c0397cdb3h 4634afb647cSTimo Kreuzer DQ 3f52cc157b864407h 4644afb647cSTimo Kreuzer DQ 3f664cb5f7148404h 4654afb647cSTimo Kreuzer DQ 3f3c71c71c71c71ch 4664afb647cSTimo Kreuzer DQ 3f6129a21a930b84h 4674afb647cSTimo Kreuzer DQ 3f6f1e0387f1e038h 4684afb647cSTimo Kreuzer DQ 3f5ad4e4ba80709bh 4694afb647cSTimo Kreuzer DQ 3f6c0e070381c0e0h 4704afb647cSTimo Kreuzer DQ 3f560fba1a362bb0h 4714afb647cSTimo Kreuzer DQ 3f6a5713280dee96h 4724afb647cSTimo Kreuzer DQ 3f53f59620f9ece9h 4734afb647cSTimo Kreuzer DQ 3f69f22983759f23h 4744afb647cSTimo Kreuzer DQ 3f5478ac63fc8d5ch 4754afb647cSTimo Kreuzer DQ 3f6ad87bb4671656h 4764afb647cSTimo Kreuzer DQ 3f578b8efbb8148ch 4774afb647cSTimo Kreuzer DQ 3f6d0369d0369d03h 4784afb647cSTimo Kreuzer DQ 3f5d212b601b3748h 4794afb647cSTimo Kreuzer DQ 3f0b2036406c80d9h 4804afb647cSTimo Kreuzer DQ 3f629663b24547d1h 4814afb647cSTimo Kreuzer DQ 3f4435e50d79435eh 4824afb647cSTimo Kreuzer DQ 3f67d0ff2920bc03h 4834afb647cSTimo Kreuzer DQ 3f55c06b15c06b16h 4844afb647cSTimo Kreuzer DQ 3f6e3a5f0fd7f954h 4854afb647cSTimo Kreuzer DQ 3f61dec0d4c77b03h 4864afb647cSTimo Kreuzer DQ 3f473289870ac52eh 4874afb647cSTimo Kreuzer DQ 3f6a034da034da03h 4884afb647cSTimo Kreuzer DQ 3f5d041da2292856h 4894afb647cSTimo Kreuzer DQ 3f3a41a41a41a41ah 4904afb647cSTimo Kreuzer DQ 3f68550f8a39409dh 4914afb647cSTimo Kreuzer DQ 3f5b4fe5e92c0686h 4924afb647cSTimo Kreuzer DQ 3f3a01a01a01a01ah 4934afb647cSTimo Kreuzer DQ 3f691d2a2067b23ah 4944afb647cSTimo Kreuzer DQ 3f5e7c5dada0b4e5h 4954afb647cSTimo Kreuzer DQ 3f468a7725080ce1h 4964afb647cSTimo Kreuzer DQ 3f6c49d4aa21b490h 4974afb647cSTimo Kreuzer DQ 3f63333333333333h 4984afb647cSTimo Kreuzer DQ 3f54bc363b03fccfh 4994afb647cSTimo Kreuzer DQ 3f2c9f01970e4f81h 5004afb647cSTimo Kreuzer DQ 3f697617c6ef5b25h 5014afb647cSTimo Kreuzer DQ 3f6161f9add3c0cah 5024afb647cSTimo Kreuzer DQ 
3f5319fe6cb39806h 5034afb647cSTimo Kreuzer DQ 3f2f693a1c451ab3h 5044afb647cSTimo Kreuzer DQ 3f6a9e240321a9e2h 5054afb647cSTimo Kreuzer DQ 3f63831f3831f383h 5064afb647cSTimo Kreuzer DQ 3f5949ebc4dcfc1ch 5074afb647cSTimo Kreuzer DQ 3f480c6980c6980ch 5084afb647cSTimo Kreuzer DQ 3f6f9d00c5fe7403h 5094afb647cSTimo Kreuzer DQ 3f69721ed7e75347h 5104afb647cSTimo Kreuzer DQ 3f6381ec0313381fh 5114afb647cSTimo Kreuzer DQ 3f5b97c2aec12653h 5124afb647cSTimo Kreuzer DQ 3f509ef3024ae3bah 5134afb647cSTimo Kreuzer DQ 3f38618618618618h 5144afb647cSTimo Kreuzer DQ 3f6e0184f00c2780h 5154afb647cSTimo Kreuzer DQ 3f692ef5657dba52h 5164afb647cSTimo Kreuzer DQ 3f64940305494030h 5174afb647cSTimo Kreuzer DQ 3f60303030303030h 5184afb647cSTimo Kreuzer DQ 3f58060180601806h 5194afb647cSTimo Kreuzer DQ 3f5017f405fd017fh 5204afb647cSTimo Kreuzer DQ 3f412a8ad278e8ddh 5214afb647cSTimo Kreuzer DQ 3f17d05f417d05f4h 5224afb647cSTimo Kreuzer DQ 3f6d67245c02f7d6h 5234afb647cSTimo Kreuzer DQ 3f6a4411c1d986a9h 5244afb647cSTimo Kreuzer DQ 3f6754d76c7316dfh 5254afb647cSTimo Kreuzer DQ 3f649902f149902fh 5264afb647cSTimo Kreuzer DQ 3f621023358c1a68h 5274afb647cSTimo Kreuzer DQ 3f5f7390d2a6c406h 5284afb647cSTimo Kreuzer DQ 3f5b2b0805d5b2b1h 5294afb647cSTimo Kreuzer DQ 3f5745d1745d1746h 5304afb647cSTimo Kreuzer DQ 3f53c31507fa32c4h 5314afb647cSTimo Kreuzer DQ 3f50a1fd1b7af017h 5324afb647cSTimo Kreuzer DQ 3f4bc36ce3e0453ah 5334afb647cSTimo Kreuzer DQ 3f4702e05c0b8170h 5344afb647cSTimo Kreuzer DQ 3f4300b79300b793h 5354afb647cSTimo Kreuzer DQ 3f3f76b4337c6cb1h 5364afb647cSTimo Kreuzer DQ 3f3a62681c860fb0h 5374afb647cSTimo Kreuzer DQ 3f36c16c16c16c17h 5384afb647cSTimo Kreuzer DQ 3f3490aa31a3cfc7h 5394afb647cSTimo Kreuzer DQ 3f33cd153729043eh 5404afb647cSTimo Kreuzer DQ 3f3473a88d0bfd2eh 5414afb647cSTimo Kreuzer DQ 3f36816816816817h 5424afb647cSTimo Kreuzer DQ 3f39f36016719f36h 5434afb647cSTimo Kreuzer DQ 3f3ec6a5122f9016h 5444afb647cSTimo Kreuzer DQ 3f427c29da5519cfh 5454afb647cSTimo Kreuzer DQ 3f4642c8590b2164h 
5464afb647cSTimo Kreuzer DQ 3f4ab5c45606f00bh 5474afb647cSTimo Kreuzer DQ 3f4fd3b80b11fd3ch 5484afb647cSTimo Kreuzer DQ 3f52cda0c6ba4eaah 5494afb647cSTimo Kreuzer DQ 3f56058160581606h 5504afb647cSTimo Kreuzer DQ 3f5990d0a4b7ef87h 5514afb647cSTimo Kreuzer DQ 3f5d6ee340579d6fh 5524afb647cSTimo Kreuzer DQ 3f60cf87d9c54a69h 5534afb647cSTimo Kreuzer DQ 3f6310572620ae4ch 5544afb647cSTimo Kreuzer DQ 3f65798c8ff522a2h 5554afb647cSTimo Kreuzer DQ 3f680ad602b580adh 5564afb647cSTimo Kreuzer DQ 3f6ac3e24799546fh 5574afb647cSTimo Kreuzer DQ 3f6da46102b1da46h 5584afb647cSTimo Kreuzer DQ 3f15805601580560h 5594afb647cSTimo Kreuzer DQ 3f3ed3c506b39a23h 5604afb647cSTimo Kreuzer DQ 3f4cbdd3e2970f60h 5614afb647cSTimo Kreuzer DQ 3f55555555555555h 5624afb647cSTimo Kreuzer DQ 3f5c979aee0bf805h 5634afb647cSTimo Kreuzer DQ 3f621291e81fd58eh 5644afb647cSTimo Kreuzer DQ 3f65fead500a9580h 5654afb647cSTimo Kreuzer DQ 3f6a0fd5c5f02a3ah 5664afb647cSTimo Kreuzer DQ 3f6e45c223898adch 5674afb647cSTimo Kreuzer DQ 3f35015015015015h 5684afb647cSTimo Kreuzer DQ 3f4c7b16ea64d422h 5694afb647cSTimo Kreuzer DQ 3f57829cbc14e5e1h 5704afb647cSTimo Kreuzer DQ 3f60877db8589720h 5714afb647cSTimo Kreuzer DQ 3f65710e4b5edceah 5724afb647cSTimo Kreuzer DQ 3f6a7dbb4d1fc1c8h 5734afb647cSTimo Kreuzer DQ 3f6fad40a57eb503h 5744afb647cSTimo Kreuzer DQ 3f43fd6bb00a5140h 5754afb647cSTimo Kreuzer DQ 3f54e78ecb419ba9h 5764afb647cSTimo Kreuzer DQ 3f600a44029100a4h 5774afb647cSTimo Kreuzer DQ 3f65c28f5c28f5c3h 5784afb647cSTimo Kreuzer DQ 3f6b9c68b2c0cc4ah 5794afb647cSTimo Kreuzer DQ 3f2978feb9f34381h 5804afb647cSTimo Kreuzer DQ 3f4ecf163bb6500ah 5814afb647cSTimo Kreuzer DQ 3f5be1958b67ebb9h 5824afb647cSTimo Kreuzer DQ 3f644e6157dc9a3bh 5834afb647cSTimo Kreuzer DQ 3f6acc4baa3f0ddfh 5844afb647cSTimo Kreuzer DQ 3f26a4cbcb2a247bh 5854afb647cSTimo Kreuzer DQ 3f50505050505050h 5864afb647cSTimo Kreuzer DQ 3f5e0b4439959819h 5874afb647cSTimo Kreuzer DQ 3f66027f6027f602h 5884afb647cSTimo Kreuzer DQ 3f6d1e854b5e0db4h 5894afb647cSTimo 
Kreuzer DQ 3f4165e7254813e2h 5904afb647cSTimo Kreuzer DQ 3f576646a9d716efh 5914afb647cSTimo Kreuzer DQ 3f632b48f757ce88h 5924afb647cSTimo Kreuzer DQ 3f6ac1b24652a906h 5934afb647cSTimo Kreuzer DQ 3f33b13b13b13b14h 5944afb647cSTimo Kreuzer DQ 3f5490e1eb208984h 5954afb647cSTimo Kreuzer DQ 3f62385830fec66eh 5964afb647cSTimo Kreuzer DQ 3f6a45a6cc111b7eh 5974afb647cSTimo Kreuzer DQ 3f33813813813814h 5984afb647cSTimo Kreuzer DQ 3f556f472517b708h 5994afb647cSTimo Kreuzer DQ 3f631be7bc0e8f2ah 6004afb647cSTimo Kreuzer DQ 3f6b9cbf3e55f044h 6014afb647cSTimo Kreuzer DQ 3f40e7d95bc609a9h 6024afb647cSTimo Kreuzer DQ 3f59e6b3804d19e7h 6034afb647cSTimo Kreuzer DQ 3f65c8b6af7963c2h 6044afb647cSTimo Kreuzer DQ 3f6eb9dad43bf402h 6054afb647cSTimo Kreuzer DQ 3f4f1a515885fb37h 6064afb647cSTimo Kreuzer DQ 3f60eeb1d3d76c02h 6074afb647cSTimo Kreuzer DQ 3f6a320261a32026h 6084afb647cSTimo Kreuzer DQ 3f3c82ac40260390h 6094afb647cSTimo Kreuzer DQ 3f5a12f684bda12fh 6104afb647cSTimo Kreuzer DQ 3f669d43fda2962ch 6114afb647cSTimo Kreuzer DQ 3f02e025c04b8097h 6124afb647cSTimo Kreuzer DQ 3f542804b542804bh 6134afb647cSTimo Kreuzer DQ 3f63f69b02593f6ah 6144afb647cSTimo Kreuzer DQ 3f6df31cb46e21fah 6154afb647cSTimo Kreuzer DQ 3f5012b404ad012bh 6164afb647cSTimo Kreuzer DQ 3f623925e7820a7fh 6174afb647cSTimo Kreuzer DQ 3f6c8253c8253c82h 6184afb647cSTimo Kreuzer DQ 3f4b92ddc02526e5h 6194afb647cSTimo Kreuzer DQ 3f61602511602511h 6204afb647cSTimo Kreuzer DQ 3f6bf471439c9adfh 6214afb647cSTimo Kreuzer DQ 3f4a85c40939a85ch 6224afb647cSTimo Kreuzer DQ 3f6166f9ac024d16h 6234afb647cSTimo Kreuzer DQ 3f6c44e10125e227h 6244afb647cSTimo Kreuzer DQ 3f4cebf48bbd90e5h 6254afb647cSTimo Kreuzer DQ 3f62492492492492h 6264afb647cSTimo Kreuzer DQ 3f6d6f2e2ec0b673h 6274afb647cSTimo Kreuzer DQ 3f5159e26af37c05h 6284afb647cSTimo Kreuzer DQ 3f64024540245402h 6294afb647cSTimo Kreuzer DQ 3f6f6f0243f6f024h 6304afb647cSTimo Kreuzer DQ 3f55e60121579805h 6314afb647cSTimo Kreuzer DQ 3f668e18cf81b10fh 6324afb647cSTimo Kreuzer DQ 
3f32012012012012h 6334afb647cSTimo Kreuzer DQ 3f5c11f7047dc11fh 6344afb647cSTimo Kreuzer DQ 3f69e878ff70985eh 6354afb647cSTimo Kreuzer DQ 3f4779d9fdc3a219h 6364afb647cSTimo Kreuzer DQ 3f61eace5c957907h 6374afb647cSTimo Kreuzer DQ 3f6e0d5b450239e1h 6384afb647cSTimo Kreuzer DQ 3f548bf073816367h 6394afb647cSTimo Kreuzer DQ 3f6694808dda5202h 6404afb647cSTimo Kreuzer DQ 3f37c67f2bae2b21h 6414afb647cSTimo Kreuzer DQ 3f5ee58469ee5847h 6424afb647cSTimo Kreuzer DQ 3f6c0233c0233c02h 6434afb647cSTimo Kreuzer DQ 3f514e02328a7012h 6444afb647cSTimo Kreuzer DQ 3f6561072057b573h 6454afb647cSTimo Kreuzer DQ 3f31811811811812h 6464afb647cSTimo Kreuzer DQ 3f5e28646f5a1060h 6474afb647cSTimo Kreuzer DQ 3f6c0d1284e6f1d7h 6484afb647cSTimo Kreuzer DQ 3f523543f0c80459h 6494afb647cSTimo Kreuzer DQ 3f663cbeea4e1a09h 6504afb647cSTimo Kreuzer DQ 3f3b9a3fdd5c8cb8h 6514afb647cSTimo Kreuzer DQ 3f60be1c159a76d2h 6524afb647cSTimo Kreuzer DQ 3f6e1d1a688e4838h 6534afb647cSTimo Kreuzer DQ 3f572044d72044d7h 6544afb647cSTimo Kreuzer DQ 3f691713db81577bh 6554afb647cSTimo Kreuzer DQ 3f4ac73ae9819b50h 6564afb647cSTimo Kreuzer DQ 3f6460334e904cf6h 6574afb647cSTimo Kreuzer DQ 3f31111111111111h 6584afb647cSTimo Kreuzer DQ 3f5feef80441fef0h 6594afb647cSTimo Kreuzer DQ 3f6de021fde021feh 6604afb647cSTimo Kreuzer DQ 3f57b7eacc9686a0h 6614afb647cSTimo Kreuzer DQ 3f69ead7cd391fbch 6624afb647cSTimo Kreuzer DQ 3f50195609804390h 6634afb647cSTimo Kreuzer DQ 3f6641511e8d2b32h 6644afb647cSTimo Kreuzer DQ 3f4222b1acf1ce96h 6654afb647cSTimo Kreuzer DQ 3f62e29f79b47582h 6664afb647cSTimo Kreuzer DQ 3f24f0d1682e11cdh 6674afb647cSTimo Kreuzer DQ 3f5f9bb096771e4dh 6684afb647cSTimo Kreuzer DQ 3f6e5ee45dd96ae2h 6694afb647cSTimo Kreuzer DQ 3f5a0429a0429a04h 6704afb647cSTimo Kreuzer DQ 3f6bb74d5f06c021h 6714afb647cSTimo Kreuzer DQ 3f54fce404254fceh 6724afb647cSTimo Kreuzer DQ 3f695766eacbc402h 6734afb647cSTimo Kreuzer DQ 3f50842108421084h 6744afb647cSTimo Kreuzer DQ 3f673e5371d5c338h 6754afb647cSTimo Kreuzer DQ 3f4930523fbe3368h 
6764afb647cSTimo Kreuzer DQ 3f656b38f225f6c4h 6774afb647cSTimo Kreuzer DQ 3f426e978d4fdf3bh 6784afb647cSTimo Kreuzer DQ 3f63dd40e4eb0cc6h 6794afb647cSTimo Kreuzer DQ 3f397f7d73404146h 6804afb647cSTimo Kreuzer DQ 3f6293982cc98af1h 6814afb647cSTimo Kreuzer DQ 3f30410410410410h 6824afb647cSTimo Kreuzer DQ 3f618d6f048ff7e4h 6834afb647cSTimo Kreuzer DQ 3f2236a3ebc349deh 6844afb647cSTimo Kreuzer DQ 3f60c9f8ee53d18ch 6854afb647cSTimo Kreuzer DQ 3f10204081020408h 6864afb647cSTimo Kreuzer DQ 3f60486ca2f46ea6h 6874afb647cSTimo Kreuzer DQ 3ef0101010101010h 6884afb647cSTimo Kreuzer DQ 3f60080402010080h 6894afb647cSTimo Kreuzer DQ 0000000000000000h 6904afb647cSTimo Kreuzer 6914afb647cSTimo Kreuzer;--------------------- 6924afb647cSTimo Kreuzer; exp data 6934afb647cSTimo Kreuzer;--------------------- 6944afb647cSTimo Kreuzer 6954afb647cSTimo KreuzerALIGN 16 6964afb647cSTimo Kreuzer 6974afb647cSTimo Kreuzer__denormal_threshold DD 0fffffc02h ; -1022 6984afb647cSTimo Kreuzer DD 0 6994afb647cSTimo Kreuzer DQ 0 7004afb647cSTimo Kreuzer 7014afb647cSTimo Kreuzer__enable_almost_inf DQ 7fe0000000000000h 7024afb647cSTimo Kreuzer DQ 0 7034afb647cSTimo Kreuzer 7044afb647cSTimo Kreuzer__real_zero DQ 0000000000000000h 7054afb647cSTimo Kreuzer DQ 0 7064afb647cSTimo Kreuzer 7074afb647cSTimo Kreuzer__real_smallest_denormal DQ 0000000000000001h 7084afb647cSTimo Kreuzer DQ 0 7094afb647cSTimo Kreuzer__denormal_tiny_threshold DQ 0c0874046dfefd9d0h 7104afb647cSTimo Kreuzer DQ 0 7114afb647cSTimo Kreuzer 7124afb647cSTimo Kreuzer__real_p65536 DQ 40f0000000000000h ; 65536 7134afb647cSTimo Kreuzer DQ 0 7144afb647cSTimo Kreuzer__real_m68800 DQ 0c0f0cc0000000000h ; -68800 7154afb647cSTimo Kreuzer DQ 0 7164afb647cSTimo Kreuzer__real_64_by_log2 DQ 40571547652b82feh ; 64/ln(2) 7174afb647cSTimo Kreuzer DQ 0 7184afb647cSTimo Kreuzer__real_log2_by_64_head DQ 3f862e42f0000000h ; log2_by_64_head 7194afb647cSTimo Kreuzer DQ 0 7204afb647cSTimo Kreuzer__real_log2_by_64_tail DQ 0bdfdf473de6af278h ; -log2_by_64_tail 
7214afb647cSTimo Kreuzer DQ 0 7224afb647cSTimo Kreuzer__real_1_by_720 DQ 3f56c16c16c16c17h ; 1/720 7234afb647cSTimo Kreuzer DQ 0 7244afb647cSTimo Kreuzer__real_1_by_120 DQ 3f81111111111111h ; 1/120 7254afb647cSTimo Kreuzer DQ 0 7264afb647cSTimo Kreuzer__real_1_by_24 DQ 3fa5555555555555h ; 1/24 7274afb647cSTimo Kreuzer DQ 0 7284afb647cSTimo Kreuzer__real_1_by_6 DQ 3fc5555555555555h ; 1/6 7294afb647cSTimo Kreuzer DQ 0 7304afb647cSTimo Kreuzer__real_1_by_2 DQ 3fe0000000000000h ; 1/2 7314afb647cSTimo Kreuzer DQ 0 7324afb647cSTimo Kreuzer 7334afb647cSTimo Kreuzer 7344afb647cSTimo KreuzerEXTRN __two_to_jby64_head_table:QWORD 7354afb647cSTimo KreuzerEXTRN __two_to_jby64_tail_table:QWORD 7364afb647cSTimo KreuzerEXTRN __use_fma3_lib:DWORD 7374afb647cSTimo Kreuzer 7384afb647cSTimo Kreuzerfname TEXTEQU <pow> 7394afb647cSTimo Kreuzerfname_special TEXTEQU <_pow_special> 7404afb647cSTimo Kreuzer 7414afb647cSTimo Kreuzer; define local variable storage offsets 7424afb647cSTimo Kreuzer 7434afb647cSTimo Kreuzersave_x EQU 10h 7444afb647cSTimo Kreuzersave_y EQU 20h 7454afb647cSTimo Kreuzerp_temp_exp EQU 30h 7464afb647cSTimo Kreuzernegate_result EQU 40h 7474afb647cSTimo Kreuzersave_ax EQU 50h 7484afb647cSTimo Kreuzery_head EQU 60h 7494afb647cSTimo Kreuzerp_temp_log EQU 70h 7504afb647cSTimo Kreuzersave_xmm6 EQU 080h 7514afb647cSTimo Kreuzersave_xmm7 EQU 090h 7524afb647cSTimo Kreuzerdummy_space EQU 0a0h 7534afb647cSTimo Kreuzer 7544afb647cSTimo Kreuzerstack_size EQU 0c8h 7554afb647cSTimo Kreuzer 7564afb647cSTimo Kreuzerinclude fm.inc 7574afb647cSTimo Kreuzer 7584afb647cSTimo Kreuzer; external function 7594afb647cSTimo KreuzerEXTERN fname_special:PROC 7604afb647cSTimo Kreuzer 7614afb647cSTimo Kreuzer.code 7624afb647cSTimo KreuzerALIGN 16 7634afb647cSTimo KreuzerPUBLIC fname 7644afb647cSTimo Kreuzerfname PROC FRAME 7654afb647cSTimo Kreuzer StackAllocate stack_size 7664afb647cSTimo Kreuzer SaveXmm xmm6, save_xmm6 7674afb647cSTimo Kreuzer SaveXmm xmm7, save_xmm7 7684afb647cSTimo Kreuzer 
.ENDPROLOG 7694afb647cSTimo Kreuzer cmp DWORD PTR __use_fma3_lib, 0 7704afb647cSTimo Kreuzer jne Lpow_fma3 7714afb647cSTimo Kreuzer 7724afb647cSTimo KreuzerALIGN 16 7734afb647cSTimo KreuzerLpow_sse2: 7744afb647cSTimo Kreuzer movsd QWORD PTR [save_x+rsp], xmm0 7754afb647cSTimo Kreuzer movsd QWORD PTR [save_y+rsp], xmm1 7764afb647cSTimo Kreuzer 7774afb647cSTimo Kreuzer mov rdx, QWORD PTR [save_x+rsp] 7784afb647cSTimo Kreuzer mov r8, QWORD PTR [save_y+rsp] 7794afb647cSTimo Kreuzer 7804afb647cSTimo Kreuzer mov r10, QWORD PTR __exp_mant_mask 7814afb647cSTimo Kreuzer and r10, r8 7824afb647cSTimo Kreuzer jz Lpow_sse2_y_is_zero 7834afb647cSTimo Kreuzer 7844afb647cSTimo Kreuzer cmp r8, QWORD PTR __pos_one 7854afb647cSTimo Kreuzer je Lpow_sse2_y_is_one 7864afb647cSTimo Kreuzer 7874afb647cSTimo Kreuzer mov r9, QWORD PTR __sign_mask 7884afb647cSTimo Kreuzer and r9, rdx 7894afb647cSTimo Kreuzer mov rax, QWORD PTR __pos_zero 7904afb647cSTimo Kreuzer mov QWORD PTR [negate_result+rsp], rax 7914afb647cSTimo Kreuzer cmp r9, QWORD PTR __sign_mask 7924afb647cSTimo Kreuzer je Lpow_sse2_x_is_neg 7934afb647cSTimo Kreuzer 7944afb647cSTimo Kreuzer cmp rdx, QWORD PTR __pos_one 7954afb647cSTimo Kreuzer je Lpow_sse2_x_is_pos_one 7964afb647cSTimo Kreuzer 7974afb647cSTimo Kreuzer cmp rdx, QWORD PTR __pos_zero 7984afb647cSTimo Kreuzer je Lpow_sse2_x_is_zero 7994afb647cSTimo Kreuzer 8004afb647cSTimo Kreuzer mov r9, QWORD PTR __exp_mask 8014afb647cSTimo Kreuzer and r9, rdx 8024afb647cSTimo Kreuzer cmp r9, QWORD PTR __exp_mask 8034afb647cSTimo Kreuzer je Lpow_sse2_x_is_inf_or_nan 8044afb647cSTimo Kreuzer 8054afb647cSTimo Kreuzer mov r10, QWORD PTR __exp_mask 8064afb647cSTimo Kreuzer and r10, r8 8074afb647cSTimo Kreuzer cmp r10, QWORD PTR __ay_max_bound 8084afb647cSTimo Kreuzer jg Lpow_sse2_ay_is_very_large 8094afb647cSTimo Kreuzer 8104afb647cSTimo Kreuzer mov r10, QWORD PTR __exp_mask 8114afb647cSTimo Kreuzer and r10, r8 8124afb647cSTimo Kreuzer cmp r10, QWORD PTR __ay_min_bound 8134afb647cSTimo 
Kreuzer jl Lpow_sse2_ay_is_very_small 8144afb647cSTimo Kreuzer 8154afb647cSTimo Kreuzer ; ----------------------------- 8164afb647cSTimo Kreuzer ; compute log(x) here 8174afb647cSTimo Kreuzer ; ----------------------------- 8184afb647cSTimo KreuzerLpow_sse2_log_x: 8194afb647cSTimo Kreuzer 8204afb647cSTimo Kreuzer ; compute exponent part 8214afb647cSTimo Kreuzer xor r8, r8 8224afb647cSTimo Kreuzer movdqa xmm3, xmm0 8234afb647cSTimo Kreuzer psrlq xmm3, 52 8244afb647cSTimo Kreuzer movd r8, xmm0 8254afb647cSTimo Kreuzer psubq xmm3, XMMWORD PTR __mask_1023 8264afb647cSTimo Kreuzer movdqa xmm2, xmm0 8274afb647cSTimo Kreuzer cvtdq2pd xmm6, xmm3 ; xexp 8284afb647cSTimo Kreuzer pand xmm2, XMMWORD PTR __real_mant 8294afb647cSTimo Kreuzer 8304afb647cSTimo Kreuzer comisd xmm6, QWORD PTR __mask_1023_f 8314afb647cSTimo Kreuzer je Lpow_sse2_denormal_adjust 8324afb647cSTimo Kreuzer 8334afb647cSTimo KreuzerLpow_sse2_continue_common: 8344afb647cSTimo Kreuzer 8354afb647cSTimo Kreuzer ; compute index into the log tables 8364afb647cSTimo Kreuzer movsd xmm7, xmm0 8374afb647cSTimo Kreuzer mov r9, r8 8384afb647cSTimo Kreuzer and r8, QWORD PTR __mask_mant_all8 8394afb647cSTimo Kreuzer and r9, QWORD PTR __mask_mant9 8404afb647cSTimo Kreuzer subsd xmm7, __real_one 8414afb647cSTimo Kreuzer shl r9, 1 8424afb647cSTimo Kreuzer add r8, r9 8434afb647cSTimo Kreuzer mov QWORD PTR [p_temp_log+rsp], r8 8444afb647cSTimo Kreuzer andpd xmm7, __real_notsign 8454afb647cSTimo Kreuzer 8464afb647cSTimo Kreuzer ; F, Y, switch to near-one codepath 8474afb647cSTimo Kreuzer movsd xmm1, QWORD PTR [p_temp_log+rsp] 8484afb647cSTimo Kreuzer shr r8, 44 8494afb647cSTimo Kreuzer por xmm2, XMMWORD PTR __real_half 8504afb647cSTimo Kreuzer por xmm1, XMMWORD PTR __real_half 8514afb647cSTimo Kreuzer lea r9, QWORD PTR __log_F_inv_head 8524afb647cSTimo Kreuzer lea rdx, QWORD PTR __log_F_inv_tail 8534afb647cSTimo Kreuzer comisd xmm7, __real_threshold 8544afb647cSTimo Kreuzer jb Lpow_sse2_near_one 8554afb647cSTimo Kreuzer 
8564afb647cSTimo Kreuzer ; f = F - Y, r = f * inv 8574afb647cSTimo Kreuzer subsd xmm1, xmm2 8584afb647cSTimo Kreuzer movsd xmm4, xmm1 8594afb647cSTimo Kreuzer mulsd xmm1, QWORD PTR [r9+r8*8] 8604afb647cSTimo Kreuzer movsd xmm5, xmm1 8614afb647cSTimo Kreuzer mulsd xmm4, QWORD PTR [rdx+r8*8] 8624afb647cSTimo Kreuzer movsd xmm7, xmm4 8634afb647cSTimo Kreuzer addsd xmm1, xmm4 8644afb647cSTimo Kreuzer 8654afb647cSTimo Kreuzer movsd xmm2, xmm1 8664afb647cSTimo Kreuzer movsd xmm0, xmm1 8674afb647cSTimo Kreuzer lea r9, __log_256_lead 8684afb647cSTimo Kreuzer 8694afb647cSTimo Kreuzer ; poly 8704afb647cSTimo Kreuzer movsd xmm3, QWORD PTR __real_1_over_6 8714afb647cSTimo Kreuzer movsd xmm1, QWORD PTR __real_1_over_3 8724afb647cSTimo Kreuzer mulsd xmm3, xmm2 8734afb647cSTimo Kreuzer mulsd xmm1, xmm2 8744afb647cSTimo Kreuzer mulsd xmm0, xmm2 8754afb647cSTimo Kreuzer subsd xmm5, xmm2 8764afb647cSTimo Kreuzer movsd xmm4, xmm0 8774afb647cSTimo Kreuzer addsd xmm3, QWORD PTR __real_1_over_5 8784afb647cSTimo Kreuzer addsd xmm1, QWORD PTR __real_1_over_2 8794afb647cSTimo Kreuzer mulsd xmm4, xmm0 8804afb647cSTimo Kreuzer mulsd xmm3, xmm2 8814afb647cSTimo Kreuzer mulsd xmm1, xmm0 8824afb647cSTimo Kreuzer addsd xmm3, QWORD PTR __real_1_over_4 8834afb647cSTimo Kreuzer addsd xmm7, xmm5 8844afb647cSTimo Kreuzer mulsd xmm3, xmm4 8854afb647cSTimo Kreuzer addsd xmm1, xmm3 8864afb647cSTimo Kreuzer addsd xmm1, xmm7 8874afb647cSTimo Kreuzer 8884afb647cSTimo Kreuzer movsd xmm5, QWORD PTR __real_log2_tail 8894afb647cSTimo Kreuzer lea rdx, __log_256_tail 8904afb647cSTimo Kreuzer mulsd xmm5, xmm6 8914afb647cSTimo Kreuzer movsd xmm0, QWORD PTR [r9+r8*8] 8924afb647cSTimo Kreuzer subsd xmm5, xmm1 8934afb647cSTimo Kreuzer 8944afb647cSTimo Kreuzer movsd xmm3, QWORD PTR [rdx+r8*8] 8954afb647cSTimo Kreuzer addsd xmm3, xmm5 8964afb647cSTimo Kreuzer movsd xmm1, xmm3 8974afb647cSTimo Kreuzer subsd xmm3, xmm2 8984afb647cSTimo Kreuzer 8994afb647cSTimo Kreuzer movsd xmm7, QWORD PTR __real_log2_lead 
9004afb647cSTimo Kreuzer mulsd xmm7, xmm6 9014afb647cSTimo Kreuzer addsd xmm0, xmm7 9024afb647cSTimo Kreuzer 9034afb647cSTimo Kreuzer ; result of ln(x) is computed from head and tail parts, resH and resT 9044afb647cSTimo Kreuzer ; res = ln(x) = resH + resT 9054afb647cSTimo Kreuzer ; resH and resT are in full precision 9064afb647cSTimo Kreuzer 9074afb647cSTimo Kreuzer ; resT is computed from head and tail parts, resT_h and resT_t 9084afb647cSTimo Kreuzer ; resT = resT_h + resT_t 9094afb647cSTimo Kreuzer 9104afb647cSTimo Kreuzer ; now 9114afb647cSTimo Kreuzer ; xmm3 - resT 9124afb647cSTimo Kreuzer ; xmm0 - resH 9134afb647cSTimo Kreuzer ; xmm1 - (resT_t) 9144afb647cSTimo Kreuzer ; xmm2 - (-resT_h) 9154afb647cSTimo Kreuzer 9164afb647cSTimo KreuzerLpow_sse2_log_x_continue: 9174afb647cSTimo Kreuzer 9184afb647cSTimo Kreuzer movsd xmm7, xmm0 9194afb647cSTimo Kreuzer addsd xmm0, xmm3 9204afb647cSTimo Kreuzer movsd xmm5, xmm0 9214afb647cSTimo Kreuzer andpd xmm0, XMMWORD PTR __real_fffffffff8000000 9224afb647cSTimo Kreuzer 9234afb647cSTimo Kreuzer ; xmm0 - H 9244afb647cSTimo Kreuzer ; xmm7 - resH 9254afb647cSTimo Kreuzer ; xmm5 - res 9264afb647cSTimo Kreuzer 9274afb647cSTimo Kreuzer mov rax, QWORD PTR [save_y+rsp] 9284afb647cSTimo Kreuzer and rax, QWORD PTR __real_fffffffff8000000 9294afb647cSTimo Kreuzer 9304afb647cSTimo Kreuzer addsd xmm2, xmm3 9314afb647cSTimo Kreuzer subsd xmm7, xmm5 9324afb647cSTimo Kreuzer subsd xmm1, xmm2 9334afb647cSTimo Kreuzer addsd xmm7, xmm3 9344afb647cSTimo Kreuzer subsd xmm5, xmm0 9354afb647cSTimo Kreuzer 9364afb647cSTimo Kreuzer mov QWORD PTR [y_head+rsp], rax 9374afb647cSTimo Kreuzer movsd xmm4, QWORD PTR [save_y+rsp] 9384afb647cSTimo Kreuzer 9394afb647cSTimo Kreuzer addsd xmm7, xmm1 9404afb647cSTimo Kreuzer addsd xmm7, xmm5 9414afb647cSTimo Kreuzer 9424afb647cSTimo Kreuzer ; res = H + T 9434afb647cSTimo Kreuzer ; H has leading 26 bits of precision 9444afb647cSTimo Kreuzer ; T has full precision 9454afb647cSTimo Kreuzer 9464afb647cSTimo 
Kreuzer ; xmm0 - H 9474afb647cSTimo Kreuzer ; xmm7 - T 9484afb647cSTimo Kreuzer 9494afb647cSTimo Kreuzer movsd xmm2, QWORD PTR [y_head+rsp] 9504afb647cSTimo Kreuzer subsd xmm4, xmm2 9514afb647cSTimo Kreuzer 9524afb647cSTimo Kreuzer ; y is split into head and tail 9534afb647cSTimo Kreuzer ; for y * ln(x) computation 9544afb647cSTimo Kreuzer 9554afb647cSTimo Kreuzer ; xmm4 - Yt 9564afb647cSTimo Kreuzer ; xmm2 - Yh 9574afb647cSTimo Kreuzer ; xmm0 - H 9584afb647cSTimo Kreuzer ; xmm7 - T 9594afb647cSTimo Kreuzer 9604afb647cSTimo Kreuzer movsd xmm3, xmm4 9614afb647cSTimo Kreuzer movsd xmm5, xmm7 9624afb647cSTimo Kreuzer movsd xmm6, xmm0 9634afb647cSTimo Kreuzer mulsd xmm3, xmm7 ; YtRt 9644afb647cSTimo Kreuzer mulsd xmm4, xmm0 ; YtRh 9654afb647cSTimo Kreuzer mulsd xmm5, xmm2 ; YhRt 9664afb647cSTimo Kreuzer mulsd xmm6, xmm2 ; YhRh 9674afb647cSTimo Kreuzer 9684afb647cSTimo Kreuzer movsd xmm1, xmm6 9694afb647cSTimo Kreuzer addsd xmm3, xmm4 9704afb647cSTimo Kreuzer addsd xmm3, xmm5 9714afb647cSTimo Kreuzer 9724afb647cSTimo Kreuzer addsd xmm1, xmm3 9734afb647cSTimo Kreuzer movsd xmm0, xmm1 9744afb647cSTimo Kreuzer 9754afb647cSTimo Kreuzer subsd xmm6, xmm1 9764afb647cSTimo Kreuzer addsd xmm6, xmm3 9774afb647cSTimo Kreuzer 9784afb647cSTimo Kreuzer ; y * ln(x) = v + vt 9794afb647cSTimo Kreuzer ; v and vt are in full precision 9804afb647cSTimo Kreuzer 9814afb647cSTimo Kreuzer ; xmm0 - v 9824afb647cSTimo Kreuzer ; xmm6 - vt 9834afb647cSTimo Kreuzer 9844afb647cSTimo Kreuzer ; ----------------------------- 9854afb647cSTimo Kreuzer ; compute exp( y * ln(x) ) here 9864afb647cSTimo Kreuzer ; ----------------------------- 9874afb647cSTimo Kreuzer 9884afb647cSTimo Kreuzer ; v * (64/ln(2)) 9894afb647cSTimo Kreuzer movsd xmm7, QWORD PTR __real_64_by_log2 9904afb647cSTimo Kreuzer movsd QWORD PTR [p_temp_exp+rsp], xmm0 9914afb647cSTimo Kreuzer mulsd xmm7, xmm0 9924afb647cSTimo Kreuzer mov rdx, QWORD PTR [p_temp_exp+rsp] 9934afb647cSTimo Kreuzer 9944afb647cSTimo Kreuzer ; v < 1024*ln(2), ( v * 
(64/ln(2)) ) < 64*1024 9954afb647cSTimo Kreuzer ; v >= -1075*ln(2), ( v * (64/ln(2)) ) >= 64*(-1075) 9964afb647cSTimo Kreuzer comisd xmm7, QWORD PTR __real_p65536 9974afb647cSTimo Kreuzer ja Lpow_sse2_process_result_inf 9984afb647cSTimo Kreuzer 9994afb647cSTimo Kreuzer comisd xmm7, QWORD PTR __real_m68800 10004afb647cSTimo Kreuzer jb Lpow_sse2_process_result_zero 10014afb647cSTimo Kreuzer 10024afb647cSTimo Kreuzer ; n = int( v * (64/ln(2)) ) 10034afb647cSTimo Kreuzer cvtpd2dq xmm4, xmm7 10044afb647cSTimo Kreuzer lea r10, __two_to_jby64_head_table 10054afb647cSTimo Kreuzer lea r11, __two_to_jby64_tail_table 10064afb647cSTimo Kreuzer cvtdq2pd xmm1, xmm4 10074afb647cSTimo Kreuzer 10084afb647cSTimo Kreuzer ; r1 = x - n * ln(2)/64 head 10094afb647cSTimo Kreuzer movsd xmm2, QWORD PTR __real_log2_by_64_head 10104afb647cSTimo Kreuzer mulsd xmm2, xmm1 10114afb647cSTimo Kreuzer movd ecx, xmm4 10124afb647cSTimo Kreuzer mov rax, 3fh 10134afb647cSTimo Kreuzer and eax, ecx 10144afb647cSTimo Kreuzer subsd xmm0, xmm2 10154afb647cSTimo Kreuzer 10164afb647cSTimo Kreuzer ; r2 = - n * ln(2)/64 tail 10174afb647cSTimo Kreuzer mulsd xmm1, QWORD PTR __real_log2_by_64_tail 10184afb647cSTimo Kreuzer movsd xmm2, xmm0 10194afb647cSTimo Kreuzer 10204afb647cSTimo Kreuzer ; m = (n - j) / 64 10214afb647cSTimo Kreuzer sub ecx, eax 10224afb647cSTimo Kreuzer sar ecx, 6 10234afb647cSTimo Kreuzer 10244afb647cSTimo Kreuzer ; r1+r2 10254afb647cSTimo Kreuzer addsd xmm2, xmm1 10264afb647cSTimo Kreuzer addsd xmm2, xmm6 ; add vt here 10274afb647cSTimo Kreuzer movsd xmm1, xmm2 10284afb647cSTimo Kreuzer 10294afb647cSTimo Kreuzer ; q 10304afb647cSTimo Kreuzer movsd xmm0, QWORD PTR __real_1_by_2 10314afb647cSTimo Kreuzer movsd xmm3, QWORD PTR __real_1_by_24 10324afb647cSTimo Kreuzer movsd xmm4, QWORD PTR __real_1_by_720 10334afb647cSTimo Kreuzer mulsd xmm1, xmm2 10344afb647cSTimo Kreuzer mulsd xmm0, xmm2 10354afb647cSTimo Kreuzer mulsd xmm3, xmm2 10364afb647cSTimo Kreuzer mulsd xmm4, xmm2 10374afb647cSTimo 
Kreuzer 10384afb647cSTimo Kreuzer movsd xmm5, xmm1 10394afb647cSTimo Kreuzer mulsd xmm1, xmm2 10404afb647cSTimo Kreuzer addsd xmm0, QWORD PTR __real_one 10414afb647cSTimo Kreuzer addsd xmm3, QWORD PTR __real_1_by_6 10424afb647cSTimo Kreuzer mulsd xmm5, xmm1 10434afb647cSTimo Kreuzer addsd xmm4, QWORD PTR __real_1_by_120 10444afb647cSTimo Kreuzer mulsd xmm0, xmm2 10454afb647cSTimo Kreuzer mulsd xmm3, xmm1 10464afb647cSTimo Kreuzer 10474afb647cSTimo Kreuzer mulsd xmm4, xmm5 10484afb647cSTimo Kreuzer 10494afb647cSTimo Kreuzer ; deal with denormal results 10504afb647cSTimo Kreuzer xor r9d, r9d 10514afb647cSTimo Kreuzer 10524afb647cSTimo Kreuzer addsd xmm3, xmm4 10534afb647cSTimo Kreuzer addsd xmm0, xmm3 10544afb647cSTimo Kreuzer 10554afb647cSTimo Kreuzer cmp ecx, DWORD PTR __denormal_threshold 10564afb647cSTimo Kreuzer cmovle r9d, ecx 10574afb647cSTimo Kreuzer add rcx, 1023 10584afb647cSTimo Kreuzer shl rcx, 52 10594afb647cSTimo Kreuzer 10604afb647cSTimo Kreuzer ; f1, f2 10614afb647cSTimo Kreuzer movsd xmm5, QWORD PTR [r11+rax*8] 10624afb647cSTimo Kreuzer movsd xmm1, QWORD PTR [r10+rax*8] 10634afb647cSTimo Kreuzer mulsd xmm5, xmm0 10644afb647cSTimo Kreuzer mulsd xmm1, xmm0 10654afb647cSTimo Kreuzer 10664afb647cSTimo Kreuzer 10674afb647cSTimo Kreuzer ; (f1+f2)*(1+q) 10684afb647cSTimo Kreuzer addsd xmm5, QWORD PTR [r11+rax*8] 10694afb647cSTimo Kreuzer addsd xmm1, xmm5 10704afb647cSTimo Kreuzer addsd xmm1, QWORD PTR [r10+rax*8] 10714afb647cSTimo Kreuzer movsd xmm0, xmm1 10724afb647cSTimo Kreuzer 10734afb647cSTimo Kreuzer cmp rcx, QWORD PTR __real_inf 10744afb647cSTimo Kreuzer je Lpow_sse2_process_almost_inf 10754afb647cSTimo Kreuzer 10764afb647cSTimo Kreuzer mov QWORD PTR [p_temp_exp+rsp], rcx 10774afb647cSTimo Kreuzer test r9d, r9d 10784afb647cSTimo Kreuzer jnz Lpow_sse2_process_denormal 10794afb647cSTimo Kreuzer mulsd xmm0, QWORD PTR [p_temp_exp+rsp] 10804afb647cSTimo Kreuzer orpd xmm0, XMMWORD PTR [negate_result+rsp] 10814afb647cSTimo Kreuzer 10824afb647cSTimo 
KreuzerLpow_sse2_final_check: 10834afb647cSTimo Kreuzer RestoreXmm xmm7, save_xmm7 10844afb647cSTimo Kreuzer RestoreXmm xmm6, save_xmm6 10854afb647cSTimo Kreuzer StackDeallocate stack_size 10864afb647cSTimo Kreuzer ret 10874afb647cSTimo Kreuzer 10884afb647cSTimo KreuzerALIGN 16 10894afb647cSTimo KreuzerLpow_sse2_process_almost_inf: 10904afb647cSTimo Kreuzer comisd xmm0, QWORD PTR __real_one 10914afb647cSTimo Kreuzer jae Lpow_sse2_process_result_inf 10924afb647cSTimo Kreuzer 10934afb647cSTimo Kreuzer orpd xmm0, XMMWORD PTR __enable_almost_inf 10944afb647cSTimo Kreuzer orpd xmm0, XMMWORD PTR [negate_result+rsp] 10954afb647cSTimo Kreuzer jmp Lpow_sse2_final_check 10964afb647cSTimo Kreuzer 10974afb647cSTimo KreuzerALIGN 16 10984afb647cSTimo KreuzerLpow_sse2_process_denormal: 10994afb647cSTimo Kreuzer mov ecx, r9d 11004afb647cSTimo Kreuzer xor r11d, r11d 11014afb647cSTimo Kreuzer comisd xmm0, QWORD PTR __real_one 11024afb647cSTimo Kreuzer cmovae r11d, ecx 11034afb647cSTimo Kreuzer cmp r11d, DWORD PTR __denormal_threshold 11044afb647cSTimo Kreuzer jne Lpow_sse2_process_true_denormal 11054afb647cSTimo Kreuzer 11064afb647cSTimo Kreuzer mulsd xmm0, QWORD PTR [p_temp_exp+rsp] 11074afb647cSTimo Kreuzer orpd xmm0, XMMWORD PTR [negate_result+rsp] 11084afb647cSTimo Kreuzer jmp Lpow_sse2_final_check 11094afb647cSTimo Kreuzer 11104afb647cSTimo KreuzerALIGN 16 11114afb647cSTimo KreuzerLpow_sse2_process_true_denormal: 11124afb647cSTimo Kreuzer xor r8, r8 11134afb647cSTimo Kreuzer mov r9, 1 11144afb647cSTimo Kreuzer cmp rdx, QWORD PTR __denormal_tiny_threshold 11154afb647cSTimo Kreuzer jg Lpow_sse2_process_denormal_tiny 11164afb647cSTimo Kreuzer add ecx, 1074 11174afb647cSTimo Kreuzer cmovs rcx, r8 11184afb647cSTimo Kreuzer shl r9, cl 11194afb647cSTimo Kreuzer mov rcx, r9 11204afb647cSTimo Kreuzer 11214afb647cSTimo Kreuzer mov QWORD PTR [p_temp_exp+rsp], rcx 11224afb647cSTimo Kreuzer mulsd xmm0, QWORD PTR [p_temp_exp+rsp] 11234afb647cSTimo Kreuzer orpd xmm0, XMMWORD PTR 
[negate_result+rsp] 11244afb647cSTimo Kreuzer jmp Lpow_sse2_z_denormal 11254afb647cSTimo Kreuzer 11264afb647cSTimo KreuzerALIGN 16 11274afb647cSTimo KreuzerLpow_sse2_process_denormal_tiny: 11284afb647cSTimo Kreuzer movsd xmm0, QWORD PTR __real_smallest_denormal 11294afb647cSTimo Kreuzer orpd xmm0, XMMWORD PTR [negate_result+rsp] 11304afb647cSTimo Kreuzer jmp Lpow_sse2_z_denormal 11314afb647cSTimo Kreuzer 11324afb647cSTimo KreuzerALIGN 16 11334afb647cSTimo KreuzerLpow_sse2_process_result_zero: 11344afb647cSTimo Kreuzer mov r11, QWORD PTR __real_zero 11354afb647cSTimo Kreuzer or r11, QWORD PTR [negate_result+rsp] 11364afb647cSTimo Kreuzer jmp Lpow_sse2_z_is_zero_or_inf 11374afb647cSTimo Kreuzer 11384afb647cSTimo KreuzerALIGN 16 11394afb647cSTimo KreuzerLpow_sse2_process_result_inf: 11404afb647cSTimo Kreuzer mov r11, QWORD PTR __real_inf 11414afb647cSTimo Kreuzer or r11, QWORD PTR [negate_result+rsp] 11424afb647cSTimo Kreuzer jmp Lpow_sse2_z_is_zero_or_inf 11434afb647cSTimo Kreuzer 11444afb647cSTimo KreuzerALIGN 16 11454afb647cSTimo KreuzerLpow_sse2_denormal_adjust: 11464afb647cSTimo Kreuzer por xmm2, XMMWORD PTR __real_one 11474afb647cSTimo Kreuzer subsd xmm2, QWORD PTR __real_one 11484afb647cSTimo Kreuzer movsd xmm5, xmm2 11494afb647cSTimo Kreuzer pand xmm2, XMMWORD PTR __real_mant 11504afb647cSTimo Kreuzer movd r8, xmm2 11514afb647cSTimo Kreuzer psrlq xmm5, 52 11524afb647cSTimo Kreuzer psubd xmm5, XMMWORD PTR __mask_2045 11534afb647cSTimo Kreuzer cvtdq2pd xmm6, xmm5 11544afb647cSTimo Kreuzer jmp Lpow_sse2_continue_common 11554afb647cSTimo Kreuzer 11564afb647cSTimo KreuzerALIGN 16 11574afb647cSTimo KreuzerLpow_sse2_x_is_neg: 11584afb647cSTimo Kreuzer 11594afb647cSTimo Kreuzer mov r10, QWORD PTR __exp_mask 11604afb647cSTimo Kreuzer and r10, r8 11614afb647cSTimo Kreuzer cmp r10, QWORD PTR __ay_max_bound 11624afb647cSTimo Kreuzer jg Lpow_sse2_ay_is_very_large 11634afb647cSTimo Kreuzer 11644afb647cSTimo Kreuzer ; determine if y is an integer 11654afb647cSTimo Kreuzer 
mov r10, QWORD PTR __exp_mant_mask 11664afb647cSTimo Kreuzer and r10, r8 11674afb647cSTimo Kreuzer mov r11, r10 11684afb647cSTimo Kreuzer mov rcx, QWORD PTR __exp_shift 11694afb647cSTimo Kreuzer shr r10, cl 11704afb647cSTimo Kreuzer sub r10, QWORD PTR __exp_bias 11714afb647cSTimo Kreuzer js Lpow_sse2_x_is_neg_y_is_not_int 11724afb647cSTimo Kreuzer 11734afb647cSTimo Kreuzer mov rax, QWORD PTR __exp_mant_mask 11744afb647cSTimo Kreuzer and rax, rdx 11754afb647cSTimo Kreuzer mov QWORD PTR [save_ax+rsp], rax 11764afb647cSTimo Kreuzer 11774afb647cSTimo Kreuzer mov rcx, r10 11784afb647cSTimo Kreuzer cmp r10, QWORD PTR __yexp_53 11794afb647cSTimo Kreuzer jg Lpow_sse2_continue_after_y_int_check 11804afb647cSTimo Kreuzer 11814afb647cSTimo Kreuzer mov r9, QWORD PTR __mant_full 11824afb647cSTimo Kreuzer shr r9, cl 11834afb647cSTimo Kreuzer and r9, r11 11844afb647cSTimo Kreuzer jnz Lpow_sse2_x_is_neg_y_is_not_int 11854afb647cSTimo Kreuzer 11864afb647cSTimo Kreuzer mov r9, QWORD PTR __1_before_mant 11874afb647cSTimo Kreuzer shr r9, cl 11884afb647cSTimo Kreuzer and r9, r11 11894afb647cSTimo Kreuzer jz Lpow_sse2_continue_after_y_int_check 11904afb647cSTimo Kreuzer 11914afb647cSTimo Kreuzer mov rax, QWORD PTR __sign_mask 11924afb647cSTimo Kreuzer mov QWORD PTR [negate_result+rsp], rax 11934afb647cSTimo Kreuzer 11944afb647cSTimo KreuzerLpow_sse2_continue_after_y_int_check: 11954afb647cSTimo Kreuzer 11964afb647cSTimo Kreuzer cmp rdx, QWORD PTR __neg_zero 11974afb647cSTimo Kreuzer je Lpow_sse2_x_is_zero 11984afb647cSTimo Kreuzer 11994afb647cSTimo Kreuzer cmp rdx, QWORD PTR __neg_one 12004afb647cSTimo Kreuzer je Lpow_sse2_x_is_neg_one 12014afb647cSTimo Kreuzer 12024afb647cSTimo Kreuzer mov r9, QWORD PTR __exp_mask 12034afb647cSTimo Kreuzer and r9, rdx 12044afb647cSTimo Kreuzer cmp r9, QWORD PTR __exp_mask 12054afb647cSTimo Kreuzer je Lpow_sse2_x_is_inf_or_nan 12064afb647cSTimo Kreuzer 12074afb647cSTimo Kreuzer movsd xmm0, QWORD PTR [save_ax+rsp] 12084afb647cSTimo Kreuzer jmp 
Lpow_sse2_log_x 12094afb647cSTimo Kreuzer 12104afb647cSTimo Kreuzer 12114afb647cSTimo KreuzerALIGN 16 12124afb647cSTimo KreuzerLpow_sse2_near_one: 12134afb647cSTimo Kreuzer 12144afb647cSTimo Kreuzer ; f = F - Y, r = f * inv 12154afb647cSTimo Kreuzer movsd xmm0, xmm1 12164afb647cSTimo Kreuzer subsd xmm1, xmm2 12174afb647cSTimo Kreuzer movsd xmm4, xmm1 12184afb647cSTimo Kreuzer 12194afb647cSTimo Kreuzer movsd xmm3, QWORD PTR [r9+r8*8] 12204afb647cSTimo Kreuzer addsd xmm3, QWORD PTR [rdx+r8*8] 12214afb647cSTimo Kreuzer mulsd xmm4, xmm3 1222*105426b8STimo Kreuzer andpd xmm4, XMMWORD PTR __real_fffffffff8000000 12234afb647cSTimo Kreuzer movsd xmm5, xmm4 ; r1 12244afb647cSTimo Kreuzer mulsd xmm4, xmm0 12254afb647cSTimo Kreuzer subsd xmm1, xmm4 12264afb647cSTimo Kreuzer mulsd xmm1, xmm3 12274afb647cSTimo Kreuzer movsd xmm7, xmm1 ; r2 12284afb647cSTimo Kreuzer addsd xmm1, xmm5 12294afb647cSTimo Kreuzer 12304afb647cSTimo Kreuzer movsd xmm2, xmm1 12314afb647cSTimo Kreuzer movsd xmm0, xmm1 12324afb647cSTimo Kreuzer 12334afb647cSTimo Kreuzer lea r9, __log_256_lead 12344afb647cSTimo Kreuzer 12354afb647cSTimo Kreuzer ; poly 12364afb647cSTimo Kreuzer movsd xmm3, QWORD PTR __real_1_over_7 12374afb647cSTimo Kreuzer movsd xmm1, QWORD PTR __real_1_over_4 12384afb647cSTimo Kreuzer mulsd xmm3, xmm2 12394afb647cSTimo Kreuzer mulsd xmm1, xmm2 12404afb647cSTimo Kreuzer mulsd xmm0, xmm2 12414afb647cSTimo Kreuzer movsd xmm4, xmm0 12424afb647cSTimo Kreuzer addsd xmm3, QWORD PTR __real_1_over_6 12434afb647cSTimo Kreuzer addsd xmm1, QWORD PTR __real_1_over_3 12444afb647cSTimo Kreuzer mulsd xmm4, xmm0 12454afb647cSTimo Kreuzer mulsd xmm3, xmm2 12464afb647cSTimo Kreuzer mulsd xmm1, xmm2 12474afb647cSTimo Kreuzer addsd xmm3, QWORD PTR __real_1_over_5 12484afb647cSTimo Kreuzer mulsd xmm3, xmm2 12494afb647cSTimo Kreuzer mulsd xmm1, xmm0 12504afb647cSTimo Kreuzer mulsd xmm3, xmm4 12514afb647cSTimo Kreuzer 12524afb647cSTimo Kreuzer movsd xmm2, xmm5 12534afb647cSTimo Kreuzer movsd xmm0, xmm7 
12544afb647cSTimo Kreuzer mulsd xmm0, xmm0 12554afb647cSTimo Kreuzer mulsd xmm0, QWORD PTR __real_1_over_2 12564afb647cSTimo Kreuzer mulsd xmm5, xmm7 12574afb647cSTimo Kreuzer addsd xmm5, xmm0 12584afb647cSTimo Kreuzer addsd xmm5, xmm7 12594afb647cSTimo Kreuzer 12604afb647cSTimo Kreuzer movsd xmm0, xmm2 12614afb647cSTimo Kreuzer movsd xmm7, xmm2 12624afb647cSTimo Kreuzer mulsd xmm0, xmm0 12634afb647cSTimo Kreuzer mulsd xmm0, QWORD PTR __real_1_over_2 12644afb647cSTimo Kreuzer movsd xmm4, xmm0 12654afb647cSTimo Kreuzer addsd xmm2, xmm0 ; r1 + r1^2/2 12664afb647cSTimo Kreuzer subsd xmm7, xmm2 12674afb647cSTimo Kreuzer addsd xmm7, xmm4 12684afb647cSTimo Kreuzer 12694afb647cSTimo Kreuzer addsd xmm3, xmm7 12704afb647cSTimo Kreuzer movsd xmm4, QWORD PTR __real_log2_tail 12714afb647cSTimo Kreuzer addsd xmm1, xmm3 12724afb647cSTimo Kreuzer mulsd xmm4, xmm6 12734afb647cSTimo Kreuzer lea rdx, __log_256_tail 12744afb647cSTimo Kreuzer addsd xmm1, xmm5 12754afb647cSTimo Kreuzer addsd xmm4, QWORD PTR [rdx+r8*8] 12764afb647cSTimo Kreuzer subsd xmm4, xmm1 12774afb647cSTimo Kreuzer 12784afb647cSTimo Kreuzer movsd xmm3, xmm4 12794afb647cSTimo Kreuzer movsd xmm1, xmm4 12804afb647cSTimo Kreuzer subsd xmm3, xmm2 12814afb647cSTimo Kreuzer 12824afb647cSTimo Kreuzer movsd xmm0, QWORD PTR [r9+r8*8] 12834afb647cSTimo Kreuzer movsd xmm7, QWORD PTR __real_log2_lead 12844afb647cSTimo Kreuzer mulsd xmm7, xmm6 12854afb647cSTimo Kreuzer addsd xmm0, xmm7 12864afb647cSTimo Kreuzer 12874afb647cSTimo Kreuzer jmp Lpow_sse2_log_x_continue 12884afb647cSTimo Kreuzer 12894afb647cSTimo Kreuzer 12904afb647cSTimo KreuzerALIGN 16 12914afb647cSTimo KreuzerLpow_sse2_x_is_pos_one: 12924afb647cSTimo Kreuzer jmp Lpow_sse2_final_check 12934afb647cSTimo Kreuzer 12944afb647cSTimo KreuzerALIGN 16 12954afb647cSTimo KreuzerLpow_sse2_y_is_zero: 12964afb647cSTimo Kreuzer movsd xmm0, QWORD PTR __real_one 12974afb647cSTimo Kreuzer jmp Lpow_sse2_final_check 12984afb647cSTimo Kreuzer 12994afb647cSTimo KreuzerALIGN 16 
13004afb647cSTimo KreuzerLpow_sse2_y_is_one: 13014afb647cSTimo Kreuzer xor rax, rax 13024afb647cSTimo Kreuzer mov r11, rdx 13034afb647cSTimo Kreuzer mov r9, QWORD PTR __exp_mask 13044afb647cSTimo Kreuzer ;or r11, QWORD PTR __qnan_set 13054afb647cSTimo Kreuzer and r9, rdx 13064afb647cSTimo Kreuzer cmp r9, QWORD PTR __exp_mask 13074afb647cSTimo Kreuzer cmove rax, rdx 13084afb647cSTimo Kreuzer mov r9, QWORD PTR __mant_mask 13094afb647cSTimo Kreuzer and r9, rax 13104afb647cSTimo Kreuzer jnz Lpow_sse2_x_is_nan 13114afb647cSTimo Kreuzer 13124afb647cSTimo Kreuzer movd xmm0, rdx 13134afb647cSTimo Kreuzer jmp Lpow_sse2_final_check 13144afb647cSTimo Kreuzer 13154afb647cSTimo KreuzerALIGN 16 13164afb647cSTimo KreuzerLpow_sse2_x_is_neg_one: 13174afb647cSTimo Kreuzer mov rdx, QWORD PTR __pos_one 13184afb647cSTimo Kreuzer or rdx, QWORD PTR [negate_result+rsp] 13194afb647cSTimo Kreuzer xor rax, rax 13204afb647cSTimo Kreuzer mov r11, r8 13214afb647cSTimo Kreuzer mov r10, QWORD PTR __exp_mask 13224afb647cSTimo Kreuzer ;or r11, QWORD PTR __qnan_set 13234afb647cSTimo Kreuzer and r10, r8 13244afb647cSTimo Kreuzer cmp r10, QWORD PTR __exp_mask 13254afb647cSTimo Kreuzer cmove rax, r8 13264afb647cSTimo Kreuzer mov r10, QWORD PTR __mant_mask 13274afb647cSTimo Kreuzer and r10, rax 13284afb647cSTimo Kreuzer jnz Lpow_sse2_y_is_nan 13294afb647cSTimo Kreuzer 13304afb647cSTimo Kreuzer movd xmm0, rdx 13314afb647cSTimo Kreuzer jmp Lpow_sse2_final_check 13324afb647cSTimo Kreuzer 13334afb647cSTimo KreuzerALIGN 16 13344afb647cSTimo KreuzerLpow_sse2_x_is_neg_y_is_not_int: 13354afb647cSTimo Kreuzer mov r9, QWORD PTR __exp_mask 13364afb647cSTimo Kreuzer and r9, rdx 13374afb647cSTimo Kreuzer cmp r9, QWORD PTR __exp_mask 13384afb647cSTimo Kreuzer je Lpow_sse2_x_is_inf_or_nan 13394afb647cSTimo Kreuzer 13404afb647cSTimo Kreuzer cmp rdx, QWORD PTR __neg_zero 13414afb647cSTimo Kreuzer je Lpow_sse2_x_is_zero 13424afb647cSTimo Kreuzer 13434afb647cSTimo Kreuzer movsd xmm0, QWORD PTR [save_x+rsp] 
13444afb647cSTimo Kreuzer movsd xmm1, QWORD PTR [save_y+rsp] 13454afb647cSTimo Kreuzer movsd xmm2, QWORD PTR __neg_qnan 13464afb647cSTimo Kreuzer mov r9d, DWORD PTR __flag_x_neg_y_notint 13474afb647cSTimo Kreuzer 13484afb647cSTimo Kreuzer call fname_special 13494afb647cSTimo Kreuzer jmp Lpow_sse2_final_check 13504afb647cSTimo Kreuzer 13514afb647cSTimo KreuzerALIGN 16 13524afb647cSTimo KreuzerLpow_sse2_ay_is_very_large: 13534afb647cSTimo Kreuzer mov r9, QWORD PTR __exp_mask 13544afb647cSTimo Kreuzer and r9, rdx 13554afb647cSTimo Kreuzer cmp r9, QWORD PTR __exp_mask 13564afb647cSTimo Kreuzer je Lpow_sse2_x_is_inf_or_nan 13574afb647cSTimo Kreuzer 13584afb647cSTimo Kreuzer mov r9, QWORD PTR __exp_mant_mask 13594afb647cSTimo Kreuzer and r9, rdx 13604afb647cSTimo Kreuzer jz Lpow_sse2_x_is_zero 13614afb647cSTimo Kreuzer 13624afb647cSTimo Kreuzer cmp rdx, QWORD PTR __neg_one 13634afb647cSTimo Kreuzer je Lpow_sse2_x_is_neg_one 13644afb647cSTimo Kreuzer 13654afb647cSTimo Kreuzer mov r9, rdx 13664afb647cSTimo Kreuzer and r9, QWORD PTR __exp_mant_mask 13674afb647cSTimo Kreuzer cmp r9, QWORD PTR __pos_one 13684afb647cSTimo Kreuzer jl Lpow_sse2_ax_lt1_y_is_large_or_inf_or_nan 13694afb647cSTimo Kreuzer 13704afb647cSTimo Kreuzer jmp Lpow_sse2_ax_gt1_y_is_large_or_inf_or_nan 13714afb647cSTimo Kreuzer 13724afb647cSTimo KreuzerALIGN 16 13734afb647cSTimo KreuzerLpow_sse2_x_is_zero: 13744afb647cSTimo Kreuzer mov r10, QWORD PTR __exp_mask 13754afb647cSTimo Kreuzer xor rax, rax 13764afb647cSTimo Kreuzer and r10, r8 13774afb647cSTimo Kreuzer cmp r10, QWORD PTR __exp_mask 13784afb647cSTimo Kreuzer je Lpow_sse2_x_is_zero_y_is_inf_or_nan 13794afb647cSTimo Kreuzer 13804afb647cSTimo Kreuzer mov r10, QWORD PTR __sign_mask 13814afb647cSTimo Kreuzer and r10, r8 13824afb647cSTimo Kreuzer cmovnz rax, QWORD PTR __pos_inf 13834afb647cSTimo Kreuzer jnz Lpow_sse2_x_is_zero_z_is_inf 13844afb647cSTimo Kreuzer 13854afb647cSTimo Kreuzer movd xmm0, rax 13864afb647cSTimo Kreuzer orpd xmm0, XMMWORD PTR 
[negate_result+rsp] 13874afb647cSTimo Kreuzer jmp Lpow_sse2_final_check 13884afb647cSTimo Kreuzer 13894afb647cSTimo KreuzerALIGN 16 13904afb647cSTimo KreuzerLpow_sse2_x_is_zero_z_is_inf: 13914afb647cSTimo Kreuzer 13924afb647cSTimo Kreuzer movsd xmm0, QWORD PTR [save_x+rsp] 13934afb647cSTimo Kreuzer movsd xmm1, QWORD PTR [save_y+rsp] 13944afb647cSTimo Kreuzer movd xmm2, rax 13954afb647cSTimo Kreuzer orpd xmm2, XMMWORD PTR [negate_result+rsp] 13964afb647cSTimo Kreuzer mov r9d, DWORD PTR __flag_x_zero_z_inf 13974afb647cSTimo Kreuzer 13984afb647cSTimo Kreuzer call fname_special 13994afb647cSTimo Kreuzer jmp Lpow_sse2_final_check 14004afb647cSTimo Kreuzer 14014afb647cSTimo KreuzerALIGN 16 14024afb647cSTimo KreuzerLpow_sse2_x_is_zero_y_is_inf_or_nan: 14034afb647cSTimo Kreuzer mov r11, r8 14044afb647cSTimo Kreuzer cmp r8, QWORD PTR __neg_inf 14054afb647cSTimo Kreuzer cmove rax, QWORD PTR __pos_inf 14064afb647cSTimo Kreuzer je Lpow_sse2_x_is_zero_z_is_inf 14074afb647cSTimo Kreuzer 14084afb647cSTimo Kreuzer ;or r11, QWORD PTR __qnan_set 14094afb647cSTimo Kreuzer mov r10, QWORD PTR __mant_mask 14104afb647cSTimo Kreuzer and r10, r8 14114afb647cSTimo Kreuzer jnz Lpow_sse2_y_is_nan 14124afb647cSTimo Kreuzer 14134afb647cSTimo Kreuzer movd xmm0, rax 14144afb647cSTimo Kreuzer jmp Lpow_sse2_final_check 14154afb647cSTimo Kreuzer 14164afb647cSTimo KreuzerALIGN 16 14174afb647cSTimo KreuzerLpow_sse2_x_is_inf_or_nan: 14184afb647cSTimo Kreuzer xor r11, r11 14194afb647cSTimo Kreuzer mov r10, QWORD PTR __sign_mask 14204afb647cSTimo Kreuzer and r10, r8 14214afb647cSTimo Kreuzer cmovz r11, QWORD PTR __pos_inf 14224afb647cSTimo Kreuzer mov rax, rdx 14234afb647cSTimo Kreuzer mov r9, QWORD PTR __mant_mask 14244afb647cSTimo Kreuzer ;or rax, QWORD PTR __qnan_set 14254afb647cSTimo Kreuzer and r9, rdx 14264afb647cSTimo Kreuzer cmovnz r11, rax 14274afb647cSTimo Kreuzer jnz Lpow_sse2_x_is_nan 14284afb647cSTimo Kreuzer 14294afb647cSTimo Kreuzer xor rax, rax 14304afb647cSTimo Kreuzer mov r9, r8 
14314afb647cSTimo Kreuzer mov r10, QWORD PTR __exp_mask 14324afb647cSTimo Kreuzer ;or r9, QWORD PTR __qnan_set 14334afb647cSTimo Kreuzer and r10, r8 14344afb647cSTimo Kreuzer cmp r10, QWORD PTR __exp_mask 14354afb647cSTimo Kreuzer cmove rax, r8 14364afb647cSTimo Kreuzer mov r10, QWORD PTR __mant_mask 14374afb647cSTimo Kreuzer and r10, rax 14384afb647cSTimo Kreuzer cmovnz r11, r9 14394afb647cSTimo Kreuzer jnz Lpow_sse2_y_is_nan 14404afb647cSTimo Kreuzer 14414afb647cSTimo Kreuzer movd xmm0, r11 14424afb647cSTimo Kreuzer orpd xmm0, XMMWORD PTR [negate_result+rsp] 14434afb647cSTimo Kreuzer jmp Lpow_sse2_final_check 14444afb647cSTimo Kreuzer 14454afb647cSTimo KreuzerALIGN 16 14464afb647cSTimo KreuzerLpow_sse2_ay_is_very_small: 14474afb647cSTimo Kreuzer movsd xmm0, QWORD PTR __pos_one 14484afb647cSTimo Kreuzer addsd xmm0, xmm1 14494afb647cSTimo Kreuzer jmp Lpow_sse2_final_check 14504afb647cSTimo Kreuzer 14514afb647cSTimo Kreuzer 14524afb647cSTimo KreuzerALIGN 16 14534afb647cSTimo KreuzerLpow_sse2_ax_lt1_y_is_large_or_inf_or_nan: 14544afb647cSTimo Kreuzer xor r11, r11 14554afb647cSTimo Kreuzer mov r10, QWORD PTR __sign_mask 14564afb647cSTimo Kreuzer and r10, r8 14574afb647cSTimo Kreuzer cmovnz r11, QWORD PTR __pos_inf 14584afb647cSTimo Kreuzer jmp Lpow_sse2_adjust_for_nan 14594afb647cSTimo Kreuzer 14604afb647cSTimo KreuzerALIGN 16 14614afb647cSTimo KreuzerLpow_sse2_ax_gt1_y_is_large_or_inf_or_nan: 14624afb647cSTimo Kreuzer xor r11, r11 14634afb647cSTimo Kreuzer mov r10, QWORD PTR __sign_mask 14644afb647cSTimo Kreuzer and r10, r8 14654afb647cSTimo Kreuzer cmovz r11, QWORD PTR __pos_inf 14664afb647cSTimo Kreuzer 14674afb647cSTimo KreuzerALIGN 16 14684afb647cSTimo KreuzerLpow_sse2_adjust_for_nan: 14694afb647cSTimo Kreuzer 14704afb647cSTimo Kreuzer xor rax, rax 14714afb647cSTimo Kreuzer mov r9, r8 14724afb647cSTimo Kreuzer mov r10, QWORD PTR __exp_mask 14734afb647cSTimo Kreuzer ;or r9, QWORD PTR __qnan_set 14744afb647cSTimo Kreuzer and r10, r8 14754afb647cSTimo Kreuzer cmp 
r10, QWORD PTR __exp_mask 14764afb647cSTimo Kreuzer cmove rax, r8 14774afb647cSTimo Kreuzer mov r10, QWORD PTR __mant_mask 14784afb647cSTimo Kreuzer and r10, rax 14794afb647cSTimo Kreuzer cmovnz r11, r9 14804afb647cSTimo Kreuzer jnz Lpow_sse2_y_is_nan 14814afb647cSTimo Kreuzer 14824afb647cSTimo Kreuzer test rax, rax 14834afb647cSTimo Kreuzer jnz Lpow_sse2_y_is_inf 14844afb647cSTimo Kreuzer 14854afb647cSTimo KreuzerALIGN 16 14864afb647cSTimo KreuzerLpow_sse2_z_is_zero_or_inf: 14874afb647cSTimo Kreuzer 14884afb647cSTimo Kreuzer mov r9d, DWORD PTR __flag_z_zero 14894afb647cSTimo Kreuzer test r11, QWORD PTR __exp_mant_mask 14904afb647cSTimo Kreuzer cmovnz r9d, DWORD PTR __flag_z_inf 14914afb647cSTimo Kreuzer 14924afb647cSTimo Kreuzer movsd xmm0, QWORD PTR [save_x+rsp] 14934afb647cSTimo Kreuzer movsd xmm1, QWORD PTR [save_y+rsp] 14944afb647cSTimo Kreuzer movd xmm2, r11 14954afb647cSTimo Kreuzer 14964afb647cSTimo Kreuzer call fname_special 14974afb647cSTimo Kreuzer jmp Lpow_sse2_final_check 14984afb647cSTimo Kreuzer 14994afb647cSTimo KreuzerALIGN 16 15004afb647cSTimo KreuzerLpow_sse2_y_is_inf: 15014afb647cSTimo Kreuzer 15024afb647cSTimo Kreuzer movd xmm0, r11 15034afb647cSTimo Kreuzer jmp Lpow_sse2_final_check 15044afb647cSTimo Kreuzer 15054afb647cSTimo KreuzerALIGN 16 15064afb647cSTimo KreuzerLpow_sse2_x_is_nan: 15074afb647cSTimo Kreuzer 15084afb647cSTimo Kreuzer xor rax, rax 15094afb647cSTimo Kreuzer mov r10, QWORD PTR __exp_mask 15104afb647cSTimo Kreuzer and r10, r8 15114afb647cSTimo Kreuzer cmp r10, QWORD PTR __exp_mask 15124afb647cSTimo Kreuzer cmove rax, r8 15134afb647cSTimo Kreuzer mov r10, QWORD PTR __mant_mask 15144afb647cSTimo Kreuzer and r10, rax 15154afb647cSTimo Kreuzer jnz Lpow_sse2_x_is_nan_y_is_nan 15164afb647cSTimo Kreuzer 15174afb647cSTimo Kreuzer movsd xmm0, QWORD PTR [save_x+rsp] 15184afb647cSTimo Kreuzer movsd xmm1, QWORD PTR [save_y+rsp] 15194afb647cSTimo Kreuzer movd xmm2, r11 15204afb647cSTimo Kreuzer mov r9d, DWORD PTR __flag_x_nan 
15214afb647cSTimo Kreuzer 15224afb647cSTimo Kreuzer call fname_special 15234afb647cSTimo Kreuzer jmp Lpow_sse2_final_check 15244afb647cSTimo Kreuzer 15254afb647cSTimo KreuzerALIGN 16 15264afb647cSTimo KreuzerLpow_sse2_y_is_nan: 15274afb647cSTimo Kreuzer 15284afb647cSTimo Kreuzer movsd xmm0, QWORD PTR [save_x+rsp] 15294afb647cSTimo Kreuzer movsd xmm1, QWORD PTR [save_y+rsp] 15304afb647cSTimo Kreuzer movd xmm2, r11 15314afb647cSTimo Kreuzer mov r9d, DWORD PTR __flag_y_nan 15324afb647cSTimo Kreuzer 15334afb647cSTimo Kreuzer call fname_special 15344afb647cSTimo Kreuzer jmp Lpow_sse2_final_check 15354afb647cSTimo Kreuzer 15364afb647cSTimo KreuzerALIGN 16 15374afb647cSTimo KreuzerLpow_sse2_x_is_nan_y_is_nan: 15384afb647cSTimo Kreuzer 15394afb647cSTimo Kreuzer mov r9, r8 15404afb647cSTimo Kreuzer 15414afb647cSTimo Kreuzer cmp r11, QWORD PTR __ind_pattern 15424afb647cSTimo Kreuzer cmove r11, r9 15434afb647cSTimo Kreuzer je Lpow_sse2_continue_xy_nan 15444afb647cSTimo Kreuzer 15454afb647cSTimo Kreuzer cmp r9, QWORD PTR __ind_pattern 15464afb647cSTimo Kreuzer cmove r9, r11 15474afb647cSTimo Kreuzer 15484afb647cSTimo Kreuzer mov r10, r9 15494afb647cSTimo Kreuzer and r10, QWORD PTR __sign_mask 15504afb647cSTimo Kreuzer cmovnz r9, r11 15514afb647cSTimo Kreuzer 15524afb647cSTimo Kreuzer mov r10, r11 15534afb647cSTimo Kreuzer and r10, QWORD PTR __sign_mask 15544afb647cSTimo Kreuzer cmovnz r11, r9 15554afb647cSTimo Kreuzer 15564afb647cSTimo KreuzerLpow_sse2_continue_xy_nan: 15574afb647cSTimo Kreuzer ;or r11, QWORD PTR __qnan_set 15584afb647cSTimo Kreuzer movsd xmm0, QWORD PTR [save_x+rsp] 15594afb647cSTimo Kreuzer movsd xmm1, QWORD PTR [save_y+rsp] 15604afb647cSTimo Kreuzer movd xmm2, r11 15614afb647cSTimo Kreuzer mov r9d, DWORD PTR __flag_x_nan_y_nan 15624afb647cSTimo Kreuzer 15634afb647cSTimo Kreuzer call fname_special 15644afb647cSTimo Kreuzer jmp Lpow_sse2_final_check 15654afb647cSTimo Kreuzer 15664afb647cSTimo KreuzerALIGN 16 15674afb647cSTimo KreuzerLpow_sse2_z_denormal: 
15684afb647cSTimo Kreuzer 15694afb647cSTimo Kreuzer movsd xmm2, xmm0 15704afb647cSTimo Kreuzer movsd xmm0, QWORD PTR [save_x+rsp] 15714afb647cSTimo Kreuzer movsd xmm1, QWORD PTR [save_y+rsp] 15724afb647cSTimo Kreuzer mov r9d, DWORD PTR __flag_z_denormal 15734afb647cSTimo Kreuzer 15744afb647cSTimo Kreuzer call fname_special 15754afb647cSTimo Kreuzer jmp Lpow_sse2_final_check 15764afb647cSTimo Kreuzer 15774afb647cSTimo KreuzerLpow_fma3: 15784afb647cSTimo Kreuzer vmovsd QWORD PTR [save_x+rsp], xmm0 15794afb647cSTimo Kreuzer vmovsd QWORD PTR [save_y+rsp], xmm1 15804afb647cSTimo Kreuzer 15814afb647cSTimo Kreuzer mov rdx, QWORD PTR [save_x+rsp] 15824afb647cSTimo Kreuzer mov r8, QWORD PTR [save_y+rsp] 15834afb647cSTimo Kreuzer 15844afb647cSTimo Kreuzer mov r10, QWORD PTR __exp_mant_mask 15854afb647cSTimo Kreuzer and r10, r8 15864afb647cSTimo Kreuzer jz Lpow_fma3_y_is_zero 15874afb647cSTimo Kreuzer 15884afb647cSTimo Kreuzer cmp r8, QWORD PTR __pos_one 15894afb647cSTimo Kreuzer je Lpow_fma3_y_is_one 15904afb647cSTimo Kreuzer 15914afb647cSTimo Kreuzer mov r9, QWORD PTR __sign_mask 15924afb647cSTimo Kreuzer and r9, rdx 15934afb647cSTimo Kreuzer cmp r9, QWORD PTR __sign_mask 15944afb647cSTimo Kreuzer mov rax, QWORD PTR __pos_zero 15954afb647cSTimo Kreuzer mov QWORD PTR [negate_result+rsp], rax 15964afb647cSTimo Kreuzer je Lpow_fma3_x_is_neg 15974afb647cSTimo Kreuzer 15984afb647cSTimo Kreuzer cmp rdx, QWORD PTR __pos_one 15994afb647cSTimo Kreuzer je Lpow_fma3_x_is_pos_one 16004afb647cSTimo Kreuzer 16014afb647cSTimo Kreuzer cmp rdx, QWORD PTR __pos_zero 16024afb647cSTimo Kreuzer je Lpow_fma3_x_is_zero 16034afb647cSTimo Kreuzer 16044afb647cSTimo Kreuzer mov r9, QWORD PTR __exp_mask 16054afb647cSTimo Kreuzer and r9, rdx 16064afb647cSTimo Kreuzer cmp r9, QWORD PTR __exp_mask 16074afb647cSTimo Kreuzer je Lpow_fma3_x_is_inf_or_nan 16084afb647cSTimo Kreuzer 16094afb647cSTimo Kreuzer mov r10, QWORD PTR __exp_mask 16104afb647cSTimo Kreuzer and r10, r8 16114afb647cSTimo Kreuzer cmp r10, 
QWORD PTR __ay_max_bound 16124afb647cSTimo Kreuzer jg Lpow_fma3_ay_is_very_large 16134afb647cSTimo Kreuzer 16144afb647cSTimo Kreuzer mov r10, QWORD PTR __exp_mask 16154afb647cSTimo Kreuzer and r10, r8 16164afb647cSTimo Kreuzer cmp r10, QWORD PTR __ay_min_bound 16174afb647cSTimo Kreuzer jl Lpow_fma3_ay_is_very_small 16184afb647cSTimo Kreuzer 16194afb647cSTimo Kreuzer ; ----------------------------- 16204afb647cSTimo Kreuzer ; compute log(x) here 16214afb647cSTimo Kreuzer ; ----------------------------- 16224afb647cSTimo KreuzerLpow_fma3_log_x: 16234afb647cSTimo Kreuzer 16244afb647cSTimo Kreuzer ; compute exponent part 16254afb647cSTimo Kreuzer vpsrlq xmm3, xmm0, 52 16264afb647cSTimo Kreuzer vmovq r8, xmm0 16274afb647cSTimo Kreuzer vpsubq xmm3, xmm3, XMMWORD PTR __mask_1023 16284afb647cSTimo Kreuzer vcvtdq2pd xmm6, xmm3 ; xexp 16294afb647cSTimo Kreuzer vpand xmm2, xmm0, XMMWORD PTR __real_mant 16304afb647cSTimo Kreuzer 16314afb647cSTimo Kreuzer vcomisd xmm6, QWORD PTR __mask_1023_f 16324afb647cSTimo Kreuzer je Lpow_fma3_denormal_adjust 16334afb647cSTimo Kreuzer 16344afb647cSTimo KreuzerLpow_fma3_continue_common: 16354afb647cSTimo Kreuzer 16364afb647cSTimo Kreuzer ; compute index into the log tables 16374afb647cSTimo Kreuzer mov r9, r8 16384afb647cSTimo Kreuzer and r8, QWORD PTR __mask_mant_all8 16394afb647cSTimo Kreuzer and r9, QWORD PTR __mask_mant9 16404afb647cSTimo Kreuzer vsubsd xmm7, xmm0, __real_one 16414afb647cSTimo Kreuzer shl r9, 1 16424afb647cSTimo Kreuzer add r8, r9 16434afb647cSTimo Kreuzer vmovq xmm1, r8 16444afb647cSTimo Kreuzer vandpd xmm7, xmm7, __real_notsign 16454afb647cSTimo Kreuzer 16464afb647cSTimo Kreuzer ; F, Y, switch to near-one codepath 16474afb647cSTimo Kreuzer shr r8, 44 16484afb647cSTimo Kreuzer vpor xmm2, xmm2, XMMWORD PTR __real_half 16494afb647cSTimo Kreuzer vpor xmm1, xmm1, XMMWORD PTR __real_half 16504afb647cSTimo Kreuzer vcomisd xmm7, __real_threshold 16514afb647cSTimo Kreuzer lea r9, QWORD PTR __log_F_inv_head 16524afb647cSTimo 
Kreuzer lea rdx, QWORD PTR __log_F_inv_tail 16534afb647cSTimo Kreuzer jb Lpow_fma3_near_one 16544afb647cSTimo Kreuzer 16554afb647cSTimo Kreuzer ; f = F - Y, r = f * inv 16564afb647cSTimo Kreuzer vsubsd xmm4, xmm1, xmm2 ; xmm4 <-- f = F - Y 16574afb647cSTimo Kreuzer vmulsd xmm1, xmm4, QWORD PTR [r9+r8*8] ; xmm1 <-- rhead = f*inv_head 16584afb647cSTimo Kreuzer vmovapd xmm5, xmm1 ; xmm5 <-- copy of rhead 16594afb647cSTimo Kreuzer vmulsd xmm4, xmm4, QWORD PTR [rdx+r8*8] ; xmm4 <-- rtail = f*inv_tail 16604afb647cSTimo Kreuzer vmovapd xmm7, xmm4 ; xmm7 <-- copy of rtail 16614afb647cSTimo Kreuzer vaddsd xmm1, xmm1, xmm4 ; xmm1 <-- r = rhead + rtail 16624afb647cSTimo Kreuzer 16634afb647cSTimo Kreuzer vmovapd xmm2, xmm1 ; xmm2 <-- copy of r 16644afb647cSTimo Kreuzer vmovapd xmm0, xmm1 ; xmm0 <-- copy of r 16654afb647cSTimo Kreuzer lea r9, __log_256_lead 16664afb647cSTimo Kreuzer 16674afb647cSTimo Kreuzer ; poly 16684afb647cSTimo Kreuzer; movsd xmm3, QWORD PTR __real_1_over_6 16694afb647cSTimo Kreuzer; movsd xmm1, QWORD PTR __real_1_over_3 16704afb647cSTimo Kreuzer; mulsd xmm3, xmm2 ; r*1/6 16714afb647cSTimo Kreuzer; mulsd xmm1, xmm2 ; r*1/3 16724afb647cSTimo Kreuzer; mulsd xmm0, xmm2 ; r^2 16734afb647cSTimo Kreuzer; subsd xmm5, xmm2 ; xmm5 <-- rhead - r 16744afb647cSTimo Kreuzer; movsd xmm4, xmm0 ; xmm4 <-- copy of r^2 16754afb647cSTimo Kreuzer; addsd xmm3, QWORD PTR __real_1_over_5 ; xmm3 <-- r*1/6 + 1/5 16764afb647cSTimo Kreuzer; addsd xmm1, QWORD PTR __real_1_over_2 ; xmm1 <-- r*1/3 + 1/2 16774afb647cSTimo Kreuzer; mulsd xmm4, xmm0 ; xmm4 <-- r^4 16784afb647cSTimo Kreuzer; mulsd xmm3, xmm2 ; xmm3 <-- (r*1/6 + 1/5)*r 16794afb647cSTimo Kreuzer; mulsd xmm1, xmm0 ; xmm1 <-- (r*1/3 + 1/2)*r^2 16804afb647cSTimo Kreuzer; addsd xmm3, QWORD PTR __real_1_over_4 ; xmm3 <-- (r*1/6+1/5)*r + 1/4 16814afb647cSTimo Kreuzer; addsd xmm7, xmm5 ; xmm7 <-- rtail + (rhead - r) 16824afb647cSTimo Kreuzer; mulsd xmm3, xmm4 ; xmm3 <-- (r*1/6 + 1/5)*r^5 + r^4*1/4 16834afb647cSTimo Kreuzer; addsd
xmm1, xmm3 ; xmm1 <-- poly down to r^2 16844afb647cSTimo Kreuzer; addsd xmm1, xmm7 ; xmm1 <-- poly + correction 16854afb647cSTimo Kreuzer 16864afb647cSTimo Kreuzer 16874afb647cSTimo Kreuzer vsubsd xmm3, xmm5, xmm2 16884afb647cSTimo Kreuzer vmovsd xmm1, QWORD PTR __real_1_over_6 16894afb647cSTimo Kreuzer vmulsd xmm0,xmm0,xmm0 16904afb647cSTimo Kreuzer vaddsd xmm3, xmm3, xmm7 16914afb647cSTimo Kreuzer vfmadd213sd xmm1, xmm2, QWORD PTR __real_1_over_5 16924afb647cSTimo Kreuzer vfmadd213sd xmm1, xmm2, QWORD PTR __real_1_over_4 16934afb647cSTimo Kreuzer vfmadd213sd xmm1, xmm2, QWORD PTR __real_1_over_3 16944afb647cSTimo Kreuzer vfmadd213sd xmm1, xmm2, QWORD PTR __real_1_over_2 16954afb647cSTimo Kreuzer vfmadd213sd xmm1, xmm0, xmm3 16964afb647cSTimo Kreuzer 16974afb647cSTimo Kreuzer vmovsd xmm5, QWORD PTR __real_log2_tail 16984afb647cSTimo Kreuzer lea rdx, __log_256_tail 16994afb647cSTimo Kreuzer vfmsub213sd xmm5, xmm6, xmm1 17004afb647cSTimo Kreuzer vmovsd xmm0, QWORD PTR [r9+r8*8] 17014afb647cSTimo Kreuzer 17024afb647cSTimo Kreuzer vaddsd xmm3, xmm5, QWORD PTR [rdx+r8*8] 17034afb647cSTimo Kreuzer vmovapd xmm1, xmm3 17044afb647cSTimo Kreuzer vsubsd xmm3, xmm3, xmm2 17054afb647cSTimo Kreuzer 17064afb647cSTimo Kreuzer vfmadd231sd xmm0, xmm6, QWORD PTR __real_log2_lead 17074afb647cSTimo Kreuzer 17084afb647cSTimo Kreuzer ; result of ln(x) is computed from head and tail parts, resH and resT 17094afb647cSTimo Kreuzer ; res = ln(x) = resH + resT 17104afb647cSTimo Kreuzer ; resH and resT are in full precision 17114afb647cSTimo Kreuzer 17124afb647cSTimo Kreuzer ; resT is computed from head and tail parts, resT_h and resT_t 17134afb647cSTimo Kreuzer ; resT = resT_h + resT_t 17144afb647cSTimo Kreuzer 17154afb647cSTimo Kreuzer ; now 17164afb647cSTimo Kreuzer ; xmm3 - resT 17174afb647cSTimo Kreuzer ; xmm0 - resH 17184afb647cSTimo Kreuzer ; xmm1 - (resT_t) 17194afb647cSTimo Kreuzer ; xmm2 - (-resT_h) 17204afb647cSTimo Kreuzer 17214afb647cSTimo KreuzerLpow_fma3_log_x_continue: 
17224afb647cSTimo Kreuzer 17234afb647cSTimo Kreuzer vmovapd xmm7, xmm0 17244afb647cSTimo Kreuzer vaddsd xmm0, xmm0, xmm3 17254afb647cSTimo Kreuzer vmovapd xmm5, xmm0 17264afb647cSTimo Kreuzer vandpd xmm0, xmm0, XMMWORD PTR __real_fffffffff8000000 17274afb647cSTimo Kreuzer 17284afb647cSTimo Kreuzer ; xmm0 - H 17294afb647cSTimo Kreuzer ; xmm7 - resH 17304afb647cSTimo Kreuzer ; xmm5 - res 17314afb647cSTimo Kreuzer 17324afb647cSTimo Kreuzer mov rax, QWORD PTR [save_y+rsp] 17334afb647cSTimo Kreuzer and rax, QWORD PTR __real_fffffffff8000000 17344afb647cSTimo Kreuzer 17354afb647cSTimo Kreuzer vaddsd xmm2, xmm2, xmm3 17364afb647cSTimo Kreuzer vsubsd xmm7, xmm7, xmm5 17374afb647cSTimo Kreuzer vsubsd xmm1, xmm1, xmm2 17384afb647cSTimo Kreuzer vaddsd xmm7, xmm7, xmm3 17394afb647cSTimo Kreuzer vsubsd xmm5, xmm5, xmm0 17404afb647cSTimo Kreuzer 17414afb647cSTimo Kreuzer mov QWORD PTR [y_head+rsp], rax 17424afb647cSTimo Kreuzer vmovsd xmm4, QWORD PTR [save_y+rsp] 17434afb647cSTimo Kreuzer 17444afb647cSTimo Kreuzer vaddsd xmm7, xmm7, xmm1 17454afb647cSTimo Kreuzer vaddsd xmm7, xmm7, xmm5 17464afb647cSTimo Kreuzer 17474afb647cSTimo Kreuzer ; res = H + T 17484afb647cSTimo Kreuzer ; H has leading 26 bits of precision 17494afb647cSTimo Kreuzer ; T has full precision 17504afb647cSTimo Kreuzer 17514afb647cSTimo Kreuzer ; xmm0 - H 17524afb647cSTimo Kreuzer ; xmm7 - T 17534afb647cSTimo Kreuzer 17544afb647cSTimo Kreuzer vmovsd xmm2, QWORD PTR [y_head+rsp] 17554afb647cSTimo Kreuzer vsubsd xmm4, xmm4, xmm2 17564afb647cSTimo Kreuzer 17574afb647cSTimo Kreuzer ; y is split into head and tail 17584afb647cSTimo Kreuzer ; for y * ln(x) computation 17594afb647cSTimo Kreuzer 17604afb647cSTimo Kreuzer ; xmm4 - Yt 17614afb647cSTimo Kreuzer ; xmm2 - Yh 17624afb647cSTimo Kreuzer ; xmm0 - H 17634afb647cSTimo Kreuzer ; xmm7 - T 17644afb647cSTimo Kreuzer 17654afb647cSTimo Kreuzer vmulsd xmm3, xmm4, xmm7 ; YtRt 17664afb647cSTimo Kreuzer vmulsd xmm4, xmm4, xmm0 ; YtRh 17674afb647cSTimo Kreuzer vmulsd xmm5, 
xmm7, xmm2 ; YhRt 17684afb647cSTimo Kreuzer vmulsd xmm6, xmm0, xmm2 ; YhRh 17694afb647cSTimo Kreuzer 17704afb647cSTimo Kreuzer vmovapd xmm1, xmm6 17714afb647cSTimo Kreuzer vaddsd xmm3, xmm3, xmm4 17724afb647cSTimo Kreuzer vaddsd xmm3, xmm3, xmm5 17734afb647cSTimo Kreuzer 17744afb647cSTimo Kreuzer vaddsd xmm1, xmm1, xmm3 17754afb647cSTimo Kreuzer vmovapd xmm0, xmm1 17764afb647cSTimo Kreuzer 17774afb647cSTimo Kreuzer vsubsd xmm6, xmm6, xmm1 17784afb647cSTimo Kreuzer vaddsd xmm6, xmm6, xmm3 17794afb647cSTimo Kreuzer 17804afb647cSTimo Kreuzer ; y * ln(x) = v + vt 17814afb647cSTimo Kreuzer ; v and vt are in full precision 17824afb647cSTimo Kreuzer 17834afb647cSTimo Kreuzer ; xmm0 - v 17844afb647cSTimo Kreuzer ; xmm6 - vt 17854afb647cSTimo Kreuzer 17864afb647cSTimo Kreuzer ; ----------------------------- 17874afb647cSTimo Kreuzer ; compute exp( y * ln(x) ) here 17884afb647cSTimo Kreuzer ; ----------------------------- 17894afb647cSTimo Kreuzer 17904afb647cSTimo Kreuzer ; v * (64/ln(2)) 17914afb647cSTimo Kreuzer vmovsd QWORD PTR [p_temp_exp+rsp], xmm0 17924afb647cSTimo Kreuzer vmulsd xmm7, xmm0, QWORD PTR __real_64_by_log2 17934afb647cSTimo Kreuzer mov rdx, QWORD PTR [p_temp_exp+rsp] 17944afb647cSTimo Kreuzer 17954afb647cSTimo Kreuzer ; v < 1024*ln(2), ( v * (64/ln(2)) ) < 64*1024 17964afb647cSTimo Kreuzer ; v >= -1075*ln(2), ( v * (64/ln(2)) ) >= 64*(-1075) 17974afb647cSTimo Kreuzer vcomisd xmm7, QWORD PTR __real_p65536 17984afb647cSTimo Kreuzer ja Lpow_fma3_process_result_inf 17994afb647cSTimo Kreuzer 18004afb647cSTimo Kreuzer vcomisd xmm7, QWORD PTR __real_m68800 18014afb647cSTimo Kreuzer jb Lpow_fma3_process_result_zero 18024afb647cSTimo Kreuzer 18034afb647cSTimo Kreuzer ; n = int( v * (64/ln(2)) ) 18044afb647cSTimo Kreuzer vcvtpd2dq xmm4, xmm7 18054afb647cSTimo Kreuzer lea r10, __two_to_jby64_head_table 18064afb647cSTimo Kreuzer lea r11, __two_to_jby64_tail_table 18074afb647cSTimo Kreuzer vcvtdq2pd xmm1, xmm4 18084afb647cSTimo Kreuzer 18094afb647cSTimo Kreuzer ; r1 = 
x - n * ln(2)/64 head 18104afb647cSTimo Kreuzer vfnmadd231sd xmm0, xmm1, QWORD PTR __real_log2_by_64_head 18114afb647cSTimo Kreuzer vmovd ecx, xmm4 18124afb647cSTimo Kreuzer mov rax, 3fh 18134afb647cSTimo Kreuzer and eax, ecx 18144afb647cSTimo Kreuzer 18154afb647cSTimo Kreuzer ; r2 = - n * ln(2)/64 tail 18164afb647cSTimo Kreuzer vmulsd xmm1, xmm1, QWORD PTR __real_log2_by_64_tail 18174afb647cSTimo Kreuzer vmovapd xmm2, xmm0 18184afb647cSTimo Kreuzer 18194afb647cSTimo Kreuzer ; m = (n - j) / 64 18204afb647cSTimo Kreuzer sub ecx, eax 18214afb647cSTimo Kreuzer sar ecx, 6 18224afb647cSTimo Kreuzer 18234afb647cSTimo Kreuzer ; r1+r2 18244afb647cSTimo Kreuzer vaddsd xmm2, xmm2, xmm1 18254afb647cSTimo Kreuzer vaddsd xmm2, xmm2, xmm6 ; add vt here 18264afb647cSTimo Kreuzer vmovapd xmm1, xmm2 18274afb647cSTimo Kreuzer 18284afb647cSTimo Kreuzer ; q 18294afb647cSTimo Kreuzer vmovsd xmm0, QWORD PTR __real_1_by_720 18304afb647cSTimo Kreuzer xor r9d, r9d 18314afb647cSTimo Kreuzer vfmadd213sd xmm0, xmm2, QWORD PTR __real_1_by_120 18324afb647cSTimo Kreuzer cmp ecx, DWORD PTR __denormal_threshold 18334afb647cSTimo Kreuzer vfmadd213sd xmm0, xmm2, QWORD PTR __real_1_by_24 18344afb647cSTimo Kreuzer cmovle r9d, ecx 18354afb647cSTimo Kreuzer vfmadd213sd xmm0, xmm2, QWORD PTR __real_1_by_6 18364afb647cSTimo Kreuzer add rcx, 1023 18374afb647cSTimo Kreuzer vfmadd213sd xmm0, xmm2, QWORD PTR __real_1_by_2 18384afb647cSTimo Kreuzer shl rcx, 52 18394afb647cSTimo Kreuzer vfmadd213sd xmm0, xmm2, QWORD PTR __real_one 18404afb647cSTimo Kreuzer vmulsd xmm0, xmm0, xmm2 ; xmm0 <-- q 18414afb647cSTimo Kreuzer; movsd xmm0, QWORD PTR __real_1_by_2 18424afb647cSTimo Kreuzer; movsd xmm3, QWORD PTR __real_1_by_24 18434afb647cSTimo Kreuzer; movsd xmm4, QWORD PTR __real_1_by_720 18444afb647cSTimo Kreuzer; mulsd xmm1, xmm2 ; xmm1 <-- r^2 18454afb647cSTimo Kreuzer; mulsd xmm0, xmm2 ; xmm0 <-- r/2 18464afb647cSTimo Kreuzer; mulsd xmm3, xmm2 ; xmm3 <-- r/24 18474afb647cSTimo Kreuzer; mulsd xmm4, xmm2 ; xmm4 <-- 
r/720 18484afb647cSTimo Kreuzer 18494afb647cSTimo Kreuzer; movsd xmm5, xmm1 ; xmm5 <-- copy of r^2 18504afb647cSTimo Kreuzer; mulsd xmm1, xmm2 ; xmm1 <-- r^3 18514afb647cSTimo Kreuzer; addsd xmm0, QWORD PTR __real_one ; xmm0 <-- r/2 + 1 18524afb647cSTimo Kreuzer; addsd xmm3, QWORD PTR __real_1_by_6 ; xmm3 <-- r/24 + 1/6 18534afb647cSTimo Kreuzer; mulsd xmm5, xmm1 ; xmm5 <-- r^5 18544afb647cSTimo Kreuzer; addsd xmm4, QWORD PTR __real_1_by_120 ; xmm4 <-- r/720 + 1/120 18554afb647cSTimo Kreuzer; mulsd xmm0, xmm2 ; xmm0 <-- (r/2 + 1)*r 18564afb647cSTimo Kreuzer; mulsd xmm3, xmm1 ; xmm3 <-- (r/24 + 1/6)*r^3 18574afb647cSTimo Kreuzer 18584afb647cSTimo Kreuzer; mulsd xmm4, xmm5 ; xmm4 <-- (r/720 + 1/120)*r^5 18594afb647cSTimo Kreuzer 18604afb647cSTimo Kreuzer; ; deal with denormal results 18614afb647cSTimo Kreuzer; xor r9d, r9d 18624afb647cSTimo Kreuzer; cmp ecx, DWORD PTR __denormal_threshold 18634afb647cSTimo Kreuzer 18644afb647cSTimo Kreuzer; addsd xmm3, xmm4 ; xmm3 <-- (r/720 + 1/120)*r^5 + (r/24 + 1/6)*r^3 18654afb647cSTimo Kreuzer; addsd xmm0, xmm3 ; xmm0 <-- poly 18664afb647cSTimo Kreuzer 18674afb647cSTimo Kreuzer; cmovle r9d, ecx 18684afb647cSTimo Kreuzer; add rcx, 1023 18694afb647cSTimo Kreuzer; shl rcx, 52 18704afb647cSTimo Kreuzer 18714afb647cSTimo Kreuzer ; f1, f2 18724afb647cSTimo Kreuzer vmulsd xmm5, xmm0, QWORD PTR [r11+rax*8] 18734afb647cSTimo Kreuzer vmulsd xmm1, xmm0, QWORD PTR [r10+rax*8] 18744afb647cSTimo Kreuzer 18754afb647cSTimo Kreuzer cmp rcx, QWORD PTR __real_inf 18764afb647cSTimo Kreuzer 18774afb647cSTimo Kreuzer ; (f1+f2)*(1+q) 18784afb647cSTimo Kreuzer vaddsd xmm5, xmm5, QWORD PTR [r11+rax*8] 18794afb647cSTimo Kreuzer vaddsd xmm1, xmm1, xmm5 18804afb647cSTimo Kreuzer vaddsd xmm1, xmm1, QWORD PTR [r10+rax*8] 18814afb647cSTimo Kreuzer vmovapd xmm0, xmm1 18824afb647cSTimo Kreuzer 18834afb647cSTimo Kreuzer je Lpow_fma3_process_almost_inf 18844afb647cSTimo Kreuzer 18854afb647cSTimo Kreuzer test r9d, r9d 18864afb647cSTimo Kreuzer mov QWORD PTR 
[p_temp_exp+rsp], rcx 18874afb647cSTimo Kreuzer jnz Lpow_fma3_process_denormal 18884afb647cSTimo Kreuzer vmulsd xmm0, xmm0, QWORD PTR [p_temp_exp+rsp] 18894afb647cSTimo Kreuzer vorpd xmm0, xmm0, XMMWORD PTR [negate_result+rsp] 18904afb647cSTimo Kreuzer 18914afb647cSTimo KreuzerLpow_fma3_final_check: 18924afb647cSTimo Kreuzer AVXRestoreXmm xmm7, save_xmm7 18934afb647cSTimo Kreuzer AVXRestoreXmm xmm6, save_xmm6 18944afb647cSTimo Kreuzer StackDeallocate stack_size 18954afb647cSTimo Kreuzer ret 18964afb647cSTimo Kreuzer 18974afb647cSTimo KreuzerALIGN 16 18984afb647cSTimo KreuzerLpow_fma3_process_almost_inf: 18994afb647cSTimo Kreuzer vcomisd xmm0, QWORD PTR __real_one 19004afb647cSTimo Kreuzer jae Lpow_fma3_process_result_inf 19014afb647cSTimo Kreuzer 19024afb647cSTimo Kreuzer vorpd xmm0, xmm0, XMMWORD PTR __enable_almost_inf 19034afb647cSTimo Kreuzer vorpd xmm0, xmm0, XMMWORD PTR [negate_result+rsp] 19044afb647cSTimo Kreuzer jmp Lpow_fma3_final_check 19054afb647cSTimo Kreuzer 19064afb647cSTimo KreuzerALIGN 16 19074afb647cSTimo KreuzerLpow_fma3_process_denormal: 19084afb647cSTimo Kreuzer mov ecx, r9d 19094afb647cSTimo Kreuzer xor r11d, r11d 19104afb647cSTimo Kreuzer vcomisd xmm0, QWORD PTR __real_one 19114afb647cSTimo Kreuzer cmovae r11d, ecx 19124afb647cSTimo Kreuzer cmp r11d, DWORD PTR __denormal_threshold 19134afb647cSTimo Kreuzer jne Lpow_fma3_process_true_denormal 19144afb647cSTimo Kreuzer 19154afb647cSTimo Kreuzer vmulsd xmm0, xmm0, QWORD PTR [p_temp_exp+rsp] 19164afb647cSTimo Kreuzer vorpd xmm0, xmm0, XMMWORD PTR [negate_result+rsp] 19174afb647cSTimo Kreuzer jmp Lpow_fma3_final_check 19184afb647cSTimo Kreuzer 19194afb647cSTimo KreuzerALIGN 16 19204afb647cSTimo KreuzerLpow_fma3_process_true_denormal: 19214afb647cSTimo Kreuzer xor r8, r8 19224afb647cSTimo Kreuzer cmp rdx, QWORD PTR __denormal_tiny_threshold 19234afb647cSTimo Kreuzer mov r9, 1 19244afb647cSTimo Kreuzer jg Lpow_fma3_process_denormal_tiny 19254afb647cSTimo Kreuzer add ecx, 1074 19264afb647cSTimo 
Kreuzer cmovs rcx, r8 19274afb647cSTimo Kreuzer shl r9, cl 19284afb647cSTimo Kreuzer mov rcx, r9 19294afb647cSTimo Kreuzer 19304afb647cSTimo Kreuzer mov QWORD PTR [p_temp_exp+rsp], rcx 19314afb647cSTimo Kreuzer vmulsd xmm0, xmm0, QWORD PTR [p_temp_exp+rsp] 19324afb647cSTimo Kreuzer vorpd xmm0, xmm0, XMMWORD PTR [negate_result+rsp] 19334afb647cSTimo Kreuzer jmp Lpow_fma3_z_denormal 19344afb647cSTimo Kreuzer 19354afb647cSTimo KreuzerALIGN 16 19364afb647cSTimo KreuzerLpow_fma3_process_denormal_tiny: 19374afb647cSTimo Kreuzer vmovsd xmm0, QWORD PTR __real_smallest_denormal 19384afb647cSTimo Kreuzer vorpd xmm0, xmm0, XMMWORD PTR [negate_result+rsp] 19394afb647cSTimo Kreuzer jmp Lpow_fma3_z_denormal 19404afb647cSTimo Kreuzer 19414afb647cSTimo KreuzerALIGN 16 19424afb647cSTimo KreuzerLpow_fma3_process_result_zero: 19434afb647cSTimo Kreuzer mov r11, QWORD PTR __real_zero 19444afb647cSTimo Kreuzer or r11, QWORD PTR [negate_result+rsp] 19454afb647cSTimo Kreuzer jmp Lpow_fma3_z_is_zero_or_inf 19464afb647cSTimo Kreuzer 19474afb647cSTimo KreuzerALIGN 16 19484afb647cSTimo KreuzerLpow_fma3_process_result_inf: 19494afb647cSTimo Kreuzer mov r11, QWORD PTR __real_inf 19504afb647cSTimo Kreuzer or r11, QWORD PTR [negate_result+rsp] 19514afb647cSTimo Kreuzer jmp Lpow_fma3_z_is_zero_or_inf 19524afb647cSTimo Kreuzer 19534afb647cSTimo KreuzerALIGN 16 19544afb647cSTimo KreuzerLpow_fma3_denormal_adjust: 19554afb647cSTimo Kreuzer vpor xmm2, xmm2, XMMWORD PTR __real_one 19564afb647cSTimo Kreuzer vsubsd xmm2, xmm2, QWORD PTR __real_one 19574afb647cSTimo Kreuzer vmovapd xmm5, xmm2 19584afb647cSTimo Kreuzer vpand xmm2, xmm2, XMMWORD PTR __real_mant 19594afb647cSTimo Kreuzer vmovq r8, xmm2 19604afb647cSTimo Kreuzer vpsrlq xmm5, xmm5, 52 19614afb647cSTimo Kreuzer vpsubd xmm5, xmm5, XMMWORD PTR __mask_2045 19624afb647cSTimo Kreuzer vcvtdq2pd xmm6, xmm5 19634afb647cSTimo Kreuzer jmp Lpow_fma3_continue_common 19644afb647cSTimo Kreuzer 19654afb647cSTimo KreuzerALIGN 16 19664afb647cSTimo 
KreuzerLpow_fma3_x_is_neg: 19674afb647cSTimo Kreuzer 19684afb647cSTimo Kreuzer mov r10, QWORD PTR __exp_mask 19694afb647cSTimo Kreuzer and r10, r8 19704afb647cSTimo Kreuzer cmp r10, QWORD PTR __ay_max_bound 19714afb647cSTimo Kreuzer jg Lpow_fma3_ay_is_very_large 19724afb647cSTimo Kreuzer 19734afb647cSTimo Kreuzer ; determine if y is an integer 19744afb647cSTimo Kreuzer mov r10, QWORD PTR __exp_mant_mask 19754afb647cSTimo Kreuzer and r10, r8 19764afb647cSTimo Kreuzer mov r11, r10 19774afb647cSTimo Kreuzer mov rcx, QWORD PTR __exp_shift 19784afb647cSTimo Kreuzer shr r10, cl 19794afb647cSTimo Kreuzer sub r10, QWORD PTR __exp_bias 19804afb647cSTimo Kreuzer js Lpow_fma3_x_is_neg_y_is_not_int 19814afb647cSTimo Kreuzer 19824afb647cSTimo Kreuzer mov rax, QWORD PTR __exp_mant_mask 19834afb647cSTimo Kreuzer and rax, rdx 19844afb647cSTimo Kreuzer mov QWORD PTR [save_ax+rsp], rax 19854afb647cSTimo Kreuzer 19864afb647cSTimo Kreuzer cmp r10, QWORD PTR __yexp_53 19874afb647cSTimo Kreuzer mov rcx, r10 19884afb647cSTimo Kreuzer jg Lpow_fma3_continue_after_y_int_check 19894afb647cSTimo Kreuzer 19904afb647cSTimo Kreuzer mov r9, QWORD PTR __mant_full 19914afb647cSTimo Kreuzer shr r9, cl 19924afb647cSTimo Kreuzer and r9, r11 19934afb647cSTimo Kreuzer jnz Lpow_fma3_x_is_neg_y_is_not_int 19944afb647cSTimo Kreuzer 19954afb647cSTimo Kreuzer mov r9, QWORD PTR __1_before_mant 19964afb647cSTimo Kreuzer shr r9, cl 19974afb647cSTimo Kreuzer and r9, r11 19984afb647cSTimo Kreuzer jz Lpow_fma3_continue_after_y_int_check 19994afb647cSTimo Kreuzer 20004afb647cSTimo Kreuzer mov rax, QWORD PTR __sign_mask 20014afb647cSTimo Kreuzer mov QWORD PTR [negate_result+rsp], rax 20024afb647cSTimo Kreuzer 20034afb647cSTimo KreuzerLpow_fma3_continue_after_y_int_check: 20044afb647cSTimo Kreuzer 20054afb647cSTimo Kreuzer cmp rdx, QWORD PTR __neg_zero 20064afb647cSTimo Kreuzer je Lpow_fma3_x_is_zero 20074afb647cSTimo Kreuzer 20084afb647cSTimo Kreuzer cmp rdx, QWORD PTR __neg_one 20094afb647cSTimo Kreuzer je 
Lpow_fma3_x_is_neg_one 20104afb647cSTimo Kreuzer 20114afb647cSTimo Kreuzer mov r9, QWORD PTR __exp_mask 20124afb647cSTimo Kreuzer and r9, rdx 20134afb647cSTimo Kreuzer cmp r9, QWORD PTR __exp_mask 20144afb647cSTimo Kreuzer je Lpow_fma3_x_is_inf_or_nan 20154afb647cSTimo Kreuzer 20164afb647cSTimo Kreuzer vmovsd xmm0, QWORD PTR [save_ax+rsp] 20174afb647cSTimo Kreuzer jmp Lpow_fma3_log_x 20184afb647cSTimo Kreuzer 20194afb647cSTimo Kreuzer 20204afb647cSTimo KreuzerALIGN 16 20214afb647cSTimo KreuzerLpow_fma3_near_one: 20224afb647cSTimo Kreuzer 20234afb647cSTimo Kreuzer ; f = F - Y, r = f * inv 20244afb647cSTimo Kreuzer vmovapd xmm0, xmm1 20254afb647cSTimo Kreuzer vsubsd xmm1, xmm1, xmm2 ; xmm1 <-- f 20264afb647cSTimo Kreuzer vmovapd xmm4, xmm1 ; xmm4 <-- copy of f 20274afb647cSTimo Kreuzer 20284afb647cSTimo Kreuzer vmovsd xmm3, QWORD PTR [r9+r8*8] 20294afb647cSTimo Kreuzer vaddsd xmm3, xmm3, QWORD PTR [rdx+r8*8] 20304afb647cSTimo Kreuzer vmulsd xmm4, xmm4, xmm3 ; xmm4 <-- r = f*inv 2031*105426b8STimo Kreuzer vandpd xmm4, xmm4, XMMWORD PTR __real_fffffffff8000000 ; r1 20324afb647cSTimo Kreuzer vmovapd xmm5, xmm4 ; xmm5 <-- copy of r1 20334afb647cSTimo Kreuzer; mulsd xmm4, xmm0 ; xmm4 <-- F*r1 20344afb647cSTimo Kreuzer; subsd xmm1, xmm4 ; xmm1 <-- f - F*r1 20354afb647cSTimo Kreuzer vfnmadd231sd xmm1, xmm4, xmm0 ; xmm1 <-- f - F*r1 20364afb647cSTimo Kreuzer vmulsd xmm1, xmm1, xmm3 ; xmm1 <-- r2 = (f - F*r1)*inv 20374afb647cSTimo Kreuzer vmovapd xmm7, xmm1 ; xmm7 <-- copy of r2 20384afb647cSTimo Kreuzer vaddsd xmm1, xmm1, xmm5 ; xmm1 <-- r = r1 + r2 20394afb647cSTimo Kreuzer 20404afb647cSTimo Kreuzer vmovapd xmm2, xmm1 ; xmm2 <-- copy of r 20414afb647cSTimo Kreuzer vmovapd xmm0, xmm1 ; xmm0 <-- copy of r 20424afb647cSTimo Kreuzer 20434afb647cSTimo Kreuzer lea r9, __log_256_lead 20444afb647cSTimo Kreuzer 20454afb647cSTimo Kreuzer ; poly 20464afb647cSTimo Kreuzer ; NOTE: Given the complicated corrections here, 20474afb647cSTimo Kreuzer ; I'm afraid to mess with it too much - 
WAT 20484afb647cSTimo Kreuzer vmovsd xmm3, QWORD PTR __real_1_over_7 20494afb647cSTimo Kreuzer vmovsd xmm1, QWORD PTR __real_1_over_4 20504afb647cSTimo Kreuzer vmulsd xmm0, xmm0, xmm2 ; xmm0 <-- r^2 20514afb647cSTimo Kreuzer vmovapd xmm4, xmm0 ; xmm4 <-- copy of r^2 20524afb647cSTimo Kreuzer vfmadd213sd xmm3, xmm2, QWORD PTR __real_1_over_6 ; xmm3 <-- r/7 + 1/6 20534afb647cSTimo Kreuzer vfmadd213sd xmm1, xmm2, QWORD PTR __real_1_over_3 ; xmm1 <-- r/4 + 1/3 20544afb647cSTimo Kreuzer vmulsd xmm4, xmm4, xmm0 ; xmm4 <-- r^4 20554afb647cSTimo Kreuzer vmulsd xmm1, xmm1, xmm2 ; xmm1 <-- (r/4 + 1/3)*r 20564afb647cSTimo Kreuzer vfmadd213sd xmm3, xmm2, QWORD PTR __real_1_over_5 ; xmm3 <-- ((r/7 + 1/6)*r) + 1/5 20574afb647cSTimo Kreuzer vmulsd xmm3, xmm3, xmm2 ; xmm3 <-- (((r/7 + 1/6)*r) + 1/5)*r 20584afb647cSTimo Kreuzer vmulsd xmm1, xmm1, xmm0 ; xmm1 <-- ((r/4 + 1/3)*r)*r^2 20594afb647cSTimo Kreuzer vmulsd xmm3, xmm3, xmm4 ; xmm3 <-- ((((r/7 + 1/6)*r) + 1/5)*r)*r^4 20604afb647cSTimo Kreuzer 20614afb647cSTimo Kreuzer vmovapd xmm2, xmm5 ; xmm2 <-- copy of r1 20624afb647cSTimo Kreuzer vmovapd xmm0, xmm7 ; xmm0 <-- copy of r2 20634afb647cSTimo Kreuzer vmulsd xmm0, xmm0, xmm0 ; xmm0 <-- r2^2 20644afb647cSTimo Kreuzer vmulsd xmm0, xmm0, QWORD PTR __real_1_over_2 ; xmm0 <-- r2^2/2 20654afb647cSTimo Kreuzer; mulsd xmm5, xmm7 ; xmm5 <-- r1*r2 20664afb647cSTimo Kreuzer; addsd xmm5, xmm0 ; xmm5 <-- r1*r2 + r2^2/2 20674afb647cSTimo Kreuzer vfmadd213sd xmm5, xmm7, xmm0 ; xmm5 <-- r1*r2 + r2^2/2 (fused form of the two commented-out ops) 20684afb647cSTimo Kreuzer vaddsd xmm5, xmm5, xmm7 ; xmm5 <-- r1*r2 + r2^2/2 + r2 20694afb647cSTimo Kreuzer 20704afb647cSTimo Kreuzer vmovapd xmm0, xmm2 ; xmm0 <-- copy of r1 20714afb647cSTimo Kreuzer vmovapd xmm7, xmm2 ; xmm7 <-- copy of r1 20724afb647cSTimo Kreuzer vmulsd xmm0, xmm0, xmm0 ; xmm0 <-- r1^2 20734afb647cSTimo Kreuzer vmulsd xmm0, xmm0, QWORD PTR __real_1_over_2 ; xmm0 <-- r1^2/2 20744afb647cSTimo Kreuzer vmovapd xmm4, xmm0 ; xmm4 <-- copy of r1^2/2 20754afb647cSTimo Kreuzer vaddsd
xmm2, xmm2, xmm0 ; xmm2 <-- r1 + r1^2/2 20764afb647cSTimo Kreuzer vsubsd xmm7, xmm7, xmm2 ; xmm7 <-- r1 - (r1 + r1^2/2) 20774afb647cSTimo Kreuzer vaddsd xmm7, xmm7, xmm4 ; xmm7 <-- r1 - (r1 + r1^2/2) + r1^2/2 20784afb647cSTimo Kreuzer ; xmm3 <-- ((((r/7 + 1/6)*r) + 1/5)*r)*r^4 + r1 - (r1 + r1^2/2) + r1^2/2 20794afb647cSTimo Kreuzer vaddsd xmm3, xmm3, xmm7 20804afb647cSTimo Kreuzer vmovsd xmm4, QWORD PTR __real_log2_tail 20814afb647cSTimo Kreuzer ; xmm1 <-- (((((r/7 + 1/6)*r) + 1/5)*r)*r^4) + 20824afb647cSTimo Kreuzer ; (r1 - (r1 + r1^2/2) + r1^2/2) + ((r/4 + 1/3)*r)*r^2) 20834afb647cSTimo Kreuzer vaddsd xmm1, xmm1, xmm3 20844afb647cSTimo Kreuzer lea rdx, __log_256_tail 20854afb647cSTimo Kreuzer ; xmm1 <-- ((((((r/7 + 1/6)*r) + 1/5)*r)*r^4) + 20864afb647cSTimo Kreuzer ; (r1 - (r1 + r1^2/2) + r1^2/2) + ((r/4 + 1/3)*r)*r^2)) 20874afb647cSTimo Kreuzer ; +(r1*r2 + r2^2/2 + r2) 20884afb647cSTimo Kreuzer vaddsd xmm1, xmm1, xmm5 20894afb647cSTimo Kreuzer ; xmm4 <-- xexp (xmm6) * log2_tail + log256_tail 20904afb647cSTimo Kreuzer vfmadd213sd xmm4, xmm6, QWORD PTR [rdx+r8*8] 20914afb647cSTimo Kreuzer ; xmm4 <-- xexp * log2_tail + log256_tail - corrected poly 20924afb647cSTimo Kreuzer vsubsd xmm4, xmm4, xmm1 20934afb647cSTimo Kreuzer 20944afb647cSTimo Kreuzer vmovapd xmm1, xmm4 20954afb647cSTimo Kreuzer vsubsd xmm3, xmm4, xmm2 ; xmm3 <-- xmm4 - (r1 + r1^2/2)
20964afb647cSTimo Kreuzer 20974afb647cSTimo Kreuzer vmovsd xmm0, QWORD PTR [r9+r8*8] ; xmm0 <-- log256_lead 20984afb647cSTimo Kreuzer ; xmm0 <-- log256_lead + xexp (xmm6) * log2_lead 20994afb647cSTimo Kreuzer vfmadd231sd xmm0, xmm6, QWORD PTR __real_log2_lead 21004afb647cSTimo Kreuzer 21014afb647cSTimo Kreuzer ; at this point, xmm0, xmm1, xmm2, and xmm3 should matter 21024afb647cSTimo Kreuzer jmp Lpow_fma3_log_x_continue 21034afb647cSTimo Kreuzer 21044afb647cSTimo Kreuzer 21054afb647cSTimo KreuzerALIGN 16 21064afb647cSTimo KreuzerLpow_fma3_x_is_pos_one: 21074afb647cSTimo Kreuzer jmp Lpow_fma3_final_check 21084afb647cSTimo Kreuzer 21094afb647cSTimo KreuzerALIGN 16 21104afb647cSTimo KreuzerLpow_fma3_y_is_zero: 21114afb647cSTimo Kreuzer vmovsd xmm0, QWORD PTR __real_one 21124afb647cSTimo Kreuzer jmp Lpow_fma3_final_check 21134afb647cSTimo Kreuzer 21144afb647cSTimo KreuzerALIGN 16 21154afb647cSTimo KreuzerLpow_fma3_y_is_one: 21164afb647cSTimo Kreuzer xor rax, rax 21174afb647cSTimo Kreuzer mov r11, rdx 21184afb647cSTimo Kreuzer mov r9, QWORD PTR __exp_mask 21194afb647cSTimo Kreuzer ;or r11, QWORD PTR __qnan_set 21204afb647cSTimo Kreuzer and r9, rdx 21214afb647cSTimo Kreuzer cmp r9, QWORD PTR __exp_mask 21224afb647cSTimo Kreuzer cmove rax, rdx 21234afb647cSTimo Kreuzer mov r9, QWORD PTR __mant_mask 21244afb647cSTimo Kreuzer and r9, rax 21254afb647cSTimo Kreuzer jnz Lpow_fma3_x_is_nan 21264afb647cSTimo Kreuzer 21274afb647cSTimo Kreuzer vmovq xmm0, rdx 21284afb647cSTimo Kreuzer jmp Lpow_fma3_final_check 21294afb647cSTimo Kreuzer 21304afb647cSTimo KreuzerALIGN 16 21314afb647cSTimo KreuzerLpow_fma3_x_is_neg_one: 21324afb647cSTimo Kreuzer mov rdx, QWORD PTR __pos_one 21334afb647cSTimo Kreuzer or rdx, QWORD PTR [negate_result+rsp] 21344afb647cSTimo Kreuzer xor rax, rax 21354afb647cSTimo Kreuzer mov r11, r8 21364afb647cSTimo Kreuzer mov r10, QWORD PTR __exp_mask 21374afb647cSTimo Kreuzer ;or r11, QWORD PTR __qnan_set 21384afb647cSTimo Kreuzer and r10, r8 21394afb647cSTimo Kreuzer cmp r10,
QWORD PTR __exp_mask 21404afb647cSTimo Kreuzer cmove rax, r8 21414afb647cSTimo Kreuzer mov r10, QWORD PTR __mant_mask 21424afb647cSTimo Kreuzer and r10, rax 21434afb647cSTimo Kreuzer jnz Lpow_fma3_y_is_nan 21444afb647cSTimo Kreuzer 21454afb647cSTimo Kreuzer vmovq xmm0, rdx 21464afb647cSTimo Kreuzer jmp Lpow_fma3_final_check 21474afb647cSTimo Kreuzer 21484afb647cSTimo KreuzerALIGN 16 21494afb647cSTimo KreuzerLpow_fma3_x_is_neg_y_is_not_int: 21504afb647cSTimo Kreuzer mov r9, QWORD PTR __exp_mask 21514afb647cSTimo Kreuzer and r9, rdx 21524afb647cSTimo Kreuzer cmp r9, QWORD PTR __exp_mask 21534afb647cSTimo Kreuzer je Lpow_fma3_x_is_inf_or_nan 21544afb647cSTimo Kreuzer 21554afb647cSTimo Kreuzer cmp rdx, QWORD PTR __neg_zero 21564afb647cSTimo Kreuzer je Lpow_fma3_x_is_zero 21574afb647cSTimo Kreuzer 21584afb647cSTimo Kreuzer vmovsd xmm0, QWORD PTR [save_x+rsp] 21594afb647cSTimo Kreuzer vmovsd xmm1, QWORD PTR [save_y+rsp] 21604afb647cSTimo Kreuzer vmovsd xmm2, QWORD PTR __neg_qnan 21614afb647cSTimo Kreuzer mov r9d, DWORD PTR __flag_x_neg_y_notint 21624afb647cSTimo Kreuzer 21634afb647cSTimo Kreuzer call fname_special 21644afb647cSTimo Kreuzer jmp Lpow_fma3_final_check 21654afb647cSTimo Kreuzer 21664afb647cSTimo KreuzerALIGN 16 21674afb647cSTimo KreuzerLpow_fma3_ay_is_very_large: 21684afb647cSTimo Kreuzer mov r9, QWORD PTR __exp_mask 21694afb647cSTimo Kreuzer and r9, rdx 21704afb647cSTimo Kreuzer cmp r9, QWORD PTR __exp_mask 21714afb647cSTimo Kreuzer je Lpow_fma3_x_is_inf_or_nan 21724afb647cSTimo Kreuzer 21734afb647cSTimo Kreuzer mov r9, QWORD PTR __exp_mant_mask 21744afb647cSTimo Kreuzer and r9, rdx 21754afb647cSTimo Kreuzer jz Lpow_fma3_x_is_zero 21764afb647cSTimo Kreuzer 21774afb647cSTimo Kreuzer cmp rdx, QWORD PTR __neg_one 21784afb647cSTimo Kreuzer je Lpow_fma3_x_is_neg_one 21794afb647cSTimo Kreuzer 21804afb647cSTimo Kreuzer mov r9, rdx 21814afb647cSTimo Kreuzer and r9, QWORD PTR __exp_mant_mask 21824afb647cSTimo Kreuzer cmp r9, QWORD PTR __pos_one 21834afb647cSTimo 
Kreuzer jl Lpow_fma3_ax_lt1_y_is_large_or_inf_or_nan 21844afb647cSTimo Kreuzer 21854afb647cSTimo Kreuzer jmp Lpow_fma3_ax_gt1_y_is_large_or_inf_or_nan 21864afb647cSTimo Kreuzer 21874afb647cSTimo KreuzerALIGN 16 21884afb647cSTimo KreuzerLpow_fma3_x_is_zero: 21894afb647cSTimo Kreuzer mov r10, QWORD PTR __exp_mask 21904afb647cSTimo Kreuzer xor rax, rax 21914afb647cSTimo Kreuzer and r10, r8 21924afb647cSTimo Kreuzer cmp r10, QWORD PTR __exp_mask 21934afb647cSTimo Kreuzer je Lpow_fma3_x_is_zero_y_is_inf_or_nan 21944afb647cSTimo Kreuzer 21954afb647cSTimo Kreuzer mov r10, QWORD PTR __sign_mask 21964afb647cSTimo Kreuzer and r10, r8 21974afb647cSTimo Kreuzer cmovnz rax, QWORD PTR __pos_inf 21984afb647cSTimo Kreuzer jnz Lpow_fma3_x_is_zero_z_is_inf 21994afb647cSTimo Kreuzer 22004afb647cSTimo Kreuzer vmovq xmm0, rax 22014afb647cSTimo Kreuzer vorpd xmm0, xmm0, XMMWORD PTR [negate_result+rsp] 22024afb647cSTimo Kreuzer jmp Lpow_fma3_final_check 22034afb647cSTimo Kreuzer 22044afb647cSTimo KreuzerALIGN 16 22054afb647cSTimo KreuzerLpow_fma3_x_is_zero_z_is_inf: 22064afb647cSTimo Kreuzer 22074afb647cSTimo Kreuzer vmovsd xmm0, QWORD PTR [save_x+rsp] 22084afb647cSTimo Kreuzer vmovsd xmm1, QWORD PTR [save_y+rsp] 22094afb647cSTimo Kreuzer vmovq xmm2, rax 22104afb647cSTimo Kreuzer vorpd xmm2, xmm2, XMMWORD PTR [negate_result+rsp] 22114afb647cSTimo Kreuzer mov r9d, DWORD PTR __flag_x_zero_z_inf 22124afb647cSTimo Kreuzer 22134afb647cSTimo Kreuzer call fname_special 22144afb647cSTimo Kreuzer jmp Lpow_fma3_final_check 22154afb647cSTimo Kreuzer 22164afb647cSTimo KreuzerALIGN 16 22174afb647cSTimo KreuzerLpow_fma3_x_is_zero_y_is_inf_or_nan: 22184afb647cSTimo Kreuzer mov r11, r8 22194afb647cSTimo Kreuzer cmp r8, QWORD PTR __neg_inf 22204afb647cSTimo Kreuzer; The next two lines do not correspond to IEEE754-2008. 
22214afb647cSTimo Kreuzer; +-0 ^ -Inf should be +Inf with no exception 22224afb647cSTimo Kreuzer; +-0 ^ +Inf should be +0 with no exception 22234afb647cSTimo Kreuzer; cmove rax, QWORD PTR __pos_inf 22244afb647cSTimo Kreuzer; je Lpow_fma3_x_is_zero_z_is_inf 22254afb647cSTimo Kreuzer; begin replacement 22264afb647cSTimo Kreuzer je Lpow_fma3_x_is_zero_y_is_neg_inf 22274afb647cSTimo Kreuzer cmp r8, QWORD PTR __neg_inf 22284afb647cSTimo Kreuzer je Lpow_fma3_x_is_zero_y_is_pos_inf 22294afb647cSTimo Kreuzer; end replacement 22304afb647cSTimo Kreuzer 22314afb647cSTimo Kreuzer ;or r11, QWORD PTR __qnan_set 22324afb647cSTimo Kreuzer mov r10, QWORD PTR __mant_mask 22334afb647cSTimo Kreuzer and r10, r8 22344afb647cSTimo Kreuzer jnz Lpow_fma3_y_is_nan 22354afb647cSTimo Kreuzer 22364afb647cSTimo Kreuzer vmovq xmm0, rax 22374afb647cSTimo Kreuzer jmp Lpow_fma3_final_check 22384afb647cSTimo Kreuzer 22394afb647cSTimo KreuzerALIGN 16 22404afb647cSTimo KreuzerLpow_fma3_x_is_zero_y_is_neg_inf: 22414afb647cSTimo Kreuzer ; quietly return +Inf 22424afb647cSTimo Kreuzer vmovsd xmm0, __pos_inf 22434afb647cSTimo Kreuzer jmp Lpow_fma3_final_check 22444afb647cSTimo Kreuzer 22454afb647cSTimo KreuzerALIGN 16 22464afb647cSTimo KreuzerLpow_fma3_x_is_zero_y_is_pos_inf: 22474afb647cSTimo Kreuzer ; quietly return +0. 
22484afb647cSTimo Kreuzer vxorpd xmm0, xmm0, xmm0 22494afb647cSTimo Kreuzer jmp Lpow_fma3_final_check 22504afb647cSTimo Kreuzer 22514afb647cSTimo KreuzerALIGN 16 22524afb647cSTimo KreuzerLpow_fma3_x_is_inf_or_nan: 22534afb647cSTimo Kreuzer xor r11, r11 22544afb647cSTimo Kreuzer mov r10, QWORD PTR __sign_mask 22554afb647cSTimo Kreuzer and r10, r8 22564afb647cSTimo Kreuzer cmovz r11, QWORD PTR __pos_inf 22574afb647cSTimo Kreuzer mov rax, rdx 22584afb647cSTimo Kreuzer mov r9, QWORD PTR __mant_mask 22594afb647cSTimo Kreuzer ;or rax, QWORD PTR __qnan_set 22604afb647cSTimo Kreuzer and r9, rdx 22614afb647cSTimo Kreuzer cmovnz r11, rax 22624afb647cSTimo Kreuzer jnz Lpow_fma3_x_is_nan 22634afb647cSTimo Kreuzer 22644afb647cSTimo Kreuzer xor rax, rax 22654afb647cSTimo Kreuzer mov r9, r8 22664afb647cSTimo Kreuzer mov r10, QWORD PTR __exp_mask 22674afb647cSTimo Kreuzer ;or r9, QWORD PTR __qnan_set 22684afb647cSTimo Kreuzer and r10, r8 22694afb647cSTimo Kreuzer cmp r10, QWORD PTR __exp_mask 22704afb647cSTimo Kreuzer cmove rax, r8 22714afb647cSTimo Kreuzer mov r10, QWORD PTR __mant_mask 22724afb647cSTimo Kreuzer and r10, rax 22734afb647cSTimo Kreuzer cmovnz r11, r9 22744afb647cSTimo Kreuzer jnz Lpow_fma3_y_is_nan 22754afb647cSTimo Kreuzer 22764afb647cSTimo Kreuzer vmovq xmm0, r11 22774afb647cSTimo Kreuzer vorpd xmm0, xmm0, XMMWORD PTR [negate_result+rsp] 22784afb647cSTimo Kreuzer jmp Lpow_fma3_final_check 22794afb647cSTimo Kreuzer 22804afb647cSTimo KreuzerALIGN 16 22814afb647cSTimo KreuzerLpow_fma3_ay_is_very_small: 22824afb647cSTimo Kreuzer vaddsd xmm0, xmm1, QWORD PTR __pos_one 22834afb647cSTimo Kreuzer jmp Lpow_fma3_final_check 22844afb647cSTimo Kreuzer 22854afb647cSTimo Kreuzer 22864afb647cSTimo KreuzerALIGN 16 22874afb647cSTimo KreuzerLpow_fma3_ax_lt1_y_is_large_or_inf_or_nan: 22884afb647cSTimo Kreuzer xor r11, r11 22894afb647cSTimo Kreuzer mov r10, QWORD PTR __sign_mask 22904afb647cSTimo Kreuzer and r10, r8 22914afb647cSTimo Kreuzer cmovnz r11, QWORD PTR __pos_inf 
22924afb647cSTimo Kreuzer jmp Lpow_fma3_adjust_for_nan 22934afb647cSTimo Kreuzer 22944afb647cSTimo KreuzerALIGN 16 22954afb647cSTimo KreuzerLpow_fma3_ax_gt1_y_is_large_or_inf_or_nan: 22964afb647cSTimo Kreuzer xor r11, r11 22974afb647cSTimo Kreuzer mov r10, QWORD PTR __sign_mask 22984afb647cSTimo Kreuzer and r10, r8 22994afb647cSTimo Kreuzer cmovz r11, QWORD PTR __pos_inf 23004afb647cSTimo Kreuzer 23014afb647cSTimo KreuzerALIGN 16 23024afb647cSTimo KreuzerLpow_fma3_adjust_for_nan: 23034afb647cSTimo Kreuzer 23044afb647cSTimo Kreuzer xor rax, rax 23054afb647cSTimo Kreuzer mov r9, r8 23064afb647cSTimo Kreuzer mov r10, QWORD PTR __exp_mask 23074afb647cSTimo Kreuzer ;or r9, QWORD PTR __qnan_set 23084afb647cSTimo Kreuzer and r10, r8 23094afb647cSTimo Kreuzer cmp r10, QWORD PTR __exp_mask 23104afb647cSTimo Kreuzer cmove rax, r8 23114afb647cSTimo Kreuzer mov r10, QWORD PTR __mant_mask 23124afb647cSTimo Kreuzer and r10, rax 23134afb647cSTimo Kreuzer cmovnz r11, r9 23144afb647cSTimo Kreuzer jnz Lpow_fma3_y_is_nan 23154afb647cSTimo Kreuzer 23164afb647cSTimo Kreuzer test rax, rax 23174afb647cSTimo Kreuzer jnz Lpow_fma3_y_is_inf 23184afb647cSTimo Kreuzer 23194afb647cSTimo KreuzerALIGN 16 23204afb647cSTimo KreuzerLpow_fma3_z_is_zero_or_inf: 23214afb647cSTimo Kreuzer 23224afb647cSTimo Kreuzer mov r9d, DWORD PTR __flag_z_zero 23234afb647cSTimo Kreuzer test r11, QWORD PTR __exp_mant_mask 23244afb647cSTimo Kreuzer cmovnz r9d, DWORD PTR __flag_z_inf 23254afb647cSTimo Kreuzer 23264afb647cSTimo Kreuzer vmovsd xmm0, QWORD PTR [save_x+rsp] 23274afb647cSTimo Kreuzer vmovsd xmm1, QWORD PTR [save_y+rsp] 23284afb647cSTimo Kreuzer vmovq xmm2, r11 23294afb647cSTimo Kreuzer 23304afb647cSTimo Kreuzer call fname_special 23314afb647cSTimo Kreuzer jmp Lpow_fma3_final_check 23324afb647cSTimo Kreuzer 23334afb647cSTimo KreuzerALIGN 16 23344afb647cSTimo KreuzerLpow_fma3_y_is_inf: 23354afb647cSTimo Kreuzer 23364afb647cSTimo Kreuzer vmovq xmm0, r11 23374afb647cSTimo Kreuzer jmp Lpow_fma3_final_check 

; ---------------------------------------------------------------------------
; NaN and denormal reporting for the FMA3 path.
;
; On entry to the NaN handlers r8 holds the raw bits of y and r11 the value
; that is handed to fname_special in xmm2.  Each handler loads
; xmm0 <- [save_x+rsp], xmm1 <- [save_y+rsp], xmm2 <- result and
; r9d <- a __flag_* code, calls fname_special and continues at
; Lpow_fma3_final_check.
; ---------------------------------------------------------------------------

ALIGN 16
Lpow_fma3_x_is_nan:

    ; Is y a NaN too?  rax <- y when its exponent field is all ones, else 0;
    ; a non-zero mantissa in rax then identifies a NaN.
    xor         eax, eax                    ; zeroes all of rax
    mov         r10, r8
    and         r10, QWORD PTR __exp_mask
    cmp         r10, QWORD PTR __exp_mask
    cmove       rax, r8
    mov         r10, rax
    and         r10, QWORD PTR __mant_mask
    jnz         Lpow_fma3_x_is_nan_y_is_nan

    ; only x is a NaN
    mov         r9d, DWORD PTR __flag_x_nan
    vmovq       xmm2, r11
    vmovsd      xmm1, QWORD PTR [save_y+rsp]
    vmovsd      xmm0, QWORD PTR [save_x+rsp]

    call        fname_special
    jmp         Lpow_fma3_final_check

ALIGN 16
Lpow_fma3_y_is_nan:

    ; only y is a NaN
    mov         r9d, DWORD PTR __flag_y_nan
    vmovq       xmm2, r11
    vmovsd      xmm1, QWORD PTR [save_y+rsp]
    vmovsd      xmm0, QWORD PTR [save_x+rsp]

    call        fname_special
    jmp         Lpow_fma3_final_check

ALIGN 16
Lpow_fma3_x_is_nan_y_is_nan:

    ; Both operands are NaNs: choose which one goes out in r11.
    mov         r9, r8                      ; r9 <- y

    ; r11 equal to __ind_pattern: take y and skip the remaining selection
    cmp         r11, QWORD PTR __ind_pattern
    cmove       r11, r9
    je          Lpow_fma3_continue_xy_nan

    ; y equal to __ind_pattern: replace it by r11
    cmp         r9, QWORD PTR __ind_pattern
    cmove       r9, r11

    ; r9 has its sign bit set: replace it by r11
    mov         r10, QWORD PTR __sign_mask
    and         r10, r9
    cmovnz      r9, r11

    ; r11 has its sign bit set: replace it by r9
    mov         r10, QWORD PTR __sign_mask
    and         r10, r11
    cmovnz      r11, r9

Lpow_fma3_continue_xy_nan:
    ;or          r11, QWORD PTR __qnan_set
    mov         r9d, DWORD PTR __flag_x_nan_y_nan
    vmovq       xmm2, r11
    vmovsd      xmm1, QWORD PTR [save_y+rsp]
    vmovsd      xmm0, QWORD PTR [save_x+rsp]

    call        fname_special
    jmp         Lpow_fma3_final_check

ALIGN 16
Lpow_fma3_z_denormal:
    ; the value in xmm0 on entry is passed in xmm2; it has to be copied
    ; before xmm0 is reloaded with x
    mov         r9d, DWORD PTR __flag_z_denormal
    vmovapd     xmm2, xmm0
    vmovsd      xmm0, QWORD PTR [save_x+rsp]
    vmovsd      xmm1, QWORD PTR [save_y+rsp]

    call        fname_special
    jmp         Lpow_fma3_final_check

fname endp
END