xref: /reactos/sdk/lib/crt/math/libm_sse2/pow.asm (revision 105426b8)
14afb647cSTimo Kreuzer;
24afb647cSTimo Kreuzer; MIT License
34afb647cSTimo Kreuzer; -----------
44afb647cSTimo Kreuzer;
54afb647cSTimo Kreuzer; Copyright (c) 2002-2019 Advanced Micro Devices, Inc.
64afb647cSTimo Kreuzer;
74afb647cSTimo Kreuzer; Permission is hereby granted, free of charge, to any person obtaining a copy
84afb647cSTimo Kreuzer; of this Software and associated documentation files (the "Software"), to deal
94afb647cSTimo Kreuzer; in the Software without restriction, including without limitation the rights
104afb647cSTimo Kreuzer; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
114afb647cSTimo Kreuzer; copies of the Software, and to permit persons to whom the Software is
124afb647cSTimo Kreuzer; furnished to do so, subject to the following conditions:
134afb647cSTimo Kreuzer;
144afb647cSTimo Kreuzer; The above copyright notice and this permission notice shall be included in
154afb647cSTimo Kreuzer; all copies or substantial portions of the Software.
164afb647cSTimo Kreuzer;
174afb647cSTimo Kreuzer; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
184afb647cSTimo Kreuzer; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
194afb647cSTimo Kreuzer; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
204afb647cSTimo Kreuzer; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
214afb647cSTimo Kreuzer; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
224afb647cSTimo Kreuzer; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
234afb647cSTimo Kreuzer; THE SOFTWARE.
244afb647cSTimo Kreuzer;
254afb647cSTimo Kreuzer; pow.asm
264afb647cSTimo Kreuzer;
274afb647cSTimo Kreuzer; An implementation of the pow libm function.
284afb647cSTimo Kreuzer;
294afb647cSTimo Kreuzer; Prototype:
304afb647cSTimo Kreuzer;
314afb647cSTimo Kreuzer;     double pow(double x, double y);
324afb647cSTimo Kreuzer;
334afb647cSTimo Kreuzer
344afb647cSTimo Kreuzer;
354afb647cSTimo Kreuzer;   Algorithm:
364afb647cSTimo Kreuzer;       x^y = e^(y*ln(x))
374afb647cSTimo Kreuzer;
384afb647cSTimo Kreuzer;       Look in exp, log for the respective algorithms
394afb647cSTimo Kreuzer;
404afb647cSTimo Kreuzer
414afb647cSTimo Kreuzer.const
424afb647cSTimo Kreuzer
434afb647cSTimo KreuzerALIGN 16
444afb647cSTimo Kreuzer
454afb647cSTimo Kreuzer; these codes and the ones in the corresponding .c file have to match
; Nine DWORD code values, numbered consecutively from 1 to 9.
; NOTE(review): the names suggest one code per special-case input/result of
; pow (x, y operands; z result) - confirm against the code that loads them.
464afb647cSTimo Kreuzer__flag_x_one_y_snan             DD 00000001
474afb647cSTimo Kreuzer__flag_x_zero_z_inf             DD 00000002
484afb647cSTimo Kreuzer__flag_x_nan                    DD 00000003
494afb647cSTimo Kreuzer__flag_y_nan                    DD 00000004
504afb647cSTimo Kreuzer__flag_x_nan_y_nan              DD 00000005
514afb647cSTimo Kreuzer__flag_x_neg_y_notint           DD 00000006
524afb647cSTimo Kreuzer__flag_z_zero                   DD 00000007
534afb647cSTimo Kreuzer__flag_z_denormal               DD 00000008
544afb647cSTimo Kreuzer__flag_z_inf                    DD 00000009
554afb647cSTimo Kreuzer
564afb647cSTimo KreuzerALIGN 16
574afb647cSTimo Kreuzer
; Scalar 64-bit constants. Apart from the small integers at the end, each DQ is
; either an IEEE-754 binary64 bit pattern or a mask over the binary64 fields
; (bit 63 = sign, bits 62..52 = exponent, bits 51..0 = mantissa).
; Several names share one bit pattern: __sign_mask/__neg_zero,
; __sign_and_exp_mask/__neg_inf, __exp_mask/__pos_inf,
; __ind_pattern/__neg_qnan and __mant_mask/__mant_full.
584afb647cSTimo Kreuzer__ay_max_bound              DQ 43e0000000000000h ; 2^63
594afb647cSTimo Kreuzer__ay_min_bound              DQ 3c00000000000000h ; 2^-63
604afb647cSTimo Kreuzer__sign_mask                 DQ 8000000000000000h ; bit 63
614afb647cSTimo Kreuzer__sign_and_exp_mask         DQ 0fff0000000000000h ; bits 63..52
624afb647cSTimo Kreuzer__exp_mask                  DQ 7ff0000000000000h ; bits 62..52
634afb647cSTimo Kreuzer__neg_inf                   DQ 0fff0000000000000h ; -infinity
644afb647cSTimo Kreuzer__pos_inf                   DQ 7ff0000000000000h ; +infinity
654afb647cSTimo Kreuzer__pos_one                   DQ 3ff0000000000000h ; +1.0
664afb647cSTimo Kreuzer__pos_zero                  DQ 0000000000000000h ; +0.0
674afb647cSTimo Kreuzer__exp_mant_mask             DQ 7fffffffffffffffh ; bits 62..0 (all but the sign)
684afb647cSTimo Kreuzer__mant_mask                 DQ 000fffffffffffffh ; bits 51..0
694afb647cSTimo Kreuzer__ind_pattern               DQ 0fff8000000000000h ; sign + all-ones exponent + top mantissa bit
704afb647cSTimo Kreuzer
714afb647cSTimo Kreuzer
724afb647cSTimo Kreuzer__neg_qnan                  DQ 0fff8000000000000h ; quiet NaN with the sign bit set
734afb647cSTimo Kreuzer__qnan                      DQ 7ff8000000000000h ; quiet NaN with the sign bit clear
744afb647cSTimo Kreuzer__qnan_set                  DQ 0008000000000000h ; bit 51 (top mantissa bit)
754afb647cSTimo Kreuzer
764afb647cSTimo Kreuzer__neg_one                   DQ 0bff0000000000000h ; -1.0
774afb647cSTimo Kreuzer__neg_zero                  DQ 8000000000000000h ; -0.0
784afb647cSTimo Kreuzer
794afb647cSTimo Kreuzer__exp_shift                 DQ 0000000000000034h ; 52
804afb647cSTimo Kreuzer__exp_bias                  DQ 00000000000003ffh ; 1023
814afb647cSTimo Kreuzer__exp_bias_m1               DQ 00000000000003feh ; 1022
824afb647cSTimo Kreuzer
834afb647cSTimo Kreuzer__yexp_53                   DQ 0000000000000035h ; 53
844afb647cSTimo Kreuzer__mant_full                 DQ 000fffffffffffffh ; bits 51..0
854afb647cSTimo Kreuzer__1_before_mant             DQ 0010000000000000h ; bit 52 (the bit just above the mantissa)
864afb647cSTimo Kreuzer
874afb647cSTimo Kreuzer__mask_mant_all8            DQ 000ff00000000000h ; bits 51..44 (top 8 mantissa bits)
884afb647cSTimo Kreuzer__mask_mant9                DQ 0000080000000000h ; bit 43 (9th mantissa bit from the top)
894afb647cSTimo Kreuzer
904afb647cSTimo Kreuzer
914afb647cSTimo Kreuzer
924afb647cSTimo KreuzerALIGN 16
; 16-byte constants: every label here is followed by two identical quadwords.
934afb647cSTimo Kreuzer__real_fffffffff8000000     DQ 0fffffffff8000000h ; mask with the low 27 bits clear
944afb647cSTimo Kreuzer                            DQ 0fffffffff8000000h
954afb647cSTimo Kreuzer
964afb647cSTimo Kreuzer__mask_8000000000000000     DQ 8000000000000000h ; bit 63 (binary64 sign bit)
974afb647cSTimo Kreuzer                            DQ 8000000000000000h
984afb647cSTimo Kreuzer
994afb647cSTimo Kreuzer__real_4090040000000000     DQ 4090040000000000h ; 1025.0
1004afb647cSTimo Kreuzer                            DQ 4090040000000000h
1014afb647cSTimo Kreuzer
1024afb647cSTimo Kreuzer__real_C090C80000000000     DQ 0C090C80000000000h ; -1074.0
1034afb647cSTimo Kreuzer                            DQ 0C090C80000000000h
1044afb647cSTimo Kreuzer
1054afb647cSTimo Kreuzer;---------------------
1064afb647cSTimo Kreuzer; log data
1074afb647cSTimo Kreuzer;---------------------
; Each constant below occupies 16 bytes: the value in the first quadword,
; followed by a zero quadword. __real_threshold is the exception - it repeats
; its value in the second quadword.
1084afb647cSTimo Kreuzer
1094afb647cSTimo KreuzerALIGN 16
1104afb647cSTimo Kreuzer
1114afb647cSTimo Kreuzer__real_ninf     DQ 0fff0000000000000h   ; -inf
1124afb647cSTimo Kreuzer                DQ 0000000000000000h
1134afb647cSTimo Kreuzer__real_inf      DQ 7ff0000000000000h    ; +inf
1144afb647cSTimo Kreuzer                DQ 0000000000000000h
1154afb647cSTimo Kreuzer__real_nan      DQ 7ff8000000000000h    ; NaN
1164afb647cSTimo Kreuzer                DQ 0000000000000000h
1174afb647cSTimo Kreuzer__real_mant     DQ 000FFFFFFFFFFFFFh    ; mantissa bits
1184afb647cSTimo Kreuzer                DQ 0000000000000000h
1194afb647cSTimo Kreuzer__mask_1023     DQ 00000000000003ffh ; integer 1023
1204afb647cSTimo Kreuzer                DQ 0000000000000000h
1214afb647cSTimo Kreuzer__mask_001      DQ 0000000000000001h ; integer 1
1224afb647cSTimo Kreuzer                DQ 0000000000000000h
1234afb647cSTimo Kreuzer
1244afb647cSTimo Kreuzer__real_log2_lead    DQ 3fe62e42e0000000h ; log2_lead  6.93147122859954833984e-01
1254afb647cSTimo Kreuzer                    DQ 0000000000000000h
1264afb647cSTimo Kreuzer__real_log2_tail    DQ 3e6efa39ef35793ch ; log2_tail  5.76999904754328540596e-08
1274afb647cSTimo Kreuzer                    DQ 0000000000000000h
1284afb647cSTimo Kreuzer
1294afb647cSTimo Kreuzer__real_two          DQ 4000000000000000h ; 2
1304afb647cSTimo Kreuzer                    DQ 0000000000000000h
1314afb647cSTimo Kreuzer
1324afb647cSTimo Kreuzer__real_one          DQ 3ff0000000000000h ; 1
1334afb647cSTimo Kreuzer                    DQ 0000000000000000h
1344afb647cSTimo Kreuzer
1354afb647cSTimo Kreuzer__real_half         DQ 3fe0000000000000h ; 1/2
1364afb647cSTimo Kreuzer                    DQ 0000000000000000h
1374afb647cSTimo Kreuzer
1384afb647cSTimo Kreuzer__mask_100          DQ 0000000000000100h ; integer 256 (100h)
1394afb647cSTimo Kreuzer                    DQ 0000000000000000h
1404afb647cSTimo Kreuzer
1414afb647cSTimo Kreuzer__real_1_over_2     DQ 3fe0000000000000h ; 0.5 (same bit pattern as __real_half)
1424afb647cSTimo Kreuzer                    DQ 0000000000000000h
1434afb647cSTimo Kreuzer__real_1_over_3     DQ 3fd5555555555555h ; 1/3 rounded to binary64
1444afb647cSTimo Kreuzer                    DQ 0000000000000000h
1454afb647cSTimo Kreuzer__real_1_over_4     DQ 3fd0000000000000h ; 0.25
1464afb647cSTimo Kreuzer                    DQ 0000000000000000h
1474afb647cSTimo Kreuzer__real_1_over_5     DQ 3fc999999999999ah ; 1/5 rounded to binary64
1484afb647cSTimo Kreuzer                    DQ 0000000000000000h
1494afb647cSTimo Kreuzer__real_1_over_6     DQ 3fc5555555555555h ; 1/6 rounded to binary64
1504afb647cSTimo Kreuzer                    DQ 0000000000000000h
; NOTE(review): the nearest binary64 to 1/7 is 3fc2492492492492h; the value
; below is 2 ulp larger. Possibly a tuned coefficient - confirm before changing.
1514afb647cSTimo Kreuzer__real_1_over_7     DQ 3fc2492492492494h
1524afb647cSTimo Kreuzer                    DQ 0000000000000000h
1534afb647cSTimo Kreuzer
1544afb647cSTimo Kreuzer__mask_1023_f       DQ 0c08ff80000000000h ; -1023.0 as a binary64 value
1554afb647cSTimo Kreuzer                    DQ 0000000000000000h
1564afb647cSTimo Kreuzer
1574afb647cSTimo Kreuzer__mask_2045         DQ 00000000000007fdh ; integer 2045
1584afb647cSTimo Kreuzer                    DQ 0000000000000000h
1594afb647cSTimo Kreuzer
1604afb647cSTimo Kreuzer__real_threshold    DQ 3fc0000000000000h ; 0.125
1614afb647cSTimo Kreuzer                    DQ 3fc0000000000000h
1624afb647cSTimo Kreuzer
1634afb647cSTimo Kreuzer__real_notsign      DQ 7ffFFFFFFFFFFFFFh ; ^sign bit
1644afb647cSTimo Kreuzer                    DQ 0000000000000000h
1654afb647cSTimo Kreuzer
1664afb647cSTimo Kreuzer
; Symbols declared here but defined in other modules: two QWORD-typed symbols
; and one DWORD-typed symbol.
; NOTE(review): __log_256_lead/__log_256_tail presumably come from
; log_256_lead_tail_table.asm, and __use_fma3_lib presumably selects an FMA3
; code path - confirm against their definitions and uses.
1674afb647cSTimo KreuzerEXTRN __log_256_lead:QWORD
1684afb647cSTimo KreuzerEXTRN __log_256_tail:QWORD
1694afb647cSTimo KreuzerEXTRN __use_fma3_lib:DWORD
1704afb647cSTimo Kreuzer
1714afb647cSTimo Kreuzer; This table differs from the tables in log_256_lead_tail_table.asm:
1724afb647cSTimo Kreuzer; the heads have fewer significant bits (hence the tails also differ).
; __log_F_inv_head holds 257 quadwords (binary64 bit patterns); the first entry
; is 4000000000000000h (2.0) and the last is 3ff0000000000000h (1.0).
; NOTE(review): presumably the high part of a reciprocal 1/F that is paired
; with the same index of __log_F_inv_tail - confirm against the code that
; indexes these tables.
1734afb647cSTimo KreuzerALIGN 16
1744afb647cSTimo Kreuzer__log_F_inv_head    DQ 4000000000000000h
1754afb647cSTimo Kreuzer                    DQ 3fffe00000000000h
1764afb647cSTimo Kreuzer                    DQ 3fffc00000000000h
1774afb647cSTimo Kreuzer                    DQ 3fffa00000000000h
1784afb647cSTimo Kreuzer                    DQ 3fff800000000000h
1794afb647cSTimo Kreuzer                    DQ 3fff600000000000h
1804afb647cSTimo Kreuzer                    DQ 3fff400000000000h
1814afb647cSTimo Kreuzer                    DQ 3fff200000000000h
1824afb647cSTimo Kreuzer                    DQ 3fff000000000000h
1834afb647cSTimo Kreuzer                    DQ 3ffee00000000000h
1844afb647cSTimo Kreuzer                    DQ 3ffec00000000000h
1854afb647cSTimo Kreuzer                    DQ 3ffea00000000000h
1864afb647cSTimo Kreuzer                    DQ 3ffe900000000000h
1874afb647cSTimo Kreuzer                    DQ 3ffe700000000000h
1884afb647cSTimo Kreuzer                    DQ 3ffe500000000000h
1894afb647cSTimo Kreuzer                    DQ 3ffe300000000000h
1904afb647cSTimo Kreuzer                    DQ 3ffe100000000000h
1914afb647cSTimo Kreuzer                    DQ 3ffe000000000000h
1924afb647cSTimo Kreuzer                    DQ 3ffde00000000000h
1934afb647cSTimo Kreuzer                    DQ 3ffdc00000000000h
1944afb647cSTimo Kreuzer                    DQ 3ffda00000000000h
1954afb647cSTimo Kreuzer                    DQ 3ffd900000000000h
1964afb647cSTimo Kreuzer                    DQ 3ffd700000000000h
1974afb647cSTimo Kreuzer                    DQ 3ffd500000000000h
1984afb647cSTimo Kreuzer                    DQ 3ffd400000000000h
1994afb647cSTimo Kreuzer                    DQ 3ffd200000000000h
2004afb647cSTimo Kreuzer                    DQ 3ffd000000000000h
2014afb647cSTimo Kreuzer                    DQ 3ffcf00000000000h
2024afb647cSTimo Kreuzer                    DQ 3ffcd00000000000h
2034afb647cSTimo Kreuzer                    DQ 3ffcb00000000000h
2044afb647cSTimo Kreuzer                    DQ 3ffca00000000000h
2054afb647cSTimo Kreuzer                    DQ 3ffc800000000000h
2064afb647cSTimo Kreuzer                    DQ 3ffc700000000000h
2074afb647cSTimo Kreuzer                    DQ 3ffc500000000000h
2084afb647cSTimo Kreuzer                    DQ 3ffc300000000000h
2094afb647cSTimo Kreuzer                    DQ 3ffc200000000000h
2104afb647cSTimo Kreuzer                    DQ 3ffc000000000000h
2114afb647cSTimo Kreuzer                    DQ 3ffbf00000000000h
2124afb647cSTimo Kreuzer                    DQ 3ffbd00000000000h
2134afb647cSTimo Kreuzer                    DQ 3ffbc00000000000h
2144afb647cSTimo Kreuzer                    DQ 3ffba00000000000h
2154afb647cSTimo Kreuzer                    DQ 3ffb900000000000h
2164afb647cSTimo Kreuzer                    DQ 3ffb700000000000h
2174afb647cSTimo Kreuzer                    DQ 3ffb600000000000h
2184afb647cSTimo Kreuzer                    DQ 3ffb400000000000h
2194afb647cSTimo Kreuzer                    DQ 3ffb300000000000h
2204afb647cSTimo Kreuzer                    DQ 3ffb200000000000h
2214afb647cSTimo Kreuzer                    DQ 3ffb000000000000h
2224afb647cSTimo Kreuzer                    DQ 3ffaf00000000000h
2234afb647cSTimo Kreuzer                    DQ 3ffad00000000000h
2244afb647cSTimo Kreuzer                    DQ 3ffac00000000000h
2254afb647cSTimo Kreuzer                    DQ 3ffaa00000000000h
2264afb647cSTimo Kreuzer                    DQ 3ffa900000000000h
2274afb647cSTimo Kreuzer                    DQ 3ffa800000000000h
2284afb647cSTimo Kreuzer                    DQ 3ffa600000000000h
2294afb647cSTimo Kreuzer                    DQ 3ffa500000000000h
2304afb647cSTimo Kreuzer                    DQ 3ffa400000000000h
2314afb647cSTimo Kreuzer                    DQ 3ffa200000000000h
2324afb647cSTimo Kreuzer                    DQ 3ffa100000000000h
2334afb647cSTimo Kreuzer                    DQ 3ffa000000000000h
2344afb647cSTimo Kreuzer                    DQ 3ff9e00000000000h
2354afb647cSTimo Kreuzer                    DQ 3ff9d00000000000h
2364afb647cSTimo Kreuzer                    DQ 3ff9c00000000000h
2374afb647cSTimo Kreuzer                    DQ 3ff9a00000000000h
2384afb647cSTimo Kreuzer                    DQ 3ff9900000000000h
2394afb647cSTimo Kreuzer                    DQ 3ff9800000000000h
2404afb647cSTimo Kreuzer                    DQ 3ff9700000000000h
2414afb647cSTimo Kreuzer                    DQ 3ff9500000000000h
2424afb647cSTimo Kreuzer                    DQ 3ff9400000000000h
2434afb647cSTimo Kreuzer                    DQ 3ff9300000000000h
2444afb647cSTimo Kreuzer                    DQ 3ff9200000000000h
2454afb647cSTimo Kreuzer                    DQ 3ff9000000000000h
2464afb647cSTimo Kreuzer                    DQ 3ff8f00000000000h
2474afb647cSTimo Kreuzer                    DQ 3ff8e00000000000h
2484afb647cSTimo Kreuzer                    DQ 3ff8d00000000000h
2494afb647cSTimo Kreuzer                    DQ 3ff8b00000000000h
2504afb647cSTimo Kreuzer                    DQ 3ff8a00000000000h
2514afb647cSTimo Kreuzer                    DQ 3ff8900000000000h
2524afb647cSTimo Kreuzer                    DQ 3ff8800000000000h
2534afb647cSTimo Kreuzer                    DQ 3ff8700000000000h
2544afb647cSTimo Kreuzer                    DQ 3ff8600000000000h
2554afb647cSTimo Kreuzer                    DQ 3ff8400000000000h
2564afb647cSTimo Kreuzer                    DQ 3ff8300000000000h
2574afb647cSTimo Kreuzer                    DQ 3ff8200000000000h
2584afb647cSTimo Kreuzer                    DQ 3ff8100000000000h
2594afb647cSTimo Kreuzer                    DQ 3ff8000000000000h
2604afb647cSTimo Kreuzer                    DQ 3ff7f00000000000h
2614afb647cSTimo Kreuzer                    DQ 3ff7e00000000000h
2624afb647cSTimo Kreuzer                    DQ 3ff7d00000000000h
2634afb647cSTimo Kreuzer                    DQ 3ff7b00000000000h
2644afb647cSTimo Kreuzer                    DQ 3ff7a00000000000h
2654afb647cSTimo Kreuzer                    DQ 3ff7900000000000h
2664afb647cSTimo Kreuzer                    DQ 3ff7800000000000h
2674afb647cSTimo Kreuzer                    DQ 3ff7700000000000h
2684afb647cSTimo Kreuzer                    DQ 3ff7600000000000h
2694afb647cSTimo Kreuzer                    DQ 3ff7500000000000h
2704afb647cSTimo Kreuzer                    DQ 3ff7400000000000h
2714afb647cSTimo Kreuzer                    DQ 3ff7300000000000h
2724afb647cSTimo Kreuzer                    DQ 3ff7200000000000h
2734afb647cSTimo Kreuzer                    DQ 3ff7100000000000h
2744afb647cSTimo Kreuzer                    DQ 3ff7000000000000h
2754afb647cSTimo Kreuzer                    DQ 3ff6f00000000000h
2764afb647cSTimo Kreuzer                    DQ 3ff6e00000000000h
2774afb647cSTimo Kreuzer                    DQ 3ff6d00000000000h
2784afb647cSTimo Kreuzer                    DQ 3ff6c00000000000h
2794afb647cSTimo Kreuzer                    DQ 3ff6b00000000000h
2804afb647cSTimo Kreuzer                    DQ 3ff6a00000000000h
2814afb647cSTimo Kreuzer                    DQ 3ff6900000000000h
2824afb647cSTimo Kreuzer                    DQ 3ff6800000000000h
2834afb647cSTimo Kreuzer                    DQ 3ff6700000000000h
2844afb647cSTimo Kreuzer                    DQ 3ff6600000000000h
2854afb647cSTimo Kreuzer                    DQ 3ff6500000000000h
2864afb647cSTimo Kreuzer                    DQ 3ff6400000000000h
2874afb647cSTimo Kreuzer                    DQ 3ff6300000000000h
2884afb647cSTimo Kreuzer                    DQ 3ff6200000000000h
2894afb647cSTimo Kreuzer                    DQ 3ff6100000000000h
2904afb647cSTimo Kreuzer                    DQ 3ff6000000000000h
2914afb647cSTimo Kreuzer                    DQ 3ff5f00000000000h
2924afb647cSTimo Kreuzer                    DQ 3ff5e00000000000h
2934afb647cSTimo Kreuzer                    DQ 3ff5d00000000000h
2944afb647cSTimo Kreuzer                    DQ 3ff5c00000000000h
2954afb647cSTimo Kreuzer                    DQ 3ff5b00000000000h
2964afb647cSTimo Kreuzer                    DQ 3ff5a00000000000h
2974afb647cSTimo Kreuzer                    DQ 3ff5900000000000h
2984afb647cSTimo Kreuzer                    DQ 3ff5800000000000h
2994afb647cSTimo Kreuzer                    DQ 3ff5800000000000h
3004afb647cSTimo Kreuzer                    DQ 3ff5700000000000h
3014afb647cSTimo Kreuzer                    DQ 3ff5600000000000h
3024afb647cSTimo Kreuzer                    DQ 3ff5500000000000h
3034afb647cSTimo Kreuzer                    DQ 3ff5400000000000h
3044afb647cSTimo Kreuzer                    DQ 3ff5300000000000h
3054afb647cSTimo Kreuzer                    DQ 3ff5200000000000h
3064afb647cSTimo Kreuzer                    DQ 3ff5100000000000h
3074afb647cSTimo Kreuzer                    DQ 3ff5000000000000h
3084afb647cSTimo Kreuzer                    DQ 3ff5000000000000h
3094afb647cSTimo Kreuzer                    DQ 3ff4f00000000000h
3104afb647cSTimo Kreuzer                    DQ 3ff4e00000000000h
3114afb647cSTimo Kreuzer                    DQ 3ff4d00000000000h
3124afb647cSTimo Kreuzer                    DQ 3ff4c00000000000h
3134afb647cSTimo Kreuzer                    DQ 3ff4b00000000000h
3144afb647cSTimo Kreuzer                    DQ 3ff4a00000000000h
3154afb647cSTimo Kreuzer                    DQ 3ff4a00000000000h
3164afb647cSTimo Kreuzer                    DQ 3ff4900000000000h
3174afb647cSTimo Kreuzer                    DQ 3ff4800000000000h
3184afb647cSTimo Kreuzer                    DQ 3ff4700000000000h
3194afb647cSTimo Kreuzer                    DQ 3ff4600000000000h
3204afb647cSTimo Kreuzer                    DQ 3ff4600000000000h
3214afb647cSTimo Kreuzer                    DQ 3ff4500000000000h
3224afb647cSTimo Kreuzer                    DQ 3ff4400000000000h
3234afb647cSTimo Kreuzer                    DQ 3ff4300000000000h
3244afb647cSTimo Kreuzer                    DQ 3ff4200000000000h
3254afb647cSTimo Kreuzer                    DQ 3ff4200000000000h
3264afb647cSTimo Kreuzer                    DQ 3ff4100000000000h
3274afb647cSTimo Kreuzer                    DQ 3ff4000000000000h
3284afb647cSTimo Kreuzer                    DQ 3ff3f00000000000h
3294afb647cSTimo Kreuzer                    DQ 3ff3e00000000000h
3304afb647cSTimo Kreuzer                    DQ 3ff3e00000000000h
3314afb647cSTimo Kreuzer                    DQ 3ff3d00000000000h
3324afb647cSTimo Kreuzer                    DQ 3ff3c00000000000h
3334afb647cSTimo Kreuzer                    DQ 3ff3b00000000000h
3344afb647cSTimo Kreuzer                    DQ 3ff3b00000000000h
3354afb647cSTimo Kreuzer                    DQ 3ff3a00000000000h
3364afb647cSTimo Kreuzer                    DQ 3ff3900000000000h
3374afb647cSTimo Kreuzer                    DQ 3ff3800000000000h
3384afb647cSTimo Kreuzer                    DQ 3ff3800000000000h
3394afb647cSTimo Kreuzer                    DQ 3ff3700000000000h
3404afb647cSTimo Kreuzer                    DQ 3ff3600000000000h
3414afb647cSTimo Kreuzer                    DQ 3ff3500000000000h
3424afb647cSTimo Kreuzer                    DQ 3ff3500000000000h
3434afb647cSTimo Kreuzer                    DQ 3ff3400000000000h
3444afb647cSTimo Kreuzer                    DQ 3ff3300000000000h
3454afb647cSTimo Kreuzer                    DQ 3ff3200000000000h
3464afb647cSTimo Kreuzer                    DQ 3ff3200000000000h
3474afb647cSTimo Kreuzer                    DQ 3ff3100000000000h
3484afb647cSTimo Kreuzer                    DQ 3ff3000000000000h
3494afb647cSTimo Kreuzer                    DQ 3ff3000000000000h
3504afb647cSTimo Kreuzer                    DQ 3ff2f00000000000h
3514afb647cSTimo Kreuzer                    DQ 3ff2e00000000000h
3524afb647cSTimo Kreuzer                    DQ 3ff2e00000000000h
3534afb647cSTimo Kreuzer                    DQ 3ff2d00000000000h
3544afb647cSTimo Kreuzer                    DQ 3ff2c00000000000h
3554afb647cSTimo Kreuzer                    DQ 3ff2b00000000000h
3564afb647cSTimo Kreuzer                    DQ 3ff2b00000000000h
3574afb647cSTimo Kreuzer                    DQ 3ff2a00000000000h
3584afb647cSTimo Kreuzer                    DQ 3ff2900000000000h
3594afb647cSTimo Kreuzer                    DQ 3ff2900000000000h
3604afb647cSTimo Kreuzer                    DQ 3ff2800000000000h
3614afb647cSTimo Kreuzer                    DQ 3ff2700000000000h
3624afb647cSTimo Kreuzer                    DQ 3ff2700000000000h
3634afb647cSTimo Kreuzer                    DQ 3ff2600000000000h
3644afb647cSTimo Kreuzer                    DQ 3ff2500000000000h
3654afb647cSTimo Kreuzer                    DQ 3ff2500000000000h
3664afb647cSTimo Kreuzer                    DQ 3ff2400000000000h
3674afb647cSTimo Kreuzer                    DQ 3ff2300000000000h
3684afb647cSTimo Kreuzer                    DQ 3ff2300000000000h
3694afb647cSTimo Kreuzer                    DQ 3ff2200000000000h
3704afb647cSTimo Kreuzer                    DQ 3ff2100000000000h
3714afb647cSTimo Kreuzer                    DQ 3ff2100000000000h
3724afb647cSTimo Kreuzer                    DQ 3ff2000000000000h
3734afb647cSTimo Kreuzer                    DQ 3ff2000000000000h
3744afb647cSTimo Kreuzer                    DQ 3ff1f00000000000h
3754afb647cSTimo Kreuzer                    DQ 3ff1e00000000000h
3764afb647cSTimo Kreuzer                    DQ 3ff1e00000000000h
3774afb647cSTimo Kreuzer                    DQ 3ff1d00000000000h
3784afb647cSTimo Kreuzer                    DQ 3ff1c00000000000h
3794afb647cSTimo Kreuzer                    DQ 3ff1c00000000000h
3804afb647cSTimo Kreuzer                    DQ 3ff1b00000000000h
3814afb647cSTimo Kreuzer                    DQ 3ff1b00000000000h
3824afb647cSTimo Kreuzer                    DQ 3ff1a00000000000h
3834afb647cSTimo Kreuzer                    DQ 3ff1900000000000h
3844afb647cSTimo Kreuzer                    DQ 3ff1900000000000h
3854afb647cSTimo Kreuzer                    DQ 3ff1800000000000h
3864afb647cSTimo Kreuzer                    DQ 3ff1800000000000h
3874afb647cSTimo Kreuzer                    DQ 3ff1700000000000h
3884afb647cSTimo Kreuzer                    DQ 3ff1600000000000h
3894afb647cSTimo Kreuzer                    DQ 3ff1600000000000h
3904afb647cSTimo Kreuzer                    DQ 3ff1500000000000h
3914afb647cSTimo Kreuzer                    DQ 3ff1500000000000h
3924afb647cSTimo Kreuzer                    DQ 3ff1400000000000h
3934afb647cSTimo Kreuzer                    DQ 3ff1300000000000h
3944afb647cSTimo Kreuzer                    DQ 3ff1300000000000h
3954afb647cSTimo Kreuzer                    DQ 3ff1200000000000h
3964afb647cSTimo Kreuzer                    DQ 3ff1200000000000h
3974afb647cSTimo Kreuzer                    DQ 3ff1100000000000h
3984afb647cSTimo Kreuzer                    DQ 3ff1100000000000h
3994afb647cSTimo Kreuzer                    DQ 3ff1000000000000h
4004afb647cSTimo Kreuzer                    DQ 3ff0f00000000000h
4014afb647cSTimo Kreuzer                    DQ 3ff0f00000000000h
4024afb647cSTimo Kreuzer                    DQ 3ff0e00000000000h
4034afb647cSTimo Kreuzer                    DQ 3ff0e00000000000h
4044afb647cSTimo Kreuzer                    DQ 3ff0d00000000000h
4054afb647cSTimo Kreuzer                    DQ 3ff0d00000000000h
4064afb647cSTimo Kreuzer                    DQ 3ff0c00000000000h
4074afb647cSTimo Kreuzer                    DQ 3ff0c00000000000h
4084afb647cSTimo Kreuzer                    DQ 3ff0b00000000000h
4094afb647cSTimo Kreuzer                    DQ 3ff0a00000000000h
4104afb647cSTimo Kreuzer                    DQ 3ff0a00000000000h
4114afb647cSTimo Kreuzer                    DQ 3ff0900000000000h
4124afb647cSTimo Kreuzer                    DQ 3ff0900000000000h
4134afb647cSTimo Kreuzer                    DQ 3ff0800000000000h
4144afb647cSTimo Kreuzer                    DQ 3ff0800000000000h
4154afb647cSTimo Kreuzer                    DQ 3ff0700000000000h
4164afb647cSTimo Kreuzer                    DQ 3ff0700000000000h
4174afb647cSTimo Kreuzer                    DQ 3ff0600000000000h
4184afb647cSTimo Kreuzer                    DQ 3ff0600000000000h
4194afb647cSTimo Kreuzer                    DQ 3ff0500000000000h
4204afb647cSTimo Kreuzer                    DQ 3ff0500000000000h
4214afb647cSTimo Kreuzer                    DQ 3ff0400000000000h
4224afb647cSTimo Kreuzer                    DQ 3ff0400000000000h
4234afb647cSTimo Kreuzer                    DQ 3ff0300000000000h
4244afb647cSTimo Kreuzer                    DQ 3ff0300000000000h
4254afb647cSTimo Kreuzer                    DQ 3ff0200000000000h
4264afb647cSTimo Kreuzer                    DQ 3ff0200000000000h
4274afb647cSTimo Kreuzer                    DQ 3ff0100000000000h
4284afb647cSTimo Kreuzer                    DQ 3ff0100000000000h
4294afb647cSTimo Kreuzer                    DQ 3ff0000000000000h
4304afb647cSTimo Kreuzer                    DQ 3ff0000000000000h
4314afb647cSTimo Kreuzer
4324afb647cSTimo KreuzerALIGN 16
4334afb647cSTimo Kreuzer__log_F_inv_tail    DQ 0000000000000000h
4344afb647cSTimo Kreuzer                    DQ 3effe01fe01fe020h
4354afb647cSTimo Kreuzer                    DQ 3f1fc07f01fc07f0h
4364afb647cSTimo Kreuzer                    DQ 3f31caa01fa11caah
4374afb647cSTimo Kreuzer                    DQ 3f3f81f81f81f820h
4384afb647cSTimo Kreuzer                    DQ 3f48856506ddaba6h
4394afb647cSTimo Kreuzer                    DQ 3f5196792909c560h
4404afb647cSTimo Kreuzer                    DQ 3f57d9108c2ad433h
4414afb647cSTimo Kreuzer                    DQ 3f5f07c1f07c1f08h
4424afb647cSTimo Kreuzer                    DQ 3f638ff08b1c03ddh
4434afb647cSTimo Kreuzer                    DQ 3f680f6603d980f6h
4444afb647cSTimo Kreuzer                    DQ 3f6d00f57403d5d0h
4454afb647cSTimo Kreuzer                    DQ 3f331abf0b7672a0h
4464afb647cSTimo Kreuzer                    DQ 3f506a965d43919bh
4474afb647cSTimo Kreuzer                    DQ 3f5ceb240795ceb2h
4484afb647cSTimo Kreuzer                    DQ 3f6522f3b834e67fh
4494afb647cSTimo Kreuzer                    DQ 3f6c3c3c3c3c3c3ch
4504afb647cSTimo Kreuzer                    DQ 3f3e01e01e01e01eh
4514afb647cSTimo Kreuzer                    DQ 3f575b8fe21a291ch
4524afb647cSTimo Kreuzer                    DQ 3f6403b9403b9404h
4534afb647cSTimo Kreuzer                    DQ 3f6cc0ed7303b5cch
4544afb647cSTimo Kreuzer                    DQ 3f479118f3fc4da2h
4554afb647cSTimo Kreuzer                    DQ 3f5ed952e0b0ce46h
4564afb647cSTimo Kreuzer                    DQ 3f695900eae56404h
4574afb647cSTimo Kreuzer                    DQ 3f3d41d41d41d41dh
4584afb647cSTimo Kreuzer                    DQ 3f5cb28ff16c69aeh
4594afb647cSTimo Kreuzer                    DQ 3f696b1edd80e866h
4604afb647cSTimo Kreuzer                    DQ 3f4372e225fe30d9h
4614afb647cSTimo Kreuzer                    DQ 3f60ad12073615a2h
4624afb647cSTimo Kreuzer                    DQ 3f6cdb2c0397cdb3h
4634afb647cSTimo Kreuzer                    DQ 3f52cc157b864407h
4644afb647cSTimo Kreuzer                    DQ 3f664cb5f7148404h
4654afb647cSTimo Kreuzer                    DQ 3f3c71c71c71c71ch
4664afb647cSTimo Kreuzer                    DQ 3f6129a21a930b84h
4674afb647cSTimo Kreuzer                    DQ 3f6f1e0387f1e038h
4684afb647cSTimo Kreuzer                    DQ 3f5ad4e4ba80709bh
4694afb647cSTimo Kreuzer                    DQ 3f6c0e070381c0e0h
4704afb647cSTimo Kreuzer                    DQ 3f560fba1a362bb0h
4714afb647cSTimo Kreuzer                    DQ 3f6a5713280dee96h
4724afb647cSTimo Kreuzer                    DQ 3f53f59620f9ece9h
4734afb647cSTimo Kreuzer                    DQ 3f69f22983759f23h
4744afb647cSTimo Kreuzer                    DQ 3f5478ac63fc8d5ch
4754afb647cSTimo Kreuzer                    DQ 3f6ad87bb4671656h
4764afb647cSTimo Kreuzer                    DQ 3f578b8efbb8148ch
4774afb647cSTimo Kreuzer                    DQ 3f6d0369d0369d03h
4784afb647cSTimo Kreuzer                    DQ 3f5d212b601b3748h
4794afb647cSTimo Kreuzer                    DQ 3f0b2036406c80d9h
4804afb647cSTimo Kreuzer                    DQ 3f629663b24547d1h
4814afb647cSTimo Kreuzer                    DQ 3f4435e50d79435eh
4824afb647cSTimo Kreuzer                    DQ 3f67d0ff2920bc03h
4834afb647cSTimo Kreuzer                    DQ 3f55c06b15c06b16h
4844afb647cSTimo Kreuzer                    DQ 3f6e3a5f0fd7f954h
4854afb647cSTimo Kreuzer                    DQ 3f61dec0d4c77b03h
4864afb647cSTimo Kreuzer                    DQ 3f473289870ac52eh
4874afb647cSTimo Kreuzer                    DQ 3f6a034da034da03h
4884afb647cSTimo Kreuzer                    DQ 3f5d041da2292856h
4894afb647cSTimo Kreuzer                    DQ 3f3a41a41a41a41ah
4904afb647cSTimo Kreuzer                    DQ 3f68550f8a39409dh
4914afb647cSTimo Kreuzer                    DQ 3f5b4fe5e92c0686h
4924afb647cSTimo Kreuzer                    DQ 3f3a01a01a01a01ah
4934afb647cSTimo Kreuzer                    DQ 3f691d2a2067b23ah
4944afb647cSTimo Kreuzer                    DQ 3f5e7c5dada0b4e5h
4954afb647cSTimo Kreuzer                    DQ 3f468a7725080ce1h
4964afb647cSTimo Kreuzer                    DQ 3f6c49d4aa21b490h
4974afb647cSTimo Kreuzer                    DQ 3f63333333333333h
4984afb647cSTimo Kreuzer                    DQ 3f54bc363b03fccfh
4994afb647cSTimo Kreuzer                    DQ 3f2c9f01970e4f81h
5004afb647cSTimo Kreuzer                    DQ 3f697617c6ef5b25h
5014afb647cSTimo Kreuzer                    DQ 3f6161f9add3c0cah
5024afb647cSTimo Kreuzer                    DQ 3f5319fe6cb39806h
5034afb647cSTimo Kreuzer                    DQ 3f2f693a1c451ab3h
5044afb647cSTimo Kreuzer                    DQ 3f6a9e240321a9e2h
5054afb647cSTimo Kreuzer                    DQ 3f63831f3831f383h
5064afb647cSTimo Kreuzer                    DQ 3f5949ebc4dcfc1ch
5074afb647cSTimo Kreuzer                    DQ 3f480c6980c6980ch
5084afb647cSTimo Kreuzer                    DQ 3f6f9d00c5fe7403h
5094afb647cSTimo Kreuzer                    DQ 3f69721ed7e75347h
5104afb647cSTimo Kreuzer                    DQ 3f6381ec0313381fh
5114afb647cSTimo Kreuzer                    DQ 3f5b97c2aec12653h
5124afb647cSTimo Kreuzer                    DQ 3f509ef3024ae3bah
5134afb647cSTimo Kreuzer                    DQ 3f38618618618618h
5144afb647cSTimo Kreuzer                    DQ 3f6e0184f00c2780h
5154afb647cSTimo Kreuzer                    DQ 3f692ef5657dba52h
5164afb647cSTimo Kreuzer                    DQ 3f64940305494030h
5174afb647cSTimo Kreuzer                    DQ 3f60303030303030h
5184afb647cSTimo Kreuzer                    DQ 3f58060180601806h
5194afb647cSTimo Kreuzer                    DQ 3f5017f405fd017fh
5204afb647cSTimo Kreuzer                    DQ 3f412a8ad278e8ddh
5214afb647cSTimo Kreuzer                    DQ 3f17d05f417d05f4h
5224afb647cSTimo Kreuzer                    DQ 3f6d67245c02f7d6h
5234afb647cSTimo Kreuzer                    DQ 3f6a4411c1d986a9h
5244afb647cSTimo Kreuzer                    DQ 3f6754d76c7316dfh
5254afb647cSTimo Kreuzer                    DQ 3f649902f149902fh
5264afb647cSTimo Kreuzer                    DQ 3f621023358c1a68h
5274afb647cSTimo Kreuzer                    DQ 3f5f7390d2a6c406h
5284afb647cSTimo Kreuzer                    DQ 3f5b2b0805d5b2b1h
5294afb647cSTimo Kreuzer                    DQ 3f5745d1745d1746h
5304afb647cSTimo Kreuzer                    DQ 3f53c31507fa32c4h
5314afb647cSTimo Kreuzer                    DQ 3f50a1fd1b7af017h
5324afb647cSTimo Kreuzer                    DQ 3f4bc36ce3e0453ah
5334afb647cSTimo Kreuzer                    DQ 3f4702e05c0b8170h
5344afb647cSTimo Kreuzer                    DQ 3f4300b79300b793h
5354afb647cSTimo Kreuzer                    DQ 3f3f76b4337c6cb1h
5364afb647cSTimo Kreuzer                    DQ 3f3a62681c860fb0h
5374afb647cSTimo Kreuzer                    DQ 3f36c16c16c16c17h
5384afb647cSTimo Kreuzer                    DQ 3f3490aa31a3cfc7h
5394afb647cSTimo Kreuzer                    DQ 3f33cd153729043eh
5404afb647cSTimo Kreuzer                    DQ 3f3473a88d0bfd2eh
5414afb647cSTimo Kreuzer                    DQ 3f36816816816817h
5424afb647cSTimo Kreuzer                    DQ 3f39f36016719f36h
5434afb647cSTimo Kreuzer                    DQ 3f3ec6a5122f9016h
5444afb647cSTimo Kreuzer                    DQ 3f427c29da5519cfh
5454afb647cSTimo Kreuzer                    DQ 3f4642c8590b2164h
5464afb647cSTimo Kreuzer                    DQ 3f4ab5c45606f00bh
5474afb647cSTimo Kreuzer                    DQ 3f4fd3b80b11fd3ch
5484afb647cSTimo Kreuzer                    DQ 3f52cda0c6ba4eaah
5494afb647cSTimo Kreuzer                    DQ 3f56058160581606h
5504afb647cSTimo Kreuzer                    DQ 3f5990d0a4b7ef87h
5514afb647cSTimo Kreuzer                    DQ 3f5d6ee340579d6fh
5524afb647cSTimo Kreuzer                    DQ 3f60cf87d9c54a69h
5534afb647cSTimo Kreuzer                    DQ 3f6310572620ae4ch
5544afb647cSTimo Kreuzer                    DQ 3f65798c8ff522a2h
5554afb647cSTimo Kreuzer                    DQ 3f680ad602b580adh
5564afb647cSTimo Kreuzer                    DQ 3f6ac3e24799546fh
5574afb647cSTimo Kreuzer                    DQ 3f6da46102b1da46h
5584afb647cSTimo Kreuzer                    DQ 3f15805601580560h
5594afb647cSTimo Kreuzer                    DQ 3f3ed3c506b39a23h
5604afb647cSTimo Kreuzer                    DQ 3f4cbdd3e2970f60h
5614afb647cSTimo Kreuzer                    DQ 3f55555555555555h
5624afb647cSTimo Kreuzer                    DQ 3f5c979aee0bf805h
5634afb647cSTimo Kreuzer                    DQ 3f621291e81fd58eh
5644afb647cSTimo Kreuzer                    DQ 3f65fead500a9580h
5654afb647cSTimo Kreuzer                    DQ 3f6a0fd5c5f02a3ah
5664afb647cSTimo Kreuzer                    DQ 3f6e45c223898adch
5674afb647cSTimo Kreuzer                    DQ 3f35015015015015h
5684afb647cSTimo Kreuzer                    DQ 3f4c7b16ea64d422h
5694afb647cSTimo Kreuzer                    DQ 3f57829cbc14e5e1h
5704afb647cSTimo Kreuzer                    DQ 3f60877db8589720h
5714afb647cSTimo Kreuzer                    DQ 3f65710e4b5edceah
5724afb647cSTimo Kreuzer                    DQ 3f6a7dbb4d1fc1c8h
5734afb647cSTimo Kreuzer                    DQ 3f6fad40a57eb503h
5744afb647cSTimo Kreuzer                    DQ 3f43fd6bb00a5140h
5754afb647cSTimo Kreuzer                    DQ 3f54e78ecb419ba9h
5764afb647cSTimo Kreuzer                    DQ 3f600a44029100a4h
5774afb647cSTimo Kreuzer                    DQ 3f65c28f5c28f5c3h
5784afb647cSTimo Kreuzer                    DQ 3f6b9c68b2c0cc4ah
5794afb647cSTimo Kreuzer                    DQ 3f2978feb9f34381h
5804afb647cSTimo Kreuzer                    DQ 3f4ecf163bb6500ah
5814afb647cSTimo Kreuzer                    DQ 3f5be1958b67ebb9h
5824afb647cSTimo Kreuzer                    DQ 3f644e6157dc9a3bh
5834afb647cSTimo Kreuzer                    DQ 3f6acc4baa3f0ddfh
5844afb647cSTimo Kreuzer                    DQ 3f26a4cbcb2a247bh
5854afb647cSTimo Kreuzer                    DQ 3f50505050505050h
5864afb647cSTimo Kreuzer                    DQ 3f5e0b4439959819h
5874afb647cSTimo Kreuzer                    DQ 3f66027f6027f602h
5884afb647cSTimo Kreuzer                    DQ 3f6d1e854b5e0db4h
5894afb647cSTimo Kreuzer                    DQ 3f4165e7254813e2h
5904afb647cSTimo Kreuzer                    DQ 3f576646a9d716efh
5914afb647cSTimo Kreuzer                    DQ 3f632b48f757ce88h
5924afb647cSTimo Kreuzer                    DQ 3f6ac1b24652a906h
5934afb647cSTimo Kreuzer                    DQ 3f33b13b13b13b14h
5944afb647cSTimo Kreuzer                    DQ 3f5490e1eb208984h
5954afb647cSTimo Kreuzer                    DQ 3f62385830fec66eh
5964afb647cSTimo Kreuzer                    DQ 3f6a45a6cc111b7eh
5974afb647cSTimo Kreuzer                    DQ 3f33813813813814h
5984afb647cSTimo Kreuzer                    DQ 3f556f472517b708h
5994afb647cSTimo Kreuzer                    DQ 3f631be7bc0e8f2ah
6004afb647cSTimo Kreuzer                    DQ 3f6b9cbf3e55f044h
6014afb647cSTimo Kreuzer                    DQ 3f40e7d95bc609a9h
6024afb647cSTimo Kreuzer                    DQ 3f59e6b3804d19e7h
6034afb647cSTimo Kreuzer                    DQ 3f65c8b6af7963c2h
6044afb647cSTimo Kreuzer                    DQ 3f6eb9dad43bf402h
6054afb647cSTimo Kreuzer                    DQ 3f4f1a515885fb37h
6064afb647cSTimo Kreuzer                    DQ 3f60eeb1d3d76c02h
6074afb647cSTimo Kreuzer                    DQ 3f6a320261a32026h
6084afb647cSTimo Kreuzer                    DQ 3f3c82ac40260390h
6094afb647cSTimo Kreuzer                    DQ 3f5a12f684bda12fh
6104afb647cSTimo Kreuzer                    DQ 3f669d43fda2962ch
6114afb647cSTimo Kreuzer                    DQ 3f02e025c04b8097h
6124afb647cSTimo Kreuzer                    DQ 3f542804b542804bh
6134afb647cSTimo Kreuzer                    DQ 3f63f69b02593f6ah
6144afb647cSTimo Kreuzer                    DQ 3f6df31cb46e21fah
6154afb647cSTimo Kreuzer                    DQ 3f5012b404ad012bh
6164afb647cSTimo Kreuzer                    DQ 3f623925e7820a7fh
6174afb647cSTimo Kreuzer                    DQ 3f6c8253c8253c82h
6184afb647cSTimo Kreuzer                    DQ 3f4b92ddc02526e5h
6194afb647cSTimo Kreuzer                    DQ 3f61602511602511h
6204afb647cSTimo Kreuzer                    DQ 3f6bf471439c9adfh
6214afb647cSTimo Kreuzer                    DQ 3f4a85c40939a85ch
6224afb647cSTimo Kreuzer                    DQ 3f6166f9ac024d16h
6234afb647cSTimo Kreuzer                    DQ 3f6c44e10125e227h
6244afb647cSTimo Kreuzer                    DQ 3f4cebf48bbd90e5h
6254afb647cSTimo Kreuzer                    DQ 3f62492492492492h
6264afb647cSTimo Kreuzer                    DQ 3f6d6f2e2ec0b673h
6274afb647cSTimo Kreuzer                    DQ 3f5159e26af37c05h
6284afb647cSTimo Kreuzer                    DQ 3f64024540245402h
6294afb647cSTimo Kreuzer                    DQ 3f6f6f0243f6f024h
6304afb647cSTimo Kreuzer                    DQ 3f55e60121579805h
6314afb647cSTimo Kreuzer                    DQ 3f668e18cf81b10fh
6324afb647cSTimo Kreuzer                    DQ 3f32012012012012h
6334afb647cSTimo Kreuzer                    DQ 3f5c11f7047dc11fh
6344afb647cSTimo Kreuzer                    DQ 3f69e878ff70985eh
6354afb647cSTimo Kreuzer                    DQ 3f4779d9fdc3a219h
6364afb647cSTimo Kreuzer                    DQ 3f61eace5c957907h
6374afb647cSTimo Kreuzer                    DQ 3f6e0d5b450239e1h
6384afb647cSTimo Kreuzer                    DQ 3f548bf073816367h
6394afb647cSTimo Kreuzer                    DQ 3f6694808dda5202h
6404afb647cSTimo Kreuzer                    DQ 3f37c67f2bae2b21h
6414afb647cSTimo Kreuzer                    DQ 3f5ee58469ee5847h
6424afb647cSTimo Kreuzer                    DQ 3f6c0233c0233c02h
6434afb647cSTimo Kreuzer                    DQ 3f514e02328a7012h
6444afb647cSTimo Kreuzer                    DQ 3f6561072057b573h
6454afb647cSTimo Kreuzer                    DQ 3f31811811811812h
6464afb647cSTimo Kreuzer                    DQ 3f5e28646f5a1060h
6474afb647cSTimo Kreuzer                    DQ 3f6c0d1284e6f1d7h
6484afb647cSTimo Kreuzer                    DQ 3f523543f0c80459h
6494afb647cSTimo Kreuzer                    DQ 3f663cbeea4e1a09h
6504afb647cSTimo Kreuzer                    DQ 3f3b9a3fdd5c8cb8h
6514afb647cSTimo Kreuzer                    DQ 3f60be1c159a76d2h
6524afb647cSTimo Kreuzer                    DQ 3f6e1d1a688e4838h
6534afb647cSTimo Kreuzer                    DQ 3f572044d72044d7h
6544afb647cSTimo Kreuzer                    DQ 3f691713db81577bh
6554afb647cSTimo Kreuzer                    DQ 3f4ac73ae9819b50h
6564afb647cSTimo Kreuzer                    DQ 3f6460334e904cf6h
6574afb647cSTimo Kreuzer                    DQ 3f31111111111111h
6584afb647cSTimo Kreuzer                    DQ 3f5feef80441fef0h
6594afb647cSTimo Kreuzer                    DQ 3f6de021fde021feh
6604afb647cSTimo Kreuzer                    DQ 3f57b7eacc9686a0h
6614afb647cSTimo Kreuzer                    DQ 3f69ead7cd391fbch
6624afb647cSTimo Kreuzer                    DQ 3f50195609804390h
6634afb647cSTimo Kreuzer                    DQ 3f6641511e8d2b32h
6644afb647cSTimo Kreuzer                    DQ 3f4222b1acf1ce96h
6654afb647cSTimo Kreuzer                    DQ 3f62e29f79b47582h
6664afb647cSTimo Kreuzer                    DQ 3f24f0d1682e11cdh
6674afb647cSTimo Kreuzer                    DQ 3f5f9bb096771e4dh
6684afb647cSTimo Kreuzer                    DQ 3f6e5ee45dd96ae2h
6694afb647cSTimo Kreuzer                    DQ 3f5a0429a0429a04h
6704afb647cSTimo Kreuzer                    DQ 3f6bb74d5f06c021h
6714afb647cSTimo Kreuzer                    DQ 3f54fce404254fceh
6724afb647cSTimo Kreuzer                    DQ 3f695766eacbc402h
6734afb647cSTimo Kreuzer                    DQ 3f50842108421084h
6744afb647cSTimo Kreuzer                    DQ 3f673e5371d5c338h
6754afb647cSTimo Kreuzer                    DQ 3f4930523fbe3368h
6764afb647cSTimo Kreuzer                    DQ 3f656b38f225f6c4h
6774afb647cSTimo Kreuzer                    DQ 3f426e978d4fdf3bh
6784afb647cSTimo Kreuzer                    DQ 3f63dd40e4eb0cc6h
6794afb647cSTimo Kreuzer                    DQ 3f397f7d73404146h
6804afb647cSTimo Kreuzer                    DQ 3f6293982cc98af1h
6814afb647cSTimo Kreuzer                    DQ 3f30410410410410h
6824afb647cSTimo Kreuzer                    DQ 3f618d6f048ff7e4h
6834afb647cSTimo Kreuzer                    DQ 3f2236a3ebc349deh
6844afb647cSTimo Kreuzer                    DQ 3f60c9f8ee53d18ch
6854afb647cSTimo Kreuzer                    DQ 3f10204081020408h
6864afb647cSTimo Kreuzer                    DQ 3f60486ca2f46ea6h
6874afb647cSTimo Kreuzer                    DQ 3ef0101010101010h
6884afb647cSTimo Kreuzer                    DQ 3f60080402010080h
6894afb647cSTimo Kreuzer                    DQ 0000000000000000h
6904afb647cSTimo Kreuzer
6914afb647cSTimo Kreuzer;---------------------
6924afb647cSTimo Kreuzer; exp data
6934afb647cSTimo Kreuzer;---------------------
6944afb647cSTimo Kreuzer
6954afb647cSTimo KreuzerALIGN 16
6964afb647cSTimo Kreuzer
; Constants used by the exp stage and by the final result scaling.
; Every entry occupies 16 bytes (value plus zero padding), so together with
; the ALIGN 16 in front of this table each label stays 16-byte aligned and
; may be used as an XMMWORD memory operand (as orpd does with
; __enable_almost_inf).
6974afb647cSTimo Kreuzer__denormal_threshold            DD 0fffffc02h ; -1022 as a signed dword; compared against the binary exponent m (ecx / r11d)
6984afb647cSTimo Kreuzer                                DD 0
6994afb647cSTimo Kreuzer                                DQ 0
7004afb647cSTimo Kreuzer
7014afb647cSTimo Kreuzer__enable_almost_inf             DQ 7fe0000000000000h ; exponent field 7feh, zero mantissa; OR'ed into the result in Lpow_sse2_process_almost_inf
7024afb647cSTimo Kreuzer                                DQ 0
7034afb647cSTimo Kreuzer
7044afb647cSTimo Kreuzer__real_zero                     DQ 0000000000000000h ; +0.0
7054afb647cSTimo Kreuzer                                DQ 0
7064afb647cSTimo Kreuzer
7074afb647cSTimo Kreuzer__real_smallest_denormal        DQ 0000000000000001h ; bit pattern 1 = smallest positive subnormal double (2^-1074)
7084afb647cSTimo Kreuzer                                DQ 0
7094afb647cSTimo Kreuzer__denormal_tiny_threshold       DQ 0c0874046dfefd9d0h ; about -744.03; compared as a 64-bit integer with the raw bits of v = y*ln(x)
7104afb647cSTimo Kreuzer                                DQ 0
7114afb647cSTimo Kreuzer
7124afb647cSTimo Kreuzer__real_p65536                   DQ 40f0000000000000h    ; 65536 = 64*1024, upper bound for v*(64/ln(2))
7134afb647cSTimo Kreuzer                                DQ 0
7144afb647cSTimo Kreuzer__real_m68800                   DQ 0c0f0cc0000000000h   ; -68800 = 64*(-1075), lower bound for v*(64/ln(2))
7154afb647cSTimo Kreuzer                                DQ 0
7164afb647cSTimo Kreuzer__real_64_by_log2               DQ 40571547652b82feh    ; 64/ln(2)
7174afb647cSTimo Kreuzer                                DQ 0
7184afb647cSTimo Kreuzer__real_log2_by_64_head          DQ 3f862e42f0000000h    ; log2_by_64_head: leading part of ln(2)/64 (low 28 mantissa bits are zero)
7194afb647cSTimo Kreuzer                                DQ 0
7204afb647cSTimo Kreuzer__real_log2_by_64_tail          DQ 0bdfdf473de6af278h   ; -log2_by_64_tail: stored negated, so n*tail is added, not subtracted
7214afb647cSTimo Kreuzer                                DQ 0
; Coefficients 1/k! of the degree-6 polynomial evaluated in the exp stage.
7224afb647cSTimo Kreuzer__real_1_by_720                 DQ 3f56c16c16c16c17h    ; 1/720
7234afb647cSTimo Kreuzer                                DQ 0
7244afb647cSTimo Kreuzer__real_1_by_120                 DQ 3f81111111111111h    ; 1/120
7254afb647cSTimo Kreuzer                                DQ 0
7264afb647cSTimo Kreuzer__real_1_by_24                  DQ 3fa5555555555555h    ; 1/24
7274afb647cSTimo Kreuzer                                DQ 0
7284afb647cSTimo Kreuzer__real_1_by_6                   DQ 3fc5555555555555h    ; 1/6
7294afb647cSTimo Kreuzer                                DQ 0
7304afb647cSTimo Kreuzer__real_1_by_2                   DQ 3fe0000000000000h    ; 1/2
7314afb647cSTimo Kreuzer                                DQ 0
7324afb647cSTimo Kreuzer
7334afb647cSTimo Kreuzer
7344afb647cSTimo KreuzerEXTRN __two_to_jby64_head_table:QWORD
7354afb647cSTimo KreuzerEXTRN __two_to_jby64_tail_table:QWORD
7364afb647cSTimo KreuzerEXTRN __use_fma3_lib:DWORD
7374afb647cSTimo Kreuzer
7384afb647cSTimo Kreuzerfname           TEXTEQU <pow>
7394afb647cSTimo Kreuzerfname_special   TEXTEQU <_pow_special>
7404afb647cSTimo Kreuzer
7414afb647cSTimo Kreuzer; define local variable storage offsets
7424afb647cSTimo Kreuzer
; Stack frame layout: byte offsets from rsp after the prologue.
; All slots are 16 bytes apart, so a slot can be accessed as an XMMWORD
; (negate_result is read that way by orpd) provided rsp itself is 16-byte
; aligned.
7434afb647cSTimo Kreuzersave_x          EQU     10h ; x spilled from xmm0, reloaded as raw bits into rdx
7444afb647cSTimo Kreuzersave_y          EQU     20h ; y spilled from xmm1, reloaded as raw bits into r8 and again for the head/tail split
7454afb647cSTimo Kreuzerp_temp_exp      EQU     30h ; scratch for moving values between xmm and integer registers in the exp stage
7464afb647cSTimo Kreuzernegate_result   EQU     40h ; bits OR'ed into the final result; initialised from __pos_zero in Lpow_sse2
7474afb647cSTimo Kreuzersave_ax         EQU     50h ; not referenced in the SSE2 path shown in this part of the file
7484afb647cSTimo Kreuzery_head          EQU     60h ; y masked with __real_fffffffff8000000 (leading part of y)
7494afb647cSTimo Kreuzerp_temp_log      EQU     70h ; scratch for moving the table index value from r8 into xmm1 in the log stage
7504afb647cSTimo Kreuzersave_xmm6       EQU     080h ; save slot for xmm6 (SaveXmm / RestoreXmm)
7514afb647cSTimo Kreuzersave_xmm7       EQU     090h ; save slot for xmm7 (SaveXmm / RestoreXmm)
7524afb647cSTimo Kreuzerdummy_space     EQU     0a0h ; not referenced in the SSE2 path shown in this part of the file
7534afb647cSTimo Kreuzer
; 0c8h = 200 bytes.  200 mod 16 = 8, which together with the 8-byte return
; address leaves rsp 16-byte aligned inside the function.
; NOTE(review): assumes StackAllocate (from fm.inc) subtracts exactly
; stack_size from rsp - confirm against the macro definition.
7544afb647cSTimo Kreuzerstack_size      EQU     0c8h
7554afb647cSTimo Kreuzer
7564afb647cSTimo Kreuzerinclude fm.inc
7574afb647cSTimo Kreuzer
7584afb647cSTimo Kreuzer; external function
7594afb647cSTimo KreuzerEXTERN fname_special:PROC
7604afb647cSTimo Kreuzer
7614afb647cSTimo Kreuzer.code
7624afb647cSTimo KreuzerALIGN 16
; pow entry point (fname expands to pow).  x is taken from xmm0 and y from
; xmm1.  This section sets up the frame, selects the FMA3 or SSE2
; implementation, and peels off the special cases of x and y before the
; general log/exp path.
7634afb647cSTimo KreuzerPUBLIC fname
7644afb647cSTimo Kreuzerfname PROC FRAME
7654afb647cSTimo Kreuzer    StackAllocate stack_size
    ; xmm6 and xmm7 are used as scratch below and are callee-saved in the
    ; Microsoft x64 convention, so they are spilled inside the prologue.
7664afb647cSTimo Kreuzer    SaveXmm xmm6, save_xmm6
7674afb647cSTimo Kreuzer    SaveXmm xmm7, save_xmm7
7684afb647cSTimo Kreuzer    .ENDPROLOG
    ; Run-time dispatch: a non-zero __use_fma3_lib selects the FMA3 path.
7694afb647cSTimo Kreuzer    cmp          DWORD PTR __use_fma3_lib, 0
7704afb647cSTimo Kreuzer    jne          Lpow_fma3
7714afb647cSTimo Kreuzer
7724afb647cSTimo KreuzerALIGN 16
7734afb647cSTimo KreuzerLpow_sse2:
    ; Spill both arguments so that their raw bit patterns can be examined
    ; with integer instructions: rdx = bits of x, r8 = bits of y.
7744afb647cSTimo Kreuzer    movsd       QWORD PTR [save_x+rsp], xmm0
7754afb647cSTimo Kreuzer    movsd       QWORD PTR [save_y+rsp], xmm1
7764afb647cSTimo Kreuzer
7774afb647cSTimo Kreuzer    mov         rdx, QWORD PTR [save_x+rsp]
7784afb647cSTimo Kreuzer    mov         r8, QWORD PTR [save_y+rsp]
7794afb647cSTimo Kreuzer
    ; y has no bits set under __exp_mant_mask -> y is zero.
    ; NOTE(review): presumably the mask covers everything except the sign
    ; bit, so both +0 and -0 match - the constant is defined outside this view.
7804afb647cSTimo Kreuzer    mov         r10, QWORD PTR __exp_mant_mask
7814afb647cSTimo Kreuzer    and         r10, r8
7824afb647cSTimo Kreuzer    jz          Lpow_sse2_y_is_zero
7834afb647cSTimo Kreuzer
    ; Bitwise equality of y with __pos_one.
7844afb647cSTimo Kreuzer    cmp         r8, QWORD PTR __pos_one
7854afb647cSTimo Kreuzer    je          Lpow_sse2_y_is_one
7864afb647cSTimo Kreuzer
    ; r9 = sign bit of x.  negate_result is preset from __pos_zero before
    ; branching.
    ; NOTE(review): presumably the x-negative path overrides negate_result
    ; when the result must be negative - that code is outside this view.
7874afb647cSTimo Kreuzer    mov         r9, QWORD PTR __sign_mask
7884afb647cSTimo Kreuzer    and         r9, rdx
7894afb647cSTimo Kreuzer    mov         rax, QWORD PTR __pos_zero
7904afb647cSTimo Kreuzer    mov         QWORD PTR [negate_result+rsp], rax
7914afb647cSTimo Kreuzer    cmp         r9, QWORD PTR __sign_mask
7924afb647cSTimo Kreuzer    je          Lpow_sse2_x_is_neg
7934afb647cSTimo Kreuzer
    ; From here on the sign bit of x is clear.
7944afb647cSTimo Kreuzer    cmp         rdx, QWORD PTR __pos_one
7954afb647cSTimo Kreuzer    je          Lpow_sse2_x_is_pos_one
7964afb647cSTimo Kreuzer
7974afb647cSTimo Kreuzer    cmp         rdx, QWORD PTR __pos_zero
7984afb647cSTimo Kreuzer    je          Lpow_sse2_x_is_zero
7994afb647cSTimo Kreuzer
    ; All bits of __exp_mask set in x -> x is infinity or NaN.
8004afb647cSTimo Kreuzer    mov         r9, QWORD PTR __exp_mask
8014afb647cSTimo Kreuzer    and         r9, rdx
8024afb647cSTimo Kreuzer    cmp         r9, QWORD PTR __exp_mask
8034afb647cSTimo Kreuzer    je          Lpow_sse2_x_is_inf_or_nan
8044afb647cSTimo Kreuzer
    ; Exponent field of y above __ay_max_bound (signed integer compare).
8054afb647cSTimo Kreuzer    mov         r10, QWORD PTR __exp_mask
8064afb647cSTimo Kreuzer    and         r10, r8
8074afb647cSTimo Kreuzer    cmp         r10, QWORD PTR __ay_max_bound
8084afb647cSTimo Kreuzer    jg          Lpow_sse2_ay_is_very_large
8094afb647cSTimo Kreuzer
    ; Exponent field of y below __ay_min_bound (signed integer compare).
    ; Otherwise execution falls through into the log(x) computation.
8104afb647cSTimo Kreuzer    mov         r10, QWORD PTR __exp_mask
8114afb647cSTimo Kreuzer    and         r10, r8
8124afb647cSTimo Kreuzer    cmp         r10, QWORD PTR __ay_min_bound
8134afb647cSTimo Kreuzer    jl          Lpow_sse2_ay_is_very_small
8144afb647cSTimo Kreuzer
8154afb647cSTimo Kreuzer    ; -----------------------------
8164afb647cSTimo Kreuzer    ; compute log(x) here
8174afb647cSTimo Kreuzer    ; -----------------------------
; Start of the log stage: split x (in xmm0) into exponent and mantissa.
; On exit xmm6 = xexp as a double, xmm2 = x masked with __real_mant,
; r8 = raw bits of x, and xmm0 is unchanged.
8184afb647cSTimo KreuzerLpow_sse2_log_x:
8194afb647cSTimo Kreuzer
8204afb647cSTimo Kreuzer    ; compute exponent part
    ; NOTE(review): this xor looks redundant - r8 is overwritten by the
    ; movd below before anything reads it.
8214afb647cSTimo Kreuzer    xor         r8, r8
8224afb647cSTimo Kreuzer    movdqa      xmm3, xmm0
8234afb647cSTimo Kreuzer    psrlq       xmm3, 52 ; shift the exponent field of x down to bit 0
8244afb647cSTimo Kreuzer    movd        r8, xmm0 ; r8 = raw bits of x, used for the table index later
8254afb647cSTimo Kreuzer    psubq       xmm3, XMMWORD PTR __mask_1023 ; presumably removes the exponent bias of 1023 - constant defined outside this view
8264afb647cSTimo Kreuzer    movdqa      xmm2, xmm0
8274afb647cSTimo Kreuzer    cvtdq2pd    xmm6, xmm3 ; xexp: low signed dword of xmm3 converted to double
8284afb647cSTimo Kreuzer    pand        xmm2, XMMWORD PTR __real_mant ; keep the bits of x selected by __real_mant
8294afb647cSTimo Kreuzer
    ; xexp equal to __mask_1023_f takes the denormal-input adjustment path.
    ; NOTE(review): presumably __mask_1023_f is -1023.0, i.e. the exponent
    ; field of x was zero - confirm against its definition.
8304afb647cSTimo Kreuzer    comisd      xmm6, QWORD PTR __mask_1023_f
8314afb647cSTimo Kreuzer    je          Lpow_sse2_denormal_adjust
8324afb647cSTimo Kreuzer
8334afb647cSTimo KreuzerLpow_sse2_continue_common:
8344afb647cSTimo Kreuzer
8354afb647cSTimo Kreuzer    ; compute index into the log tables
8364afb647cSTimo Kreuzer    movsd       xmm7, xmm0
8374afb647cSTimo Kreuzer    mov         r9, r8
8384afb647cSTimo Kreuzer    and         r8, QWORD PTR __mask_mant_all8
8394afb647cSTimo Kreuzer    and         r9, QWORD PTR __mask_mant9
8404afb647cSTimo Kreuzer    subsd       xmm7, __real_one
8414afb647cSTimo Kreuzer    shl         r9, 1
8424afb647cSTimo Kreuzer    add         r8, r9
8434afb647cSTimo Kreuzer    mov         QWORD PTR [p_temp_log+rsp], r8
8444afb647cSTimo Kreuzer    andpd       xmm7, __real_notsign
8454afb647cSTimo Kreuzer
8464afb647cSTimo Kreuzer    ; F, Y, switch to near-one codepath
8474afb647cSTimo Kreuzer    movsd       xmm1, QWORD PTR [p_temp_log+rsp]
8484afb647cSTimo Kreuzer    shr         r8, 44
8494afb647cSTimo Kreuzer    por         xmm2, XMMWORD PTR __real_half
8504afb647cSTimo Kreuzer    por         xmm1, XMMWORD PTR __real_half
8514afb647cSTimo Kreuzer    lea         r9, QWORD PTR __log_F_inv_head
8524afb647cSTimo Kreuzer    lea         rdx, QWORD PTR __log_F_inv_tail
8534afb647cSTimo Kreuzer    comisd      xmm7, __real_threshold
8544afb647cSTimo Kreuzer    jb          Lpow_sse2_near_one
8554afb647cSTimo Kreuzer
8564afb647cSTimo Kreuzer    ; f = F - Y, r = f * inv
8574afb647cSTimo Kreuzer    subsd       xmm1, xmm2
8584afb647cSTimo Kreuzer    movsd       xmm4, xmm1
8594afb647cSTimo Kreuzer    mulsd       xmm1, QWORD PTR [r9+r8*8]
8604afb647cSTimo Kreuzer    movsd       xmm5, xmm1
8614afb647cSTimo Kreuzer    mulsd       xmm4, QWORD PTR [rdx+r8*8]
8624afb647cSTimo Kreuzer    movsd       xmm7, xmm4
8634afb647cSTimo Kreuzer    addsd       xmm1, xmm4
8644afb647cSTimo Kreuzer
8654afb647cSTimo Kreuzer    movsd       xmm2, xmm1
8664afb647cSTimo Kreuzer    movsd       xmm0, xmm1
8674afb647cSTimo Kreuzer    lea         r9, __log_256_lead
8684afb647cSTimo Kreuzer
8694afb647cSTimo Kreuzer    ; poly
8704afb647cSTimo Kreuzer    movsd       xmm3, QWORD PTR __real_1_over_6
8714afb647cSTimo Kreuzer    movsd       xmm1, QWORD PTR __real_1_over_3
8724afb647cSTimo Kreuzer    mulsd       xmm3, xmm2
8734afb647cSTimo Kreuzer    mulsd       xmm1, xmm2
8744afb647cSTimo Kreuzer    mulsd       xmm0, xmm2
8754afb647cSTimo Kreuzer    subsd       xmm5, xmm2
8764afb647cSTimo Kreuzer    movsd       xmm4, xmm0
8774afb647cSTimo Kreuzer    addsd       xmm3, QWORD PTR __real_1_over_5
8784afb647cSTimo Kreuzer    addsd       xmm1, QWORD PTR __real_1_over_2
8794afb647cSTimo Kreuzer    mulsd       xmm4, xmm0
8804afb647cSTimo Kreuzer    mulsd       xmm3, xmm2
8814afb647cSTimo Kreuzer    mulsd       xmm1, xmm0
8824afb647cSTimo Kreuzer    addsd       xmm3, QWORD PTR __real_1_over_4
8834afb647cSTimo Kreuzer    addsd       xmm7, xmm5
8844afb647cSTimo Kreuzer    mulsd       xmm3, xmm4
8854afb647cSTimo Kreuzer    addsd       xmm1, xmm3
8864afb647cSTimo Kreuzer    addsd       xmm1, xmm7
8874afb647cSTimo Kreuzer
8884afb647cSTimo Kreuzer    movsd       xmm5, QWORD PTR __real_log2_tail
8894afb647cSTimo Kreuzer    lea         rdx, __log_256_tail
8904afb647cSTimo Kreuzer    mulsd       xmm5, xmm6
8914afb647cSTimo Kreuzer    movsd       xmm0, QWORD PTR [r9+r8*8]
8924afb647cSTimo Kreuzer    subsd       xmm5, xmm1
8934afb647cSTimo Kreuzer
8944afb647cSTimo Kreuzer    movsd       xmm3, QWORD PTR [rdx+r8*8]
8954afb647cSTimo Kreuzer    addsd       xmm3, xmm5
8964afb647cSTimo Kreuzer    movsd       xmm1, xmm3
8974afb647cSTimo Kreuzer    subsd       xmm3, xmm2
8984afb647cSTimo Kreuzer
8994afb647cSTimo Kreuzer    movsd       xmm7, QWORD PTR __real_log2_lead
9004afb647cSTimo Kreuzer    mulsd       xmm7, xmm6
9014afb647cSTimo Kreuzer    addsd       xmm0, xmm7
9024afb647cSTimo Kreuzer
9034afb647cSTimo Kreuzer    ; result of ln(x) is computed from head and tail parts, resH and resT
9044afb647cSTimo Kreuzer    ; res = ln(x) = resH + resT
9054afb647cSTimo Kreuzer    ; resH and resT are in full precision
9064afb647cSTimo Kreuzer
9074afb647cSTimo Kreuzer    ; resT is computed from head and tail parts, resT_h and resT_t
9084afb647cSTimo Kreuzer    ; resT = resT_h + resT_t
9094afb647cSTimo Kreuzer
9104afb647cSTimo Kreuzer    ; now
9114afb647cSTimo Kreuzer    ; xmm3 - resT
9124afb647cSTimo Kreuzer    ; xmm0 - resH
9134afb647cSTimo Kreuzer    ; xmm1 - (resT_t)
9144afb647cSTimo Kreuzer    ; xmm2 - (-resT_h)
9154afb647cSTimo Kreuzer
9164afb647cSTimo KreuzerLpow_sse2_log_x_continue:
9174afb647cSTimo Kreuzer
9184afb647cSTimo Kreuzer    movsd       xmm7, xmm0
9194afb647cSTimo Kreuzer    addsd       xmm0, xmm3
9204afb647cSTimo Kreuzer    movsd       xmm5, xmm0
9214afb647cSTimo Kreuzer    andpd       xmm0, XMMWORD PTR __real_fffffffff8000000
9224afb647cSTimo Kreuzer
9234afb647cSTimo Kreuzer    ; xmm0 - H
9244afb647cSTimo Kreuzer    ; xmm7 - resH
9254afb647cSTimo Kreuzer    ; xmm5 - res
9264afb647cSTimo Kreuzer
9274afb647cSTimo Kreuzer    mov         rax, QWORD PTR [save_y+rsp]
9284afb647cSTimo Kreuzer    and         rax, QWORD PTR __real_fffffffff8000000
9294afb647cSTimo Kreuzer
9304afb647cSTimo Kreuzer    addsd       xmm2, xmm3
9314afb647cSTimo Kreuzer    subsd       xmm7, xmm5
9324afb647cSTimo Kreuzer    subsd       xmm1, xmm2
9334afb647cSTimo Kreuzer    addsd       xmm7, xmm3
9344afb647cSTimo Kreuzer    subsd       xmm5, xmm0
9354afb647cSTimo Kreuzer
9364afb647cSTimo Kreuzer    mov         QWORD PTR [y_head+rsp], rax
9374afb647cSTimo Kreuzer    movsd       xmm4, QWORD PTR [save_y+rsp]
9384afb647cSTimo Kreuzer
9394afb647cSTimo Kreuzer    addsd       xmm7, xmm1
9404afb647cSTimo Kreuzer    addsd       xmm7, xmm5
9414afb647cSTimo Kreuzer
9424afb647cSTimo Kreuzer    ; res = H + T
9434afb647cSTimo Kreuzer    ; H has leading 26 bits of precision
9444afb647cSTimo Kreuzer    ; T has full precision
9454afb647cSTimo Kreuzer
9464afb647cSTimo Kreuzer    ; xmm0 - H
9474afb647cSTimo Kreuzer    ; xmm7 - T
9484afb647cSTimo Kreuzer
9494afb647cSTimo Kreuzer    movsd       xmm2, QWORD PTR [y_head+rsp]
9504afb647cSTimo Kreuzer    subsd       xmm4, xmm2
9514afb647cSTimo Kreuzer
9524afb647cSTimo Kreuzer    ; y is split into head and tail
9534afb647cSTimo Kreuzer    ; for y * ln(x) computation
9544afb647cSTimo Kreuzer
9554afb647cSTimo Kreuzer    ; xmm4 - Yt
9564afb647cSTimo Kreuzer    ; xmm2 - Yh
9574afb647cSTimo Kreuzer    ; xmm0 - H
9584afb647cSTimo Kreuzer    ; xmm7 - T
9594afb647cSTimo Kreuzer
9604afb647cSTimo Kreuzer    movsd   xmm3, xmm4
9614afb647cSTimo Kreuzer    movsd   xmm5, xmm7
9624afb647cSTimo Kreuzer    movsd   xmm6, xmm0
9634afb647cSTimo Kreuzer    mulsd   xmm3, xmm7 ; YtRt
9644afb647cSTimo Kreuzer    mulsd   xmm4, xmm0 ; YtRh
9654afb647cSTimo Kreuzer    mulsd   xmm5, xmm2 ; YhRt
9664afb647cSTimo Kreuzer    mulsd   xmm6, xmm2 ; YhRh
9674afb647cSTimo Kreuzer
9684afb647cSTimo Kreuzer    movsd   xmm1, xmm6
9694afb647cSTimo Kreuzer    addsd   xmm3, xmm4
9704afb647cSTimo Kreuzer    addsd   xmm3, xmm5
9714afb647cSTimo Kreuzer
9724afb647cSTimo Kreuzer    addsd   xmm1, xmm3
9734afb647cSTimo Kreuzer    movsd   xmm0, xmm1
9744afb647cSTimo Kreuzer
9754afb647cSTimo Kreuzer    subsd   xmm6, xmm1
9764afb647cSTimo Kreuzer    addsd   xmm6, xmm3
9774afb647cSTimo Kreuzer
9784afb647cSTimo Kreuzer    ; y * ln(x) = v + vt
9794afb647cSTimo Kreuzer    ; v and vt are in full precision
9804afb647cSTimo Kreuzer
9814afb647cSTimo Kreuzer    ; xmm0 - v
9824afb647cSTimo Kreuzer    ; xmm6 - vt
9834afb647cSTimo Kreuzer
9844afb647cSTimo Kreuzer    ; -----------------------------
9854afb647cSTimo Kreuzer    ; compute exp( y * ln(x) ) here
9864afb647cSTimo Kreuzer    ; -----------------------------
9874afb647cSTimo Kreuzer
9884afb647cSTimo Kreuzer    ; v * (64/ln(2))
    ; exp stage.  Input: xmm0 = v, xmm6 = vt, with y*ln(x) = v + vt.
    ; Steps: range-check v, reduce with n = round(v*64/ln(2)), j = n & 63,
    ; m = (n - j)/64, evaluate q(r) ~ exp(r) - 1, then form
    ; 2^m * (f1 + f2) * (1 + q) with f1/f2 taken from the 2^(j/64) tables.
9894afb647cSTimo Kreuzer    movsd       xmm7, QWORD PTR __real_64_by_log2
9904afb647cSTimo Kreuzer    movsd       QWORD PTR [p_temp_exp+rsp], xmm0
9914afb647cSTimo Kreuzer    mulsd       xmm7, xmm0
9924afb647cSTimo Kreuzer    mov         rdx, QWORD PTR [p_temp_exp+rsp] ; rdx = raw bits of v, read again in Lpow_sse2_process_true_denormal
9934afb647cSTimo Kreuzer
9944afb647cSTimo Kreuzer    ; v < 1024*ln(2), ( v * (64/ln(2)) ) < 64*1024
9954afb647cSTimo Kreuzer    ; v >= -1075*ln(2), ( v * (64/ln(2)) ) >= 64*(-1075)
9964afb647cSTimo Kreuzer    comisd      xmm7, QWORD PTR __real_p65536
9974afb647cSTimo Kreuzer    ja          Lpow_sse2_process_result_inf
9984afb647cSTimo Kreuzer
9994afb647cSTimo Kreuzer    comisd      xmm7, QWORD PTR __real_m68800
10004afb647cSTimo Kreuzer    jb          Lpow_sse2_process_result_zero
10014afb647cSTimo Kreuzer
10024afb647cSTimo Kreuzer    ; n = int( v * (64/ln(2)) )
10034afb647cSTimo Kreuzer    cvtpd2dq    xmm4, xmm7 ; conversion rounds according to the current MXCSR rounding mode
10044afb647cSTimo Kreuzer    lea         r10, __two_to_jby64_head_table
10054afb647cSTimo Kreuzer    lea         r11, __two_to_jby64_tail_table
10064afb647cSTimo Kreuzer    cvtdq2pd    xmm1, xmm4 ; xmm1 = n as a double
10074afb647cSTimo Kreuzer
10084afb647cSTimo Kreuzer    ; r1 = x - n * ln(2)/64 head
10094afb647cSTimo Kreuzer    movsd       xmm2, QWORD PTR __real_log2_by_64_head
10104afb647cSTimo Kreuzer    mulsd       xmm2, xmm1
10114afb647cSTimo Kreuzer    movd        ecx, xmm4 ; ecx = n
10124afb647cSTimo Kreuzer    mov         rax, 3fh
10134afb647cSTimo Kreuzer    and         eax, ecx ; rax = j = n & 63, index into both tables
10144afb647cSTimo Kreuzer    subsd       xmm0, xmm2
10154afb647cSTimo Kreuzer
10164afb647cSTimo Kreuzer    ; r2 = - n * ln(2)/64 tail
10174afb647cSTimo Kreuzer    mulsd       xmm1, QWORD PTR __real_log2_by_64_tail
10184afb647cSTimo Kreuzer    movsd       xmm2, xmm0
10194afb647cSTimo Kreuzer
10204afb647cSTimo Kreuzer    ; m = (n - j) / 64
10214afb647cSTimo Kreuzer    sub         ecx, eax
10224afb647cSTimo Kreuzer    sar         ecx, 6 ; arithmetic shift keeps the sign of m; the 32-bit write zero-extends into rcx
10234afb647cSTimo Kreuzer
10244afb647cSTimo Kreuzer    ; r1+r2
10254afb647cSTimo Kreuzer    addsd       xmm2, xmm1
10264afb647cSTimo Kreuzer    addsd       xmm2, xmm6 ; add vt here
10274afb647cSTimo Kreuzer    movsd       xmm1, xmm2
10284afb647cSTimo Kreuzer
10294afb647cSTimo Kreuzer    ; q
    ; With r = xmm2, the instructions below build
    ;   xmm0 = r + r^2/2, xmm3 = r^3/6 + r^4/24, xmm4 = r^5/120 + r^6/720
    ; and sum them, so q = r + r^2/2 + r^3/6 + r^4/24 + r^5/120 + r^6/720.
10304afb647cSTimo Kreuzer    movsd       xmm0, QWORD PTR __real_1_by_2
10314afb647cSTimo Kreuzer    movsd       xmm3, QWORD PTR __real_1_by_24
10324afb647cSTimo Kreuzer    movsd       xmm4, QWORD PTR __real_1_by_720
10334afb647cSTimo Kreuzer    mulsd       xmm1, xmm2 ; r^2
10344afb647cSTimo Kreuzer    mulsd       xmm0, xmm2
10354afb647cSTimo Kreuzer    mulsd       xmm3, xmm2
10364afb647cSTimo Kreuzer    mulsd       xmm4, xmm2
10374afb647cSTimo Kreuzer
10384afb647cSTimo Kreuzer    movsd       xmm5, xmm1
10394afb647cSTimo Kreuzer    mulsd       xmm1, xmm2 ; r^3
10404afb647cSTimo Kreuzer    addsd       xmm0, QWORD PTR __real_one
10414afb647cSTimo Kreuzer    addsd       xmm3, QWORD PTR __real_1_by_6
10424afb647cSTimo Kreuzer    mulsd       xmm5, xmm1 ; r^5
10434afb647cSTimo Kreuzer    addsd       xmm4, QWORD PTR __real_1_by_120
10444afb647cSTimo Kreuzer    mulsd       xmm0, xmm2
10454afb647cSTimo Kreuzer    mulsd       xmm3, xmm1
10464afb647cSTimo Kreuzer
10474afb647cSTimo Kreuzer    mulsd       xmm4, xmm5
10484afb647cSTimo Kreuzer
10494afb647cSTimo Kreuzer    ; deal with denormal results
    ; r9d = m when m <= -1022 (signed), otherwise 0; a non-zero r9d later
    ; routes the result through Lpow_sse2_process_denormal.
10504afb647cSTimo Kreuzer    xor         r9d, r9d
10514afb647cSTimo Kreuzer
10524afb647cSTimo Kreuzer    addsd       xmm3, xmm4
10534afb647cSTimo Kreuzer    addsd       xmm0, xmm3 ; xmm0 = q
10544afb647cSTimo Kreuzer
10554afb647cSTimo Kreuzer    cmp         ecx, DWORD PTR __denormal_threshold
10564afb647cSTimo Kreuzer    cmovle      r9d, ecx
    ; Build the scale factor: only the low 12 bits of (m + 1023) survive the
    ; shift into bits 52..63, giving the bit pattern of 2^m for normal m.
10574afb647cSTimo Kreuzer    add         rcx, 1023
10584afb647cSTimo Kreuzer    shl         rcx, 52
10594afb647cSTimo Kreuzer
10604afb647cSTimo Kreuzer    ; f1, f2
10614afb647cSTimo Kreuzer    movsd       xmm5, QWORD PTR [r11+rax*8] ; f2 = tail table entry j
10624afb647cSTimo Kreuzer    movsd       xmm1, QWORD PTR [r10+rax*8] ; f1 = head table entry j
10634afb647cSTimo Kreuzer    mulsd       xmm5, xmm0
10644afb647cSTimo Kreuzer    mulsd       xmm1, xmm0
10654afb647cSTimo Kreuzer
10664afb647cSTimo Kreuzer
10674afb647cSTimo Kreuzer    ; (f1+f2)*(1+q)
    ; Summed as f1 + (f1*q + (f2*q + f2)); f1 is added last.
10684afb647cSTimo Kreuzer    addsd       xmm5, QWORD PTR [r11+rax*8]
10694afb647cSTimo Kreuzer    addsd       xmm1, xmm5
10704afb647cSTimo Kreuzer    addsd       xmm1, QWORD PTR [r10+rax*8]
10714afb647cSTimo Kreuzer    movsd       xmm0, xmm1
10724afb647cSTimo Kreuzer
    ; Scale factor equal to __real_inf: the result is at or just below the
    ; overflow limit and needs the dedicated path.
10734afb647cSTimo Kreuzer    cmp         rcx, QWORD PTR __real_inf
10744afb647cSTimo Kreuzer    je          Lpow_sse2_process_almost_inf
10754afb647cSTimo Kreuzer
    ; The scale factor is stored before the branch because
    ; Lpow_sse2_process_denormal may also multiply by [p_temp_exp+rsp].
10764afb647cSTimo Kreuzer    mov         QWORD PTR [p_temp_exp+rsp], rcx
10774afb647cSTimo Kreuzer    test        r9d, r9d
10784afb647cSTimo Kreuzer    jnz         Lpow_sse2_process_denormal
10794afb647cSTimo Kreuzer    mulsd       xmm0, QWORD PTR [p_temp_exp+rsp]
    ; Apply the sign recorded in negate_result.  This is a 16-byte read;
    ; the visible code only writes the low qword of the slot, and only the
    ; low lane of xmm0 carries the scalar result.
10804afb647cSTimo Kreuzer    orpd        xmm0, XMMWORD PTR [negate_result+rsp]
10814afb647cSTimo Kreuzer
; Common exit: result is in xmm0.  Restores the two xmm registers saved in
; the prologue, releases the frame and returns.
10824afb647cSTimo KreuzerLpow_sse2_final_check:
10834afb647cSTimo Kreuzer    RestoreXmm   xmm7, save_xmm7
10844afb647cSTimo Kreuzer    RestoreXmm   xmm6, save_xmm6
10854afb647cSTimo Kreuzer    StackDeallocate stack_size
10864afb647cSTimo Kreuzer    ret
10874afb647cSTimo Kreuzer
; Reached when the scale factor built from m equals __real_inf.
; xmm0 holds the unscaled value (f1+f2)*(1+q).
10884afb647cSTimo KreuzerALIGN 16
10894afb647cSTimo KreuzerLpow_sse2_process_almost_inf:
    ; Unscaled value >= 1.0: the scaled result is out of range.
10904afb647cSTimo Kreuzer    comisd      xmm0, QWORD PTR __real_one
10914afb647cSTimo Kreuzer    jae         Lpow_sse2_process_result_inf
10924afb647cSTimo Kreuzer
    ; Otherwise scale by OR-ing exponent bits into xmm0 instead of
    ; multiplying.  For xmm0 in [0.5, 1.0) (exponent field 3feh) this turns
    ; the field into 7feh and keeps the mantissa, i.e. xmm0 * 2^1024.
    ; NOTE(review): relies on the unscaled value not being below 0.5 here -
    ; confirm from the bounds on f1+f2 and q.
10934afb647cSTimo Kreuzer    orpd        xmm0, XMMWORD PTR __enable_almost_inf
10944afb647cSTimo Kreuzer    orpd        xmm0, XMMWORD PTR [negate_result+rsp] ; apply the recorded sign
10954afb647cSTimo Kreuzer    jmp         Lpow_sse2_final_check
10964afb647cSTimo Kreuzer
10974afb647cSTimo KreuzerALIGN 16
10984afb647cSTimo KreuzerLpow_sse2_process_denormal:
    ; Entered when r9d is non-zero. r9d received the scale exponent (ecx)
    ; when that exponent was <= __denormal_threshold, so the result may be
    ; subnormal.
    ; Borderline case: if the exponent is exactly __denormal_threshold and
    ; the factor in xmm0 is >= 1.0, the ordinary scaling (multiply by the
    ; value saved in p_temp_exp) is still used. Everything else goes to the
    ; true-denormal path.
10994afb647cSTimo Kreuzer    mov         ecx, r9d                          ; ecx = scale exponent (also zero-extends rcx)
11004afb647cSTimo Kreuzer    xor         r11d, r11d                        ; r11d = 0 (flags are rewritten by comisd below)
11014afb647cSTimo Kreuzer    comisd      xmm0, QWORD PTR __real_one
11024afb647cSTimo Kreuzer    cmovae      r11d, ecx                         ; r11d = exponent only if xmm0 >= 1.0
11034afb647cSTimo Kreuzer    cmp         r11d, DWORD PTR __denormal_threshold
11044afb647cSTimo Kreuzer    jne         Lpow_sse2_process_true_denormal
11054afb647cSTimo Kreuzer
11064afb647cSTimo Kreuzer    mulsd       xmm0, QWORD PTR [p_temp_exp+rsp]  ; scale by the saved power of two
11074afb647cSTimo Kreuzer    orpd        xmm0, XMMWORD PTR [negate_result+rsp] ; apply result sign
11084afb647cSTimo Kreuzer    jmp         Lpow_sse2_final_check
11094afb647cSTimo Kreuzer
11104afb647cSTimo KreuzerALIGN 16
11114afb647cSTimo KreuzerLpow_sse2_process_true_denormal:
    ; Build a subnormal scale factor by hand and multiply the factor in xmm0
    ; by it, then report the result through Lpow_sse2_z_denormal.
    ; ecx holds the (negative) scale exponent on entry.
    ; NOTE(review): rdx is produced outside this view; it is only compared
    ; (signed) against __denormal_tiny_threshold here - confirm its meaning.
11124afb647cSTimo Kreuzer    xor         r8, r8                            ; r8 = 0, used as clamp value below
11134afb647cSTimo Kreuzer    mov         r9, 1
11144afb647cSTimo Kreuzer    cmp         rdx, QWORD PTR __denormal_tiny_threshold
11154afb647cSTimo Kreuzer    jg          Lpow_sse2_process_denormal_tiny
11164afb647cSTimo Kreuzer    add         ecx, 1074                         ; bit index inside the subnormal mantissa
11174afb647cSTimo Kreuzer    cmovs       rcx, r8                           ; negative index -> clamp to bit 0
11184afb647cSTimo Kreuzer    shl         r9, cl                            ; r9 = single mantissa bit (1 << index)
11194afb647cSTimo Kreuzer    mov         rcx, r9
11204afb647cSTimo Kreuzer
    ; Store the integer bit pattern and reuse it as a double operand: one
    ; mantissa bit with a zero exponent field is a subnormal power of two.
11214afb647cSTimo Kreuzer    mov         QWORD PTR [p_temp_exp+rsp], rcx
11224afb647cSTimo Kreuzer    mulsd       xmm0, QWORD PTR [p_temp_exp+rsp]
11234afb647cSTimo Kreuzer    orpd        xmm0, XMMWORD PTR [negate_result+rsp] ; apply result sign
11244afb647cSTimo Kreuzer    jmp         Lpow_sse2_z_denormal
11254afb647cSTimo Kreuzer
11264afb647cSTimo KreuzerALIGN 16
11274afb647cSTimo KreuzerLpow_sse2_process_denormal_tiny:
    ; Taken from the true-denormal path when rdx is above
    ; __denormal_tiny_threshold: the computed factor is discarded and the
    ; result becomes __real_smallest_denormal with the recorded sign, which
    ; is then reported through Lpow_sse2_z_denormal.
11284afb647cSTimo Kreuzer    movsd       xmm0, QWORD PTR __real_smallest_denormal
11294afb647cSTimo Kreuzer    orpd        xmm0, XMMWORD PTR [negate_result+rsp]
11304afb647cSTimo Kreuzer    jmp         Lpow_sse2_z_denormal
11314afb647cSTimo Kreuzer
11324afb647cSTimo KreuzerALIGN 16
11334afb647cSTimo KreuzerLpow_sse2_process_result_zero:
    ; Result underflows to zero: r11 = __real_zero bits OR'ed with the low
    ; quadword of negate_result (the sign), then hand r11 to the shared
    ; zero/infinity reporting code.
11344afb647cSTimo Kreuzer    mov         r11, QWORD PTR __real_zero
11354afb647cSTimo Kreuzer    or          r11, QWORD PTR [negate_result+rsp]
11364afb647cSTimo Kreuzer    jmp         Lpow_sse2_z_is_zero_or_inf
11374afb647cSTimo Kreuzer
11384afb647cSTimo KreuzerALIGN 16
11394afb647cSTimo KreuzerLpow_sse2_process_result_inf:
    ; Result overflows: r11 = __real_inf bits OR'ed with the low quadword of
    ; negate_result (the sign), then hand r11 to the shared zero/infinity
    ; reporting code.
11404afb647cSTimo Kreuzer    mov         r11, QWORD PTR __real_inf
11414afb647cSTimo Kreuzer    or          r11, QWORD PTR [negate_result+rsp]
11424afb647cSTimo Kreuzer    jmp         Lpow_sse2_z_is_zero_or_inf
11434afb647cSTimo Kreuzer
11444afb647cSTimo KreuzerALIGN 16
11454afb647cSTimo KreuzerLpow_sse2_denormal_adjust:
    ; Re-normalise the input before rejoining Lpow_sse2_continue_common.
    ; NOTE(review): the jump into this label is outside this view. The FMA3
    ; path jumps to its counterpart when the converted exponent compares
    ; equal to __mask_1023_f, with xmm2 holding the mantissa bits of the
    ; input; presumably the SSE2 entry state is the same - confirm.
    ; Outputs: xmm2 / r8 = mantissa bits of the normalised value,
    ;          xmm6      = adjusted exponent as a double.
11464afb647cSTimo Kreuzer    por         xmm2, XMMWORD PTR __real_one      ; splice the bits of 1.0 onto the mantissa
11474afb647cSTimo Kreuzer    subsd       xmm2, QWORD PTR __real_one        ; subtract 1.0 -> FPU normalises the remainder
11484afb647cSTimo Kreuzer    movsd       xmm5, xmm2                        ; keep a copy for exponent extraction
11494afb647cSTimo Kreuzer    pand        xmm2, XMMWORD PTR __real_mant     ; xmm2 = mantissa bits of normalised value
11504afb647cSTimo Kreuzer    movd        r8, xmm2                          ; r8 = same bits, used for table indexing later
11514afb647cSTimo Kreuzer    psrlq       xmm5, 52                          ; xmm5 = biased exponent field
11524afb647cSTimo Kreuzer    psubd       xmm5, XMMWORD PTR __mask_2045     ; remove bias plus the normalisation offset
11534afb647cSTimo Kreuzer    cvtdq2pd    xmm6, xmm5                        ; xmm6 = exponent converted to double
11544afb647cSTimo Kreuzer    jmp         Lpow_sse2_continue_common
11554afb647cSTimo Kreuzer
11564afb647cSTimo KreuzerALIGN 16
11574afb647cSTimo KreuzerLpow_sse2_x_is_neg:
    ; Negative-x handling. rdx holds the raw bits of x and r8 the raw bits
    ; of y (rdx is compared with __neg_zero/__neg_one below, r8 is the value
    ; tested for integrality).
    ; Steps:
    ;   1. |y| exponent above __ay_max_bound -> very-large-y path.
    ;   2. Decide whether y is an integer; a non-integer y goes to
    ;      Lpow_sse2_x_is_neg_y_is_not_int.
    ;   3. For an odd integer y, store __sign_mask in negate_result so the
    ;      final result is negated.
    ;   4. Dispatch x == -0, x == -1, x == inf/NaN; otherwise continue with
    ;      log(|x|).
11584afb647cSTimo Kreuzer
11594afb647cSTimo Kreuzer    mov         r10, QWORD PTR __exp_mask
11604afb647cSTimo Kreuzer    and         r10, r8                           ; r10 = exponent field of y
11614afb647cSTimo Kreuzer    cmp         r10, QWORD PTR __ay_max_bound
11624afb647cSTimo Kreuzer    jg          Lpow_sse2_ay_is_very_large
11634afb647cSTimo Kreuzer
11644afb647cSTimo Kreuzer    ; determine if y is an integer
11654afb647cSTimo Kreuzer    mov         r10, QWORD PTR __exp_mant_mask
11664afb647cSTimo Kreuzer    and         r10, r8                           ; r10 = |y| bits (sign cleared)
11674afb647cSTimo Kreuzer    mov         r11, r10                          ; r11 = |y| bits, kept for the mantissa tests
11684afb647cSTimo Kreuzer    mov         rcx, QWORD PTR __exp_shift
11694afb647cSTimo Kreuzer    shr         r10, cl                           ; r10 = biased exponent of y
11704afb647cSTimo Kreuzer    sub         r10, QWORD PTR __exp_bias         ; r10 = unbiased exponent
11714afb647cSTimo Kreuzer    js          Lpow_sse2_x_is_neg_y_is_not_int   ; negative exponent: |y| < 1, not an integer
11724afb647cSTimo Kreuzer
11734afb647cSTimo Kreuzer    mov         rax, QWORD PTR __exp_mant_mask
11744afb647cSTimo Kreuzer    and         rax, rdx                          ; rax = |x| bits
11754afb647cSTimo Kreuzer    mov         QWORD PTR [save_ax+rsp], rax      ; save |x| for the log computation
11764afb647cSTimo Kreuzer
11774afb647cSTimo Kreuzer    mov         rcx, r10                          ; cl = unbiased exponent, used as shift count
11784afb647cSTimo Kreuzer    cmp         r10, QWORD PTR __yexp_53
11794afb647cSTimo Kreuzer    jg          Lpow_sse2_continue_after_y_int_check ; exponent above __yexp_53: no bit tests, sign not flipped
11804afb647cSTimo Kreuzer
11814afb647cSTimo Kreuzer    mov         r9, QWORD PTR __mant_full
11824afb647cSTimo Kreuzer    shr         r9, cl                            ; mask of mantissa bits below the units position
11834afb647cSTimo Kreuzer    and         r9, r11
11844afb647cSTimo Kreuzer    jnz         Lpow_sse2_x_is_neg_y_is_not_int   ; fractional bits set -> y is not an integer
11854afb647cSTimo Kreuzer
11864afb647cSTimo Kreuzer    mov         r9, QWORD PTR __1_before_mant
11874afb647cSTimo Kreuzer    shr         r9, cl                            ; mask of the units bit of y
11884afb647cSTimo Kreuzer    and         r9, r11
11894afb647cSTimo Kreuzer    jz          Lpow_sse2_continue_after_y_int_check ; units bit clear -> even y, sign unchanged
11904afb647cSTimo Kreuzer
    ; y is an odd integer: the result takes the sign bit.
11914afb647cSTimo Kreuzer    mov         rax, QWORD PTR __sign_mask
11924afb647cSTimo Kreuzer    mov         QWORD PTR [negate_result+rsp], rax
11934afb647cSTimo Kreuzer
11944afb647cSTimo KreuzerLpow_sse2_continue_after_y_int_check:
11954afb647cSTimo Kreuzer
11964afb647cSTimo Kreuzer    cmp         rdx, QWORD PTR __neg_zero
11974afb647cSTimo Kreuzer    je          Lpow_sse2_x_is_zero
11984afb647cSTimo Kreuzer
11994afb647cSTimo Kreuzer    cmp         rdx, QWORD PTR __neg_one
12004afb647cSTimo Kreuzer    je          Lpow_sse2_x_is_neg_one
12014afb647cSTimo Kreuzer
12024afb647cSTimo Kreuzer    mov         r9, QWORD PTR __exp_mask
12034afb647cSTimo Kreuzer    and         r9, rdx
12044afb647cSTimo Kreuzer    cmp         r9, QWORD PTR __exp_mask          ; exponent all ones -> x is inf or NaN
12054afb647cSTimo Kreuzer    je          Lpow_sse2_x_is_inf_or_nan
12064afb647cSTimo Kreuzer
12074afb647cSTimo Kreuzer    movsd       xmm0, QWORD PTR [save_ax+rsp]     ; xmm0 = |x|; sign is re-applied via negate_result
12084afb647cSTimo Kreuzer    jmp         Lpow_sse2_log_x
12094afb647cSTimo Kreuzer
12104afb647cSTimo Kreuzer
12114afb647cSTimo KreuzerALIGN 16
12124afb647cSTimo KreuzerLpow_sse2_near_one:
    ; Higher-accuracy log(x) evaluation; exits to Lpow_sse2_log_x_continue.
    ; NOTE(review): the jump into this label is outside this view. The FMA3
    ; twin is entered when |x - 1| is below __real_threshold, with
    ;   xmm1 = F, xmm2 = Y, xmm6 = xexp, r8 = table index,
    ;   r9 = &__log_F_inv_head, rdx = &__log_F_inv_tail;
    ; presumably the SSE2 entry state is identical - confirm.
    ; The reduced argument is split as r = r1 + r2, where r1 is f*inv with
    ; its low bits masked off and r2 is the remainder.
    ; On exit: xmm0 = table lead + xexp*log2_lead,
    ;          xmm1 = tail sum, xmm2 = r1 + r1^2/2, xmm3 = xmm1 - xmm2.
12134afb647cSTimo Kreuzer
12144afb647cSTimo Kreuzer    ; f = F - Y, r = f * inv
12154afb647cSTimo Kreuzer    movsd       xmm0, xmm1                        ; xmm0 = F
12164afb647cSTimo Kreuzer    subsd       xmm1, xmm2                        ; xmm1 = f = F - Y
12174afb647cSTimo Kreuzer    movsd       xmm4, xmm1                        ; xmm4 = f
12184afb647cSTimo Kreuzer
12194afb647cSTimo Kreuzer    movsd       xmm3, QWORD PTR [r9+r8*8]
12204afb647cSTimo Kreuzer    addsd       xmm3, QWORD PTR [rdx+r8*8]        ; xmm3 = inv = head entry + tail entry
12214afb647cSTimo Kreuzer    mulsd       xmm4, xmm3                        ; xmm4 = f * inv
1222*105426b8STimo Kreuzer    andpd       xmm4, XMMWORD PTR __real_fffffffff8000000 ; mask off low bits (mask value per constant name)
12234afb647cSTimo Kreuzer    movsd       xmm5, xmm4 ; r1
12244afb647cSTimo Kreuzer    mulsd       xmm4, xmm0                        ; xmm4 = r1 * F
12254afb647cSTimo Kreuzer    subsd       xmm1, xmm4                        ; xmm1 = f - r1*F
12264afb647cSTimo Kreuzer    mulsd       xmm1, xmm3                        ; xmm1 = (f - r1*F) * inv
12274afb647cSTimo Kreuzer    movsd       xmm7, xmm1 ; r2
12284afb647cSTimo Kreuzer    addsd       xmm1, xmm5                        ; xmm1 = r = r1 + r2
12294afb647cSTimo Kreuzer
12304afb647cSTimo Kreuzer    movsd       xmm2, xmm1                        ; xmm2 = r
12314afb647cSTimo Kreuzer    movsd       xmm0, xmm1                        ; xmm0 = r
12324afb647cSTimo Kreuzer
12334afb647cSTimo Kreuzer    lea         r9, __log_256_lead                ; r9 now addresses the lead table
12344afb647cSTimo Kreuzer
12354afb647cSTimo Kreuzer    ; poly
    ; Two interleaved chains:
    ;   xmm1 -> r^3 * (1/3 + r/4)
    ;   xmm3 -> r^5 * (1/5 + r/6 + r^2/7)
12364afb647cSTimo Kreuzer    movsd       xmm3, QWORD PTR __real_1_over_7
12374afb647cSTimo Kreuzer    movsd       xmm1, QWORD PTR __real_1_over_4
12384afb647cSTimo Kreuzer    mulsd       xmm3, xmm2                        ; r/7
12394afb647cSTimo Kreuzer    mulsd       xmm1, xmm2                        ; r/4
12404afb647cSTimo Kreuzer    mulsd       xmm0, xmm2                        ; xmm0 = r^2
12414afb647cSTimo Kreuzer    movsd       xmm4, xmm0                        ; xmm4 = r^2
12424afb647cSTimo Kreuzer    addsd       xmm3, QWORD PTR __real_1_over_6   ; r/7 + 1/6
12434afb647cSTimo Kreuzer    addsd       xmm1, QWORD PTR __real_1_over_3   ; r/4 + 1/3
12444afb647cSTimo Kreuzer    mulsd       xmm4, xmm0                        ; xmm4 = r^4
12454afb647cSTimo Kreuzer    mulsd       xmm3, xmm2                        ; (r/7 + 1/6)*r
12464afb647cSTimo Kreuzer    mulsd       xmm1, xmm2                        ; (r/4 + 1/3)*r
12474afb647cSTimo Kreuzer    addsd       xmm3, QWORD PTR __real_1_over_5   ; (r/7 + 1/6)*r + 1/5
12484afb647cSTimo Kreuzer    mulsd       xmm3, xmm2                        ; ((r/7 + 1/6)*r + 1/5)*r
12494afb647cSTimo Kreuzer    mulsd       xmm1, xmm0                        ; xmm1 = (r/4 + 1/3)*r^3
12504afb647cSTimo Kreuzer    mulsd       xmm3, xmm4                        ; xmm3 = ((r/7 + 1/6)*r + 1/5)*r^5
12514afb647cSTimo Kreuzer
    ; xmm5 = r2 + r1*r2 + r2^2/2  (quadratic terms that involve r2)
12524afb647cSTimo Kreuzer    movsd       xmm2, xmm5                        ; xmm2 = r1
12534afb647cSTimo Kreuzer    movsd       xmm0, xmm7                        ; xmm0 = r2
12544afb647cSTimo Kreuzer    mulsd       xmm0, xmm0                        ; r2^2
12554afb647cSTimo Kreuzer    mulsd       xmm0, QWORD PTR __real_1_over_2   ; r2^2/2
12564afb647cSTimo Kreuzer    mulsd       xmm5, xmm7                        ; r1*r2
12574afb647cSTimo Kreuzer    addsd       xmm5, xmm0
12584afb647cSTimo Kreuzer    addsd       xmm5, xmm7
12594afb647cSTimo Kreuzer
    ; xmm2 = r1 + r1^2/2 (rounded); xmm7 = rounding error of that sum,
    ; recovered as (r1 - xmm2) + r1^2/2.
12604afb647cSTimo Kreuzer    movsd       xmm0, xmm2
12614afb647cSTimo Kreuzer    movsd       xmm7, xmm2
12624afb647cSTimo Kreuzer    mulsd       xmm0, xmm0                        ; r1^2
12634afb647cSTimo Kreuzer    mulsd       xmm0, QWORD PTR __real_1_over_2   ; r1^2/2
12644afb647cSTimo Kreuzer    movsd       xmm4, xmm0
12654afb647cSTimo Kreuzer    addsd       xmm2, xmm0 ; r1 + r1^2/2
12664afb647cSTimo Kreuzer    subsd       xmm7, xmm2
12674afb647cSTimo Kreuzer    addsd       xmm7, xmm4
12684afb647cSTimo Kreuzer
    ; Accumulate all small terms into xmm1, then form the tail:
    ;   xmm4 = xexp*log2_tail + __log_256_tail[r8] - (small terms)
12694afb647cSTimo Kreuzer    addsd       xmm3, xmm7
12704afb647cSTimo Kreuzer    movsd       xmm4, QWORD PTR __real_log2_tail
12714afb647cSTimo Kreuzer    addsd       xmm1, xmm3
12724afb647cSTimo Kreuzer    mulsd       xmm4, xmm6                        ; xexp * log2_tail
12734afb647cSTimo Kreuzer    lea         rdx, __log_256_tail               ; rdx now addresses the tail table
12744afb647cSTimo Kreuzer    addsd       xmm1, xmm5
12754afb647cSTimo Kreuzer    addsd       xmm4, QWORD PTR [rdx+r8*8]
12764afb647cSTimo Kreuzer    subsd       xmm4, xmm1
12774afb647cSTimo Kreuzer
12784afb647cSTimo Kreuzer    movsd       xmm3, xmm4
12794afb647cSTimo Kreuzer    movsd       xmm1, xmm4                        ; xmm1 = tail sum
12804afb647cSTimo Kreuzer    subsd       xmm3, xmm2                        ; xmm3 = tail sum - (r1 + r1^2/2)
12814afb647cSTimo Kreuzer
    ; xmm0 = __log_256_lead[r8] + xexp*log2_lead
12824afb647cSTimo Kreuzer    movsd       xmm0, QWORD PTR [r9+r8*8]
12834afb647cSTimo Kreuzer    movsd       xmm7, QWORD PTR __real_log2_lead
12844afb647cSTimo Kreuzer    mulsd       xmm7, xmm6
12854afb647cSTimo Kreuzer    addsd       xmm0, xmm7
12864afb647cSTimo Kreuzer
12874afb647cSTimo Kreuzer    jmp         Lpow_sse2_log_x_continue
12884afb647cSTimo Kreuzer
12894afb647cSTimo Kreuzer
12904afb647cSTimo KreuzerALIGN 16
12914afb647cSTimo KreuzerLpow_sse2_x_is_pos_one:
    ; x == +1.0: nothing to compute, xmm0 is returned unchanged.
    ; NOTE(review): relies on xmm0 still holding x at this point; the jump
    ; into this label is outside this view - confirm.
12924afb647cSTimo Kreuzer    jmp         Lpow_sse2_final_check
12934afb647cSTimo Kreuzer
12944afb647cSTimo KreuzerALIGN 16
12954afb647cSTimo KreuzerLpow_sse2_y_is_zero:
    ; y == 0: the result is the constant __real_one. x is not examined.
12964afb647cSTimo Kreuzer    movsd       xmm0, QWORD PTR __real_one
12974afb647cSTimo Kreuzer    jmp         Lpow_sse2_final_check
12984afb647cSTimo Kreuzer
12994afb647cSTimo KreuzerALIGN 16
13004afb647cSTimo KreuzerLpow_sse2_y_is_one:
    ; y == 1: return x itself (rdx = raw bits of x), unless x is a NaN, in
    ; which case the NaN handler is used with r11 = x bits.
13014afb647cSTimo Kreuzer    xor         rax, rax
13024afb647cSTimo Kreuzer    mov         r11, rdx                          ; r11 = x bits, consumed by Lpow_sse2_x_is_nan
13034afb647cSTimo Kreuzer    mov         r9, QWORD PTR __exp_mask
13044afb647cSTimo Kreuzer    ;or          r11, QWORD PTR __qnan_set
13054afb647cSTimo Kreuzer    and         r9, rdx
13064afb647cSTimo Kreuzer    cmp         r9, QWORD PTR __exp_mask
13074afb647cSTimo Kreuzer    cmove       rax, rdx                          ; rax = x bits if exponent is all ones, else 0
13084afb647cSTimo Kreuzer    mov         r9, QWORD PTR __mant_mask
13094afb647cSTimo Kreuzer    and         r9, rax                           ; non-zero only for all-ones exponent + non-zero mantissa
13104afb647cSTimo Kreuzer    jnz         Lpow_sse2_x_is_nan
13114afb647cSTimo Kreuzer
13124afb647cSTimo Kreuzer    movd        xmm0, rdx                         ; result = x
13134afb647cSTimo Kreuzer    jmp         Lpow_sse2_final_check
13144afb647cSTimo Kreuzer
13154afb647cSTimo KreuzerALIGN 16
13164afb647cSTimo KreuzerLpow_sse2_x_is_neg_one:
    ; x == -1: the result is 1.0 carrying the sign stored in negate_result,
    ; unless y (r8 = raw bits of y) is a NaN, in which case the y-NaN
    ; handler is used with r11 = y bits.
13174afb647cSTimo Kreuzer    mov         rdx, QWORD PTR __pos_one
13184afb647cSTimo Kreuzer    or          rdx, QWORD PTR [negate_result+rsp] ; rdx = +1.0 or -1.0 bits
13194afb647cSTimo Kreuzer    xor         rax, rax
13204afb647cSTimo Kreuzer    mov         r11, r8                           ; r11 = y bits, consumed by Lpow_sse2_y_is_nan
13214afb647cSTimo Kreuzer    mov         r10, QWORD PTR __exp_mask
13224afb647cSTimo Kreuzer    ;or          r11, QWORD PTR __qnan_set
13234afb647cSTimo Kreuzer    and         r10, r8
13244afb647cSTimo Kreuzer    cmp         r10, QWORD PTR __exp_mask
13254afb647cSTimo Kreuzer    cmove       rax, r8                           ; rax = y bits if exponent is all ones, else 0
13264afb647cSTimo Kreuzer    mov         r10, QWORD PTR __mant_mask
13274afb647cSTimo Kreuzer    and         r10, rax                          ; non-zero only when y is a NaN
13284afb647cSTimo Kreuzer    jnz         Lpow_sse2_y_is_nan
13294afb647cSTimo Kreuzer
13304afb647cSTimo Kreuzer    movd        xmm0, rdx
13314afb647cSTimo Kreuzer    jmp         Lpow_sse2_final_check
13324afb647cSTimo Kreuzer
13334afb647cSTimo KreuzerALIGN 16
13344afb647cSTimo KreuzerLpow_sse2_x_is_neg_y_is_not_int:
    ; Negative x with a non-integer y. x = inf/NaN and x = -0 are handed to
    ; their own handlers first. Any other x is a domain error, reported
    ; through fname_special with __neg_qnan as the proposed result.
    ; NOTE(review): fname_special is defined outside this view. Every call
    ; site here passes xmm0 = x, xmm1 = y, xmm2 = proposed result,
    ; r9d = condition flag, and leaves its xmm0 untouched on the way to the
    ; exit, so presumably it returns the final value in xmm0 - confirm.
13354afb647cSTimo Kreuzer    mov         r9, QWORD PTR __exp_mask
13364afb647cSTimo Kreuzer    and         r9, rdx
13374afb647cSTimo Kreuzer    cmp         r9, QWORD PTR __exp_mask          ; exponent all ones -> x is inf or NaN
13384afb647cSTimo Kreuzer    je          Lpow_sse2_x_is_inf_or_nan
13394afb647cSTimo Kreuzer
13404afb647cSTimo Kreuzer    cmp         rdx, QWORD PTR __neg_zero
13414afb647cSTimo Kreuzer    je          Lpow_sse2_x_is_zero
13424afb647cSTimo Kreuzer
13434afb647cSTimo Kreuzer    movsd       xmm0, QWORD PTR [save_x+rsp]
13444afb647cSTimo Kreuzer    movsd       xmm1, QWORD PTR [save_y+rsp]
13454afb647cSTimo Kreuzer    movsd       xmm2, QWORD PTR __neg_qnan
13464afb647cSTimo Kreuzer    mov         r9d, DWORD PTR __flag_x_neg_y_notint
13474afb647cSTimo Kreuzer
13484afb647cSTimo Kreuzer    call        fname_special
13494afb647cSTimo Kreuzer    jmp         Lpow_sse2_final_check
13504afb647cSTimo Kreuzer
13514afb647cSTimo KreuzerALIGN 16
13524afb647cSTimo KreuzerLpow_sse2_ay_is_very_large:
    ; y has an exponent above __ay_max_bound (this includes y = inf/NaN).
    ; Dispatch on x (rdx = raw bits of x):
    ;   x is inf/NaN -> Lpow_sse2_x_is_inf_or_nan
    ;   |x| == 0     -> Lpow_sse2_x_is_zero
    ;   x == -1      -> Lpow_sse2_x_is_neg_one
    ;   |x| <  1     -> Lpow_sse2_ax_lt1_y_is_large_or_inf_or_nan
    ;   otherwise    -> Lpow_sse2_ax_gt1_y_is_large_or_inf_or_nan
13534afb647cSTimo Kreuzer    mov         r9, QWORD PTR __exp_mask
13544afb647cSTimo Kreuzer    and         r9, rdx
13554afb647cSTimo Kreuzer    cmp         r9, QWORD PTR __exp_mask
13564afb647cSTimo Kreuzer    je          Lpow_sse2_x_is_inf_or_nan
13574afb647cSTimo Kreuzer
13584afb647cSTimo Kreuzer    mov         r9, QWORD PTR __exp_mant_mask
13594afb647cSTimo Kreuzer    and         r9, rdx                           ; zero when x is +0 or -0
13604afb647cSTimo Kreuzer    jz          Lpow_sse2_x_is_zero
13614afb647cSTimo Kreuzer
13624afb647cSTimo Kreuzer    cmp         rdx, QWORD PTR __neg_one
13634afb647cSTimo Kreuzer    je          Lpow_sse2_x_is_neg_one
13644afb647cSTimo Kreuzer
13654afb647cSTimo Kreuzer    mov         r9, rdx
13664afb647cSTimo Kreuzer    and         r9, QWORD PTR __exp_mant_mask     ; r9 = |x| bits
    ; With the sign bit cleared the bit patterns order like the values, so
    ; an integer compare against the bits of 1.0 decides |x| < 1.
13674afb647cSTimo Kreuzer    cmp         r9, QWORD PTR __pos_one
13684afb647cSTimo Kreuzer    jl          Lpow_sse2_ax_lt1_y_is_large_or_inf_or_nan
13694afb647cSTimo Kreuzer
13704afb647cSTimo Kreuzer    jmp         Lpow_sse2_ax_gt1_y_is_large_or_inf_or_nan
13714afb647cSTimo Kreuzer
13724afb647cSTimo KreuzerALIGN 16
13734afb647cSTimo KreuzerLpow_sse2_x_is_zero:
    ; x is +0 or -0 (r8 = raw bits of y).
    ;   y is inf/NaN -> Lpow_sse2_x_is_zero_y_is_inf_or_nan
    ;   y negative   -> result is infinity, via Lpow_sse2_x_is_zero_z_is_inf
    ;   y positive   -> result is zero carrying the sign in negate_result
13744afb647cSTimo Kreuzer    mov         r10, QWORD PTR __exp_mask
13754afb647cSTimo Kreuzer    xor         rax, rax                          ; rax = 0 (bits of +0.0); flags are rewritten below
13764afb647cSTimo Kreuzer    and         r10, r8
13774afb647cSTimo Kreuzer    cmp         r10, QWORD PTR __exp_mask
13784afb647cSTimo Kreuzer    je          Lpow_sse2_x_is_zero_y_is_inf_or_nan
13794afb647cSTimo Kreuzer
13804afb647cSTimo Kreuzer    mov         r10, QWORD PTR __sign_mask
13814afb647cSTimo Kreuzer    and         r10, r8                           ; ZF clear when y is negative
13824afb647cSTimo Kreuzer    cmovnz      rax, QWORD PTR __pos_inf          ; cmov leaves flags intact for the jnz
13834afb647cSTimo Kreuzer    jnz         Lpow_sse2_x_is_zero_z_is_inf
13844afb647cSTimo Kreuzer
13854afb647cSTimo Kreuzer    movd        xmm0, rax
13864afb647cSTimo Kreuzer    orpd        xmm0, XMMWORD PTR [negate_result+rsp]
13874afb647cSTimo Kreuzer    jmp         Lpow_sse2_final_check
13884afb647cSTimo Kreuzer
13894afb647cSTimo KreuzerALIGN 16
13904afb647cSTimo KreuzerLpow_sse2_x_is_zero_z_is_inf:
    ; Zero raised to a negative power. rax holds the bits of __pos_inf on
    ; entry; the proposed result is rax with the sign from negate_result.
    ; It is passed to fname_special in xmm2 together with x, y and the
    ; __flag_x_zero_z_inf condition flag (in r9d).
13914afb647cSTimo Kreuzer
13924afb647cSTimo Kreuzer    movsd       xmm0, QWORD PTR [save_x+rsp]
13934afb647cSTimo Kreuzer    movsd       xmm1, QWORD PTR [save_y+rsp]
13944afb647cSTimo Kreuzer    movd        xmm2, rax
13954afb647cSTimo Kreuzer    orpd        xmm2, XMMWORD PTR [negate_result+rsp]
13964afb647cSTimo Kreuzer    mov         r9d, DWORD PTR __flag_x_zero_z_inf
13974afb647cSTimo Kreuzer
13984afb647cSTimo Kreuzer    call        fname_special
13994afb647cSTimo Kreuzer    jmp         Lpow_sse2_final_check
14004afb647cSTimo Kreuzer
14014afb647cSTimo KreuzerALIGN 16
14024afb647cSTimo KreuzerLpow_sse2_x_is_zero_y_is_inf_or_nan:
    ; x is zero and y has an all-ones exponent (r8 = y bits, rax = 0 on
    ; entry).
    ;   y == -inf -> result is +inf, via Lpow_sse2_x_is_zero_z_is_inf
    ;   y is NaN  -> Lpow_sse2_y_is_nan with r11 = y bits
    ;   y == +inf -> result is +0 (rax is still zero)
14034afb647cSTimo Kreuzer    mov         r11, r8
14044afb647cSTimo Kreuzer    cmp         r8, QWORD PTR __neg_inf
14054afb647cSTimo Kreuzer    cmove       rax, QWORD PTR __pos_inf          ; cmov leaves flags intact for the je
14064afb647cSTimo Kreuzer    je          Lpow_sse2_x_is_zero_z_is_inf
14074afb647cSTimo Kreuzer
14084afb647cSTimo Kreuzer    ;or          r11, QWORD PTR __qnan_set
14094afb647cSTimo Kreuzer    mov         r10, QWORD PTR __mant_mask
14104afb647cSTimo Kreuzer    and         r10, r8                           ; non-zero mantissa -> y is a NaN
14114afb647cSTimo Kreuzer    jnz         Lpow_sse2_y_is_nan
14124afb647cSTimo Kreuzer
14134afb647cSTimo Kreuzer    movd        xmm0, rax
14144afb647cSTimo Kreuzer    jmp         Lpow_sse2_final_check
14154afb647cSTimo Kreuzer
14164afb647cSTimo KreuzerALIGN 16
14174afb647cSTimo KreuzerLpow_sse2_x_is_inf_or_nan:
    ; x has an all-ones exponent (rdx = x bits, r8 = y bits).
    ; r11 collects the proposed result:
    ;   start:     +inf when y is non-negative, 0 when y is negative
    ;   x is NaN:  r11 = x bits -> Lpow_sse2_x_is_nan
    ;   y is NaN:  r11 = y bits -> Lpow_sse2_y_is_nan
    ;   otherwise: return r11 with the sign from negate_result
14184afb647cSTimo Kreuzer    xor         r11, r11
14194afb647cSTimo Kreuzer    mov         r10, QWORD PTR __sign_mask
14204afb647cSTimo Kreuzer    and         r10, r8                           ; ZF set when y's sign bit is clear
14214afb647cSTimo Kreuzer    cmovz       r11, QWORD PTR __pos_inf
14224afb647cSTimo Kreuzer    mov         rax, rdx
14234afb647cSTimo Kreuzer    mov         r9, QWORD PTR __mant_mask
14244afb647cSTimo Kreuzer    ;or          rax, QWORD PTR __qnan_set
14254afb647cSTimo Kreuzer    and         r9, rdx                           ; non-zero mantissa -> x is a NaN
14264afb647cSTimo Kreuzer    cmovnz      r11, rax                          ; cmov leaves flags intact for the jnz
14274afb647cSTimo Kreuzer    jnz         Lpow_sse2_x_is_nan
14284afb647cSTimo Kreuzer
    ; x is +/-inf from here on; check whether y is a NaN.
14294afb647cSTimo Kreuzer    xor         rax, rax
14304afb647cSTimo Kreuzer    mov         r9, r8
14314afb647cSTimo Kreuzer    mov         r10, QWORD PTR __exp_mask
14324afb647cSTimo Kreuzer    ;or          r9, QWORD PTR __qnan_set
14334afb647cSTimo Kreuzer    and         r10, r8
14344afb647cSTimo Kreuzer    cmp         r10, QWORD PTR __exp_mask
14354afb647cSTimo Kreuzer    cmove       rax, r8                           ; rax = y bits if exponent is all ones, else 0
14364afb647cSTimo Kreuzer    mov         r10, QWORD PTR __mant_mask
14374afb647cSTimo Kreuzer    and         r10, rax                          ; non-zero only when y is a NaN
14384afb647cSTimo Kreuzer    cmovnz      r11, r9
14394afb647cSTimo Kreuzer    jnz         Lpow_sse2_y_is_nan
14404afb647cSTimo Kreuzer
14414afb647cSTimo Kreuzer    movd        xmm0, r11
14424afb647cSTimo Kreuzer    orpd        xmm0, XMMWORD PTR [negate_result+rsp]
14434afb647cSTimo Kreuzer    jmp         Lpow_sse2_final_check
14444afb647cSTimo Kreuzer
14454afb647cSTimo KreuzerALIGN 16
14464afb647cSTimo KreuzerLpow_sse2_ay_is_very_small:
    ; Returns 1.0 + xmm1 without computing log(x).
    ; NOTE(review): the jump into this label is outside this view; the FMA3
    ; path takes its counterpart when y's exponent is below __ay_min_bound,
    ; with xmm1 still holding y - presumably the same here, confirm.
14474afb647cSTimo Kreuzer    movsd       xmm0, QWORD PTR __pos_one
14484afb647cSTimo Kreuzer    addsd       xmm0, xmm1
14494afb647cSTimo Kreuzer    jmp         Lpow_sse2_final_check
14504afb647cSTimo Kreuzer
14514afb647cSTimo Kreuzer
14524afb647cSTimo KreuzerALIGN 16
14534afb647cSTimo KreuzerLpow_sse2_ax_lt1_y_is_large_or_inf_or_nan:
    ; |x| < 1 with a very large |y| (r8 = y bits): the proposed result in
    ; r11 is +inf when y is negative and 0 otherwise. The NaN/inf checks on
    ; y follow in Lpow_sse2_adjust_for_nan.
14544afb647cSTimo Kreuzer    xor         r11, r11
14554afb647cSTimo Kreuzer    mov         r10, QWORD PTR __sign_mask
14564afb647cSTimo Kreuzer    and         r10, r8                           ; ZF clear when y is negative
14574afb647cSTimo Kreuzer    cmovnz      r11, QWORD PTR __pos_inf
14584afb647cSTimo Kreuzer    jmp         Lpow_sse2_adjust_for_nan
14594afb647cSTimo Kreuzer
14604afb647cSTimo KreuzerALIGN 16
14614afb647cSTimo KreuzerLpow_sse2_ax_gt1_y_is_large_or_inf_or_nan:
    ; |x| >= 1 with a very large |y| (r8 = y bits): the proposed result in
    ; r11 is +inf when y is non-negative and 0 otherwise. Execution then
    ; falls through (across the ALIGN padding) into
    ; Lpow_sse2_adjust_for_nan.
14624afb647cSTimo Kreuzer    xor         r11, r11
14634afb647cSTimo Kreuzer    mov         r10, QWORD PTR __sign_mask
14644afb647cSTimo Kreuzer    and         r10, r8                           ; ZF set when y's sign bit is clear
14654afb647cSTimo Kreuzer    cmovz       r11, QWORD PTR __pos_inf
14664afb647cSTimo Kreuzer
14674afb647cSTimo KreuzerALIGN 16
14684afb647cSTimo KreuzerLpow_sse2_adjust_for_nan:
    ; Shared tail of the two large-|y| paths. r11 = proposed result.
    ;   y is NaN    -> r11 = y bits, Lpow_sse2_y_is_nan
    ;   y is +/-inf -> Lpow_sse2_y_is_inf (returns r11 directly)
    ;   y finite    -> falls through into the code that follows
    ;                  (Lpow_sse2_z_is_zero_or_inf)
14694afb647cSTimo Kreuzer
14704afb647cSTimo Kreuzer    xor         rax, rax
14714afb647cSTimo Kreuzer    mov         r9, r8
14724afb647cSTimo Kreuzer    mov         r10, QWORD PTR __exp_mask
14734afb647cSTimo Kreuzer    ;or          r9, QWORD PTR __qnan_set
14744afb647cSTimo Kreuzer    and         r10, r8
14754afb647cSTimo Kreuzer    cmp         r10, QWORD PTR __exp_mask
14764afb647cSTimo Kreuzer    cmove       rax, r8                           ; rax = y bits if exponent is all ones, else 0
14774afb647cSTimo Kreuzer    mov         r10, QWORD PTR __mant_mask
14784afb647cSTimo Kreuzer    and         r10, rax                          ; non-zero only when y is a NaN
14794afb647cSTimo Kreuzer    cmovnz      r11, r9                           ; cmov leaves flags intact for the jnz
14804afb647cSTimo Kreuzer    jnz         Lpow_sse2_y_is_nan
14814afb647cSTimo Kreuzer
14824afb647cSTimo Kreuzer    test        rax, rax                          ; non-zero here means all-ones exponent, zero mantissa
14834afb647cSTimo Kreuzer    jnz         Lpow_sse2_y_is_inf
14844afb647cSTimo Kreuzer
14854afb647cSTimo KreuzerALIGN 16
14864afb647cSTimo KreuzerLpow_sse2_z_is_zero_or_inf:
    ; Report a result that is exactly zero or infinity. r11 = result bits.
    ; The flag passed in r9d is __flag_z_zero when r11 has no exponent or
    ; mantissa bits set, and __flag_z_inf otherwise. The result goes to
    ; fname_special in xmm2, together with the original x and y.
14874afb647cSTimo Kreuzer
14884afb647cSTimo Kreuzer    mov         r9d, DWORD PTR __flag_z_zero
14894afb647cSTimo Kreuzer    test        r11, QWORD PTR __exp_mant_mask    ; ignore the sign bit
14904afb647cSTimo Kreuzer    cmovnz      r9d, DWORD PTR __flag_z_inf
14914afb647cSTimo Kreuzer
14924afb647cSTimo Kreuzer    movsd       xmm0, QWORD PTR [save_x+rsp]
14934afb647cSTimo Kreuzer    movsd       xmm1, QWORD PTR [save_y+rsp]
14944afb647cSTimo Kreuzer    movd        xmm2, r11
14954afb647cSTimo Kreuzer
14964afb647cSTimo Kreuzer    call        fname_special
14974afb647cSTimo Kreuzer    jmp         Lpow_sse2_final_check
14984afb647cSTimo Kreuzer
14994afb647cSTimo KreuzerALIGN 16
15004afb647cSTimo KreuzerLpow_sse2_y_is_inf:
    ; y is +/-inf: return the value prepared in r11 directly, without going
    ; through fname_special.
15014afb647cSTimo Kreuzer
15024afb647cSTimo Kreuzer    movd        xmm0, r11
15034afb647cSTimo Kreuzer    jmp         Lpow_sse2_final_check
15044afb647cSTimo Kreuzer
15054afb647cSTimo KreuzerALIGN 16
15064afb647cSTimo KreuzerLpow_sse2_x_is_nan:
    ; x is a NaN; r11 = x bits (set by the path that jumped here),
    ; r8 = y bits. If y is a NaN as well, the two-NaN handler takes over.
    ; Otherwise r11 is reported through fname_special with __flag_x_nan.
15074afb647cSTimo Kreuzer
15084afb647cSTimo Kreuzer    xor         rax, rax
15094afb647cSTimo Kreuzer    mov         r10, QWORD PTR __exp_mask
15104afb647cSTimo Kreuzer    and         r10, r8
15114afb647cSTimo Kreuzer    cmp         r10, QWORD PTR __exp_mask
15124afb647cSTimo Kreuzer    cmove       rax, r8                           ; rax = y bits if exponent is all ones, else 0
15134afb647cSTimo Kreuzer    mov         r10, QWORD PTR __mant_mask
15144afb647cSTimo Kreuzer    and         r10, rax                          ; non-zero only when y is a NaN
15154afb647cSTimo Kreuzer    jnz         Lpow_sse2_x_is_nan_y_is_nan
15164afb647cSTimo Kreuzer
15174afb647cSTimo Kreuzer    movsd       xmm0, QWORD PTR [save_x+rsp]
15184afb647cSTimo Kreuzer    movsd       xmm1, QWORD PTR [save_y+rsp]
15194afb647cSTimo Kreuzer    movd        xmm2, r11
15204afb647cSTimo Kreuzer    mov         r9d, DWORD PTR __flag_x_nan
15214afb647cSTimo Kreuzer
15224afb647cSTimo Kreuzer    call        fname_special
15234afb647cSTimo Kreuzer    jmp         Lpow_sse2_final_check
15244afb647cSTimo Kreuzer
15254afb647cSTimo KreuzerALIGN 16
15264afb647cSTimo KreuzerLpow_sse2_y_is_nan:
    ; y is a NaN (and x is not): pass the value in r11 (y bits, as set up by
    ; the jumping path) to fname_special as the proposed result, with
    ; __flag_y_nan in r9d.
15274afb647cSTimo Kreuzer
15284afb647cSTimo Kreuzer    movsd       xmm0, QWORD PTR [save_x+rsp]
15294afb647cSTimo Kreuzer    movsd       xmm1, QWORD PTR [save_y+rsp]
15304afb647cSTimo Kreuzer    movd        xmm2, r11
15314afb647cSTimo Kreuzer    mov         r9d, DWORD PTR __flag_y_nan
15324afb647cSTimo Kreuzer
15334afb647cSTimo Kreuzer    call        fname_special
15344afb647cSTimo Kreuzer    jmp         Lpow_sse2_final_check
15354afb647cSTimo Kreuzer
15364afb647cSTimo KreuzerALIGN 16
15374afb647cSTimo KreuzerLpow_sse2_x_is_nan_y_is_nan:
    ; Both operands are NaNs: choose which payload to propagate.
    ; On entry r11 = x bits and r8 = y bits. The selection, in order:
    ;   1. x equals __ind_pattern -> take y and skip the remaining steps.
    ;   2. y equals __ind_pattern -> y's candidate is replaced by x.
    ;   3. A candidate with its sign bit set is replaced by the other one
    ;      (first y's candidate in r9, then x's candidate in r11).
    ; The final r11 is reported through fname_special with
    ; __flag_x_nan_y_nan.
15384afb647cSTimo Kreuzer
15394afb647cSTimo Kreuzer    mov         r9, r8                            ; r9 = y candidate
15404afb647cSTimo Kreuzer
15414afb647cSTimo Kreuzer    cmp         r11, QWORD PTR __ind_pattern
15424afb647cSTimo Kreuzer    cmove       r11, r9                           ; cmov leaves flags intact for the je
15434afb647cSTimo Kreuzer    je          Lpow_sse2_continue_xy_nan
15444afb647cSTimo Kreuzer
15454afb647cSTimo Kreuzer    cmp         r9, QWORD PTR __ind_pattern
15464afb647cSTimo Kreuzer    cmove       r9, r11
15474afb647cSTimo Kreuzer
15484afb647cSTimo Kreuzer    mov         r10, r9
15494afb647cSTimo Kreuzer    and         r10, QWORD PTR __sign_mask        ; sign bit of the y candidate
15504afb647cSTimo Kreuzer    cmovnz      r9, r11
15514afb647cSTimo Kreuzer
15524afb647cSTimo Kreuzer    mov         r10, r11
15534afb647cSTimo Kreuzer    and         r10, QWORD PTR __sign_mask        ; sign bit of the x candidate
15544afb647cSTimo Kreuzer    cmovnz      r11, r9
15554afb647cSTimo Kreuzer
15564afb647cSTimo KreuzerLpow_sse2_continue_xy_nan:
15574afb647cSTimo Kreuzer    ;or          r11, QWORD PTR __qnan_set
15584afb647cSTimo Kreuzer    movsd       xmm0, QWORD PTR [save_x+rsp]
15594afb647cSTimo Kreuzer    movsd       xmm1, QWORD PTR [save_y+rsp]
15604afb647cSTimo Kreuzer    movd        xmm2, r11
15614afb647cSTimo Kreuzer    mov         r9d, DWORD PTR __flag_x_nan_y_nan
15624afb647cSTimo Kreuzer
15634afb647cSTimo Kreuzer    call        fname_special
15644afb647cSTimo Kreuzer    jmp         Lpow_sse2_final_check
15654afb647cSTimo Kreuzer
15664afb647cSTimo KreuzerALIGN 16
15674afb647cSTimo KreuzerLpow_sse2_z_denormal:
    ; Report a subnormal result. The computed value arrives in xmm0 and is
    ; moved to xmm2 (proposed result) before xmm0/xmm1 are reloaded with the
    ; original x and y for fname_special; r9d = __flag_z_denormal.
15684afb647cSTimo Kreuzer
15694afb647cSTimo Kreuzer    movsd       xmm2, xmm0
15704afb647cSTimo Kreuzer    movsd       xmm0, QWORD PTR [save_x+rsp]
15714afb647cSTimo Kreuzer    movsd       xmm1, QWORD PTR [save_y+rsp]
15724afb647cSTimo Kreuzer    mov         r9d, DWORD PTR __flag_z_denormal
15734afb647cSTimo Kreuzer
15744afb647cSTimo Kreuzer    call        fname_special
15754afb647cSTimo Kreuzer    jmp         Lpow_sse2_final_check
15764afb647cSTimo Kreuzer
15774afb647cSTimo KreuzerLpow_fma3:
    ; Entry of the FMA3 code path: xmm0 = x, xmm1 = y.
    ; Both are spilled to save_x / save_y and reloaded as integers, so that
    ; rdx = raw bits of x and r8 = raw bits of y. Special cases are
    ; dispatched in this order: y == +/-0, y == 1, x negative, x == 1,
    ; x == +0, x inf/NaN, |y| very large, |y| very small. Everything else
    ; continues with the log(x) computation that follows.
15784afb647cSTimo Kreuzer    vmovsd       QWORD PTR [save_x+rsp], xmm0
15794afb647cSTimo Kreuzer    vmovsd       QWORD PTR [save_y+rsp], xmm1
15804afb647cSTimo Kreuzer
15814afb647cSTimo Kreuzer    mov          rdx, QWORD PTR [save_x+rsp]
15824afb647cSTimo Kreuzer    mov          r8, QWORD PTR [save_y+rsp]
15834afb647cSTimo Kreuzer
15844afb647cSTimo Kreuzer    mov          r10, QWORD PTR __exp_mant_mask
15854afb647cSTimo Kreuzer    and          r10, r8                          ; zero when y is +0 or -0
15864afb647cSTimo Kreuzer    jz           Lpow_fma3_y_is_zero
15874afb647cSTimo Kreuzer
15884afb647cSTimo Kreuzer    cmp          r8, QWORD PTR __pos_one
15894afb647cSTimo Kreuzer    je           Lpow_fma3_y_is_one
15904afb647cSTimo Kreuzer
15914afb647cSTimo Kreuzer    mov          r9, QWORD PTR __sign_mask
15924afb647cSTimo Kreuzer    and          r9, rdx
15934afb647cSTimo Kreuzer    cmp          r9, QWORD PTR __sign_mask        ; equal when x has its sign bit set
    ; The two mov instructions below do not modify flags, so the je still
    ; acts on the sign compare above. negate_result is cleared to
    ; __pos_zero on every path before branching.
15944afb647cSTimo Kreuzer    mov          rax, QWORD PTR __pos_zero
15954afb647cSTimo Kreuzer    mov          QWORD PTR [negate_result+rsp], rax
15964afb647cSTimo Kreuzer    je           Lpow_fma3_x_is_neg
15974afb647cSTimo Kreuzer
15984afb647cSTimo Kreuzer    cmp          rdx, QWORD PTR __pos_one
15994afb647cSTimo Kreuzer    je           Lpow_fma3_x_is_pos_one
16004afb647cSTimo Kreuzer
16014afb647cSTimo Kreuzer    cmp          rdx, QWORD PTR __pos_zero
16024afb647cSTimo Kreuzer    je           Lpow_fma3_x_is_zero
16034afb647cSTimo Kreuzer
16044afb647cSTimo Kreuzer    mov          r9, QWORD PTR __exp_mask
16054afb647cSTimo Kreuzer    and          r9, rdx
16064afb647cSTimo Kreuzer    cmp          r9, QWORD PTR __exp_mask         ; exponent all ones -> x is inf or NaN
16074afb647cSTimo Kreuzer    je           Lpow_fma3_x_is_inf_or_nan
16084afb647cSTimo Kreuzer
16094afb647cSTimo Kreuzer    mov          r10, QWORD PTR __exp_mask
16104afb647cSTimo Kreuzer    and          r10, r8                          ; r10 = exponent field of y
16114afb647cSTimo Kreuzer    cmp          r10, QWORD PTR __ay_max_bound
16124afb647cSTimo Kreuzer    jg           Lpow_fma3_ay_is_very_large
16134afb647cSTimo Kreuzer
16144afb647cSTimo Kreuzer    mov          r10, QWORD PTR __exp_mask
16154afb647cSTimo Kreuzer    and          r10, r8
16164afb647cSTimo Kreuzer    cmp          r10, QWORD PTR __ay_min_bound
16174afb647cSTimo Kreuzer    jl           Lpow_fma3_ay_is_very_small
16184afb647cSTimo Kreuzer
16194afb647cSTimo Kreuzer    ; -----------------------------
16204afb647cSTimo Kreuzer    ; compute log(x) here
16214afb647cSTimo Kreuzer    ; -----------------------------
16224afb647cSTimo KreuzerLpow_fma3_log_x:
    ; Split xmm0 into exponent and mantissa:
    ;   xmm6 = exponent as a double (xexp)
    ;   xmm2 = mantissa bits
    ;   r8   = raw bits of xmm0 (this overwrites the y bits kept in r8 so
    ;          far; y remains available in save_y)
    ; If xexp compares equal to __mask_1023_f the input is handed to
    ; Lpow_fma3_denormal_adjust.
    ; NOTE(review): on the fall-through from the entry code xmm0 is the
    ; positive x; other entries presumably load |x| first, as the SSE2
    ; negative-x path does - confirm.
16234afb647cSTimo Kreuzer
16244afb647cSTimo Kreuzer    ; compute exponent part
16254afb647cSTimo Kreuzer    vpsrlq       xmm3, xmm0, 52                   ; xmm3 = biased exponent field
16264afb647cSTimo Kreuzer    vmovq        r8, xmm0
16274afb647cSTimo Kreuzer    vpsubq       xmm3, xmm3, XMMWORD PTR __mask_1023 ; remove the bias
16284afb647cSTimo Kreuzer    vcvtdq2pd    xmm6, xmm3 ; xexp
16294afb647cSTimo Kreuzer    vpand        xmm2, xmm0, XMMWORD PTR __real_mant
16304afb647cSTimo Kreuzer
16314afb647cSTimo Kreuzer    vcomisd      xmm6, QWORD PTR __mask_1023_f
16324afb647cSTimo Kreuzer    je           Lpow_fma3_denormal_adjust
16334afb647cSTimo Kreuzer
16344afb647cSTimo KreuzerLpow_fma3_continue_common:
    ; Table-driven log(x) for the FMA3 path.
    ; In:  xmm0 = x (sign clear), xmm2 = mantissa bits of x,
    ;      xmm6 = xexp, r8 = raw bits of x.
    ; The leading mantissa bits select F and a table index in r8; Y is the
    ; full mantissa. Both carry the exponent bits of __real_half. When
    ; |x - 1| is below __real_threshold the near-one path is taken instead.
    ; Otherwise r = (F - Y) * (1/F from the two inverse tables) feeds a
    ; degree-6 polynomial, and the result is left as a head/tail pair for
    ; Lpow_fma3_log_x_continue, into which this code falls through.
16354afb647cSTimo Kreuzer
16364afb647cSTimo Kreuzer    ; compute index into the log tables
16374afb647cSTimo Kreuzer    mov          r9, r8
16384afb647cSTimo Kreuzer    and          r8, QWORD PTR __mask_mant_all8   ; keep the leading mantissa bits
16394afb647cSTimo Kreuzer    and          r9, QWORD PTR __mask_mant9       ; isolate the next lower bit
16404afb647cSTimo Kreuzer    vsubsd       xmm7, xmm0, __real_one           ; xmm7 = x - 1
16414afb647cSTimo Kreuzer    shl          r9, 1
16424afb647cSTimo Kreuzer    add          r8, r9                           ; round the leading bits up when that bit is set
16434afb647cSTimo Kreuzer    vmovq        xmm1, r8
16444afb647cSTimo Kreuzer    vandpd       xmm7, xmm7, __real_notsign       ; xmm7 = |x - 1|
16454afb647cSTimo Kreuzer
16464afb647cSTimo Kreuzer    ; F, Y, switch to near-one codepath
16474afb647cSTimo Kreuzer    shr          r8, 44                           ; r8 = table index
16484afb647cSTimo Kreuzer    vpor         xmm2, xmm2, XMMWORD PTR __real_half ; xmm2 = Y
16494afb647cSTimo Kreuzer    vpor         xmm1, xmm1, XMMWORD PTR __real_half ; xmm1 = F
16504afb647cSTimo Kreuzer    vcomisd      xmm7, __real_threshold
    ; lea does not modify flags, so jb still acts on the vcomisd above.
16514afb647cSTimo Kreuzer    lea          r9, QWORD PTR __log_F_inv_head
16524afb647cSTimo Kreuzer    lea          rdx, QWORD PTR __log_F_inv_tail
16534afb647cSTimo Kreuzer    jb           Lpow_fma3_near_one
16544afb647cSTimo Kreuzer
16554afb647cSTimo Kreuzer    ; f = F - Y, r = f * inv
16564afb647cSTimo Kreuzer    vsubsd       xmm4, xmm1, xmm2          ; xmm4 <-- f = F - Y
16574afb647cSTimo Kreuzer    vmulsd       xmm1, xmm4, QWORD PTR [r9+r8*8] ; xmm1 <-- rhead = f*inv_head
16584afb647cSTimo Kreuzer    vmovapd      xmm5, xmm1                ; xmm5 <-- copy of rhead
16594afb647cSTimo Kreuzer    vmulsd       xmm4, xmm4, QWORD PTR [rdx+r8*8] ; xmm4 <-- rtail = f*inv_tail
16604afb647cSTimo Kreuzer    vmovapd      xmm7, xmm4                ; xmm7 <-- copy of rtail
16614afb647cSTimo Kreuzer    vaddsd       xmm1, xmm1, xmm4          ; xmm1 <-- r = rhead + rtail
16624afb647cSTimo Kreuzer
16634afb647cSTimo Kreuzer    vmovapd      xmm2, xmm1                ; xmm2 <-- copy of r
16644afb647cSTimo Kreuzer    vmovapd      xmm0, xmm1                ; xmm0 <-- copy of r
16654afb647cSTimo Kreuzer    lea          r9, __log_256_lead
16664afb647cSTimo Kreuzer
16674afb647cSTimo Kreuzer    ; poly
    ; The commented-out block below is the non-FMA form of the polynomial,
    ; kept for reference; the live FMA3 version follows it.
16684afb647cSTimo Kreuzer;    movsd       xmm3, QWORD PTR __real_1_over_6
16694afb647cSTimo Kreuzer;    movsd       xmm1, QWORD PTR __real_1_over_3
16704afb647cSTimo Kreuzer;    mulsd       xmm3, xmm2               ; r*1/6
16714afb647cSTimo Kreuzer;    mulsd       xmm1, xmm2               ; r*1/3
16724afb647cSTimo Kreuzer;    mulsd       xmm0, xmm2               ; r^2
16734afb647cSTimo Kreuzer;    subsd       xmm5, xmm2               ; xmm5 <-- rhead - r
16744afb647cSTimo Kreuzer;    movsd       xmm4, xmm0               ; xmm4 <-- copy of r^2
16754afb647cSTimo Kreuzer;    addsd       xmm3, QWORD PTR __real_1_over_5 ; xmm3 <-- r*1/6 + 1/5
16764afb647cSTimo Kreuzer;    addsd       xmm1, QWORD PTR __real_1_over_2 ; xmm1 <-- r*1/3 + 1/2
16774afb647cSTimo Kreuzer;    mulsd       xmm4, xmm0               ; xmm4 <-- r^4
16784afb647cSTimo Kreuzer;    mulsd       xmm3, xmm2               ; xmm3 <-- (r*1/6 + 1/5)*r
16794afb647cSTimo Kreuzer;    mulsd       xmm1, xmm0               ; xmm1 <-- (r*1/3 + 1/2)*r^2
16804afb647cSTimo Kreuzer;    addsd       xmm3, QWORD PTR __real_1_over_4 ; xmm3 <-- (r*1/6+1/5)*r + 1/4
16814afb647cSTimo Kreuzer;    addsd       xmm7, xmm5               ; xmm7 <-- rtail + (rhead - r)
16824afb647cSTimo Kreuzer;    mulsd       xmm3, xmm4               ; xmm3 <-- (r*1/6 + 1/5)*r^5 + r^4*1/4
16834afb647cSTimo Kreuzer;    addsd       xmm1, xmm3               ; xmm1 <-- poly down to r^2
16844afb647cSTimo Kreuzer;    addsd       xmm1, xmm7               ; xmm1 <-- poly + correction
16854afb647cSTimo Kreuzer
16864afb647cSTimo Kreuzer
    ; FMA3 form. vfmadd213sd a, b, c computes a = b*a + c, so the chain is a
    ; Horner evaluation in r (xmm2):
    ;   xmm1 = 1/2 + r/3 + r^2/4 + r^3/5 + r^4/6
    ; and the last step gives xmm1 = r^2 * xmm1 + correction, where
    ;   correction = (rhead - r) + rtail.
16874afb647cSTimo Kreuzer    vsubsd       xmm3, xmm5, xmm2                 ; xmm3 = rhead - r
16884afb647cSTimo Kreuzer    vmovsd       xmm1, QWORD PTR __real_1_over_6
16894afb647cSTimo Kreuzer    vmulsd       xmm0,xmm0,xmm0                   ; xmm0 = r^2
16904afb647cSTimo Kreuzer    vaddsd       xmm3, xmm3, xmm7                 ; xmm3 = correction
16914afb647cSTimo Kreuzer    vfmadd213sd  xmm1, xmm2, QWORD PTR __real_1_over_5
16924afb647cSTimo Kreuzer    vfmadd213sd  xmm1, xmm2, QWORD PTR __real_1_over_4
16934afb647cSTimo Kreuzer    vfmadd213sd  xmm1, xmm2, QWORD PTR __real_1_over_3
16944afb647cSTimo Kreuzer    vfmadd213sd  xmm1, xmm2, QWORD PTR __real_1_over_2
16954afb647cSTimo Kreuzer    vfmadd213sd  xmm1, xmm0, xmm3
16964afb647cSTimo Kreuzer
16974afb647cSTimo Kreuzer    vmovsd       xmm5, QWORD PTR __real_log2_tail
16984afb647cSTimo Kreuzer    lea          rdx, __log_256_tail
16994afb647cSTimo Kreuzer    vfmsub213sd  xmm5, xmm6, xmm1                 ; xmm5 = xexp*log2_tail - poly
17004afb647cSTimo Kreuzer    vmovsd       xmm0, QWORD PTR [r9+r8*8]        ; xmm0 = __log_256_lead[index]
17014afb647cSTimo Kreuzer
17024afb647cSTimo Kreuzer    vaddsd       xmm3, xmm5, QWORD PTR [rdx+r8*8] ; add __log_256_tail[index]
17034afb647cSTimo Kreuzer    vmovapd      xmm1, xmm3                       ; xmm1 = resT_t
17044afb647cSTimo Kreuzer    vsubsd       xmm3, xmm3, xmm2                 ; xmm3 = resT = resT_t - r
17054afb647cSTimo Kreuzer
17064afb647cSTimo Kreuzer    vfmadd231sd  xmm0, xmm6, QWORD PTR __real_log2_lead ; xmm0 = resH = lead + xexp*log2_lead
17074afb647cSTimo Kreuzer
17084afb647cSTimo Kreuzer    ; result of ln(x) is computed from head and tail parts, resH and resT
17094afb647cSTimo Kreuzer    ; res = ln(x) = resH + resT
17104afb647cSTimo Kreuzer    ; resH and resT are in full precision
17114afb647cSTimo Kreuzer
17124afb647cSTimo Kreuzer    ; resT is computed from head and tail parts, resT_h and resT_t
17134afb647cSTimo Kreuzer    ; resT = resT_h + resT_t
17144afb647cSTimo Kreuzer
17154afb647cSTimo Kreuzer    ; now
17164afb647cSTimo Kreuzer    ; xmm3 - resT
17174afb647cSTimo Kreuzer    ; xmm0 - resH
17184afb647cSTimo Kreuzer    ; xmm1 - (resT_t)
17194afb647cSTimo Kreuzer    ; xmm2 - (-resT_h)
17204afb647cSTimo Kreuzer
; ---------------------------------------------------------------------------
; Lpow_fma3_log_x_continue
;
; Common continuation of the ln(x) paths.  On entry:
;   xmm0 = resH, xmm3 = resT, xmm1 = resT_t, xmm2 = -resT_h
; Stages:
;   1. res = resH + resT is re-split into H (res with its low-order bits
;      masked off) and T (everything that H dropped).
;   2. y is split the same way into Yh + Yt.
;   3. v + vt = y * ln(x) is formed from the four partial products.
;   4. exp(v + vt) is evaluated as 2^m * 2^(j/64) * (1 + q(r)) using the
;      __two_to_jby64 head/tail tables and a Horner polynomial in r.
; The normal-range result falls through into Lpow_fma3_final_check.
; ---------------------------------------------------------------------------
17214afb647cSTimo KreuzerLpow_fma3_log_x_continue:
17224afb647cSTimo Kreuzer
17234afb647cSTimo Kreuzer    vmovapd      xmm7, xmm0
17244afb647cSTimo Kreuzer    vaddsd       xmm0, xmm0, xmm3
17254afb647cSTimo Kreuzer    vmovapd      xmm5, xmm0
17264afb647cSTimo Kreuzer    vandpd       xmm0, xmm0, XMMWORD PTR __real_fffffffff8000000
17274afb647cSTimo Kreuzer
17284afb647cSTimo Kreuzer    ; xmm0 - H
17294afb647cSTimo Kreuzer    ; xmm7 - resH
17304afb647cSTimo Kreuzer    ; xmm5 - res
17314afb647cSTimo Kreuzer
    ; rax <-- bit pattern of y with the same mask applied (this becomes Yh)
17324afb647cSTimo Kreuzer    mov          rax, QWORD PTR [save_y+rsp]
17334afb647cSTimo Kreuzer    and          rax, QWORD PTR __real_fffffffff8000000
17344afb647cSTimo Kreuzer
    ; T = ((resH - res) + resT) + (resT_t - (resT + (-resT_h))) + (res - H)
17354afb647cSTimo Kreuzer    vaddsd       xmm2, xmm2, xmm3
17364afb647cSTimo Kreuzer    vsubsd       xmm7, xmm7, xmm5
17374afb647cSTimo Kreuzer    vsubsd       xmm1, xmm1, xmm2
17384afb647cSTimo Kreuzer    vaddsd       xmm7, xmm7, xmm3
17394afb647cSTimo Kreuzer    vsubsd       xmm5, xmm5, xmm0
17404afb647cSTimo Kreuzer
17414afb647cSTimo Kreuzer    mov          QWORD PTR [y_head+rsp], rax
17424afb647cSTimo Kreuzer    vmovsd       xmm4, QWORD PTR [save_y+rsp]
17434afb647cSTimo Kreuzer
17444afb647cSTimo Kreuzer    vaddsd       xmm7, xmm7, xmm1
17454afb647cSTimo Kreuzer    vaddsd       xmm7, xmm7, xmm5
17464afb647cSTimo Kreuzer
17474afb647cSTimo Kreuzer    ; res = H + T
17484afb647cSTimo Kreuzer    ; H has leading 26 bits of precision
17494afb647cSTimo Kreuzer    ; T has full precision
17504afb647cSTimo Kreuzer
17514afb647cSTimo Kreuzer    ; xmm0 - H
17524afb647cSTimo Kreuzer    ; xmm7 - T
17534afb647cSTimo Kreuzer
    ; Yt = y - Yh
17544afb647cSTimo Kreuzer    vmovsd       xmm2, QWORD PTR [y_head+rsp]
17554afb647cSTimo Kreuzer    vsubsd       xmm4, xmm4, xmm2
17564afb647cSTimo Kreuzer
17574afb647cSTimo Kreuzer    ; y is split into head and tail
17584afb647cSTimo Kreuzer    ; for y * ln(x) computation
17594afb647cSTimo Kreuzer
17604afb647cSTimo Kreuzer    ; xmm4 - Yt
17614afb647cSTimo Kreuzer    ; xmm2 - Yh
17624afb647cSTimo Kreuzer    ; xmm0 - H
17634afb647cSTimo Kreuzer    ; xmm7 - T
17644afb647cSTimo Kreuzer
17654afb647cSTimo Kreuzer    vmulsd       xmm3, xmm4, xmm7 ; YtRt
17664afb647cSTimo Kreuzer    vmulsd       xmm4, xmm4, xmm0 ; YtRh
17674afb647cSTimo Kreuzer    vmulsd       xmm5, xmm7, xmm2 ; YhRt
17684afb647cSTimo Kreuzer    vmulsd       xmm6, xmm0, xmm2 ; YhRh
17694afb647cSTimo Kreuzer
    ; xmm3 <-- YtRt + YtRh + YhRt (sum of the small products)
    ; v    <-- YhRh + xmm3
    ; vt   <-- (YhRh - v) + xmm3  (what the addition above rounded away)
17704afb647cSTimo Kreuzer    vmovapd      xmm1, xmm6
17714afb647cSTimo Kreuzer    vaddsd       xmm3, xmm3, xmm4
17724afb647cSTimo Kreuzer    vaddsd       xmm3, xmm3, xmm5
17734afb647cSTimo Kreuzer
17744afb647cSTimo Kreuzer    vaddsd       xmm1, xmm1, xmm3
17754afb647cSTimo Kreuzer    vmovapd      xmm0, xmm1
17764afb647cSTimo Kreuzer
17774afb647cSTimo Kreuzer    vsubsd       xmm6, xmm6, xmm1
17784afb647cSTimo Kreuzer    vaddsd       xmm6, xmm6, xmm3
17794afb647cSTimo Kreuzer
17804afb647cSTimo Kreuzer    ; y * ln(x) = v + vt
17814afb647cSTimo Kreuzer    ; v and vt are in full precision
17824afb647cSTimo Kreuzer
17834afb647cSTimo Kreuzer    ; xmm0 - v
17844afb647cSTimo Kreuzer    ; xmm6 - vt
17854afb647cSTimo Kreuzer
17864afb647cSTimo Kreuzer    ; -----------------------------
17874afb647cSTimo Kreuzer    ; compute exp( y * ln(x) ) here
17884afb647cSTimo Kreuzer    ; -----------------------------
17894afb647cSTimo Kreuzer
17904afb647cSTimo Kreuzer    ; v * (64/ln(2))
    ; rdx keeps the raw bit pattern of v; Lpow_fma3_process_true_denormal
    ; compares it against __denormal_tiny_threshold.
17914afb647cSTimo Kreuzer    vmovsd       QWORD PTR [p_temp_exp+rsp], xmm0
17924afb647cSTimo Kreuzer    vmulsd       xmm7, xmm0, QWORD PTR __real_64_by_log2
17934afb647cSTimo Kreuzer    mov          rdx, QWORD PTR [p_temp_exp+rsp]
17944afb647cSTimo Kreuzer
17954afb647cSTimo Kreuzer    ; v < 1024*ln(2), ( v * (64/ln(2)) ) < 64*1024
17964afb647cSTimo Kreuzer    ; v >= -1075*ln(2), ( v * (64/ln(2)) ) >= 64*(-1075)
17974afb647cSTimo Kreuzer    vcomisd      xmm7, QWORD PTR __real_p65536
17984afb647cSTimo Kreuzer    ja           Lpow_fma3_process_result_inf
17994afb647cSTimo Kreuzer
18004afb647cSTimo Kreuzer    vcomisd      xmm7, QWORD PTR __real_m68800
18014afb647cSTimo Kreuzer    jb           Lpow_fma3_process_result_zero
18024afb647cSTimo Kreuzer
18034afb647cSTimo Kreuzer    ; n = int( v * (64/ln(2)) )
    ; (vcvtpd2dq rounds according to the current MXCSR rounding mode;
    ;  only the low lane is used)
18044afb647cSTimo Kreuzer    vcvtpd2dq    xmm4, xmm7
18054afb647cSTimo Kreuzer    lea          r10, __two_to_jby64_head_table
18064afb647cSTimo Kreuzer    lea          r11, __two_to_jby64_tail_table
18074afb647cSTimo Kreuzer    vcvtdq2pd    xmm1, xmm4
18084afb647cSTimo Kreuzer
18094afb647cSTimo Kreuzer    ; r1 = x - n * ln(2)/64 head
    ; ("x" here is v, held in xmm0; ecx <-- n, eax <-- j = n & 3fh, the
    ;  index into the two_to_jby64 tables)
18104afb647cSTimo Kreuzer    vfnmadd231sd xmm0, xmm1, QWORD PTR __real_log2_by_64_head
18114afb647cSTimo Kreuzer    vmovd        ecx, xmm4
18124afb647cSTimo Kreuzer    mov          rax, 3fh
18134afb647cSTimo Kreuzer    and          eax, ecx
18144afb647cSTimo Kreuzer
18154afb647cSTimo Kreuzer    ; r2 = - n * ln(2)/64 tail
    ; NOTE(review): the sign is presumably carried by the constant itself,
    ; since the product is added (not subtracted) below - confirm against
    ; the definition of __real_log2_by_64_tail.
18164afb647cSTimo Kreuzer    vmulsd       xmm1, xmm1, QWORD PTR __real_log2_by_64_tail
18174afb647cSTimo Kreuzer    vmovapd      xmm2, xmm0
18184afb647cSTimo Kreuzer
18194afb647cSTimo Kreuzer    ; m = (n - j) / 64
18204afb647cSTimo Kreuzer    sub          ecx, eax
18214afb647cSTimo Kreuzer    sar          ecx, 6
18224afb647cSTimo Kreuzer
18234afb647cSTimo Kreuzer    ; r1+r2
18244afb647cSTimo Kreuzer    vaddsd       xmm2, xmm2, xmm1
18254afb647cSTimo Kreuzer    vaddsd       xmm2, xmm2, xmm6 ; add vt here
    ; NOTE(review): this copy of r looks like a leftover from the SSE version
    ; below; xmm1 is overwritten by the f1 multiply before it is read again.
18264afb647cSTimo Kreuzer    vmovapd      xmm1, xmm2
18274afb647cSTimo Kreuzer
18284afb647cSTimo Kreuzer    ; q
    ; Horner evaluation, xmm2 = r:
    ;   q = r*(1 + r*(1/2 + r*(1/6 + r*(1/24 + r*(1/120 + r/720)))))
    ; The integer instructions interleaved with the FMAs compute
    ;   r9d <-- m if m <= __denormal_threshold, else 0  (denormal-path flag)
    ;   rcx <-- (m + 1023) << 52                        (bit pattern of 2^m)
    ; The FMA instructions do not modify EFLAGS, so cmovle still sees the
    ; result of the cmp two instructions earlier.
18294afb647cSTimo Kreuzer    vmovsd       xmm0, QWORD PTR __real_1_by_720
18304afb647cSTimo Kreuzer    xor         r9d, r9d
18314afb647cSTimo Kreuzer    vfmadd213sd  xmm0, xmm2,  QWORD PTR __real_1_by_120
18324afb647cSTimo Kreuzer    cmp         ecx, DWORD PTR __denormal_threshold
18334afb647cSTimo Kreuzer    vfmadd213sd  xmm0, xmm2,  QWORD PTR __real_1_by_24
18344afb647cSTimo Kreuzer    cmovle      r9d, ecx
18354afb647cSTimo Kreuzer    vfmadd213sd  xmm0, xmm2,  QWORD PTR __real_1_by_6
18364afb647cSTimo Kreuzer    add         rcx, 1023
18374afb647cSTimo Kreuzer    vfmadd213sd  xmm0, xmm2,  QWORD PTR __real_1_by_2
18384afb647cSTimo Kreuzer    shl         rcx, 52
18394afb647cSTimo Kreuzer    vfmadd213sd  xmm0, xmm2,  QWORD PTR __real_one
18404afb647cSTimo Kreuzer    vmulsd       xmm0, xmm0, xmm2         ; xmm0 <-- q
    ; The commented-out block below is the earlier non-FMA (SSE2) form of the
    ; same polynomial and denormal bookkeeping, kept for reference.
18414afb647cSTimo Kreuzer;    movsd       xmm0, QWORD PTR __real_1_by_2
18424afb647cSTimo Kreuzer;    movsd       xmm3, QWORD PTR __real_1_by_24
18434afb647cSTimo Kreuzer;    movsd       xmm4, QWORD PTR __real_1_by_720
18444afb647cSTimo Kreuzer;    mulsd       xmm1, xmm2                ; xmm1 <-- r^2
18454afb647cSTimo Kreuzer;    mulsd       xmm0, xmm2                ; xmm0 <-- r/2
18464afb647cSTimo Kreuzer;    mulsd       xmm3, xmm2                ; xmm3 <-- r/24
18474afb647cSTimo Kreuzer;    mulsd       xmm4, xmm2                ; xmm4 <-- r/720
18484afb647cSTimo Kreuzer
18494afb647cSTimo Kreuzer;    movsd       xmm5, xmm1                ; xmm5 <-- copy of r^2
18504afb647cSTimo Kreuzer;    mulsd       xmm1, xmm2                ; xmm1 <-- r^3
18514afb647cSTimo Kreuzer;    addsd       xmm0, QWORD PTR __real_one ; xmm0 <-- r/2 + 1
18524afb647cSTimo Kreuzer;    addsd       xmm3, QWORD PTR __real_1_by_6 ; xmm3 <-- r/24 + 1/6
18534afb647cSTimo Kreuzer;    mulsd       xmm5, xmm1                ; xmm5 <-- r^5
18544afb647cSTimo Kreuzer;    addsd       xmm4, QWORD PTR __real_1_by_120 ; xmm4 <-- r/720 + 1/120
18554afb647cSTimo Kreuzer;    mulsd       xmm0, xmm2                ; xmm0 <-- (r/2 + 1)*r
18564afb647cSTimo Kreuzer;    mulsd       xmm3, xmm1                ; xmm3 <-- (r/24 + 1/6)*r^3
18574afb647cSTimo Kreuzer
18584afb647cSTimo Kreuzer;    mulsd       xmm4, xmm5                ; xmm4 <-- (r/720 + 1/120)*r^5
18594afb647cSTimo Kreuzer
18604afb647cSTimo Kreuzer;   ; deal with denormal results
18614afb647cSTimo Kreuzer;   xor         r9d, r9d
18624afb647cSTimo Kreuzer;   cmp         ecx, DWORD PTR __denormal_threshold
18634afb647cSTimo Kreuzer
18644afb647cSTimo Kreuzer;    addsd       xmm3, xmm4  ; xmm3 <-- (r/720 + 1/120)*r^5 + (r/24 + 1/6)*r^3
18654afb647cSTimo Kreuzer;    addsd       xmm0, xmm3  ; xmm0 <-- poly
18664afb647cSTimo Kreuzer
18674afb647cSTimo Kreuzer;   cmovle      r9d, ecx
18684afb647cSTimo Kreuzer;   add         rcx, 1023
18694afb647cSTimo Kreuzer;   shl         rcx, 52
18704afb647cSTimo Kreuzer
18714afb647cSTimo Kreuzer    ; f1, f2
    ; xmm5 <-- q * tail[j], xmm1 <-- q * head[j]
18724afb647cSTimo Kreuzer    vmulsd       xmm5, xmm0, QWORD PTR [r11+rax*8]
18734afb647cSTimo Kreuzer    vmulsd       xmm1, xmm0, QWORD PTR [r10+rax*8]
18744afb647cSTimo Kreuzer
    ; Flags from this compare are consumed by the je further down; the AVX
    ; arithmetic in between leaves EFLAGS untouched.
18754afb647cSTimo Kreuzer    cmp          rcx, QWORD PTR __real_inf
18764afb647cSTimo Kreuzer
18774afb647cSTimo Kreuzer    ; (f1+f2)*(1+q)
    ; evaluated as head[j] + (q*head[j] + (q*tail[j] + tail[j]))
18784afb647cSTimo Kreuzer    vaddsd       xmm5, xmm5, QWORD PTR [r11+rax*8]
18794afb647cSTimo Kreuzer    vaddsd       xmm1, xmm1, xmm5
18804afb647cSTimo Kreuzer    vaddsd       xmm1, xmm1, QWORD PTR [r10+rax*8]
18814afb647cSTimo Kreuzer    vmovapd      xmm0, xmm1
18824afb647cSTimo Kreuzer
    ; (m + 1023) << 52 equals the bit pattern of __real_inf: 2^m itself is not
    ; representable, so the scaling is handled separately.
18834afb647cSTimo Kreuzer    je           Lpow_fma3_process_almost_inf
18844afb647cSTimo Kreuzer
    ; r9d != 0 means m <= __denormal_threshold.  The mov between test and jnz
    ; does not alter flags; it stores the 2^m bit pattern for the multiply.
18854afb647cSTimo Kreuzer    test         r9d, r9d
18864afb647cSTimo Kreuzer    mov          QWORD PTR [p_temp_exp+rsp], rcx
18874afb647cSTimo Kreuzer    jnz          Lpow_fma3_process_denormal
    ; normal range: result = (f1+f2)*(1+q) * 2^m, then OR in the result sign
18884afb647cSTimo Kreuzer    vmulsd       xmm0, xmm0, QWORD PTR [p_temp_exp+rsp]
18894afb647cSTimo Kreuzer    vorpd        xmm0, xmm0, XMMWORD PTR [negate_result+rsp]
18904afb647cSTimo Kreuzer
; Common exit of the FMA3 path: the value to return is already in xmm0.
; Restores xmm7 and xmm6 from their save slots, releases the stack frame and
; returns.  NOTE(review): AVXRestoreXmm and StackDeallocate are macros defined
; outside this chunk; their exact expansion is not visible here.
18914afb647cSTimo KreuzerLpow_fma3_final_check:
18924afb647cSTimo Kreuzer    AVXRestoreXmm  xmm7, save_xmm7
18934afb647cSTimo Kreuzer    AVXRestoreXmm  xmm6, save_xmm6
18944afb647cSTimo Kreuzer    StackDeallocate stack_size
18954afb647cSTimo Kreuzer    ret
18964afb647cSTimo Kreuzer
18974afb647cSTimo KreuzerALIGN 16
; Reached when the 2^m bit pattern computed for the exp() scaling equals
; __real_inf, i.e. 2^m cannot be represented as a finite double.
;   xmm0 = unscaled result (f1+f2)*(1+q)
; If xmm0 >= 1.0 the scaled result overflows and the infinity path is taken.
; Otherwise the bits of __enable_almost_inf and the result sign are ORed into
; xmm0.  NOTE(review): __enable_almost_inf is defined outside this chunk;
; presumably it supplies the exponent bits that stand in for the multiply
; by 2^m - confirm against its definition.
18984afb647cSTimo KreuzerLpow_fma3_process_almost_inf:
18994afb647cSTimo Kreuzer    vcomisd      xmm0, QWORD PTR __real_one
19004afb647cSTimo Kreuzer    jae          Lpow_fma3_process_result_inf
19014afb647cSTimo Kreuzer
19024afb647cSTimo Kreuzer    vorpd        xmm0, xmm0, XMMWORD PTR __enable_almost_inf
19034afb647cSTimo Kreuzer    vorpd        xmm0, xmm0, XMMWORD PTR [negate_result+rsp]
19044afb647cSTimo Kreuzer    jmp          Lpow_fma3_final_check
19054afb647cSTimo Kreuzer
19064afb647cSTimo KreuzerALIGN 16
; Reached when m <= __denormal_threshold (r9d holds that m, nonzero).
;   xmm0            = unscaled result (f1+f2)*(1+q)
;   [p_temp_exp+rsp] = (m + 1023) << 52
; r11d <-- m if xmm0 >= 1.0, else 0.  Only when r11d equals
; __denormal_threshold exactly is the ordinary scaling (multiply by the stored
; 2^m pattern, OR in the sign) still used; every other case goes to
; Lpow_fma3_process_true_denormal with ecx = m.
19074afb647cSTimo KreuzerLpow_fma3_process_denormal:
19084afb647cSTimo Kreuzer    mov          ecx, r9d
    ; zero r11d before the compare so cmovae sees the vcomisd flags
19094afb647cSTimo Kreuzer    xor          r11d, r11d
19104afb647cSTimo Kreuzer    vcomisd      xmm0, QWORD PTR __real_one
19114afb647cSTimo Kreuzer    cmovae       r11d, ecx
19124afb647cSTimo Kreuzer    cmp          r11d, DWORD PTR __denormal_threshold
19134afb647cSTimo Kreuzer    jne          Lpow_fma3_process_true_denormal
19144afb647cSTimo Kreuzer
19154afb647cSTimo Kreuzer    vmulsd       xmm0, xmm0, QWORD PTR [p_temp_exp+rsp]
19164afb647cSTimo Kreuzer    vorpd        xmm0, xmm0, XMMWORD PTR [negate_result+rsp]
19174afb647cSTimo Kreuzer    jmp          Lpow_fma3_final_check
19184afb647cSTimo Kreuzer
19194afb647cSTimo KreuzerALIGN 16
; Scale the unscaled result in xmm0 into the denormal range.
;   ecx = m (from Lpow_fma3_process_denormal)
;   rdx = raw bit pattern of v, the head of y*ln(x)
; If rdx is greater (signed compare of the bit patterns) than
; __denormal_tiny_threshold, the fixed smallest-denormal result is used.
; Otherwise the scale factor is the integer 1 << max(m + 1074, 0) stored as a
; double bit pattern: with a zero exponent field, mantissa bit k has the value
; 2^(k - 1074), so this pattern encodes 2^m.  The result then goes to
; Lpow_fma3_z_denormal (outside this chunk).
19204afb647cSTimo KreuzerLpow_fma3_process_true_denormal:
19214afb647cSTimo Kreuzer    xor          r8, r8
19224afb647cSTimo Kreuzer    cmp          rdx, QWORD PTR __denormal_tiny_threshold
    ; mov does not touch flags; jg below still tests the cmp above
19234afb647cSTimo Kreuzer    mov          r9, 1
19244afb647cSTimo Kreuzer    jg           Lpow_fma3_process_denormal_tiny
19254afb647cSTimo Kreuzer    add          ecx, 1074
    ; clamp a negative shift count to 0
19264afb647cSTimo Kreuzer    cmovs        rcx, r8
19274afb647cSTimo Kreuzer    shl          r9, cl
19284afb647cSTimo Kreuzer    mov          rcx, r9
19294afb647cSTimo Kreuzer
19304afb647cSTimo Kreuzer    mov          QWORD PTR [p_temp_exp+rsp], rcx
19314afb647cSTimo Kreuzer    vmulsd       xmm0, xmm0, QWORD PTR [p_temp_exp+rsp]
19324afb647cSTimo Kreuzer    vorpd        xmm0, xmm0, XMMWORD PTR [negate_result+rsp]
19334afb647cSTimo Kreuzer    jmp          Lpow_fma3_z_denormal
19344afb647cSTimo Kreuzer
19354afb647cSTimo KreuzerALIGN 16
; Result is below the range handled by the shift-based scaling: load
; __real_smallest_denormal, OR in the result sign and continue at
; Lpow_fma3_z_denormal (outside this chunk).
19364afb647cSTimo KreuzerLpow_fma3_process_denormal_tiny:
19374afb647cSTimo Kreuzer    vmovsd       xmm0, QWORD PTR __real_smallest_denormal
19384afb647cSTimo Kreuzer    vorpd        xmm0, xmm0, XMMWORD PTR [negate_result+rsp]
19394afb647cSTimo Kreuzer    jmp          Lpow_fma3_z_denormal
19404afb647cSTimo Kreuzer
19414afb647cSTimo KreuzerALIGN 16
; exp() argument below the representable range: r11 <-- bit pattern of
; __real_zero with the result sign ORed in, then continue at
; Lpow_fma3_z_is_zero_or_inf (outside this chunk), which receives r11.
19424afb647cSTimo KreuzerLpow_fma3_process_result_zero:
19434afb647cSTimo Kreuzer    mov          r11, QWORD PTR __real_zero
19444afb647cSTimo Kreuzer    or           r11, QWORD PTR [negate_result+rsp]
19454afb647cSTimo Kreuzer    jmp          Lpow_fma3_z_is_zero_or_inf
19464afb647cSTimo Kreuzer
19474afb647cSTimo KreuzerALIGN 16
; exp() argument above the representable range: r11 <-- bit pattern of
; __real_inf with the result sign ORed in, then continue at
; Lpow_fma3_z_is_zero_or_inf (outside this chunk), which receives r11.
19484afb647cSTimo KreuzerLpow_fma3_process_result_inf:
19494afb647cSTimo Kreuzer    mov          r11, QWORD PTR __real_inf
19504afb647cSTimo Kreuzer    or           r11, QWORD PTR [negate_result+rsp]
19514afb647cSTimo Kreuzer    jmp          Lpow_fma3_z_is_zero_or_inf
19524afb647cSTimo Kreuzer
19534afb647cSTimo KreuzerALIGN 16
; Re-normalise a denormal operand held in xmm2.
; NOTE(review): the caller is outside this chunk; presumably xmm2 holds the
; bits of |x| with a zero exponent field - confirm.
;   xmm2 <-- (xmm2 | bits of 1.0) - 1.0   (normalised form of the same value)
;   r8   <-- mantissa bits of that value  (xmm2 & __real_mant)
;   xmm6 <-- (biased exponent field - __mask_2045) converted to double
; Then rejoins the main flow at Lpow_fma3_continue_common.
19544afb647cSTimo KreuzerLpow_fma3_denormal_adjust:
19554afb647cSTimo Kreuzer    vpor         xmm2, xmm2, XMMWORD PTR __real_one
19564afb647cSTimo Kreuzer    vsubsd       xmm2, xmm2, QWORD PTR __real_one
19574afb647cSTimo Kreuzer    vmovapd      xmm5, xmm2
19584afb647cSTimo Kreuzer    vpand        xmm2, xmm2, XMMWORD PTR __real_mant
19594afb647cSTimo Kreuzer    vmovq        r8, xmm2
    ; shift the exponent field down to bit 0 (the sign bit would land in
    ; bit 11, so this relies on it being clear)
19604afb647cSTimo Kreuzer    vpsrlq       xmm5, xmm5, 52
19614afb647cSTimo Kreuzer    vpsubd       xmm5, xmm5, XMMWORD PTR __mask_2045
19624afb647cSTimo Kreuzer    vcvtdq2pd    xmm6, xmm5
19634afb647cSTimo Kreuzer    jmp          Lpow_fma3_continue_common
19644afb647cSTimo Kreuzer
19654afb647cSTimo KreuzerALIGN 16
; Negative-x handling.  Register roles, as used by this and the neighbouring
; handlers: rdx = raw bits of x, r8 = raw bits of y.
;   1. exponent field of y above __ay_max_bound -> Lpow_fma3_ay_is_very_large
;   2. classify y: not an integer -> Lpow_fma3_x_is_neg_y_is_not_int;
;      odd integer -> negate_result is set to the sign mask;
;      even integer (or exponent above __yexp_53) -> negate_result untouched
;   3. dispatch x == -0, x == -1, x = Inf/NaN to their handlers, otherwise
;      load |x| into xmm0 and continue at Lpow_fma3_log_x.
19664afb647cSTimo KreuzerLpow_fma3_x_is_neg:
19674afb647cSTimo Kreuzer
19684afb647cSTimo Kreuzer    mov          r10, QWORD PTR __exp_mask
19694afb647cSTimo Kreuzer    and          r10, r8
19704afb647cSTimo Kreuzer    cmp          r10, QWORD PTR __ay_max_bound
19714afb647cSTimo Kreuzer    jg           Lpow_fma3_ay_is_very_large
19724afb647cSTimo Kreuzer
19734afb647cSTimo Kreuzer    ; determine if y is an integer
    ; r11 <-- |y| bits; r10 <-- unbiased exponent of y (negative => |y| < 1)
19744afb647cSTimo Kreuzer    mov          r10, QWORD PTR __exp_mant_mask
19754afb647cSTimo Kreuzer    and          r10, r8
19764afb647cSTimo Kreuzer    mov          r11, r10
19774afb647cSTimo Kreuzer    mov          rcx, QWORD PTR __exp_shift
19784afb647cSTimo Kreuzer    shr          r10, cl
19794afb647cSTimo Kreuzer    sub          r10, QWORD PTR __exp_bias
19804afb647cSTimo Kreuzer    js           Lpow_fma3_x_is_neg_y_is_not_int
19814afb647cSTimo Kreuzer
    ; save |x| bits for the log computation
19824afb647cSTimo Kreuzer    mov          rax, QWORD PTR __exp_mant_mask
19834afb647cSTimo Kreuzer    and          rax, rdx
19844afb647cSTimo Kreuzer    mov          QWORD PTR [save_ax+rsp], rax
19854afb647cSTimo Kreuzer
    ; exponent above __yexp_53: skip the parity test, result stays positive.
    ; (the mov between cmp and jg does not alter flags)
19864afb647cSTimo Kreuzer    cmp          r10, QWORD PTR __yexp_53
19874afb647cSTimo Kreuzer    mov          rcx, r10
19884afb647cSTimo Kreuzer    jg           Lpow_fma3_continue_after_y_int_check
19894afb647cSTimo Kreuzer
    ; any bit set below the binary point => y has a fractional part
19904afb647cSTimo Kreuzer    mov          r9, QWORD PTR __mant_full
19914afb647cSTimo Kreuzer    shr          r9, cl
19924afb647cSTimo Kreuzer    and          r9, r11
19934afb647cSTimo Kreuzer    jnz          Lpow_fma3_x_is_neg_y_is_not_int
19944afb647cSTimo Kreuzer
    ; test the units bit of y: clear => even integer, set => odd integer
19954afb647cSTimo Kreuzer    mov          r9, QWORD PTR __1_before_mant
19964afb647cSTimo Kreuzer    shr          r9, cl
19974afb647cSTimo Kreuzer    and          r9, r11
19984afb647cSTimo Kreuzer    jz           Lpow_fma3_continue_after_y_int_check
19994afb647cSTimo Kreuzer
    ; odd integer y: the final result must carry a negative sign
20004afb647cSTimo Kreuzer    mov          rax, QWORD PTR __sign_mask
20014afb647cSTimo Kreuzer    mov          QWORD PTR [negate_result+rsp], rax
20024afb647cSTimo Kreuzer
20034afb647cSTimo KreuzerLpow_fma3_continue_after_y_int_check:
20044afb647cSTimo Kreuzer
20054afb647cSTimo Kreuzer    cmp          rdx, QWORD PTR __neg_zero
20064afb647cSTimo Kreuzer    je           Lpow_fma3_x_is_zero
20074afb647cSTimo Kreuzer
20084afb647cSTimo Kreuzer    cmp          rdx, QWORD PTR __neg_one
20094afb647cSTimo Kreuzer    je           Lpow_fma3_x_is_neg_one
20104afb647cSTimo Kreuzer
    ; exponent field all ones => x is Inf or NaN
20114afb647cSTimo Kreuzer    mov          r9, QWORD PTR __exp_mask
20124afb647cSTimo Kreuzer    and          r9, rdx
20134afb647cSTimo Kreuzer    cmp          r9, QWORD PTR __exp_mask
20144afb647cSTimo Kreuzer    je           Lpow_fma3_x_is_inf_or_nan
20154afb647cSTimo Kreuzer
    ; ordinary negative x: compute with |x|; the sign is applied at the end
20164afb647cSTimo Kreuzer    vmovsd       xmm0, QWORD PTR [save_ax+rsp]
20174afb647cSTimo Kreuzer    jmp          Lpow_fma3_log_x
20184afb647cSTimo Kreuzer
20194afb647cSTimo Kreuzer
20204afb647cSTimo KreuzerALIGN 16
; ln(x) for x close to 1, using a longer polynomial and a split r = r1 + r2.
; NOTE(review): the entry state is set up outside this chunk.  From the uses
; below: xmm1 = F, xmm2 = Y, r9/rdx = bases of two tables whose entries at
; index r8 sum to "inv", and xmm6 = the factor applied to log2_lead/log2_tail.
; Leaves xmm0..xmm3 as expected by Lpow_fma3_log_x_continue and jumps there.
20214afb647cSTimo KreuzerLpow_fma3_near_one:
20224afb647cSTimo Kreuzer
20234afb647cSTimo Kreuzer    ; f = F - Y, r = f * inv
20244afb647cSTimo Kreuzer    vmovapd      xmm0, xmm1
20254afb647cSTimo Kreuzer    vsubsd       xmm1, xmm1, xmm2         ; xmm1 <-- f
20264afb647cSTimo Kreuzer    vmovapd      xmm4, xmm1               ; xmm4 <-- copy of f
20274afb647cSTimo Kreuzer
20284afb647cSTimo Kreuzer    vmovsd       xmm3, QWORD PTR [r9+r8*8]
20294afb647cSTimo Kreuzer    vaddsd       xmm3, xmm3, QWORD PTR [rdx+r8*8]
20304afb647cSTimo Kreuzer    vmulsd       xmm4, xmm4, xmm3         ; xmm4 <-- r = f*inv
2031*105426b8STimo Kreuzer    vandpd       xmm4, xmm4, XMMWORD PTR __real_fffffffff8000000 ; r1
20324afb647cSTimo Kreuzer    vmovapd      xmm5, xmm4               ; xmm5 <-- copy of r1
20334afb647cSTimo Kreuzer;   mulsd        xmm4, xmm0               ; xmm4 <-- F*r1
20344afb647cSTimo Kreuzer;   subsd        xmm1, xmm4               ; xmm1 <-- f - F*r1
20354afb647cSTimo Kreuzer    vfnmadd231sd xmm1, xmm4, xmm0         ; xmm1 <-- f - F*r1
20364afb647cSTimo Kreuzer    vmulsd       xmm1, xmm1, xmm3         ; xmm1 <-- r2 = (f - F*r1)*inv
20374afb647cSTimo Kreuzer    vmovapd      xmm7, xmm1               ; xmm7 <-- copy of r2
20384afb647cSTimo Kreuzer    vaddsd       xmm1, xmm1, xmm5         ; xmm1 <-- r = r1 + r2
20394afb647cSTimo Kreuzer
20404afb647cSTimo Kreuzer    vmovapd      xmm2, xmm1               ; xmm2 <-- copy of r
20414afb647cSTimo Kreuzer    vmovapd      xmm0, xmm1               ; xmm0 <-- copy of r
20424afb647cSTimo Kreuzer
20434afb647cSTimo Kreuzer    lea          r9, __log_256_lead
20444afb647cSTimo Kreuzer
20454afb647cSTimo Kreuzer    ; poly
20464afb647cSTimo Kreuzer    ; NOTE: Given the complicated corrections here,
20474afb647cSTimo Kreuzer    ; I'm afraid to mess with it too much - WAT
20484afb647cSTimo Kreuzer    vmovsd       xmm3, QWORD PTR __real_1_over_7
20494afb647cSTimo Kreuzer    vmovsd       xmm1, QWORD PTR __real_1_over_4
20504afb647cSTimo Kreuzer    vmulsd       xmm0, xmm0, xmm2         ; xmm0 <-- r^2
20514afb647cSTimo Kreuzer    vmovapd      xmm4, xmm0               ; xmm4 <-- copy of r^2
20524afb647cSTimo Kreuzer    vfmadd213sd  xmm3, xmm2, QWORD PTR __real_1_over_6 ; xmm3 <-- r/7 + 1/6
20534afb647cSTimo Kreuzer    vfmadd213sd  xmm1, xmm2, QWORD PTR __real_1_over_3 ; xmm1 <-- r/4 + 1/3
20544afb647cSTimo Kreuzer    vmulsd       xmm4, xmm4, xmm0         ; xmm4 <-- r^4
20554afb647cSTimo Kreuzer    vmulsd       xmm1, xmm1, xmm2         ; xmm1 <-- (r/4 + 1/3)*r
20564afb647cSTimo Kreuzer    vfmadd213sd  xmm3, xmm2, QWORD PTR __real_1_over_5 ; xmm3 <-- ((r/7 + 1/6)*r) + 1/5
20574afb647cSTimo Kreuzer    vmulsd       xmm3, xmm3, xmm2         ; xmm3 <-- (((r/7 + 1/6)*r) + 1/5)*r
20584afb647cSTimo Kreuzer    vmulsd       xmm1, xmm1, xmm0         ; xmm1 <-- ((r/4 + 1/3)*r)*r^2
20594afb647cSTimo Kreuzer    vmulsd       xmm3, xmm3, xmm4         ; xmm3 <-- ((((r/7 + 1/6)*r) + 1/5)*r)*r^4
20604afb647cSTimo Kreuzer
20614afb647cSTimo Kreuzer    vmovapd      xmm2, xmm5               ; xmm2 <-- copy of r1
20624afb647cSTimo Kreuzer    vmovapd      xmm0, xmm7               ; xmm0 <-- copy of r2
20634afb647cSTimo Kreuzer    vmulsd       xmm0, xmm0, xmm0         ; xmm0 <-- r2^2
20644afb647cSTimo Kreuzer    vmulsd       xmm0, xmm0, QWORD PTR __real_1_over_2 ; xmm0 <-- r2^2/2
20654afb647cSTimo Kreuzer;   mulsd        xmm5, xmm7               ; xmm5 <-- r1*r2
20664afb647cSTimo Kreuzer;   addsd        xmm5, xmm0               ; xmm5 <-- r1*r2 + r2^2/2
20674afb647cSTimo Kreuzer    vfmadd213sd  xmm5, xmm7, xmm0         ; xmm5 <-- r1*r2 + r2^2/2
20684afb647cSTimo Kreuzer    vaddsd       xmm5, xmm5, xmm7         ; xmm5 <-- r1*r2 + r2^2/2 + r2
20694afb647cSTimo Kreuzer
20704afb647cSTimo Kreuzer    vmovapd      xmm0, xmm2               ; xmm0 <-- copy of r1
20714afb647cSTimo Kreuzer    vmovapd      xmm7, xmm2               ; xmm7 <-- copy of r1
20724afb647cSTimo Kreuzer    vmulsd       xmm0, xmm0, xmm0         ; xmm0 <-- r1^2
20734afb647cSTimo Kreuzer    vmulsd       xmm0, xmm0, QWORD PTR __real_1_over_2 ; xmm0 <-- r1^2/2
20744afb647cSTimo Kreuzer    vmovapd      xmm4, xmm0               ; xmm4 <-- copy of r1^2/2
20754afb647cSTimo Kreuzer    vaddsd       xmm2, xmm2, xmm0         ; xmm2 <--  r1 + r1^2/2
    ; xmm7 below recovers the rounding error of the addition just above
20764afb647cSTimo Kreuzer    vsubsd       xmm7, xmm7, xmm2         ; xmm7 <-- r1 - (r1 + r1^2/2)
20774afb647cSTimo Kreuzer    vaddsd       xmm7, xmm7, xmm4         ; xmm7 <-- r1 - (r1 + r1^2/2) + r1^2/2
20784afb647cSTimo Kreuzer    ; xmm3 <-- ((((r/7 + 1/6)*r) + 1/5)*r)*r^4 + r1 - (r1 + r1^2/2) + r1^2/2
20794afb647cSTimo Kreuzer    vaddsd       xmm3, xmm3, xmm7
20804afb647cSTimo Kreuzer    vmovsd       xmm4, QWORD PTR __real_log2_tail
20814afb647cSTimo Kreuzer    ; xmm1 <-- (((((r/7 + 1/6)*r) + 1/5)*r)*r^4) +
20824afb647cSTimo Kreuzer    ;   (r1 - (r1 + r1^2/2) + r1^2/2) + ((r/4 + 1/3)*r)*r^2)
20834afb647cSTimo Kreuzer    vaddsd       xmm1, xmm1, xmm3
20844afb647cSTimo Kreuzer    lea          rdx, __log_256_tail
20854afb647cSTimo Kreuzer    ; xmm1 <-- ((((((r/7 + 1/6)*r) + 1/5)*r)*r^4) +
20864afb647cSTimo Kreuzer    ;   (r1 - (r1 + r1^2/2) + r1^2/2) + ((r/4 + 1/3)*r)*r^2))
20874afb647cSTimo Kreuzer    ;   +(r1*r2 + r2^2/2 + r2)
20884afb647cSTimo Kreuzer    vaddsd       xmm1, xmm1, xmm5
20894afb647cSTimo Kreuzer    ; xmm4 <-- xmm6 * log2_tail + log256_tail
20904afb647cSTimo Kreuzer    vfmadd213sd  xmm4, xmm6, QWORD PTR [rdx+r8*8]
20914afb647cSTimo Kreuzer    ; xmm4 <-- xmm6 * log2_tail + log256_tail - corrected poly
20924afb647cSTimo Kreuzer    vsubsd       xmm4, xmm4, xmm1
20934afb647cSTimo Kreuzer
20944afb647cSTimo Kreuzer    vmovapd      xmm1, xmm4
20954afb647cSTimo Kreuzer    vsubsd       xmm3, xmm4, xmm2 ; xmm3 <-- xmm4 - (r1 + r1^2/2)
20964afb647cSTimo Kreuzer
20974afb647cSTimo Kreuzer    vmovsd       xmm0, QWORD PTR [r9+r8*8] ; xmm0 <-- log256_lead
20984afb647cSTimo Kreuzer    ; xmm0 <-- log256_lead + xmm6*log2_lead
20994afb647cSTimo Kreuzer    vfmadd231sd  xmm0, xmm6, QWORD PTR __real_log2_lead
21004afb647cSTimo Kreuzer
21014afb647cSTimo Kreuzer    ; at this point, xmm0, xmm1, xmm2, and xmm3 should matter
21024afb647cSTimo Kreuzer    jmp          Lpow_fma3_log_x_continue
21034afb647cSTimo Kreuzer
21044afb647cSTimo Kreuzer
21054afb647cSTimo KreuzerALIGN 16
; x == +1.0: go straight to the common exit with xmm0 unchanged.
; NOTE(review): presumably xmm0 still holds x (= 1.0) at this point; the
; branch that reaches this label is outside this chunk - confirm.
21064afb647cSTimo KreuzerLpow_fma3_x_is_pos_one:
21074afb647cSTimo Kreuzer    jmp          Lpow_fma3_final_check
21084afb647cSTimo Kreuzer
21094afb647cSTimo KreuzerALIGN 16
; y is zero: the result is 1.0 (loaded from __real_one).
21104afb647cSTimo KreuzerLpow_fma3_y_is_zero:
21114afb647cSTimo Kreuzer    vmovsd       xmm0, QWORD PTR __real_one
21124afb647cSTimo Kreuzer    jmp          Lpow_fma3_final_check
21134afb647cSTimo Kreuzer
21144afb647cSTimo KreuzerALIGN 16
; y == 1: the result is x itself (rdx = raw bits of x), unless x is a NaN.
; NaN test: rax <-- x bits only when the exponent field is all ones, else 0;
; a nonzero mantissa in rax then means NaN -> Lpow_fma3_x_is_nan.
; r11 is loaded with the x bits before branching; presumably the NaN handler
; (outside this chunk) consumes it - confirm.
21154afb647cSTimo KreuzerLpow_fma3_y_is_one:
21164afb647cSTimo Kreuzer    xor          rax, rax
21174afb647cSTimo Kreuzer    mov          r11, rdx
21184afb647cSTimo Kreuzer    mov          r9, QWORD PTR __exp_mask
21194afb647cSTimo Kreuzer    ;or          r11, QWORD PTR __qnan_set
21204afb647cSTimo Kreuzer    and          r9, rdx
21214afb647cSTimo Kreuzer    cmp          r9, QWORD PTR __exp_mask
21224afb647cSTimo Kreuzer    cmove        rax, rdx
21234afb647cSTimo Kreuzer    mov          r9, QWORD PTR __mant_mask
21244afb647cSTimo Kreuzer    and          r9, rax
21254afb647cSTimo Kreuzer    jnz          Lpow_fma3_x_is_nan
21264afb647cSTimo Kreuzer
    ; x is not a NaN: return it unchanged
21274afb647cSTimo Kreuzer    vmovq        xmm0, rdx
21284afb647cSTimo Kreuzer    jmp          Lpow_fma3_final_check
21294afb647cSTimo Kreuzer
21304afb647cSTimo KreuzerALIGN 16
; x == -1: the result is 1.0 with the sign stored in negate_result, unless y
; is a NaN (r8 = raw bits of y).
; NaN test: rax <-- y bits only when the exponent field is all ones, else 0;
; a nonzero mantissa in rax then means NaN -> Lpow_fma3_y_is_nan.
; r11 is loaded with the y bits before branching; presumably the NaN handler
; (outside this chunk) consumes it - confirm.
21314afb647cSTimo KreuzerLpow_fma3_x_is_neg_one:
    ; rdx <-- bits of the prospective result (+1.0 | sign)
21324afb647cSTimo Kreuzer    mov          rdx, QWORD PTR __pos_one
21334afb647cSTimo Kreuzer    or           rdx, QWORD PTR [negate_result+rsp]
21344afb647cSTimo Kreuzer    xor          rax, rax
21354afb647cSTimo Kreuzer    mov          r11, r8
21364afb647cSTimo Kreuzer    mov          r10, QWORD PTR __exp_mask
21374afb647cSTimo Kreuzer    ;or          r11, QWORD PTR __qnan_set
21384afb647cSTimo Kreuzer    and          r10, r8
21394afb647cSTimo Kreuzer    cmp          r10, QWORD PTR __exp_mask
21404afb647cSTimo Kreuzer    cmove        rax, r8
21414afb647cSTimo Kreuzer    mov          r10, QWORD PTR __mant_mask
21424afb647cSTimo Kreuzer    and          r10, rax
21434afb647cSTimo Kreuzer    jnz          Lpow_fma3_y_is_nan
21444afb647cSTimo Kreuzer
21454afb647cSTimo Kreuzer    vmovq        xmm0, rdx
21464afb647cSTimo Kreuzer    jmp          Lpow_fma3_final_check
21474afb647cSTimo Kreuzer
21484afb647cSTimo KreuzerALIGN 16
; Negative x with a non-integer y (rdx = raw bits of x).
; x = Inf/NaN and x == -0 are forwarded to their own handlers first.  For any
; other negative x the special-case helper is called with
;   xmm0 = x, xmm1 = y, xmm2 = __neg_qnan, r9d = __flag_x_neg_y_notint
; NOTE(review): fname_special is defined outside this chunk; presumably it
; raises the exception and leaves the value to return in xmm0 - confirm.
21494afb647cSTimo KreuzerLpow_fma3_x_is_neg_y_is_not_int:
21504afb647cSTimo Kreuzer    mov          r9, QWORD PTR __exp_mask
21514afb647cSTimo Kreuzer    and          r9, rdx
21524afb647cSTimo Kreuzer    cmp          r9, QWORD PTR __exp_mask
21534afb647cSTimo Kreuzer    je           Lpow_fma3_x_is_inf_or_nan
21544afb647cSTimo Kreuzer
21554afb647cSTimo Kreuzer    cmp          rdx, QWORD PTR __neg_zero
21564afb647cSTimo Kreuzer    je           Lpow_fma3_x_is_zero
21574afb647cSTimo Kreuzer
21584afb647cSTimo Kreuzer    vmovsd       xmm0, QWORD PTR [save_x+rsp]
21594afb647cSTimo Kreuzer    vmovsd       xmm1, QWORD PTR [save_y+rsp]
21604afb647cSTimo Kreuzer    vmovsd       xmm2, QWORD PTR __neg_qnan
21614afb647cSTimo Kreuzer    mov          r9d, DWORD PTR __flag_x_neg_y_notint
21624afb647cSTimo Kreuzer
21634afb647cSTimo Kreuzer    call         fname_special
21644afb647cSTimo Kreuzer    jmp          Lpow_fma3_final_check
21654afb647cSTimo Kreuzer
21664afb647cSTimo KreuzerALIGN 16
; |y| is above __ay_max_bound (this includes y = Inf/NaN, per the target label
; names).  Dispatch on x (rdx = raw bits of x):
;   x is Inf or NaN -> Lpow_fma3_x_is_inf_or_nan
;   |x| == 0        -> Lpow_fma3_x_is_zero
;   x == -1         -> Lpow_fma3_x_is_neg_one
;   |x| <  1        -> Lpow_fma3_ax_lt1_y_is_large_or_inf_or_nan
;   otherwise       -> Lpow_fma3_ax_gt1_y_is_large_or_inf_or_nan
21674afb647cSTimo KreuzerLpow_fma3_ay_is_very_large:
21684afb647cSTimo Kreuzer    mov          r9, QWORD PTR __exp_mask
21694afb647cSTimo Kreuzer    and          r9, rdx
21704afb647cSTimo Kreuzer    cmp          r9, QWORD PTR __exp_mask
21714afb647cSTimo Kreuzer    je           Lpow_fma3_x_is_inf_or_nan
21724afb647cSTimo Kreuzer
21734afb647cSTimo Kreuzer    mov          r9, QWORD PTR __exp_mant_mask
21744afb647cSTimo Kreuzer    and          r9, rdx
21754afb647cSTimo Kreuzer    jz           Lpow_fma3_x_is_zero
21764afb647cSTimo Kreuzer
21774afb647cSTimo Kreuzer    cmp          rdx, QWORD PTR __neg_one
21784afb647cSTimo Kreuzer    je           Lpow_fma3_x_is_neg_one
21794afb647cSTimo Kreuzer
    ; compare the |x| bits with the bits of +1.0; both have the sign bit
    ; clear, so the signed integer compare orders them like the doubles
21804afb647cSTimo Kreuzer    mov          r9, rdx
21814afb647cSTimo Kreuzer    and          r9, QWORD PTR __exp_mant_mask
21824afb647cSTimo Kreuzer    cmp          r9, QWORD PTR __pos_one
21834afb647cSTimo Kreuzer    jl           Lpow_fma3_ax_lt1_y_is_large_or_inf_or_nan
21844afb647cSTimo Kreuzer
21854afb647cSTimo Kreuzer    jmp          Lpow_fma3_ax_gt1_y_is_large_or_inf_or_nan
21864afb647cSTimo Kreuzer
21874afb647cSTimo KreuzerALIGN 16
; x is +0 or -0 (r8 = raw bits of y).
;   y is Inf or NaN -> Lpow_fma3_x_is_zero_y_is_inf_or_nan (with rax = 0)
;   y negative      -> rax = +Inf bits, Lpow_fma3_x_is_zero_z_is_inf
;   otherwise       -> return zero carrying the sign held in negate_result
21884afb647cSTimo KreuzerLpow_fma3_x_is_zero:
21894afb647cSTimo Kreuzer    mov          r10, QWORD PTR __exp_mask
21904afb647cSTimo Kreuzer    xor          rax, rax
21914afb647cSTimo Kreuzer    and          r10, r8
21924afb647cSTimo Kreuzer    cmp          r10, QWORD PTR __exp_mask
21934afb647cSTimo Kreuzer    je           Lpow_fma3_x_is_zero_y_is_inf_or_nan
21944afb647cSTimo Kreuzer
    ; cmovnz does not change flags, so jnz tests the same sign-bit result
21954afb647cSTimo Kreuzer    mov          r10, QWORD PTR __sign_mask
21964afb647cSTimo Kreuzer    and          r10, r8
21974afb647cSTimo Kreuzer    cmovnz       rax, QWORD PTR __pos_inf
21984afb647cSTimo Kreuzer    jnz          Lpow_fma3_x_is_zero_z_is_inf
21994afb647cSTimo Kreuzer
22004afb647cSTimo Kreuzer    vmovq        xmm0, rax
22014afb647cSTimo Kreuzer    vorpd        xmm0, xmm0, XMMWORD PTR [negate_result+rsp]
22024afb647cSTimo Kreuzer    jmp          Lpow_fma3_final_check
22034afb647cSTimo Kreuzer
22044afb647cSTimo KreuzerALIGN 16
; Zero raised to a negative finite power: the result is an infinity.
; rax holds the +Inf bit pattern on entry.  Calls the special-case helper with
;   xmm0 = x, xmm1 = y, xmm2 = rax | negate_result sign,
;   r9d  = __flag_x_zero_z_inf
; NOTE(review): fname_special is defined outside this chunk; presumably it
; raises the exception and leaves the value to return in xmm0 - confirm.
22054afb647cSTimo KreuzerLpow_fma3_x_is_zero_z_is_inf:
22064afb647cSTimo Kreuzer
22074afb647cSTimo Kreuzer    vmovsd       xmm0, QWORD PTR [save_x+rsp]
22084afb647cSTimo Kreuzer    vmovsd       xmm1, QWORD PTR [save_y+rsp]
22094afb647cSTimo Kreuzer    vmovq        xmm2, rax
22104afb647cSTimo Kreuzer    vorpd        xmm2, xmm2, XMMWORD PTR [negate_result+rsp]
22114afb647cSTimo Kreuzer    mov          r9d, DWORD PTR __flag_x_zero_z_inf
22124afb647cSTimo Kreuzer
22134afb647cSTimo Kreuzer    call         fname_special
22144afb647cSTimo Kreuzer    jmp          Lpow_fma3_final_check
22154afb647cSTimo Kreuzer
22164afb647cSTimo KreuzerALIGN 16
; x is +-0 and y is Inf or NaN (r8 = raw bits of y, rax = 0 as set by
; Lpow_fma3_x_is_zero).
;   y == -Inf -> Lpow_fma3_x_is_zero_y_is_neg_inf (quiet +Inf)
;   y == +Inf -> Lpow_fma3_x_is_zero_y_is_pos_inf (quiet +0)
;   y is NaN  -> Lpow_fma3_y_is_nan
; r11 is loaded with the y bits before branching; presumably the NaN handler
; (outside this chunk) consumes it - confirm.
22174afb647cSTimo KreuzerLpow_fma3_x_is_zero_y_is_inf_or_nan:
22184afb647cSTimo Kreuzer    mov          r11, r8
22194afb647cSTimo Kreuzer    cmp          r8, QWORD PTR __neg_inf
22204afb647cSTimo Kreuzer;   The next two lines do not correspond to IEEE754-2008.
22214afb647cSTimo Kreuzer;   +-0 ^ -Inf should be +Inf with no exception
22224afb647cSTimo Kreuzer;   +-0 ^ +Inf should be +0 with no exception
22234afb647cSTimo Kreuzer;   cmove        rax, QWORD PTR __pos_inf
22244afb647cSTimo Kreuzer;   je           Lpow_fma3_x_is_zero_z_is_inf
22254afb647cSTimo Kreuzer;  begin replacement
22264afb647cSTimo Kreuzer    je           Lpow_fma3_x_is_zero_y_is_neg_inf
    ; Test for +Inf here.  A second compare against __neg_inf can never match
    ; after the je above, which would leave the +Inf branch unreachable.
22274afb647cSTimo Kreuzer    cmp          r8, QWORD PTR __pos_inf
22284afb647cSTimo Kreuzer    je           Lpow_fma3_x_is_zero_y_is_pos_inf
22294afb647cSTimo Kreuzer;  end replacement
22304afb647cSTimo Kreuzer
    ; exponent is all ones and y is neither infinity: nonzero mantissa => NaN
22314afb647cSTimo Kreuzer    ;or          r11, QWORD PTR __qnan_set
22324afb647cSTimo Kreuzer    mov          r10, QWORD PTR __mant_mask
22334afb647cSTimo Kreuzer    and          r10, r8
22344afb647cSTimo Kreuzer    jnz          Lpow_fma3_y_is_nan
22354afb647cSTimo Kreuzer
    ; defensive fall-through: return the bits in rax (zero on the path in view)
22364afb647cSTimo Kreuzer    vmovq        xmm0, rax
22374afb647cSTimo Kreuzer    jmp          Lpow_fma3_final_check
22384afb647cSTimo Kreuzer
22394afb647cSTimo KreuzerALIGN 16
; Reached when the y bits equal __neg_inf on the zero-base path.
; Loads the __pos_inf constant as the result without calling fname_special.
22404afb647cSTimo KreuzerLpow_fma3_x_is_zero_y_is_neg_inf:
22414afb647cSTimo Kreuzer    ; quietly return +Inf
22424afb647cSTimo Kreuzer    vmovsd       xmm0, __pos_inf
22434afb647cSTimo Kreuzer    jmp          Lpow_fma3_final_check
22444afb647cSTimo Kreuzer
22454afb647cSTimo KreuzerALIGN 16
; Zero-base, y == +Inf result path (per the label name): produces +0.0
; without calling fname_special.
22464afb647cSTimo KreuzerLpow_fma3_x_is_zero_y_is_pos_inf:
22474afb647cSTimo Kreuzer    ; quietly return +0.
22484afb647cSTimo Kreuzer    vxorpd       xmm0, xmm0, xmm0        ; all-zero bits == +0.0
22494afb647cSTimo Kreuzer    jmp          Lpow_fma3_final_check
22504afb647cSTimo Kreuzer
22514afb647cSTimo KreuzerALIGN 16
; x has an all-ones exponent (Inf or NaN, per the label name).
; NOTE(review): rdx and r8 are used as the raw bit patterns of x and y
; respectively - presumably set up by the code that jumps here; confirm.
; r11 accumulates the candidate result:
;   start with 0, or __pos_inf when the sign bit of r8 is clear;
;   replaced by the x bits if x is a NaN, or by the y bits if y is a NaN.
22524afb647cSTimo KreuzerLpow_fma3_x_is_inf_or_nan:
22534afb647cSTimo Kreuzer    xor          r11, r11
22544afb647cSTimo Kreuzer    mov          r10, QWORD PTR __sign_mask
22554afb647cSTimo Kreuzer    and          r10, r8                 ; ZF set when the sign bit of r8 is clear
22564afb647cSTimo Kreuzer    cmovz        r11, QWORD PTR __pos_inf
22574afb647cSTimo Kreuzer    mov          rax, rdx
22584afb647cSTimo Kreuzer    mov          r9, QWORD PTR __mant_mask
22594afb647cSTimo Kreuzer    ;or          rax, QWORD PTR __qnan_set
22604afb647cSTimo Kreuzer    and          r9, rdx                 ; non-zero mantissa => x is a NaN
22614afb647cSTimo Kreuzer    cmovnz       r11, rax                ; cmov leaves flags intact, so the jnz below tests the same AND
22624afb647cSTimo Kreuzer    jnz          Lpow_fma3_x_is_nan
22634afb647cSTimo Kreuzer
22644afb647cSTimo Kreuzer    xor          rax, rax
22654afb647cSTimo Kreuzer    mov          r9, r8
22664afb647cSTimo Kreuzer    mov          r10, QWORD PTR __exp_mask
22674afb647cSTimo Kreuzer    ;or          r9, QWORD PTR __qnan_set
22684afb647cSTimo Kreuzer    and          r10, r8
22694afb647cSTimo Kreuzer    cmp          r10, QWORD PTR __exp_mask
22704afb647cSTimo Kreuzer    cmove        rax, r8                 ; rax = r8 if its exponent is all ones, else 0
22714afb647cSTimo Kreuzer    mov          r10, QWORD PTR __mant_mask
22724afb647cSTimo Kreuzer    and          r10, rax                ; non-zero only when r8 is a NaN
22734afb647cSTimo Kreuzer    cmovnz       r11, r9
22744afb647cSTimo Kreuzer    jnz          Lpow_fma3_y_is_nan
22754afb647cSTimo Kreuzer
22764afb647cSTimo Kreuzer    vmovq        xmm0, r11
22774afb647cSTimo Kreuzer    vorpd        xmm0, xmm0, XMMWORD PTR [negate_result+rsp] ; OR in the negate_result slot (presumably a sign mask - confirm)
22784afb647cSTimo Kreuzer    jmp          Lpow_fma3_final_check
22794afb647cSTimo Kreuzer
22804afb647cSTimo KreuzerALIGN 16
; Path named for a very small |y|: the result is xmm1 + __pos_one.
; NOTE(review): presumably xmm1 still holds y here and __pos_one is 1.0,
; giving a result of about 1 - confirm against the dispatch code.
22814afb647cSTimo KreuzerLpow_fma3_ay_is_very_small:
22824afb647cSTimo Kreuzer    vaddsd       xmm0, xmm1, QWORD PTR __pos_one
22834afb647cSTimo Kreuzer    jmp          Lpow_fma3_final_check
22844afb647cSTimo Kreuzer
22854afb647cSTimo Kreuzer
22864afb647cSTimo KreuzerALIGN 16
; Path named for |x| < 1 with y large, Inf or NaN.
; Candidate result in r11: __pos_inf when the sign bit of r8 is set,
; otherwise 0. r8 is presumably the raw bits of y - confirm.
22874afb647cSTimo KreuzerLpow_fma3_ax_lt1_y_is_large_or_inf_or_nan:
22884afb647cSTimo Kreuzer    xor          r11, r11
22894afb647cSTimo Kreuzer    mov          r10, QWORD PTR __sign_mask
22904afb647cSTimo Kreuzer    and          r10, r8                 ; non-zero when the sign bit of r8 is set
22914afb647cSTimo Kreuzer    cmovnz       r11, QWORD PTR __pos_inf
22924afb647cSTimo Kreuzer    jmp          Lpow_fma3_adjust_for_nan
22934afb647cSTimo Kreuzer
22944afb647cSTimo KreuzerALIGN 16
; Path named for |x| > 1 with y large, Inf or NaN.
; Candidate result in r11: __pos_inf when the sign bit of r8 is clear,
; otherwise 0. r8 is presumably the raw bits of y - confirm.
; There is no jump at the end: execution falls through to the label that
; follows in the file.
22954afb647cSTimo KreuzerLpow_fma3_ax_gt1_y_is_large_or_inf_or_nan:
22964afb647cSTimo Kreuzer    xor          r11, r11
22974afb647cSTimo Kreuzer    mov          r10, QWORD PTR __sign_mask
22984afb647cSTimo Kreuzer    and          r10, r8                 ; ZF set when the sign bit of r8 is clear
22994afb647cSTimo Kreuzer    cmovz        r11, QWORD PTR __pos_inf
23004afb647cSTimo Kreuzer
23014afb647cSTimo KreuzerALIGN 16
; Classifies r8 (presumably the raw bits of y - confirm) while r11 holds a
; candidate result:
;   exponent all ones and mantissa non-zero -> r11 = r8, go to Lpow_fma3_y_is_nan
;   exponent all ones and mantissa zero     -> go to Lpow_fma3_y_is_inf
;   anything else                           -> fall through to the code that
;                                              follows this block
23024afb647cSTimo KreuzerLpow_fma3_adjust_for_nan:
23034afb647cSTimo Kreuzer
23044afb647cSTimo Kreuzer    xor          rax, rax
23054afb647cSTimo Kreuzer    mov          r9, r8
23064afb647cSTimo Kreuzer    mov          r10, QWORD PTR __exp_mask
23074afb647cSTimo Kreuzer    ;or          r9, QWORD PTR __qnan_set
23084afb647cSTimo Kreuzer    and          r10, r8
23094afb647cSTimo Kreuzer    cmp          r10, QWORD PTR __exp_mask
23104afb647cSTimo Kreuzer    cmove        rax, r8                 ; rax = r8 if its exponent is all ones, else 0
23114afb647cSTimo Kreuzer    mov          r10, QWORD PTR __mant_mask
23124afb647cSTimo Kreuzer    and          r10, rax                ; non-zero only when r8 is a NaN
23134afb647cSTimo Kreuzer    cmovnz       r11, r9                 ; flags survive the cmov, so the jnz tests the same AND
23144afb647cSTimo Kreuzer    jnz          Lpow_fma3_y_is_nan
23154afb647cSTimo Kreuzer
23164afb647cSTimo Kreuzer    test         rax, rax                ; rax != 0 here means exponent all ones, mantissa zero
23174afb647cSTimo Kreuzer    jnz          Lpow_fma3_y_is_inf
23184afb647cSTimo Kreuzer
23194afb647cSTimo KreuzerALIGN 16
; Reports a result (bits in r11) that is either zero or infinite to
; fname_special. The flag code is __flag_z_zero unless r11 has any
; exponent/mantissa bit set, in which case it is __flag_z_inf.
; NOTE(review): the register layout looks like (xmm0=x, xmm1=y, xmm2=z,
; r9d=flags) - confirm against fname_special's definition.
23204afb647cSTimo KreuzerLpow_fma3_z_is_zero_or_inf:
23214afb647cSTimo Kreuzer
23224afb647cSTimo Kreuzer    mov          r9d, DWORD PTR __flag_z_zero
23234afb647cSTimo Kreuzer    test         r11, QWORD PTR __exp_mant_mask ; non-zero => r11 is not +-0
23244afb647cSTimo Kreuzer    cmovnz       r9d, DWORD PTR __flag_z_inf
23254afb647cSTimo Kreuzer
23264afb647cSTimo Kreuzer    vmovsd       xmm0, QWORD PTR [save_x+rsp]    ; reload the value kept in the save_x stack slot
23274afb647cSTimo Kreuzer    vmovsd       xmm1, QWORD PTR [save_y+rsp]    ; reload the value kept in the save_y stack slot
23284afb647cSTimo Kreuzer    vmovq        xmm2, r11                       ; candidate result
23294afb647cSTimo Kreuzer
23304afb647cSTimo Kreuzer    call         fname_special
23314afb647cSTimo Kreuzer    jmp          Lpow_fma3_final_check
23324afb647cSTimo Kreuzer
23334afb647cSTimo KreuzerALIGN 16
; y is infinite (per the label name): the candidate result already in r11
; becomes the result directly, without calling fname_special.
23344afb647cSTimo KreuzerLpow_fma3_y_is_inf:
23354afb647cSTimo Kreuzer
23364afb647cSTimo Kreuzer    vmovq        xmm0, r11
23374afb647cSTimo Kreuzer    jmp          Lpow_fma3_final_check
23384afb647cSTimo Kreuzer
23394afb647cSTimo KreuzerALIGN 16
; x is a NaN (per the label name); r11 holds the candidate result.
; First checks whether r8 (presumably the raw bits of y - confirm) is also
; a NaN and, if so, branches to Lpow_fma3_x_is_nan_y_is_nan. Otherwise it
; calls fname_special with the __flag_x_nan code.
23404afb647cSTimo KreuzerLpow_fma3_x_is_nan:
23414afb647cSTimo Kreuzer
23424afb647cSTimo Kreuzer    xor          rax, rax
23434afb647cSTimo Kreuzer    mov          r10, QWORD PTR __exp_mask
23444afb647cSTimo Kreuzer    and          r10, r8
23454afb647cSTimo Kreuzer    cmp          r10, QWORD PTR __exp_mask
23464afb647cSTimo Kreuzer    cmove        rax, r8                 ; rax = r8 if its exponent is all ones, else 0
23474afb647cSTimo Kreuzer    mov          r10, QWORD PTR __mant_mask
23484afb647cSTimo Kreuzer    and          r10, rax                ; non-zero only when r8 is a NaN
23494afb647cSTimo Kreuzer    jnz          Lpow_fma3_x_is_nan_y_is_nan
23504afb647cSTimo Kreuzer
23514afb647cSTimo Kreuzer    vmovsd       xmm0, QWORD PTR [save_x+rsp]    ; reload the value kept in the save_x stack slot
23524afb647cSTimo Kreuzer    vmovsd       xmm1, QWORD PTR [save_y+rsp]    ; reload the value kept in the save_y stack slot
23534afb647cSTimo Kreuzer    vmovq        xmm2, r11                       ; candidate result
23544afb647cSTimo Kreuzer    mov          r9d, DWORD PTR __flag_x_nan
23554afb647cSTimo Kreuzer
23564afb647cSTimo Kreuzer    call         fname_special
23574afb647cSTimo Kreuzer    jmp          Lpow_fma3_final_check
23584afb647cSTimo Kreuzer
23594afb647cSTimo KreuzerALIGN 16
; y is a NaN (per the label name): calls fname_special with the two saved
; values, the candidate result from r11 and the __flag_y_nan code, then
; jumps to Lpow_fma3_final_check.
23604afb647cSTimo KreuzerLpow_fma3_y_is_nan:
23614afb647cSTimo Kreuzer
23624afb647cSTimo Kreuzer    vmovsd       xmm0, QWORD PTR [save_x+rsp]    ; reload the value kept in the save_x stack slot
23634afb647cSTimo Kreuzer    vmovsd       xmm1, QWORD PTR [save_y+rsp]    ; reload the value kept in the save_y stack slot
23644afb647cSTimo Kreuzer    vmovq        xmm2, r11                       ; candidate result
23654afb647cSTimo Kreuzer    mov          r9d, DWORD PTR __flag_y_nan
23664afb647cSTimo Kreuzer
23674afb647cSTimo Kreuzer    call         fname_special
23684afb647cSTimo Kreuzer    jmp          Lpow_fma3_final_check
23694afb647cSTimo Kreuzer
23704afb647cSTimo KreuzerALIGN 16
; Both operands are NaN (per the label name). Chooses which bit pattern to
; report in r11, between the incoming r11 and r8:
;   1. if r11 equals __ind_pattern, take r8 and skip the remaining steps;
;   2. otherwise, if r8 equals __ind_pattern, treat it as r11;
;   3. a candidate with its sign bit set is replaced by the other one.
; The chosen value is passed to fname_special with __flag_x_nan_y_nan.
; NOTE(review): presumably r11 carries the x bits and r8 the y bits on
; entry - confirm against every path that reaches this label.
23714afb647cSTimo KreuzerLpow_fma3_x_is_nan_y_is_nan:
23724afb647cSTimo Kreuzer
23734afb647cSTimo Kreuzer    mov          r9, r8
23744afb647cSTimo Kreuzer
23754afb647cSTimo Kreuzer    cmp          r11, QWORD PTR __ind_pattern
23764afb647cSTimo Kreuzer    cmove        r11, r9                 ; cmov keeps the flags for the je below
23774afb647cSTimo Kreuzer    je           Lpow_fma3_continue_xy_nan
23784afb647cSTimo Kreuzer
23794afb647cSTimo Kreuzer    cmp          r9, QWORD PTR __ind_pattern
23804afb647cSTimo Kreuzer    cmove        r9, r11
23814afb647cSTimo Kreuzer
23824afb647cSTimo Kreuzer    mov          r10, r9
23834afb647cSTimo Kreuzer    and          r10, QWORD PTR __sign_mask
23844afb647cSTimo Kreuzer    cmovnz       r9, r11                 ; sign bit set in r9 => use r11 instead
23854afb647cSTimo Kreuzer
23864afb647cSTimo Kreuzer    mov          r10, r11
23874afb647cSTimo Kreuzer    and          r10, QWORD PTR __sign_mask
23884afb647cSTimo Kreuzer    cmovnz       r11, r9                 ; sign bit set in r11 => use r9 instead
23894afb647cSTimo Kreuzer
23904afb647cSTimo KreuzerLpow_fma3_continue_xy_nan:
23914afb647cSTimo Kreuzer    ;or          r11, QWORD PTR __qnan_set
23924afb647cSTimo Kreuzer    vmovsd       xmm0, QWORD PTR [save_x+rsp]    ; reload the value kept in the save_x stack slot
23934afb647cSTimo Kreuzer    vmovsd       xmm1, QWORD PTR [save_y+rsp]    ; reload the value kept in the save_y stack slot
23944afb647cSTimo Kreuzer    vmovq        xmm2, r11                       ; selected NaN bit pattern
23954afb647cSTimo Kreuzer    mov          r9d, DWORD PTR __flag_x_nan_y_nan
23964afb647cSTimo Kreuzer
23974afb647cSTimo Kreuzer    call         fname_special
23984afb647cSTimo Kreuzer    jmp          Lpow_fma3_final_check
23994afb647cSTimo Kreuzer
24004afb647cSTimo KreuzerALIGN 16
; Result is denormal (per the label name): moves the value currently in
; xmm0 into xmm2, reloads the two saved values and calls fname_special with
; the __flag_z_denormal code.
; NOTE(review): presumably xmm0 holds the already-computed result on entry -
; confirm against the code that jumps here.
24014afb647cSTimo KreuzerLpow_fma3_z_denormal:
24024afb647cSTimo Kreuzer    vmovapd      xmm2, xmm0                      ; keep the incoming xmm0 before it is overwritten
24034afb647cSTimo Kreuzer    vmovsd       xmm0, QWORD PTR [save_x+rsp]    ; reload the value kept in the save_x stack slot
24044afb647cSTimo Kreuzer    vmovsd       xmm1, QWORD PTR [save_y+rsp]    ; reload the value kept in the save_y stack slot
24054afb647cSTimo Kreuzer    mov          r9d, DWORD PTR __flag_z_denormal
24064afb647cSTimo Kreuzer
24074afb647cSTimo Kreuzer    call         fname_special
24084afb647cSTimo Kreuzer    jmp          Lpow_fma3_final_check
24094afb647cSTimo Kreuzer
24104afb647cSTimo Kreuzerfname endp
24114afb647cSTimo KreuzerEND
2412