1 /*
2  * Copyright (c) 2017-2019, NVIDIA CORPORATION.  All rights reserved.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  *
16  */
17 
18 /******************************************************************************
19  *                                                                            *
20  * Background:                                                                *
21  * The POWERPC ABI does not provide for tail calls. Thus, the math dispatch   *
22  * table processing incurs overhead with the saving and restoration of GPR 2  *
23  * that can severely affect application performance.  For POWERPC, we use an  *
24  * optimized assembly dispatch set of routines that make tail calls to all of *
25  * the routines defined in the math dispatch configuration files but do not   *
26  * save and restore GPR 2.                                                    *
27  *                                                                            *
28  * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! WARNING !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! *
29  *                                                                            *
30  * If any entry (routine <FUNC>) in any of the dispatch tables is not present *
31  * in, i.e. not satisfied by, libpgmath, in order to properly preserve/restore*
32  * GPR 2 when calling routine <FUNC>, the actual function must first be       *
33  * encapsulated in a routine present in libpgmath.                            *
34  *                                                                            *
35  * No doubt there are pathological cases that will show this engineering      *
36  * choice to be wrong, but current performance testing shows otherwise.       *
37  *                                                                            *
38  *****************************************************************************/
39 
40 /* R(:)**I4 -- powi1: real base raised to one scalar INTEGER*4 exponent (Fortran-style notation). One row per type code (ss, ds, sv4, dv2, sv4m, dv2m); the three routine columns look like fast/relaxed/precise variants -- TODO confirm against the MTHINTRIN definition. */
41 MTHINTRIN(powi1, ss   , any        , __mth_i_rpowi         , __mth_i_rpowi         , __pmth_i_rpowi        ,__math_dispatch_error)
42 MTHINTRIN(powi1, ds   , any        , __mth_i_dpowi         , __mth_i_dpowi         , __pmth_i_dpowi        ,__math_dispatch_error)
43 MTHINTRIN(powi1, sv4  , any        , __fx_powi1_4          , __fx_powi1_4          , __px_powi1_4          ,__math_dispatch_error)
44 MTHINTRIN(powi1, dv2  , any        , __fx_powi1_2          , __fx_powi1_2          , __px_powi1_2          ,__math_dispatch_error)
45 MTHINTRIN(powi1, sv4m , any        , __fs_powi1_4_mn       , __rs_powi1_4_mn       , __ps_powi1_4_mn       ,__math_dispatch_error)
46 MTHINTRIN(powi1, dv2m , any        , __fd_powi1_2_mn       , __rd_powi1_2_mn       , __pd_powi1_2_mn       ,__math_dispatch_error)
47 /* R(:)**I4(:) -- powi: real base raised to an array-valued INTEGER*4 exponent (per the "(:)" notation). The ss/ds rows name the same __mth_i_rpowi/__mth_i_dpowi routines as the powi1 rows; the vector rows (sv4, dv2, sv4m, dv2m) name powi-specific routines. */
48 MTHINTRIN(powi , ss   , any        , __mth_i_rpowi         , __mth_i_rpowi         , __pmth_i_rpowi        ,__math_dispatch_error)
49 MTHINTRIN(powi , ds   , any        , __mth_i_dpowi         , __mth_i_dpowi         , __pmth_i_dpowi        ,__math_dispatch_error)
50 MTHINTRIN(powi , sv4  , any        , __gs_powi_4_f         , __gs_powi_4_r         , __px_powi_4           ,__math_dispatch_error)
51 MTHINTRIN(powi , dv2  , any        , __gd_powi_2_f         , __gd_powi_2_r         , __px_powi_2           ,__math_dispatch_error)
52 MTHINTRIN(powi , sv4m , any        , __fs_powi_4_mn        , __rs_powi_4_mn        , __ps_powi_4_mn        ,__math_dispatch_error)
53 MTHINTRIN(powi , dv2m , any        , __fd_powi_2_mn        , __rd_powi_2_mn        , __pd_powi_2_mn        ,__math_dispatch_error)
54 /* R(:)**I8 -- powk1: real base raised to one scalar INTEGER*8 exponent (Fortran-style notation). One row per type code (ss, ds, sv4, dv2, sv4m, dv2m); every row ends with __math_dispatch_error as its final argument -- NOTE(review): role of that last column is defined by MTHINTRIN, confirm there. */
55 MTHINTRIN(powk1, ss   , any        , __mth_i_rpowk         , __mth_i_rpowk         , __pmth_i_rpowk        ,__math_dispatch_error)
56 MTHINTRIN(powk1, ds   , any        , __mth_i_dpowk         , __mth_i_dpowk         , __pmth_i_dpowk        ,__math_dispatch_error)
57 MTHINTRIN(powk1, sv4  , any        , __fx_powk1_4          , __fx_powk1_4          , __px_powk1_4          ,__math_dispatch_error)
58 MTHINTRIN(powk1, dv2  , any        , __fx_powk1_2          , __fx_powk1_2          , __px_powk1_2          ,__math_dispatch_error)
59 MTHINTRIN(powk1, sv4m , any        , __fs_powk1_4_mn       , __rs_powk1_4_mn       , __ps_powk1_4_mn       ,__math_dispatch_error)
60 MTHINTRIN(powk1, dv2m , any        , __fd_powk1_2_mn       , __rd_powk1_2_mn       , __pd_powk1_2_mn       ,__math_dispatch_error)
61 /* R(:)**I8(:) -- powk: real base raised to an array-valued INTEGER*8 exponent (per the "(:)" notation). The ss/ds rows name the same __mth_i_rpowk/__mth_i_dpowk routines as the powk1 rows; the vector rows (sv4, dv2, sv4m, dv2m) name powk-specific routines. */
62 MTHINTRIN(powk , ss   , any        , __mth_i_rpowk         , __mth_i_rpowk         , __pmth_i_rpowk        ,__math_dispatch_error)
63 MTHINTRIN(powk , ds   , any        , __mth_i_dpowk         , __mth_i_dpowk         , __pmth_i_dpowk        ,__math_dispatch_error)
64 MTHINTRIN(powk , sv4  , any        , __gs_powk_4_f         , __gs_powk_4_r         , __px_powk_4           ,__math_dispatch_error)
65 MTHINTRIN(powk , dv2  , any        , __gd_powk_2_f         , __gd_powk_2_r         , __px_powk_2           ,__math_dispatch_error)
66 MTHINTRIN(powk , sv4m , any        , __fs_powk_4_mn        , __rs_powk_4_mn        , __ps_powk_4_mn        ,__math_dispatch_error)
67 MTHINTRIN(powk , dv2m , any        , __fd_powk_2_mn        , __rd_powk_2_mn        , __pd_powk_2_mn        ,__math_dispatch_error)
68 
69