1 
2 /*
3  * Copyright (c) 2018, NVIDIA CORPORATION.  All rights reserved.
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at
8  *
9  *     http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  */
18 
19 
20 #ifndef COMMON_H_H63T0LSL
21 #define COMMON_H_H63T0LSL
22 
23 #include <stdint.h>
24 
/* Shorthand for the compiler's single-precision fused multiply-add builtin. */
#define FMAF __builtin_fmaf
26 
/*
 * Constants for Cody-Waite argument reduction.
 *
 * _2_OVER_PI_F is 2/pi rounded to single precision.  PI_2_HI_F, PI_2_MI_F
 * and PI_2_LO_F are a three-part split of pi/2 with decreasing magnitude
 * (high, middle and low pieces).
 * NOTE(review): THRESHOLD_F is presumably the argument magnitude above
 * which callers switch to Payne-Hanek reduction -- confirm against the
 * code that uses it.
 */
#define _2_OVER_PI_F 6.36619772e-01f
#define PI_2_HI_F    1.57079601e+00f
#define PI_2_MI_F    3.13916473e-07f
#define PI_2_LO_F    5.38561632e-15f
#define THRESHOLD_F  1.00000000e+04f
33 
/*
 * Coefficients of approximate tan on [-PI/4,+PI/4].
 *
 * F_F is close to 1/3 and E_F close to 2/15, the leading Taylor
 * coefficients of tan, which suggests A_F..F_F run from the highest-order
 * term down to the x^3 term.
 * NOTE(review): confirm the ordering against the polynomial evaluation in
 * the code that uses these constants.
 */
#define A_F 9.42561682e-03f
#define B_F 3.06017953e-03f
#define C_F 2.44512185e-02f
#define D_F 5.34108058e-02f
#define E_F 1.33389056e-01f
#define F_F 3.33331138e-01f
41 
/*
 * 192 bits of 2/PI for Payne-Hanek argument reduction, held as six 32-bit
 * words.  The words are stored least significant first: the last element
 * (0xa2f9836e) carries the leading fraction bits of 2/PI.
 */
static uint32_t i2opi_f [] = {
    UINT32_C(0x3c439041),   /* word 0: least significant 32 bits */
    UINT32_C(0xdb629599),   /* word 1 */
    UINT32_C(0xf534ddc0),   /* word 2 */
    UINT32_C(0xfc2757d1),   /* word 3 */
    UINT32_C(0x4e441529),   /* word 4 */
    UINT32_C(0xa2f9836e),   /* word 5: most significant 32 bits */
};
51 
/*
 * Double-precision scale factor; numerically equal to pi * 2^-64
 * (equivalently (pi/2) * 2^-63).
 * NOTE(review): presumably used to turn a fixed-point reduced argument back
 * into radians -- confirm against the code that uses it.
 */
#define PI_2_M64 1.70306079004327746902e-19
53 
/*
 * Reinterpret the bit pattern of a single-precision float as a signed
 * 32-bit integer.  No numeric conversion takes place: the 32 bits of `f`
 * are returned unchanged.
 *
 * The bits are moved through a union instead of dereferencing a cast
 * pointer, so the result does not depend on building with
 * -fno-strict-aliasing.
 */
static int32_t
float_as_int(float f)
{
    union {
        float   f32;
        int32_t i32;
    } bits;

    bits.f32 = f;
    return bits.i32;
}
60 
/*
 * Reinterpret a signed 32-bit integer bit pattern as a single-precision
 * float.  No numeric conversion takes place: the 32 bits of `i` become the
 * float's representation unchanged.
 *
 * The bits are moved through a union instead of dereferencing a cast
 * pointer, so the result does not depend on building with
 * -fno-strict-aliasing.
 */
static float
int_as_float(int32_t i)
{
    union {
        int32_t i32;
        float   f32;
    } bits;

    bits.i32 = i;
    return bits.f32;
}
67 
/* Pair of 32-bit words; umad32wide() returns its 64-bit result in this form. */
typedef struct {
    uint32_t x;
    uint32_t y;
} uint2;

/*
 * Widening multiply-add: computes (uint64_t)a * b + c and returns the
 * 64-bit result split into two 32-bit words.
 *
 * The sum cannot overflow 64 bits: the largest possible value is
 * 0xffffffff * 0xffffffff + 0xffffffff = 0xffffffff00000000.
 *
 * Returns: .x holds bits 0..31 (low word), .y holds bits 32..63 (high
 * word).  The split is done with explicit shifts so the x/y assignment is
 * the same on every host byte order and needs no aliasing-related
 * compiler flags.
 */
static uint2
umad32wide(uint32_t a, uint32_t b, uint32_t c)
{
    const uint64_t wide = (uint64_t)a * b + c;
    uint2 res;

    res.x = (uint32_t)wide;         /* low 32 bits */
    res.y = (uint32_t)(wide >> 32); /* high 32 bits */
    return res;
}
84 
85 #endif
86