1
2 /*
3 * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 */
18
19
20 #ifndef COMMON_H_H63T0LSL
21 #define COMMON_H_H63T0LSL
22
23 #include <stdint.h>
24
/*
 * Single-precision fused multiply-add.  FMAF(a, b, c) expands to the
 * compiler builtin __builtin_fmaf(a, b, c), i.e. a * b + c as a float, so
 * this header requires a compiler that provides that builtin (GCC, Clang).
 */
#define FMAF __builtin_fmaf
26
/*
 * Constants for Cody-Waite argument reduction.
 *
 * _2_OVER_PI_F is 2/PI rounded to float.  PI_2_HI_F, PI_2_MI_F and
 * PI_2_LO_F are a three-part split of PI/2 (high, middle and low parts);
 * their sum reproduces PI/2 to well beyond float precision.
 * THRESHOLD_F is presumably the argument magnitude above which callers
 * switch to Payne-Hanek reduction -- confirm against the users of this
 * header.
 */
#define _2_OVER_PI_F 6.36619772e-01f
#define PI_2_HI_F    1.57079601e+00f
#define PI_2_MI_F    3.13916473e-07f
#define PI_2_LO_F    5.38561632e-15f
#define THRESHOLD_F  1.00000000e+04f
33
/*
 * Coefficients of approximate tan on [-PI/4,+PI/4].
 *
 * A_F belongs to the highest-order term and F_F to the lowest: F_F is
 * close to 1/3 and E_F to 2/15, the leading Taylor coefficients of tan.
 * With s = x * x, the Horner form
 *   x + x*s * (F + s*(E + s*(D + s*(C + s*(B + s*A)))))
 * reproduces tan(0.5) to about 1e-7; callers presumably evaluate the
 * polynomial this way.
 */
#define A_F 9.42561682e-03f
#define B_F 3.06017953e-03f
#define C_F 2.44512185e-02f
#define D_F 5.34108058e-02f
#define E_F 1.33389056e-01f
#define F_F 3.33331138e-01f
41
/*
 * 192 bits of 2/PI for Payne-Hanek argument reduction.
 *
 * The six 32-bit words are stored least-significant first: i2opi_f[5]
 * (0xa2f9836e) holds the leading 32 fractional bits of 2/PI and
 * i2opi_f[0] the trailing 32.
 */
static uint32_t i2opi_f[] = {
  0x3c439041,
  0xdb629599,
  0xf534ddc0,
  0xfc2757d1,
  0x4e441529,
  0xa2f9836e,
};
51
/*
 * PI * 2^-64 (equivalently (PI/2) * 2^-63) as a double-precision literal.
 * Presumably used to scale a 64-bit fixed-point fraction produced by the
 * Payne-Hanek reduction back to radians -- confirm against the caller.
 */
#define PI_2_M64 1.70306079004327746902e-19
53
/*
 * Return the object representation of the float `f` as a signed 32-bit
 * integer; the bits are copied, no numeric conversion takes place.
 *
 * The reinterpretation goes through a union instead of a pointer cast, so
 * the function does not violate the strict-aliasing rule and no longer
 * needs -fno-strict-aliasing to be well defined.
 */
static int32_t
float_as_int(float f)
{
  union {
    float   flt;
    int32_t bits;
  } pun;

  pun.flt = f;
  return pun.bits;
}
60
/*
 * Return the float whose object representation is the 32-bit pattern `i`;
 * the bits are copied, no numeric conversion takes place.
 *
 * The reinterpretation goes through a union instead of a pointer cast, so
 * the function does not violate the strict-aliasing rule and no longer
 * needs -fno-strict-aliasing to be well defined.
 */
static float
int_as_float(int32_t i)
{
  union {
    int32_t bits;
    float   flt;
  } pun;

  pun.bits = i;
  return pun.flt;
}
67
/* Pair of 32-bit words; umad32wide() returns a 64-bit value split in two. */
typedef struct {
  uint32_t x; /* low 32 bits of the value produced by umad32wide() */
  uint32_t y; /* high 32 bits of the value produced by umad32wide() */
} uint2;

/*
 * 32 x 32 -> 64-bit unsigned multiply-add: computes (uint64_t)a * b + c
 * and returns the low word in .x and the high word in .y.
 *
 * The sum cannot overflow 64 bits: (2^32 - 1)^2 + (2^32 - 1) < 2^64.
 *
 * The words are extracted with an explicit truncation and shift rather
 * than by overlaying a uint64_t on the struct, so the result does not
 * depend on host byte order or on the layout of uint2.  On little-endian
 * hosts this matches the overlay exactly.
 */
static uint2
umad32wide(uint32_t a, uint32_t b, uint32_t c)
{
  const uint64_t wide = (uint64_t)a * b + c;
  uint2 res;

  res.x = (uint32_t)wide;
  res.y = (uint32_t)(wide >> 32);
  return res;
}
84
85 #endif
86