1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii < %s | FileCheck -check-prefixes=GCN,GFX7 %s
3; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8 %s
4
; llvm.powi.f16 with a variable i32 exponent. The half operand and result are
; passed as i16 and bitcast to/from half inside the function.
; Expected GFX7 output converts the base to f32 and uses the f32 log/mul/exp
; sequence before converting back to f16. Expected GFX8 output uses
; v_log_f16/v_exp_f16, with the multiply performed in f32 after converting
; both operands.
5define i16 @v_powi_f16(i16 %l, i32 %r) {
6; GFX7-LABEL: v_powi_f16:
7; GFX7:       ; %bb.0:
8; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
10; GFX7-NEXT:    v_cvt_f32_i32_e32 v1, v1
11; GFX7-NEXT:    v_log_f32_e32 v0, v0
12; GFX7-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
13; GFX7-NEXT:    v_exp_f32_e32 v0, v0
14; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
15; GFX7-NEXT:    s_setpc_b64 s[30:31]
16;
17; GFX8-LABEL: v_powi_f16:
18; GFX8:       ; %bb.0:
19; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20; GFX8-NEXT:    v_cvt_f32_i32_e32 v1, v1
21; GFX8-NEXT:    v_log_f16_e32 v0, v0
22; GFX8-NEXT:    v_cvt_f16_f32_e32 v1, v1
23; GFX8-NEXT:    v_cvt_f32_f16_e32 v0, v0
24; GFX8-NEXT:    v_cvt_f32_f16_e32 v1, v1
25; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
26; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
27; GFX8-NEXT:    v_exp_f16_e32 v0, v0
28; GFX8-NEXT:    s_setpc_b64 s[30:31]
29  %l.cast = bitcast i16 %l to half
30  %res = call half @llvm.powi.f16(half %l.cast, i32 %r)
31  %res.cast = bitcast half %res to i16
32  ret i16 %res.cast
33}
34
; llvm.powi.f32 with a variable i32 exponent. Both RUN lines share the GCN
; prefix here: the exponent is converted with v_cvt_f32_i32, then the result
; is formed by v_log_f32, v_mul_legacy_f32 and v_exp_f32.
35define float @v_powi_f32(float %l, i32 %r) {
36; GCN-LABEL: v_powi_f32:
37; GCN:       ; %bb.0:
38; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
39; GCN-NEXT:    v_cvt_f32_i32_e32 v1, v1
40; GCN-NEXT:    v_log_f32_e32 v0, v0
41; GCN-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
42; GCN-NEXT:    v_exp_f32_e32 v0, v0
43; GCN-NEXT:    s_setpc_b64 s[30:31]
44  %res = call float @llvm.powi.f32(float %l, i32 %r)
45  ret float %res
46}
47
; llvm.powi.f32 with a constant exponent of 0. The expected output contains no
; log/mul/exp sequence: it is a single move of the constant 1.0 into v0.
48define float @v_powi_0_f32(float %l) {
49; GCN-LABEL: v_powi_0_f32:
50; GCN:       ; %bb.0:
51; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
52; GCN-NEXT:    v_mov_b32_e32 v0, 1.0
53; GCN-NEXT:    s_setpc_b64 s[30:31]
54  %res = call float @llvm.powi.f32(float %l, i32 0)
55  ret float %res
56}
57
; llvm.powi.f32 with a constant exponent of 1. The expected output has no
; arithmetic at all: only the entry s_waitcnt and the return, leaving v0 as
; it was on entry.
58define float @v_powi_1_f32(float %l) {
59; GCN-LABEL: v_powi_1_f32:
60; GCN:       ; %bb.0:
61; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
62; GCN-NEXT:    s_setpc_b64 s[30:31]
63  %res = call float @llvm.powi.f32(float %l, i32 1)
64  ret float %res
65}
66
; llvm.powi.f32 with a constant exponent of -1. The checks pin the same
; log/mul/exp sequence as the variable-exponent case, with the exponent
; produced by v_cvt_f32_i32 from the inline constant -1.
67define float @v_powi_neg1_f32(float %l) {
68; GCN-LABEL: v_powi_neg1_f32:
69; GCN:       ; %bb.0:
70; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
71; GCN-NEXT:    v_cvt_f32_i32_e32 v1, -1
72; GCN-NEXT:    v_log_f32_e32 v0, v0
73; GCN-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
74; GCN-NEXT:    v_exp_f32_e32 v0, v0
75; GCN-NEXT:    s_setpc_b64 s[30:31]
76  %res = call float @llvm.powi.f32(float %l, i32 -1)
77  ret float %res
78}
79
; llvm.powi.f32 with a constant exponent of 2. The checks currently pin the
; log/mul/exp sequence (not a single multiply), with the exponent produced by
; v_cvt_f32_ubyte0 from the inline constant 2.
80define float @v_powi_2_f32(float %l) {
81; GCN-LABEL: v_powi_2_f32:
82; GCN:       ; %bb.0:
83; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
84; GCN-NEXT:    v_log_f32_e32 v0, v0
85; GCN-NEXT:    v_cvt_f32_ubyte0_e32 v1, 2
86; GCN-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
87; GCN-NEXT:    v_exp_f32_e32 v0, v0
88; GCN-NEXT:    s_setpc_b64 s[30:31]
89  %res = call float @llvm.powi.f32(float %l, i32 2)
90  ret float %res
91}
92
; llvm.powi.f32 with a constant exponent of -2. Expected output is the
; log/mul/exp sequence, with the exponent produced by v_cvt_f32_i32 from the
; inline constant -2.
93define float @v_powi_neg2_f32(float %l) {
94; GCN-LABEL: v_powi_neg2_f32:
95; GCN:       ; %bb.0:
96; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
97; GCN-NEXT:    v_cvt_f32_i32_e32 v1, -2
98; GCN-NEXT:    v_log_f32_e32 v0, v0
99; GCN-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
100; GCN-NEXT:    v_exp_f32_e32 v0, v0
101; GCN-NEXT:    s_setpc_b64 s[30:31]
102  %res = call float @llvm.powi.f32(float %l, i32 -2)
103  ret float %res
104}
105
; llvm.powi.f32 with a constant exponent of 4. Expected output is the
; log/mul/exp sequence, with the exponent produced by v_cvt_f32_ubyte0 from
; the inline constant 4.
106define float @v_powi_4_f32(float %l) {
107; GCN-LABEL: v_powi_4_f32:
108; GCN:       ; %bb.0:
109; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
110; GCN-NEXT:    v_log_f32_e32 v0, v0
111; GCN-NEXT:    v_cvt_f32_ubyte0_e32 v1, 4
112; GCN-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
113; GCN-NEXT:    v_exp_f32_e32 v0, v0
114; GCN-NEXT:    s_setpc_b64 s[30:31]
115  %res = call float @llvm.powi.f32(float %l, i32 4)
116  ret float %res
117}
118
; llvm.powi.f32 with a constant exponent of 8. Expected output is the
; log/mul/exp sequence, with the exponent produced by v_cvt_f32_ubyte0 from
; the inline constant 8.
119define float @v_powi_8_f32(float %l) {
120; GCN-LABEL: v_powi_8_f32:
121; GCN:       ; %bb.0:
122; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
123; GCN-NEXT:    v_log_f32_e32 v0, v0
124; GCN-NEXT:    v_cvt_f32_ubyte0_e32 v1, 8
125; GCN-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
126; GCN-NEXT:    v_exp_f32_e32 v0, v0
127; GCN-NEXT:    s_setpc_b64 s[30:31]
128  %res = call float @llvm.powi.f32(float %l, i32 8)
129  ret float %res
130}
131
; llvm.powi.f32 with a constant exponent of 16. Expected output is the
; log/mul/exp sequence, with the exponent produced by v_cvt_f32_ubyte0 from
; the inline constant 16.
132define float @v_powi_16_f32(float %l) {
133; GCN-LABEL: v_powi_16_f32:
134; GCN:       ; %bb.0:
135; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
136; GCN-NEXT:    v_log_f32_e32 v0, v0
137; GCN-NEXT:    v_cvt_f32_ubyte0_e32 v1, 16
138; GCN-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
139; GCN-NEXT:    v_exp_f32_e32 v0, v0
140; GCN-NEXT:    s_setpc_b64 s[30:31]
141  %res = call float @llvm.powi.f32(float %l, i32 16)
142  ret float %res
143}
144
; llvm.powi.f32 with a constant exponent of 128. Expected output is the
; log/mul/exp sequence; the exponent operand of v_cvt_f32_ubyte0 is printed
; as the hex literal 0x80 rather than as a decimal constant.
145define float @v_powi_128_f32(float %l) {
146; GCN-LABEL: v_powi_128_f32:
147; GCN:       ; %bb.0:
148; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
149; GCN-NEXT:    v_log_f32_e32 v0, v0
150; GCN-NEXT:    v_cvt_f32_ubyte0_e32 v1, 0x80
151; GCN-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
152; GCN-NEXT:    v_exp_f32_e32 v0, v0
153; GCN-NEXT:    s_setpc_b64 s[30:31]
154  %res = call float @llvm.powi.f32(float %l, i32 128)
155  ret float %res
156}
157
; llvm.powi.f32 with a constant exponent of -128. Expected output is the
; log/mul/exp sequence; the exponent operand of v_cvt_f32_i32 is printed as
; the hex literal 0xffffff80.
158define float @v_powi_neg128_f32(float %l) {
159; GCN-LABEL: v_powi_neg128_f32:
160; GCN:       ; %bb.0:
161; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
162; GCN-NEXT:    v_cvt_f32_i32_e32 v1, 0xffffff80
163; GCN-NEXT:    v_log_f32_e32 v0, v0
164; GCN-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
165; GCN-NEXT:    v_exp_f32_e32 v0, v0
166; GCN-NEXT:    s_setpc_b64 s[30:31]
167  %res = call float @llvm.powi.f32(float %l, i32 -128)
168  ret float %res
169}
170
171; FIXME: f64 is broken; the v_powi_f64 test below stays commented out until it is fixed.
172; define double @v_powi_f64(double %l, i32 %r) {
173;   %res = call double @llvm.powi.f64(double %l, i32 %r)
174;   ret double %res
175; }
176
; Declarations of the llvm.powi intrinsic overloads used by this file. Each
; takes a floating-point base and an i32 exponent. The f64 overload is only
; referenced by the commented-out v_powi_f64 test.
177declare half @llvm.powi.f16(half, i32) #0
178declare float @llvm.powi.f32(float, i32) #0
179declare double @llvm.powi.f64(double, i32) #0
180
; Attribute group applied to all three intrinsic declarations.
181attributes #0 = { nounwind readnone speculatable willreturn }
182