1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefix=GFX6 %s
3; RUN: llc -march=amdgcn -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s
4; RUN: llc -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
5; RUN: llc -march=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
6
; Scalar f32 pow with both operands in VGPRs.
; Every run line expects the same three-instruction expansion: v_log_f32 of v0,
; v_mul_legacy_f32 of that result by v1, then v_exp_f32 of the product
; (presumably the base-2 log/exp instructions; confirm against the ISA manual).
; The gfx1010 checks additionally expect an s_waitcnt_vscnt right after the
; entry s_waitcnt.
7define float @v_pow_f32(float %x, float %y) {
8; GFX6-LABEL: v_pow_f32:
9; GFX6:       ; %bb.0:
10; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11; GFX6-NEXT:    v_log_f32_e32 v0, v0
12; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
13; GFX6-NEXT:    v_exp_f32_e32 v0, v0
14; GFX6-NEXT:    s_setpc_b64 s[30:31]
15;
16; GFX8-LABEL: v_pow_f32:
17; GFX8:       ; %bb.0:
18; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19; GFX8-NEXT:    v_log_f32_e32 v0, v0
20; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
21; GFX8-NEXT:    v_exp_f32_e32 v0, v0
22; GFX8-NEXT:    s_setpc_b64 s[30:31]
23;
24; GFX9-LABEL: v_pow_f32:
25; GFX9:       ; %bb.0:
26; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
27; GFX9-NEXT:    v_log_f32_e32 v0, v0
28; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
29; GFX9-NEXT:    v_exp_f32_e32 v0, v0
30; GFX9-NEXT:    s_setpc_b64 s[30:31]
31;
32; GFX10-LABEL: v_pow_f32:
33; GFX10:       ; %bb.0:
34; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
35; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
36; GFX10-NEXT:    v_log_f32_e32 v0, v0
37; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
38; GFX10-NEXT:    v_exp_f32_e32 v0, v0
39; GFX10-NEXT:    s_setpc_b64 s[30:31]
40  %pow = call float @llvm.pow.f32(float %x, float %y)
41  ret float %pow
42}
43
; Two-element f32 vector pow.
; The checks expect the scalar log / mul_legacy / exp expansion once per
; element: v_log_f32 on v0 and v1, v_mul_legacy_f32 by v2 and v3 respectively,
; then v_exp_f32 on v0 and v1. The expected sequence is identical on all four
; targets apart from the extra s_waitcnt_vscnt in the gfx1010 checks.
44define <2 x float> @v_pow_v2f32(<2 x float> %x, <2 x float> %y) {
45; GFX6-LABEL: v_pow_v2f32:
46; GFX6:       ; %bb.0:
47; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
48; GFX6-NEXT:    v_log_f32_e32 v0, v0
49; GFX6-NEXT:    v_log_f32_e32 v1, v1
50; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v2, v0
51; GFX6-NEXT:    v_mul_legacy_f32_e32 v1, v3, v1
52; GFX6-NEXT:    v_exp_f32_e32 v0, v0
53; GFX6-NEXT:    v_exp_f32_e32 v1, v1
54; GFX6-NEXT:    s_setpc_b64 s[30:31]
55;
56; GFX8-LABEL: v_pow_v2f32:
57; GFX8:       ; %bb.0:
58; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
59; GFX8-NEXT:    v_log_f32_e32 v0, v0
60; GFX8-NEXT:    v_log_f32_e32 v1, v1
61; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v2, v0
62; GFX8-NEXT:    v_mul_legacy_f32_e32 v1, v3, v1
63; GFX8-NEXT:    v_exp_f32_e32 v0, v0
64; GFX8-NEXT:    v_exp_f32_e32 v1, v1
65; GFX8-NEXT:    s_setpc_b64 s[30:31]
66;
67; GFX9-LABEL: v_pow_v2f32:
68; GFX9:       ; %bb.0:
69; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
70; GFX9-NEXT:    v_log_f32_e32 v0, v0
71; GFX9-NEXT:    v_log_f32_e32 v1, v1
72; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v2, v0
73; GFX9-NEXT:    v_mul_legacy_f32_e32 v1, v3, v1
74; GFX9-NEXT:    v_exp_f32_e32 v0, v0
75; GFX9-NEXT:    v_exp_f32_e32 v1, v1
76; GFX9-NEXT:    s_setpc_b64 s[30:31]
77;
78; GFX10-LABEL: v_pow_v2f32:
79; GFX10:       ; %bb.0:
80; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
81; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
82; GFX10-NEXT:    v_log_f32_e32 v0, v0
83; GFX10-NEXT:    v_log_f32_e32 v1, v1
84; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v2, v0
85; GFX10-NEXT:    v_mul_legacy_f32_e32 v1, v3, v1
86; GFX10-NEXT:    v_exp_f32_e32 v0, v0
87; GFX10-NEXT:    v_exp_f32_e32 v1, v1
88; GFX10-NEXT:    s_setpc_b64 s[30:31]
89  %pow = call <2 x float> @llvm.pow.v2f32(<2 x float> %x, <2 x float> %y)
90  ret <2 x float> %pow
91}
92
; Scalar f16 pow. On every target the checks expect the arithmetic itself to be
; done with the f32 log / mul_legacy / exp instructions.
; - tahiti: both inputs are first passed through v_cvt_f16_f32 and then
;   v_cvt_f32_f16; no conversion back to f16 is expected before the return.
; - fiji, gfx900, gfx1010: inputs are widened with v_cvt_f32_f16 and the result
;   is narrowed with a final v_cvt_f16_f32.
93define half @v_pow_f16(half %x, half %y) {
94; GFX6-LABEL: v_pow_f16:
95; GFX6:       ; %bb.0:
96; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
97; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
98; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
99; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
100; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
101; GFX6-NEXT:    v_log_f32_e32 v0, v0
102; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
103; GFX6-NEXT:    v_exp_f32_e32 v0, v0
104; GFX6-NEXT:    s_setpc_b64 s[30:31]
105;
106; GFX8-LABEL: v_pow_f16:
107; GFX8:       ; %bb.0:
108; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
109; GFX8-NEXT:    v_cvt_f32_f16_e32 v0, v0
110; GFX8-NEXT:    v_cvt_f32_f16_e32 v1, v1
111; GFX8-NEXT:    v_log_f32_e32 v0, v0
112; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
113; GFX8-NEXT:    v_exp_f32_e32 v0, v0
114; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
115; GFX8-NEXT:    s_setpc_b64 s[30:31]
116;
117; GFX9-LABEL: v_pow_f16:
118; GFX9:       ; %bb.0:
119; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
120; GFX9-NEXT:    v_cvt_f32_f16_e32 v0, v0
121; GFX9-NEXT:    v_cvt_f32_f16_e32 v1, v1
122; GFX9-NEXT:    v_log_f32_e32 v0, v0
123; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
124; GFX9-NEXT:    v_exp_f32_e32 v0, v0
125; GFX9-NEXT:    v_cvt_f16_f32_e32 v0, v0
126; GFX9-NEXT:    s_setpc_b64 s[30:31]
127;
128; GFX10-LABEL: v_pow_f16:
129; GFX10:       ; %bb.0:
130; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
131; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
132; GFX10-NEXT:    v_cvt_f32_f16_e32 v0, v0
133; GFX10-NEXT:    v_cvt_f32_f16_e32 v1, v1
134; GFX10-NEXT:    v_log_f32_e32 v0, v0
135; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
136; GFX10-NEXT:    v_exp_f32_e32 v0, v0
137; GFX10-NEXT:    v_cvt_f16_f32_e32 v0, v0
138; GFX10-NEXT:    s_setpc_b64 s[30:31]
139  %pow = call half @llvm.pow.f16(half %x, half %y)
140  ret half %pow
141}
142
; Two-element f16 vector pow, computed per element with the f32 expansion.
; - tahiti: v0..v3 are each passed through v_cvt_f16_f32 / v_cvt_f32_f16, then
;   two independent log / mul_legacy / exp chains leave results in v0 and v1.
; - fiji and newer: the high half of v0 and of v1 is widened with an SDWA
;   v_cvt_f32_f16 (src0_sel:WORD_1) and the low half with the plain e32 form.
;   The result is repacked with an SDWA v_cvt_f16_f32 (dst_sel:WORD_1) plus
;   v_or_b32 on fiji, and with v_pack_b32_f16 on gfx900 and gfx1010.
; - The gfx1010 checks write the high-half exp result to v1 rather than v2 and
;   order the final conversions differently from gfx900.
143define <2 x half> @v_pow_v2f16(<2 x half> %x, <2 x half> %y) {
144; GFX6-LABEL: v_pow_v2f16:
145; GFX6:       ; %bb.0:
146; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
147; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
148; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
149; GFX6-NEXT:    v_cvt_f16_f32_e32 v2, v2
150; GFX6-NEXT:    v_cvt_f16_f32_e32 v3, v3
151; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
152; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
153; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v2
154; GFX6-NEXT:    v_cvt_f32_f16_e32 v3, v3
155; GFX6-NEXT:    v_log_f32_e32 v0, v0
156; GFX6-NEXT:    v_log_f32_e32 v1, v1
157; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v2, v0
158; GFX6-NEXT:    v_mul_legacy_f32_e32 v1, v3, v1
159; GFX6-NEXT:    v_exp_f32_e32 v0, v0
160; GFX6-NEXT:    v_exp_f32_e32 v1, v1
161; GFX6-NEXT:    s_setpc_b64 s[30:31]
162;
163; GFX8-LABEL: v_pow_v2f16:
164; GFX8:       ; %bb.0:
165; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
166; GFX8-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
167; GFX8-NEXT:    v_cvt_f32_f16_e32 v0, v0
168; GFX8-NEXT:    v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
169; GFX8-NEXT:    v_cvt_f32_f16_e32 v1, v1
170; GFX8-NEXT:    v_log_f32_e32 v2, v2
171; GFX8-NEXT:    v_log_f32_e32 v0, v0
172; GFX8-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
173; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
174; GFX8-NEXT:    v_exp_f32_e32 v0, v0
175; GFX8-NEXT:    v_exp_f32_e32 v2, v2
176; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
177; GFX8-NEXT:    v_cvt_f16_f32_sdwa v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
178; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
179; GFX8-NEXT:    s_setpc_b64 s[30:31]
180;
181; GFX9-LABEL: v_pow_v2f16:
182; GFX9:       ; %bb.0:
183; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
184; GFX9-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
185; GFX9-NEXT:    v_cvt_f32_f16_e32 v0, v0
186; GFX9-NEXT:    v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
187; GFX9-NEXT:    v_cvt_f32_f16_e32 v1, v1
188; GFX9-NEXT:    v_log_f32_e32 v2, v2
189; GFX9-NEXT:    v_log_f32_e32 v0, v0
190; GFX9-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
191; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
192; GFX9-NEXT:    v_exp_f32_e32 v0, v0
193; GFX9-NEXT:    v_exp_f32_e32 v2, v2
194; GFX9-NEXT:    v_cvt_f16_f32_e32 v0, v0
195; GFX9-NEXT:    v_cvt_f16_f32_e32 v1, v2
196; GFX9-NEXT:    v_pack_b32_f16 v0, v0, v1
197; GFX9-NEXT:    s_setpc_b64 s[30:31]
198;
199; GFX10-LABEL: v_pow_v2f16:
200; GFX10:       ; %bb.0:
201; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
202; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
203; GFX10-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
204; GFX10-NEXT:    v_cvt_f32_f16_e32 v0, v0
205; GFX10-NEXT:    v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
206; GFX10-NEXT:    v_cvt_f32_f16_e32 v1, v1
207; GFX10-NEXT:    v_log_f32_e32 v2, v2
208; GFX10-NEXT:    v_log_f32_e32 v0, v0
209; GFX10-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
210; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
211; GFX10-NEXT:    v_exp_f32_e32 v1, v2
212; GFX10-NEXT:    v_exp_f32_e32 v0, v0
213; GFX10-NEXT:    v_cvt_f16_f32_e32 v1, v1
214; GFX10-NEXT:    v_cvt_f16_f32_e32 v0, v0
215; GFX10-NEXT:    v_pack_b32_f16 v0, v0, v1
216; GFX10-NEXT:    s_setpc_b64 s[30:31]
217  %pow = call <2 x half> @llvm.pow.v2f16(<2 x half> %x, <2 x half> %y)
218  ret <2 x half> %pow
219}
220
; Two-element f16 vector pow with fneg applied to the base operand %x.
; - tahiti: the checks pack the two x halves (v_lshlrev_b32 + v_or_b32), flip
;   both sign bits with v_xor_b32 0x80008000, then unpack and widen before the
;   usual f32 log / mul_legacy / exp chains.
; - fiji and newer: the negation is expected as a -v0 source modifier on the
;   f16-to-f32 conversions (SDWA form for the high half, e64 form for the low
;   half); no separate xor is expected.
; The gfx1010 check lines use the same "; PREFIX-NEXT:" spelling and column
; alignment as the rest of the autogenerated assertions in this file.
221define <2 x half> @v_pow_v2f16_fneg_lhs(<2 x half> %x, <2 x half> %y) {
222; GFX6-LABEL: v_pow_v2f16_fneg_lhs:
223; GFX6:       ; %bb.0:
224; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
225; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
226; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
227; GFX6-NEXT:    v_cvt_f16_f32_e32 v2, v2
228; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
229; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
230; GFX6-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
231; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v3
232; GFX6-NEXT:    v_cvt_f32_f16_e32 v3, v0
233; GFX6-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
234; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
235; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v2
236; GFX6-NEXT:    v_log_f32_e32 v3, v3
237; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
238; GFX6-NEXT:    v_log_f32_e32 v4, v0
239; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v2, v3
240; GFX6-NEXT:    v_exp_f32_e32 v0, v0
241; GFX6-NEXT:    v_mul_legacy_f32_e32 v1, v1, v4
242; GFX6-NEXT:    v_exp_f32_e32 v1, v1
243; GFX6-NEXT:    s_setpc_b64 s[30:31]
244;
245; GFX8-LABEL: v_pow_v2f16_fneg_lhs:
246; GFX8:       ; %bb.0:
247; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
248; GFX8-NEXT:    v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
249; GFX8-NEXT:    v_cvt_f32_f16_e64 v0, -v0
250; GFX8-NEXT:    v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
251; GFX8-NEXT:    v_cvt_f32_f16_e32 v1, v1
252; GFX8-NEXT:    v_log_f32_e32 v2, v2
253; GFX8-NEXT:    v_log_f32_e32 v0, v0
254; GFX8-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
255; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
256; GFX8-NEXT:    v_exp_f32_e32 v0, v0
257; GFX8-NEXT:    v_exp_f32_e32 v2, v2
258; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
259; GFX8-NEXT:    v_cvt_f16_f32_sdwa v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
260; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
261; GFX8-NEXT:    s_setpc_b64 s[30:31]
262;
263; GFX9-LABEL: v_pow_v2f16_fneg_lhs:
264; GFX9:       ; %bb.0:
265; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
266; GFX9-NEXT:    v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
267; GFX9-NEXT:    v_cvt_f32_f16_e64 v0, -v0
268; GFX9-NEXT:    v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
269; GFX9-NEXT:    v_cvt_f32_f16_e32 v1, v1
270; GFX9-NEXT:    v_log_f32_e32 v2, v2
271; GFX9-NEXT:    v_log_f32_e32 v0, v0
272; GFX9-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
273; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
274; GFX9-NEXT:    v_exp_f32_e32 v0, v0
275; GFX9-NEXT:    v_exp_f32_e32 v2, v2
276; GFX9-NEXT:    v_cvt_f16_f32_e32 v0, v0
277; GFX9-NEXT:    v_cvt_f16_f32_e32 v1, v2
278; GFX9-NEXT:    v_pack_b32_f16 v0, v0, v1
279; GFX9-NEXT:    s_setpc_b64 s[30:31]
280;
281; GFX10-LABEL: v_pow_v2f16_fneg_lhs:
282; GFX10:       ; %bb.0:
283; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
284; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
285; GFX10-NEXT:    v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
286; GFX10-NEXT:    v_cvt_f32_f16_e64 v0, -v0
287; GFX10-NEXT:    v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
288; GFX10-NEXT:    v_cvt_f32_f16_e32 v1, v1
289; GFX10-NEXT:    v_log_f32_e32 v2, v2
290; GFX10-NEXT:    v_log_f32_e32 v0, v0
291; GFX10-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
292; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
293; GFX10-NEXT:    v_exp_f32_e32 v1, v2
294; GFX10-NEXT:    v_exp_f32_e32 v0, v0
295; GFX10-NEXT:    v_cvt_f16_f32_e32 v1, v1
296; GFX10-NEXT:    v_cvt_f16_f32_e32 v0, v0
297; GFX10-NEXT:    v_pack_b32_f16 v0, v0, v1
298; GFX10-NEXT:    s_setpc_b64 s[30:31]
299  %x.fneg = fneg <2 x half> %x
300  %pow = call <2 x half> @llvm.pow.v2f16(<2 x half> %x.fneg, <2 x half> %y)
301  ret <2 x half> %pow
302}
303
; Two-element f16 vector pow with fneg applied to the exponent operand %y.
; - tahiti: the checks pack the two y halves (v_lshlrev_b32 + v_or_b32), flip
;   both sign bits with v_xor_b32 0x80008000, then unpack and widen them for
;   the two v_mul_legacy_f32 instructions.
; - fiji and newer: the negation is expected as a -v1 source modifier on the
;   f16-to-f32 conversions (SDWA form for the high half, e64 form for the low
;   half); no separate xor is expected.
; The gfx1010 check lines use the same "; PREFIX-NEXT:" spelling and column
; alignment as the rest of the autogenerated assertions in this file.
304define <2 x half> @v_pow_v2f16_fneg_rhs(<2 x half> %x, <2 x half> %y) {
305; GFX6-LABEL: v_pow_v2f16_fneg_rhs:
306; GFX6:       ; %bb.0:
307; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
308; GFX6-NEXT:    v_cvt_f16_f32_e32 v3, v3
309; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
310; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
311; GFX6-NEXT:    v_cvt_f16_f32_e32 v2, v2
312; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
313; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
314; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
315; GFX6-NEXT:    v_or_b32_e32 v2, v2, v3
316; GFX6-NEXT:    v_xor_b32_e32 v2, 0x80008000, v2
317; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
318; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v2
319; GFX6-NEXT:    v_log_f32_e32 v0, v0
320; GFX6-NEXT:    v_cvt_f32_f16_e32 v3, v3
321; GFX6-NEXT:    v_log_f32_e32 v1, v1
322; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v2, v0
323; GFX6-NEXT:    v_exp_f32_e32 v0, v0
324; GFX6-NEXT:    v_mul_legacy_f32_e32 v1, v3, v1
325; GFX6-NEXT:    v_exp_f32_e32 v1, v1
326; GFX6-NEXT:    s_setpc_b64 s[30:31]
327;
328; GFX8-LABEL: v_pow_v2f16_fneg_rhs:
329; GFX8:       ; %bb.0:
330; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
331; GFX8-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
332; GFX8-NEXT:    v_cvt_f32_f16_e32 v0, v0
333; GFX8-NEXT:    v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
334; GFX8-NEXT:    v_cvt_f32_f16_e64 v1, -v1
335; GFX8-NEXT:    v_log_f32_e32 v2, v2
336; GFX8-NEXT:    v_log_f32_e32 v0, v0
337; GFX8-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
338; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
339; GFX8-NEXT:    v_exp_f32_e32 v0, v0
340; GFX8-NEXT:    v_exp_f32_e32 v2, v2
341; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
342; GFX8-NEXT:    v_cvt_f16_f32_sdwa v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
343; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
344; GFX8-NEXT:    s_setpc_b64 s[30:31]
345;
346; GFX9-LABEL: v_pow_v2f16_fneg_rhs:
347; GFX9:       ; %bb.0:
348; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
349; GFX9-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
350; GFX9-NEXT:    v_cvt_f32_f16_e32 v0, v0
351; GFX9-NEXT:    v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
352; GFX9-NEXT:    v_cvt_f32_f16_e64 v1, -v1
353; GFX9-NEXT:    v_log_f32_e32 v2, v2
354; GFX9-NEXT:    v_log_f32_e32 v0, v0
355; GFX9-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
356; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
357; GFX9-NEXT:    v_exp_f32_e32 v0, v0
358; GFX9-NEXT:    v_exp_f32_e32 v2, v2
359; GFX9-NEXT:    v_cvt_f16_f32_e32 v0, v0
360; GFX9-NEXT:    v_cvt_f16_f32_e32 v1, v2
361; GFX9-NEXT:    v_pack_b32_f16 v0, v0, v1
362; GFX9-NEXT:    s_setpc_b64 s[30:31]
363;
364; GFX10-LABEL: v_pow_v2f16_fneg_rhs:
365; GFX10:       ; %bb.0:
366; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
367; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
368; GFX10-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
369; GFX10-NEXT:    v_cvt_f32_f16_e32 v0, v0
370; GFX10-NEXT:    v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
371; GFX10-NEXT:    v_cvt_f32_f16_e64 v1, -v1
372; GFX10-NEXT:    v_log_f32_e32 v2, v2
373; GFX10-NEXT:    v_log_f32_e32 v0, v0
374; GFX10-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
375; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
376; GFX10-NEXT:    v_exp_f32_e32 v1, v2
377; GFX10-NEXT:    v_exp_f32_e32 v0, v0
378; GFX10-NEXT:    v_cvt_f16_f32_e32 v1, v1
379; GFX10-NEXT:    v_cvt_f16_f32_e32 v0, v0
380; GFX10-NEXT:    v_pack_b32_f16 v0, v0, v1
381; GFX10-NEXT:    s_setpc_b64 s[30:31]
382  %y.fneg = fneg <2 x half> %y
383  %pow = call <2 x half> @llvm.pow.v2f16(<2 x half> %x, <2 x half> %y.fneg)
384  ret <2 x half> %pow
385}
386
; Two-element f16 vector pow with fneg applied to both %x and %y.
; - tahiti: the sign mask 0x80008000 is expected to be materialized once in s4
;   (s_mov_b32) and reused by two v_xor_b32 instructions, one on the packed x
;   halves and one on the packed y halves.
; - fiji and newer: both negations are expected as source modifiers (-v0 and
;   -v1) on the f16-to-f32 conversions; no separate xor is expected.
387define <2 x half> @v_pow_v2f16_fneg_lhs_rhs(<2 x half> %x, <2 x half> %y) {
388; GFX6-LABEL: v_pow_v2f16_fneg_lhs_rhs:
389; GFX6:       ; %bb.0:
390; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
391; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
392; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
393; GFX6-NEXT:    v_cvt_f16_f32_e32 v3, v3
394; GFX6-NEXT:    v_cvt_f16_f32_e32 v2, v2
395; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
396; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
397; GFX6-NEXT:    s_mov_b32 s4, 0x80008000
398; GFX6-NEXT:    v_xor_b32_e32 v0, s4, v0
399; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
400; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
401; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
402; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
403; GFX6-NEXT:    v_or_b32_e32 v2, v2, v3
404; GFX6-NEXT:    v_xor_b32_e32 v2, s4, v2
405; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
406; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v2
407; GFX6-NEXT:    v_log_f32_e32 v0, v0
408; GFX6-NEXT:    v_cvt_f32_f16_e32 v3, v3
409; GFX6-NEXT:    v_log_f32_e32 v1, v1
410; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v2, v0
411; GFX6-NEXT:    v_exp_f32_e32 v0, v0
412; GFX6-NEXT:    v_mul_legacy_f32_e32 v1, v3, v1
413; GFX6-NEXT:    v_exp_f32_e32 v1, v1
414; GFX6-NEXT:    s_setpc_b64 s[30:31]
415;
416; GFX8-LABEL: v_pow_v2f16_fneg_lhs_rhs:
417; GFX8:       ; %bb.0:
418; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
419; GFX8-NEXT:    v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
420; GFX8-NEXT:    v_cvt_f32_f16_e64 v0, -v0
421; GFX8-NEXT:    v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
422; GFX8-NEXT:    v_cvt_f32_f16_e64 v1, -v1
423; GFX8-NEXT:    v_log_f32_e32 v2, v2
424; GFX8-NEXT:    v_log_f32_e32 v0, v0
425; GFX8-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
426; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
427; GFX8-NEXT:    v_exp_f32_e32 v0, v0
428; GFX8-NEXT:    v_exp_f32_e32 v2, v2
429; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
430; GFX8-NEXT:    v_cvt_f16_f32_sdwa v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
431; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
432; GFX8-NEXT:    s_setpc_b64 s[30:31]
433;
434; GFX9-LABEL: v_pow_v2f16_fneg_lhs_rhs:
435; GFX9:       ; %bb.0:
436; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
437; GFX9-NEXT:    v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
438; GFX9-NEXT:    v_cvt_f32_f16_e64 v0, -v0
439; GFX9-NEXT:    v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
440; GFX9-NEXT:    v_cvt_f32_f16_e64 v1, -v1
441; GFX9-NEXT:    v_log_f32_e32 v2, v2
442; GFX9-NEXT:    v_log_f32_e32 v0, v0
443; GFX9-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
444; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
445; GFX9-NEXT:    v_exp_f32_e32 v0, v0
446; GFX9-NEXT:    v_exp_f32_e32 v2, v2
447; GFX9-NEXT:    v_cvt_f16_f32_e32 v0, v0
448; GFX9-NEXT:    v_cvt_f16_f32_e32 v1, v2
449; GFX9-NEXT:    v_pack_b32_f16 v0, v0, v1
450; GFX9-NEXT:    s_setpc_b64 s[30:31]
451;
452; GFX10-LABEL: v_pow_v2f16_fneg_lhs_rhs:
453; GFX10:       ; %bb.0:
454; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
455; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
456; GFX10-NEXT:    v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
457; GFX10-NEXT:    v_cvt_f32_f16_e64 v0, -v0
458; GFX10-NEXT:    v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
459; GFX10-NEXT:    v_cvt_f32_f16_e64 v1, -v1
460; GFX10-NEXT:    v_log_f32_e32 v2, v2
461; GFX10-NEXT:    v_log_f32_e32 v0, v0
462; GFX10-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
463; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
464; GFX10-NEXT:    v_exp_f32_e32 v1, v2
465; GFX10-NEXT:    v_exp_f32_e32 v0, v0
466; GFX10-NEXT:    v_cvt_f16_f32_e32 v1, v1
467; GFX10-NEXT:    v_cvt_f16_f32_e32 v0, v0
468; GFX10-NEXT:    v_pack_b32_f16 v0, v0, v1
469; GFX10-NEXT:    s_setpc_b64 s[30:31]
470  %x.fneg = fneg <2 x half> %x
471  %y.fneg = fneg <2 x half> %y
472  %pow = call <2 x half> @llvm.pow.v2f16(<2 x half> %x.fneg, <2 x half> %y.fneg)
473  ret <2 x half> %pow
474}
475
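476; FIXME: The f64 variant below is commented out; the reason is not recorded here (presumably llvm.pow.f64 is not handled yet - confirm before enabling).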
477; define double @v_pow_f64(double %x, double %y) {
478;   %pow = call double @llvm.pow.f64(double %x, double %y)
479;   ret double %pow
480; }
481
; Scalar f32 pow with fabs applied to the base operand %x.
; On every target the checks expect the fabs as a separate v_and_b32 of v0 with
; 0x7fffffff ahead of v_log_f32, i.e. not as a source modifier on v_log_f32.
482define float @v_pow_f32_fabs_lhs(float %x, float %y) {
483; GFX6-LABEL: v_pow_f32_fabs_lhs:
484; GFX6:       ; %bb.0:
485; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
486; GFX6-NEXT:    v_and_b32_e32 v0, 0x7fffffff, v0
487; GFX6-NEXT:    v_log_f32_e32 v0, v0
488; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
489; GFX6-NEXT:    v_exp_f32_e32 v0, v0
490; GFX6-NEXT:    s_setpc_b64 s[30:31]
491;
492; GFX8-LABEL: v_pow_f32_fabs_lhs:
493; GFX8:       ; %bb.0:
494; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
495; GFX8-NEXT:    v_and_b32_e32 v0, 0x7fffffff, v0
496; GFX8-NEXT:    v_log_f32_e32 v0, v0
497; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
498; GFX8-NEXT:    v_exp_f32_e32 v0, v0
499; GFX8-NEXT:    s_setpc_b64 s[30:31]
500;
501; GFX9-LABEL: v_pow_f32_fabs_lhs:
502; GFX9:       ; %bb.0:
503; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
504; GFX9-NEXT:    v_and_b32_e32 v0, 0x7fffffff, v0
505; GFX9-NEXT:    v_log_f32_e32 v0, v0
506; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
507; GFX9-NEXT:    v_exp_f32_e32 v0, v0
508; GFX9-NEXT:    s_setpc_b64 s[30:31]
509;
510; GFX10-LABEL: v_pow_f32_fabs_lhs:
511; GFX10:       ; %bb.0:
512; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
513; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
514; GFX10-NEXT:    v_and_b32_e32 v0, 0x7fffffff, v0
515; GFX10-NEXT:    v_log_f32_e32 v0, v0
516; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
517; GFX10-NEXT:    v_exp_f32_e32 v0, v0
518; GFX10-NEXT:    s_setpc_b64 s[30:31]
519  %fabs.x = call float @llvm.fabs.f32(float %x)
520  %pow = call float @llvm.pow.f32(float %fabs.x, float %y)
521  ret float %pow
522}
523
; Scalar f32 pow with fabs applied to the exponent operand %y.
; On every target the checks expect a separate v_and_b32 of v1 with 0x7fffffff,
; placed after v_log_f32 and before v_mul_legacy_f32, i.e. the fabs is not
; expressed as a source modifier on the multiply.
524define float @v_pow_f32_fabs_rhs(float %x, float %y) {
525; GFX6-LABEL: v_pow_f32_fabs_rhs:
526; GFX6:       ; %bb.0:
527; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
528; GFX6-NEXT:    v_log_f32_e32 v0, v0
529; GFX6-NEXT:    v_and_b32_e32 v1, 0x7fffffff, v1
530; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
531; GFX6-NEXT:    v_exp_f32_e32 v0, v0
532; GFX6-NEXT:    s_setpc_b64 s[30:31]
533;
534; GFX8-LABEL: v_pow_f32_fabs_rhs:
535; GFX8:       ; %bb.0:
536; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
537; GFX8-NEXT:    v_log_f32_e32 v0, v0
538; GFX8-NEXT:    v_and_b32_e32 v1, 0x7fffffff, v1
539; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
540; GFX8-NEXT:    v_exp_f32_e32 v0, v0
541; GFX8-NEXT:    s_setpc_b64 s[30:31]
542;
543; GFX9-LABEL: v_pow_f32_fabs_rhs:
544; GFX9:       ; %bb.0:
545; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
546; GFX9-NEXT:    v_log_f32_e32 v0, v0
547; GFX9-NEXT:    v_and_b32_e32 v1, 0x7fffffff, v1
548; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
549; GFX9-NEXT:    v_exp_f32_e32 v0, v0
550; GFX9-NEXT:    s_setpc_b64 s[30:31]
551;
552; GFX10-LABEL: v_pow_f32_fabs_rhs:
553; GFX10:       ; %bb.0:
554; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
555; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
556; GFX10-NEXT:    v_log_f32_e32 v0, v0
557; GFX10-NEXT:    v_and_b32_e32 v1, 0x7fffffff, v1
558; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
559; GFX10-NEXT:    v_exp_f32_e32 v0, v0
560; GFX10-NEXT:    s_setpc_b64 s[30:31]
561  %fabs.y = call float @llvm.fabs.f32(float %y)
562  %pow = call float @llvm.pow.f32(float %x, float %fabs.y)
563  ret float %pow
564}
565
; Scalar f32 pow with fabs applied to both operands.
; The checks expect the mask to be built once in s4 with "s_brev_b32 s4, -2"
; (bit-reversing 0xfffffffe gives 0x7fffffff) and then applied to v0 and v1
; with two v_and_b32 instructions.
; tahiti, fiji and gfx900 place v_log_f32 between the two masks, whereas the
; gfx1010 checks expect both masks before v_log_f32.
566define float @v_pow_f32_fabs_lhs_rhs(float %x, float %y) {
567; GFX6-LABEL: v_pow_f32_fabs_lhs_rhs:
568; GFX6:       ; %bb.0:
569; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
570; GFX6-NEXT:    s_brev_b32 s4, -2
571; GFX6-NEXT:    v_and_b32_e32 v0, s4, v0
572; GFX6-NEXT:    v_log_f32_e32 v0, v0
573; GFX6-NEXT:    v_and_b32_e32 v1, s4, v1
574; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
575; GFX6-NEXT:    v_exp_f32_e32 v0, v0
576; GFX6-NEXT:    s_setpc_b64 s[30:31]
577;
578; GFX8-LABEL: v_pow_f32_fabs_lhs_rhs:
579; GFX8:       ; %bb.0:
580; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
581; GFX8-NEXT:    s_brev_b32 s4, -2
582; GFX8-NEXT:    v_and_b32_e32 v0, s4, v0
583; GFX8-NEXT:    v_log_f32_e32 v0, v0
584; GFX8-NEXT:    v_and_b32_e32 v1, s4, v1
585; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
586; GFX8-NEXT:    v_exp_f32_e32 v0, v0
587; GFX8-NEXT:    s_setpc_b64 s[30:31]
588;
589; GFX9-LABEL: v_pow_f32_fabs_lhs_rhs:
590; GFX9:       ; %bb.0:
591; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
592; GFX9-NEXT:    s_brev_b32 s4, -2
593; GFX9-NEXT:    v_and_b32_e32 v0, s4, v0
594; GFX9-NEXT:    v_log_f32_e32 v0, v0
595; GFX9-NEXT:    v_and_b32_e32 v1, s4, v1
596; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
597; GFX9-NEXT:    v_exp_f32_e32 v0, v0
598; GFX9-NEXT:    s_setpc_b64 s[30:31]
599;
600; GFX10-LABEL: v_pow_f32_fabs_lhs_rhs:
601; GFX10:       ; %bb.0:
602; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
603; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
604; GFX10-NEXT:    s_brev_b32 s4, -2
605; GFX10-NEXT:    v_and_b32_e32 v0, s4, v0
606; GFX10-NEXT:    v_and_b32_e32 v1, s4, v1
607; GFX10-NEXT:    v_log_f32_e32 v0, v0
608; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
609; GFX10-NEXT:    v_exp_f32_e32 v0, v0
610; GFX10-NEXT:    s_setpc_b64 s[30:31]
611  %fabs.x = call float @llvm.fabs.f32(float %x)
612  %fabs.y = call float @llvm.fabs.f32(float %y)
613  %pow = call float @llvm.pow.f32(float %fabs.x, float %fabs.y)
614  ret float %pow
615}
616
; amdgpu_ps variant with the base %x marked inreg and the exponent %y not.
; The checks expect v_log_f32 to read s0 directly and write v1, followed by
; v_mul_legacy_f32 of v0 and v1 and v_exp_f32 on v0.
; Unlike the default-calling-convention tests in this file, no entry s_waitcnt
; is expected and the body ends with the "return to shader part epilog" marker
; instead of s_setpc_b64.
617define amdgpu_ps float @v_pow_f32_sgpr_vgpr(float inreg %x, float %y) {
618; GFX6-LABEL: v_pow_f32_sgpr_vgpr:
619; GFX6:       ; %bb.0:
620; GFX6-NEXT:    v_log_f32_e32 v1, s0
621; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
622; GFX6-NEXT:    v_exp_f32_e32 v0, v0
623; GFX6-NEXT:    ; return to shader part epilog
624;
625; GFX8-LABEL: v_pow_f32_sgpr_vgpr:
626; GFX8:       ; %bb.0:
627; GFX8-NEXT:    v_log_f32_e32 v1, s0
628; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
629; GFX8-NEXT:    v_exp_f32_e32 v0, v0
630; GFX8-NEXT:    ; return to shader part epilog
631;
632; GFX9-LABEL: v_pow_f32_sgpr_vgpr:
633; GFX9:       ; %bb.0:
634; GFX9-NEXT:    v_log_f32_e32 v1, s0
635; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
636; GFX9-NEXT:    v_exp_f32_e32 v0, v0
637; GFX9-NEXT:    ; return to shader part epilog
638;
639; GFX10-LABEL: v_pow_f32_sgpr_vgpr:
640; GFX10:       ; %bb.0:
641; GFX10-NEXT:    v_log_f32_e32 v1, s0
642; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
643; GFX10-NEXT:    v_exp_f32_e32 v0, v0
644; GFX10-NEXT:    ; return to shader part epilog
645  %pow = call float @llvm.pow.f32(float %x, float %y)
646  ret float %pow
647}
648
; amdgpu_ps variant with the exponent %y marked inreg and the base %x not.
; The checks expect v_log_f32 on v0 and then v_mul_legacy_f32 reading s0
; directly as its first source operand, with no copy of s0 into a VGPR.
; No entry s_waitcnt is expected and the body ends with the
; "return to shader part epilog" marker.
649define amdgpu_ps float @v_pow_f32_vgpr_sgpr(float %x, float inreg %y) {
650; GFX6-LABEL: v_pow_f32_vgpr_sgpr:
651; GFX6:       ; %bb.0:
652; GFX6-NEXT:    v_log_f32_e32 v0, v0
653; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, s0, v0
654; GFX6-NEXT:    v_exp_f32_e32 v0, v0
655; GFX6-NEXT:    ; return to shader part epilog
656;
657; GFX8-LABEL: v_pow_f32_vgpr_sgpr:
658; GFX8:       ; %bb.0:
659; GFX8-NEXT:    v_log_f32_e32 v0, v0
660; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, s0, v0
661; GFX8-NEXT:    v_exp_f32_e32 v0, v0
662; GFX8-NEXT:    ; return to shader part epilog
663;
664; GFX9-LABEL: v_pow_f32_vgpr_sgpr:
665; GFX9:       ; %bb.0:
666; GFX9-NEXT:    v_log_f32_e32 v0, v0
667; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, s0, v0
668; GFX9-NEXT:    v_exp_f32_e32 v0, v0
669; GFX9-NEXT:    ; return to shader part epilog
670;
671; GFX10-LABEL: v_pow_f32_vgpr_sgpr:
672; GFX10:       ; %bb.0:
673; GFX10-NEXT:    v_log_f32_e32 v0, v0
674; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, s0, v0
675; GFX10-NEXT:    v_exp_f32_e32 v0, v0
676; GFX10-NEXT:    ; return to shader part epilog
677  %pow = call float @llvm.pow.f32(float %x, float %y)
678  ret float %pow
679}
680
; amdgpu_ps variant with both operands marked inreg.
; The checks expect v_log_f32 to read s0 and v_mul_legacy_f32 to read s1, each
; instruction using a single SGPR source, with the result produced in v0.
; No entry s_waitcnt is expected and the body ends with the
; "return to shader part epilog" marker.
681define amdgpu_ps float @v_pow_f32_sgpr_sgpr(float inreg %x, float inreg %y) {
682; GFX6-LABEL: v_pow_f32_sgpr_sgpr:
683; GFX6:       ; %bb.0:
684; GFX6-NEXT:    v_log_f32_e32 v0, s0
685; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, s1, v0
686; GFX6-NEXT:    v_exp_f32_e32 v0, v0
687; GFX6-NEXT:    ; return to shader part epilog
688;
689; GFX8-LABEL: v_pow_f32_sgpr_sgpr:
690; GFX8:       ; %bb.0:
691; GFX8-NEXT:    v_log_f32_e32 v0, s0
692; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, s1, v0
693; GFX8-NEXT:    v_exp_f32_e32 v0, v0
694; GFX8-NEXT:    ; return to shader part epilog
695;
696; GFX9-LABEL: v_pow_f32_sgpr_sgpr:
697; GFX9:       ; %bb.0:
698; GFX9-NEXT:    v_log_f32_e32 v0, s0
699; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, s1, v0
700; GFX9-NEXT:    v_exp_f32_e32 v0, v0
701; GFX9-NEXT:    ; return to shader part epilog
702;
703; GFX10-LABEL: v_pow_f32_sgpr_sgpr:
704; GFX10:       ; %bb.0:
705; GFX10-NEXT:    v_log_f32_e32 v0, s0
706; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, s1, v0
707; GFX10-NEXT:    v_exp_f32_e32 v0, v0
708; GFX10-NEXT:    ; return to shader part epilog
709  %pow = call float @llvm.pow.f32(float %x, float %y)
710  ret float %pow
711}
712
; Scalar pow intrinsic declarations. In the visible part of this file,
; llvm.pow.f64 is referenced only by the commented-out f64 test.
713declare half @llvm.pow.f16(half, half)
714declare float @llvm.pow.f32(float, float)
715declare double @llvm.pow.f64(double, double)
716
; fabs intrinsic declarations. Only the f32 variant is called by the functions
; visible in this file; no visible function calls llvm.fabs.f16.
717declare half @llvm.fabs.f16(half)
718declare float @llvm.fabs.f32(float)
719
; Vector pow intrinsic declarations used by the v2f16 and v2f32 tests.
720declare <2 x half> @llvm.pow.v2f16(<2 x half>, <2 x half>)
721declare <2 x float> @llvm.pow.v2f32(<2 x float>, <2 x float>)
722