; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s

; These tests check that fdiv is expanded correctly and also test that the
; scheduler places the RECIP_IEEE and MUL_IEEE instructions in separate
; instruction groups.
;
; Each FileCheck invocation above also enables the FUNC prefix so that the
; per-function label checks below are active and confine every group of
; checks to the output of the function it belongs to.

; Scalar case: a single reciprocal followed by a single multiply is expected.
; FUNC-LABEL: {{^}}fdiv_f32:
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[2].W
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, PS

; SI-DAG: v_rcp_f32
; SI-DAG: v_mul_f32
define void @fdiv_f32(float addrspace(1)* %out, float %a, float %b) {
entry:
  ; Divide the two scalar kernel arguments and write the quotient to %out.
  %div = fdiv float %a, %b
  store float %div, float addrspace(1)* %out
  ret void
}
23
24
25
; Two-element vector case: the checks expect two reciprocals and two
; multiplies for the <2 x float> division.
; FUNC-LABEL: {{^}}fdiv_v2f32:
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Z
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Y
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, PS
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, PS

; SI-DAG: v_rcp_f32
; SI-DAG: v_mul_f32
; SI-DAG: v_rcp_f32
; SI-DAG: v_mul_f32
define void @fdiv_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) {
entry:
  ; Element-wise division of the two vector kernel arguments, stored to %out.
  %div = fdiv <2 x float> %a, %b
  store <2 x float> %div, <2 x float> addrspace(1)* %out
  ret void
}
42
; Four-element vector case: both operands are loaded from memory, so the
; checks match temporary registers rather than kernel-argument constants, and
; expect four reciprocals and four multiplies.
; FUNC-LABEL: {{^}}fdiv_v4f32:
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS

; SI-DAG: v_rcp_f32
; SI-DAG: v_mul_f32
; SI-DAG: v_rcp_f32
; SI-DAG: v_mul_f32
; SI-DAG: v_rcp_f32
; SI-DAG: v_mul_f32
; SI-DAG: v_rcp_f32
; SI-DAG: v_mul_f32
define void @fdiv_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
entry:
  ; The divisor is the <4 x float> stored immediately after the dividend.
  %in.next = getelementptr <4 x float> addrspace(1)* %in, i32 1
  %lhs = load <4 x float> addrspace(1)* %in
  %rhs = load <4 x float> addrspace(1)* %in.next
  %quot = fdiv <4 x float> %lhs, %rhs
  store <4 x float> %quot, <4 x float> addrspace(1)* %out
  ret void
}
69