1; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx600 -verify-machineinstrs <%s | FileCheck -enable-var-scope -check-prefixes=GCN,SICI,SI %s
2; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx700 -verify-machineinstrs <%s | FileCheck -enable-var-scope -check-prefixes=GCN,SICI,CI %s
3
4; Check that addrspace(1) (const) loads with various combinations of
5; uniform, nonuniform and constant address components are all selected as
6; addr64 MUBUF loads with no readfirstlane.
7
; Six-entry table of <3 x float> constants in addrspace(1). The functions in
; this file that index @indexable directly use it as their base address.
8@indexable = internal unnamed_addr addrspace(1) constant [6 x <3 x float>] [<3 x float> <float 1.000000e+00, float 0.000000e+00, float 0.000000e+00>, <3 x float> <float 0.000000e+00, float 1.000000e+00, float 0.000000e+00>, <3 x float> <float 0.000000e+00, float 0.000000e+00, float 1.000000e+00>, <3 x float> <float 0.000000e+00, float 1.000000e+00, float 1.000000e+00>, <3 x float> <float 1.000000e+00, float 0.000000e+00, float 1.000000e+00>, <3 x float> <float 1.000000e+00, float 1.000000e+00, float 0.000000e+00>]
9
; nonuniform_uniform -- index the global table @indexable with the
; sign-extended argument %arg18 (not marked inreg; presumably the nonuniform
; component, per the function name), load the selected <3 x float> and return
; its element 0.
; Expected output: no readfirstlane, and an addr64 buffer load (dwordx4 on the
; gfx600 run, dwordx3 on the gfx700 run).
10; GCN-LABEL: {{^}}nonuniform_uniform:
11; GCN-NOT: readfirstlane
12; SI: buffer_load_dwordx4 {{.*}} addr64
13; CI: buffer_load_dwordx3 {{.*}} addr64
14
15define amdgpu_ps float @nonuniform_uniform(i32 %arg18) {
16.entry:
17  %tmp31 = sext i32 %arg18 to i64
; Address = @indexable + %tmp31 * sizeof(<3 x float>): fixed base, variable index.
18  %tmp32 = getelementptr [6 x <3 x float>], [6 x <3 x float>] addrspace(1)* @indexable, i64 0, i64 %tmp31
19  %tmp33 = load <3 x float>, <3 x float> addrspace(1)* %tmp32, align 16
; Only lane 0 of the loaded vector is returned.
20  %tmp34 = extractelement <3 x float> %tmp33, i32 0
21  ret float %tmp34
22}
23
; uniform_nonuniform -- build the base pointer from the zero-extended argument
; %arg18 (not inreg) and index it with the inreg argument %offset (presumably
; the uniform component, per the function name). Loads the selected
; <3 x float> and returns its element 0.
; Expected output: no readfirstlane, and an addr64 buffer load (dwordx4 on the
; gfx600 run, dwordx3 on the gfx700 run).
24; GCN-LABEL: {{^}}uniform_nonuniform:
25; GCN-NOT: readfirstlane
26; SI: buffer_load_dwordx4 {{.*}} addr64
27; CI: buffer_load_dwordx3 {{.*}} addr64
28
29define amdgpu_ps float @uniform_nonuniform(i32 inreg %offset, i32 %arg18) {
30.entry:
; The 32-bit argument is widened to 64 bits and reinterpreted as a pointer to
; a [6 x <3 x float>] array in addrspace(1).
31  %tmp1 = zext i32 %arg18 to i64
32  %tmp2 = inttoptr i64 %tmp1 to [6 x <3 x float>] addrspace(1)*
; Variable base (%tmp2) indexed by the inreg %offset.
33  %tmp32 = getelementptr [6 x <3 x float>], [6 x <3 x float>] addrspace(1)* %tmp2, i32 0, i32 %offset
34  %tmp33 = load <3 x float>, <3 x float> addrspace(1)* %tmp32, align 16
35  %tmp34 = extractelement <3 x float> %tmp33, i32 0
36  ret float %tmp34
37}
38
; const_nonuniform -- build the base pointer from the zero-extended argument
; %arg18 (not inreg) and apply the constant array index 1. Loads the selected
; <3 x float> and returns its element 0.
; Expected output: no readfirstlane, and an addr64 buffer load (dwordx4 on the
; gfx600 run, dwordx3 on the gfx700 run).
39; GCN-LABEL: {{^}}const_nonuniform:
40; GCN-NOT: readfirstlane
41; SI: buffer_load_dwordx4 {{.*}} addr64
42; CI: buffer_load_dwordx3 {{.*}} addr64
43
44define amdgpu_ps float @const_nonuniform(i32 %arg18) {
45.entry:
46  %tmp1 = zext i32 %arg18 to i64
47  %tmp2 = inttoptr i64 %tmp1 to [6 x <3 x float>] addrspace(1)*
; Variable base (%tmp2) plus a compile-time constant element index of 1.
48  %tmp32 = getelementptr [6 x <3 x float>], [6 x <3 x float>] addrspace(1)* %tmp2, i32 0, i32 1
49  %tmp33 = load <3 x float>, <3 x float> addrspace(1)* %tmp32, align 16
50  %tmp34 = extractelement <3 x float> %tmp33, i32 0
51  ret float %tmp34
52}
53
; nonuniform_nonuniform -- same address computation as uniform_nonuniform, but
; neither %offset nor %arg18 is marked inreg, so (per the function name) both
; the base and the index are nonuniform. Loads the selected <3 x float> and
; returns its element 0.
; Expected output: no readfirstlane, and an addr64 buffer load (dwordx4 on the
; gfx600 run, dwordx3 on the gfx700 run).
54; GCN-LABEL: {{^}}nonuniform_nonuniform:
55; GCN-NOT: readfirstlane
56; SI: buffer_load_dwordx4 {{.*}} addr64
57; CI: buffer_load_dwordx3 {{.*}} addr64
58
59define amdgpu_ps float @nonuniform_nonuniform(i32 %offset, i32 %arg18) {
60.entry:
61  %tmp1 = zext i32 %arg18 to i64
62  %tmp2 = inttoptr i64 %tmp1 to [6 x <3 x float>] addrspace(1)*
; Both the base (%tmp2) and the index (%offset) come from non-inreg arguments.
63  %tmp32 = getelementptr [6 x <3 x float>], [6 x <3 x float>] addrspace(1)* %tmp2, i32 0, i32 %offset
64  %tmp33 = load <3 x float>, <3 x float> addrspace(1)* %tmp32, align 16
65  %tmp34 = extractelement <3 x float> %tmp33, i32 0
66  ret float %tmp34
67}
68
; nonuniform_uniform_const -- index the global table @indexable with the
; sign-extended argument %arg18 (not inreg), then select vector element 1 with
; a constant trailing GEP index, and load that single float.
; Expected output on both runs: no readfirstlane, and an addr64
; buffer_load_dword.
69; GCN-LABEL: {{^}}nonuniform_uniform_const:
70; GCN-NOT: readfirstlane
71; SICI: buffer_load_dword {{.*}} addr64
72
73define amdgpu_ps float @nonuniform_uniform_const(i32 %arg18) {
74.entry:
75  %tmp31 = sext i32 %arg18 to i64
; Three address components: fixed base @indexable, variable row %tmp31,
; constant element 1 within the row.
76  %tmp32 = getelementptr [6 x <3 x float>], [6 x <3 x float>] addrspace(1)* @indexable, i64 0, i64 %tmp31, i64 1
77  %tmp33 = load float, float addrspace(1)* %tmp32, align 4
78  ret float %tmp33
79}
80
; uniform_nonuniform_const -- build the base pointer from the zero-extended
; argument %arg18 (not inreg), index it with the inreg argument %offset
; (presumably the uniform component, per the function name), then select
; vector element 1 with a constant trailing GEP index and load that float.
; Expected output on both runs: no readfirstlane, and an addr64
; buffer_load_dword.
81; GCN-LABEL: {{^}}uniform_nonuniform_const:
82; GCN-NOT: readfirstlane
83; SICI: buffer_load_dword {{.*}} addr64
84
85define amdgpu_ps float @uniform_nonuniform_const(i32 inreg %offset, i32 %arg18) {
86.entry:
87  %tmp1 = zext i32 %arg18 to i64
88  %tmp2 = inttoptr i64 %tmp1 to [6 x <3 x float>] addrspace(1)*
; Variable base (%tmp2), inreg row index (%offset), constant element 1.
89  %tmp32 = getelementptr [6 x <3 x float>], [6 x <3 x float>] addrspace(1)* %tmp2, i32 0, i32 %offset, i32 1
90  %tmp33 = load float, float addrspace(1)* %tmp32, align 4
91  ret float %tmp33
92}
93
; nonuniform_nonuniform_const -- same address computation as
; uniform_nonuniform_const, but neither %offset nor %arg18 is marked inreg, so
; (per the function name) both variable components are nonuniform. Loads the
; single float at constant element 1 of the selected row.
; Expected output on both runs: no readfirstlane, and an addr64
; buffer_load_dword.
94; GCN-LABEL: {{^}}nonuniform_nonuniform_const:
95; GCN-NOT: readfirstlane
96; SICI: buffer_load_dword {{.*}} addr64
97
98define amdgpu_ps float @nonuniform_nonuniform_const(i32 %offset, i32 %arg18) {
99.entry:
100  %tmp1 = zext i32 %arg18 to i64
101  %tmp2 = inttoptr i64 %tmp1 to [6 x <3 x float>] addrspace(1)*
; Variable base (%tmp2), non-inreg row index (%offset), constant element 1.
102  %tmp32 = getelementptr [6 x <3 x float>], [6 x <3 x float>] addrspace(1)* %tmp2, i32 0, i32 %offset, i32 1
103  %tmp33 = load float, float addrspace(1)* %tmp32, align 4
104  ret float %tmp33
105}
106
107
108
109
110