; Configurations with ds128 disabled (or left at the default). The three
; amdgcn runs that are not gfx900 also enable the SICIV prefix, so that the
; "s_mov_b32 m0" expectations written under that prefix in this file are
; actually checked; the gfx900 run uses GFX9 instead, which expects m0 to be
; left untouched.
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIV,FUNC %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-enable-ds128 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIV,FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-enable-ds128 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIV,FUNC %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-enable-ds128 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefixes=EG,FUNC %s

; Testing for ds_read_b128
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -mattr=+enable-ds128 < %s | FileCheck -check-prefixes=CIVI,FUNC %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -mattr=+enable-ds128 < %s | FileCheck -check-prefixes=CIVI,FUNC %s
10
; Round-trips one double through addrspace(3).
; Runs using the GCN prefix expect a single ds_read_b64 whose destination
; register pair is reused as the data operand of a ds_write_b64, with nothing
; following the address register on the read. The gfx900 run must not mention
; m0 ahead of the read. The r600 run expects two LDS_READ_RET.
; FUNC-LABEL: {{^}}local_load_f64:
; SICIV: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_read_b64 [[VAL:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}{{$}}
; GCN: ds_write_b64 v{{[0-9]+}}, [[VAL]]

; EG: LDS_READ_RET
; EG: LDS_READ_RET
define amdgpu_kernel void @local_load_f64(double addrspace(3)* %out, double addrspace(3)* %in) #0 {
  %val = load double, double addrspace(3)* %in
  store double %val, double addrspace(3)* %out
  ret void
}
25
; Copies a <2 x double> between two addrspace(3) pointers with no explicit
; alignment on either access.
; Runs using the GCN prefix expect a ds_read2_b64; the gfx900 run must not
; mention m0 before it. The r600 run expects four LDS_READ_RET.
; FUNC-LABEL: {{^}}local_load_v2f64:
; SICIV: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_read2_b64

; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
define amdgpu_kernel void @local_load_v2f64(<2 x double> addrspace(3)* %out, <2 x double> addrspace(3)* %in) #0 {
  %vec = load <2 x double>, <2 x double> addrspace(3)* %in
  store <2 x double> %vec, <2 x double> addrspace(3)* %out
  ret void
}
42
; Copies a <3 x double> between two addrspace(3) pointers.
; Runs using the GCN prefix expect one ds_read2_b64 and one ds_read_b64, in
; either order (DAG checks); the gfx900 run must not mention m0 before them.
; The r600 run expects six LDS_READ_RET.
; FUNC-LABEL: {{^}}local_load_v3f64:
; SICIV: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: ds_read2_b64
; GCN-DAG: ds_read_b64

; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
define amdgpu_kernel void @local_load_v3f64(<3 x double> addrspace(3)* %out, <3 x double> addrspace(3)* %in) #0 {
  %vec = load <3 x double>, <3 x double> addrspace(3)* %in
  store <3 x double> %vec, <3 x double> addrspace(3)* %out
  ret void
}
62
; Copies a <4 x double> between two addrspace(3) pointers.
; Runs using the GCN prefix expect two ds_read2_b64; the gfx900 run must not
; mention m0 before the first of them. The r600 run expects eight
; LDS_READ_RET.
; FUNC-LABEL: {{^}}local_load_v4f64:
; SICIV: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_read2_b64
; GCN: ds_read2_b64

; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET

; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
define amdgpu_kernel void @local_load_v4f64(<4 x double> addrspace(3)* %out, <4 x double> addrspace(3)* %in) #0 {
  %vec = load <4 x double>, <4 x double> addrspace(3)* %in
  store <4 x double> %vec, <4 x double> addrspace(3)* %out
  ret void
}
85
86; FUNC-LABEL: {{^}}local_load_v8f64:
87; SICIV: s_mov_b32 m0
88; GFX9-NOT: m0
89
90; GCN: ds_read2_b64
91; GCN: ds_read2_b64
92; GCN: ds_read2_b64
93; GCN: ds_read2_b64
94
95; EG: LDS_READ_RET
96; EG: LDS_READ_RET
97; EG: LDS_READ_RET
98; EG: LDS_READ_RET
99; EG: LDS_READ_RET
100; EG: LDS_READ_RET
101; EG: LDS_READ_RET
102; EG: LDS_READ_RET
103; EG: LDS_READ_RET
104; EG: LDS_READ_RET
105; EG: LDS_READ_RET
106; EG: LDS_READ_RET
107; EG: LDS_READ_RET
108; EG: LDS_READ_RET
109; EG: LDS_READ_RET
110; EG: LDS_READ_RET
111define amdgpu_kernel void @local_load_v8f64(<8 x double> addrspace(3)* %out, <8 x double> addrspace(3)* %in) #0 {
112entry:
113  %ld = load <8 x double>, <8 x double> addrspace(3)* %in
114  store <8 x double> %ld, <8 x double> addrspace(3)* %out
115  ret void
116}
117
; Copies a <16 x double> between two addrspace(3) pointers.
; Runs using the GCN prefix expect eight ds_read2_b64; the gfx900 run must not
; mention m0 before the first of them. The r600 run expects thirty-two
; LDS_READ_RET, listed below in eight groups of four.
; FUNC-LABEL: {{^}}local_load_v16f64:
; SICIV: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_read2_b64
; GCN: ds_read2_b64
; GCN: ds_read2_b64
; GCN: ds_read2_b64
; GCN: ds_read2_b64
; GCN: ds_read2_b64
; GCN: ds_read2_b64
; GCN: ds_read2_b64

; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET

; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET

; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET

; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET

; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET

; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET

; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET

; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
define amdgpu_kernel void @local_load_v16f64(<16 x double> addrspace(3)* %out, <16 x double> addrspace(3)* %in) #0 {
  %vec = load <16 x double>, <16 x double> addrspace(3)* %in
  store <16 x double> %vec, <16 x double> addrspace(3)* %out
  ret void
}
176
; Checks that a <2 x double> copy whose load and store are both marked
; "align 16" is selected as ds_read_b128 followed by ds_write_b128 in the runs
; that pass +enable-ds128 (the CIVI prefix). The r600 run expects four
; LDS_READ_RET.
; FUNC-LABEL: {{^}}local_load_v2f64_to_128:

; CIVI: ds_read_b128
; CIVI: ds_write_b128

; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
define amdgpu_kernel void @local_load_v2f64_to_128(<2 x double> addrspace(3)* %out, <2 x double> addrspace(3)* %in) {
  %vec = load <2 x double>, <2 x double> addrspace(3)* %in, align 16
  store <2 x double> %vec, <2 x double> addrspace(3)* %out, align 16
  ret void
}
193
; Attribute group referenced as #0 by the kernels in this file that name it.
attributes #0 = { nounwind }
195