1; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+bulk-memory | FileCheck %s --check-prefixes CHECK,BULK-MEM
2; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=-bulk-memory | FileCheck %s --check-prefixes CHECK,NO-BULK-MEM
3
4; Test that basic bulk memory codegen works correctly
5
; Module-level target description: 32-bit WebAssembly.
target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"

; Intrinsic overloads exercised below. In every signature the trailing i1 is
; the volatile flag.

; i8* pointers with an i8 length.
declare void @llvm.memcpy.p0i8.p0i8.i8(i8*, i8*, i8, i1)
declare void @llvm.memmove.p0i8.p0i8.i8(i8*, i8*, i8, i1)
declare void @llvm.memset.p0i8.i8(i8*, i8, i8, i1)

; i8* pointers with an i32 length.
declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i1)
declare void @llvm.memmove.p0i8.p0i8.i32(i8*, i8*, i32, i1)
declare void @llvm.memset.p0i8.i32(i8*, i8, i32, i1)

; i32* pointers with an i32 length.
declare void @llvm.memcpy.p0i32.p0i32.i32(i32*, i32*, i32, i1)
declare void @llvm.memmove.p0i32.p0i32.i32(i32*, i32*, i32, i1)
declare void @llvm.memset.p0i32.i32(i32*, i8, i32, i1)
20
; memcpy with an i8 (zeroext) length. With bulk memory enabled the call must
; become a single memory.copy that uses the three arguments directly; with
; bulk memory disabled no memory.copy may be emitted.
; CHECK-LABEL: memcpy_i8:
; NO-BULK-MEM-NOT: memory.copy
; BULK-MEM-NEXT: .functype memcpy_i8 (i32, i32, i32) -> ()
; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $2
; BULK-MEM-NEXT: return
define void @memcpy_i8(i8* %dest, i8* %src, i8 zeroext %len) {
  call void @llvm.memcpy.p0i8.p0i8.i8(i8* %dest, i8* %src, i8 %len, i1 false)
  ret void
}
30
; memmove with an i8 (zeroext) length. With bulk memory enabled it is expected
; to lower to the same single memory.copy as memcpy; with bulk memory disabled
; no memory.copy may be emitted.
; CHECK-LABEL: memmove_i8:
; NO-BULK-MEM-NOT: memory.copy
; BULK-MEM-NEXT: .functype memmove_i8 (i32, i32, i32) -> ()
; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $2
; BULK-MEM-NEXT: return
define void @memmove_i8(i8* %dest, i8* %src, i8 zeroext %len) {
  call void @llvm.memmove.p0i8.p0i8.i8(i8* %dest, i8* %src, i8 %len, i1 false)
  ret void
}
40
; memset with an i8 (zeroext) length. With bulk memory enabled the call must
; become a single memory.fill taking destination, value and length directly;
; with bulk memory disabled no memory.fill may be emitted.
; CHECK-LABEL: memset_i8:
; NO-BULK-MEM-NOT: memory.fill
; BULK-MEM-NEXT: .functype memset_i8 (i32, i32, i32) -> ()
; BULK-MEM-NEXT: memory.fill 0, $0, $1, $2
; BULK-MEM-NEXT: return
define void @memset_i8(i8* %dest, i8 %val, i8 zeroext %len) {
  call void @llvm.memset.p0i8.i8(i8* %dest, i8 %val, i8 %len, i1 false)
  ret void
}
50
; memcpy between i32* pointers with a variable i32 length. With bulk memory
; enabled this must be one memory.copy over the raw arguments; with bulk
; memory disabled no memory.copy may be emitted.
; CHECK-LABEL: memcpy_i32:
; NO-BULK-MEM-NOT: memory.copy
; BULK-MEM-NEXT: .functype memcpy_i32 (i32, i32, i32) -> ()
; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $2
; BULK-MEM-NEXT: return
define void @memcpy_i32(i32* %dest, i32* %src, i32 %len) {
  call void @llvm.memcpy.p0i32.p0i32.i32(i32* %dest, i32* %src, i32 %len, i1 false)
  ret void
}
60
; memmove between i32* pointers with a variable i32 length. With bulk memory
; enabled this must be one memory.copy over the raw arguments; with bulk
; memory disabled no memory.copy may be emitted.
; CHECK-LABEL: memmove_i32:
; NO-BULK-MEM-NOT: memory.copy
; BULK-MEM-NEXT: .functype memmove_i32 (i32, i32, i32) -> ()
; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $2
; BULK-MEM-NEXT: return
define void @memmove_i32(i32* %dest, i32* %src, i32 %len) {
  call void @llvm.memmove.p0i32.p0i32.i32(i32* %dest, i32* %src, i32 %len, i1 false)
  ret void
}
70
; memset through an i32* pointer with a variable i32 length. With bulk memory
; enabled this must be one memory.fill over the raw arguments; with bulk
; memory disabled no memory.fill may be emitted.
; CHECK-LABEL: memset_i32:
; NO-BULK-MEM-NOT: memory.fill
; BULK-MEM-NEXT: .functype memset_i32 (i32, i32, i32) -> ()
; BULK-MEM-NEXT: memory.fill 0, $0, $1, $2
; BULK-MEM-NEXT: return
define void @memset_i32(i32* %dest, i8 %val, i32 %len) {
  call void @llvm.memset.p0i32.i32(i32* %dest, i8 %val, i32 %len, i1 false)
  ret void
}
80
; A constant one-byte memcpy. Both runs share these checks: the copy is
; expected to be expanded inline as a byte load followed by a byte store,
; regardless of whether bulk memory is available.
; CHECK-LABEL: memcpy_1:
; CHECK-NEXT: .functype memcpy_1 (i32, i32) -> ()
; CHECK-NEXT: i32.load8_u $push[[L0:[0-9]+]]=, 0($1)
; CHECK-NEXT: i32.store8 0($0), $pop[[L0]]
; CHECK-NEXT: return
define void @memcpy_1(i8* %dest, i8* %src) {
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 1, i1 false)
  ret void
}
90
; A constant one-byte memmove. Both runs share these checks: the move is
; expected to be expanded inline as a byte load followed by a byte store,
; regardless of whether bulk memory is available.
; CHECK-LABEL: memmove_1:
; CHECK-NEXT: .functype memmove_1 (i32, i32) -> ()
; CHECK-NEXT: i32.load8_u $push[[L0:[0-9]+]]=, 0($1)
; CHECK-NEXT: i32.store8 0($0), $pop[[L0]]
; CHECK-NEXT: return
define void @memmove_1(i8* %dest, i8* %src) {
  call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 1, i1 false)
  ret void
}
100
; A constant one-byte memset. The fill is expected to be expanded inline as a
; single byte store of the value argument. As with memcpy_1 and memmove_1, the
; expected output does not involve a bulk memory instruction, so the same
; sequence is verified in both runs via the shared prefix; matching the exact
; sequence also rules out a stray memory.fill.
; CHECK-LABEL: memset_1:
; CHECK-NEXT: .functype memset_1 (i32, i32) -> ()
; CHECK-NEXT: i32.store8 0($0), $1
; CHECK-NEXT: return
define void @memset_1(i8* %dest, i8 %val) {
  call void @llvm.memset.p0i8.i32(i8* %dest, i8 %val, i32 1, i1 0)
  ret void
}
110
; A constant 1024-byte memcpy. With bulk memory enabled the length must be
; materialized with i32.const and fed to a single memory.copy; with bulk
; memory disabled no memory.copy may be emitted.
; CHECK-LABEL: memcpy_1024:
; NO-BULK-MEM-NOT: memory.copy
; BULK-MEM-NEXT: .functype memcpy_1024 (i32, i32) -> ()
; BULK-MEM-NEXT: i32.const $push[[L0:[0-9]+]]=, 1024
; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $pop[[L0]]
; BULK-MEM-NEXT: return
define void @memcpy_1024(i8* %dest, i8* %src) {
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 1024, i1 false)
  ret void
}
121
; A constant 1024-byte memmove. With bulk memory enabled the length must be
; materialized with i32.const and fed to a single memory.copy; with bulk
; memory disabled no memory.copy may be emitted.
; CHECK-LABEL: memmove_1024:
; NO-BULK-MEM-NOT: memory.copy
; BULK-MEM-NEXT: .functype memmove_1024 (i32, i32) -> ()
; BULK-MEM-NEXT: i32.const $push[[L0:[0-9]+]]=, 1024
; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $pop[[L0]]
; BULK-MEM-NEXT: return
define void @memmove_1024(i8* %dest, i8* %src) {
  call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 1024, i1 false)
  ret void
}
132
; A constant 1024-byte memset. With bulk memory enabled the length must be
; materialized with i32.const and fed to a single memory.fill; with bulk
; memory disabled no memory.fill may be emitted.
; CHECK-LABEL: memset_1024:
; NO-BULK-MEM-NOT: memory.fill
; BULK-MEM-NEXT: .functype memset_1024 (i32, i32) -> ()
; BULK-MEM-NEXT: i32.const $push[[L0:[0-9]+]]=, 1024
; BULK-MEM-NEXT: memory.fill 0, $0, $1, $pop[[L0]]
; BULK-MEM-NEXT: return
define void @memset_1024(i8* %dest, i8 %val) {
  call void @llvm.memset.p0i8.i32(i8* %dest, i8 %val, i32 1024, i1 false)
  ret void
}
143
144; The following tests check that frame index elimination works for
145; bulk memory instructions. The stack pointer is bumped by 112 instead
146; of 100 because the stack pointer in WebAssembly is currently always
147; 16-byte aligned, even in leaf functions, although it is not written
148; back to the global in this case.
149
; TODO: Change TransientStackAlignment to 1 to avoid this extra
; arithmetic. This will require forcing the use of StackAlignment in
; PrologEpilogInserter.cpp when
; WebAssemblyFrameLowering::needsSPWriteback would be true.
154
; memcpy whose source is a 100-byte stack buffer. With bulk memory enabled the
; source operand of memory.copy must be the resolved frame address
; (__stack_pointer - 112 + 12); with bulk memory disabled no memory.copy may
; be emitted.
; CHECK-LABEL: memcpy_alloca_src:
; NO-BULK-MEM-NOT: memory.copy
; BULK-MEM-NEXT: .functype memcpy_alloca_src (i32) -> ()
; BULK-MEM-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer
; BULK-MEM-NEXT: i32.const $push[[L1:[0-9]+]]=, 112
; BULK-MEM-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]
; BULK-MEM-NEXT: i32.const $push[[L3:[0-9]+]]=, 12
; BULK-MEM-NEXT: i32.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]]
; BULK-MEM-NEXT: i32.const $push[[L5:[0-9]+]]=, 100
; BULK-MEM-NEXT: memory.copy 0, 0, $0, $pop[[L4]], $pop[[L5]]
; BULK-MEM-NEXT: return
define void @memcpy_alloca_src(i8* %dst) {
  %buf = alloca [100 x i8]
  %buf.i8 = bitcast [100 x i8]* %buf to i8*
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %buf.i8, i32 100, i1 0)
  ret void
}
172
; memcpy whose destination is a 100-byte stack buffer. With bulk memory
; enabled the destination operand of memory.copy must be the resolved frame
; address (__stack_pointer - 112 + 12); with bulk memory disabled no
; memory.copy may be emitted.
; CHECK-LABEL: memcpy_alloca_dst:
; NO-BULK-MEM-NOT: memory.copy
; BULK-MEM-NEXT: .functype memcpy_alloca_dst (i32) -> ()
; BULK-MEM-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer
; BULK-MEM-NEXT: i32.const $push[[L1:[0-9]+]]=, 112
; BULK-MEM-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]
; BULK-MEM-NEXT: i32.const $push[[L3:[0-9]+]]=, 12
; BULK-MEM-NEXT: i32.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]]
; BULK-MEM-NEXT: i32.const $push[[L5:[0-9]+]]=, 100
; BULK-MEM-NEXT: memory.copy 0, 0, $pop[[L4]], $0, $pop[[L5]]
; BULK-MEM-NEXT: return
define void @memcpy_alloca_dst(i8* %src) {
  %buf = alloca [100 x i8]
  %buf.i8 = bitcast [100 x i8]* %buf to i8*
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %buf.i8, i8* %src, i32 100, i1 0)
  ret void
}
190
; memset of a 100-byte stack buffer. With bulk memory enabled the destination
; operand of memory.fill must be the resolved frame address
; (__stack_pointer - 112 + 12); with bulk memory disabled no memory.fill may
; be emitted.
; CHECK-LABEL: memset_alloca:
; NO-BULK-MEM-NOT: memory.fill
; BULK-MEM-NEXT: .functype memset_alloca (i32) -> ()
; BULK-MEM-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer
; BULK-MEM-NEXT: i32.const $push[[L1:[0-9]+]]=, 112
; BULK-MEM-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]
; BULK-MEM-NEXT: i32.const $push[[L3:[0-9]+]]=, 12
; BULK-MEM-NEXT: i32.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]]
; BULK-MEM-NEXT: i32.const $push[[L5:[0-9]+]]=, 100
; BULK-MEM-NEXT: memory.fill 0, $pop[[L4]], $0, $pop[[L5]]
; BULK-MEM-NEXT: return
define void @memset_alloca(i8 %val) {
  %buf = alloca [100 x i8]
  %buf.i8 = bitcast [100 x i8]* %buf to i8*
  call void @llvm.memset.p0i8.i32(i8* %buf.i8, i8 %val, i32 100, i1 0)
  ret void
}
208