; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+bulk-memory | FileCheck %s --check-prefixes CHECK,BULK-MEM
; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=-bulk-memory | FileCheck %s --check-prefixes CHECK,NO-BULK-MEM

; Test that basic bulk memory codegen works correctly
;
; The file is compiled twice: once with the bulk-memory feature enabled and
; once with it disabled. Directives under the common prefix apply to both
; runs; the feature-specific prefixes apply to one run each.

target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"

declare void @llvm.memcpy.p0i8.p0i8.i8(i8*, i8*, i8, i1)
declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i1)
declare void @llvm.memcpy.p0i32.p0i32.i32(i32*, i32*, i32, i1)

declare void @llvm.memmove.p0i8.p0i8.i8(i8*, i8*, i8, i1)
declare void @llvm.memmove.p0i8.p0i8.i32(i8*, i8*, i32, i1)
declare void @llvm.memmove.p0i32.p0i32.i32(i32*, i32*, i32, i1)

declare void @llvm.memset.p0i8.i8(i8*, i8, i8, i1)
declare void @llvm.memset.p0i8.i32(i8*, i8, i32, i1)
declare void @llvm.memset.p0i32.i32(i32*, i8, i32, i1)

; Variable-length operations whose length operand is a zero-extended i8.

; CHECK-LABEL: memcpy_i8:
; NO-BULK-MEM-NOT: memory.copy
; BULK-MEM-NEXT: .functype memcpy_i8 (i32, i32, i32) -> ()
; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $2
; BULK-MEM-NEXT: return
define void @memcpy_i8(i8* %dest, i8* %src, i8 zeroext %len) {
  call void @llvm.memcpy.p0i8.p0i8.i8(i8* %dest, i8* %src, i8 %len, i1 0)
  ret void
}

; CHECK-LABEL: memmove_i8:
; NO-BULK-MEM-NOT: memory.copy
; BULK-MEM-NEXT: .functype memmove_i8 (i32, i32, i32) -> ()
; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $2
; BULK-MEM-NEXT: return
define void @memmove_i8(i8* %dest, i8* %src, i8 zeroext %len) {
  call void @llvm.memmove.p0i8.p0i8.i8(i8* %dest, i8* %src, i8 %len, i1 0)
  ret void
}

; CHECK-LABEL: memset_i8:
; NO-BULK-MEM-NOT: memory.fill
; BULK-MEM-NEXT: .functype memset_i8 (i32, i32, i32) -> ()
; BULK-MEM-NEXT: memory.fill 0, $0, $1, $2
; BULK-MEM-NEXT: return
define void @memset_i8(i8* %dest, i8 %val, i8 zeroext %len) {
  call void @llvm.memset.p0i8.i8(i8* %dest, i8 %val, i8 %len, i1 0)
  ret void
}

; Variable-length operations with an i32 length and i32* operands.

; CHECK-LABEL: memcpy_i32:
; NO-BULK-MEM-NOT: memory.copy
; BULK-MEM-NEXT: .functype memcpy_i32 (i32, i32, i32) -> ()
; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $2
; BULK-MEM-NEXT: return
define void @memcpy_i32(i32* %dest, i32* %src, i32 %len) {
  call void @llvm.memcpy.p0i32.p0i32.i32(i32* %dest, i32* %src, i32 %len, i1 0)
  ret void
}

; CHECK-LABEL: memmove_i32:
; NO-BULK-MEM-NOT: memory.copy
; BULK-MEM-NEXT: .functype memmove_i32 (i32, i32, i32) -> ()
; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $2
; BULK-MEM-NEXT: return
define void @memmove_i32(i32* %dest, i32* %src, i32 %len) {
  call void @llvm.memmove.p0i32.p0i32.i32(i32* %dest, i32* %src, i32 %len, i1 0)
  ret void
}

; CHECK-LABEL: memset_i32:
; NO-BULK-MEM-NOT: memory.fill
; BULK-MEM-NEXT: .functype memset_i32 (i32, i32, i32) -> ()
; BULK-MEM-NEXT: memory.fill 0, $0, $1, $2
; BULK-MEM-NEXT: return
define void @memset_i32(i32* %dest, i8 %val, i32 %len) {
  call void @llvm.memset.p0i32.i32(i32* %dest, i8 %val, i32 %len, i1 0)
  ret void
}

; Constant length of 1: the expected output is a plain byte load/store
; sequence rather than a bulk memory instruction, so these functions are
; checked under the common prefix in both runs.

; CHECK-LABEL: memcpy_1:
; CHECK-NEXT: .functype memcpy_1 (i32, i32) -> ()
; CHECK-NEXT: i32.load8_u $push[[L0:[0-9]+]]=, 0($1)
; CHECK-NEXT: i32.store8 0($0), $pop[[L0]]
; CHECK-NEXT: return
define void @memcpy_1(i8* %dest, i8* %src) {
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 1, i1 0)
  ret void
}

; CHECK-LABEL: memmove_1:
; CHECK-NEXT: .functype memmove_1 (i32, i32) -> ()
; CHECK-NEXT: i32.load8_u $push[[L0:[0-9]+]]=, 0($1)
; CHECK-NEXT: i32.store8 0($0), $pop[[L0]]
; CHECK-NEXT: return
define void @memmove_1(i8* %dest, i8* %src) {
  call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 1, i1 0)
  ret void
}

; Checked under the common prefix, matching memcpy_1 and memmove_1: the
; expected single byte store pins the whole function body, which also rules
; out a memory.fill in either run.
; CHECK-LABEL: memset_1:
; CHECK-NEXT: .functype memset_1 (i32, i32) -> ()
; CHECK-NEXT: i32.store8 0($0), $1
; CHECK-NEXT: return
define void @memset_1(i8* %dest, i8 %val) {
  call void @llvm.memset.p0i8.i32(i8* %dest, i8 %val, i32 1, i1 0)
  ret void
}

; Constant length of 1024: with bulk memory enabled the length is
; materialized as an i32.const and passed to memory.copy / memory.fill.

; CHECK-LABEL: memcpy_1024:
; NO-BULK-MEM-NOT: memory.copy
; BULK-MEM-NEXT: .functype memcpy_1024 (i32, i32) -> ()
; BULK-MEM-NEXT: i32.const $push[[L0:[0-9]+]]=, 1024
; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $pop[[L0]]
; BULK-MEM-NEXT: return
define void @memcpy_1024(i8* %dest, i8* %src) {
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 1024, i1 0)
  ret void
}

; CHECK-LABEL: memmove_1024:
; NO-BULK-MEM-NOT: memory.copy
; BULK-MEM-NEXT: .functype memmove_1024 (i32, i32) -> ()
; BULK-MEM-NEXT: i32.const $push[[L0:[0-9]+]]=, 1024
; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $pop[[L0]]
; BULK-MEM-NEXT: return
define void @memmove_1024(i8* %dest, i8* %src) {
  call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 1024, i1 0)
  ret void
}

; CHECK-LABEL: memset_1024:
; NO-BULK-MEM-NOT: memory.fill
; BULK-MEM-NEXT: .functype memset_1024 (i32, i32) -> ()
; BULK-MEM-NEXT: i32.const $push[[L0:[0-9]+]]=, 1024
; BULK-MEM-NEXT: memory.fill 0, $0, $1, $pop[[L0]]
; BULK-MEM-NEXT: return
define void @memset_1024(i8* %dest, i8 %val) {
  call void @llvm.memset.p0i8.i32(i8* %dest, i8 %val, i32 1024, i1 0)
  ret void
}

; The following tests check that frame index elimination works for
; bulk memory instructions. The stack pointer is bumped by 112 instead
; of 100 because the stack pointer in WebAssembly is currently always
; 16-byte aligned, even in leaf functions, although it is not written
; back to the global in this case.

; TODO: Change TransientStackAlignment to 1 to avoid this extra
; arithmetic. This will require forcing the use of StackAlignment in
; PrologEpilogEmitter.cpp when
; WebAssemblyFrameLowering::needsSPWriteback would be true.

; CHECK-LABEL: memcpy_alloca_src:
; NO-BULK-MEM-NOT: memory.copy
; BULK-MEM-NEXT: .functype memcpy_alloca_src (i32) -> ()
; BULK-MEM-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer
; BULK-MEM-NEXT: i32.const $push[[L1:[0-9]+]]=, 112
; BULK-MEM-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]
; BULK-MEM-NEXT: i32.const $push[[L3:[0-9]+]]=, 12
; BULK-MEM-NEXT: i32.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]]
; BULK-MEM-NEXT: i32.const $push[[L5:[0-9]+]]=, 100
; BULK-MEM-NEXT: memory.copy 0, 0, $0, $pop[[L4]], $pop[[L5]]
; BULK-MEM-NEXT: return
define void @memcpy_alloca_src(i8* %dst) {
  %a = alloca [100 x i8]
  %p = bitcast [100 x i8]* %a to i8*
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %p, i32 100, i1 false)
  ret void
}

; CHECK-LABEL: memcpy_alloca_dst:
; NO-BULK-MEM-NOT: memory.copy
; BULK-MEM-NEXT: .functype memcpy_alloca_dst (i32) -> ()
; BULK-MEM-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer
; BULK-MEM-NEXT: i32.const $push[[L1:[0-9]+]]=, 112
; BULK-MEM-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]
; BULK-MEM-NEXT: i32.const $push[[L3:[0-9]+]]=, 12
; BULK-MEM-NEXT: i32.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]]
; BULK-MEM-NEXT: i32.const $push[[L5:[0-9]+]]=, 100
; BULK-MEM-NEXT: memory.copy 0, 0, $pop[[L4]], $0, $pop[[L5]]
; BULK-MEM-NEXT: return
define void @memcpy_alloca_dst(i8* %src) {
  %a = alloca [100 x i8]
  %p = bitcast [100 x i8]* %a to i8*
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %p, i8* %src, i32 100, i1 false)
  ret void
}

; CHECK-LABEL: memset_alloca:
; NO-BULK-MEM-NOT: memory.fill
; BULK-MEM-NEXT: .functype memset_alloca (i32) -> ()
; BULK-MEM-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer
; BULK-MEM-NEXT: i32.const $push[[L1:[0-9]+]]=, 112
; BULK-MEM-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]
; BULK-MEM-NEXT: i32.const $push[[L3:[0-9]+]]=, 12
; BULK-MEM-NEXT: i32.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]]
; BULK-MEM-NEXT: i32.const $push[[L5:[0-9]+]]=, 100
; BULK-MEM-NEXT: memory.fill 0, $pop[[L4]], $0, $pop[[L5]]
; BULK-MEM-NEXT: return
define void @memset_alloca(i8 %val) {
  %a = alloca [100 x i8]
  %p = bitcast [100 x i8]* %a to i8*
  call void @llvm.memset.p0i8.i32(i8* %p, i8 %val, i32 100, i1 false)
  ret void
}