; Test vector extraction of byte-swapped value to memory.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z15 | FileCheck %s

declare i16 @llvm.bswap.i16(i16)
declare i32 @llvm.bswap.i32(i32)
declare i64 @llvm.bswap.i64(i64)
declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>)
declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)
declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>)

; v8i16: extract element 0 (the first element), byte-swap it, store it.
define void @f1(<8 x i16> %val, i16* %ptr) {
; CHECK-LABEL: f1:
; CHECK: vstebrh %v24, 0(%r2), 0
; CHECK: br %r14
  %elt = extractelement <8 x i16> %val, i32 0
  %rev = call i16 @llvm.bswap.i16(i16 %elt)
  store i16 %rev, i16* %ptr
  ret void
}

; v8i16: extract element 7 (the last element), byte-swap it, store it.
define void @f2(<8 x i16> %val, i16* %ptr) {
; CHECK-LABEL: f2:
; CHECK: vstebrh %v24, 0(%r2), 7
; CHECK: br %r14
  %elt = extractelement <8 x i16> %val, i32 7
  %rev = call i16 @llvm.bswap.i16(i16 %elt)
  store i16 %rev, i16* %ptr
  ret void
}

; v8i16: element index 8 is invalid for an 8-element vector.  The function
; only has to compile; what the generated code does is not important here.
define void @f3(<8 x i16> %val, i16* %ptr) {
; CHECK-LABEL: f3:
; CHECK-NOT: vstebrh %v24, 0(%r2), 8
; CHECK: br %r14
  %elt = extractelement <8 x i16> %val, i32 8
  %rev = call i16 @llvm.bswap.i16(i16 %elt)
  store i16 %rev, i16* %ptr
  ret void
}

; v8i16: highest in-range offset (2047 i16 elements = 4094 bytes), which is
; expected to be used directly as the store displacement.
define void @f4(<8 x i16> %val, i16* %base) {
; CHECK-LABEL: f4:
; CHECK: vstebrh %v24, 4094(%r2), 5
; CHECK: br %r14
  %addr = getelementptr i16, i16* %base, i32 2047
  %elt = extractelement <8 x i16> %val, i32 5
  %rev = call i16 @llvm.bswap.i16(i16 %elt)
  store i16 %rev, i16* %addr
  ret void
}

; v8i16: first out-of-range offset (2048 i16 elements = 4096 bytes), which is
; expected to be added to the base register before the store.
define void @f5(<8 x i16> %val, i16* %base) {
; CHECK-LABEL: f5:
; CHECK: aghi %r2, 4096
; CHECK: vstebrh %v24, 0(%r2), 1
; CHECK: br %r14
  %addr = getelementptr i16, i16* %base, i32 2048
  %elt = extractelement <8 x i16> %val, i32 1
  %rev = call i16 @llvm.bswap.i16(i16 %elt)
  store i16 %rev, i16* %addr
  ret void
}

; v8i16: the element index is a variable; vstebrh must not be emitted.
define void @f6(<8 x i16> %val, i16* %ptr, i32 %index) {
; CHECK-LABEL: f6:
; CHECK-NOT: vstebrh
; CHECK: br %r14
  %elt = extractelement <8 x i16> %val, i32 %index
  %rev = call i16 @llvm.bswap.i16(i16 %elt)
  store i16 %rev, i16* %ptr
  ret void
}

; v8i16: byte-swap the whole vector first, then extract element 0 and store
; it.  The same single store instruction as in f1 is expected.
define void @f7(<8 x i16> %val, i16* %ptr) {
; CHECK-LABEL: f7:
; CHECK: vstebrh %v24, 0(%r2), 0
; CHECK: br %r14
  %rev = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %val)
  %elt = extractelement <8 x i16> %rev, i32 0
  store i16 %elt, i16* %ptr
  ret void
}

; v4i32: extract element 0 (the first element), byte-swap it, store it.
define void @f8(<4 x i32> %val, i32* %ptr) {
; CHECK-LABEL: f8:
; CHECK: vstebrf %v24, 0(%r2), 0
; CHECK: br %r14
  %elt = extractelement <4 x i32> %val, i32 0
  %rev = call i32 @llvm.bswap.i32(i32 %elt)
  store i32 %rev, i32* %ptr
  ret void
}

; v4i32: extract element 3 (the last element), byte-swap it, store it.
define void @f9(<4 x i32> %val, i32* %ptr) {
; CHECK-LABEL: f9:
; CHECK: vstebrf %v24, 0(%r2), 3
; CHECK: br %r14
  %elt = extractelement <4 x i32> %val, i32 3
  %rev = call i32 @llvm.bswap.i32(i32 %elt)
  store i32 %rev, i32* %ptr
  ret void
}

; v4i32: element index 4 is invalid for a 4-element vector.  The function
; only has to compile; what the generated code does is not important here.
define void @f10(<4 x i32> %val, i32* %ptr) {
; CHECK-LABEL: f10:
; CHECK-NOT: vstebrf %v24, 0(%r2), 4
; CHECK: br %r14
  %elt = extractelement <4 x i32> %val, i32 4
  %rev = call i32 @llvm.bswap.i32(i32 %elt)
  store i32 %rev, i32* %ptr
  ret void
}

; v4i32: highest in-range offset (1023 i32 elements = 4092 bytes), which is
; expected to be used directly as the store displacement.
define void @f11(<4 x i32> %val, i32* %base) {
; CHECK-LABEL: f11:
; CHECK: vstebrf %v24, 4092(%r2), 2
; CHECK: br %r14
  %addr = getelementptr i32, i32* %base, i32 1023
  %elt = extractelement <4 x i32> %val, i32 2
  %rev = call i32 @llvm.bswap.i32(i32 %elt)
  store i32 %rev, i32* %addr
  ret void
}

; v4i32: first out-of-range offset (1024 i32 elements = 4096 bytes), which is
; expected to be added to the base register before the store.
define void @f12(<4 x i32> %val, i32* %base) {
; CHECK-LABEL: f12:
; CHECK: aghi %r2, 4096
; CHECK: vstebrf %v24, 0(%r2), 1
; CHECK: br %r14
  %addr = getelementptr i32, i32* %base, i32 1024
  %elt = extractelement <4 x i32> %val, i32 1
  %rev = call i32 @llvm.bswap.i32(i32 %elt)
  store i32 %rev, i32* %addr
  ret void
}

; v4i32: the element index is a variable; vstebrf must not be emitted.
define void @f13(<4 x i32> %val, i32* %ptr, i32 %index) {
; CHECK-LABEL: f13:
; CHECK-NOT: vstebrf
; CHECK: br %r14
  %elt = extractelement <4 x i32> %val, i32 %index
  %rev = call i32 @llvm.bswap.i32(i32 %elt)
  store i32 %rev, i32* %ptr
  ret void
}

; v4i32: byte-swap the whole vector first, then extract element 0 and store
; it.  The same single store instruction as in f8 is expected.
define void @f14(<4 x i32> %val, i32* %ptr) {
; CHECK-LABEL: f14:
; CHECK: vstebrf %v24, 0(%r2), 0
; CHECK: br %r14
  %rev = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %val)
  %elt = extractelement <4 x i32> %rev, i32 0
  store i32 %elt, i32* %ptr
  ret void
}

; v2i64: extract element 0 (the first element), byte-swap it, store it.
define void @f15(<2 x i64> %val, i64* %ptr) {
; CHECK-LABEL: f15:
; CHECK: vstebrg %v24, 0(%r2), 0
; CHECK: br %r14
  %elt = extractelement <2 x i64> %val, i32 0
  %rev = call i64 @llvm.bswap.i64(i64 %elt)
  store i64 %rev, i64* %ptr
  ret void
}

; v2i64: extract element 1 (the last element), byte-swap it, store it.
define void @f16(<2 x i64> %val, i64* %ptr) {
; CHECK-LABEL: f16:
; CHECK: vstebrg %v24, 0(%r2), 1
; CHECK: br %r14
  %elt = extractelement <2 x i64> %val, i32 1
  %rev = call i64 @llvm.bswap.i64(i64 %elt)
  store i64 %rev, i64* %ptr
  ret void
}

; v2i64: element index 2 is invalid for a 2-element vector.  The function
; only has to compile; what the generated code does is not important here.
define void @f17(<2 x i64> %val, i64* %ptr) {
; CHECK-LABEL: f17:
; CHECK-NOT: vstebrg %v24, 0(%r2), 2
; CHECK: br %r14
  %elt = extractelement <2 x i64> %val, i32 2
  %rev = call i64 @llvm.bswap.i64(i64 %elt)
  store i64 %rev, i64* %ptr
  ret void
}

; v2i64: highest in-range offset (511 i64 elements = 4088 bytes), which is
; expected to be used directly as the store displacement.
define void @f18(<2 x i64> %val, i64* %base) {
; CHECK-LABEL: f18:
; CHECK: vstebrg %v24, 4088(%r2), 1
; CHECK: br %r14
  %addr = getelementptr i64, i64* %base, i32 511
  %elt = extractelement <2 x i64> %val, i32 1
  %rev = call i64 @llvm.bswap.i64(i64 %elt)
  store i64 %rev, i64* %addr
  ret void
}

; v2i64: first out-of-range offset (512 i64 elements = 4096 bytes), which is
; expected to be added to the base register before the store.
define void @f19(<2 x i64> %val, i64* %base) {
; CHECK-LABEL: f19:
; CHECK: aghi %r2, 4096
; CHECK: vstebrg %v24, 0(%r2), 0
; CHECK: br %r14
  %addr = getelementptr i64, i64* %base, i32 512
  %elt = extractelement <2 x i64> %val, i32 0
  %rev = call i64 @llvm.bswap.i64(i64 %elt)
  store i64 %rev, i64* %addr
  ret void
}

; v2i64: the element index is a variable; vstebrg must not be emitted.
define void @f20(<2 x i64> %val, i64* %ptr, i32 %index) {
; CHECK-LABEL: f20:
; CHECK-NOT: vstebrg
; CHECK: br %r14
  %elt = extractelement <2 x i64> %val, i32 %index
  %rev = call i64 @llvm.bswap.i64(i64 %elt)
  store i64 %rev, i64* %ptr
  ret void
}

; v2i64: byte-swap the whole vector first, then extract element 0 and store
; it.  A single vstebrg of element 0 is expected.
define void @f21(<2 x i64> %val, i64* %ptr) {
; CHECK-LABEL: f21:
; CHECK: vstebrg %v24, 0(%r2), 0
; CHECK: br %r14
  %rev = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %val)
  %elt = extractelement <2 x i64> %rev, i32 0
  store i64 %elt, i64* %ptr
  ret void
}