; Test vector insertion of memory values.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s

; Insert a loaded byte into element 0 of a v16i8.
define <16 x i8> @f1(<16 x i8> %val, i8* %ptr) {
; CHECK-LABEL: f1:
; CHECK: vleb %v24, 0(%r2), 0
; CHECK: br %r14
  %byte = load i8, i8* %ptr
  %vec = insertelement <16 x i8> %val, i8 %byte, i32 0
  ret <16 x i8> %vec
}

; Insert a loaded byte into element 15 (the last one) of a v16i8.
define <16 x i8> @f2(<16 x i8> %val, i8* %ptr) {
; CHECK-LABEL: f2:
; CHECK: vleb %v24, 0(%r2), 15
; CHECK: br %r14
  %byte = load i8, i8* %ptr
  %vec = insertelement <16 x i8> %val, i8 %byte, i32 15
  ret <16 x i8> %vec
}

; v16i8 insertion at byte offset 4095, the highest in-range displacement:
; the offset is expected to be folded into the load-element instruction.
define <16 x i8> @f3(<16 x i8> %val, i8* %base) {
; CHECK-LABEL: f3:
; CHECK: vleb %v24, 4095(%r2), 10
; CHECK: br %r14
  %addr = getelementptr i8, i8* %base, i32 4095
  %byte = load i8, i8* %addr
  %vec = insertelement <16 x i8> %val, i8 %byte, i32 10
  ret <16 x i8> %vec
}

; v16i8 insertion at byte offset 4096, the first out-of-range displacement:
; the base register is expected to be adjusted separately first.
define <16 x i8> @f4(<16 x i8> %val, i8* %base) {
; CHECK-LABEL: f4:
; CHECK: aghi %r2, 4096
; CHECK: vleb %v24, 0(%r2), 5
; CHECK: br %r14
  %addr = getelementptr i8, i8* %base, i32 4096
  %byte = load i8, i8* %addr
  %vec = insertelement <16 x i8> %val, i8 %byte, i32 5
  ret <16 x i8> %vec
}

; v16i8 insertion at a run-time element index: the load-element
; instruction must not appear in the output.
define <16 x i8> @f5(<16 x i8> %val, i8* %ptr, i32 %index) {
; CHECK-LABEL: f5:
; CHECK-NOT: vleb
; CHECK: br %r14
  %byte = load i8, i8* %ptr
  %vec = insertelement <16 x i8> %val, i8 %byte, i32 %index
  ret <16 x i8> %vec
}

; Insert a loaded halfword into element 0 of a v8i16.
define <8 x i16> @f6(<8 x i16> %val, i16* %ptr) {
; CHECK-LABEL: f6:
; CHECK: vleh %v24, 0(%r2), 0
; CHECK: br %r14
  %half = load i16, i16* %ptr
  %vec = insertelement <8 x i16> %val, i16 %half, i32 0
  ret <8 x i16> %vec
}

; Insert a loaded halfword into element 7 (the last one) of a v8i16.
define <8 x i16> @f7(<8 x i16> %val, i16* %ptr) {
; CHECK-LABEL: f7:
; CHECK: vleh %v24, 0(%r2), 7
; CHECK: br %r14
  %half = load i16, i16* %ptr
  %vec = insertelement <8 x i16> %val, i16 %half, i32 7
  ret <8 x i16> %vec
}

; v8i16 insertion at halfword index 2047 (byte offset 4094), the highest
; in-range displacement: the offset is expected to be folded.
define <8 x i16> @f8(<8 x i16> %val, i16* %base) {
; CHECK-LABEL: f8:
; CHECK: vleh %v24, 4094(%r2), 5
; CHECK: br %r14
  %addr = getelementptr i16, i16* %base, i32 2047
  %half = load i16, i16* %addr
  %vec = insertelement <8 x i16> %val, i16 %half, i32 5
  ret <8 x i16> %vec
}

; v8i16 insertion at halfword index 2048 (byte offset 4096), the first
; out-of-range displacement: the base register is adjusted separately.
define <8 x i16> @f9(<8 x i16> %val, i16* %base) {
; CHECK-LABEL: f9:
; CHECK: aghi %r2, 4096
; CHECK: vleh %v24, 0(%r2), 1
; CHECK: br %r14
  %addr = getelementptr i16, i16* %base, i32 2048
  %half = load i16, i16* %addr
  %vec = insertelement <8 x i16> %val, i16 %half, i32 1
  ret <8 x i16> %vec
}

; v8i16 insertion at a run-time element index: the load-element
; instruction must not appear in the output.
define <8 x i16> @f10(<8 x i16> %val, i16* %ptr, i32 %index) {
; CHECK-LABEL: f10:
; CHECK-NOT: vleh
; CHECK: br %r14
  %half = load i16, i16* %ptr
  %vec = insertelement <8 x i16> %val, i16 %half, i32 %index
  ret <8 x i16> %vec
}

; Insert a loaded word into element 0 of a v4i32.
define <4 x i32> @f11(<4 x i32> %val, i32* %ptr) {
; CHECK-LABEL: f11:
; CHECK: vlef %v24, 0(%r2), 0
; CHECK: br %r14
  %word = load i32, i32* %ptr
  %vec = insertelement <4 x i32> %val, i32 %word, i32 0
  ret <4 x i32> %vec
}

; Insert a loaded word into element 3 (the last one) of a v4i32.
define <4 x i32> @f12(<4 x i32> %val, i32* %ptr) {
; CHECK-LABEL: f12:
; CHECK: vlef %v24, 0(%r2), 3
; CHECK: br %r14
  %word = load i32, i32* %ptr
  %vec = insertelement <4 x i32> %val, i32 %word, i32 3
  ret <4 x i32> %vec
}

; v4i32 insertion at word index 1023 (byte offset 4092), the highest
; in-range displacement: the offset is expected to be folded.
define <4 x i32> @f13(<4 x i32> %val, i32* %base) {
; CHECK-LABEL: f13:
; CHECK: vlef %v24, 4092(%r2), 2
; CHECK: br %r14
  %addr = getelementptr i32, i32* %base, i32 1023
  %word = load i32, i32* %addr
  %vec = insertelement <4 x i32> %val, i32 %word, i32 2
  ret <4 x i32> %vec
}

; v4i32 insertion at word index 1024 (byte offset 4096), the first
; out-of-range displacement: the base register is adjusted separately.
define <4 x i32> @f14(<4 x i32> %val, i32* %base) {
; CHECK-LABEL: f14:
; CHECK: aghi %r2, 4096
; CHECK: vlef %v24, 0(%r2), 1
; CHECK: br %r14
  %addr = getelementptr i32, i32* %base, i32 1024
  %word = load i32, i32* %addr
  %vec = insertelement <4 x i32> %val, i32 %word, i32 1
  ret <4 x i32> %vec
}

; v4i32 insertion at a run-time element index: the load-element
; instruction must not appear in the output.
define <4 x i32> @f15(<4 x i32> %val, i32* %ptr, i32 %index) {
; CHECK-LABEL: f15:
; CHECK-NOT: vlef
; CHECK: br %r14
  %word = load i32, i32* %ptr
  %vec = insertelement <4 x i32> %val, i32 %word, i32 %index
  ret <4 x i32> %vec
}

; Insert a loaded doubleword into element 0 of a v2i64.
define <2 x i64> @f16(<2 x i64> %val, i64* %ptr) {
; CHECK-LABEL: f16:
; CHECK: vleg %v24, 0(%r2), 0
; CHECK: br %r14
  %dword = load i64, i64* %ptr
  %vec = insertelement <2 x i64> %val, i64 %dword, i32 0
  ret <2 x i64> %vec
}

; Insert a loaded doubleword into element 1 (the last one) of a v2i64.
define <2 x i64> @f17(<2 x i64> %val, i64* %ptr) {
; CHECK-LABEL: f17:
; CHECK: vleg %v24, 0(%r2), 1
; CHECK: br %r14
  %dword = load i64, i64* %ptr
  %vec = insertelement <2 x i64> %val, i64 %dword, i32 1
  ret <2 x i64> %vec
}

; v2i64 insertion at doubleword index 511 (byte offset 4088), the highest
; in-range displacement: the offset is expected to be folded.
define <2 x i64> @f18(<2 x i64> %val, i64* %base) {
; CHECK-LABEL: f18:
; CHECK: vleg %v24, 4088(%r2), 1
; CHECK: br %r14
  %addr = getelementptr i64, i64* %base, i32 511
  %dword = load i64, i64* %addr
  %vec = insertelement <2 x i64> %val, i64 %dword, i32 1
  ret <2 x i64> %vec
}

; v2i64 insertion at doubleword index 512 (byte offset 4096), the first
; out-of-range displacement: the base register is adjusted separately.
define <2 x i64> @f19(<2 x i64> %val, i64* %base) {
; CHECK-LABEL: f19:
; CHECK: aghi %r2, 4096
; CHECK: vleg %v24, 0(%r2), 0
; CHECK: br %r14
  %addr = getelementptr i64, i64* %base, i32 512
  %dword = load i64, i64* %addr
  %vec = insertelement <2 x i64> %val, i64 %dword, i32 0
  ret <2 x i64> %vec
}

; v2i64 insertion at a run-time element index: the load-element
; instruction must not appear in the output.
define <2 x i64> @f20(<2 x i64> %val, i64* %ptr, i32 %index) {
; CHECK-LABEL: f20:
; CHECK-NOT: vleg
; CHECK: br %r14
  %dword = load i64, i64* %ptr
  %vec = insertelement <2 x i64> %val, i64 %dword, i32 %index
  ret <2 x i64> %vec
}

; Insert a loaded float into element 0 of a v4f32.
define <4 x float> @f21(<4 x float> %val, float* %ptr) {
; CHECK-LABEL: f21:
; CHECK: vlef %v24, 0(%r2), 0
; CHECK: br %r14
  %flt = load float, float* %ptr
  %vec = insertelement <4 x float> %val, float %flt, i32 0
  ret <4 x float> %vec
}

; Insert a loaded float into element 3 (the last one) of a v4f32.
define <4 x float> @f22(<4 x float> %val, float* %ptr) {
; CHECK-LABEL: f22:
; CHECK: vlef %v24, 0(%r2), 3
; CHECK: br %r14
  %flt = load float, float* %ptr
  %vec = insertelement <4 x float> %val, float %flt, i32 3
  ret <4 x float> %vec
}

; v4f32 insertion at float index 1023 (byte offset 4092), the highest
; in-range displacement: the offset is expected to be folded.
define <4 x float> @f23(<4 x float> %val, float* %base) {
; CHECK-LABEL: f23:
; CHECK: vlef %v24, 4092(%r2), 2
; CHECK: br %r14
  %addr = getelementptr float, float* %base, i32 1023
  %flt = load float, float* %addr
  %vec = insertelement <4 x float> %val, float %flt, i32 2
  ret <4 x float> %vec
}

; v4f32 insertion at float index 1024 (byte offset 4096), the first
; out-of-range displacement: the base register is adjusted separately.
define <4 x float> @f24(<4 x float> %val, float* %base) {
; CHECK-LABEL: f24:
; CHECK: aghi %r2, 4096
; CHECK: vlef %v24, 0(%r2), 1
; CHECK: br %r14
  %addr = getelementptr float, float* %base, i32 1024
  %flt = load float, float* %addr
  %vec = insertelement <4 x float> %val, float %flt, i32 1
  ret <4 x float> %vec
}

; v4f32 insertion at a run-time element index: the load-element
; instruction must not appear in the output.
define <4 x float> @f25(<4 x float> %val, float* %ptr, i32 %index) {
; CHECK-LABEL: f25:
; CHECK-NOT: vlef
; CHECK: br %r14
  %flt = load float, float* %ptr
  %vec = insertelement <4 x float> %val, float %flt, i32 %index
  ret <4 x float> %vec
}

; Insert a loaded double into element 0 of a v2f64.
define <2 x double> @f26(<2 x double> %val, double* %ptr) {
; CHECK-LABEL: f26:
; CHECK: vleg %v24, 0(%r2), 0
; CHECK: br %r14
  %dbl = load double, double* %ptr
  %vec = insertelement <2 x double> %val, double %dbl, i32 0
  ret <2 x double> %vec
}

; Insert a loaded double into element 1 (the last one) of a v2f64.
define <2 x double> @f27(<2 x double> %val, double* %ptr) {
; CHECK-LABEL: f27:
; CHECK: vleg %v24, 0(%r2), 1
; CHECK: br %r14
  %dbl = load double, double* %ptr
  %vec = insertelement <2 x double> %val, double %dbl, i32 1
  ret <2 x double> %vec
}

; v2f64 insertion at double index 511 (byte offset 4088), the highest
; in-range displacement: the offset is expected to be folded.
define <2 x double> @f28(<2 x double> %val, double* %base) {
; CHECK-LABEL: f28:
; CHECK: vleg %v24, 4088(%r2), 1
; CHECK: br %r14
  %addr = getelementptr double, double* %base, i32 511
  %dbl = load double, double* %addr
  %vec = insertelement <2 x double> %val, double %dbl, i32 1
  ret <2 x double> %vec
}

; v2f64 insertion at double index 512 (byte offset 4096), the first
; out-of-range displacement: the base register is adjusted separately.
define <2 x double> @f29(<2 x double> %val, double* %base) {
; CHECK-LABEL: f29:
; CHECK: aghi %r2, 4096
; CHECK: vleg %v24, 0(%r2), 0
; CHECK: br %r14
  %addr = getelementptr double, double* %base, i32 512
  %dbl = load double, double* %addr
  %vec = insertelement <2 x double> %val, double %dbl, i32 0
  ret <2 x double> %vec
}

; v2f64 insertion at a run-time element index: the load-element
; instruction must not appear in the output.
define <2 x double> @f30(<2 x double> %val, double* %ptr, i32 %index) {
; CHECK-LABEL: f30:
; CHECK-NOT: vleg
; CHECK: br %r14
  %dbl = load double, double* %ptr
  %vec = insertelement <2 x double> %val, double %dbl, i32 %index
  ret <2 x double> %vec
}

; v4i32 gather of element 0: the address is %base plus the zero-extended
; element 0 of %index, and the loaded word goes into element 0 of %val.
define <4 x i32> @f31(<4 x i32> %val, <4 x i32> %index, i64 %base) {
; CHECK-LABEL: f31:
; CHECK: vgef %v24, 0(%v26,%r2), 0
; CHECK: br %r14
  %idx = extractelement <4 x i32> %index, i32 0
  %idx64 = zext i32 %idx to i64
  %sum = add i64 %base, %idx64
  %addr = inttoptr i64 %sum to i32*
  %word = load i32, i32* %addr
  %vec = insertelement <4 x i32> %val, i32 %word, i32 0
  ret <4 x i32> %vec
}

; v4i32 gather of element 3 (the last one).
define <4 x i32> @f32(<4 x i32> %val, <4 x i32> %index, i64 %base) {
; CHECK-LABEL: f32:
; CHECK: vgef %v24, 0(%v26,%r2), 3
; CHECK: br %r14
  %idx = extractelement <4 x i32> %index, i32 3
  %idx64 = zext i32 %idx to i64
  %sum = add i64 %base, %idx64
  %addr = inttoptr i64 %sum to i32*
  %word = load i32, i32* %addr
  %vec = insertelement <4 x i32> %val, i32 %word, i32 3
  ret <4 x i32> %vec
}

; v4i32 gather with an extra constant of 4095 added to the address, the
; highest in-range displacement: it is expected to be folded into the gather.
define <4 x i32> @f33(<4 x i32> %val, <4 x i32> %index, i64 %base) {
; CHECK-LABEL: f33:
; CHECK: vgef %v24, 4095(%v26,%r2), 1
; CHECK: br %r14
  %idx = extractelement <4 x i32> %index, i32 1
  %idx64 = zext i32 %idx to i64
  %sum = add i64 %base, %idx64
  %sum.off = add i64 %sum, 4095
  %addr = inttoptr i64 %sum.off to i32*
  %word = load i32, i32* %addr
  %vec = insertelement <4 x i32> %val, i32 %word, i32 1
  ret <4 x i32> %vec
}

; v2i64 gather of element 0: the address is %base plus element 0 of %index
; (already 64 bits wide, so no extension is involved).
define <2 x i64> @f34(<2 x i64> %val, <2 x i64> %index, i64 %base) {
; CHECK-LABEL: f34:
; CHECK: vgeg %v24, 0(%v26,%r2), 0
; CHECK: br %r14
  %idx = extractelement <2 x i64> %index, i32 0
  %sum = add i64 %base, %idx
  %addr = inttoptr i64 %sum to i64*
  %dword = load i64, i64* %addr
  %vec = insertelement <2 x i64> %val, i64 %dword, i32 0
  ret <2 x i64> %vec
}

; v2i64 gather of element 1 (the last one).
define <2 x i64> @f35(<2 x i64> %val, <2 x i64> %index, i64 %base) {
; CHECK-LABEL: f35:
; CHECK: vgeg %v24, 0(%v26,%r2), 1
; CHECK: br %r14
  %idx = extractelement <2 x i64> %index, i32 1
  %sum = add i64 %base, %idx
  %addr = inttoptr i64 %sum to i64*
  %dword = load i64, i64* %addr
  %vec = insertelement <2 x i64> %val, i64 %dword, i32 1
  ret <2 x i64> %vec
}

; v4f32 gather of element 0, indexed by a v4i32.
define <4 x float> @f36(<4 x float> %val, <4 x i32> %index, i64 %base) {
; CHECK-LABEL: f36:
; CHECK: vgef %v24, 0(%v26,%r2), 0
; CHECK: br %r14
  %idx = extractelement <4 x i32> %index, i32 0
  %idx64 = zext i32 %idx to i64
  %sum = add i64 %base, %idx64
  %addr = inttoptr i64 %sum to float*
  %flt = load float, float* %addr
  %vec = insertelement <4 x float> %val, float %flt, i32 0
  ret <4 x float> %vec
}

; v4f32 gather of element 3 (the last one), indexed by a v4i32.
define <4 x float> @f37(<4 x float> %val, <4 x i32> %index, i64 %base) {
; CHECK-LABEL: f37:
; CHECK: vgef %v24, 0(%v26,%r2), 3
; CHECK: br %r14
  %idx = extractelement <4 x i32> %index, i32 3
  %idx64 = zext i32 %idx to i64
  %sum = add i64 %base, %idx64
  %addr = inttoptr i64 %sum to float*
  %flt = load float, float* %addr
  %vec = insertelement <4 x float> %val, float %flt, i32 3
  ret <4 x float> %vec
}

; v2f64 gather of element 0, indexed by a v2i64.
define <2 x double> @f38(<2 x double> %val, <2 x i64> %index, i64 %base) {
; CHECK-LABEL: f38:
; CHECK: vgeg %v24, 0(%v26,%r2), 0
; CHECK: br %r14
  %idx = extractelement <2 x i64> %index, i32 0
  %sum = add i64 %base, %idx
  %addr = inttoptr i64 %sum to double*
  %dbl = load double, double* %addr
  %vec = insertelement <2 x double> %val, double %dbl, i32 0
  ret <2 x double> %vec
}

; v2f64 gather of element 1 (the last one), indexed by a v2i64.
define <2 x double> @f39(<2 x double> %val, <2 x i64> %index, i64 %base) {
; CHECK-LABEL: f39:
; CHECK: vgeg %v24, 0(%v26,%r2), 1
; CHECK: br %r14
  %idx = extractelement <2 x i64> %index, i32 1
  %sum = add i64 %base, %idx
  %addr = inttoptr i64 %sum to double*
  %dbl = load double, double* %addr
  %vec = insertelement <2 x double> %val, double %dbl, i32 1
  ret <2 x double> %vec
}

; v4i32 gather where the load is chained: the updated vector is stored
; through %res instead of being returned.
define void @f40(<4 x i32> %val, <4 x i32> %index, i64 %base, <4 x i32>* %res) {
; CHECK-LABEL: f40:
; CHECK: vgef %v24, 0(%v26,%r2), 1
; CHECK: vst %v24, 0(%r3)
; CHECK: br %r14
  %idx = extractelement <4 x i32> %index, i32 1
  %idx64 = zext i32 %idx to i64
  %sum = add i64 %base, %idx64
  %addr = inttoptr i64 %sum to i32*
  %word = load i32, i32* %addr
  %vec = insertelement <4 x i32> %val, i32 %word, i32 1
  store <4 x i32> %vec, <4 x i32>* %res
  ret void
}

; v2i64 gather where the load is chained: the updated vector is stored
; through %res instead of being returned.
define void @f41(<2 x i64> %val, <2 x i64> %index, i64 %base, <2 x i64>* %res) {
; CHECK-LABEL: f41:
; CHECK: vgeg %v24, 0(%v26,%r2), 1
; CHECK: vst %v24, 0(%r3)
; CHECK: br %r14
  %idx = extractelement <2 x i64> %index, i32 1
  %sum = add i64 %base, %idx
  %addr = inttoptr i64 %sum to i64*
  %dword = load i64, i64* %addr
  %vec = insertelement <2 x i64> %val, i64 %dword, i32 1
  store <2 x i64> %vec, <2 x i64>* %res
  ret void
}