; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple powerpc64-unknown-linux-gnu -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -verify-machineinstrs -vec-extabi < %s | FileCheck %s

; Unaligned vector loads on pwr7 (#0) lower to the lvsl/lvx/vperm realignment
; sequence; on pwr8 (#2) they use the unaligned-capable lxvw4x/lxvd2x directly.

define <16 x i8> @test_l_v16i8(<16 x i8>* %p) #0 {
; CHECK-LABEL: test_l_v16i8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    li 4, 15
; CHECK-NEXT:    lvsl 3, 0, 3
; CHECK-NEXT:    lvx 2, 3, 4
; CHECK-NEXT:    lvx 4, 0, 3
; CHECK-NEXT:    vperm 2, 4, 2, 3
; CHECK-NEXT:    blr
entry:
  %r = load <16 x i8>, <16 x i8>* %p, align 1
  ret <16 x i8> %r

}

define <32 x i8> @test_l_v32i8(<32 x i8>* %p) #0 {
; CHECK-LABEL: test_l_v32i8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    li 4, 31
; CHECK-NEXT:    lvsl 5, 0, 3
; CHECK-NEXT:    lvx 2, 3, 4
; CHECK-NEXT:    li 4, 16
; CHECK-NEXT:    lvx 4, 3, 4
; CHECK-NEXT:    lvx 0, 0, 3
; CHECK-NEXT:    vperm 3, 4, 2, 5
; CHECK-NEXT:    vperm 2, 0, 4, 5
; CHECK-NEXT:    blr
entry:
  %r = load <32 x i8>, <32 x i8>* %p, align 1
  ret <32 x i8> %r

}

define <8 x i16> @test_l_v8i16(<8 x i16>* %p) #0 {
; CHECK-LABEL: test_l_v8i16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    li 4, 15
; CHECK-NEXT:    lvsl 3, 0, 3
; CHECK-NEXT:    lvx 2, 3, 4
; CHECK-NEXT:    lvx 4, 0, 3
; CHECK-NEXT:    vperm 2, 4, 2, 3
; CHECK-NEXT:    blr
entry:
  %r = load <8 x i16>, <8 x i16>* %p, align 2
  ret <8 x i16> %r

}

define <16 x i16> @test_l_v16i16(<16 x i16>* %p) #0 {
; CHECK-LABEL: test_l_v16i16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    li 4, 31
; CHECK-NEXT:    lvsl 5, 0, 3
; CHECK-NEXT:    lvx 2, 3, 4
; CHECK-NEXT:    li 4, 16
; CHECK-NEXT:    lvx 4, 3, 4
; CHECK-NEXT:    lvx 0, 0, 3
; CHECK-NEXT:    vperm 3, 4, 2, 5
; CHECK-NEXT:    vperm 2, 0, 4, 5
; CHECK-NEXT:    blr
entry:
  %r = load <16 x i16>, <16 x i16>* %p, align 2
  ret <16 x i16> %r

}

define <4 x i32> @test_l_v4i32(<4 x i32>* %p) #0 {
; CHECK-LABEL: test_l_v4i32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    li 4, 15
; CHECK-NEXT:    lvsl 3, 0, 3
; CHECK-NEXT:    lvx 2, 3, 4
; CHECK-NEXT:    lvx 4, 0, 3
; CHECK-NEXT:    vperm 2, 4, 2, 3
; CHECK-NEXT:    blr
entry:
  %r = load <4 x i32>, <4 x i32>* %p, align 4
  ret <4 x i32> %r

}

define <8 x i32> @test_l_v8i32(<8 x i32>* %p) #0 {
; CHECK-LABEL: test_l_v8i32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    li 4, 31
; CHECK-NEXT:    lvsl 5, 0, 3
; CHECK-NEXT:    lvx 2, 3, 4
; CHECK-NEXT:    li 4, 16
; CHECK-NEXT:    lvx 4, 3, 4
; CHECK-NEXT:    lvx 0, 0, 3
; CHECK-NEXT:    vperm 3, 4, 2, 5
; CHECK-NEXT:    vperm 2, 0, 4, 5
; CHECK-NEXT:    blr
entry:
  %r = load <8 x i32>, <8 x i32>* %p, align 4
  ret <8 x i32> %r

}

define <2 x i64> @test_l_v2i64(<2 x i64>* %p) #0 {
; CHECK-LABEL: test_l_v2i64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lxvd2x 34, 0, 3
; CHECK-NEXT:    blr
entry:
  %r = load <2 x i64>, <2 x i64>* %p, align 8
  ret <2 x i64> %r

}

define <4 x i64> @test_l_v4i64(<4 x i64>* %p) #0 {
; CHECK-LABEL: test_l_v4i64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    li 4, 16
; CHECK-NEXT:    lxvd2x 34, 0, 3
; CHECK-NEXT:    lxvd2x 35, 3, 4
; CHECK-NEXT:    blr
entry:
  %r = load <4 x i64>, <4 x i64>* %p, align 8
  ret <4 x i64> %r

}

define <4 x float> @test_l_v4float(<4 x float>* %p) #0 {
; CHECK-LABEL: test_l_v4float:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    li 4, 15
; CHECK-NEXT:    lvsl 3, 0, 3
; CHECK-NEXT:    lvx 2, 3, 4
; CHECK-NEXT:    lvx 4, 0, 3
; CHECK-NEXT:    vperm 2, 4, 2, 3
; CHECK-NEXT:    blr
entry:
  %r = load <4 x float>, <4 x float>* %p, align 4
  ret <4 x float> %r

}

define <8 x float> @test_l_v8float(<8 x float>* %p) #0 {
; CHECK-LABEL: test_l_v8float:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    li 4, 31
; CHECK-NEXT:    lvsl 5, 0, 3
; CHECK-NEXT:    lvx 2, 3, 4
; CHECK-NEXT:    li 4, 16
; CHECK-NEXT:    lvx 4, 3, 4
; CHECK-NEXT:    lvx 0, 0, 3
; CHECK-NEXT:    vperm 3, 4, 2, 5
; CHECK-NEXT:    vperm 2, 0, 4, 5
; CHECK-NEXT:    blr
entry:
  %r = load <8 x float>, <8 x float>* %p, align 4
  ret <8 x float> %r

}

define <2 x double> @test_l_v2double(<2 x double>* %p) #0 {
; CHECK-LABEL: test_l_v2double:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lxvd2x 34, 0, 3
; CHECK-NEXT:    blr
entry:
  %r = load <2 x double>, <2 x double>* %p, align 8
  ret <2 x double> %r

}

define <4 x double> @test_l_v4double(<4 x double>* %p) #0 {
; CHECK-LABEL: test_l_v4double:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    li 4, 16
; CHECK-NEXT:    lxvd2x 34, 0, 3
; CHECK-NEXT:    lxvd2x 35, 3, 4
; CHECK-NEXT:    blr
entry:
  %r = load <4 x double>, <4 x double>* %p, align 8
  ret <4 x double> %r

}

define <16 x i8> @test_l_p8v16i8(<16 x i8>* %p) #2 {
; CHECK-LABEL: test_l_p8v16i8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lxvw4x 34, 0, 3
; CHECK-NEXT:    blr
entry:
  %r = load <16 x i8>, <16 x i8>* %p, align 1
  ret <16 x i8> %r

}

define <32 x i8> @test_l_p8v32i8(<32 x i8>* %p) #2 {
; CHECK-LABEL: test_l_p8v32i8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    li 4, 16
; CHECK-NEXT:    lxvw4x 34, 0, 3
; CHECK-NEXT:    lxvw4x 35, 3, 4
; CHECK-NEXT:    blr
entry:
  %r = load <32 x i8>, <32 x i8>* %p, align 1
  ret <32 x i8> %r

}

define <8 x i16> @test_l_p8v8i16(<8 x i16>* %p) #2 {
; CHECK-LABEL: test_l_p8v8i16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lxvw4x 34, 0, 3
; CHECK-NEXT:    blr
entry:
  %r = load <8 x i16>, <8 x i16>* %p, align 2
  ret <8 x i16> %r

}

define <16 x i16> @test_l_p8v16i16(<16 x i16>* %p) #2 {
; CHECK-LABEL: test_l_p8v16i16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    li 4, 16
; CHECK-NEXT:    lxvw4x 34, 0, 3
; CHECK-NEXT:    lxvw4x 35, 3, 4
; CHECK-NEXT:    blr
entry:
  %r = load <16 x i16>, <16 x i16>* %p, align 2
  ret <16 x i16> %r

}

define <4 x i32> @test_l_p8v4i32(<4 x i32>* %p) #2 {
; CHECK-LABEL: test_l_p8v4i32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lxvw4x 34, 0, 3
; CHECK-NEXT:    blr
entry:
  %r = load <4 x i32>, <4 x i32>* %p, align 4
  ret <4 x i32> %r

}

define <8 x i32> @test_l_p8v8i32(<8 x i32>* %p) #2 {
; CHECK-LABEL: test_l_p8v8i32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    li 4, 16
; CHECK-NEXT:    lxvw4x 34, 0, 3
; CHECK-NEXT:    lxvw4x 35, 3, 4
; CHECK-NEXT:    blr
entry:
  %r = load <8 x i32>, <8 x i32>* %p, align 4
  ret <8 x i32> %r

}

define <2 x i64> @test_l_p8v2i64(<2 x i64>* %p) #2 {
; CHECK-LABEL: test_l_p8v2i64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lxvd2x 34, 0, 3
; CHECK-NEXT:    blr
entry:
  %r = load <2 x i64>, <2 x i64>* %p, align 8
  ret <2 x i64> %r

}

define <4 x i64> @test_l_p8v4i64(<4 x i64>* %p) #2 {
; CHECK-LABEL: test_l_p8v4i64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    li 4, 16
; CHECK-NEXT:    lxvd2x 34, 0, 3
; CHECK-NEXT:    lxvd2x 35, 3, 4
; CHECK-NEXT:    blr
entry:
  %r = load <4 x i64>, <4 x i64>* %p, align 8
  ret <4 x i64> %r

}

define <4 x float> @test_l_p8v4float(<4 x float>* %p) #2 {
; CHECK-LABEL: test_l_p8v4float:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lxvw4x 34, 0, 3
; CHECK-NEXT:    blr
entry:
  %r = load <4 x float>, <4 x float>* %p, align 4
  ret <4 x float> %r

}

define <8 x float> @test_l_p8v8float(<8 x float>* %p) #2 {
; CHECK-LABEL: test_l_p8v8float:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    li 4, 16
; CHECK-NEXT:    lxvw4x 34, 0, 3
; CHECK-NEXT:    lxvw4x 35, 3, 4
; CHECK-NEXT:    blr
entry:
  %r = load <8 x float>, <8 x float>* %p, align 4
  ret <8 x float> %r

}

define <2 x double> @test_l_p8v2double(<2 x double>* %p) #2 {
; CHECK-LABEL: test_l_p8v2double:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lxvd2x 34, 0, 3
; CHECK-NEXT:    blr
entry:
  %r = load <2 x double>, <2 x double>* %p, align 8
  ret <2 x double> %r

}

define <4 x double> @test_l_p8v4double(<4 x double>* %p) #2 {
; CHECK-LABEL: test_l_p8v4double:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    li 4, 16
; CHECK-NEXT:    lxvd2x 34, 0, 3
; CHECK-NEXT:    lxvd2x 35, 3, 4
; CHECK-NEXT:    blr
entry:
  %r = load <4 x double>, <4 x double>* %p, align 8
  ret <4 x double> %r

}

define void @test_s_v16i8(<16 x i8>* %p, <16 x i8> %v) #0 {
; CHECK-LABEL: test_s_v16i8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    stxvw4x 34, 0, 3
; CHECK-NEXT:    blr
entry:
  store <16 x i8> %v, <16 x i8>* %p, align 1
  ret void

}

define void @test_s_v32i8(<32 x i8>* %p, <32 x i8> %v) #0 {
; CHECK-LABEL: test_s_v32i8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    li 4, 16
; CHECK-NEXT:    stxvw4x 34, 0, 3
; CHECK-NEXT:    stxvw4x 35, 3, 4
; CHECK-NEXT:    blr
entry:
  store <32 x i8> %v, <32 x i8>* %p, align 1
  ret void

}

define void @test_s_v8i16(<8 x i16>* %p, <8 x i16> %v) #0 {
; CHECK-LABEL: test_s_v8i16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    stxvw4x 34, 0, 3
; CHECK-NEXT:    blr
entry:
  store <8 x i16> %v, <8 x i16>* %p, align 2
  ret void

}

define void @test_s_v16i16(<16 x i16>* %p, <16 x i16> %v) #0 {
; CHECK-LABEL: test_s_v16i16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    li 4, 16
; CHECK-NEXT:    stxvw4x 34, 0, 3
; CHECK-NEXT:    stxvw4x 35, 3, 4
; CHECK-NEXT:    blr
entry:
  store <16 x i16> %v, <16 x i16>* %p, align 2
  ret void

}

define void @test_s_v4i32(<4 x i32>* %p, <4 x i32> %v) #0 {
; CHECK-LABEL: test_s_v4i32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    stxvw4x 34, 0, 3
; CHECK-NEXT:    blr
entry:
  store <4 x i32> %v, <4 x i32>* %p, align 4
  ret void

}

define void @test_s_v8i32(<8 x i32>* %p, <8 x i32> %v) #0 {
; CHECK-LABEL: test_s_v8i32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    li 4, 16
; CHECK-NEXT:    stxvw4x 34, 0, 3
; CHECK-NEXT:    stxvw4x 35, 3, 4
; CHECK-NEXT:    blr
entry:
  store <8 x i32> %v, <8 x i32>* %p, align 4
  ret void

}

define void @test_s_v2i64(<2 x i64>* %p, <2 x i64> %v) #0 {
; CHECK-LABEL: test_s_v2i64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    stxvd2x 34, 0, 3
; CHECK-NEXT:    blr
entry:
  store <2 x i64> %v, <2 x i64>* %p, align 8
  ret void

}

define void @test_s_v4i64(<4 x i64>* %p, <4 x i64> %v) #0 {
; CHECK-LABEL: test_s_v4i64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    li 4, 16
; CHECK-NEXT:    stxvd2x 34, 0, 3
; CHECK-NEXT:    stxvd2x 35, 3, 4
; CHECK-NEXT:    blr
entry:
  store <4 x i64> %v, <4 x i64>* %p, align 8
  ret void

}

define void @test_s_v4float(<4 x float>* %p, <4 x float> %v) #0 {
; CHECK-LABEL: test_s_v4float:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    stxvw4x 34, 0, 3
; CHECK-NEXT:    blr
entry:
  store <4 x float> %v, <4 x float>* %p, align 4
  ret void

}

define void @test_s_v8float(<8 x float>* %p, <8 x float> %v) #0 {
; CHECK-LABEL: test_s_v8float:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    li 4, 16
; CHECK-NEXT:    stxvw4x 34, 0, 3
; CHECK-NEXT:    stxvw4x 35, 3, 4
; CHECK-NEXT:    blr
entry:
  store <8 x float> %v, <8 x float>* %p, align 4
  ret void

}

define void @test_s_v2double(<2 x double>* %p, <2 x double> %v) #0 {
; CHECK-LABEL: test_s_v2double:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    stxvd2x 34, 0, 3
; CHECK-NEXT:    blr
entry:
  store <2 x double> %v, <2 x double>* %p, align 8
  ret void

}

define void @test_s_v4double(<4 x double>* %p, <4 x double> %v) #0 {
; CHECK-LABEL: test_s_v4double:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    li 4, 16
; CHECK-NEXT:    stxvd2x 34, 0, 3
; CHECK-NEXT:    stxvd2x 35, 3, 4
; CHECK-NEXT:    blr
entry:
  store <4 x double> %v, <4 x double>* %p, align 8
  ret void

}

attributes #0 = { nounwind "target-cpu"="pwr7" }
attributes #2 = { nounwind "target-cpu"="pwr8" }