; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s

; Checks how shufflevector is lowered to MSA instructions on big-endian (mips)
; and little-endian (mipsel) targets. The instructions expected by the
; FileCheck directives below are vshf, shf, ilvev, ilvod, ilvl, ilvr, pckev,
; pckod and splati, for the v16i8, v8i16, v4i32 and v2i64 vector types.
;
; Conventions shared by every function in this file:
;   * %c is the destination pointer and is expected in $4.
;   * %a and %b are the source pointers and are expected in $5 and $6.
;   * R1/R2 capture the MSA registers holding the loads of %a/%b, and R3
;     captures the register that is finally stored to %c.
;   * In the vshf tests an address is formed with %lo($...) and a vector is
;     loaded from it into R3 before the vshf; presumably this is the shuffle
;     control vector coming from the constant pool.
;
; vshf_<type>_<n> variants:
;   _0  one input, elements fully reversed
;   _1  one input, every index is 1 (a splat of element 1)
;   _2  two inputs, but every index selects from the second one, so the load
;       of %a is not checked
;   _3  two inputs, indices taken from both
;   _4  the same vector is used for both inputs and every index resolves to
;       element 1 of it

define void @vshf_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK: vshf_v16i8_0:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
  ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0([[PTR_A]])
  ; CHECK-DAG: vshf.b [[R3]], [[R1]], [[R1]]
  store <16 x i8> %2, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size vshf_v16i8_0
}

define void @vshf_v16i8_1(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK: vshf_v16i8_1:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ; CHECK-DAG: splati.b [[R3:\$w[0-9]+]], [[R1]][1]
  store <16 x i8> %2, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size vshf_v16i8_1
}

define void @vshf_v16i8_2(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK: vshf_v16i8_2:

  %1 = load <16 x i8>* %a
  %2 = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <16 x i8> %1, <16 x i8> %2, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 16>
  ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
  ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0([[PTR_A]])
  ; CHECK-DAG: vshf.b [[R3]], [[R2]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size vshf_v16i8_2
}

define void @vshf_v16i8_3(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK: vshf_v16i8_3:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <16 x i8> %1, <16 x i8> %2, <16 x i32> <i32 17, i32 24, i32 25, i32 18, i32 19, i32 20, i32 28, i32 19, i32 1, i32 8, i32 9, i32 2, i32 3, i32 4, i32 12, i32 3>
  ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
  ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0([[PTR_A]])
  ; The concatenation step of vshf is bitwise not vectorwise so we must reverse
  ; the operands to get the right answer.
  ; CHECK-DAG: vshf.b [[R3]], [[R2]], [[R1]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size vshf_v16i8_3
}

define void @vshf_v16i8_4(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK: vshf_v16i8_4:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <16 x i8> %1, <16 x i8> %1, <16 x i32> <i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17>
  ; CHECK-DAG: splati.b [[R3:\$w[0-9]+]], [[R1]][1]
  store <16 x i8> %2, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size vshf_v16i8_4
}

define void @vshf_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK: vshf_v8i16_0:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
  ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], 0([[PTR_A]])
  ; CHECK-DAG: vshf.h [[R3]], [[R1]], [[R1]]
  store <8 x i16> %2, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size vshf_v8i16_0
}

define void @vshf_v8i16_1(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK: vshf_v8i16_1:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ; CHECK-DAG: splati.h [[R3:\$w[0-9]+]], [[R1]][1]
  store <8 x i16> %2, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size vshf_v8i16_1
}

define void @vshf_v8i16_2(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK: vshf_v8i16_2:

  %1 = load <8 x i16>* %a
  %2 = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 8>
  ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
  ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], 0([[PTR_A]])
  ; CHECK-DAG: vshf.h [[R3]], [[R2]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size vshf_v8i16_2
}

define void @vshf_v8i16_3(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK: vshf_v8i16_3:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 1, i32 8, i32 9, i32 2, i32 3, i32 4, i32 12, i32 3>
  ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
  ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], 0([[PTR_A]])
  ; The concatenation step of vshf is bitwise not vectorwise so we must reverse
  ; the operands to get the right answer.
  ; CHECK-DAG: vshf.h [[R3]], [[R2]], [[R1]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size vshf_v8i16_3
}

define void @vshf_v8i16_4(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK: vshf_v8i16_4:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <8 x i16> %1, <8 x i16> %1, <8 x i32> <i32 1, i32 9, i32 1, i32 9, i32 1, i32 9, i32 1, i32 9>
  ; CHECK-DAG: splati.h [[R3:\$w[0-9]+]], [[R1]][1]
  store <8 x i16> %2, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size vshf_v8i16_4
}

; Note: v4i32 only has one 4-element set so it's impossible to get a vshf.w
; instruction when using a single vector.
;
; The shf immediates expected in this file are consistent with packing the
; four selected indices two bits each, first result element in the lowest
; bits: <3,2,1,0> -> 27, <1,1,1,1> -> 85, <0,1,2,0> -> 36, <3,3,3,3> -> 255.

define void @vshf_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK: vshf_v4i32_0:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 27
  store <4 x i32> %2, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size vshf_v4i32_0
}

define void @vshf_v4i32_1(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK: vshf_v4i32_1:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 85
  store <4 x i32> %2, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size vshf_v4i32_1
}

define void @vshf_v4i32_2(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK: vshf_v4i32_2:

  %1 = load <4 x i32>* %a
  %2 = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 4, i32 5, i32 6, i32 4>
  ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R2]], 36
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size vshf_v4i32_2
}

define void @vshf_v4i32_3(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK: vshf_v4i32_3:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 1, i32 5, i32 6, i32 4>
  ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
  ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0([[PTR_A]])
  ; The concatenation step of vshf is bitwise not vectorwise so we must reverse
  ; the operands to get the right answer.
  ; CHECK-DAG: vshf.w [[R3]], [[R2]], [[R1]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size vshf_v4i32_3
}

define void @vshf_v4i32_4(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK: vshf_v4i32_4:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <4 x i32> %1, <4 x i32> %1, <4 x i32> <i32 1, i32 5, i32 5, i32 1>
  ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 85
  store <4 x i32> %2, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size vshf_v4i32_4
}

define void @vshf_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK: vshf_v2i64_0:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
  ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
  ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0([[PTR_A]])
  ; CHECK-DAG: vshf.d [[R3]], [[R1]], [[R1]]
  store <2 x i64> %2, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size vshf_v2i64_0
}

define void @vshf_v2i64_1(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK: vshf_v2i64_1:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
  ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][1]
  store <2 x i64> %2, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size vshf_v2i64_1
}

define void @vshf_v2i64_2(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK: vshf_v2i64_2:

  %1 = load <2 x i64>* %a
  %2 = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 3, i32 2>
  ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
  ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0([[PTR_A]])
  ; CHECK-DAG: vshf.d [[R3]], [[R2]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size vshf_v2i64_2
}

define void @vshf_v2i64_3(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK: vshf_v2i64_3:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 2>
  ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
  ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0([[PTR_A]])
  ; The concatenation step of vshf is bitwise not vectorwise so we must reverse
  ; the operands to get the right answer.
  ; CHECK-DAG: vshf.d [[R3]], [[R2]], [[R1]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size vshf_v2i64_3
}

define void @vshf_v2i64_4(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK: vshf_v2i64_4:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <2 x i64> %1, <2 x i64> %1, <2 x i32> <i32 1, i32 3>
  ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][1]
  store <2 x i64> %2, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size vshf_v2i64_4
}

; shf tests: one input, and the same 4-element index pattern repeated in every
; group of four elements.

define void @shf_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK: shf_v16i8_0:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> <i32 1, i32 3, i32 2, i32 0, i32 5, i32 7, i32 6, i32 4, i32 9, i32 11, i32 10, i32 8, i32 13, i32 15, i32 14, i32 12>
  ; CHECK-DAG: shf.b [[R3:\$w[0-9]+]], [[R1]], 45
  store <16 x i8> %2, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size shf_v16i8_0
}

define void @shf_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK: shf_v8i16_0:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
  ; CHECK-DAG: shf.h [[R3:\$w[0-9]+]], [[R1]], 27
  store <8 x i16> %2, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size shf_v8i16_0
}

define void @shf_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK: shf_v4i32_0:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 27
  store <4 x i32> %2, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size shf_v4i32_0
}

; shf.d does not exist

; ilvev tests: the even-indexed elements of %a and %b, interleaved.

define void @ilvev_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK: ilvev_v16i8_0:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
                     <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
  ; CHECK-DAG: ilvev.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size ilvev_v16i8_0
}

define void @ilvev_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK: ilvev_v8i16_0:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
  ; CHECK-DAG: ilvev.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size ilvev_v8i16_0
}

define void @ilvev_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK: ilvev_v4i32_0:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  ; CHECK-DAG: ilvev.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size ilvev_v4i32_0
}

define void @ilvev_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK: ilvev_v2i64_0:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 0, i32 2>
  ; CHECK-DAG: ilvev.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size ilvev_v2i64_0
}

; ilvod tests: the odd-indexed elements of %a and %b, interleaved.

define void @ilvod_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK: ilvod_v16i8_0:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
                     <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
  ; CHECK-DAG: ilvod.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size ilvod_v16i8_0
}

define void @ilvod_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK: ilvod_v8i16_0:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  ; CHECK-DAG: ilvod.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size ilvod_v8i16_0
}

define void @ilvod_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK: ilvod_v4i32_0:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  ; CHECK-DAG: ilvod.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size ilvod_v4i32_0
}

define void @ilvod_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK: ilvod_v2i64_0:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 3>
  ; CHECK-DAG: ilvod.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size ilvod_v2i64_0
}

; ilvl tests: the lowest-indexed half of %a interleaved with the
; lowest-indexed half of %b.

define void @ilvl_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK: ilvl_v16i8_0:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
                     <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
  ; CHECK-DAG: ilvl.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size ilvl_v16i8_0
}

define void @ilvl_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK: ilvl_v8i16_0:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
  ; CHECK-DAG: ilvl.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size ilvl_v8i16_0
}

define void @ilvl_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK: ilvl_v4i32_0:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  ; CHECK-DAG: ilvl.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size ilvl_v4i32_0
}

define void @ilvl_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK: ilvl_v2i64_0:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 0, i32 2>
  ; ilvl.d and ilvev.d are equivalent for v2i64
  ; CHECK-DAG: ilvev.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size ilvl_v2i64_0
}

; ilvr tests: the highest-indexed half of %a interleaved with the
; highest-indexed half of %b.

define void @ilvr_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK: ilvr_v16i8_0:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
                     <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
  ; CHECK-DAG: ilvr.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size ilvr_v16i8_0
}

define void @ilvr_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK: ilvr_v8i16_0:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  ; CHECK-DAG: ilvr.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size ilvr_v8i16_0
}

define void @ilvr_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK: ilvr_v4i32_0:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  ; CHECK-DAG: ilvr.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size ilvr_v4i32_0
}

define void @ilvr_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK: ilvr_v2i64_0:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 3>
  ; ilvr.d and ilvod.d are equivalent for v2i64
  ; CHECK-DAG: ilvod.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size ilvr_v2i64_0
}

; pckev tests: all even-indexed elements of %a followed by all even-indexed
; elements of %b.

define void @pckev_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK: pckev_v16i8_0:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
                     <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
  ; CHECK-DAG: pckev.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size pckev_v16i8_0
}

define void @pckev_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK: pckev_v8i16_0:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  ; CHECK-DAG: pckev.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size pckev_v8i16_0
}

define void @pckev_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK: pckev_v4i32_0:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  ; CHECK-DAG: pckev.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size pckev_v4i32_0
}

define void @pckev_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK: pckev_v2i64_0:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 0, i32 2>
  ; pckev.d and ilvev.d are equivalent for v2i64
  ; CHECK-DAG: ilvev.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size pckev_v2i64_0
}

; pckod tests: all odd-indexed elements of %a followed by all odd-indexed
; elements of %b.

define void @pckod_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK: pckod_v16i8_0:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
                     <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
  ; CHECK-DAG: pckod.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size pckod_v16i8_0
}

define void @pckod_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK: pckod_v8i16_0:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  ; CHECK-DAG: pckod.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size pckod_v8i16_0
}

define void @pckod_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK: pckod_v4i32_0:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  ; CHECK-DAG: pckod.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size pckod_v4i32_0
}

define void @pckod_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK: pckod_v2i64_0:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 3>
  ; pckod.d and ilvod.d are equivalent for v2i64
  ; CHECK-DAG: ilvod.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size pckod_v2i64_0
}

; splati tests: one input, every index selects the same element. These
; functions take only the destination %c and a single source %a.

define void @splati_v16i8_0(<16 x i8>* %c, <16 x i8>* %a) nounwind {
  ; CHECK: splati_v16i8_0:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <16 x i8> %1, <16 x i8> undef,
                     <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
  ; CHECK-DAG: splati.b [[R3:\$w[0-9]+]], [[R1]][4]
  store <16 x i8> %2, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size splati_v16i8_0
}

define void @splati_v8i16_0(<8 x i16>* %c, <8 x i16>* %a) nounwind {
  ; CHECK: splati_v8i16_0:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
  ; CHECK-DAG: splati.h [[R3:\$w[0-9]+]], [[R1]][4]
  store <8 x i16> %2, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size splati_v8i16_0
}

define void @splati_v4i32_0(<4 x i32>* %c, <4 x i32>* %a) nounwind {
  ; CHECK: splati_v4i32_0:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  ; shf.w and splati.w are equivalent
  ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 255
  store <4 x i32> %2, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size splati_v4i32_0
}

define void @splati_v2i64_0(<2 x i64>* %c, <2 x i64>* %a) nounwind {
  ; CHECK: splati_v2i64_0:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
  ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][1]
  store <2 x i64> %2, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size splati_v2i64_0
}