; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s

; Lowering tests for shufflevector on MSA vector types. Every function loads
; its input vector(s), applies one constant shuffle mask and stores the result;
; the FileCheck lines pin the instruction expected for that mask.
;
; Conventions visible in the patterns below: $4 holds %c (store address),
; $5 holds %a and $6 holds %b. R1/R2 capture the registers loaded from %a/%b
; and R3 captures the register that is finally stored.
;
; Function entry points use the label form of the check directive so that a
; mismatch is reported against the function it belongs to instead of being
; satisfied by output from a later function.

; Single-input full reversal (15..0): expects the mask to be loaded (ld.b via
; %lo) and a vshf.b that uses the input register for both sources.
define void @vshf_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK-LABEL: vshf_v16i8_0:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], %lo
  ; CHECK-DAG: vshf.b [[R3]], [[R1]], [[R1]]
  store <16 x i8> %2, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size vshf_v16i8_0
}

; Single-input mask that repeats element 1 everywhere: expects splati.b with
; index 1 rather than a vshf.b.
define void @vshf_v16i8_1(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK-LABEL: vshf_v16i8_1:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ; CHECK-DAG: splati.b [[R3:\$w[0-9]+]], [[R1]][1]
  store <16 x i8> %2, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size vshf_v16i8_1
}

; Two-input shuffle whose mask only references the second operand (16..30,
; then 16 again): expects vshf.b with %b's register as both sources. The load
; of %a is deliberately not checked.
define void @vshf_v16i8_2(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK-LABEL: vshf_v16i8_2:

  %1 = load <16 x i8>* %a
  %2 = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <16 x i8> %1, <16 x i8> %2, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 16>
  ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], %lo
  ; CHECK-DAG: vshf.b [[R3]], [[R2]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size vshf_v16i8_2
}

; Two-input shuffle whose mask mixes elements of both operands: expects a
; vshf.b that reads both loaded registers.
define void @vshf_v16i8_3(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK-LABEL: vshf_v16i8_3:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <16 x i8> %1, <16 x i8> %2, <16 x i32> <i32 17, i32 24, i32 25, i32 18, i32 19, i32 20, i32 28, i32 19, i32 1, i32 8, i32 9, i32 2, i32 3, i32 4, i32 12, i32 3>
  ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], %lo
  ; CHECK-DAG: vshf.b [[R3]], [[R1]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size vshf_v16i8_3
}

; Both shuffle operands are the same value, so indices 1 and 17 name the same
; element: expects the shuffle to be recognised as a splat (splati.b, index 1).
define void @vshf_v16i8_4(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK-LABEL: vshf_v16i8_4:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <16 x i8> %1, <16 x i8> %1, <16 x i32> <i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17>
  ; CHECK-DAG: splati.b [[R3:\$w[0-9]+]], [[R1]][1]
  store <16 x i8> %2, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size vshf_v16i8_4
}

; Halfword variant of the single-input full reversal (7..0): expects a mask
; load and vshf.h with the input register as both sources.
define void @vshf_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: vshf_v8i16_0:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], %lo
  ; CHECK-DAG: vshf.h [[R3]], [[R1]], [[R1]]
  store <8 x i16> %2, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size vshf_v8i16_0
}

; Halfword splat of element 1 from a single input: expects splati.h, index 1.
define void @vshf_v8i16_1(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: vshf_v8i16_1:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ; CHECK-DAG: splati.h [[R3:\$w[0-9]+]], [[R1]][1]
  store <8 x i16> %2, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size vshf_v8i16_1
}

; Halfword mask that only references the second operand (8..14, then 8):
; expects vshf.h with %b's register as both sources.
define void @vshf_v8i16_2(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: vshf_v8i16_2:

  %1 = load <8 x i16>* %a
  %2 = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 8>
  ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], %lo
  ; CHECK-DAG: vshf.h [[R3]], [[R2]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size vshf_v8i16_2
}

; Halfword mask mixing both operands: expects vshf.h on both loaded registers.
define void @vshf_v8i16_3(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: vshf_v8i16_3:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 1, i32 8, i32 9, i32 2, i32 3, i32 4, i32 12, i32 3>
  ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], %lo
  ; CHECK-DAG: vshf.h [[R3]], [[R1]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size vshf_v8i16_3
}

; Same value used for both operands, indices 1 and 9 name the same element:
; expects splati.h, index 1.
define void @vshf_v8i16_4(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: vshf_v8i16_4:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <8 x i16> %1, <8 x i16> %1, <8 x i32> <i32 1, i32 9, i32 1, i32 9, i32 1, i32 9, i32 1, i32 9>
  ; CHECK-DAG: splati.h [[R3:\$w[0-9]+]], [[R1]][1]
  store <8 x i16> %2, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size vshf_v8i16_4
}

; Note: v4i32 only has one 4-element set so it's impossible to get a vshf.w
; instruction when using a single vector.
; Single-input word reversal (3,2,1,0): expects shf.w with immediate 27
; (0b00011011, two bits per index with result element 0 in the low bits).
; Function entry points use the label form of the check directive so that a
; mismatch is reported against the function it belongs to.
define void @vshf_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: vshf_v4i32_0:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 27
  store <4 x i32> %2, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size vshf_v4i32_0
}

; Word splat of element 1 from a single input: expects shf.w with immediate
; 85 (0b01010101, index 1 in every position).
define void @vshf_v4i32_1(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: vshf_v4i32_1:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 85
  store <4 x i32> %2, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size vshf_v4i32_1
}

; Mask only references the second operand (4,5,6,4 = elements 0,1,2,0 of %b):
; expects shf.w on %b's register with immediate 36 (0b00100100). The load of
; %a is deliberately not checked.
define void @vshf_v4i32_2(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: vshf_v4i32_2:

  %1 = load <4 x i32>* %a
  %2 = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 4, i32 5, i32 6, i32 4>
  ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R2]], 36
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size vshf_v4i32_2
}

; Mask mixes elements of both operands: expects a mask load (ld.w via %lo)
; followed by vshf.w on both loaded registers.
define void @vshf_v4i32_3(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: vshf_v4i32_3:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 1, i32 5, i32 6, i32 4>
  ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], %lo
  ; CHECK-DAG: vshf.w [[R3]], [[R1]], [[R2]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size vshf_v4i32_3
}

; Same value used for both operands, so indices 1 and 5 name the same element:
; expects the same shf.w immediate (85) as the plain splat of element 1.
define void @vshf_v4i32_4(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: vshf_v4i32_4:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <4 x i32> %1, <4 x i32> %1, <4 x i32> <i32 1, i32 5, i32 5, i32 1>
  ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 85
  store <4 x i32> %2, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size vshf_v4i32_4
}

; Single-input doubleword swap (1,0): expects a mask load (ld.d via %lo) and
; vshf.d with the input register as both sources.
define void @vshf_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK-LABEL: vshf_v2i64_0:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
  ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], %lo
  ; CHECK-DAG: vshf.d [[R3]], [[R1]], [[R1]]
  store <2 x i64> %2, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size vshf_v2i64_0
}

; Doubleword splat of element 1 from a single input: expects splati.d, index 1.
define void @vshf_v2i64_1(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK-LABEL: vshf_v2i64_1:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
  ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][1]
  store <2 x i64> %2, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size vshf_v2i64_1
}

; Mask only references the second operand (3,2): expects vshf.d with %b's
; register as both sources. The load of %a is deliberately not checked.
define void @vshf_v2i64_2(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK-LABEL: vshf_v2i64_2:

  %1 = load <2 x i64>* %a
  %2 = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 3, i32 2>
  ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], %lo
  ; CHECK-DAG: vshf.d [[R3]], [[R2]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size vshf_v2i64_2
}

; Mask takes one element from each operand (1,2): expects vshf.d on both
; loaded registers.
define void @vshf_v2i64_3(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK-LABEL: vshf_v2i64_3:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 2>
  ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], %lo
  ; CHECK-DAG: vshf.d [[R3]], [[R1]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size vshf_v2i64_3
}

; Same value used for both operands, so indices 1 and 3 name the same element:
; expects splati.d, index 1.
define void @vshf_v2i64_4(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK-LABEL: vshf_v2i64_4:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <2 x i64> %1, <2 x i64> %1, <2 x i32> <i32 1, i32 3>
  ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][1]
  store <2 x i64> %2, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size vshf_v2i64_4
}

; Byte shuffle that repeats the pattern 1,3,2,0 inside every group of four
; elements: expects shf.b with immediate 45 (0b00101101).
; The label pattern is anchored to the start of a line because this function's
; name is a substring of the earlier vshf_v16i8_0 label.
define void @shf_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK-LABEL: {{^}}shf_v16i8_0:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> <i32 1, i32 3, i32 2, i32 0, i32 5, i32 7, i32 6, i32 4, i32 9, i32 11, i32 10, i32 8, i32 13, i32 15, i32 14, i32 12>
  ; CHECK-DAG: shf.b [[R3:\$w[0-9]+]], [[R1]], 45
  store <16 x i8> %2, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size shf_v16i8_0
}

; Halfword shuffle that reverses each group of four elements: expects shf.h
; with immediate 27. The label is anchored for the same substring reason as
; the other shf_ functions (vshf_v8i16_0 contains this name).
define void @shf_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: {{^}}shf_v8i16_0:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
  ; CHECK-DAG: shf.h [[R3:\$w[0-9]+]], [[R1]], 27
  store <8 x i16> %2, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size shf_v8i16_0
}

; Word reversal (3,2,1,0) from a single input: expects shf.w with immediate 27.
; Function entry points use the label form of the check directive so that a
; mismatch is reported against the function it belongs to. This label is also
; anchored to the start of a line because the name is a substring of the
; earlier vshf_v4i32_0 label.
define void @shf_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: {{^}}shf_v4i32_0:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 27
  store <4 x i32> %2, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size shf_v4i32_0
}

; shf.d does not exist

; Even-indexed elements of %a and %b taken alternately (0,16,2,18,...):
; expects ilvev.b.
define void @ilvev_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK-LABEL: ilvev_v16i8_0:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
                     <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
  ; CHECK-DAG: ilvev.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size ilvev_v16i8_0
}

; Halfword form of the even interleave (0,8,2,10,...): expects ilvev.h.
define void @ilvev_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: ilvev_v8i16_0:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
  ; CHECK-DAG: ilvev.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size ilvev_v8i16_0
}

; Word form of the even interleave (0,4,2,6): expects ilvev.w.
define void @ilvev_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: ilvev_v4i32_0:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  ; CHECK-DAG: ilvev.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size ilvev_v4i32_0
}

; Doubleword form of the even interleave (0,2): expects ilvev.d.
define void @ilvev_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK-LABEL: ilvev_v2i64_0:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 0, i32 2>
  ; CHECK-DAG: ilvev.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size ilvev_v2i64_0
}

; Odd-indexed elements of %a and %b taken alternately (1,17,3,19,...):
; expects ilvod.b.
define void @ilvod_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK-LABEL: ilvod_v16i8_0:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
                     <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
  ; CHECK-DAG: ilvod.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size ilvod_v16i8_0
}

; Halfword form of the odd interleave (1,9,3,11,...): expects ilvod.h.
define void @ilvod_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: ilvod_v8i16_0:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  ; CHECK-DAG: ilvod.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size ilvod_v8i16_0
}

; Word form of the odd interleave (1,5,3,7): expects ilvod.w.
define void @ilvod_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: ilvod_v4i32_0:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  ; CHECK-DAG: ilvod.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size ilvod_v4i32_0
}

; Doubleword form of the odd interleave (1,3): expects ilvod.d.
define void @ilvod_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK-LABEL: ilvod_v2i64_0:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 3>
  ; CHECK-DAG: ilvod.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size ilvod_v2i64_0
}

; Elements 0..7 of %a and %b interleaved (0,16,1,17,...): this test expects
; ilvl.b for that mask.
define void @ilvl_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK-LABEL: ilvl_v16i8_0:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
                     <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
  ; CHECK-DAG: ilvl.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size ilvl_v16i8_0
}

; Halfword form (0,8,1,9,2,10,3,11): expects ilvl.h.
define void @ilvl_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: ilvl_v8i16_0:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
  ; CHECK-DAG: ilvl.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size ilvl_v8i16_0
}

; Word form (0,4,1,5): expects ilvl.w.
define void @ilvl_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: ilvl_v4i32_0:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  ; CHECK-DAG: ilvl.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size ilvl_v4i32_0
}

; Doubleword form (0,2). The mask is identical to the one in ilvev_v2i64_0, so
; the expected instruction is ilvev.d.
define void @ilvl_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK-LABEL: ilvl_v2i64_0:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 0, i32 2>
  ; ilvl.d and ilvev.d are equivalent for v2i64
  ; CHECK-DAG: ilvev.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size ilvl_v2i64_0
}

; Elements 8..15 of %a and %b interleaved (8,24,9,25,...): this test expects
; ilvr.b for that mask.
define void @ilvr_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK-LABEL: ilvr_v16i8_0:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
                     <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
  ; CHECK-DAG: ilvr.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size ilvr_v16i8_0
}

; Halfword form (4,12,5,13,6,14,7,15): expects ilvr.h.
define void @ilvr_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: ilvr_v8i16_0:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  ; CHECK-DAG: ilvr.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size ilvr_v8i16_0
}

; Word form (2,6,3,7): expects ilvr.w.
define void @ilvr_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: ilvr_v4i32_0:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  ; CHECK-DAG: ilvr.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size ilvr_v4i32_0
}

; Doubleword form (1,3). The mask is identical to the one in ilvod_v2i64_0, so
; the expected instruction is ilvod.d.
define void @ilvr_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK-LABEL: ilvr_v2i64_0:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 3>
  ; ilvr.d and ilvod.d are equivalent for v2i64
  ; CHECK-DAG: ilvod.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size ilvr_v2i64_0
}

; All even-indexed elements of %a followed by all even-indexed elements of %b
; (0,2,...,14,16,18,...,30): expects pckev.b.
define void @pckev_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK-LABEL: pckev_v16i8_0:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
                     <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
  ; CHECK-DAG: pckev.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size pckev_v16i8_0
}

; Halfword form of the even pack (0,2,4,6,8,10,12,14): expects pckev.h.
define void @pckev_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: pckev_v8i16_0:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  ; CHECK-DAG: pckev.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size pckev_v8i16_0
}

; Word form of the even pack (0,2,4,6): expects pckev.w.
define void @pckev_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: pckev_v4i32_0:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  ; CHECK-DAG: pckev.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size pckev_v4i32_0
}

; Doubleword form of the even pack (0,2). The mask is identical to the one in
; ilvev_v2i64_0, so the expected instruction is ilvev.d.
define void @pckev_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK-LABEL: pckev_v2i64_0:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 0, i32 2>
  ; pckev.d and ilvev.d are equivalent for v2i64
  ; CHECK-DAG: ilvev.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size pckev_v2i64_0
}

; All odd-indexed elements of %a followed by all odd-indexed elements of %b
; (1,3,...,15,17,19,...,31): expects pckod.b.
define void @pckod_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK-LABEL: pckod_v16i8_0:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
                     <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
  ; CHECK-DAG: pckod.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size pckod_v16i8_0
}

; Halfword form of the odd pack (1,3,5,7,9,11,13,15): expects pckod.h.
define void @pckod_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: pckod_v8i16_0:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  ; CHECK-DAG: pckod.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size pckod_v8i16_0
}

; Word form of the odd pack (1,3,5,7): expects pckod.w.
define void @pckod_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: pckod_v4i32_0:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  ; CHECK-DAG: pckod.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size pckod_v4i32_0
}

; Doubleword form of the odd pack (1,3). The mask is identical to the one in
; ilvod_v2i64_0, so the expected instruction is ilvod.d.
define void @pckod_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK-LABEL: pckod_v2i64_0:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 3>
  ; pckod.d and ilvod.d are equivalent for v2i64
  ; CHECK-DAG: ilvod.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size pckod_v2i64_0
}

; Byte splat of element 4 from a single input: expects splati.b, index 4.
define void @splati_v16i8_0(<16 x i8>* %c, <16 x i8>* %a) nounwind {
  ; CHECK-LABEL: splati_v16i8_0:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <16 x i8> %1, <16 x i8> undef,
                     <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
  ; CHECK-DAG: splati.b [[R3:\$w[0-9]+]], [[R1]][4]
  store <16 x i8> %2, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size splati_v16i8_0
}

; Halfword splat of element 4 from a single input: expects splati.h, index 4.
define void @splati_v8i16_0(<8 x i16>* %c, <8 x i16>* %a) nounwind {
  ; CHECK-LABEL: splati_v8i16_0:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
  ; CHECK-DAG: splati.h [[R3:\$w[0-9]+]], [[R1]][4]
  store <8 x i16> %2, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size splati_v8i16_0
}

; Word splat of element 3 from a single input: the expected instruction is
; shf.w with immediate 255 (0b11111111, index 3 in every position).
define void @splati_v4i32_0(<4 x i32>* %c, <4 x i32>* %a) nounwind {
  ; CHECK-LABEL: splati_v4i32_0:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  ; shf.w and splati.w are equivalent
  ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 255
  store <4 x i32> %2, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size splati_v4i32_0
}

; Doubleword splat of element 1 from a single input: expects splati.d, index 1.
define void @splati_v2i64_0(<2 x i64>* %c, <2 x i64>* %a) nounwind {
  ; CHECK-LABEL: splati_v2i64_0:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
  ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][1]
  store <2 x i64> %2, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size splati_v2i64_0
}