; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s

; Code-generation tests for MIPS MSA bitwise, shift and bit-manipulation
; operations expressed as generic LLVM IR.
;
; Every test has the same shape: load the vector operand(s) through the
; pointer arguments, apply the operation under test, and store the result
; through %c. The checks expect %c in $4 and the remaining pointer arguments
; in $5, $6 and $7 in declaration order. Tests whose name ends in _i use a
; splat constant as one of the operands.
;
; Each function is bracketed by a check for its label and a check for its
; .size directive so that the order-independent checks in between cannot
; match instructions belonging to a neighbouring function.

;-----------------------------------------------------------------------------
; and, register-register: and.v is expected for every element width.
;-----------------------------------------------------------------------------

define void @and_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK: and_v16i8:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = and <16 x i8> %1, %2
  ; CHECK-DAG: and.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size and_v16i8
}

define void @and_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK: and_v8i16:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = and <8 x i16> %1, %2
  ; CHECK-DAG: and.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size and_v8i16
}

define void @and_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK: and_v4i32:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = and <4 x i32> %1, %2
  ; CHECK-DAG: and.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size and_v4i32
}

define void @and_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK: and_v2i64:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = and <2 x i64> %1, %2
  ; CHECK-DAG: and.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size and_v2i64
}

;-----------------------------------------------------------------------------
; and, splat immediate: the v16i8 case is expected to fold into andi.b; the
; wider element types are expected to materialise the splat with ldi and then
; use and.v.
;-----------------------------------------------------------------------------

define void @and_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
  ; CHECK: and_v16i8_i:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = and <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ; CHECK-DAG: andi.b [[R4:\$w[0-9]+]], [[R1]], 1
  store <16 x i8> %2, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R4]], 0($4)

  ret void
  ; CHECK: .size and_v16i8_i
}

define void @and_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
  ; CHECK: and_v8i16_i:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = and <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ; CHECK-DAG: ldi.h [[R3:\$w[0-9]+]], 1
  ; CHECK-DAG: and.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
  store <8 x i16> %2, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R4]], 0($4)

  ret void
  ; CHECK: .size and_v8i16_i
}

define void @and_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
  ; CHECK: and_v4i32_i:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = and <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
  ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 1
  ; CHECK-DAG: and.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
  store <4 x i32> %2, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R4]], 0($4)

  ret void
  ; CHECK: .size and_v4i32_i
}

define void @and_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
  ; CHECK: and_v2i64_i:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = and <2 x i64> %1, <i64 1, i64 1>
  ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 1
  ; CHECK-DAG: and.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
  store <2 x i64> %2, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R4]], 0($4)

  ret void
  ; CHECK: .size and_v2i64_i
}

;-----------------------------------------------------------------------------
; or, register-register: or.v is expected for every element width.
;-----------------------------------------------------------------------------

define void @or_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK: or_v16i8:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = or <16 x i8> %1, %2
  ; CHECK-DAG: or.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size or_v16i8
}

define void @or_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK: or_v8i16:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = or <8 x i16> %1, %2
  ; CHECK-DAG: or.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size or_v8i16
}

define void @or_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK: or_v4i32:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = or <4 x i32> %1, %2
  ; CHECK-DAG: or.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size or_v4i32
}

define void @or_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK: or_v2i64:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = or <2 x i64> %1, %2
  ; CHECK-DAG: or.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size or_v2i64
}

;-----------------------------------------------------------------------------
; or, splat immediate: ori.b for v16i8; ldi + or.v for wider element types.
;-----------------------------------------------------------------------------

define void @or_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
  ; CHECK: or_v16i8_i:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = or <16 x i8> %1, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ; CHECK-DAG: ori.b [[R4:\$w[0-9]+]], [[R1]], 3
  store <16 x i8> %2, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R4]], 0($4)

  ret void
  ; CHECK: .size or_v16i8_i
}

define void @or_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
  ; CHECK: or_v8i16_i:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = or <8 x i16> %1, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ; CHECK-DAG: ldi.h [[R3:\$w[0-9]+]], 3
  ; CHECK-DAG: or.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
  store <8 x i16> %2, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R4]], 0($4)

  ret void
  ; CHECK: .size or_v8i16_i
}

define void @or_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
  ; CHECK: or_v4i32_i:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = or <4 x i32> %1, <i32 3, i32 3, i32 3, i32 3>
  ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 3
  ; CHECK-DAG: or.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
  store <4 x i32> %2, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R4]], 0($4)

  ret void
  ; CHECK: .size or_v4i32_i
}

define void @or_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
  ; CHECK: or_v2i64_i:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = or <2 x i64> %1, <i64 3, i64 3>
  ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 3
  ; CHECK-DAG: or.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
  store <2 x i64> %2, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R4]], 0($4)

  ret void
  ; CHECK: .size or_v2i64_i
}

;-----------------------------------------------------------------------------
; nor, register-register: an or followed by an xor with all-ones is expected
; to be selected as a single nor.v.
;-----------------------------------------------------------------------------

define void @nor_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK: nor_v16i8:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = or <16 x i8> %1, %2
  %4 = xor <16 x i8> %3, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  ; CHECK-DAG: nor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %4, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size nor_v16i8
}

define void @nor_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK: nor_v8i16:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = or <8 x i16> %1, %2
  %4 = xor <8 x i16> %3, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  ; CHECK-DAG: nor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %4, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size nor_v8i16
}

define void @nor_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK: nor_v4i32:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = or <4 x i32> %1, %2
  %4 = xor <4 x i32> %3, <i32 -1, i32 -1, i32 -1, i32 -1>
  ; CHECK-DAG: nor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %4, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size nor_v4i32
}

define void @nor_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK: nor_v2i64:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = or <2 x i64> %1, %2
  %4 = xor <2 x i64> %3, <i64 -1, i64 -1>
  ; CHECK-DAG: nor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %4, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size nor_v2i64
}

;-----------------------------------------------------------------------------
; nor, splat immediate: nori.b for v16i8; ldi + nor.v for wider element types.
;-----------------------------------------------------------------------------

define void @nor_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
  ; CHECK: nor_v16i8_i:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = or <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %3 = xor <16 x i8> %2, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  ; The mnemonic is spelled out in full on purpose: patterns match as
  ; substrings, so a bare ori.b pattern would accept both ori.b and nori.b
  ; and would not notice a dropped inversion.
  ; CHECK-DAG: nori.b [[R4:\$w[0-9]+]], [[R1]], 1
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R4]], 0($4)

  ret void
  ; CHECK: .size nor_v16i8_i
}

define void @nor_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
  ; CHECK: nor_v8i16_i:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = or <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %3 = xor <8 x i16> %2, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  ; CHECK-DAG: ldi.h [[R3:\$w[0-9]+]], 1
  ; CHECK-DAG: nor.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R4]], 0($4)

  ret void
  ; CHECK: .size nor_v8i16_i
}

define void @nor_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
  ; CHECK: nor_v4i32_i:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = or <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
  %3 = xor <4 x i32> %2, <i32 -1, i32 -1, i32 -1, i32 -1>
  ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 1
  ; CHECK-DAG: nor.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R4]], 0($4)

  ret void
  ; CHECK: .size nor_v4i32_i
}

define void @nor_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
  ; CHECK: nor_v2i64_i:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = or <2 x i64> %1, <i64 1, i64 1>
  %3 = xor <2 x i64> %2, <i64 -1, i64 -1>
  ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 1
  ; CHECK-DAG: nor.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R4]], 0($4)

  ret void
  ; CHECK: .size nor_v2i64_i
}

;-----------------------------------------------------------------------------
; xor, register-register: xor.v is expected for every element width.
;-----------------------------------------------------------------------------

define void @xor_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK: xor_v16i8:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = xor <16 x i8> %1, %2
  ; CHECK-DAG: xor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size xor_v16i8
}

define void @xor_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK: xor_v8i16:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = xor <8 x i16> %1, %2
  ; CHECK-DAG: xor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size xor_v8i16
}

define void @xor_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK: xor_v4i32:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = xor <4 x i32> %1, %2
  ; CHECK-DAG: xor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size xor_v4i32
}

define void @xor_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK: xor_v2i64:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = xor <2 x i64> %1, %2
  ; CHECK-DAG: xor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size xor_v2i64
}

;-----------------------------------------------------------------------------
; xor, splat immediate: xori.b for v16i8; ldi + xor.v for wider element types.
;-----------------------------------------------------------------------------

define void @xor_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
  ; CHECK: xor_v16i8_i:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = xor <16 x i8> %1, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ; CHECK-DAG: xori.b [[R4:\$w[0-9]+]], [[R1]], 3
  store <16 x i8> %2, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R4]], 0($4)

  ret void
  ; CHECK: .size xor_v16i8_i
}

define void @xor_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
  ; CHECK: xor_v8i16_i:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = xor <8 x i16> %1, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ; CHECK-DAG: ldi.h [[R3:\$w[0-9]+]], 3
  ; CHECK-DAG: xor.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
  store <8 x i16> %2, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R4]], 0($4)

  ret void
  ; CHECK: .size xor_v8i16_i
}

define void @xor_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
  ; CHECK: xor_v4i32_i:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = xor <4 x i32> %1, <i32 3, i32 3, i32 3, i32 3>
  ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 3
  ; CHECK-DAG: xor.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
  store <4 x i32> %2, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R4]], 0($4)

  ret void
  ; CHECK: .size xor_v4i32_i
}

define void @xor_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
  ; CHECK: xor_v2i64_i:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = xor <2 x i64> %1, <i64 3, i64 3>
  ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 3
  ; CHECK-DAG: xor.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
  store <2 x i64> %2, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R4]], 0($4)

  ret void
  ; CHECK: .size xor_v2i64_i
}

;-----------------------------------------------------------------------------
; shl, register-register: sll.<df> matching the element width.
;-----------------------------------------------------------------------------

define void @sll_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK: sll_v16i8:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = shl <16 x i8> %1, %2
  ; CHECK-DAG: sll.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size sll_v16i8
}

define void @sll_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK: sll_v8i16:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = shl <8 x i16> %1, %2
  ; CHECK-DAG: sll.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size sll_v8i16
}

define void @sll_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK: sll_v4i32:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = shl <4 x i32> %1, %2
  ; CHECK-DAG: sll.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size sll_v4i32
}

define void @sll_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK: sll_v2i64:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = shl <2 x i64> %1, %2
  ; CHECK-DAG: sll.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size sll_v2i64
}

;-----------------------------------------------------------------------------
; shl, splat immediate: slli.<df> is expected for every element width.
;-----------------------------------------------------------------------------

define void @sll_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
  ; CHECK: sll_v16i8_i:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = shl <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ; CHECK-DAG: slli.b [[R4:\$w[0-9]+]], [[R1]], 1
  store <16 x i8> %2, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R4]], 0($4)

  ret void
  ; CHECK: .size sll_v16i8_i
}

define void @sll_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
  ; CHECK: sll_v8i16_i:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = shl <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ; CHECK-DAG: slli.h [[R4:\$w[0-9]+]], [[R1]], 1
  store <8 x i16> %2, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R4]], 0($4)

  ret void
  ; CHECK: .size sll_v8i16_i
}

define void @sll_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
  ; CHECK: sll_v4i32_i:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = shl <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
  ; CHECK-DAG: slli.w [[R4:\$w[0-9]+]], [[R1]], 1
  store <4 x i32> %2, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R4]], 0($4)

  ret void
  ; CHECK: .size sll_v4i32_i
}

define void @sll_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
  ; CHECK: sll_v2i64_i:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = shl <2 x i64> %1, <i64 1, i64 1>
  ; CHECK-DAG: slli.d [[R4:\$w[0-9]+]], [[R1]], 1
  store <2 x i64> %2, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R4]], 0($4)

  ret void
  ; CHECK: .size sll_v2i64_i
}

;-----------------------------------------------------------------------------
; ashr, register-register: sra.<df> matching the element width.
;-----------------------------------------------------------------------------

define void @sra_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK: sra_v16i8:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = ashr <16 x i8> %1, %2
  ; CHECK-DAG: sra.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size sra_v16i8
}

define void @sra_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK: sra_v8i16:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = ashr <8 x i16> %1, %2
  ; CHECK-DAG: sra.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size sra_v8i16
}

define void @sra_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK: sra_v4i32:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = ashr <4 x i32> %1, %2
  ; CHECK-DAG: sra.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size sra_v4i32
}

define void @sra_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK: sra_v2i64:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = ashr <2 x i64> %1, %2
  ; CHECK-DAG: sra.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size sra_v2i64
}

;-----------------------------------------------------------------------------
; ashr, splat immediate: srai.<df> is expected for every element width.
;-----------------------------------------------------------------------------

define void @sra_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
  ; CHECK: sra_v16i8_i:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = ashr <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ; CHECK-DAG: srai.b [[R4:\$w[0-9]+]], [[R1]], 1
  store <16 x i8> %2, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R4]], 0($4)

  ret void
  ; CHECK: .size sra_v16i8_i
}

define void @sra_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
  ; CHECK: sra_v8i16_i:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = ashr <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ; CHECK-DAG: srai.h [[R4:\$w[0-9]+]], [[R1]], 1
  store <8 x i16> %2, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R4]], 0($4)

  ret void
  ; CHECK: .size sra_v8i16_i
}

define void @sra_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
  ; CHECK: sra_v4i32_i:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = ashr <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
  ; CHECK-DAG: srai.w [[R4:\$w[0-9]+]], [[R1]], 1
  store <4 x i32> %2, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R4]], 0($4)

  ret void
  ; CHECK: .size sra_v4i32_i
}

define void @sra_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
  ; CHECK: sra_v2i64_i:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = ashr <2 x i64> %1, <i64 1, i64 1>
  ; CHECK-DAG: srai.d [[R4:\$w[0-9]+]], [[R1]], 1
  store <2 x i64> %2, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R4]], 0($4)

  ret void
  ; CHECK: .size sra_v2i64_i
}

;-----------------------------------------------------------------------------
; lshr, register-register: srl.<df> matching the element width.
;-----------------------------------------------------------------------------

define void @srl_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK: srl_v16i8:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = lshr <16 x i8> %1, %2
  ; CHECK-DAG: srl.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size srl_v16i8
}

define void @srl_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK: srl_v8i16:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = lshr <8 x i16> %1, %2
  ; CHECK-DAG: srl.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size srl_v8i16
}

define void @srl_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK: srl_v4i32:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = lshr <4 x i32> %1, %2
  ; CHECK-DAG: srl.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size srl_v4i32
}

define void @srl_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK: srl_v2i64:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = lshr <2 x i64> %1, %2
  ; CHECK-DAG: srl.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size srl_v2i64
}

;-----------------------------------------------------------------------------
; lshr, splat immediate: srli.<df> is expected for every element width.
;-----------------------------------------------------------------------------

define void @srl_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
  ; CHECK: srl_v16i8_i:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = lshr <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ; CHECK-DAG: srli.b [[R4:\$w[0-9]+]], [[R1]], 1
  store <16 x i8> %2, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R4]], 0($4)

  ret void
  ; CHECK: .size srl_v16i8_i
}

define void @srl_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
  ; CHECK: srl_v8i16_i:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = lshr <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ; CHECK-DAG: srli.h [[R4:\$w[0-9]+]], [[R1]], 1
  store <8 x i16> %2, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R4]], 0($4)

  ret void
  ; CHECK: .size srl_v8i16_i
}

define void @srl_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
  ; CHECK: srl_v4i32_i:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = lshr <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
  ; CHECK-DAG: srli.w [[R4:\$w[0-9]+]], [[R1]], 1
  store <4 x i32> %2, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R4]], 0($4)

  ret void
  ; CHECK: .size srl_v4i32_i
}

define void @srl_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
  ; CHECK: srl_v2i64_i:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = lshr <2 x i64> %1, <i64 1, i64 1>
  ; CHECK-DAG: srli.d [[R4:\$w[0-9]+]], [[R1]], 1
  store <2 x i64> %2, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R4]], 0($4)

  ret void
  ; CHECK: .size srl_v2i64_i
}

;-----------------------------------------------------------------------------
; Population count: calls to the llvm.ctpop vector intrinsics are expected to
; select pcnt.<df>.
;-----------------------------------------------------------------------------

define void @ctpop_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
  ; CHECK: ctpop_v16i8:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <16 x i8> @llvm.ctpop.v16i8 (<16 x i8> %1)
  ; CHECK-DAG: pcnt.b [[R3:\$w[0-9]+]], [[R1]]
  store <16 x i8> %2, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size ctpop_v16i8
}

define void @ctpop_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
  ; CHECK: ctpop_v8i16:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <8 x i16> @llvm.ctpop.v8i16 (<8 x i16> %1)
  ; CHECK-DAG: pcnt.h [[R3:\$w[0-9]+]], [[R1]]
  store <8 x i16> %2, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size ctpop_v8i16
}

define void @ctpop_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
  ; CHECK: ctpop_v4i32:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <4 x i32> @llvm.ctpop.v4i32 (<4 x i32> %1)
  ; CHECK-DAG: pcnt.w [[R3:\$w[0-9]+]], [[R1]]
  store <4 x i32> %2, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size ctpop_v4i32
}

define void @ctpop_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
  ; CHECK: ctpop_v2i64:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64 (<2 x i64> %1)
  ; CHECK-DAG: pcnt.d [[R3:\$w[0-9]+]], [[R1]]
  store <2 x i64> %2, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size ctpop_v2i64
}

;-----------------------------------------------------------------------------
; Leading-zero count: calls to the llvm.ctlz vector intrinsics are expected to
; select nlzc.<df>.
;-----------------------------------------------------------------------------

define void @ctlz_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
  ; CHECK: ctlz_v16i8:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <16 x i8> @llvm.ctlz.v16i8 (<16 x i8> %1)
  ; CHECK-DAG: nlzc.b [[R3:\$w[0-9]+]], [[R1]]
  store <16 x i8> %2, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size ctlz_v16i8
}

define void @ctlz_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
  ; CHECK: ctlz_v8i16:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <8 x i16> @llvm.ctlz.v8i16 (<8 x i16> %1)
  ; CHECK-DAG: nlzc.h [[R3:\$w[0-9]+]], [[R1]]
  store <8 x i16> %2, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size ctlz_v8i16
}

define void @ctlz_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
  ; CHECK: ctlz_v4i32:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <4 x i32> @llvm.ctlz.v4i32 (<4 x i32> %1)
  ; CHECK-DAG: nlzc.w [[R3:\$w[0-9]+]], [[R1]]
  store <4 x i32> %2, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size ctlz_v4i32
}

define void @ctlz_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
  ; CHECK: ctlz_v2i64:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <2 x i64> @llvm.ctlz.v2i64 (<2 x i64> %1)
  ; CHECK-DAG: nlzc.d [[R3:\$w[0-9]+]], [[R1]]
  store <2 x i64> %2, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size ctlz_v2i64
}

;-----------------------------------------------------------------------------
; Bit select: (x & mask) | (y & ~mask) written with and/or/xor is expected to
; collapse into one of the bit-move/bit-select instructions. In each of these
; tests the result is produced in place in one of the input registers, so the
; store check reuses that register's capture.
;-----------------------------------------------------------------------------

define void @bsel_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b, <16 x i8>* %m) nounwind {
  ; CHECK: bsel_v16i8:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = load <16 x i8>* %m
  ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0($7)
  %4 = xor <16 x i8> %3, <i8 -1, i8 -1, i8 -1, i8 -1,
                          i8 -1, i8 -1, i8 -1, i8 -1,
                          i8 -1, i8 -1, i8 -1, i8 -1,
                          i8 -1, i8 -1, i8 -1, i8 -1>
  %5 = and <16 x i8> %1, %3
  %6 = and <16 x i8> %2, %4
  %7 = or <16 x i8> %5, %6
  ; bmnz is the same operation
  ; NOTE(review): as I read the MSA spec, bmnz.v wd, ws, wt takes ws where wt
  ; is set and keeps wd elsewhere, which for the operands below would be
  ; (b & m) | (a & ~m), while the IR above computes (a & m) | (b & ~m).
  ; Confirm the expected operand order against the backend.
  ; CHECK-DAG: bmnz.v [[R1]], [[R2]], [[R3]]
  store <16 x i8> %7, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R1]], 0($4)

  ret void
  ; CHECK: .size bsel_v16i8
}

define void @bsel_v16i8_i(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %m) nounwind {
  ; CHECK: bsel_v16i8_i:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>* %m
  ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0($6)
  %3 = xor <16 x i8> %2, <i8 -1, i8 -1, i8 -1, i8 -1,
                          i8 -1, i8 -1, i8 -1, i8 -1,
                          i8 -1, i8 -1, i8 -1, i8 -1,
                          i8 -1, i8 -1, i8 -1, i8 -1>
  %4 = and <16 x i8> %1, %3
  %5 = and <16 x i8> <i8 6, i8 6, i8 6, i8 6,
                      i8 6, i8 6, i8 6, i8 6,
                      i8 6, i8 6, i8 6, i8 6,
                      i8 6, i8 6, i8 6, i8 6>, %2
  %6 = or <16 x i8> %4, %5
  ; CHECK-DAG: bseli.b [[R3]], [[R1]], 6
  store <16 x i8> %6, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size bsel_v16i8_i
}

; For the wider element types the mask is a constant splat (6) and its
; complement is written out as an unsigned literal of the element width.

define void @bsel_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK: bsel_v8i16:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = and <8 x i16> %1, <i16 6, i16 6, i16 6, i16 6,
                          i16 6, i16 6, i16 6, i16 6>
  %4 = and <8 x i16> %2, <i16 65529, i16 65529, i16 65529, i16 65529,
                          i16 65529, i16 65529, i16 65529, i16 65529>
  %5 = or <8 x i16> %3, %4
  ; CHECK-DAG: ldi.h [[R3:\$w[0-9]+]], 6
  ; CHECK-DAG: bsel.v [[R3]], [[R2]], [[R1]]
  store <8 x i16> %5, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size bsel_v8i16
}

define void @bsel_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK: bsel_v4i32:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = and <4 x i32> %1, <i32 6, i32 6, i32 6, i32 6>
  %4 = and <4 x i32> %2, <i32 4294967289, i32 4294967289, i32 4294967289, i32 4294967289>
  %5 = or <4 x i32> %3, %4
  ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 6
  ; CHECK-DAG: bsel.v [[R3]], [[R2]], [[R1]]
  store <4 x i32> %5, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size bsel_v4i32
}

define void @bsel_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK: bsel_v2i64:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = and <2 x i64> %1, <i64 6, i64 6>
  %4 = and <2 x i64> %2, <i64 18446744073709551609, i64 18446744073709551609>
  %5 = or <2 x i64> %3, %4
  ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 6
  ; CHECK-DAG: bsel.v [[R3]], [[R2]], [[R1]]
  store <2 x i64> %5, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size bsel_v2i64
}

;-----------------------------------------------------------------------------
; Bit insert left: the masks keep the most significant bits of %a and the
; remaining low bits of %b; binsli.<df> is expected, writing into the
; register that holds %b.
;
; NOTE(review): as I read the MSA spec, the binsli/binsri immediate is the
; number of bits copied minus one, whereas the immediates expected below equal
; the number of bits kept from %a (2 for the two-bit masks, 61 for the 61-bit
; mask). Confirm which encoding the backend is meant to emit.
;-----------------------------------------------------------------------------

define void @binsl_v16i8_i(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK: binsl_v16i8_i:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = and <16 x i8> %1, <i8 192, i8 192, i8 192, i8 192,
                          i8 192, i8 192, i8 192, i8 192,
                          i8 192, i8 192, i8 192, i8 192,
                          i8 192, i8 192, i8 192, i8 192>
  %4 = and <16 x i8> %2, <i8 63, i8 63, i8 63, i8 63,
                          i8 63, i8 63, i8 63, i8 63,
                          i8 63, i8 63, i8 63, i8 63,
                          i8 63, i8 63, i8 63, i8 63>
  %5 = or <16 x i8> %3, %4
  ; CHECK-DAG: binsli.b [[R2]], [[R1]], 2
  store <16 x i8> %5, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R2]], 0($4)

  ret void
  ; CHECK: .size binsl_v16i8_i
}

define void @binsl_v8i16_i(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK: binsl_v8i16_i:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = and <8 x i16> %1, <i16 49152, i16 49152, i16 49152, i16 49152,
                          i16 49152, i16 49152, i16 49152, i16 49152>
  %4 = and <8 x i16> %2, <i16 16383, i16 16383, i16 16383, i16 16383,
                          i16 16383, i16 16383, i16 16383, i16 16383>
  %5 = or <8 x i16> %3, %4
  ; CHECK-DAG: binsli.h [[R2]], [[R1]], 2
  store <8 x i16> %5, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R2]], 0($4)

  ret void
  ; CHECK: .size binsl_v8i16_i
}

define void @binsl_v4i32_i(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK: binsl_v4i32_i:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = and <4 x i32> %1, <i32 3221225472, i32 3221225472, i32 3221225472, i32 3221225472>
  %4 = and <4 x i32> %2, <i32 1073741823, i32 1073741823, i32 1073741823, i32 1073741823>
  %5 = or <4 x i32> %3, %4
  ; CHECK-DAG: binsli.w [[R2]], [[R1]], 2
  store <4 x i32> %5, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R2]], 0($4)

  ret void
  ; CHECK: .size binsl_v4i32_i
}

define void @binsl_v2i64_i(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK: binsl_v2i64_i:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = and <2 x i64> %1, <i64 18446744073709551608, i64 18446744073709551608>
  %4 = and <2 x i64> %2, <i64 7, i64 7>
  %5 = or <2 x i64> %3, %4
  ; TODO: We use a particularly wide mask here to work around a legalization
  ;       issue. If the mask doesn't fit within a 10-bit immediate, it gets
  ;       legalized into a constant pool. We should add a test to cover the
  ;       other cases once they correctly select binsli.d.
  ; CHECK-DAG: binsli.d [[R2]], [[R1]], 61
  store <2 x i64> %5, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R2]], 0($4)

  ret void
  ; CHECK: .size binsl_v2i64_i
}

;-----------------------------------------------------------------------------
; Bit insert right: the masks keep the least significant bits of %a and the
; remaining high bits of %b; binsri.<df> is expected, writing into the
; register that holds %b.
;-----------------------------------------------------------------------------

define void @binsr_v16i8_i(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK: binsr_v16i8_i:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = and <16 x i8> %1, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3,
                          i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  %4 = and <16 x i8> %2, <i8 252, i8 252, i8 252, i8 252,
                          i8 252, i8 252, i8 252, i8 252,
                          i8 252, i8 252, i8 252, i8 252,
                          i8 252, i8 252, i8 252, i8 252>
  %5 = or <16 x i8> %3, %4
  ; CHECK-DAG: binsri.b [[R2]], [[R1]], 2
  store <16 x i8> %5, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R2]], 0($4)

  ret void
  ; CHECK: .size binsr_v16i8_i
}

define void @binsr_v8i16_i(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK: binsr_v8i16_i:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = and <8 x i16> %1, <i16 3, i16 3, i16 3, i16 3,
                          i16 3, i16 3, i16 3, i16 3>
  %4 = and <8 x i16> %2, <i16 65532, i16 65532, i16 65532, i16 65532,
                          i16 65532, i16 65532, i16 65532, i16 65532>
1202 %5 = or <8 x i16> %3, %4 1203 ; CHECK-DAG: binsri.h [[R2]], [[R1]], 2 1204 store <8 x i16> %5, <8 x i16>* %c 1205 ; CHECK-DAG: st.h [[R2]], 0($4) 1206 1207 ret void 1208 ; CHECK: .size binsr_v8i16_i 1209} 1210 1211define void @binsr_v4i32_i(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind { 1212 ; CHECK: binsr_v4i32_i: 1213 1214 %1 = load <4 x i32>* %a 1215 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) 1216 %2 = load <4 x i32>* %b 1217 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) 1218 %3 = and <4 x i32> %1, <i32 3, i32 3, i32 3, i32 3> 1219 %4 = and <4 x i32> %2, <i32 4294967292, i32 4294967292, i32 4294967292, i32 4294967292> 1220 %5 = or <4 x i32> %3, %4 1221 ; CHECK-DAG: binsri.w [[R2]], [[R1]], 2 1222 store <4 x i32> %5, <4 x i32>* %c 1223 ; CHECK-DAG: st.w [[R2]], 0($4) 1224 1225 ret void 1226 ; CHECK: .size binsr_v4i32_i 1227} 1228 1229define void @binsr_v2i64_i(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind { 1230 ; CHECK: binsr_v2i64_i: 1231 1232 %1 = load <2 x i64>* %a 1233 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) 1234 %2 = load <2 x i64>* %b 1235 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) 1236 %3 = and <2 x i64> %1, <i64 3, i64 3> 1237 %4 = and <2 x i64> %2, <i64 18446744073709551612, i64 18446744073709551612> 1238 %5 = or <2 x i64> %3, %4 1239 ; CHECK-DAG: binsri.d [[R2]], [[R1]], 2 1240 store <2 x i64> %5, <2 x i64>* %c 1241 ; CHECK-DAG: st.d [[R2]], 0($4) 1242 1243 ret void 1244 ; CHECK: .size binsr_v2i64_i 1245} 1246 1247define void @bclr_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind { 1248 ; CHECK: bclr_v16i8: 1249 1250 %1 = load <16 x i8>* %a 1251 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) 1252 %2 = load <16 x i8>* %b 1253 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6) 1254 %3 = shl <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, %2 1255 %4 = xor <16 x i8> %3, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> 
1256 %5 = and <16 x i8> %1, %4 1257 ; CHECK-DAG: bclr.b [[R3:\$w[0-9]+]], [[R1]], [[R2]] 1258 store <16 x i8> %5, <16 x i8>* %c 1259 ; CHECK-DAG: st.b [[R3]], 0($4) 1260 1261 ret void 1262 ; CHECK: .size bclr_v16i8 1263} 1264 1265define void @bclr_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind { 1266 ; CHECK: bclr_v8i16: 1267 1268 %1 = load <8 x i16>* %a 1269 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) 1270 %2 = load <8 x i16>* %b 1271 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6) 1272 %3 = shl <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, %2 1273 %4 = xor <8 x i16> %3, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> 1274 %5 = and <8 x i16> %1, %4 1275 ; CHECK-DAG: bclr.h [[R3:\$w[0-9]+]], [[R1]], [[R2]] 1276 store <8 x i16> %5, <8 x i16>* %c 1277 ; CHECK-DAG: st.h [[R3]], 0($4) 1278 1279 ret void 1280 ; CHECK: .size bclr_v8i16 1281} 1282 1283define void @bclr_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind { 1284 ; CHECK: bclr_v4i32: 1285 1286 %1 = load <4 x i32>* %a 1287 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) 1288 %2 = load <4 x i32>* %b 1289 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) 1290 %3 = shl <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %2 1291 %4 = xor <4 x i32> %3, <i32 -1, i32 -1, i32 -1, i32 -1> 1292 %5 = and <4 x i32> %1, %4 1293 ; CHECK-DAG: bclr.w [[R3:\$w[0-9]+]], [[R1]], [[R2]] 1294 store <4 x i32> %5, <4 x i32>* %c 1295 ; CHECK-DAG: st.w [[R3]], 0($4) 1296 1297 ret void 1298 ; CHECK: .size bclr_v4i32 1299} 1300 1301define void @bclr_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind { 1302 ; CHECK: bclr_v2i64: 1303 1304 %1 = load <2 x i64>* %a 1305 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) 1306 %2 = load <2 x i64>* %b 1307 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) 1308 %3 = shl <2 x i64> <i64 1, i64 1>, %2 1309 %4 = xor <2 x i64> %3, <i64 -1, i64 -1> 1310 %5 = and <2 x i64> %1, %4 1311 ; CHECK-DAG: bclr.d [[R3:\$w[0-9]+]], [[R1]], [[R2]] 1312 store <2 x i64> %5, <2 x i64>* %c 1313 ; 
CHECK-DAG: st.d [[R3]], 0($4) 1314 1315 ret void 1316 ; CHECK: .size bclr_v2i64 1317} 1318 1319define void @bset_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind { 1320 ; CHECK: bset_v16i8: 1321 1322 %1 = load <16 x i8>* %a 1323 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) 1324 %2 = load <16 x i8>* %b 1325 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6) 1326 %3 = shl <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, %2 1327 %4 = or <16 x i8> %1, %3 1328 ; CHECK-DAG: bset.b [[R3:\$w[0-9]+]], [[R1]], [[R2]] 1329 store <16 x i8> %4, <16 x i8>* %c 1330 ; CHECK-DAG: st.b [[R3]], 0($4) 1331 1332 ret void 1333 ; CHECK: .size bset_v16i8 1334} 1335 1336define void @bset_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind { 1337 ; CHECK: bset_v8i16: 1338 1339 %1 = load <8 x i16>* %a 1340 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) 1341 %2 = load <8 x i16>* %b 1342 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6) 1343 %3 = shl <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, %2 1344 %4 = or <8 x i16> %1, %3 1345 ; CHECK-DAG: bset.h [[R3:\$w[0-9]+]], [[R1]], [[R2]] 1346 store <8 x i16> %4, <8 x i16>* %c 1347 ; CHECK-DAG: st.h [[R3]], 0($4) 1348 1349 ret void 1350 ; CHECK: .size bset_v8i16 1351} 1352 1353define void @bset_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind { 1354 ; CHECK: bset_v4i32: 1355 1356 %1 = load <4 x i32>* %a 1357 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) 1358 %2 = load <4 x i32>* %b 1359 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) 1360 %3 = shl <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %2 1361 %4 = or <4 x i32> %1, %3 1362 ; CHECK-DAG: bset.w [[R3:\$w[0-9]+]], [[R1]], [[R2]] 1363 store <4 x i32> %4, <4 x i32>* %c 1364 ; CHECK-DAG: st.w [[R3]], 0($4) 1365 1366 ret void 1367 ; CHECK: .size bset_v4i32 1368} 1369 1370define void @bset_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind { 1371 ; CHECK: bset_v2i64: 1372 1373 %1 = load <2 x i64>* %a 1374 ; CHECK-DAG: 
ld.d [[R1:\$w[0-9]+]], 0($5) 1375 %2 = load <2 x i64>* %b 1376 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) 1377 %3 = shl <2 x i64> <i64 1, i64 1>, %2 1378 %4 = or <2 x i64> %1, %3 1379 ; CHECK-DAG: bset.d [[R3:\$w[0-9]+]], [[R1]], [[R2]] 1380 store <2 x i64> %4, <2 x i64>* %c 1381 ; CHECK-DAG: st.d [[R3]], 0($4) 1382 1383 ret void 1384 ; CHECK: .size bset_v2i64 1385} 1386 1387define void @bneg_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind { 1388 ; CHECK: bneg_v16i8: 1389 1390 %1 = load <16 x i8>* %a 1391 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) 1392 %2 = load <16 x i8>* %b 1393 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6) 1394 %3 = shl <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, %2 1395 %4 = xor <16 x i8> %1, %3 1396 ; CHECK-DAG: bneg.b [[R3:\$w[0-9]+]], [[R1]], [[R2]] 1397 store <16 x i8> %4, <16 x i8>* %c 1398 ; CHECK-DAG: st.b [[R3]], 0($4) 1399 1400 ret void 1401 ; CHECK: .size bneg_v16i8 1402} 1403 1404define void @bneg_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind { 1405 ; CHECK: bneg_v8i16: 1406 1407 %1 = load <8 x i16>* %a 1408 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) 1409 %2 = load <8 x i16>* %b 1410 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6) 1411 %3 = shl <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, %2 1412 %4 = xor <8 x i16> %1, %3 1413 ; CHECK-DAG: bneg.h [[R3:\$w[0-9]+]], [[R1]], [[R2]] 1414 store <8 x i16> %4, <8 x i16>* %c 1415 ; CHECK-DAG: st.h [[R3]], 0($4) 1416 1417 ret void 1418 ; CHECK: .size bneg_v8i16 1419} 1420 1421define void @bneg_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind { 1422 ; CHECK: bneg_v4i32: 1423 1424 %1 = load <4 x i32>* %a 1425 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) 1426 %2 = load <4 x i32>* %b 1427 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) 1428 %3 = shl <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %2 1429 %4 = xor <4 x i32> %1, %3 1430 ; CHECK-DAG: bneg.w [[R3:\$w[0-9]+]], [[R1]], [[R2]] 1431 store <4 x 
i32> %4, <4 x i32>* %c 1432 ; CHECK-DAG: st.w [[R3]], 0($4) 1433 1434 ret void 1435 ; CHECK: .size bneg_v4i32 1436} 1437 1438define void @bneg_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind { 1439 ; CHECK: bneg_v2i64: 1440 1441 %1 = load <2 x i64>* %a 1442 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) 1443 %2 = load <2 x i64>* %b 1444 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) 1445 %3 = shl <2 x i64> <i64 1, i64 1>, %2 1446 %4 = xor <2 x i64> %1, %3 1447 ; CHECK-DAG: bneg.d [[R3:\$w[0-9]+]], [[R1]], [[R2]] 1448 store <2 x i64> %4, <2 x i64>* %c 1449 ; CHECK-DAG: st.d [[R3]], 0($4) 1450 1451 ret void 1452 ; CHECK: .size bneg_v2i64 1453} 1454 1455define void @bclri_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind { 1456 ; CHECK: bclri_v16i8: 1457 1458 %1 = load <16 x i8>* %a 1459 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) 1460 %2 = xor <16 x i8> <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>, 1461 <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> 1462 %3 = and <16 x i8> %1, %2 1463 ; bclri.b and andi.b are exactly equivalent. 
1464 ; CHECK-DAG: andi.b [[R3:\$w[0-9]+]], [[R1]], 247 1465 store <16 x i8> %3, <16 x i8>* %c 1466 ; CHECK-DAG: st.b [[R3]], 0($4) 1467 1468 ret void 1469 ; CHECK: .size bclri_v16i8 1470} 1471 1472define void @bclri_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind { 1473 ; CHECK: bclri_v8i16: 1474 1475 %1 = load <8 x i16>* %a 1476 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) 1477 %2 = xor <8 x i16> <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>, 1478 <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> 1479 %3 = and <8 x i16> %1, %2 1480 ; CHECK-DAG: bclri.h [[R3:\$w[0-9]+]], [[R1]], 3 1481 store <8 x i16> %3, <8 x i16>* %c 1482 ; CHECK-DAG: st.h [[R3]], 0($4) 1483 1484 ret void 1485 ; CHECK: .size bclri_v8i16 1486} 1487 1488define void @bclri_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind { 1489 ; CHECK: bclri_v4i32: 1490 1491 %1 = load <4 x i32>* %a 1492 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) 1493 %2 = xor <4 x i32> <i32 8, i32 8, i32 8, i32 8>, 1494 <i32 -1, i32 -1, i32 -1, i32 -1> 1495 %3 = and <4 x i32> %1, %2 1496 ; CHECK-DAG: bclri.w [[R3:\$w[0-9]+]], [[R1]], 3 1497 store <4 x i32> %3, <4 x i32>* %c 1498 ; CHECK-DAG: st.w [[R3]], 0($4) 1499 1500 ret void 1501 ; CHECK: .size bclri_v4i32 1502} 1503 1504define void @bclri_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind { 1505 ; CHECK: bclri_v2i64: 1506 1507 %1 = load <2 x i64>* %a 1508 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) 1509 %2 = xor <2 x i64> <i64 8, i64 8>, 1510 <i64 -1, i64 -1> 1511 %3 = and <2 x i64> %1, %2 1512 ; CHECK-DAG: bclri.d [[R3:\$w[0-9]+]], [[R1]], 3 1513 store <2 x i64> %3, <2 x i64>* %c 1514 ; CHECK-DAG: st.d [[R3]], 0($4) 1515 1516 ret void 1517 ; CHECK: .size bclri_v2i64 1518} 1519 1520define void @bseti_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind { 1521 ; CHECK: bseti_v16i8: 1522 1523 %1 = load <16 x i8>* %a 1524 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) 1525 %2 = or <16 x i8> %1, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, 
i8 8> 1526 ; CHECK-DAG: bseti.b [[R3:\$w[0-9]+]], [[R1]], 3 1527 store <16 x i8> %2, <16 x i8>* %c 1528 ; CHECK-DAG: st.b [[R3]], 0($4) 1529 1530 ret void 1531 ; CHECK: .size bseti_v16i8 1532} 1533 1534define void @bseti_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind { 1535 ; CHECK: bseti_v8i16: 1536 1537 %1 = load <8 x i16>* %a 1538 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) 1539 %2 = or <8 x i16> %1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> 1540 ; CHECK-DAG: bseti.h [[R3:\$w[0-9]+]], [[R1]], 3 1541 store <8 x i16> %2, <8 x i16>* %c 1542 ; CHECK-DAG: st.h [[R3]], 0($4) 1543 1544 ret void 1545 ; CHECK: .size bseti_v8i16 1546} 1547 1548define void @bseti_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind { 1549 ; CHECK: bseti_v4i32: 1550 1551 %1 = load <4 x i32>* %a 1552 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) 1553 %2 = or <4 x i32> %1, <i32 8, i32 8, i32 8, i32 8> 1554 ; CHECK-DAG: bseti.w [[R3:\$w[0-9]+]], [[R1]], 3 1555 store <4 x i32> %2, <4 x i32>* %c 1556 ; CHECK-DAG: st.w [[R3]], 0($4) 1557 1558 ret void 1559 ; CHECK: .size bseti_v4i32 1560} 1561 1562define void @bseti_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind { 1563 ; CHECK: bseti_v2i64: 1564 1565 %1 = load <2 x i64>* %a 1566 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) 1567 %2 = or <2 x i64> %1, <i64 8, i64 8> 1568 ; CHECK-DAG: bseti.d [[R3:\$w[0-9]+]], [[R1]], 3 1569 store <2 x i64> %2, <2 x i64>* %c 1570 ; CHECK-DAG: st.d [[R3]], 0($4) 1571 1572 ret void 1573 ; CHECK: .size bseti_v2i64 1574} 1575 1576define void @bnegi_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind { 1577 ; CHECK: bnegi_v16i8: 1578 1579 %1 = load <16 x i8>* %a 1580 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) 1581 %2 = xor <16 x i8> %1, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8> 1582 ; CHECK-DAG: bnegi.b [[R3:\$w[0-9]+]], [[R1]], 3 1583 store <16 x i8> %2, <16 x i8>* %c 1584 ; CHECK-DAG: st.b [[R3]], 0($4) 1585 1586 ret void 1587 ; CHECK: .size bnegi_v16i8 1588} 1589 1590define 
void @bnegi_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind { 1591 ; CHECK: bnegi_v8i16: 1592 1593 %1 = load <8 x i16>* %a 1594 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) 1595 %2 = xor <8 x i16> %1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> 1596 ; CHECK-DAG: bnegi.h [[R3:\$w[0-9]+]], [[R1]], 3 1597 store <8 x i16> %2, <8 x i16>* %c 1598 ; CHECK-DAG: st.h [[R3]], 0($4) 1599 1600 ret void 1601 ; CHECK: .size bnegi_v8i16 1602} 1603 1604define void @bnegi_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind { 1605 ; CHECK: bnegi_v4i32: 1606 1607 %1 = load <4 x i32>* %a 1608 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) 1609 %2 = xor <4 x i32> %1, <i32 8, i32 8, i32 8, i32 8> 1610 ; CHECK-DAG: bnegi.w [[R3:\$w[0-9]+]], [[R1]], 3 1611 store <4 x i32> %2, <4 x i32>* %c 1612 ; CHECK-DAG: st.w [[R3]], 0($4) 1613 1614 ret void 1615 ; CHECK: .size bnegi_v4i32 1616} 1617 1618define void @bnegi_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind { 1619 ; CHECK: bnegi_v2i64: 1620 1621 %1 = load <2 x i64>* %a 1622 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) 1623 %2 = xor <2 x i64> %1, <i64 8, i64 8> 1624 ; CHECK-DAG: bnegi.d [[R3:\$w[0-9]+]], [[R1]], 3 1625 store <2 x i64> %2, <2 x i64>* %c 1626 ; CHECK-DAG: st.d [[R3]], 0($4) 1627 1628 ret void 1629 ; CHECK: .size bnegi_v2i64 1630} 1631 1632declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %val) 1633declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %val) 1634declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %val) 1635declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %val) 1636declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %val) 1637declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %val) 1638declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %val) 1639declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %val) 1640