; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mcpu=core-avx2 -mattr=avx2 | FileCheck %s
; Each test below wraps a single llvm.x86.avx2.* intrinsic call and checks
; that llc selects the corresponding VEX-encoded AVX2 instruction.

define <16 x i16> @test_x86_avx2_packssdw(<8 x i32> %a0, <8 x i32> %a1) {
  ; CHECK: vpackssdw
  %res = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a0, <8 x i32> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32>, <8 x i32>) nounwind readnone


define <32 x i8> @test_x86_avx2_packsswb(<16 x i16> %a0, <16 x i16> %a1) {
  ; CHECK: vpacksswb
  %res = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %a0, <16 x i16> %a1) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16>, <16 x i16>) nounwind readnone


define <32 x i8> @test_x86_avx2_packuswb(<16 x i16> %a0, <16 x i16> %a1) {
  ; CHECK: vpackuswb
  %res = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %a0, <16 x i16> %a1) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16>, <16 x i16>) nounwind readnone


define <32 x i8> @test_x86_avx2_padds_b(<32 x i8> %a0, <32 x i8> %a1) {
  ; CHECK: vpaddsb
  %res = call <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8>, <32 x i8>) nounwind readnone


define <16 x i16> @test_x86_avx2_padds_w(<16 x i16> %a0, <16 x i16> %a1) {
  ; CHECK: vpaddsw
  %res = call <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16>, <16 x i16>) nounwind readnone


define <32 x i8> @test_x86_avx2_paddus_b(<32 x i8> %a0, <32 x i8> %a1) {
  ; CHECK: vpaddusb
  %res = call <32 x i8> @llvm.x86.avx2.paddus.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.paddus.b(<32 x i8>, <32 x i8>) nounwind readnone


define <16 x i16> @test_x86_avx2_paddus_w(<16 x i16> %a0, <16 x i16> %a1) {
  ; CHECK: vpaddusw
  %res = call <16 x i16> @llvm.x86.avx2.paddus.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.paddus.w(<16 x i16>, <16 x i16>) nounwind readnone


define <32 x i8> @test_x86_avx2_pavg_b(<32 x i8> %a0, <32 x i8> %a1) {
  ; CHECK: vpavgb
  %res = call <32 x i8> @llvm.x86.avx2.pavg.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pavg.b(<32 x i8>, <32 x i8>) nounwind readnone


define <16 x i16> @test_x86_avx2_pavg_w(<16 x i16> %a0, <16 x i16> %a1) {
  ; CHECK: vpavgw
  %res = call <16 x i16> @llvm.x86.avx2.pavg.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pavg.w(<16 x i16>, <16 x i16>) nounwind readnone


define <8 x i32> @test_x86_avx2_pmadd_wd(<16 x i16> %a0, <16 x i16> %a1) {
  ; CHECK: vpmaddwd
  %res = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> %a0, <16 x i16> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16>, <16 x i16>) nounwind readnone


define <16 x i16> @test_x86_avx2_pmaxs_w(<16 x i16> %a0, <16 x i16> %a1) {
  ; CHECK: vpmaxsw
  %res = call <16 x i16> @llvm.x86.avx2.pmaxs.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmaxs.w(<16 x i16>, <16 x i16>) nounwind readnone


define <32 x i8> @test_x86_avx2_pmaxu_b(<32 x i8> %a0, <32 x i8> %a1) {
  ; CHECK: vpmaxub
  %res = call <32 x i8> @llvm.x86.avx2.pmaxu.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pmaxu.b(<32 x i8>, <32 x i8>) nounwind readnone


define <16 x i16> @test_x86_avx2_pmins_w(<16 x i16> %a0, <16 x i16> %a1) {
  ; CHECK: vpminsw
  %res = call <16 x i16> @llvm.x86.avx2.pmins.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmins.w(<16 x i16>, <16 x i16>) nounwind readnone


define <32 x i8> @test_x86_avx2_pminu_b(<32 x i8> %a0, <32 x i8> %a1) {
  ; CHECK: vpminub
  %res = call <32 x i8> @llvm.x86.avx2.pminu.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pminu.b(<32 x i8>, <32 x i8>) nounwind readnone


define i32 @test_x86_avx2_pmovmskb(<32 x i8> %a0) {
  ; CHECK: vpmovmskb
  %res = call i32 @llvm.x86.avx2.pmovmskb(<32 x i8> %a0) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx2.pmovmskb(<32 x i8>) nounwind readnone


define <16 x i16> @test_x86_avx2_pmulh_w(<16 x i16> %a0, <16 x i16> %a1) {
  ; CHECK: vpmulhw
  %res = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16>, <16 x i16>) nounwind readnone


define <16 x i16> @test_x86_avx2_pmulhu_w(<16 x i16> %a0, <16 x i16> %a1) {
  ; CHECK: vpmulhuw
  %res = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16>, <16 x i16>) nounwind readnone


define <4 x i64> @test_x86_avx2_pmulu_dq(<8 x i32> %a0, <8 x i32> %a1) {
  ; CHECK: vpmuludq
  %res = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> %a0, <8 x i32> %a1) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32>, <8 x i32>) nounwind readnone


define <4 x i64> @test_x86_avx2_psad_bw(<32 x i8> %a0, <32 x i8> %a1) {
  ; CHECK: vpsadbw
  %res = call <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8> %a0, <32 x i8> %a1) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8>, <32 x i8>) nounwind readnone


define <8 x i32> @test_x86_avx2_psll_d(<8 x i32> %a0, <4 x i32> %a1) {
  ; CHECK: vpslld
  %res = call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %a0, <4 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32>, <4 x i32>) nounwind readnone


define <4 x i64> @test_x86_avx2_psll_dq(<4 x i64> %a0) {
  ; CHECK: vpslldq
  %res = call <4 x i64> @llvm.x86.avx2.psll.dq(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psll.dq(<4 x i64>, i32) nounwind readnone


define <4 x i64> @test_x86_avx2_psll_dq_bs(<4 x i64> %a0) {
  ; CHECK: vpslldq
  %res = call <4 x i64> @llvm.x86.avx2.psll.dq.bs(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psll.dq.bs(<4 x i64>, i32) nounwind readnone


define <4 x i64> @test_x86_avx2_psll_q(<4 x i64> %a0, <2 x i64> %a1) {
  ; CHECK: vpsllq
  %res = call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %a0, <2 x i64> %a1) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64>, <2 x i64>) nounwind readnone


define <16 x i16> @test_x86_avx2_psll_w(<16 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: vpsllw
  %res = call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %a0, <8 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16>, <8 x i16>) nounwind readnone


define <8 x i32> @test_x86_avx2_pslli_d(<8 x i32> %a0) {
  ; CHECK: vpslld
  %res = call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %a0, i32 7) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32>, i32) nounwind readnone


define <4 x i64> @test_x86_avx2_pslli_q(<4 x i64> %a0) {
  ; CHECK: vpsllq
  %res = call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64>, i32) nounwind readnone


define <16 x i16> @test_x86_avx2_pslli_w(<16 x i16> %a0) {
  ; CHECK: vpsllw
  %res = call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %a0, i32 7) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16>, i32) nounwind readnone


define <8 x i32> @test_x86_avx2_psra_d(<8 x i32> %a0, <4 x i32> %a1) {
  ; CHECK: vpsrad
  %res = call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %a0, <4 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32>, <4 x i32>) nounwind readnone


define <16 x i16> @test_x86_avx2_psra_w(<16 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: vpsraw
  %res = call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %a0, <8 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16>, <8 x i16>) nounwind readnone


define <8 x i32> @test_x86_avx2_psrai_d(<8 x i32> %a0) {
  ; CHECK: vpsrad
  %res = call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %a0, i32 7) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32>, i32) nounwind readnone


define <16 x i16> @test_x86_avx2_psrai_w(<16 x i16> %a0) {
  ; CHECK: vpsraw
  %res = call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %a0, i32 7) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16>, i32) nounwind readnone


define <8 x i32> @test_x86_avx2_psrl_d(<8 x i32> %a0, <4 x i32> %a1) {
  ; CHECK: vpsrld
  %res = call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %a0, <4 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) nounwind readnone


define <4 x i64> @test_x86_avx2_psrl_dq(<4 x i64> %a0) {
  ; CHECK: vpsrldq
  %res = call <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64>, i32) nounwind readnone


define <4 x i64> @test_x86_avx2_psrl_dq_bs(<4 x i64> %a0) {
  ; CHECK: vpsrldq
  %res = call <4 x i64> @llvm.x86.avx2.psrl.dq.bs(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psrl.dq.bs(<4 x i64>, i32) nounwind readnone


define <4 x i64> @test_x86_avx2_psrl_q(<4 x i64> %a0, <2 x i64> %a1) {
  ; CHECK: vpsrlq
  %res = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %a0, <2 x i64> %a1) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) nounwind readnone


define <16 x i16> @test_x86_avx2_psrl_w(<16 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: vpsrlw
  %res = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %a0, <8 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) nounwind readnone


define <8 x i32> @test_x86_avx2_psrli_d(<8 x i32> %a0) {
  ; CHECK: vpsrld
  %res = call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %a0, i32 7) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32>, i32) nounwind readnone


define <4 x i64> @test_x86_avx2_psrli_q(<4 x i64> %a0) {
  ; CHECK: vpsrlq
  %res = call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64>, i32) nounwind readnone


define <16 x i16> @test_x86_avx2_psrli_w(<16 x i16> %a0) {
  ; CHECK: vpsrlw
  %res = call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %a0, i32 7) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16>, i32) nounwind readnone


define <32 x i8> @test_x86_avx2_psubs_b(<32 x i8> %a0, <32 x i8> %a1) {
  ; CHECK: vpsubsb
  %res = call <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8>, <32 x i8>) nounwind readnone


define <16 x i16> @test_x86_avx2_psubs_w(<16 x i16> %a0, <16 x i16> %a1) {
  ; CHECK: vpsubsw
  %res = call <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16>, <16 x i16>) nounwind readnone


define <32 x i8> @test_x86_avx2_psubus_b(<32 x i8> %a0, <32 x i8> %a1) {
  ; CHECK: vpsubusb
  %res = call <32 x i8> @llvm.x86.avx2.psubus.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.psubus.b(<32 x i8>, <32 x i8>) nounwind readnone


define <16 x i16> @test_x86_avx2_psubus_w(<16 x i16> %a0, <16 x i16> %a1) {
  ; CHECK: vpsubusw
  %res = call <16 x i16> @llvm.x86.avx2.psubus.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.psubus.w(<16 x i16>, <16 x i16>) nounwind readnone


define <32 x i8> @test_x86_avx2_pabs_b(<32 x i8> %a0) {
  ; CHECK: vpabsb
  %res = call <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8> %a0) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8>) nounwind readnone


define <8 x i32> @test_x86_avx2_pabs_d(<8 x i32> %a0) {
  ; CHECK: vpabsd
  %res = call <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32> %a0) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32>) nounwind readnone


define <16 x i16> @test_x86_avx2_pabs_w(<16 x i16> %a0) {
  ; CHECK: vpabsw
  %res = call <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16> %a0) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16>) nounwind readnone


define <8 x i32> @test_x86_avx2_phadd_d(<8 x i32> %a0, <8 x i32> %a1) {
  ; CHECK: vphaddd
  %res = call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32>, <8 x i32>) nounwind readnone


define <16 x i16> @test_x86_avx2_phadd_sw(<16 x i16> %a0, <16 x i16> %a1) {
  ; CHECK: vphaddsw
  %res = call <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16>, <16 x i16>) nounwind readnone


define <16 x i16> @test_x86_avx2_phadd_w(<16 x i16> %a0, <16 x i16> %a1) {
  ; CHECK: vphaddw
  %res = call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16>, <16 x i16>) nounwind readnone


define <8 x i32> @test_x86_avx2_phsub_d(<8 x i32> %a0, <8 x i32> %a1) {
  ; CHECK: vphsubd
  %res = call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32>, <8 x i32>) nounwind readnone


define <16 x i16> @test_x86_avx2_phsub_sw(<16 x i16> %a0, <16 x i16> %a1) {
  ; CHECK: vphsubsw
  %res = call <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16>, <16 x i16>) nounwind readnone


define <16 x i16> @test_x86_avx2_phsub_w(<16 x i16> %a0, <16 x i16> %a1) {
  ; CHECK: vphsubw
  %res = call <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16>, <16 x i16>) nounwind readnone


define <16 x i16> @test_x86_avx2_pmadd_ub_sw(<32 x i8> %a0, <32 x i8> %a1) {
  ; CHECK: vpmaddubsw
  %res = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> %a0, <32 x i8> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8>, <32 x i8>) nounwind readnone


define <16 x i16> @test_x86_avx2_pmul_hr_sw(<16 x i16> %a0, <16 x i16> %a1) {
  ; CHECK: vpmulhrsw
  %res = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16>, <16 x i16>) nounwind readnone


define <32 x i8> @test_x86_avx2_pshuf_b(<32 x i8> %a0, <32 x i8> %a1) {
  ; CHECK: vpshufb
  %res = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8>, <32 x i8>) nounwind readnone


define <32 x i8> @test_x86_avx2_psign_b(<32 x i8> %a0, <32 x i8> %a1) {
  ; CHECK: vpsignb
  %res = call <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8>, <32 x i8>) nounwind readnone


define <8 x i32> @test_x86_avx2_psign_d(<8 x i32> %a0, <8 x i32> %a1) {
  ; CHECK: vpsignd
  %res = call <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32>, <8 x i32>) nounwind readnone


define <16 x i16> @test_x86_avx2_psign_w(<16 x i16> %a0, <16 x i16> %a1) {
  ; CHECK: vpsignw
  %res = call <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16>, <16 x i16>) nounwind readnone


define <4 x i64> @test_x86_avx2_movntdqa(i8* %a0) {
  ; CHECK: movl
  ; CHECK: vmovntdqa
  %res = call <4 x i64> @llvm.x86.avx2.movntdqa(i8* %a0) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.movntdqa(i8*) nounwind readonly


define <16 x i16> @test_x86_avx2_mpsadbw(<32 x i8> %a0, <32 x i8> %a1) {
  ; CHECK: vmpsadbw
  %res = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> %a0, <32 x i8> %a1, i32 7) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8>, <32 x i8>, i32) nounwind readnone


define <16 x i16> @test_x86_avx2_packusdw(<8 x i32> %a0, <8 x i32> %a1) {
  ; CHECK: vpackusdw
  %res = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a0, <8 x i32> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32>, <8 x i32>) nounwind readnone


define <32 x i8> @test_x86_avx2_pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> %a2) {
  ; CHECK: vpblendvb
  %res = call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> %a2) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8>, <32 x i8>, <32 x i8>) nounwind readnone


define <16 x i16> @test_x86_avx2_pblendw(<16 x i16> %a0, <16 x i16> %a1) {
  ; CHECK: vpblendw
  %res = call <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16> %a0, <16 x i16> %a1, i32 7) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16>, <16 x i16>, i32) nounwind readnone


define <32 x i8> @test_x86_avx2_pmaxsb(<32 x i8> %a0, <32 x i8> %a1) {
  ; CHECK: vpmaxsb
  %res = call <32 x i8> @llvm.x86.avx2.pmaxs.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pmaxs.b(<32 x i8>, <32 x i8>) nounwind readnone


define <8 x i32> @test_x86_avx2_pmaxsd(<8 x i32> %a0, <8 x i32> %a1) {
  ; CHECK: vpmaxsd
  %res = call <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32>, <8 x i32>) nounwind readnone


define <8 x i32> @test_x86_avx2_pmaxud(<8 x i32> %a0, <8 x i32> %a1) {
  ; CHECK: vpmaxud
  %res = call <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32>, <8 x i32>) nounwind readnone


define <16 x i16> @test_x86_avx2_pmaxuw(<16 x i16> %a0, <16 x i16> %a1) {
  ; CHECK: vpmaxuw
  %res = call <16 x i16> @llvm.x86.avx2.pmaxu.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmaxu.w(<16 x i16>, <16 x i16>) nounwind readnone


define <32 x i8> @test_x86_avx2_pminsb(<32 x i8> %a0, <32 x i8> %a1) {
  ; CHECK: vpminsb
  %res = call <32 x i8> @llvm.x86.avx2.pmins.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pmins.b(<32 x i8>, <32 x i8>) nounwind readnone


define <8 x i32> @test_x86_avx2_pminsd(<8 x i32> %a0, <8 x i32> %a1) {
  ; CHECK: vpminsd
  %res = call <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32>, <8 x i32>) nounwind readnone


define <8 x i32> @test_x86_avx2_pminud(<8 x i32> %a0, <8 x i32> %a1) {
  ; CHECK: vpminud
  %res = call <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32>, <8 x i32>) nounwind readnone


define <16 x i16> @test_x86_avx2_pminuw(<16 x i16> %a0, <16 x i16> %a1) {
  ; CHECK: vpminuw
  %res = call <16 x i16> @llvm.x86.avx2.pminu.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pminu.w(<16 x i16>, <16 x i16>) nounwind readnone


define <8 x i32> @test_x86_avx2_pmovsxbd(<16 x i8> %a0) {
  ; CHECK: vpmovsxbd
  %res = call <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8> %a0) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8>) nounwind readnone


define <4 x i64> @test_x86_avx2_pmovsxbq(<16 x i8> %a0) {
  ; CHECK: vpmovsxbq
  %res = call <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8> %a0) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8>) nounwind readnone


define <16 x i16> @test_x86_avx2_pmovsxbw(<16 x i8> %a0) {
  ; CHECK: vpmovsxbw
  %res = call <16 x i16> @llvm.x86.avx2.pmovsxbw(<16 x i8> %a0) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmovsxbw(<16 x i8>) nounwind readnone


define <4 x i64> @test_x86_avx2_pmovsxdq(<4 x i32> %a0) {
  ; CHECK: vpmovsxdq
  %res = call <4 x i64> @llvm.x86.avx2.pmovsxdq(<4 x i32> %a0) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pmovsxdq(<4 x i32>) nounwind readnone


define <8 x i32> @test_x86_avx2_pmovsxwd(<8 x i16> %a0) {
  ; CHECK: vpmovsxwd
  %res = call <8 x i32> @llvm.x86.avx2.pmovsxwd(<8 x i16> %a0) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pmovsxwd(<8 x i16>) nounwind readnone


define <4 x i64> @test_x86_avx2_pmovsxwq(<8 x i16> %a0) {
  ; CHECK: vpmovsxwq
  %res = call <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16> %a0) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16>) nounwind readnone


define <8 x i32> @test_x86_avx2_pmovzxbd(<16 x i8> %a0) {
  ; CHECK: vpmovzxbd
  %res = call <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8> %a0) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8>) nounwind readnone


define <4 x i64> @test_x86_avx2_pmovzxbq(<16 x i8> %a0) {
  ; CHECK: vpmovzxbq
  %res = call <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8> %a0) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8>) nounwind readnone


define <16 x i16> @test_x86_avx2_pmovzxbw(<16 x i8> %a0) {
  ; CHECK: vpmovzxbw
  %res = call <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8> %a0) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8>) nounwind readnone


define <4 x i64> @test_x86_avx2_pmovzxdq(<4 x i32> %a0) {
  ; CHECK: vpmovzxdq
  %res = call <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32> %a0) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32>) nounwind readnone


define <8 x i32> @test_x86_avx2_pmovzxwd(<8 x i16> %a0) {
  ; CHECK: vpmovzxwd
  %res = call <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16> %a0) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16>) nounwind readnone


define <4 x i64> @test_x86_avx2_pmovzxwq(<8 x i16> %a0) {
  ; CHECK: vpmovzxwq
  %res = call <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16> %a0) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16>) nounwind readnone


define <4 x i64> @test_x86_avx2_pmul.dq(<8 x i32> %a0, <8 x i32> %a1) {
  ; CHECK: vpmuldq
  %res = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> %a0, <8 x i32> %a1) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32>, <8 x i32>) nounwind readnone


define <4 x i64> @test_x86_avx2_vbroadcasti128(i8* %a0) {
  ; CHECK: vbroadcasti128
  %res = call <4 x i64> @llvm.x86.avx2.vbroadcasti128(i8* %a0) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.vbroadcasti128(i8*) nounwind readonly

define <4 x double> @test_x86_avx2_vbroadcast_sd_pd_256(<2 x double> %a0) {
  ; CHECK: vbroadcastsd
  %res = call <4 x double> @llvm.x86.avx2.vbroadcast.sd.pd.256(<2 x double> %a0) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx2.vbroadcast.sd.pd.256(<2 x double>) nounwind readonly


define <4 x float> @test_x86_avx2_vbroadcast_ss_ps(<4 x float> %a0) {
  ; CHECK: vbroadcastss
  %res = call <4 x float> @llvm.x86.avx2.vbroadcast.ss.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx2.vbroadcast.ss.ps(<4 x float>) nounwind readonly


define <8 x float> @test_x86_avx2_vbroadcast_ss_ps_256(<4 x float> %a0) {
  ; CHECK: vbroadcastss
  %res = call <8 x float> @llvm.x86.avx2.vbroadcast.ss.ps.256(<4 x float> %a0) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx2.vbroadcast.ss.ps.256(<4 x float>) nounwind readonly


define <4 x i32> @test_x86_avx2_pblendd_128(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK: vpblendd
  %res = call <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32> %a0, <4 x i32> %a1, i32 7) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32>, <4 x i32>, i32) nounwind readnone


define <8 x i32> @test_x86_avx2_pblendd_256(<8 x i32> %a0, <8 x i32> %a1) {
  ; CHECK: vpblendd
  %res = call <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32> %a0, <8 x i32> %a1, i32 7) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32>, <8 x i32>, i32) nounwind readnone


define <16 x i8> @test_x86_avx2_pbroadcastb_128(<16 x i8> %a0) {
  ; CHECK: vpbroadcastb
  %res = call <16 x i8> @llvm.x86.avx2.pbroadcastb.128(<16 x i8> %a0) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.avx2.pbroadcastb.128(<16 x i8>) nounwind readonly


define <32 x i8> @test_x86_avx2_pbroadcastb_256(<16 x i8> %a0) {
  ; CHECK: vpbroadcastb
  %res = call <32 x i8> @llvm.x86.avx2.pbroadcastb.256(<16 x i8> %a0) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pbroadcastb.256(<16 x i8>) nounwind readonly


define <8 x i16> @test_x86_avx2_pbroadcastw_128(<8 x i16> %a0) {
  ; CHECK: vpbroadcastw
  %res = call <8 x i16> @llvm.x86.avx2.pbroadcastw.128(<8 x i16> %a0) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.avx2.pbroadcastw.128(<8 x i16>) nounwind readonly


define <16 x i16> @test_x86_avx2_pbroadcastw_256(<8 x i16> %a0) {
  ; CHECK: vpbroadcastw
  %res = call <16 x i16> @llvm.x86.avx2.pbroadcastw.256(<8 x i16> %a0) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pbroadcastw.256(<8 x i16>) nounwind readonly


define <4 x i32> @test_x86_avx2_pbroadcastd_128(<4 x i32> %a0) {
  ; CHECK: vpbroadcastd
  %res = call <4 x i32> @llvm.x86.avx2.pbroadcastd.128(<4 x i32> %a0) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx2.pbroadcastd.128(<4 x i32>) nounwind readonly


define <8 x i32> @test_x86_avx2_pbroadcastd_256(<4 x i32> %a0) {
  ; CHECK: vpbroadcastd
  %res = call <8 x i32> @llvm.x86.avx2.pbroadcastd.256(<4 x i32> %a0) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pbroadcastd.256(<4 x i32>) nounwind readonly


define <2 x i64> @test_x86_avx2_pbroadcastq_128(<2 x i64> %a0) {
  ; CHECK: vpbroadcastq
  %res = call <2 x i64> @llvm.x86.avx2.pbroadcastq.128(<2 x i64> %a0) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.avx2.pbroadcastq.128(<2 x i64>) nounwind readonly


define <4 x i64> @test_x86_avx2_pbroadcastq_256(<2 x i64> %a0) {
  ; CHECK: vpbroadcastq
  %res = call <4 x i64> @llvm.x86.avx2.pbroadcastq.256(<2 x i64> %a0) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pbroadcastq.256(<2 x i64>) nounwind readonly


define <8 x i32> @test_x86_avx2_permd(<8 x i32> %a0, <8 x i32> %a1) {
  ; CHECK: vpermd
  %res = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.permd(<8 x i32>, <8 x i32>) nounwind readonly


define <8 x float> @test_x86_avx2_permps(<8 x float> %a0, <8 x float> %a1) {
  ; CHECK: vpermps
  %res = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x float>) nounwind readonly


define <4 x i64> @test_x86_avx2_vperm2i128(<4 x i64> %a0, <4 x i64> %a1) {
  ; CHECK: vperm2i128
  %res = call <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64> %a0, <4 x i64> %a1, i8 1) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64>, <4 x i64>, i8) nounwind readonly


define <2 x i64> @test_x86_avx2_vextracti128(<4 x i64> %a0) {
  ; CHECK: vextracti128
  %res = call <2 x i64> @llvm.x86.avx2.vextracti128(<4 x i64> %a0, i8 7) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.avx2.vextracti128(<4 x i64>, i8) nounwind readnone


define <4 x i64> @test_x86_avx2_vinserti128(<4 x i64> %a0, <2 x i64> %a1) {
  ; CHECK: vinserti128
  %res = call <4 x i64> @llvm.x86.avx2.vinserti128(<4 x i64> %a0, <2 x i64> %a1, i8 7) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.vinserti128(<4 x i64>, <2 x i64>, i8) nounwind readnone


define <2 x i64> @test_x86_avx2_maskload_q(i8* %a0, <2 x i64> %a1) {
  ; CHECK: vpmaskmovq
  %res = call <2 x i64> @llvm.x86.avx2.maskload.q(i8* %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.avx2.maskload.q(i8*, <2 x i64>) nounwind readonly


define <4 x i64> @test_x86_avx2_maskload_q_256(i8* %a0, <4 x i64> %a1) {
  ; CHECK: vpmaskmovq
  %res = call <4 x i64> @llvm.x86.avx2.maskload.q.256(i8* %a0, <4 x i64> %a1) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.maskload.q.256(i8*, <4 x i64>) nounwind readonly


define <4 x i32> @test_x86_avx2_maskload_d(i8* %a0, <4 x i32> %a1) {
  ; CHECK: vpmaskmovd
  %res = call <4 x i32> @llvm.x86.avx2.maskload.d(i8* %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx2.maskload.d(i8*, <4 x i32>) nounwind readonly


define <8 x i32> @test_x86_avx2_maskload_d_256(i8* %a0, <8 x i32> %a1) {
  ; CHECK: vpmaskmovd
  %res = call <8 x i32> @llvm.x86.avx2.maskload.d.256(i8* %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.maskload.d.256(i8*, <8 x i32>) nounwind readonly


define void @test_x86_avx2_maskstore_q(i8* %a0, <2 x i64> %a1, <2 x i64> %a2) {
  ; CHECK: vpmaskmovq
  call void @llvm.x86.avx2.maskstore.q(i8* %a0, <2 x i64> %a1, <2 x i64> %a2)
  ret void
}
declare void @llvm.x86.avx2.maskstore.q(i8*, <2 x i64>, <2 x i64>) nounwind


define void @test_x86_avx2_maskstore_q_256(i8* %a0, <4 x i64> %a1, <4 x i64> %a2) {
  ; CHECK: vpmaskmovq
  call void @llvm.x86.avx2.maskstore.q.256(i8* %a0, <4 x i64> %a1, <4 x i64> %a2)
  ret void
}
declare void @llvm.x86.avx2.maskstore.q.256(i8*, <4 x i64>, <4 x i64>) nounwind


define void @test_x86_avx2_maskstore_d(i8* %a0, <4 x i32> %a1, <4 x i32> %a2) {
  ; CHECK: vpmaskmovd
  call void @llvm.x86.avx2.maskstore.d(i8* %a0, <4 x i32> %a1, <4 x i32> %a2)
  ret void
}
declare void @llvm.x86.avx2.maskstore.d(i8*, <4 x i32>, <4 x i32>) nounwind


define void @test_x86_avx2_maskstore_d_256(i8* %a0, <8 x i32> %a1, <8 x i32> %a2) {
  ; CHECK: vpmaskmovd
  call void @llvm.x86.avx2.maskstore.d.256(i8* %a0, <8 x i32> %a1, <8 x i32> %a2)
  ret void
}
declare void @llvm.x86.avx2.maskstore.d.256(i8*, <8 x i32>, <8 x i32>) nounwind


define <4 x i32> @test_x86_avx2_psllv_d(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK: vpsllvd
  %res = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32>, <4 x i32>) nounwind readnone


define <8 x i32> @test_x86_avx2_psllv_d_256(<8 x i32> %a0, <8 x i32> %a1) {
  ; CHECK: vpsllvd
  %res = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32>, <8 x i32>) nounwind readnone


define <2 x i64> @test_x86_avx2_psllv_q(<2 x i64> %a0, <2 x i64> %a1) {
  ; CHECK: vpsllvq
  %res = call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64>, <2 x i64>) nounwind readnone


define <4 x i64> @test_x86_avx2_psllv_q_256(<4 x i64> %a0, <4 x i64> %a1) {
  ; CHECK: vpsllvq
  %res = call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %a0, <4 x i64> %a1) ; <<4 x i64>> [#uses=1]
ret <4 x i64> %res 919} 920declare <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64>, <4 x i64>) nounwind readnone 921 922 923define <4 x i32> @test_x86_avx2_psrlv_d(<4 x i32> %a0, <4 x i32> %a1) { 924 ; CHECK: vpsrlvd 925 %res = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] 926 ret <4 x i32> %res 927} 928declare <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32>, <4 x i32>) nounwind readnone 929 930 931define <8 x i32> @test_x86_avx2_psrlv_d_256(<8 x i32> %a0, <8 x i32> %a1) { 932 ; CHECK: vpsrlvd 933 %res = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1] 934 ret <8 x i32> %res 935} 936declare <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32>, <8 x i32>) nounwind readnone 937 938 939define <2 x i64> @test_x86_avx2_psrlv_q(<2 x i64> %a0, <2 x i64> %a1) { 940 ; CHECK: vpsrlvq 941 %res = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1] 942 ret <2 x i64> %res 943} 944declare <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64>, <2 x i64>) nounwind readnone 945 946 947define <4 x i64> @test_x86_avx2_psrlv_q_256(<4 x i64> %a0, <4 x i64> %a1) { 948 ; CHECK: vpsrlvq 949 %res = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %a0, <4 x i64> %a1) ; <<4 x i64>> [#uses=1] 950 ret <4 x i64> %res 951} 952declare <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64>, <4 x i64>) nounwind readnone 953 954 955define <4 x i32> @test_x86_avx2_psrav_d(<4 x i32> %a0, <4 x i32> %a1) { 956 ; CHECK: vpsravd 957 %res = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] 958 ret <4 x i32> %res 959} 960declare <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32>, <4 x i32>) nounwind readnone 961 962 963define <8 x i32> @test_x86_avx2_psrav_d_256(<8 x i32> %a0, <8 x i32> %a1) { 964 ; CHECK: vpsravd 965 %res = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1] 966 ret <8 x i32> %res 967} 968declare <8 x i32> 
@llvm.x86.avx2.psrav.d.256(<8 x i32>, <8 x i32>) nounwind readnone 969 970; This is checked here because the execution dependency fix pass makes it hard to test in AVX mode since we don't have 256-bit integer instructions 971define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) { 972 ; CHECK: vmovdqu 973 ; add operation forces the execution domain. 974 %a2 = add <32 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> 975 call void @llvm.x86.avx.storeu.dq.256(i8* %a0, <32 x i8> %a2) 976 ret void 977} 978declare void @llvm.x86.avx.storeu.dq.256(i8*, <32 x i8>) nounwind 979 980define <2 x double> @test_x86_avx2_gather_d_pd(<2 x double> %a0, i8* %a1, 981 <4 x i32> %idx, <2 x double> %mask) { 982 ; CHECK: vgatherdpd 983 %res = call <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double> %a0, 984 i8* %a1, <4 x i32> %idx, <2 x double> %mask, i8 2) ; 985 ret <2 x double> %res 986} 987declare <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double>, i8*, 988 <4 x i32>, <2 x double>, i8) nounwind readonly 989 990define <4 x double> @test_x86_avx2_gather_d_pd_256(<4 x double> %a0, i8* %a1, 991 <4 x i32> %idx, <4 x double> %mask) { 992 ; CHECK: vgatherdpd 993 %res = call <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> %a0, 994 i8* %a1, <4 x i32> %idx, <4 x double> %mask, i8 2) ; 995 ret <4 x double> %res 996} 997declare <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double>, i8*, 998 <4 x i32>, <4 x double>, i8) nounwind readonly 999 1000define <2 x double> @test_x86_avx2_gather_q_pd(<2 x double> %a0, i8* %a1, 1001 <2 x i64> %idx, <2 x double> %mask) { 1002 ; CHECK: vgatherqpd 1003 %res = call <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double> %a0, 1004 i8* %a1, <2 x i64> %idx, <2 x double> %mask, i8 2) ; 1005 ret <2 x double> %res 1006} 1007declare <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double>, i8*, 1008 
<2 x i64>, <2 x double>, i8) nounwind readonly 1009 1010define <4 x double> @test_x86_avx2_gather_q_pd_256(<4 x double> %a0, i8* %a1, 1011 <4 x i64> %idx, <4 x double> %mask) { 1012 ; CHECK: vgatherqpd 1013 %res = call <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> %a0, 1014 i8* %a1, <4 x i64> %idx, <4 x double> %mask, i8 2) ; 1015 ret <4 x double> %res 1016} 1017declare <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double>, i8*, 1018 <4 x i64>, <4 x double>, i8) nounwind readonly 1019 1020define <4 x float> @test_x86_avx2_gather_d_ps(<4 x float> %a0, i8* %a1, 1021 <4 x i32> %idx, <4 x float> %mask) { 1022 ; CHECK: vgatherdps 1023 %res = call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> %a0, 1024 i8* %a1, <4 x i32> %idx, <4 x float> %mask, i8 2) ; 1025 ret <4 x float> %res 1026} 1027declare <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float>, i8*, 1028 <4 x i32>, <4 x float>, i8) nounwind readonly 1029 1030define <8 x float> @test_x86_avx2_gather_d_ps_256(<8 x float> %a0, i8* %a1, 1031 <8 x i32> %idx, <8 x float> %mask) { 1032 ; CHECK: vgatherdps 1033 %res = call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> %a0, 1034 i8* %a1, <8 x i32> %idx, <8 x float> %mask, i8 2) ; 1035 ret <8 x float> %res 1036} 1037declare <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float>, i8*, 1038 <8 x i32>, <8 x float>, i8) nounwind readonly 1039 1040define <4 x float> @test_x86_avx2_gather_q_ps(<4 x float> %a0, i8* %a1, 1041 <2 x i64> %idx, <4 x float> %mask) { 1042 ; CHECK: vgatherqps 1043 %res = call <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float> %a0, 1044 i8* %a1, <2 x i64> %idx, <4 x float> %mask, i8 2) ; 1045 ret <4 x float> %res 1046} 1047declare <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float>, i8*, 1048 <2 x i64>, <4 x float>, i8) nounwind readonly 1049 1050define <4 x float> @test_x86_avx2_gather_q_ps_256(<4 x float> %a0, i8* %a1, 1051 <4 x i64> %idx, <4 x float> %mask) { 1052 ; CHECK: vgatherqps 1053 %res = call <4 x float> 
@llvm.x86.avx2.gather.q.ps.256(<4 x float> %a0, 1054 i8* %a1, <4 x i64> %idx, <4 x float> %mask, i8 2) ; 1055 ret <4 x float> %res 1056} 1057declare <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float>, i8*, 1058 <4 x i64>, <4 x float>, i8) nounwind readonly 1059 1060define <2 x i64> @test_x86_avx2_gather_d_q(<2 x i64> %a0, i8* %a1, 1061 <4 x i32> %idx, <2 x i64> %mask) { 1062 ; CHECK: vpgatherdq 1063 %res = call <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64> %a0, 1064 i8* %a1, <4 x i32> %idx, <2 x i64> %mask, i8 2) ; 1065 ret <2 x i64> %res 1066} 1067declare <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64>, i8*, 1068 <4 x i32>, <2 x i64>, i8) nounwind readonly 1069 1070define <4 x i64> @test_x86_avx2_gather_d_q_256(<4 x i64> %a0, i8* %a1, 1071 <4 x i32> %idx, <4 x i64> %mask) { 1072 ; CHECK: vpgatherdq 1073 %res = call <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> %a0, 1074 i8* %a1, <4 x i32> %idx, <4 x i64> %mask, i8 2) ; 1075 ret <4 x i64> %res 1076} 1077declare <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64>, i8*, 1078 <4 x i32>, <4 x i64>, i8) nounwind readonly 1079 1080define <2 x i64> @test_x86_avx2_gather_q_q(<2 x i64> %a0, i8* %a1, 1081 <2 x i64> %idx, <2 x i64> %mask) { 1082 ; CHECK: vpgatherqq 1083 %res = call <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64> %a0, 1084 i8* %a1, <2 x i64> %idx, <2 x i64> %mask, i8 2) ; 1085 ret <2 x i64> %res 1086} 1087declare <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64>, i8*, 1088 <2 x i64>, <2 x i64>, i8) nounwind readonly 1089 1090define <4 x i64> @test_x86_avx2_gather_q_q_256(<4 x i64> %a0, i8* %a1, 1091 <4 x i64> %idx, <4 x i64> %mask) { 1092 ; CHECK: vpgatherqq 1093 %res = call <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> %a0, 1094 i8* %a1, <4 x i64> %idx, <4 x i64> %mask, i8 2) ; 1095 ret <4 x i64> %res 1096} 1097declare <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64>, i8*, 1098 <4 x i64>, <4 x i64>, i8) nounwind readonly 1099 1100define <4 x i32> @test_x86_avx2_gather_d_d(<4 x i32> %a0, i8* %a1, 1101 <4 x 
i32> %idx, <4 x i32> %mask) { 1102 ; CHECK: vpgatherdd 1103 %res = call <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32> %a0, 1104 i8* %a1, <4 x i32> %idx, <4 x i32> %mask, i8 2) ; 1105 ret <4 x i32> %res 1106} 1107declare <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32>, i8*, 1108 <4 x i32>, <4 x i32>, i8) nounwind readonly 1109 1110define <8 x i32> @test_x86_avx2_gather_d_d_256(<8 x i32> %a0, i8* %a1, 1111 <8 x i32> %idx, <8 x i32> %mask) { 1112 ; CHECK: vpgatherdd 1113 %res = call <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32> %a0, 1114 i8* %a1, <8 x i32> %idx, <8 x i32> %mask, i8 2) ; 1115 ret <8 x i32> %res 1116} 1117declare <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32>, i8*, 1118 <8 x i32>, <8 x i32>, i8) nounwind readonly 1119 1120define <4 x i32> @test_x86_avx2_gather_q_d(<4 x i32> %a0, i8* %a1, 1121 <2 x i64> %idx, <4 x i32> %mask) { 1122 ; CHECK: vpgatherqd 1123 %res = call <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32> %a0, 1124 i8* %a1, <2 x i64> %idx, <4 x i32> %mask, i8 2) ; 1125 ret <4 x i32> %res 1126} 1127declare <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32>, i8*, 1128 <2 x i64>, <4 x i32>, i8) nounwind readonly 1129 1130define <4 x i32> @test_x86_avx2_gather_q_d_256(<4 x i32> %a0, i8* %a1, 1131 <4 x i64> %idx, <4 x i32> %mask) { 1132 ; CHECK: vpgatherqd 1133 %res = call <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32> %a0, 1134 i8* %a1, <4 x i64> %idx, <4 x i32> %mask, i8 2) ; 1135 ret <4 x i32> %res 1136} 1137declare <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32>, i8*, 1138 <4 x i64>, <4 x i32>, i8) nounwind readonly 1139 1140; PR13298 1141define <8 x float> @test_gather_mask(<8 x float> %a0, float* %a, 1142 <8 x i32> %idx, <8 x float> %mask, 1143 float* nocapture %out) { 1144; CHECK: test_gather_mask 1145; CHECK: vmovdqa %ymm2, [[DEST:%.*]] 1146; CHECK: vgatherdps [[DEST]] 1147;; gather with mask 1148 %a_i8 = bitcast float* %a to i8* 1149 %res = call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> %a0, 1150 i8* %a_i8, <8 x i32> %idx, 
<8 x float> %mask, i8 4) ; 1151 1152;; for debugging, we'll just dump out the mask 1153 %out_ptr = bitcast float * %out to <8 x float> * 1154 store <8 x float> %mask, <8 x float> * %out_ptr, align 4 1155 1156 ret <8 x float> %res 1157} 1158