; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mcpu=corei7-avx | FileCheck %s
;
; Codegen test: every function below calls a single x86 intrinsic, and the
; FileCheck patterns inside it name the mnemonic(s) expected in llc's output.
;
; Each test starts with a pattern for its own function name, followed by the
; mnemonic patterns. This makes it much harder for a mnemonic pattern to be
; satisfied by the output of a neighbouring function (for example `vaesdec`
; is a prefix of `vaesdeclast`).
;
; NOTE(review): the llc command combines an x86_64 triple with -march=x86.
; Several tests expect a `movl` ahead of the instruction under test, which
; presumably relies on 32-bit code generation -- confirm before changing it.

; --- AES-NI intrinsics (llvm.x86.aesni.*) ---

define <2 x i64> @test_x86_aesni_aesdec(<2 x i64> %a0, <2 x i64> %a1) {
  ; CHECK: test_x86_aesni_aesdec
  ; CHECK: vaesdec
  %res = call <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64>, <2 x i64>) nounwind readnone

define <2 x i64> @test_x86_aesni_aesdeclast(<2 x i64> %a0, <2 x i64> %a1) {
  ; CHECK: test_x86_aesni_aesdeclast
  ; CHECK: vaesdeclast
  %res = call <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64>, <2 x i64>) nounwind readnone

define <2 x i64> @test_x86_aesni_aesenc(<2 x i64> %a0, <2 x i64> %a1) {
  ; CHECK: test_x86_aesni_aesenc
  ; CHECK: vaesenc
  %res = call <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64>, <2 x i64>) nounwind readnone

define <2 x i64> @test_x86_aesni_aesenclast(<2 x i64> %a0, <2 x i64> %a1) {
  ; CHECK: test_x86_aesni_aesenclast
  ; CHECK: vaesenclast
  %res = call <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64>, <2 x i64>) nounwind readnone

define <2 x i64> @test_x86_aesni_aesimc(<2 x i64> %a0) {
  ; CHECK: test_x86_aesni_aesimc
  ; CHECK: vaesimc
  %res = call <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64> %a0)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64>) nounwind readnone

; The second operand is a constant i8 immediate.
define <2 x i64> @test_x86_aesni_aeskeygenassist(<2 x i64> %a0) {
  ; CHECK: test_x86_aesni_aeskeygenassist
  ; CHECK: vaeskeygenassist
  %res = call <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64> %a0, i8 7)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64>, i8) nounwind readnone

; --- SSE2 intrinsics (llvm.x86.sse2.*) ---

define <2 x double> @test_x86_sse2_add_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: test_x86_sse2_add_sd
  ; CHECK: vaddsd
  %res = call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>) nounwind readnone

; The i8 7 immediate is expected to be printed as the `ord` form of the compare.
define <2 x double> @test_x86_sse2_cmp_pd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: test_x86_sse2_cmp_pd
  ; CHECK: vcmpordpd
  %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 7)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8) nounwind readnone

; Scalar counterpart of the test above, same immediate.
define <2 x double> @test_x86_sse2_cmp_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: test_x86_sse2_cmp_sd
  ; CHECK: vcmpordsd
  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 7)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone

; The comi* tests return an i32: the expected sequence is the compare, a
; flag-materialising instruction, and (for the set* forms) a zero extension.
define i32 @test_x86_sse2_comieq_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: test_x86_sse2_comieq_sd
  ; CHECK: vcomisd
  ; CHECK: sete
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_x86_sse2_comige_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: test_x86_sse2_comige_sd
  ; CHECK: vcomisd
  ; CHECK: setae
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.comige.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comige.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_x86_sse2_comigt_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: test_x86_sse2_comigt_sd
  ; CHECK: vcomisd
  ; CHECK: seta
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comigt.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_x86_sse2_comile_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: test_x86_sse2_comile_sd
  ; CHECK: vcomisd
  ; CHECK: setbe
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>) nounwind readnone

; Unlike its siblings, this test expects an sbbl/andl pair instead of set*/movzbl.
define i32 @test_x86_sse2_comilt_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: test_x86_sse2_comilt_sd
  ; CHECK: vcomisd
  ; CHECK: sbbl %eax, %eax
  ; CHECK: andl $1, %eax
  %res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comilt.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_x86_sse2_comineq_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: test_x86_sse2_comineq_sd
  ; CHECK: vcomisd
  ; CHECK: setne
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>) nounwind readnone

define <2 x double> @test_x86_sse2_cvtdq2pd(<4 x i32> %a0) {
  ; CHECK: test_x86_sse2_cvtdq2pd
  ; CHECK: vcvtdq2pd
  %res = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone

define <4 x float> @test_x86_sse2_cvtdq2ps(<4 x i32> %a0) {
  ; CHECK: test_x86_sse2_cvtdq2ps
  ; CHECK: vcvtdq2ps
  %res = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %a0)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) nounwind readnone

define <4 x i32> @test_x86_sse2_cvtpd2dq(<2 x double> %a0) {
  ; CHECK: test_x86_sse2_cvtpd2dq
  ; CHECK: vcvtpd2dq
  %res = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone

define <4 x float> @test_x86_sse2_cvtpd2ps(<2 x double> %a0) {
  ; CHECK: test_x86_sse2_cvtpd2ps
  ; CHECK: vcvtpd2ps
  %res = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double>) nounwind readnone

define <4 x i32> @test_x86_sse2_cvtps2dq(<4 x float> %a0) {
  ; CHECK: test_x86_sse2_cvtps2dq
  ; CHECK: vcvtps2dq
  %res = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone

define <2 x double> @test_x86_sse2_cvtps2pd(<4 x float> %a0) {
  ; CHECK: test_x86_sse2_cvtps2pd
  ; CHECK: vcvtps2pd
  %res = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone

define i32 @test_x86_sse2_cvtsd2si(<2 x double> %a0) {
  ; CHECK: test_x86_sse2_cvtsd2si
  ; CHECK: vcvtsd2si
  %res = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone

define <4 x float> @test_x86_sse2_cvtsd2ss(<4 x float> %a0, <2 x double> %a1) {
  ; CHECK: test_x86_sse2_cvtsd2ss
  ; CHECK: vcvtsd2ss
  %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone

; The integer operand is the constant 7; a `movl` is expected before the convert.
define <2 x double> @test_x86_sse2_cvtsi2sd(<2 x double> %a0) {
  ; CHECK: test_x86_sse2_cvtsi2sd
  ; CHECK: movl
  ; CHECK: vcvtsi2sd
  %res = call <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double> %a0, i32 7)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double>, i32) nounwind readnone

define <2 x double> @test_x86_sse2_cvtss2sd(<2 x double> %a0, <4 x float> %a1) {
  ; CHECK: test_x86_sse2_cvtss2sd
  ; CHECK: vcvtss2sd
  %res = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double>, <4 x float>) nounwind readnone

define <4 x i32> @test_x86_sse2_cvttpd2dq(<2 x double> %a0) {
  ; CHECK: test_x86_sse2_cvttpd2dq
  ; CHECK: vcvttpd2dq
  %res = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone

define <4 x i32> @test_x86_sse2_cvttps2dq(<4 x float> %a0) {
  ; CHECK: test_x86_sse2_cvttps2dq
  ; CHECK: vcvttps2dq
  %res = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone

define i32 @test_x86_sse2_cvttsd2si(<2 x double> %a0) {
  ; CHECK: test_x86_sse2_cvttsd2si
  ; CHECK: vcvttsd2si
  %res = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone

define <2 x double> @test_x86_sse2_div_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: test_x86_sse2_div_sd
  ; CHECK: vdivsd
  %res = call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>) nounwind readnone

define <2 x double> @test_x86_sse2_max_pd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: test_x86_sse2_max_pd
  ; CHECK: vmaxpd
  %res = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone

define <2 x double> @test_x86_sse2_max_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: test_x86_sse2_max_sd
  ; CHECK: vmaxsd
  %res = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
; Declaration of the intrinsic called by test_x86_sse2_max_sd, defined just above.
declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone

; Each test below calls a single x86 intrinsic. Its FileCheck patterns start
; with the test function's own name, followed by the expected mnemonic(s), so
; that a mnemonic pattern is much harder to satisfy from a neighbouring
; function's output (for example `vpslld` is a prefix of `vpslldq`).

; --- SSE2 intrinsics (llvm.x86.sse2.*), continued ---

define <2 x double> @test_x86_sse2_min_pd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: test_x86_sse2_min_pd
  ; CHECK: vminpd
  %res = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone

define <2 x double> @test_x86_sse2_min_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: test_x86_sse2_min_sd
  ; CHECK: vminsd
  %res = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_x86_sse2_movmsk_pd(<2 x double> %a0) {
  ; CHECK: test_x86_sse2_movmsk_pd
  ; CHECK: vmovmskpd
  %res = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone

define <2 x double> @test_x86_sse2_mul_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: test_x86_sse2_mul_sd
  ; CHECK: vmulsd
  %res = call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.mul.sd(<2 x double>, <2 x double>) nounwind readnone

define <8 x i16> @test_x86_sse2_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK: test_x86_sse2_packssdw_128
  ; CHECK: vpackssdw
  %res = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a0, <4 x i32> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone

define <16 x i8> @test_x86_sse2_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: test_x86_sse2_packsswb_128
  ; CHECK: vpacksswb
  %res = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a0, <8 x i16> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone

define <16 x i8> @test_x86_sse2_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: test_x86_sse2_packuswb_128
  ; CHECK: vpackuswb
  %res = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a0, <8 x i16> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone

define <16 x i8> @test_x86_sse2_padds_b(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK: test_x86_sse2_padds_b
  ; CHECK: vpaddsb
  %res = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>) nounwind readnone

define <8 x i16> @test_x86_sse2_padds_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: test_x86_sse2_padds_w
  ; CHECK: vpaddsw
  %res = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone

define <16 x i8> @test_x86_sse2_paddus_b(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK: test_x86_sse2_paddus_b
  ; CHECK: vpaddusb
  %res = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8>, <16 x i8>) nounwind readnone

define <8 x i16> @test_x86_sse2_paddus_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: test_x86_sse2_paddus_w
  ; CHECK: vpaddusw
  %res = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16>, <8 x i16>) nounwind readnone

define <16 x i8> @test_x86_sse2_pavg_b(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK: test_x86_sse2_pavg_b
  ; CHECK: vpavgb
  %res = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8>, <16 x i8>) nounwind readnone

define <8 x i16> @test_x86_sse2_pavg_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: test_x86_sse2_pavg_w
  ; CHECK: vpavgw
  %res = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone

define <4 x i32> @test_x86_sse2_pmadd_wd(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: test_x86_sse2_pmadd_wd
  ; CHECK: vpmaddwd
  %res = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> %a1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone

define <8 x i16> @test_x86_sse2_pmaxs_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: test_x86_sse2_pmaxs_w
  ; CHECK: vpmaxsw
  %res = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16>, <8 x i16>) nounwind readnone

define <16 x i8> @test_x86_sse2_pmaxu_b(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK: test_x86_sse2_pmaxu_b
  ; CHECK: vpmaxub
  %res = call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8>, <16 x i8>) nounwind readnone

define <8 x i16> @test_x86_sse2_pmins_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: test_x86_sse2_pmins_w
  ; CHECK: vpminsw
  %res = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16>, <8 x i16>) nounwind readnone

define <16 x i8> @test_x86_sse2_pminu_b(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK: test_x86_sse2_pminu_b
  ; CHECK: vpminub
  %res = call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8>, <16 x i8>) nounwind readnone

define i32 @test_x86_sse2_pmovmskb_128(<16 x i8> %a0) {
  ; CHECK: test_x86_sse2_pmovmskb_128
  ; CHECK: vpmovmskb
  %res = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) nounwind readnone

define <8 x i16> @test_x86_sse2_pmulh_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: test_x86_sse2_pmulh_w
  ; CHECK: vpmulhw
  %res = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone

define <8 x i16> @test_x86_sse2_pmulhu_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: test_x86_sse2_pmulhu_w
  ; CHECK: vpmulhuw
  %res = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnone

define <2 x i64> @test_x86_sse2_pmulu_dq(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK: test_x86_sse2_pmulu_dq
  ; CHECK: vpmuludq
  %res = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %a0, <4 x i32> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32>, <4 x i32>) nounwind readnone

define <2 x i64> @test_x86_sse2_psad_bw(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK: test_x86_sse2_psad_bw
  ; CHECK: vpsadbw
  %res = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %a0, <16 x i8> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone

; Shift tests: the psll/psra/psrl forms take the count as a vector operand,
; the pslli/psrai/psrli and *.dq forms take a constant i32 (7 here).

define <4 x i32> @test_x86_sse2_psll_d(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK: test_x86_sse2_psll_d
  ; CHECK: vpslld
  %res = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone

define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) {
  ; CHECK: test_x86_sse2_psll_dq
  ; CHECK: vpslldq
  %res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 7)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone

define <2 x i64> @test_x86_sse2_psll_dq_bs(<2 x i64> %a0) {
  ; CHECK: test_x86_sse2_psll_dq_bs
  ; CHECK: vpslldq
  %res = call <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64> %a0, i32 7)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64>, i32) nounwind readnone

define <2 x i64> @test_x86_sse2_psll_q(<2 x i64> %a0, <2 x i64> %a1) {
  ; CHECK: test_x86_sse2_psll_q
  ; CHECK: vpsllq
  %res = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) nounwind readnone

define <8 x i16> @test_x86_sse2_psll_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: test_x86_sse2_psll_w
  ; CHECK: vpsllw
  %res = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone

define <4 x i32> @test_x86_sse2_pslli_d(<4 x i32> %a0) {
  ; CHECK: test_x86_sse2_pslli_d
  ; CHECK: vpslld
  %res = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %a0, i32 7)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) nounwind readnone

define <2 x i64> @test_x86_sse2_pslli_q(<2 x i64> %a0) {
  ; CHECK: test_x86_sse2_pslli_q
  ; CHECK: vpsllq
  %res = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %a0, i32 7)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) nounwind readnone

define <8 x i16> @test_x86_sse2_pslli_w(<8 x i16> %a0) {
  ; CHECK: test_x86_sse2_pslli_w
  ; CHECK: vpsllw
  %res = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %a0, i32 7)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) nounwind readnone

define <4 x i32> @test_x86_sse2_psra_d(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK: test_x86_sse2_psra_d
  ; CHECK: vpsrad
  %res = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) nounwind readnone

define <8 x i16> @test_x86_sse2_psra_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: test_x86_sse2_psra_w
  ; CHECK: vpsraw
  %res = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone

define <4 x i32> @test_x86_sse2_psrai_d(<4 x i32> %a0) {
  ; CHECK: test_x86_sse2_psrai_d
  ; CHECK: vpsrad
  %res = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %a0, i32 7)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) nounwind readnone

define <8 x i16> @test_x86_sse2_psrai_w(<8 x i16> %a0) {
  ; CHECK: test_x86_sse2_psrai_w
  ; CHECK: vpsraw
  %res = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %a0, i32 7)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) nounwind readnone

define <4 x i32> @test_x86_sse2_psrl_d(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK: test_x86_sse2_psrl_d
  ; CHECK: vpsrld
  %res = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone

define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {
  ; CHECK: test_x86_sse2_psrl_dq
  ; CHECK: vpsrldq
  %res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 7)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone

define <2 x i64> @test_x86_sse2_psrl_dq_bs(<2 x i64> %a0) {
  ; CHECK: test_x86_sse2_psrl_dq_bs
  ; CHECK: vpsrldq
  %res = call <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64> %a0, i32 7)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64>, i32) nounwind readnone

define <2 x i64> @test_x86_sse2_psrl_q(<2 x i64> %a0, <2 x i64> %a1) {
  ; CHECK: test_x86_sse2_psrl_q
  ; CHECK: vpsrlq
  %res = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind readnone

define <8 x i16> @test_x86_sse2_psrl_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: test_x86_sse2_psrl_w
  ; CHECK: vpsrlw
  %res = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone

define <4 x i32> @test_x86_sse2_psrli_d(<4 x i32> %a0) {
  ; CHECK: test_x86_sse2_psrli_d
  ; CHECK: vpsrld
  %res = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %a0, i32 7)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) nounwind readnone

define <2 x i64> @test_x86_sse2_psrli_q(<2 x i64> %a0) {
  ; CHECK: test_x86_sse2_psrli_q
  ; CHECK: vpsrlq
  %res = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %a0, i32 7)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) nounwind readnone

define <8 x i16> @test_x86_sse2_psrli_w(<8 x i16> %a0) {
  ; CHECK: test_x86_sse2_psrli_w
  ; CHECK: vpsrlw
  %res = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %a0, i32 7)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone

define <16 x i8> @test_x86_sse2_psubs_b(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK: test_x86_sse2_psubs_b
  ; CHECK: vpsubsb
  %res = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8>, <16 x i8>) nounwind readnone

define <8 x i16> @test_x86_sse2_psubs_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: test_x86_sse2_psubs_w
  ; CHECK: vpsubsw
  %res = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone

define <16 x i8> @test_x86_sse2_psubus_b(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK: test_x86_sse2_psubus_b
  ; CHECK: vpsubusb
  %res = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8>, <16 x i8>) nounwind readnone

define <8 x i16> @test_x86_sse2_psubus_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: test_x86_sse2_psubus_w
  ; CHECK: vpsubusw
  %res = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16>, <8 x i16>) nounwind readnone

define <2 x double> @test_x86_sse2_sqrt_pd(<2 x double> %a0) {
  ; CHECK: test_x86_sse2_sqrt_pd
  ; CHECK: vsqrtpd
  %res = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone

define <2 x double> @test_x86_sse2_sqrt_sd(<2 x double> %a0) {
  ; CHECK: test_x86_sse2_sqrt_sd
  ; CHECK: vsqrtsd
  %res = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a0)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone

; Store tests: these intrinsics write through %a0, so their declarations carry
; only `nounwind`. A `movl` is expected ahead of the store instruction.

define void @test_x86_sse2_storel_dq(i8* %a0, <4 x i32> %a1) {
  ; CHECK: test_x86_sse2_storel_dq
  ; CHECK: movl
  ; CHECK: vmovq
  call void @llvm.x86.sse2.storel.dq(i8* %a0, <4 x i32> %a1)
  ret void
}
declare void @llvm.x86.sse2.storel.dq(i8*, <4 x i32>) nounwind

define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) {
  ; CHECK: test_x86_sse2_storeu_dq
  ; CHECK: movl
  ; CHECK: vmovdqu
  ; add operation forces the execution domain.
  %a2 = add <16 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  call void @llvm.x86.sse2.storeu.dq(i8* %a0, <16 x i8> %a2)
  ret void
}
declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind

define void @test_x86_sse2_storeu_pd(i8* %a0, <2 x double> %a1) {
  ; CHECK: test_x86_sse2_storeu_pd
  ; CHECK: movl
  ; CHECK: vmovupd
  ; fadd operation forces the execution domain.
  %a2 = fadd <2 x double> %a1, <double 0x0, double 0x4200000000000000>
  call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a2)
  ret void
}
declare void @llvm.x86.sse2.storeu.pd(i8*, <2 x double>) nounwind

define <2 x double> @test_x86_sse2_sub_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: test_x86_sse2_sub_sd
  ; CHECK: vsubsd
  %res = call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.sub.sd(<2 x double>, <2 x double>) nounwind readnone

; The ucomi* tests return an i32: the expected sequence is the compare followed
; by a flag-materialising instruction (and a zero extension for the set* forms).

define i32 @test_x86_sse2_ucomieq_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: test_x86_sse2_ucomieq_sd
  ; CHECK: vucomisd
  ; CHECK: sete
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_x86_sse2_ucomige_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: test_x86_sse2_ucomige_sd
  ; CHECK: vucomisd
  ; CHECK: setae
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomige.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_x86_sse2_ucomigt_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: test_x86_sse2_ucomigt_sd
  ; CHECK: vucomisd
  ; CHECK: seta
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomigt.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_x86_sse2_ucomile_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: test_x86_sse2_ucomile_sd
  ; CHECK: vucomisd
  ; CHECK: setbe
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>) nounwind readnone

; Only the compare and an `sbbl` are looked for here.
define i32 @test_x86_sse2_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: test_x86_sse2_ucomilt_sd
  ; CHECK: vucomisd
  ; CHECK: sbbl
  %res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomilt.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_x86_sse2_ucomineq_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: test_x86_sse2_ucomineq_sd
  ; CHECK: vucomisd
  ; CHECK: setne
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomineq.sd(<2 x double>, <2 x double>) nounwind readnone

; --- SSE3 intrinsics (llvm.x86.sse3.*) ---

define <2 x double> @test_x86_sse3_addsub_pd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: test_x86_sse3_addsub_pd
  ; CHECK: vaddsubpd
  %res = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double>, <2 x double>) nounwind readnone

define <4 x float> @test_x86_sse3_addsub_ps(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: test_x86_sse3_addsub_ps
  ; CHECK: vaddsubps
  %res = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float>, <4 x float>) nounwind readnone

define <2 x double> @test_x86_sse3_hadd_pd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: test_x86_sse3_hadd_pd
  ; CHECK: vhaddpd
  %res = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>) nounwind readnone

define <4 x float> @test_x86_sse3_hadd_ps(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: test_x86_sse3_hadd_ps
  ; CHECK: vhaddps
  %res = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) nounwind readnone

define <2 x double> @test_x86_sse3_hsub_pd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: test_x86_sse3_hsub_pd
  ; CHECK: vhsubpd
  %res = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>) nounwind readnone

define <4 x float> @test_x86_sse3_hsub_ps(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: test_x86_sse3_hsub_ps
  ; CHECK: vhsubps
  %res = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>) nounwind readnone

; Load through %a0; the declaration is `readonly` rather than `readnone`.
define <16 x i8> @test_x86_sse3_ldu_dq(i8* %a0) {
  ; CHECK: test_x86_sse3_ldu_dq
  ; CHECK: movl
  ; CHECK: vlddqu
  %res = call <16 x i8> @llvm.x86.sse3.ldu.dq(i8* %a0)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse3.ldu.dq(i8*) nounwind readonly

; --- SSE4.1 intrinsics (llvm.x86.sse41.*) ---

define <2 x double> @test_x86_sse41_blendpd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: test_x86_sse41_blendpd
  ; CHECK: vblendpd
  %res = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %a0, <2 x double> %a1, i32 7)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse41.blendpd(<2 x double>, <2 x double>, i32) nounwind readnone

define <4 x float> @test_x86_sse41_blendps(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: test_x86_sse41_blendps
  ; CHECK: vblendps
  %res = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %a0, <4 x float> %a1, i32 7)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse41.blendps(<4 x float>, <4 x float>, i32) nounwind readnone

define <2 x double> @test_x86_sse41_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
  ; CHECK: test_x86_sse41_blendvpd
  ; CHECK: vblendvpd
  %res = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone

define <4 x float> @test_x86_sse41_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
  ; CHECK: test_x86_sse41_blendvps
  ; CHECK: vblendvps
  %res = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone

define <2 x double> @test_x86_sse41_dppd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: test_x86_sse41_dppd
  ; CHECK: vdppd
  %res = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i32 7)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i32) nounwind readnone

define <4 x float> @test_x86_sse41_dpps(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: test_x86_sse41_dpps
  ; CHECK: vdpps
  %res = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i32 7)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i32) nounwind readnone

define <4 x float> @test_x86_sse41_insertps(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: test_x86_sse41_insertps
  ; CHECK: vinsertps
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i32 7)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32) nounwind readnone

define <8 x i16> @test_x86_sse41_mpsadbw(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK: test_x86_sse41_mpsadbw
  ; CHECK: vmpsadbw
  %res = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i32 7)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i32) nounwind readnone

define <8 x i16> @test_x86_sse41_packusdw(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK: test_x86_sse41_packusdw
  ; CHECK: vpackusdw
  %res = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a0, <4 x i32> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>) nounwind readnone

define <16 x i8> @test_x86_sse41_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) {
  ; CHECK: test_x86_sse41_pblendvb
  ; CHECK: vpblendvb
  %res = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone

define <8 x i16> @test_x86_sse41_pblendw(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: test_x86_sse41_pblendw
  ; CHECK: vpblendw
  %res = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a1, i32 7)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i32) nounwind readnone

define <8 x i16> @test_x86_sse41_phminposuw(<8 x i16> %a0) {
  ; CHECK: test_x86_sse41_phminposuw
  ; CHECK: vphminposuw
  %res = call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> %a0)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16>) nounwind readnone

define <16 x i8> @test_x86_sse41_pmaxsb(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK: test_x86_sse41_pmaxsb
  ; CHECK: vpmaxsb
  %res = call <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8>, <16 x i8>) nounwind readnone

define <4 x i32> @test_x86_sse41_pmaxsd(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK: test_x86_sse41_pmaxsd
  ; CHECK: vpmaxsd
  %res = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone

define <4 x i32> @test_x86_sse41_pmaxud(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK: test_x86_sse41_pmaxud
  ; CHECK: vpmaxud
  %res = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone

define <8 x i16> @test_x86_sse41_pmaxuw(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: test_x86_sse41_pmaxuw
  ; CHECK: vpmaxuw
  %res = call <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16>, <8 x i16>) nounwind readnone

define <16 x i8> @test_x86_sse41_pminsb(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK: test_x86_sse41_pminsb
  ; CHECK: vpminsb
  %res = call <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8>, <16 x i8>) nounwind readnone

define <4 x i32> @test_x86_sse41_pminsd(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK: test_x86_sse41_pminsd
  ; CHECK: vpminsd
  %res = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone

define <4 x i32> @test_x86_sse41_pminud(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK: test_x86_sse41_pminud
  ; CHECK: vpminud
  %res = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %res
}
; NOTE(review): the declaration below is cut off at the edge of the reviewed
; region; it is left as found and continues on the following line.
declare <4
x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone 970 971 972define <8 x i16> @test_x86_sse41_pminuw(<8 x i16> %a0, <8 x i16> %a1) { 973 ; CHECK: vpminuw 974 %res = call <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] 975 ret <8 x i16> %res 976} 977declare <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16>, <8 x i16>) nounwind readnone 978 979 980define <4 x i32> @test_x86_sse41_pmovsxbd(<16 x i8> %a0) { 981 ; CHECK: vpmovsxbd 982 %res = call <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1] 983 ret <4 x i32> %res 984} 985declare <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8>) nounwind readnone 986 987 988define <2 x i64> @test_x86_sse41_pmovsxbq(<16 x i8> %a0) { 989 ; CHECK: vpmovsxbq 990 %res = call <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1] 991 ret <2 x i64> %res 992} 993declare <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8>) nounwind readnone 994 995 996define <8 x i16> @test_x86_sse41_pmovsxbw(<16 x i8> %a0) { 997 ; CHECK: vpmovsxbw 998 %res = call <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1] 999 ret <8 x i16> %res 1000} 1001declare <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8>) nounwind readnone 1002 1003 1004define <2 x i64> @test_x86_sse41_pmovsxdq(<4 x i32> %a0) { 1005 ; CHECK: vpmovsxdq 1006 %res = call <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1] 1007 ret <2 x i64> %res 1008} 1009declare <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32>) nounwind readnone 1010 1011 1012define <4 x i32> @test_x86_sse41_pmovsxwd(<8 x i16> %a0) { 1013 ; CHECK: vpmovsxwd 1014 %res = call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1] 1015 ret <4 x i32> %res 1016} 1017declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>) nounwind readnone 1018 1019 1020define <2 x i64> @test_x86_sse41_pmovsxwq(<8 x i16> %a0) { 1021 ; CHECK: vpmovsxwq 1022 %res = call <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16> 
%a0) ; <<2 x i64>> [#uses=1] 1023 ret <2 x i64> %res 1024} 1025declare <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16>) nounwind readnone 1026 1027 1028define <4 x i32> @test_x86_sse41_pmovzxbd(<16 x i8> %a0) { 1029 ; CHECK: vpmovzxbd 1030 %res = call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1] 1031 ret <4 x i32> %res 1032} 1033declare <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8>) nounwind readnone 1034 1035 1036define <2 x i64> @test_x86_sse41_pmovzxbq(<16 x i8> %a0) { 1037 ; CHECK: vpmovzxbq 1038 %res = call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1] 1039 ret <2 x i64> %res 1040} 1041declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>) nounwind readnone 1042 1043 1044define <8 x i16> @test_x86_sse41_pmovzxbw(<16 x i8> %a0) { 1045 ; CHECK: vpmovzxbw 1046 %res = call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1] 1047 ret <8 x i16> %res 1048} 1049declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>) nounwind readnone 1050 1051 1052define <2 x i64> @test_x86_sse41_pmovzxdq(<4 x i32> %a0) { 1053 ; CHECK: vpmovzxdq 1054 %res = call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1] 1055 ret <2 x i64> %res 1056} 1057declare <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32>) nounwind readnone 1058 1059 1060define <4 x i32> @test_x86_sse41_pmovzxwd(<8 x i16> %a0) { 1061 ; CHECK: vpmovzxwd 1062 %res = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1] 1063 ret <4 x i32> %res 1064} 1065declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone 1066 1067 1068define <2 x i64> @test_x86_sse41_pmovzxwq(<8 x i16> %a0) { 1069 ; CHECK: vpmovzxwq 1070 %res = call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1] 1071 ret <2 x i64> %res 1072} 1073declare <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16>) nounwind readnone 1074 1075 1076define <2 x i64> @test_x86_sse41_pmuldq(<4 x i32> %a0, <4 x i32> %a1) { 1077 ; CHECK: 
vpmuldq 1078 %res = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> %a0, <4 x i32> %a1) ; <<2 x i64>> [#uses=1] 1079 ret <2 x i64> %res 1080} 1081declare <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32>, <4 x i32>) nounwind readnone 1082 1083 1084define i32 @test_x86_sse41_ptestc(<2 x i64> %a0, <2 x i64> %a1) { 1085 ; CHECK: vptest 1086 ; CHECK: sbbl 1087 %res = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %a0, <2 x i64> %a1) ; <i32> [#uses=1] 1088 ret i32 %res 1089} 1090declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone 1091 1092 1093define i32 @test_x86_sse41_ptestnzc(<2 x i64> %a0, <2 x i64> %a1) { 1094 ; CHECK: vptest 1095 ; CHECK: seta 1096 ; CHECK: movzbl 1097 %res = call i32 @llvm.x86.sse41.ptestnzc(<2 x i64> %a0, <2 x i64> %a1) ; <i32> [#uses=1] 1098 ret i32 %res 1099} 1100declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone 1101 1102 1103define i32 @test_x86_sse41_ptestz(<2 x i64> %a0, <2 x i64> %a1) { 1104 ; CHECK: vptest 1105 ; CHECK: sete 1106 ; CHECK: movzbl 1107 %res = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %a0, <2 x i64> %a1) ; <i32> [#uses=1] 1108 ret i32 %res 1109} 1110declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone 1111 1112 1113define <2 x double> @test_x86_sse41_round_pd(<2 x double> %a0) { 1114 ; CHECK: vroundpd 1115 %res = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %a0, i32 7) ; <<2 x double>> [#uses=1] 1116 ret <2 x double> %res 1117} 1118declare <2 x double> @llvm.x86.sse41.round.pd(<2 x double>, i32) nounwind readnone 1119 1120 1121define <4 x float> @test_x86_sse41_round_ps(<4 x float> %a0) { 1122 ; CHECK: vroundps 1123 %res = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %a0, i32 7) ; <<4 x float>> [#uses=1] 1124 ret <4 x float> %res 1125} 1126declare <4 x float> @llvm.x86.sse41.round.ps(<4 x float>, i32) nounwind readnone 1127 1128 1129define <2 x double> @test_x86_sse41_round_sd(<2 x double> %a0, <2 x double> %a1) { 1130 ; CHECK: vroundsd 1131 %res 
= call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1] 1132 ret <2 x double> %res 1133} 1134declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone 1135 1136 1137define <4 x float> @test_x86_sse41_round_ss(<4 x float> %a0, <4 x float> %a1) { 1138 ; CHECK: vroundss 1139 %res = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1] 1140 ret <4 x float> %res 1141} 1142declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone 1143 1144 1145define i32 @test_x86_sse42_pcmpestri128(<16 x i8> %a0, <16 x i8> %a2) { 1146 ; CHECK: movl $7 1147 ; CHECK: movl $7 1148 ; CHECK: vpcmpestri $7 1149 ; CHECK: movl 1150 %res = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1] 1151 ret i32 %res 1152} 1153declare i32 @llvm.x86.sse42.pcmpestri128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone 1154 1155 1156define i32 @test_x86_sse42_pcmpestri128_load(<16 x i8>* %a0, <16 x i8>* %a2) { 1157 ; CHECK: movl $7 1158 ; CHECK: movl $7 1159 ; CHECK: vpcmpestri $7, ( 1160 ; CHECK: movl 1161 %1 = load <16 x i8>* %a0 1162 %2 = load <16 x i8>* %a2 1163 %res = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %1, i32 7, <16 x i8> %2, i32 7, i8 7) ; <i32> [#uses=1] 1164 ret i32 %res 1165} 1166 1167 1168define i32 @test_x86_sse42_pcmpestria128(<16 x i8> %a0, <16 x i8> %a2) { 1169 ; CHECK: movl 1170 ; CHECK: movl 1171 ; CHECK: vpcmpestri 1172 ; CHECK: seta 1173 %res = call i32 @llvm.x86.sse42.pcmpestria128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1] 1174 ret i32 %res 1175} 1176declare i32 @llvm.x86.sse42.pcmpestria128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone 1177 1178 1179define i32 @test_x86_sse42_pcmpestric128(<16 x i8> %a0, <16 x i8> %a2) { 1180 ; CHECK: movl 1181 ; CHECK: movl 1182 ; CHECK: vpcmpestri 1183 ; CHECK: 
sbbl 1184 %res = call i32 @llvm.x86.sse42.pcmpestric128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1] 1185 ret i32 %res 1186} 1187declare i32 @llvm.x86.sse42.pcmpestric128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone 1188 1189 1190define i32 @test_x86_sse42_pcmpestrio128(<16 x i8> %a0, <16 x i8> %a2) { 1191 ; CHECK: movl 1192 ; CHECK: movl 1193 ; CHECK: vpcmpestri 1194 ; CHECK: seto 1195 %res = call i32 @llvm.x86.sse42.pcmpestrio128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1] 1196 ret i32 %res 1197} 1198declare i32 @llvm.x86.sse42.pcmpestrio128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone 1199 1200 1201define i32 @test_x86_sse42_pcmpestris128(<16 x i8> %a0, <16 x i8> %a2) { 1202 ; CHECK: movl 1203 ; CHECK: movl 1204 ; CHECK: vpcmpestri 1205 ; CHECK: sets 1206 %res = call i32 @llvm.x86.sse42.pcmpestris128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1] 1207 ret i32 %res 1208} 1209declare i32 @llvm.x86.sse42.pcmpestris128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone 1210 1211 1212define i32 @test_x86_sse42_pcmpestriz128(<16 x i8> %a0, <16 x i8> %a2) { 1213 ; CHECK: movl 1214 ; CHECK: movl 1215 ; CHECK: vpcmpestri 1216 ; CHECK: sete 1217 %res = call i32 @llvm.x86.sse42.pcmpestriz128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1] 1218 ret i32 %res 1219} 1220declare i32 @llvm.x86.sse42.pcmpestriz128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone 1221 1222 1223define <16 x i8> @test_x86_sse42_pcmpestrm128(<16 x i8> %a0, <16 x i8> %a2) { 1224 ; CHECK: movl 1225 ; CHECK: movl 1226 ; CHECK: vpcmpestrm 1227 ; CHECK-NOT: vmov 1228 %res = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <<16 x i8>> [#uses=1] 1229 ret <16 x i8> %res 1230} 1231declare <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone 1232 1233 1234define <16 x i8> @test_x86_sse42_pcmpestrm128_load(<16 x 
i8> %a0, <16 x i8>* %a2) { 1235 ; CHECK: movl $7 1236 ; CHECK: movl $7 1237 ; CHECK: vpcmpestrm $7, 1238 ; CHECK-NOT: vmov 1239 %1 = load <16 x i8>* %a2 1240 %res = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %a0, i32 7, <16 x i8> %1, i32 7, i8 7) ; <<16 x i8>> [#uses=1] 1241 ret <16 x i8> %res 1242} 1243 1244 1245define i32 @test_x86_sse42_pcmpistri128(<16 x i8> %a0, <16 x i8> %a1) { 1246 ; CHECK: vpcmpistri $7 1247 ; CHECK: movl 1248 %res = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1] 1249 ret i32 %res 1250} 1251declare i32 @llvm.x86.sse42.pcmpistri128(<16 x i8>, <16 x i8>, i8) nounwind readnone 1252 1253 1254define i32 @test_x86_sse42_pcmpistri128_load(<16 x i8>* %a0, <16 x i8>* %a1) { 1255 ; CHECK: vpcmpistri $7, ( 1256 ; CHECK: movl 1257 %1 = load <16 x i8>* %a0 1258 %2 = load <16 x i8>* %a1 1259 %res = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %1, <16 x i8> %2, i8 7) ; <i32> [#uses=1] 1260 ret i32 %res 1261} 1262 1263 1264define i32 @test_x86_sse42_pcmpistria128(<16 x i8> %a0, <16 x i8> %a1) { 1265 ; CHECK: vpcmpistri 1266 ; CHECK: seta 1267 %res = call i32 @llvm.x86.sse42.pcmpistria128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1] 1268 ret i32 %res 1269} 1270declare i32 @llvm.x86.sse42.pcmpistria128(<16 x i8>, <16 x i8>, i8) nounwind readnone 1271 1272 1273define i32 @test_x86_sse42_pcmpistric128(<16 x i8> %a0, <16 x i8> %a1) { 1274 ; CHECK: vpcmpistri 1275 ; CHECK: sbbl 1276 %res = call i32 @llvm.x86.sse42.pcmpistric128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1] 1277 ret i32 %res 1278} 1279declare i32 @llvm.x86.sse42.pcmpistric128(<16 x i8>, <16 x i8>, i8) nounwind readnone 1280 1281 1282define i32 @test_x86_sse42_pcmpistrio128(<16 x i8> %a0, <16 x i8> %a1) { 1283 ; CHECK: vpcmpistri 1284 ; CHECK: seto 1285 %res = call i32 @llvm.x86.sse42.pcmpistrio128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1] 1286 ret i32 %res 1287} 1288declare i32 
@llvm.x86.sse42.pcmpistrio128(<16 x i8>, <16 x i8>, i8) nounwind readnone 1289 1290 1291define i32 @test_x86_sse42_pcmpistris128(<16 x i8> %a0, <16 x i8> %a1) { 1292 ; CHECK: vpcmpistri 1293 ; CHECK: sets 1294 %res = call i32 @llvm.x86.sse42.pcmpistris128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1] 1295 ret i32 %res 1296} 1297declare i32 @llvm.x86.sse42.pcmpistris128(<16 x i8>, <16 x i8>, i8) nounwind readnone 1298 1299 1300define i32 @test_x86_sse42_pcmpistriz128(<16 x i8> %a0, <16 x i8> %a1) { 1301 ; CHECK: vpcmpistri 1302 ; CHECK: sete 1303 %res = call i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1] 1304 ret i32 %res 1305} 1306declare i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8>, <16 x i8>, i8) nounwind readnone 1307 1308 1309define <16 x i8> @test_x86_sse42_pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1) { 1310 ; CHECK: vpcmpistrm $7 1311 ; CHECK-NOT: vmov 1312 %res = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <<16 x i8>> [#uses=1] 1313 ret <16 x i8> %res 1314} 1315declare <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8>, <16 x i8>, i8) nounwind readnone 1316 1317 1318define <16 x i8> @test_x86_sse42_pcmpistrm128_load(<16 x i8> %a0, <16 x i8>* %a1) { 1319 ; CHECK: vpcmpistrm $7, ( 1320 ; CHECK-NOT: vmov 1321 %1 = load <16 x i8>* %a1 1322 %res = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x i8> %1, i8 7) ; <<16 x i8>> [#uses=1] 1323 ret <16 x i8> %res 1324} 1325 1326 1327define <4 x float> @test_x86_sse_add_ss(<4 x float> %a0, <4 x float> %a1) { 1328 ; CHECK: vaddss 1329 %res = call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] 1330 ret <4 x float> %res 1331} 1332declare <4 x float> @llvm.x86.sse.add.ss(<4 x float>, <4 x float>) nounwind readnone 1333 1334 1335define <4 x float> @test_x86_sse_cmp_ps(<4 x float> %a0, <4 x float> %a1) { 1336 ; CHECK: vcmpordps 1337 %res = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x 
float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1] 1338 ret <4 x float> %res 1339} 1340declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) nounwind readnone 1341 1342 1343define <4 x float> @test_x86_sse_cmp_ss(<4 x float> %a0, <4 x float> %a1) { 1344 ; CHECK: vcmpordss 1345 %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1] 1346 ret <4 x float> %res 1347} 1348declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind readnone 1349 1350 1351define i32 @test_x86_sse_comieq_ss(<4 x float> %a0, <4 x float> %a1) { 1352 ; CHECK: vcomiss 1353 ; CHECK: sete 1354 ; CHECK: movzbl 1355 %res = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] 1356 ret i32 %res 1357} 1358declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone 1359 1360 1361define i32 @test_x86_sse_comige_ss(<4 x float> %a0, <4 x float> %a1) { 1362 ; CHECK: vcomiss 1363 ; CHECK: setae 1364 ; CHECK: movzbl 1365 %res = call i32 @llvm.x86.sse.comige.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] 1366 ret i32 %res 1367} 1368declare i32 @llvm.x86.sse.comige.ss(<4 x float>, <4 x float>) nounwind readnone 1369 1370 1371define i32 @test_x86_sse_comigt_ss(<4 x float> %a0, <4 x float> %a1) { 1372 ; CHECK: vcomiss 1373 ; CHECK: seta 1374 ; CHECK: movzbl 1375 %res = call i32 @llvm.x86.sse.comigt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] 1376 ret i32 %res 1377} 1378declare i32 @llvm.x86.sse.comigt.ss(<4 x float>, <4 x float>) nounwind readnone 1379 1380 1381define i32 @test_x86_sse_comile_ss(<4 x float> %a0, <4 x float> %a1) { 1382 ; CHECK: vcomiss 1383 ; CHECK: setbe 1384 ; CHECK: movzbl 1385 %res = call i32 @llvm.x86.sse.comile.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] 1386 ret i32 %res 1387} 1388declare i32 @llvm.x86.sse.comile.ss(<4 x float>, <4 x float>) nounwind readnone 1389 1390 1391define i32 
@test_x86_sse_comilt_ss(<4 x float> %a0, <4 x float> %a1) { 1392 ; CHECK: vcomiss 1393 ; CHECK: sbb 1394 %res = call i32 @llvm.x86.sse.comilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] 1395 ret i32 %res 1396} 1397declare i32 @llvm.x86.sse.comilt.ss(<4 x float>, <4 x float>) nounwind readnone 1398 1399 1400define i32 @test_x86_sse_comineq_ss(<4 x float> %a0, <4 x float> %a1) { 1401 ; CHECK: vcomiss 1402 ; CHECK: setne 1403 ; CHECK: movzbl 1404 %res = call i32 @llvm.x86.sse.comineq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] 1405 ret i32 %res 1406} 1407declare i32 @llvm.x86.sse.comineq.ss(<4 x float>, <4 x float>) nounwind readnone 1408 1409 1410define <4 x float> @test_x86_sse_cvtsi2ss(<4 x float> %a0) { 1411 ; CHECK: movl 1412 ; CHECK: vcvtsi2ss 1413 %res = call <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float> %a0, i32 7) ; <<4 x float>> [#uses=1] 1414 ret <4 x float> %res 1415} 1416declare <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float>, i32) nounwind readnone 1417 1418 1419define i32 @test_x86_sse_cvtss2si(<4 x float> %a0) { 1420 ; CHECK: vcvtss2si 1421 %res = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %a0) ; <i32> [#uses=1] 1422 ret i32 %res 1423} 1424declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone 1425 1426 1427define i32 @test_x86_sse_cvttss2si(<4 x float> %a0) { 1428 ; CHECK: vcvttss2si 1429 %res = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %a0) ; <i32> [#uses=1] 1430 ret i32 %res 1431} 1432declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) nounwind readnone 1433 1434 1435define <4 x float> @test_x86_sse_div_ss(<4 x float> %a0, <4 x float> %a1) { 1436 ; CHECK: vdivss 1437 %res = call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] 1438 ret <4 x float> %res 1439} 1440declare <4 x float> @llvm.x86.sse.div.ss(<4 x float>, <4 x float>) nounwind readnone 1441 1442 1443define void @test_x86_sse_ldmxcsr(i8* %a0) { 1444 ; CHECK: movl 1445 ; CHECK: vldmxcsr 1446 call void 
@llvm.x86.sse.ldmxcsr(i8* %a0) 1447 ret void 1448} 1449declare void @llvm.x86.sse.ldmxcsr(i8*) nounwind 1450 1451 1452 1453define <4 x float> @test_x86_sse_max_ps(<4 x float> %a0, <4 x float> %a1) { 1454 ; CHECK: vmaxps 1455 %res = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] 1456 ret <4 x float> %res 1457} 1458declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone 1459 1460 1461define <4 x float> @test_x86_sse_max_ss(<4 x float> %a0, <4 x float> %a1) { 1462 ; CHECK: vmaxss 1463 %res = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] 1464 ret <4 x float> %res 1465} 1466declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone 1467 1468 1469define <4 x float> @test_x86_sse_min_ps(<4 x float> %a0, <4 x float> %a1) { 1470 ; CHECK: vminps 1471 %res = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] 1472 ret <4 x float> %res 1473} 1474declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone 1475 1476 1477define <4 x float> @test_x86_sse_min_ss(<4 x float> %a0, <4 x float> %a1) { 1478 ; CHECK: vminss 1479 %res = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] 1480 ret <4 x float> %res 1481} 1482declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone 1483 1484 1485define i32 @test_x86_sse_movmsk_ps(<4 x float> %a0) { 1486 ; CHECK: vmovmskps 1487 %res = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0) ; <i32> [#uses=1] 1488 ret i32 %res 1489} 1490declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone 1491 1492 1493 1494define <4 x float> @test_x86_sse_mul_ss(<4 x float> %a0, <4 x float> %a1) { 1495 ; CHECK: vmulss 1496 %res = call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] 1497 ret <4 x float> %res 
1498} 1499declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>) nounwind readnone 1500 1501 1502define <4 x float> @test_x86_sse_rcp_ps(<4 x float> %a0) { 1503 ; CHECK: vrcpps 1504 %res = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1] 1505 ret <4 x float> %res 1506} 1507declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone 1508 1509 1510define <4 x float> @test_x86_sse_rcp_ss(<4 x float> %a0) { 1511 ; CHECK: vrcpss 1512 %res = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1] 1513 ret <4 x float> %res 1514} 1515declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone 1516 1517 1518define <4 x float> @test_x86_sse_rsqrt_ps(<4 x float> %a0) { 1519 ; CHECK: vrsqrtps 1520 %res = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1] 1521 ret <4 x float> %res 1522} 1523declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone 1524 1525 1526define <4 x float> @test_x86_sse_rsqrt_ss(<4 x float> %a0) { 1527 ; CHECK: vrsqrtss 1528 %res = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1] 1529 ret <4 x float> %res 1530} 1531declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone 1532 1533 1534define <4 x float> @test_x86_sse_sqrt_ps(<4 x float> %a0) { 1535 ; CHECK: vsqrtps 1536 %res = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1] 1537 ret <4 x float> %res 1538} 1539declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone 1540 1541 1542define <4 x float> @test_x86_sse_sqrt_ss(<4 x float> %a0) { 1543 ; CHECK: vsqrtss 1544 %res = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1] 1545 ret <4 x float> %res 1546} 1547declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone 1548 1549 1550define void @test_x86_sse_stmxcsr(i8* %a0) { 1551 ; CHECK: movl 1552 ; CHECK: vstmxcsr 1553 call 
void @llvm.x86.sse.stmxcsr(i8* %a0) 1554 ret void 1555} 1556declare void @llvm.x86.sse.stmxcsr(i8*) nounwind 1557 1558 1559define void @test_x86_sse_storeu_ps(i8* %a0, <4 x float> %a1) { 1560 ; CHECK: movl 1561 ; CHECK: vmovups 1562 call void @llvm.x86.sse.storeu.ps(i8* %a0, <4 x float> %a1) 1563 ret void 1564} 1565declare void @llvm.x86.sse.storeu.ps(i8*, <4 x float>) nounwind 1566 1567 1568define <4 x float> @test_x86_sse_sub_ss(<4 x float> %a0, <4 x float> %a1) { 1569 ; CHECK: vsubss 1570 %res = call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] 1571 ret <4 x float> %res 1572} 1573declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>) nounwind readnone 1574 1575 1576define i32 @test_x86_sse_ucomieq_ss(<4 x float> %a0, <4 x float> %a1) { 1577 ; CHECK: vucomiss 1578 ; CHECK: sete 1579 ; CHECK: movzbl 1580 %res = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] 1581 ret i32 %res 1582} 1583declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone 1584 1585 1586define i32 @test_x86_sse_ucomige_ss(<4 x float> %a0, <4 x float> %a1) { 1587 ; CHECK: vucomiss 1588 ; CHECK: setae 1589 ; CHECK: movzbl 1590 %res = call i32 @llvm.x86.sse.ucomige.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] 1591 ret i32 %res 1592} 1593declare i32 @llvm.x86.sse.ucomige.ss(<4 x float>, <4 x float>) nounwind readnone 1594 1595 1596define i32 @test_x86_sse_ucomigt_ss(<4 x float> %a0, <4 x float> %a1) { 1597 ; CHECK: vucomiss 1598 ; CHECK: seta 1599 ; CHECK: movzbl 1600 %res = call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] 1601 ret i32 %res 1602} 1603declare i32 @llvm.x86.sse.ucomigt.ss(<4 x float>, <4 x float>) nounwind readnone 1604 1605 1606define i32 @test_x86_sse_ucomile_ss(<4 x float> %a0, <4 x float> %a1) { 1607 ; CHECK: vucomiss 1608 ; CHECK: setbe 1609 ; CHECK: movzbl 1610 %res = call i32 @llvm.x86.sse.ucomile.ss(<4 x float> 
%a0, <4 x float> %a1) ; <i32> [#uses=1] 1611 ret i32 %res 1612} 1613declare i32 @llvm.x86.sse.ucomile.ss(<4 x float>, <4 x float>) nounwind readnone 1614 1615 1616define i32 @test_x86_sse_ucomilt_ss(<4 x float> %a0, <4 x float> %a1) { 1617 ; CHECK: vucomiss 1618 ; CHECK: sbbl 1619 %res = call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] 1620 ret i32 %res 1621} 1622declare i32 @llvm.x86.sse.ucomilt.ss(<4 x float>, <4 x float>) nounwind readnone 1623 1624 1625define i32 @test_x86_sse_ucomineq_ss(<4 x float> %a0, <4 x float> %a1) { 1626 ; CHECK: vucomiss 1627 ; CHECK: setne 1628 ; CHECK: movzbl 1629 %res = call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] 1630 ret i32 %res 1631} 1632declare i32 @llvm.x86.sse.ucomineq.ss(<4 x float>, <4 x float>) nounwind readnone 1633 1634 1635define <16 x i8> @test_x86_ssse3_pabs_b_128(<16 x i8> %a0) { 1636 ; CHECK: vpabsb 1637 %res = call <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8> %a0) ; <<16 x i8>> [#uses=1] 1638 ret <16 x i8> %res 1639} 1640declare <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8>) nounwind readnone 1641 1642 1643define <4 x i32> @test_x86_ssse3_pabs_d_128(<4 x i32> %a0) { 1644 ; CHECK: vpabsd 1645 %res = call <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32> %a0) ; <<4 x i32>> [#uses=1] 1646 ret <4 x i32> %res 1647} 1648declare <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32>) nounwind readnone 1649 1650 1651define <8 x i16> @test_x86_ssse3_pabs_w_128(<8 x i16> %a0) { 1652 ; CHECK: vpabsw 1653 %res = call <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16> %a0) ; <<8 x i16>> [#uses=1] 1654 ret <8 x i16> %res 1655} 1656declare <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16>) nounwind readnone 1657 1658 1659define <4 x i32> @test_x86_ssse3_phadd_d_128(<4 x i32> %a0, <4 x i32> %a1) { 1660 ; CHECK: vphaddd 1661 %res = call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] 1662 ret <4 x i32> %res 1663} 1664declare 
<4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32>, <4 x i32>) nounwind readnone 1665 1666 1667define <8 x i16> @test_x86_ssse3_phadd_sw_128(<8 x i16> %a0, <8 x i16> %a1) { 1668 ; CHECK: vphaddsw 1669 %res = call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] 1670 ret <8 x i16> %res 1671} 1672declare <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16>, <8 x i16>) nounwind readnone 1673 1674 1675define <8 x i16> @test_x86_ssse3_phadd_w_128(<8 x i16> %a0, <8 x i16> %a1) { 1676 ; CHECK: vphaddw 1677 %res = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] 1678 ret <8 x i16> %res 1679} 1680declare <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16>, <8 x i16>) nounwind readnone 1681 1682 1683define <4 x i32> @test_x86_ssse3_phsub_d_128(<4 x i32> %a0, <4 x i32> %a1) { 1684 ; CHECK: vphsubd 1685 %res = call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] 1686 ret <4 x i32> %res 1687} 1688declare <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32>, <4 x i32>) nounwind readnone 1689 1690 1691define <8 x i16> @test_x86_ssse3_phsub_sw_128(<8 x i16> %a0, <8 x i16> %a1) { 1692 ; CHECK: vphsubsw 1693 %res = call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] 1694 ret <8 x i16> %res 1695} 1696declare <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16>, <8 x i16>) nounwind readnone 1697 1698 1699define <8 x i16> @test_x86_ssse3_phsub_w_128(<8 x i16> %a0, <8 x i16> %a1) { 1700 ; CHECK: vphsubw 1701 %res = call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] 1702 ret <8 x i16> %res 1703} 1704declare <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16>, <8 x i16>) nounwind readnone 1705 1706 1707define <8 x i16> @test_x86_ssse3_pmadd_ub_sw_128(<16 x i8> %a0, <16 x i8> %a1) { 1708 ; CHECK: vpmaddubsw 1709 %res = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %a0, <16 x i8> 
%a1) ; <<8 x i16>> [#uses=1] 1710 ret <8 x i16> %res 1711} 1712declare <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8>, <16 x i8>) nounwind readnone 1713 1714 1715define <8 x i16> @test_x86_ssse3_pmul_hr_sw_128(<8 x i16> %a0, <8 x i16> %a1) { 1716 ; CHECK: vpmulhrsw 1717 %res = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] 1718 ret <8 x i16> %res 1719} 1720declare <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16>, <8 x i16>) nounwind readnone 1721 1722 1723define <16 x i8> @test_x86_ssse3_pshuf_b_128(<16 x i8> %a0, <16 x i8> %a1) { 1724 ; CHECK: vpshufb 1725 %res = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] 1726 ret <16 x i8> %res 1727} 1728declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>) nounwind readnone 1729 1730 1731define <16 x i8> @test_x86_ssse3_psign_b_128(<16 x i8> %a0, <16 x i8> %a1) { 1732 ; CHECK: vpsignb 1733 %res = call <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] 1734 ret <16 x i8> %res 1735} 1736declare <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8>, <16 x i8>) nounwind readnone 1737 1738 1739define <4 x i32> @test_x86_ssse3_psign_d_128(<4 x i32> %a0, <4 x i32> %a1) { 1740 ; CHECK: vpsignd 1741 %res = call <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] 1742 ret <4 x i32> %res 1743} 1744declare <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32>, <4 x i32>) nounwind readnone 1745 1746 1747define <8 x i16> @test_x86_ssse3_psign_w_128(<8 x i16> %a0, <8 x i16> %a1) { 1748 ; CHECK: vpsignw 1749 %res = call <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] 1750 ret <8 x i16> %res 1751} 1752declare <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16>, <8 x i16>) nounwind readnone 1753 1754 1755define <4 x double> @test_x86_avx_addsub_pd_256(<4 x double> %a0, <4 x double> %a1) { 1756 ; CHECK: vaddsubpd 
1757 %res = call <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1] 1758 ret <4 x double> %res 1759} 1760declare <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double>, <4 x double>) nounwind readnone 1761 1762 1763define <8 x float> @test_x86_avx_addsub_ps_256(<8 x float> %a0, <8 x float> %a1) { 1764 ; CHECK: vaddsubps 1765 %res = call <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1] 1766 ret <8 x float> %res 1767} 1768declare <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float>, <8 x float>) nounwind readnone 1769 1770 1771define <4 x double> @test_x86_avx_blend_pd_256(<4 x double> %a0, <4 x double> %a1) { 1772 ; CHECK: vblendpd 1773 %res = call <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double> %a0, <4 x double> %a1, i32 7) ; <<4 x double>> [#uses=1] 1774 ret <4 x double> %res 1775} 1776declare <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double>, <4 x double>, i32) nounwind readnone 1777 1778 1779define <8 x float> @test_x86_avx_blend_ps_256(<8 x float> %a0, <8 x float> %a1) { 1780 ; CHECK: vblendps 1781 %res = call <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float> %a0, <8 x float> %a1, i32 7) ; <<8 x float>> [#uses=1] 1782 ret <8 x float> %res 1783} 1784declare <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float>, <8 x float>, i32) nounwind readnone 1785 1786 1787define <4 x double> @test_x86_avx_blendv_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) { 1788 ; CHECK: vblendvpd 1789 %res = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) ; <<4 x double>> [#uses=1] 1790 ret <4 x double> %res 1791} 1792declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone 1793 1794 1795define <8 x float> @test_x86_avx_blendv_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) { 1796 ; CHECK: vblendvps 1797 %res = call <8 x float> 
@llvm.x86.avx.blendv.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) ; <<8 x float>> [#uses=1] 1798 ret <8 x float> %res 1799} 1800declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone 1801 1802 1803define <4 x double> @test_x86_avx_cmp_pd_256(<4 x double> %a0, <4 x double> %a1) { 1804 ; CHECK: vcmpordpd 1805 %res = call <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double> %a0, <4 x double> %a1, i8 7) ; <<4 x double>> [#uses=1] 1806 ret <4 x double> %res 1807} 1808declare <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone 1809 1810 1811define <8 x float> @test_x86_avx_cmp_ps_256(<8 x float> %a0, <8 x float> %a1) { 1812 ; CHECK: vcmpordps 1813 %res = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1] 1814 ret <8 x float> %res 1815} 1816 1817define <8 x float> @test_x86_avx_cmp_ps_256_pseudo_op(<8 x float> %a0, <8 x float> %a1) { 1818 ; CHECK: vcmpeqps 1819 %a2 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 0) ; <<8 x float>> [#uses=1] 1820 ; CHECK: vcmpltps 1821 %a3 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a2, i8 1) ; <<8 x float>> [#uses=1] 1822 ; CHECK: vcmpleps 1823 %a4 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a3, i8 2) ; <<8 x float>> [#uses=1] 1824 ; CHECK: vcmpunordps 1825 %a5 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a4, i8 3) ; <<8 x float>> [#uses=1] 1826 ; CHECK: vcmpneqps 1827 %a6 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a5, i8 4) ; <<8 x float>> [#uses=1] 1828 ; CHECK: vcmpnltps 1829 %a7 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a6, i8 5) ; <<8 x float>> [#uses=1] 1830 ; CHECK: vcmpnleps 1831 %a8 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a7, i8 6) ; <<8 x float>> [#uses=1] 1832 ; 
CHECK: vcmpordps 1833 %a9 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a8, i8 7) ; <<8 x float>> [#uses=1] 1834 ; CHECK: vcmpeq_uqps 1835 %a10 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a9, i8 8) ; <<8 x float>> [#uses=1] 1836 ; CHECK: vcmpngeps 1837 %a11 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a10, i8 9) ; <<8 x float>> [#uses=1] 1838 ; CHECK: vcmpngtps 1839 %a12 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a11, i8 10) ; <<8 x float>> [#uses=1] 1840 ; CHECK: vcmpfalseps 1841 %a13 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a12, i8 11) ; <<8 x float>> [#uses=1] 1842 ; CHECK: vcmpneq_oqps 1843 %a14 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a13, i8 12) ; <<8 x float>> [#uses=1] 1844 ; CHECK: vcmpgeps 1845 %a15 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a14, i8 13) ; <<8 x float>> [#uses=1] 1846 ; CHECK: vcmpgtps 1847 %a16 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a15, i8 14) ; <<8 x float>> [#uses=1] 1848 ; CHECK: vcmptrueps 1849 %a17 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a16, i8 15) ; <<8 x float>> [#uses=1] 1850 ; CHECK: vcmpeq_osps 1851 %a18 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a17, i8 16) ; <<8 x float>> [#uses=1] 1852 ; CHECK: vcmplt_oqps 1853 %a19 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a18, i8 17) ; <<8 x float>> [#uses=1] 1854 ; CHECK: vcmple_oqps 1855 %a20 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a19, i8 18) ; <<8 x float>> [#uses=1] 1856 ; CHECK: vcmpunord_sps 1857 %a21 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a20, i8 19) ; <<8 x float>> [#uses=1] 1858 ; CHECK: vcmpneq_usps 1859 %a22 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, 
<8 x float> %a21, i8 20) ; <<8 x float>> [#uses=1] 1860 ; CHECK: vcmpnlt_uqps 1861 %a23 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a22, i8 21) ; <<8 x float>> [#uses=1] 1862 ; CHECK: vcmpnle_uqps 1863 %a24 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a23, i8 22) ; <<8 x float>> [#uses=1] 1864 ; CHECK: vcmpord_sps 1865 %a25 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a24, i8 23) ; <<8 x float>> [#uses=1] 1866 ; CHECK: vcmpeq_usps 1867 %a26 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a25, i8 24) ; <<8 x float>> [#uses=1] 1868 ; CHECK: vcmpnge_uqps 1869 %a27 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a26, i8 25) ; <<8 x float>> [#uses=1] 1870 ; CHECK: vcmpngt_uqps 1871 %a28 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a27, i8 26) ; <<8 x float>> [#uses=1] 1872 ; CHECK: vcmpfalse_osps 1873 %a29 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a28, i8 27) ; <<8 x float>> [#uses=1] 1874 ; CHECK: vcmpneq_osps 1875 %a30 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a29, i8 28) ; <<8 x float>> [#uses=1] 1876 ; CHECK: vcmpge_oqps 1877 %a31 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a30, i8 29) ; <<8 x float>> [#uses=1] 1878 ; CHECK: vcmpgt_oqps 1879 %a32 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a31, i8 30) ; <<8 x float>> [#uses=1] 1880 ; CHECK: vcmptrue_usps 1881 %res = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a32, i8 31) ; <<8 x float>> [#uses=1] 1882 ret <8 x float> %res 1883} 1884declare <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone 1885 1886 1887define <4 x float> @test_x86_avx_cvt_pd2_ps_256(<4 x double> %a0) { 1888 ; CHECK: vcvtpd2psy 1889 %res = call <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double> 
%a0) ; <<4 x float>> [#uses=1] 1890 ret <4 x float> %res 1891} 1892declare <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double>) nounwind readnone 1893 1894 1895define <4 x i32> @test_x86_avx_cvt_pd2dq_256(<4 x double> %a0) { 1896 ; CHECK: vcvtpd2dqy 1897 %res = call <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double> %a0) ; <<4 x i32>> [#uses=1] 1898 ret <4 x i32> %res 1899} 1900declare <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double>) nounwind readnone 1901 1902 1903define <4 x double> @test_x86_avx_cvt_ps2_pd_256(<4 x float> %a0) { 1904 ; CHECK: vcvtps2pd 1905 %res = call <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float> %a0) ; <<4 x double>> [#uses=1] 1906 ret <4 x double> %res 1907} 1908declare <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float>) nounwind readnone 1909 1910 1911define <8 x i32> @test_x86_avx_cvt_ps2dq_256(<8 x float> %a0) { 1912 ; CHECK: vcvtps2dq 1913 %res = call <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float> %a0) ; <<8 x i32>> [#uses=1] 1914 ret <8 x i32> %res 1915} 1916declare <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float>) nounwind readnone 1917 1918 1919define <4 x double> @test_x86_avx_cvtdq2_pd_256(<4 x i32> %a0) { 1920 ; CHECK: vcvtdq2pd 1921 %res = call <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32> %a0) ; <<4 x double>> [#uses=1] 1922 ret <4 x double> %res 1923} 1924declare <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32>) nounwind readnone 1925 1926 1927define <8 x float> @test_x86_avx_cvtdq2_ps_256(<8 x i32> %a0) { 1928 ; CHECK: vcvtdq2ps 1929 %res = call <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32> %a0) ; <<8 x float>> [#uses=1] 1930 ret <8 x float> %res 1931} 1932declare <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32>) nounwind readnone 1933 1934 1935define <4 x i32> @test_x86_avx_cvtt_pd2dq_256(<4 x double> %a0) { 1936 ; CHECK: vcvttpd2dqy 1937 %res = call <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double> %a0) ; <<4 x i32>> [#uses=1] 1938 ret <4 x i32> %res 1939} 1940declare <4 x i32> 
@llvm.x86.avx.cvtt.pd2dq.256(<4 x double>) nounwind readnone 1941 1942 1943define <8 x i32> @test_x86_avx_cvtt_ps2dq_256(<8 x float> %a0) { 1944 ; CHECK: vcvttps2dq 1945 %res = call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> %a0) ; <<8 x i32>> [#uses=1] 1946 ret <8 x i32> %res 1947} 1948declare <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float>) nounwind readnone 1949 1950 1951define <8 x float> @test_x86_avx_dp_ps_256(<8 x float> %a0, <8 x float> %a1) { 1952 ; CHECK: vdpps 1953 %res = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> %a0, <8 x float> %a1, i32 7) ; <<8 x float>> [#uses=1] 1954 ret <8 x float> %res 1955} 1956declare <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float>, <8 x float>, i32) nounwind readnone 1957 1958 1959define <4 x double> @test_x86_avx_hadd_pd_256(<4 x double> %a0, <4 x double> %a1) { 1960 ; CHECK: vhaddpd 1961 %res = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1] 1962 ret <4 x double> %res 1963} 1964declare <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double>, <4 x double>) nounwind readnone 1965 1966 1967define <8 x float> @test_x86_avx_hadd_ps_256(<8 x float> %a0, <8 x float> %a1) { 1968 ; CHECK: vhaddps 1969 %res = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1] 1970 ret <8 x float> %res 1971} 1972declare <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float>, <8 x float>) nounwind readnone 1973 1974 1975define <4 x double> @test_x86_avx_hsub_pd_256(<4 x double> %a0, <4 x double> %a1) { 1976 ; CHECK: vhsubpd 1977 %res = call <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1] 1978 ret <4 x double> %res 1979} 1980declare <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double>, <4 x double>) nounwind readnone 1981 1982 1983define <8 x float> @test_x86_avx_hsub_ps_256(<8 x float> %a0, <8 x float> %a1) { 1984 ; CHECK: vhsubps 1985 %res = call <8 x float> 
@llvm.x86.avx.hsub.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1] 1986 ret <8 x float> %res 1987} 1988declare <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float>, <8 x float>) nounwind readnone 1989 1990 1991define <32 x i8> @test_x86_avx_ldu_dq_256(i8* %a0) { 1992 ; CHECK: vlddqu 1993 %res = call <32 x i8> @llvm.x86.avx.ldu.dq.256(i8* %a0) ; <<32 x i8>> [#uses=1] 1994 ret <32 x i8> %res 1995} 1996declare <32 x i8> @llvm.x86.avx.ldu.dq.256(i8*) nounwind readonly 1997 1998 1999define <2 x double> @test_x86_avx_maskload_pd(i8* %a0, <2 x double> %a1) { 2000 ; CHECK: vmaskmovpd 2001 %res = call <2 x double> @llvm.x86.avx.maskload.pd(i8* %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] 2002 ret <2 x double> %res 2003} 2004declare <2 x double> @llvm.x86.avx.maskload.pd(i8*, <2 x double>) nounwind readonly 2005 2006 2007define <4 x double> @test_x86_avx_maskload_pd_256(i8* %a0, <4 x double> %a1) { 2008 ; CHECK: vmaskmovpd 2009 %res = call <4 x double> @llvm.x86.avx.maskload.pd.256(i8* %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1] 2010 ret <4 x double> %res 2011} 2012declare <4 x double> @llvm.x86.avx.maskload.pd.256(i8*, <4 x double>) nounwind readonly 2013 2014 2015define <4 x float> @test_x86_avx_maskload_ps(i8* %a0, <4 x float> %a1) { 2016 ; CHECK: vmaskmovps 2017 %res = call <4 x float> @llvm.x86.avx.maskload.ps(i8* %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] 2018 ret <4 x float> %res 2019} 2020declare <4 x float> @llvm.x86.avx.maskload.ps(i8*, <4 x float>) nounwind readonly 2021 2022 2023define <8 x float> @test_x86_avx_maskload_ps_256(i8* %a0, <8 x float> %a1) { 2024 ; CHECK: vmaskmovps 2025 %res = call <8 x float> @llvm.x86.avx.maskload.ps.256(i8* %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1] 2026 ret <8 x float> %res 2027} 2028declare <8 x float> @llvm.x86.avx.maskload.ps.256(i8*, <8 x float>) nounwind readonly 2029 2030 2031define void @test_x86_avx_maskstore_pd(i8* %a0, <2 x double> %a1, <2 x double> %a2) { 2032 ; CHECK: vmaskmovpd 
2033 call void @llvm.x86.avx.maskstore.pd(i8* %a0, <2 x double> %a1, <2 x double> %a2) 2034 ret void 2035} 2036declare void @llvm.x86.avx.maskstore.pd(i8*, <2 x double>, <2 x double>) nounwind 2037 2038 2039define void @test_x86_avx_maskstore_pd_256(i8* %a0, <4 x double> %a1, <4 x double> %a2) { 2040 ; CHECK: vmaskmovpd 2041 call void @llvm.x86.avx.maskstore.pd.256(i8* %a0, <4 x double> %a1, <4 x double> %a2) 2042 ret void 2043} 2044declare void @llvm.x86.avx.maskstore.pd.256(i8*, <4 x double>, <4 x double>) nounwind 2045 2046 2047define void @test_x86_avx_maskstore_ps(i8* %a0, <4 x float> %a1, <4 x float> %a2) { 2048 ; CHECK: vmaskmovps 2049 call void @llvm.x86.avx.maskstore.ps(i8* %a0, <4 x float> %a1, <4 x float> %a2) 2050 ret void 2051} 2052declare void @llvm.x86.avx.maskstore.ps(i8*, <4 x float>, <4 x float>) nounwind 2053 2054 2055define void @test_x86_avx_maskstore_ps_256(i8* %a0, <8 x float> %a1, <8 x float> %a2) { 2056 ; CHECK: vmaskmovps 2057 call void @llvm.x86.avx.maskstore.ps.256(i8* %a0, <8 x float> %a1, <8 x float> %a2) 2058 ret void 2059} 2060declare void @llvm.x86.avx.maskstore.ps.256(i8*, <8 x float>, <8 x float>) nounwind 2061 2062 2063define <4 x double> @test_x86_avx_max_pd_256(<4 x double> %a0, <4 x double> %a1) { 2064 ; CHECK: vmaxpd 2065 %res = call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1] 2066 ret <4 x double> %res 2067} 2068declare <4 x double> @llvm.x86.avx.max.pd.256(<4 x double>, <4 x double>) nounwind readnone 2069 2070 2071define <8 x float> @test_x86_avx_max_ps_256(<8 x float> %a0, <8 x float> %a1) { 2072 ; CHECK: vmaxps 2073 %res = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1] 2074 ret <8 x float> %res 2075} 2076declare <8 x float> @llvm.x86.avx.max.ps.256(<8 x float>, <8 x float>) nounwind readnone 2077 2078 2079define <4 x double> @test_x86_avx_min_pd_256(<4 x double> %a0, <4 x double> %a1) { 2080 ; CHECK: vminpd 
2081 %res = call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1] 2082 ret <4 x double> %res 2083} 2084declare <4 x double> @llvm.x86.avx.min.pd.256(<4 x double>, <4 x double>) nounwind readnone 2085 2086 2087define <8 x float> @test_x86_avx_min_ps_256(<8 x float> %a0, <8 x float> %a1) { 2088 ; CHECK: vminps 2089 %res = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1] 2090 ret <8 x float> %res 2091} 2092declare <8 x float> @llvm.x86.avx.min.ps.256(<8 x float>, <8 x float>) nounwind readnone 2093 2094 2095define i32 @test_x86_avx_movmsk_pd_256(<4 x double> %a0) { 2096 ; CHECK: vmovmskpd 2097 %res = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0) ; <i32> [#uses=1] 2098 ret i32 %res 2099} 2100declare i32 @llvm.x86.avx.movmsk.pd.256(<4 x double>) nounwind readnone 2101 2102 2103define i32 @test_x86_avx_movmsk_ps_256(<8 x float> %a0) { 2104 ; CHECK: vmovmskps 2105 %res = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0) ; <i32> [#uses=1] 2106 ret i32 %res 2107} 2108declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) nounwind readnone 2109 2110 2111 2112 2113 2114 2115 2116define i32 @test_x86_avx_ptestc_256(<4 x i64> %a0, <4 x i64> %a1) { 2117 ; CHECK: vptest 2118 ; CHECK: sbbl 2119 %res = call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1] 2120 ret i32 %res 2121} 2122declare i32 @llvm.x86.avx.ptestc.256(<4 x i64>, <4 x i64>) nounwind readnone 2123 2124 2125define i32 @test_x86_avx_ptestnzc_256(<4 x i64> %a0, <4 x i64> %a1) { 2126 ; CHECK: vptest 2127 ; CHECK: seta 2128 ; CHECK: movzbl 2129 %res = call i32 @llvm.x86.avx.ptestnzc.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1] 2130 ret i32 %res 2131} 2132declare i32 @llvm.x86.avx.ptestnzc.256(<4 x i64>, <4 x i64>) nounwind readnone 2133 2134 2135define i32 @test_x86_avx_ptestz_256(<4 x i64> %a0, <4 x i64> %a1) { 2136 ; CHECK: vptest 2137 ; CHECK: sete 2138 ; CHECK: movzbl 
2139 %res = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1] 2140 ret i32 %res 2141} 2142declare i32 @llvm.x86.avx.ptestz.256(<4 x i64>, <4 x i64>) nounwind readnone 2143 2144 2145define <8 x float> @test_x86_avx_rcp_ps_256(<8 x float> %a0) { 2146 ; CHECK: vrcpps 2147 %res = call <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1] 2148 ret <8 x float> %res 2149} 2150declare <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float>) nounwind readnone 2151 2152 2153define <4 x double> @test_x86_avx_round_pd_256(<4 x double> %a0) { 2154 ; CHECK: vroundpd 2155 %res = call <4 x double> @llvm.x86.avx.round.pd.256(<4 x double> %a0, i32 7) ; <<4 x double>> [#uses=1] 2156 ret <4 x double> %res 2157} 2158declare <4 x double> @llvm.x86.avx.round.pd.256(<4 x double>, i32) nounwind readnone 2159 2160 2161define <8 x float> @test_x86_avx_round_ps_256(<8 x float> %a0) { 2162 ; CHECK: vroundps 2163 %res = call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %a0, i32 7) ; <<8 x float>> [#uses=1] 2164 ret <8 x float> %res 2165} 2166declare <8 x float> @llvm.x86.avx.round.ps.256(<8 x float>, i32) nounwind readnone 2167 2168 2169define <8 x float> @test_x86_avx_rsqrt_ps_256(<8 x float> %a0) { 2170 ; CHECK: vrsqrtps 2171 %res = call <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1] 2172 ret <8 x float> %res 2173} 2174declare <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float>) nounwind readnone 2175 2176 2177define <4 x double> @test_x86_avx_sqrt_pd_256(<4 x double> %a0) { 2178 ; CHECK: vsqrtpd 2179 %res = call <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double> %a0) ; <<4 x double>> [#uses=1] 2180 ret <4 x double> %res 2181} 2182declare <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double>) nounwind readnone 2183 2184 2185define <8 x float> @test_x86_avx_sqrt_ps_256(<8 x float> %a0) { 2186 ; CHECK: vsqrtps 2187 %res = call <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1] 
2188 ret <8 x float> %res 2189} 2190declare <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float>) nounwind readnone 2191 2192 2193define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) { 2194 ; FIXME: unfortunately the execution domain fix pass changes this to vmovups and its hard to force with no 256-bit integer instructions 2195 ; CHECK: vmovups 2196 ; add operation forces the execution domain. 2197 %a2 = add <32 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> 2198 call void @llvm.x86.avx.storeu.dq.256(i8* %a0, <32 x i8> %a2) 2199 ret void 2200} 2201declare void @llvm.x86.avx.storeu.dq.256(i8*, <32 x i8>) nounwind 2202 2203 2204define void @test_x86_avx_storeu_pd_256(i8* %a0, <4 x double> %a1) { 2205 ; CHECK: vmovupd 2206 ; add operation forces the execution domain. 2207 %a2 = fadd <4 x double> %a1, <double 0x0, double 0x0, double 0x0, double 0x0> 2208 call void @llvm.x86.avx.storeu.pd.256(i8* %a0, <4 x double> %a2) 2209 ret void 2210} 2211declare void @llvm.x86.avx.storeu.pd.256(i8*, <4 x double>) nounwind 2212 2213 2214define void @test_x86_avx_storeu_ps_256(i8* %a0, <8 x float> %a1) { 2215 ; CHECK: vmovups 2216 call void @llvm.x86.avx.storeu.ps.256(i8* %a0, <8 x float> %a1) 2217 ret void 2218} 2219declare void @llvm.x86.avx.storeu.ps.256(i8*, <8 x float>) nounwind 2220 2221 2222define <4 x double> @test_x86_avx_vbroadcast_sd_256(i8* %a0) { 2223 ; CHECK: vbroadcastsd 2224 %res = call <4 x double> @llvm.x86.avx.vbroadcast.sd.256(i8* %a0) ; <<4 x double>> [#uses=1] 2225 ret <4 x double> %res 2226} 2227declare <4 x double> @llvm.x86.avx.vbroadcast.sd.256(i8*) nounwind readonly 2228 2229 2230define <4 x double> @test_x86_avx_vbroadcastf128_pd_256(i8* %a0) { 2231 ; CHECK: vbroadcastf128 2232 %res = call <4 x double> @llvm.x86.avx.vbroadcastf128.pd.256(i8* %a0) ; <<4 x double>> [#uses=1] 2233 ret <4 x 
double> %res 2234} 2235declare <4 x double> @llvm.x86.avx.vbroadcastf128.pd.256(i8*) nounwind readonly 2236 2237 2238define <8 x float> @test_x86_avx_vbroadcastf128_ps_256(i8* %a0) { 2239 ; CHECK: vbroadcastf128 2240 %res = call <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8* %a0) ; <<8 x float>> [#uses=1] 2241 ret <8 x float> %res 2242} 2243declare <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8*) nounwind readonly 2244 2245 2246define <4 x float> @test_x86_avx_vbroadcast_ss(i8* %a0) { 2247 ; CHECK: vbroadcastss 2248 %res = call <4 x float> @llvm.x86.avx.vbroadcast.ss(i8* %a0) ; <<4 x float>> [#uses=1] 2249 ret <4 x float> %res 2250} 2251declare <4 x float> @llvm.x86.avx.vbroadcast.ss(i8*) nounwind readonly 2252 2253 2254define <8 x float> @test_x86_avx_vbroadcast_ss_256(i8* %a0) { 2255 ; CHECK: vbroadcastss 2256 %res = call <8 x float> @llvm.x86.avx.vbroadcast.ss.256(i8* %a0) ; <<8 x float>> [#uses=1] 2257 ret <8 x float> %res 2258} 2259declare <8 x float> @llvm.x86.avx.vbroadcast.ss.256(i8*) nounwind readonly 2260 2261 2262define <2 x double> @test_x86_avx_vextractf128_pd_256(<4 x double> %a0) { 2263 ; CHECK: vextractf128 2264 %res = call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %a0, i8 7) ; <<2 x double>> [#uses=1] 2265 ret <2 x double> %res 2266} 2267declare <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double>, i8) nounwind readnone 2268 2269 2270define <4 x float> @test_x86_avx_vextractf128_ps_256(<8 x float> %a0) { 2271 ; CHECK: vextractf128 2272 %res = call <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float> %a0, i8 7) ; <<4 x float>> [#uses=1] 2273 ret <4 x float> %res 2274} 2275declare <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float>, i8) nounwind readnone 2276 2277 2278define <4 x i32> @test_x86_avx_vextractf128_si_256(<8 x i32> %a0) { 2279 ; CHECK: vextractf128 2280 %res = call <4 x i32> @llvm.x86.avx.vextractf128.si.256(<8 x i32> %a0, i8 7) ; <<4 x i32>> [#uses=1] 2281 ret <4 x i32> %res 2282} 2283declare 
<4 x i32> @llvm.x86.avx.vextractf128.si.256(<8 x i32>, i8) nounwind readnone 2284 2285 2286define <4 x double> @test_x86_avx_vinsertf128_pd_256(<4 x double> %a0, <2 x double> %a1) { 2287 ; CHECK: vinsertf128 2288 %res = call <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double> %a0, <2 x double> %a1, i8 7) ; <<4 x double>> [#uses=1] 2289 ret <4 x double> %res 2290} 2291declare <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double>, <2 x double>, i8) nounwind readnone 2292 2293 2294define <8 x float> @test_x86_avx_vinsertf128_ps_256(<8 x float> %a0, <4 x float> %a1) { 2295 ; CHECK: vinsertf128 2296 %res = call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> %a0, <4 x float> %a1, i8 7) ; <<8 x float>> [#uses=1] 2297 ret <8 x float> %res 2298} 2299declare <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float>, <4 x float>, i8) nounwind readnone 2300 2301 2302define <8 x i32> @test_x86_avx_vinsertf128_si_256(<8 x i32> %a0, <4 x i32> %a1) { 2303 ; CHECK: vinsertf128 2304 %res = call <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32> %a0, <4 x i32> %a1, i8 7) ; <<8 x i32>> [#uses=1] 2305 ret <8 x i32> %res 2306} 2307declare <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32>, <4 x i32>, i8) nounwind readnone 2308 2309 2310define <4 x double> @test_x86_avx_vperm2f128_pd_256(<4 x double> %a0, <4 x double> %a1) { 2311 ; CHECK: vperm2f128 2312 %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 7) ; <<4 x double>> [#uses=1] 2313 ret <4 x double> %res 2314} 2315declare <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone 2316 2317 2318define <8 x float> @test_x86_avx_vperm2f128_ps_256(<8 x float> %a0, <8 x float> %a1) { 2319 ; CHECK: vperm2f128 2320 %res = call <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1] 2321 ret <8 x float> %res 2322} 2323declare <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float>, 
<8 x float>, i8) nounwind readnone 2324 2325 2326define <8 x i32> @test_x86_avx_vperm2f128_si_256(<8 x i32> %a0, <8 x i32> %a1) { 2327 ; CHECK: vperm2f128 2328 %res = call <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32> %a0, <8 x i32> %a1, i8 7) ; <<8 x i32>> [#uses=1] 2329 ret <8 x i32> %res 2330} 2331declare <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32>, <8 x i32>, i8) nounwind readnone 2332 2333 2334define <2 x double> @test_x86_avx_vpermil_pd(<2 x double> %a0) { 2335 ; CHECK: vpermilpd 2336 %res = call <2 x double> @llvm.x86.avx.vpermil.pd(<2 x double> %a0, i8 7) ; <<2 x double>> [#uses=1] 2337 ret <2 x double> %res 2338} 2339declare <2 x double> @llvm.x86.avx.vpermil.pd(<2 x double>, i8) nounwind readnone 2340 2341 2342define <4 x double> @test_x86_avx_vpermil_pd_256(<4 x double> %a0) { 2343 ; CHECK: vpermilpd 2344 %res = call <4 x double> @llvm.x86.avx.vpermil.pd.256(<4 x double> %a0, i8 7) ; <<4 x double>> [#uses=1] 2345 ret <4 x double> %res 2346} 2347declare <4 x double> @llvm.x86.avx.vpermil.pd.256(<4 x double>, i8) nounwind readnone 2348 2349 2350define <4 x float> @test_x86_avx_vpermil_ps(<4 x float> %a0) { 2351 ; CHECK: vpshufd 2352 %res = call <4 x float> @llvm.x86.avx.vpermil.ps(<4 x float> %a0, i8 7) ; <<4 x float>> [#uses=1] 2353 ret <4 x float> %res 2354} 2355declare <4 x float> @llvm.x86.avx.vpermil.ps(<4 x float>, i8) nounwind readnone 2356 2357 2358define <8 x float> @test_x86_avx_vpermil_ps_256(<8 x float> %a0) { 2359 ; CHECK: vpermilps 2360 %res = call <8 x float> @llvm.x86.avx.vpermil.ps.256(<8 x float> %a0, i8 7) ; <<8 x float>> [#uses=1] 2361 ret <8 x float> %res 2362} 2363declare <8 x float> @llvm.x86.avx.vpermil.ps.256(<8 x float>, i8) nounwind readnone 2364 2365 2366define <2 x double> @test_x86_avx_vpermilvar_pd(<2 x double> %a0, <2 x i64> %a1) { 2367 ; CHECK: vpermilpd 2368 %res = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> %a1) ; <<2 x double>> [#uses=1] 2369 ret <2 x double> %res 2370} 
2371declare <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double>, <2 x i64>) nounwind readnone 2372 2373 2374define <4 x double> @test_x86_avx_vpermilvar_pd_256(<4 x double> %a0, <4 x i64> %a1) { 2375 ; CHECK: vpermilpd 2376 %res = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> %a1) ; <<4 x double>> [#uses=1] 2377 ret <4 x double> %res 2378} 2379declare <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double>, <4 x i64>) nounwind readnone 2380 2381 2382define <4 x float> @test_x86_avx_vpermilvar_ps(<4 x float> %a0, <4 x i32> %a1) { 2383 ; CHECK: vpermilps 2384 %res = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a1) ; <<4 x float>> [#uses=1] 2385 ret <4 x float> %res 2386} 2387define <4 x float> @test_x86_avx_vpermilvar_ps_load(<4 x float> %a0, <4 x i32>* %a1) { 2388 ; CHECK: vpermilps 2389 %a2 = load <4 x i32>* %a1 2390 %res = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a2) ; <<4 x float>> [#uses=1] 2391 ret <4 x float> %res 2392} 2393declare <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float>, <4 x i32>) nounwind readnone 2394 2395 2396define <8 x float> @test_x86_avx_vpermilvar_ps_256(<8 x float> %a0, <8 x i32> %a1) { 2397 ; CHECK: vpermilps 2398 %res = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> %a1) ; <<8 x float>> [#uses=1] 2399 ret <8 x float> %res 2400} 2401declare <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float>, <8 x i32>) nounwind readnone 2402 2403 2404define i32 @test_x86_avx_vtestc_pd(<2 x double> %a0, <2 x double> %a1) { 2405 ; CHECK: vtestpd 2406 ; CHECK: sbbl 2407 %res = call i32 @llvm.x86.avx.vtestc.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] 2408 ret i32 %res 2409} 2410declare i32 @llvm.x86.avx.vtestc.pd(<2 x double>, <2 x double>) nounwind readnone 2411 2412 2413define i32 @test_x86_avx_vtestc_pd_256(<4 x double> %a0, <4 x double> %a1) { 2414 ; CHECK: vtestpd 2415 ; CHECK: sbbl 2416 %res = call i32 
@llvm.x86.avx.vtestc.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1] 2417 ret i32 %res 2418} 2419declare i32 @llvm.x86.avx.vtestc.pd.256(<4 x double>, <4 x double>) nounwind readnone 2420 2421 2422define i32 @test_x86_avx_vtestc_ps(<4 x float> %a0, <4 x float> %a1) { 2423 ; CHECK: vtestps 2424 ; CHECK: sbbl 2425 %res = call i32 @llvm.x86.avx.vtestc.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] 2426 ret i32 %res 2427} 2428declare i32 @llvm.x86.avx.vtestc.ps(<4 x float>, <4 x float>) nounwind readnone 2429 2430 2431define i32 @test_x86_avx_vtestc_ps_256(<8 x float> %a0, <8 x float> %a1) { 2432 ; CHECK: vtestps 2433 ; CHECK: sbbl 2434 %res = call i32 @llvm.x86.avx.vtestc.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1] 2435 ret i32 %res 2436} 2437declare i32 @llvm.x86.avx.vtestc.ps.256(<8 x float>, <8 x float>) nounwind readnone 2438 2439 2440define i32 @test_x86_avx_vtestnzc_pd(<2 x double> %a0, <2 x double> %a1) { 2441 ; CHECK: vtestpd 2442 ; CHECK: seta 2443 ; CHECK: movzbl 2444 %res = call i32 @llvm.x86.avx.vtestnzc.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] 2445 ret i32 %res 2446} 2447declare i32 @llvm.x86.avx.vtestnzc.pd(<2 x double>, <2 x double>) nounwind readnone 2448 2449 2450define i32 @test_x86_avx_vtestnzc_pd_256(<4 x double> %a0, <4 x double> %a1) { 2451 ; CHECK: vtestpd 2452 ; CHECK: seta 2453 ; CHECK: movzbl 2454 %res = call i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1] 2455 ret i32 %res 2456} 2457declare i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double>, <4 x double>) nounwind readnone 2458 2459 2460define i32 @test_x86_avx_vtestnzc_ps(<4 x float> %a0, <4 x float> %a1) { 2461 ; CHECK: vtestps 2462 ; CHECK: seta 2463 ; CHECK: movzbl 2464 %res = call i32 @llvm.x86.avx.vtestnzc.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] 2465 ret i32 %res 2466} 2467declare i32 @llvm.x86.avx.vtestnzc.ps(<4 x float>, <4 x float>) nounwind readnone 2468 2469 2470define i32 
@test_x86_avx_vtestnzc_ps_256(<8 x float> %a0, <8 x float> %a1) { 2471 ; CHECK: vtestps 2472 ; CHECK: seta 2473 ; CHECK: movzbl 2474 %res = call i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1] 2475 ret i32 %res 2476} 2477declare i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float>, <8 x float>) nounwind readnone 2478 2479 2480define i32 @test_x86_avx_vtestz_pd(<2 x double> %a0, <2 x double> %a1) { 2481 ; CHECK: vtestpd 2482 ; CHECK: sete 2483 ; CHECK: movzbl 2484 %res = call i32 @llvm.x86.avx.vtestz.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] 2485 ret i32 %res 2486} 2487declare i32 @llvm.x86.avx.vtestz.pd(<2 x double>, <2 x double>) nounwind readnone 2488 2489 2490define i32 @test_x86_avx_vtestz_pd_256(<4 x double> %a0, <4 x double> %a1) { 2491 ; CHECK: vtestpd 2492 ; CHECK: sete 2493 ; CHECK: movzbl 2494 %res = call i32 @llvm.x86.avx.vtestz.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1] 2495 ret i32 %res 2496} 2497declare i32 @llvm.x86.avx.vtestz.pd.256(<4 x double>, <4 x double>) nounwind readnone 2498 2499 2500define i32 @test_x86_avx_vtestz_ps(<4 x float> %a0, <4 x float> %a1) { 2501 ; CHECK: vtestps 2502 ; CHECK: sete 2503 ; CHECK: movzbl 2504 %res = call i32 @llvm.x86.avx.vtestz.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] 2505 ret i32 %res 2506} 2507declare i32 @llvm.x86.avx.vtestz.ps(<4 x float>, <4 x float>) nounwind readnone 2508 2509 2510define i32 @test_x86_avx_vtestz_ps_256(<8 x float> %a0, <8 x float> %a1) { 2511 ; CHECK: vtestps 2512 ; CHECK: sete 2513 ; CHECK: movzbl 2514 %res = call i32 @llvm.x86.avx.vtestz.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1] 2515 ret i32 %res 2516} 2517declare i32 @llvm.x86.avx.vtestz.ps.256(<8 x float>, <8 x float>) nounwind readnone 2518 2519 2520define void @test_x86_avx_vzeroall() { 2521 ; CHECK: vzeroall 2522 call void @llvm.x86.avx.vzeroall() 2523 ret void 2524} 2525declare void @llvm.x86.avx.vzeroall() nounwind 2526 2527 2528define 
void @test_x86_avx_vzeroupper() {
  ; The mnemonic is a substring of this function's symbol name, so the pattern
  ; is anchored to the start of an output line (leading whitespace only) to
  ; match the emitted instruction rather than the label or .globl directive.
  ; CHECK: {{^[[:space:]]+vzeroupper}}
  call void @llvm.x86.avx.vzeroupper()
  ret void
}
declare void @llvm.x86.avx.vzeroupper() nounwind

; Make sure that instructions which have no AVX equivalent, but are gated on
; SSE-family feature flags, still work when AVX is enabled.
;
; The next six functions are named after the instruction they test, so a bare
; substring pattern would be satisfied by the function's own label. Each test
; therefore first matches the label and then an instruction pattern anchored
; to the start of an output line (leading whitespace only).

; CHECK: monitor:
; CHECK: {{^[[:space:]]+monitor}}
define void @monitor(i8* %P, i32 %E, i32 %H) nounwind {
entry:
  tail call void @llvm.x86.sse3.monitor(i8* %P, i32 %E, i32 %H)
  ret void
}
declare void @llvm.x86.sse3.monitor(i8*, i32, i32) nounwind

; CHECK: mwait:
; CHECK: {{^[[:space:]]+mwait}}
define void @mwait(i32 %E, i32 %H) nounwind {
entry:
  tail call void @llvm.x86.sse3.mwait(i32 %E, i32 %H)
  ret void
}
declare void @llvm.x86.sse3.mwait(i32, i32) nounwind

; CHECK: sfence:
; CHECK: {{^[[:space:]]+sfence}}
define void @sfence() nounwind {
entry:
  tail call void @llvm.x86.sse.sfence()
  ret void
}
declare void @llvm.x86.sse.sfence() nounwind

; CHECK: lfence:
; CHECK: {{^[[:space:]]+lfence}}
define void @lfence() nounwind {
entry:
  tail call void @llvm.x86.sse2.lfence()
  ret void
}
declare void @llvm.x86.sse2.lfence() nounwind

; CHECK: mfence:
; CHECK: {{^[[:space:]]+mfence}}
define void @mfence() nounwind {
entry:
  tail call void @llvm.x86.sse2.mfence()
  ret void
}
declare void @llvm.x86.sse2.mfence() nounwind

; CHECK: clflush:
; CHECK: {{^[[:space:]]+clflush}}
define void @clflush(i8* %p) nounwind {
entry:
  tail call void @llvm.x86.sse2.clflush(i8* %p)
  ret void
}
declare void @llvm.x86.sse2.clflush(i8*) nounwind

; crc32 with an 8-bit data operand.
; CHECK: crc32b
define i32 @crc32_32_8(i32 %a, i8 %b) nounwind {
  %tmp = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a, i8 %b)
  ret i32 %tmp
}
declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind

; crc32 with a 16-bit data operand.
; CHECK: crc32w
define i32 @crc32_32_16(i32 %a, i16 %b) nounwind {
  %tmp = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a, i16 %b)
  ret i32 %tmp
}
declare i32 @llvm.x86.sse42.crc32.32.16(i32, i16) nounwind

; crc32 with a 32-bit data operand.
; CHECK: crc32l
define i32 @crc32_32_32(i32 %a, i32 %b) nounwind {
  %tmp = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a, i32 %b)
  ret i32 %tmp
}
declare i32 @llvm.x86.sse42.crc32.32.32(i32, i32) nounwind

; 256-bit integer store through the movnt.dq.256 intrinsic.
; CHECK: movntdq
define void @movnt_dq(i8* %p, <4 x i64> %a1) nounwind {
  ; NOTE(review): presumably this integer add plays the same role as the fadd
  ; in movnt_pd (forcing the execution domain) - confirm.
  %a2 = add <4 x i64> %a1, <i64 1, i64 1, i64 1, i64 1>
  tail call void @llvm.x86.avx.movnt.dq.256(i8* %p, <4 x i64> %a2) nounwind
  ret void
}
declare void @llvm.x86.avx.movnt.dq.256(i8*, <4 x i64>) nounwind

; 256-bit single-precision store through the movnt.ps.256 intrinsic.
; CHECK: movntps
define void @movnt_ps(i8* %p, <8 x float> %a) nounwind {
  tail call void @llvm.x86.avx.movnt.ps.256(i8* %p, <8 x float> %a) nounwind
  ret void
}
declare void @llvm.x86.avx.movnt.ps.256(i8*, <8 x float>) nounwind

; 256-bit double-precision store through the movnt.pd.256 intrinsic.
; CHECK: movntpd
define void @movnt_pd(i8* %p, <4 x double> %a1) nounwind {
  ; The add operation forces the execution domain.
  %a2 = fadd <4 x double> %a1, <double 0x0, double 0x0, double 0x0, double 0x0>
  tail call void @llvm.x86.avx.movnt.pd.256(i8* %p, <4 x double> %a2) nounwind
  ret void
}
declare void @llvm.x86.avx.movnt.pd.256(i8*, <4 x double>) nounwind


; Check for pclmulqdq: the call passes an immediate of 0 and the output is
; expected to contain the VEX-prefixed form.
define <2 x i64> @test_x86_pclmulqdq(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK: vpclmulqdq
  %res = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> %a0, <2 x i64> %a1, i8 0)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.pclmulqdq(<2 x i64>, <2 x i64>, i8) nounwind readnone