; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=-avx,+sse | FileCheck %s
;
; Instruction-selection test for the llvm.x86.sse.* intrinsics. The command
; above compiles this file for i386-apple-darwin with AVX disabled and SSE
; enabled, then verifies that each wrapper function below contains the expected
; mnemonic(s).
;
; Every function starts with a label directive naming its own symbol. This
; splits the llc output into per-function regions, so a generic pattern such as
; "movl" or "movzbl" cannot be satisfied by an instruction that belongs to a
; different function.

; add.ss is expected to select addss.
define <4 x float> @test_x86_sse_add_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK-LABEL: test_x86_sse_add_ss:
  ; CHECK: addss
  %res = call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.add.ss(<4 x float>, <4 x float>) nounwind readnone

; cmp.ps with immediate 7 is expected to print as cmpordps.
define <4 x float> @test_x86_sse_cmp_ps(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK-LABEL: test_x86_sse_cmp_ps:
  ; CHECK: cmpordps
  %res = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %a0, <4 x float> %a1, i8 7)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) nounwind readnone

; cmp.ss with immediate 7 is expected to print as cmpordss.
define <4 x float> @test_x86_sse_cmp_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK-LABEL: test_x86_sse_cmp_ss:
  ; CHECK: cmpordss
  %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 7)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind readnone

; comieq.ss: expects comiss, then sete, then movzbl.
define i32 @test_x86_sse_comieq_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK-LABEL: test_x86_sse_comieq_ss:
  ; CHECK: comiss
  ; CHECK: sete
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone

; comige.ss: expects comiss, then setae, then movzbl.
define i32 @test_x86_sse_comige_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK-LABEL: test_x86_sse_comige_ss:
  ; CHECK: comiss
  ; CHECK: setae
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse.comige.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.comige.ss(<4 x float>, <4 x float>) nounwind readnone

; comigt.ss: expects comiss, then seta, then movzbl.
define i32 @test_x86_sse_comigt_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK-LABEL: test_x86_sse_comigt_ss:
  ; CHECK: comiss
  ; CHECK: seta
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse.comigt.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.comigt.ss(<4 x float>, <4 x float>) nounwind readnone

; comile.ss: expects comiss, then setbe, then movzbl.
define i32 @test_x86_sse_comile_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK-LABEL: test_x86_sse_comile_ss:
  ; CHECK: comiss
  ; CHECK: setbe
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse.comile.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.comile.ss(<4 x float>, <4 x float>) nounwind readnone

; comilt.ss: expects comiss followed by an sbb-based materialization.
define i32 @test_x86_sse_comilt_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK-LABEL: test_x86_sse_comilt_ss:
  ; CHECK: comiss
  ; CHECK: sbb
  %res = call i32 @llvm.x86.sse.comilt.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.comilt.ss(<4 x float>, <4 x float>) nounwind readnone

; comineq.ss: expects comiss, then setne, then movzbl.
define i32 @test_x86_sse_comineq_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK-LABEL: test_x86_sse_comineq_ss:
  ; CHECK: comiss
  ; CHECK: setne
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse.comineq.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.comineq.ss(<4 x float>, <4 x float>) nounwind readnone

; cvtsi2ss with the constant 7: expects a movl (presumably materializing the
; constant) before cvtsi2ss.
define <4 x float> @test_x86_sse_cvtsi2ss(<4 x float> %a0) {
  ; CHECK-LABEL: test_x86_sse_cvtsi2ss:
  ; CHECK: movl
  ; CHECK: cvtsi2ss
  %res = call <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float> %a0, i32 7)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float>, i32) nounwind readnone

; cvtss2si is expected to select cvtss2si.
define i32 @test_x86_sse_cvtss2si(<4 x float> %a0) {
  ; CHECK-LABEL: test_x86_sse_cvtss2si:
  ; CHECK: cvtss2si
  %res = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %a0)
  ret i32 %res
}
declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone

; cvttss2si is expected to select cvttss2si.
define i32 @test_x86_sse_cvttss2si(<4 x float> %a0) {
  ; CHECK-LABEL: test_x86_sse_cvttss2si:
  ; CHECK: cvttss2si
  %res = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %a0)
  ret i32 %res
}
declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) nounwind readnone

; div.ss is expected to select divss.
define <4 x float> @test_x86_sse_div_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK-LABEL: test_x86_sse_div_ss:
  ; CHECK: divss
  %res = call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.div.ss(<4 x float>, <4 x float>) nounwind readnone

; ldmxcsr: expects a movl (presumably loading the pointer argument) before
; ldmxcsr.
define void @test_x86_sse_ldmxcsr(i8* %a0) {
  ; CHECK-LABEL: test_x86_sse_ldmxcsr:
  ; CHECK: movl
  ; CHECK: ldmxcsr
  call void @llvm.x86.sse.ldmxcsr(i8* %a0)
  ret void
}
declare void @llvm.x86.sse.ldmxcsr(i8*) nounwind

; max.ps is expected to select maxps.
define <4 x float> @test_x86_sse_max_ps(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK-LABEL: test_x86_sse_max_ps:
  ; CHECK: maxps
  %res = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone

; max.ss is expected to select maxss.
define <4 x float> @test_x86_sse_max_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK-LABEL: test_x86_sse_max_ss:
  ; CHECK: maxss
  %res = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone

; min.ps is expected to select minps.
define <4 x float> @test_x86_sse_min_ps(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK-LABEL: test_x86_sse_min_ps:
  ; CHECK: minps
  %res = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone

; min.ss is expected to select minss.
define <4 x float> @test_x86_sse_min_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK-LABEL: test_x86_sse_min_ss:
  ; CHECK: minss
  %res = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone

; movmsk.ps is expected to select movmskps.
define i32 @test_x86_sse_movmsk_ps(<4 x float> %a0) {
  ; CHECK-LABEL: test_x86_sse_movmsk_ps:
  ; CHECK: movmskps
  %res = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0)
  ret i32 %res
}
declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone

; mul.ss is expected to select mulss.
define <4 x float> @test_x86_sse_mul_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK-LABEL: test_x86_sse_mul_ss:
  ; CHECK: mulss
  %res = call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>) nounwind readnone

; rcp.ps is expected to select rcpps.
define <4 x float> @test_x86_sse_rcp_ps(<4 x float> %a0) {
  ; CHECK-LABEL: test_x86_sse_rcp_ps:
  ; CHECK: rcpps
  %res = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone

; rcp.ss is expected to select rcpss.
define <4 x float> @test_x86_sse_rcp_ss(<4 x float> %a0) {
  ; CHECK-LABEL: test_x86_sse_rcp_ss:
  ; CHECK: rcpss
  %res = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %a0)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone

; rsqrt.ps is expected to select rsqrtps.
define <4 x float> @test_x86_sse_rsqrt_ps(<4 x float> %a0) {
  ; CHECK-LABEL: test_x86_sse_rsqrt_ps:
  ; CHECK: rsqrtps
  %res = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone

; rsqrt.ss is expected to select rsqrtss.
define <4 x float> @test_x86_sse_rsqrt_ss(<4 x float> %a0) {
  ; CHECK-LABEL: test_x86_sse_rsqrt_ss:
  ; CHECK: rsqrtss
  %res = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %a0)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone

; sqrt.ps is expected to select sqrtps.
define <4 x float> @test_x86_sse_sqrt_ps(<4 x float> %a0) {
  ; CHECK-LABEL: test_x86_sse_sqrt_ps:
  ; CHECK: sqrtps
  %res = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %a0)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone

; sqrt.ss is expected to select sqrtss.
define <4 x float> @test_x86_sse_sqrt_ss(<4 x float> %a0) {
  ; CHECK-LABEL: test_x86_sse_sqrt_ss:
  ; CHECK: sqrtss
  %res = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %a0)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone

; stmxcsr: expects a movl (presumably loading the pointer argument) before
; stmxcsr.
define void @test_x86_sse_stmxcsr(i8* %a0) {
  ; CHECK-LABEL: test_x86_sse_stmxcsr:
  ; CHECK: movl
  ; CHECK: stmxcsr
  call void @llvm.x86.sse.stmxcsr(i8* %a0)
  ret void
}
declare void @llvm.x86.sse.stmxcsr(i8*) nounwind

; storeu.ps: expects a movl (presumably loading the pointer argument) before
; movups.
define void @test_x86_sse_storeu_ps(i8* %a0, <4 x float> %a1) {
  ; CHECK-LABEL: test_x86_sse_storeu_ps:
  ; CHECK: movl
  ; CHECK: movups
  call void @llvm.x86.sse.storeu.ps(i8* %a0, <4 x float> %a1)
  ret void
}
declare void @llvm.x86.sse.storeu.ps(i8*, <4 x float>) nounwind

; sub.ss is expected to select subss.
define <4 x float> @test_x86_sse_sub_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK-LABEL: test_x86_sse_sub_ss:
  ; CHECK: subss
  %res = call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>) nounwind readnone

; ucomieq.ss: expects ucomiss, then sete, then movzbl.
define i32 @test_x86_sse_ucomieq_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK-LABEL: test_x86_sse_ucomieq_ss:
  ; CHECK: ucomiss
  ; CHECK: sete
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone

; ucomige.ss: expects ucomiss, then setae, then movzbl.
define i32 @test_x86_sse_ucomige_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK-LABEL: test_x86_sse_ucomige_ss:
  ; CHECK: ucomiss
  ; CHECK: setae
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse.ucomige.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.ucomige.ss(<4 x float>, <4 x float>) nounwind readnone

; ucomigt.ss: expects ucomiss, then seta, then movzbl.
define i32 @test_x86_sse_ucomigt_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK-LABEL: test_x86_sse_ucomigt_ss:
  ; CHECK: ucomiss
  ; CHECK: seta
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.ucomigt.ss(<4 x float>, <4 x float>) nounwind readnone

; ucomile.ss: expects ucomiss, then setbe, then movzbl.
define i32 @test_x86_sse_ucomile_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK-LABEL: test_x86_sse_ucomile_ss:
  ; CHECK: ucomiss
  ; CHECK: setbe
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.ucomile.ss(<4 x float>, <4 x float>) nounwind readnone

; ucomilt.ss: expects ucomiss followed by sbbl.
define i32 @test_x86_sse_ucomilt_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK-LABEL: test_x86_sse_ucomilt_ss:
  ; CHECK: ucomiss
  ; CHECK: sbbl
  %res = call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.ucomilt.ss(<4 x float>, <4 x float>) nounwind readnone

; ucomineq.ss: expects ucomiss, then setne, then movzbl.
define i32 @test_x86_sse_ucomineq_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK-LABEL: test_x86_sse_ucomineq_ss:
  ; CHECK: ucomiss
  ; CHECK: setne
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.ucomineq.ss(<4 x float>, <4 x float>) nounwind readnone