; RUN: llc < %s -mtriple=x86_64-apple-macosx -mattr=+sse2 -mcpu=nehalem | FileCheck %s

; rdar: 12558838
; PR14221
; There is a mismatch between the intrinsic and the actual instruction.
; The actual instruction has a partial update of dest, while the intrinsic
; passes through the upper FP values. Here, we make sure the source and
; destination of rsqrtss (in @t1) and rcpss (in @t2) are the same register.

; @t1 applies the rsqrt.ss intrinsic to %a, then extracts elements 0 and 1 of
; the result, widens each to double and passes both to @callee, so an upper
; element of the intrinsic result is used in addition to element 0.
define void @t1(<4 x float> %a) nounwind uwtable ssp {
entry:
; CHECK-LABEL: t1:
; CHECK: rsqrtss %xmm0, %xmm0
  %0 = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %a) nounwind
  %a.addr.0.extract = extractelement <4 x float> %0, i32 0
  %conv = fpext float %a.addr.0.extract to double
  %a.addr.4.extract = extractelement <4 x float> %0, i32 1
  %conv3 = fpext float %a.addr.4.extract to double
  tail call void @callee(double %conv, double %conv3) nounwind
  ret void
}

; External consumer of the two extracted values; only declared here.
declare void @callee(double, double)
declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone

; @t2 has the same shape as @t1 but uses the rcp.ss intrinsic, and expects
; rcpss to read and write the same register.
define void @t2(<4 x float> %a) nounwind uwtable ssp {
entry:
; CHECK-LABEL: t2:
; CHECK: rcpss %xmm0, %xmm0
  %0 = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %a) nounwind
  %a.addr.0.extract = extractelement <4 x float> %0, i32 0
  %conv = fpext float %a.addr.0.extract to double
  %a.addr.4.extract = extractelement <4 x float> %0, i32 1
  %conv3 = fpext float %a.addr.4.extract to double
  tail call void @callee(double %conv, double %conv3) nounwind
  ret void
}

declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone