Lines Matching refs:SSE2

2 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse2 | FileCheck %s --check-prefix=SSE2
8 ; SSE2-LABEL: roundeven_f32:
9 ; SSE2: ## %bb.0:
10 ; SSE2-NEXT: jmp _roundevenf ## TAILCALL
26 ; SSE2-LABEL: roundeven_f64:
27 ; SSE2: ## %bb.0:
28 ; SSE2-NEXT: jmp _roundeven ## TAILCALL
44 ; SSE2-LABEL: roundeven_v4f32:
45 ; SSE2: ## %bb.0:
46 ; SSE2-NEXT: subq $56, %rsp
47 ; SSE2-NEXT: .cfi_def_cfa_offset 64
48 ; SSE2-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
49 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
50 ; SSE2-NEXT: callq _roundevenf
51 ; SSE2-NEXT: movaps %xmm0, (%rsp) ## 16-byte Spill
52 ; SSE2-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
53 ; SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
54 ; SSE2-NEXT: callq _roundevenf
55 ; SSE2-NEXT: unpcklps (%rsp), %xmm0 ## 16-byte Folded Reload
56 ; SSE2-NEXT: ## xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
57 ; SSE2-NEXT: movaps %xmm0, (%rsp) ## 16-byte Spill
58 ; SSE2-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
59 ; SSE2-NEXT: callq _roundevenf
60 ; SSE2-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
61 ; SSE2-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
62 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
63 ; SSE2-NEXT: callq _roundevenf
64 ; SSE2-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
65 ; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
66 ; SSE2-NEXT: unpcklpd (%rsp), %xmm1 ## 16-byte Folded Reload
67 ; SSE2-NEXT: ## xmm1 = xmm1[0],mem[0]
68 ; SSE2-NEXT: movaps %xmm1, %xmm0
69 ; SSE2-NEXT: addq $56, %rsp
70 ; SSE2-NEXT: retq
86 ; SSE2-LABEL: roundeven_v2f64:
87 ; SSE2: ## %bb.0:
88 ; SSE2-NEXT: subq $40, %rsp
89 ; SSE2-NEXT: .cfi_def_cfa_offset 48
90 ; SSE2-NEXT: movaps %xmm0, (%rsp) ## 16-byte Spill
91 ; SSE2-NEXT: callq _roundeven
92 ; SSE2-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
93 ; SSE2-NEXT: movaps (%rsp), %xmm0 ## 16-byte Reload
94 ; SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
95 ; SSE2-NEXT: callq _roundeven
96 ; SSE2-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
97 ; SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
98 ; SSE2-NEXT: movaps %xmm1, %xmm0
99 ; SSE2-NEXT: addq $40, %rsp
100 ; SSE2-NEXT: retq
116 ; SSE2-LABEL: roundeven_v8f32:
117 ; SSE2: ## %bb.0:
118 ; SSE2-NEXT: subq $72, %rsp
119 ; SSE2-NEXT: .cfi_def_cfa_offset 80
120 ; SSE2-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
121 ; SSE2-NEXT: movaps %xmm0, (%rsp) ## 16-byte Spill
122 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
123 ; SSE2-NEXT: callq _roundevenf
124 ; SSE2-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
125 ; SSE2-NEXT: movaps (%rsp), %xmm0 ## 16-byte Reload
126 ; SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
127 ; SSE2-NEXT: callq _roundevenf
128 ; SSE2-NEXT: unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Folded Reload
129 ; SSE2-NEXT: ## xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
130 ; SSE2-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
131 ; SSE2-NEXT: movaps (%rsp), %xmm0 ## 16-byte Reload
132 ; SSE2-NEXT: callq _roundevenf
133 ; SSE2-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
134 ; SSE2-NEXT: movaps (%rsp), %xmm0 ## 16-byte Reload
135 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
136 ; SSE2-NEXT: callq _roundevenf
137 ; SSE2-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
138 ; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
139 ; SSE2-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Folded Reload
140 ; SSE2-NEXT: ## xmm1 = xmm1[0],mem[0]
141 ; SSE2-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
142 ; SSE2-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
143 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
144 ; SSE2-NEXT: callq _roundevenf
145 ; SSE2-NEXT: movaps %xmm0, (%rsp) ## 16-byte Spill
146 ; SSE2-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
147 ; SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
148 ; SSE2-NEXT: callq _roundevenf
149 ; SSE2-NEXT: unpcklps (%rsp), %xmm0 ## 16-byte Folded Reload
150 ; SSE2-NEXT: ## xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
151 ; SSE2-NEXT: movaps %xmm0, (%rsp) ## 16-byte Spill
152 ; SSE2-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
153 ; SSE2-NEXT: callq _roundevenf
154 ; SSE2-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
155 ; SSE2-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
156 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
157 ; SSE2-NEXT: callq _roundevenf
158 ; SSE2-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
159 ; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
160 ; SSE2-NEXT: unpcklpd (%rsp), %xmm1 ## 16-byte Folded Reload
161 ; SSE2-NEXT: ## xmm1 = xmm1[0],mem[0]
162 ; SSE2-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
163 ; SSE2-NEXT: addq $72, %rsp
164 ; SSE2-NEXT: retq
181 ; SSE2-LABEL: roundeven_v4f64:
182 ; SSE2: ## %bb.0:
183 ; SSE2-NEXT: subq $56, %rsp
184 ; SSE2-NEXT: .cfi_def_cfa_offset 64
185 ; SSE2-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
186 ; SSE2-NEXT: movaps %xmm0, (%rsp) ## 16-byte Spill
187 ; SSE2-NEXT: callq _roundeven
188 ; SSE2-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
189 ; SSE2-NEXT: movaps (%rsp), %xmm0 ## 16-byte Reload
190 ; SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
191 ; SSE2-NEXT: callq _roundeven
192 ; SSE2-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
193 ; SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
194 ; SSE2-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
195 ; SSE2-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
196 ; SSE2-NEXT: callq _roundeven
197 ; SSE2-NEXT: movaps %xmm0, (%rsp) ## 16-byte Spill
198 ; SSE2-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
199 ; SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
200 ; SSE2-NEXT: callq _roundeven
201 ; SSE2-NEXT: movaps (%rsp), %xmm1 ## 16-byte Reload
202 ; SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
203 ; SSE2-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
204 ; SSE2-NEXT: addq $56, %rsp
205 ; SSE2-NEXT: retq
222 ; SSE2-LABEL: roundeven_v16f32:
223 ; SSE2: ## %bb.0:
224 ; SSE2-NEXT: subq $104, %rsp
225 ; SSE2-NEXT: .cfi_def_cfa_offset 112
226 ; SSE2-NEXT: movaps %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
227 ; SSE2-NEXT: movaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
228 ; SSE2-NEXT: movaps %xmm1, (%rsp) ## 16-byte Spill
229 ; SSE2-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
230 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
231 ; SSE2-NEXT: callq _roundevenf
232 ; SSE2-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
233 ; SSE2-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
234 ; SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
235 ; SSE2-NEXT: callq _roundevenf
236 ; SSE2-NEXT: unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Folded Reload
237 ; SSE2-NEXT: ## xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
238 ; SSE2-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
239 ; SSE2-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
240 ; SSE2-NEXT: callq _roundevenf
241 ; SSE2-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
242 ; SSE2-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
243 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
244 ; SSE2-NEXT: callq _roundevenf
245 ; SSE2-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
246 ; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
247 ; SSE2-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Folded Reload
248 ; SSE2-NEXT: ## xmm1 = xmm1[0],mem[0]
249 ; SSE2-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
250 ; SSE2-NEXT: movaps (%rsp), %xmm0 ## 16-byte Reload
251 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
252 ; SSE2-NEXT: callq _roundevenf
253 ; SSE2-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
254 ; SSE2-NEXT: movaps (%rsp), %xmm0 ## 16-byte Reload
255 ; SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
256 ; SSE2-NEXT: callq _roundevenf
257 ; SSE2-NEXT: unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Folded Reload
258 ; SSE2-NEXT: ## xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
259 ; SSE2-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
260 ; SSE2-NEXT: movaps (%rsp), %xmm0 ## 16-byte Reload
261 ; SSE2-NEXT: callq _roundevenf
262 ; SSE2-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
263 ; SSE2-NEXT: movaps (%rsp), %xmm0 ## 16-byte Reload
264 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
265 ; SSE2-NEXT: callq _roundevenf
266 ; SSE2-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
267 ; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
268 ; SSE2-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Folded Reload
269 ; SSE2-NEXT: ## xmm1 = xmm1[0],mem[0]
270 ; SSE2-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
271 ; SSE2-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
272 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
273 ; SSE2-NEXT: callq _roundevenf
274 ; SSE2-NEXT: movaps %xmm0, (%rsp) ## 16-byte Spill
275 ; SSE2-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
276 ; SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
277 ; SSE2-NEXT: callq _roundevenf
278 ; SSE2-NEXT: unpcklps (%rsp), %xmm0 ## 16-byte Folded Reload
279 ; SSE2-NEXT: ## xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
280 ; SSE2-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
281 ; SSE2-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
282 ; SSE2-NEXT: callq _roundevenf
283 ; SSE2-NEXT: movaps %xmm0, (%rsp) ## 16-byte Spill
284 ; SSE2-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
285 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
286 ; SSE2-NEXT: callq _roundevenf
287 ; SSE2-NEXT: movaps (%rsp), %xmm1 ## 16-byte Reload
288 ; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
289 ; SSE2-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Folded Reload
290 ; SSE2-NEXT: ## xmm1 = xmm1[0],mem[0]
291 ; SSE2-NEXT: movaps %xmm1, (%rsp) ## 16-byte Spill
292 ; SSE2-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
293 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
294 ; SSE2-NEXT: callq _roundevenf
295 ; SSE2-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
296 ; SSE2-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
297 ; SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
298 ; SSE2-NEXT: callq _roundevenf
299 ; SSE2-NEXT: unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Folded Reload
300 ; SSE2-NEXT: ## xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
301 ; SSE2-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
302 ; SSE2-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
303 ; SSE2-NEXT: callq _roundevenf
304 ; SSE2-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
305 ; SSE2-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
306 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
307 ; SSE2-NEXT: callq _roundevenf
308 ; SSE2-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm3 ## 16-byte Reload
309 ; SSE2-NEXT: unpcklps {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
310 ; SSE2-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm3 ## 16-byte Folded Reload
311 ; SSE2-NEXT: ## xmm3 = xmm3[0],mem[0]
312 ; SSE2-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
313 ; SSE2-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
314 ; SSE2-NEXT: movaps (%rsp), %xmm2 ## 16-byte Reload
315 ; SSE2-NEXT: addq $104, %rsp
316 ; SSE2-NEXT: retq
341 ; SSE2-LABEL: roundeven_v8f64:
342 ; SSE2: ## %bb.0:
343 ; SSE2-NEXT: subq $88, %rsp
344 ; SSE2-NEXT: .cfi_def_cfa_offset 96
345 ; SSE2-NEXT: movaps %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
346 ; SSE2-NEXT: movaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
347 ; SSE2-NEXT: movaps %xmm1, (%rsp) ## 16-byte Spill
348 ; SSE2-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
349 ; SSE2-NEXT: callq _roundeven
350 ; SSE2-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
351 ; SSE2-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
352 ; SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
353 ; SSE2-NEXT: callq _roundeven
354 ; SSE2-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
355 ; SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
356 ; SSE2-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
357 ; SSE2-NEXT: movaps (%rsp), %xmm0 ## 16-byte Reload
358 ; SSE2-NEXT: callq _roundeven
359 ; SSE2-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
360 ; SSE2-NEXT: movaps (%rsp), %xmm0 ## 16-byte Reload
361 ; SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
362 ; SSE2-NEXT: callq _roundeven
363 ; SSE2-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
364 ; SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
365 ; SSE2-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
366 ; SSE2-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
367 ; SSE2-NEXT: callq _roundeven
368 ; SSE2-NEXT: movaps %xmm0, (%rsp) ## 16-byte Spill
369 ; SSE2-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
370 ; SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
371 ; SSE2-NEXT: callq _roundeven
372 ; SSE2-NEXT: movaps (%rsp), %xmm1 ## 16-byte Reload
373 ; SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
374 ; SSE2-NEXT: movaps %xmm1, (%rsp) ## 16-byte Spill
375 ; SSE2-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
376 ; SSE2-NEXT: callq _roundeven
377 ; SSE2-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
378 ; SSE2-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
379 ; SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
380 ; SSE2-NEXT: callq _roundeven
381 ; SSE2-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm3 ## 16-byte Reload
382 ; SSE2-NEXT: movlhps {{.*#+}} xmm3 = xmm3[0],xmm0[0]
383 ; SSE2-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
384 ; SSE2-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
385 ; SSE2-NEXT: movaps (%rsp), %xmm2 ## 16-byte Reload
386 ; SSE2-NEXT: addq $88, %rsp
387 ; SSE2-NEXT: retq