1//===---------------------------------------------------------------------===//
2// Random ideas for the X86 backend: MMX-specific stuff.
3//===---------------------------------------------------------------------===//
4
5//===---------------------------------------------------------------------===//
6
7This:
8
9#include <mmintrin.h>
10
11__v2si qux(int A) {
12  return (__v2si){ 0, A };
13}
14
15is compiled into:
16
17_qux:
18        subl $28, %esp
19        movl 32(%esp), %eax
20        movd %eax, %mm0
21        movq %mm0, (%esp)
22        movl (%esp), %eax
23        movl %eax, 20(%esp)
24        movq %mm0, 8(%esp)
25        movl 12(%esp), %eax
26        movl %eax, 16(%esp)
27        movq 16(%esp), %mm0
28        addl $28, %esp
29        ret
30
31Yuck!
32
33GCC gives us:
34
35_qux:
36        subl    $12, %esp
37        movl    16(%esp), %eax
38        movl    20(%esp), %edx
39        movl    $0, (%eax)
40        movl    %edx, 4(%eax)
41        addl    $12, %esp
42        ret     $4
43
44//===---------------------------------------------------------------------===//
45
46We generate crappy code for this:
47
48__m64 t() {
49  return _mm_cvtsi32_si64(1);
50}
51
52_t:
53	subl	$12, %esp
54	movl	$1, %eax
55	movd	%eax, %mm0
56	movq	%mm0, (%esp)
57	movl	(%esp), %eax
58	movl	4(%esp), %edx
59	addl	$12, %esp
60	ret
61
62The extra stack traffic is covered in the previous entry. The other problem is
63that we are not smart about materializing constants in MMX registers. With -m64, we generate:
64
65	movl	$1, %eax
66	movd	%eax, %mm0
67	movd	%mm0, %rax
68	ret
69
70We should be using a single constant-pool load instead of bouncing through %mm0:
71	movq	LC0(%rip), %rax
72