1#PIII SIMD instructions
2
3.text
# foo: a flat, straight-line listing of SSE (xmm) and MMX-extension (mm)
# instructions in AT&T syntax (source operand(s) first, destination last).
# Nothing here forms a callable routine: there is no prologue, no ret, and
# no control flow.  Most mnemonics appear twice, once with a register
# source and once with a memory source.
# NOTE(review): this looks like an assembler/disassembler test input, so
# instruction order and the exact operand spellings -- e.g. the explicit
# zero displacement in 0x0(%ebp) and the index-less scaled form (%esp,1)
# -- are presumably significant; confirm before normalising anything.
4foo:
# Packed (ps) and scalar (ss) single-precision add, and-not, and.
5 addps		(%ecx),%xmm0
6 addps		%xmm2,%xmm1
7 addss		(%ebx),%xmm2
8 addss		%xmm4,%xmm3
9 andnps		0x0(%ebp),%xmm4
10 andnps		%xmm6,%xmm5
11 andps		(%edi),%xmm6
12 andps		%xmm0,%xmm7
# Generic compares: the leading immediate selects the predicate.  All
# eight values 0x0-0x7 are used across these lines.
13 cmpps		$0x2,%xmm1,%xmm0
14 cmpps		$0x3,(%edx),%xmm1
15 cmpss		$0x4,%xmm2,%xmm2
16 cmpss		$0x5,(%esp,1),%xmm3
17 cmpps		$0x6,%xmm5,%xmm4
18 cmpps		$0x7,(%esi),%xmm5
19 cmpss		$0x0,%xmm7,%xmm6
20 cmpss		$0x1,(%eax),%xmm7
# Named compare forms.  These are the two-operand spellings of cmpps/cmpss
# with predicate immediates 0..7, in order: eq, lt, le, unord, neq, nlt,
# nle, ord.
21 cmpeqps	%xmm1,%xmm0
22 cmpeqps	(%edx),%xmm1
23 cmpeqss	%xmm2,%xmm2
24 cmpeqss	(%esp,1),%xmm3
25 cmpltps	%xmm5,%xmm4
26 cmpltps	(%esi),%xmm5
27 cmpltss	%xmm7,%xmm6
28 cmpltss	(%eax),%xmm7
29 cmpleps	(%ecx),%xmm0
30 cmpleps	%xmm2,%xmm1
31 cmpless	(%ebx),%xmm2
32 cmpless	%xmm4,%xmm3
33 cmpunordps	0x0(%ebp),%xmm4
34 cmpunordps	%xmm6,%xmm5
35 cmpunordss	(%edi),%xmm6
36 cmpunordss	%xmm0,%xmm7
37 cmpneqps	%xmm1,%xmm0
38 cmpneqps	(%edx),%xmm1
39 cmpneqss	%xmm2,%xmm2
40 cmpneqss	(%esp,1),%xmm3
41 cmpnltps	%xmm5,%xmm4
42 cmpnltps	(%esi),%xmm5
43 cmpnltss	%xmm7,%xmm6
44 cmpnltss	(%eax),%xmm7
45 cmpnleps	(%ecx),%xmm0
46 cmpnleps	%xmm2,%xmm1
47 cmpnless	(%ebx),%xmm2
48 cmpnless	%xmm4,%xmm3
49 cmpordps	0x0(%ebp),%xmm4
50 cmpordps	%xmm6,%xmm5
51 cmpordss	(%edi),%xmm6
52 cmpordss	%xmm0,%xmm7
# Ordered scalar compare that sets EFLAGS.
53 comiss		%xmm1,%xmm0
54 comiss		(%edx),%xmm1
# Conversions between single-precision values and integers held in mm
# registers, general-purpose registers, or memory.  The cvtt* variants
# convert with truncation.
55 cvtpi2ps	%mm3,%xmm2
56 cvtpi2ps	(%esp,1),%xmm3
57 cvtsi2ss	%ebp,%xmm4
58 cvtsi2ss	(%esi),%xmm5
59 cvtps2pi	%xmm7,%mm6
60 cvtps2pi	(%eax),%mm7
61 cvtss2si	(%ecx),%eax
62 cvtss2si	%xmm2,%ecx
63 cvttps2pi	(%ebx),%mm2
64 cvttps2pi	%xmm4,%mm3
65 cvttss2si	0x0(%ebp),%esp
66 cvttss2si	%xmm6,%ebp
# Packed and scalar divide.
67 divps		%xmm1,%xmm0
68 divps		(%edx),%xmm1
69 divss		%xmm3,%xmm2
70 divss		(%esp,1),%xmm3
# MXCSR load/store (memory operand only) and the operand-less store fence.
71 ldmxcsr	0x0(%ebp)
72 stmxcsr	(%esi)
73 sfence
# Packed and scalar maximum / minimum.
74 maxps		%xmm1,%xmm0
75 maxps		(%edx),%xmm1
76 maxss		%xmm3,%xmm2
77 maxss		(%esp,1),%xmm3
78 minps		%xmm5,%xmm4
79 minps		(%esi),%xmm5
80 minss		%xmm7,%xmm6
81 minss		(%eax),%xmm7
# Data movement: aligned (movaps) and unaligned (movups) full-register
# moves, high/low half moves, sign-mask extraction, and scalar moves.
# Register-to-register, store, and load directions are each listed where
# the mnemonic supports them.
82 movaps		%xmm1,%xmm0
83 movaps		%xmm2,(%ecx)
84 movaps		(%edx),%xmm2
85 movlhps	%xmm4,%xmm3
86 movhps		%xmm5,(%esp,1)
87 movhps		(%esi),%xmm5
88 movhlps	%xmm7,%xmm6
89 movlps		%xmm0,(%edi)
90 movlps		(%eax),%xmm0
91 movmskps	%xmm2,%ecx
92 movups		%xmm3,%xmm2
93 movups		%xmm4,(%edx)
94 movups		0x0(%ebp),%xmm4
95 movss		%xmm6,%xmm5
96 movss		%xmm7,(%esi)
97 movss		(%eax),%xmm7
# Multiply, bitwise or, approximate reciprocal and reciprocal square root.
98 mulps		%xmm1,%xmm0
99 mulps		(%edx),%xmm1
100 mulss		%xmm2,%xmm2
101 mulss		(%esp,1),%xmm3
102 orps		%xmm5,%xmm4
103 orps		(%esi),%xmm5
104 rcpps		%xmm7,%xmm6
105 rcpps		(%eax),%xmm7
106 rcpss		(%ecx),%xmm0
107 rcpss		%xmm2,%xmm1
108 rsqrtps	(%ebx),%xmm2
109 rsqrtps	%xmm4,%xmm3
110 rsqrtss	0x0(%ebp),%xmm4
111 rsqrtss	%xmm6,%xmm5
# Shuffle: the leading immediate is the element-selection control byte.
112 shufps		$0x2,(%edi),%xmm6
113 shufps		$0x3,%xmm0,%xmm7
# Square root, subtract, unordered scalar compare, interleave high/low,
# and bitwise xor.
114 sqrtps		%xmm1,%xmm0
115 sqrtps		(%edx),%xmm1
116 sqrtss		%xmm2,%xmm2
117 sqrtss		(%esp,1),%xmm3
118 subps		%xmm5,%xmm4
119 subps		(%esi),%xmm5
120 subss		%xmm7,%xmm6
121 subss		(%eax),%xmm7
122 ucomiss	(%ecx),%xmm0
123 ucomiss	%xmm2,%xmm1
124 unpckhps	(%ebx),%xmm2
125 unpckhps	%xmm4,%xmm3
126 unpcklps	0x0(%ebp),%xmm4
127 unpcklps	%xmm6,%xmm5
128 xorps		(%edi),%xmm6
129 xorps		%xmm0,%xmm7
# Integer SIMD additions that operate on the 64-bit mm registers:
# averages, word extract/insert, signed/unsigned max/min, byte-mask
# extraction, unsigned high multiply, sum of absolute differences, and
# word shuffle.
130 pavgb		%mm1,%mm0
131 pavgb		(%edx),%mm1
132 pavgw		%mm3,%mm2
133 pavgw		(%esp,1),%mm3
134 pextrw		$0x0,%mm1,%eax
135 pinsrw		$0x1,(%ecx),%mm1
136 pinsrw		$0x2,%edx,%mm2
137 pmaxsw		%mm1,%mm0
138 pmaxsw		(%edx),%mm1
139 pmaxub		%mm2,%mm2
140 pmaxub		(%esp,1),%mm3
141 pminsw		%mm5,%mm4
142 pminsw		(%esi),%mm5
143 pminub		%mm7,%mm6
144 pminub		(%eax),%mm7
145 pmovmskb	%mm5,%eax
146 pmulhuw	%mm5,%mm4
147 pmulhuw	(%esi),%mm5
148 psadbw		%mm7,%mm6
149 psadbw		(%eax),%mm7
150 pshufw		$0x1,%mm2,%mm3
151 pshufw		$0x4,0x0(%ebp),%mm6
# Cache-control instructions.  maskmovq takes two registers only; its
# store address is implicit (%edi), which is why no memory operand
# appears.  movntps/movntq are non-temporal stores.
152 maskmovq	%mm7,%mm0
153 movntps	%xmm6,(%ebx)
154 movntq		%mm2,(%eax)
# Prefetch hints, one per locality level; prefetcht0 is the only line in
# this listing that uses a full base+index*scale address.
155 prefetchnta	(%esi)
156 prefetcht0	(%eax,%ebx,4)
157 prefetcht1	(%edx)
158 prefetcht2	(%ecx)
159
160# A bad sfence modrm byte
# Emitted as raw bytes so the assembler cannot reject or canonicalise it:
#   0x65       %gs segment-override prefix
#   0x0F 0xAE  opcode shared by ldmxcsr / stmxcsr / sfence
#   0xff       ModRM with mod=3, reg=7, rm=7 (canonical sfence is 0F AE F8)
# NOTE(review): presumably here to check how a disassembler renders a
# non-canonical encoding -- confirm against the expected-output file.
161.byte 0x65,0x0F,0xAE,0xff
162# Pad out to good alignment
# .p2align 4,0 advances to the next 2^4 = 16-byte boundary, filling the
# gap with zero bytes.
163 .p2align 4,0
164