1;/*
2; * OpenBOR - http://www.LavaLit.com
3; * -----------------------------------------------------------------------
4; * All rights reserved, see LICENSE in OpenBOR root for details.
5; *
6; * Copyright (c) 2004 - 2011 OpenBOR Team
7; */
8
9%include "macros.mac"
10
11;%define FAR_POINTER
12
13BITS 32
14
15SECTION .text
16
;-----------------------------------------------------------------------
; _BilinearMMX
; Weighted blend of four source quadwords A, B, C and D. Each quadword
; is processed as four 16-bit lanes whose channels are selected with
; RedMask (bits 15-11), GreenMask (bits 10-5) and BlueMask (bits 4-0).
;
; Stack arguments (offsets from ebp after the prologue):
;   [ebp+ 8] A    [ebp+12] B    [ebp+16] C    [ebp+20] D
;   [ebp+24] dx   [ebp+28] dy   (each points to one quadword)
;   [ebp+32] address that receives the 8-byte result
;
; Per lane, with x = dx >> 11 and y = dy >> 11:
;   wD = (x * y) >> 5
;   wC = y - wD
;   wB = x - wD
;   wA = 32 + wD - x - y
; Each channel of the result is A*wA + B*wB + C*wC + D*wD scaled back
; by 32 (red is shifted before weighting, green/blue after summing).
;
; Side effects: the quadwords at dx and dy are overwritten with x and y.
; Registers: ebx and edx are saved and restored; eax and mm0-mm7 are
; left modified. No emms is executed here.
;-----------------------------------------------------------------------
NEWSYM _BilinearMMX
        push    ebp
        mov     ebp, esp
        push    ebx
        push    edx

        mov     eax, [ebp+24]           ; eax -> dx
        mov     ebx, [ebp+28]           ; ebx -> dy

        ; Both fractions are loaded before either one is written back.
        movq    mm0, [eax]
        movq    mm1, [ebx]
        psrlw   mm0, 11                 ; x = top 5 bits of dx
        psrlw   mm1, 11                 ; y = top 5 bits of dy
        movq    [eax], mm0              ; x is re-read from memory below
        movq    [ebx], mm1              ; y is re-read from memory below

        pmullw  mm0, mm1
        psrlw   mm0, 5                  ; mm0 = wD = (x * y) >> 5
        movq    mm4, mm0                ; mm4 keeps wD for the other weights

        ; ---- pixel D, weight wD: starts the accumulators mm5/mm6/mm7 ----
        mov     edx, [ebp+20]
        movq    mm5, [RedMask]
        movq    mm6, [GreenMask]
        movq    mm7, [BlueMask]
        pand    mm5, [edx]
        pand    mm6, [edx]
        pand    mm7, [edx]
        psrlw   mm5, 5                  ; red to bits 10-6 so red * 32 still fits 16 bits
        pmullw  mm5, mm0
        pmullw  mm6, mm0
        pmullw  mm7, mm0

        ; ---- pixel C, weight wC = y - wD ----
        mov     edx, [ebp+16]
        movq    mm0, [ebx]
        psubw   mm0, mm4
        movq    mm1, [RedMask]
        movq    mm2, [GreenMask]
        movq    mm3, [BlueMask]
        pand    mm1, [edx]
        pand    mm2, [edx]
        pand    mm3, [edx]
        psrlw   mm1, 5
        pmullw  mm1, mm0
        pmullw  mm2, mm0
        pmullw  mm3, mm0
        paddw   mm5, mm1
        paddw   mm6, mm2
        paddw   mm7, mm3

        ; ---- pixel B, weight wB = x - wD ----
        mov     edx, [ebp+12]
        movq    mm0, [eax]
        psubw   mm0, mm4
        movq    mm1, [RedMask]
        movq    mm2, [GreenMask]
        movq    mm3, [BlueMask]
        pand    mm1, [edx]
        pand    mm2, [edx]
        pand    mm3, [edx]
        psrlw   mm1, 5
        pmullw  mm1, mm0
        pmullw  mm2, mm0
        pmullw  mm3, mm0
        paddw   mm5, mm1
        paddw   mm6, mm2
        paddw   mm7, mm3

        ; ---- pixel A, weight wA = 32 + wD - (x + y) ----
        mov     edx, [ebp+8]
        movq    mm0, [All32s]
        paddw   mm0, mm4
        movq    mm2, [eax]
        paddw   mm2, [ebx]
        psubw   mm0, mm2
        movq    mm1, [RedMask]
        movq    mm2, [GreenMask]
        movq    mm3, [BlueMask]
        pand    mm1, [edx]
        pand    mm2, [edx]
        pand    mm3, [edx]
        psrlw   mm1, 5
        pmullw  mm1, mm0
        pmullw  mm2, mm0
        pmullw  mm3, mm0
        paddw   mm5, mm1
        paddw   mm6, mm2
        paddw   mm7, mm3

        ; ---- rescale green/blue, trim every channel, merge ----
        psrlw   mm6, 5
        psrlw   mm7, 5
        pand    mm5, [RedMask]
        pand    mm6, [GreenMask]
        pand    mm7, [BlueMask]
        por     mm5, mm6
        por     mm7, mm5                ; mm7 = four finished pixels

        mov     edx, [ebp+32]           ; destination address
%ifdef FAR_POINTER
        movq    [fs:edx], mm7
%else
        movq    [edx], mm7
%endif

        pop     edx
        pop     ebx
        leave
        ret
143
144
;-----------------------------------------------------------------------
; _BilinearMMXGrid0
; Weighted blend of four source quadwords A, B, C and D, processed as
; four 16-bit lanes with channels selected by RedMask (bits 15-11),
; GreenMask (bits 10-5) and BlueMask (bits 4-0). The four blended
; pixels are then interleaved with zero words and written as 16 bytes:
; pixels occupy the even 16-bit slots, zeros the odd ones.
;
; Stack arguments (offsets from ebp after the prologue):
;   [ebp+ 8] A    [ebp+12] B    [ebp+16] C    [ebp+20] D
;   [ebp+24] dx   [ebp+28] dy   (each points to one quadword)
;   [ebp+32] address that receives the 16-byte result
;
; Per lane, with x = dx >> 11 and y = dy >> 11:
;   wD = (x * y) >> 5
;   wC = y - wD
;   wB = x - wD
;   wA = 32 + wD - x - y
;
; Side effects: the quadwords at dx and dy are overwritten with x and y.
; Registers: ebx and edx are saved and restored; eax and mm0-mm7 are
; left modified. No emms is executed here.
;-----------------------------------------------------------------------
NEWSYM _BilinearMMXGrid0
        push    ebp
        mov     ebp, esp
        push    ebx
        push    edx

        mov     eax, [ebp+24]           ; eax -> dx
        mov     ebx, [ebp+28]           ; ebx -> dy

        ; Both fractions are loaded before either one is written back.
        movq    mm0, [eax]
        movq    mm1, [ebx]
        psrlw   mm0, 11                 ; x = top 5 bits of dx
        psrlw   mm1, 11                 ; y = top 5 bits of dy
        movq    [eax], mm0              ; x is re-read from memory below
        movq    [ebx], mm1              ; y is re-read from memory below

        pmullw  mm0, mm1
        psrlw   mm0, 5                  ; mm0 = wD = (x * y) >> 5
        movq    mm4, mm0                ; mm4 keeps wD for the other weights

        ; ---- pixel D, weight wD: starts the accumulators mm5/mm6/mm7 ----
        mov     edx, [ebp+20]
        movq    mm5, [RedMask]
        movq    mm6, [GreenMask]
        movq    mm7, [BlueMask]
        pand    mm5, [edx]
        pand    mm6, [edx]
        pand    mm7, [edx]
        psrlw   mm5, 5                  ; red to bits 10-6 so red * 32 still fits 16 bits
        pmullw  mm5, mm0
        pmullw  mm6, mm0
        pmullw  mm7, mm0

        ; ---- pixel C, weight wC = y - wD ----
        mov     edx, [ebp+16]
        movq    mm0, [ebx]
        psubw   mm0, mm4
        movq    mm1, [RedMask]
        movq    mm2, [GreenMask]
        movq    mm3, [BlueMask]
        pand    mm1, [edx]
        pand    mm2, [edx]
        pand    mm3, [edx]
        psrlw   mm1, 5
        pmullw  mm1, mm0
        pmullw  mm2, mm0
        pmullw  mm3, mm0
        paddw   mm5, mm1
        paddw   mm6, mm2
        paddw   mm7, mm3

        ; ---- pixel B, weight wB = x - wD ----
        mov     edx, [ebp+12]
        movq    mm0, [eax]
        psubw   mm0, mm4
        movq    mm1, [RedMask]
        movq    mm2, [GreenMask]
        movq    mm3, [BlueMask]
        pand    mm1, [edx]
        pand    mm2, [edx]
        pand    mm3, [edx]
        psrlw   mm1, 5
        pmullw  mm1, mm0
        pmullw  mm2, mm0
        pmullw  mm3, mm0
        paddw   mm5, mm1
        paddw   mm6, mm2
        paddw   mm7, mm3

        ; ---- pixel A, weight wA = 32 + wD - (x + y) ----
        mov     edx, [ebp+8]
        movq    mm0, [All32s]
        paddw   mm0, mm4
        movq    mm2, [eax]
        paddw   mm2, [ebx]
        psubw   mm0, mm2
        movq    mm1, [RedMask]
        movq    mm2, [GreenMask]
        movq    mm3, [BlueMask]
        pand    mm1, [edx]
        pand    mm2, [edx]
        pand    mm3, [edx]
        psrlw   mm1, 5
        pmullw  mm1, mm0
        pmullw  mm2, mm0
        pmullw  mm3, mm0
        paddw   mm5, mm1
        paddw   mm6, mm2
        paddw   mm7, mm3

        ; ---- rescale green/blue, trim every channel, merge ----
        psrlw   mm6, 5
        psrlw   mm7, 5
        pand    mm5, [RedMask]
        pand    mm6, [GreenMask]
        pand    mm7, [BlueMask]
        por     mm5, mm6
        por     mm7, mm5                ; mm7 = four finished pixels

        ; ---- widen: pixel in the low word, zero in the high word ----
        movq    mm6, mm7
        pxor    mm0, mm0
        punpcklwd mm6, mm0              ; mm6 = p0, 0, p1, 0
        punpckhwd mm7, mm0              ; mm7 = p2, 0, p3, 0

        mov     edx, [ebp+32]           ; destination address
%ifdef FAR_POINTER
        movq    [fs:edx], mm6
        movq    [fs:edx+8], mm7
%else
        movq    [edx], mm6
        movq    [edx+8], mm7
%endif

        pop     edx
        pop     ebx
        leave
        ret
278
;-----------------------------------------------------------------------
; _BilinearMMXGrid1
; Weighted blend of four source quadwords A, B, C and D, processed as
; four 16-bit lanes with channels selected by RedMask (bits 15-11),
; GreenMask (bits 10-5) and BlueMask (bits 4-0). The four blended
; pixels are then interleaved with zero words and written as 16 bytes:
; zeros occupy the even 16-bit slots, pixels the odd ones.
;
; Stack arguments (offsets from ebp after the prologue):
;   [ebp+ 8] A    [ebp+12] B    [ebp+16] C    [ebp+20] D
;   [ebp+24] dx   [ebp+28] dy   (each points to one quadword)
;   [ebp+32] address that receives the 16-byte result
;
; Per lane, with x = dx >> 11 and y = dy >> 11:
;   wD = (x * y) >> 5
;   wC = y - wD
;   wB = x - wD
;   wA = 32 + wD - x - y
;
; Side effects: the quadwords at dx and dy are overwritten with x and y.
; Registers: ebx and edx are saved and restored; eax and mm0-mm7 are
; left modified. No emms is executed here.
;-----------------------------------------------------------------------
NEWSYM _BilinearMMXGrid1
        push    ebp
        mov     ebp, esp
        push    ebx
        push    edx

        mov     eax, [ebp+24]           ; eax -> dx
        mov     ebx, [ebp+28]           ; ebx -> dy

        ; Both fractions are loaded before either one is written back.
        movq    mm0, [eax]
        movq    mm1, [ebx]
        psrlw   mm0, 11                 ; x = top 5 bits of dx
        psrlw   mm1, 11                 ; y = top 5 bits of dy
        movq    [eax], mm0              ; x is re-read from memory below
        movq    [ebx], mm1              ; y is re-read from memory below

        pmullw  mm0, mm1
        psrlw   mm0, 5                  ; mm0 = wD = (x * y) >> 5
        movq    mm4, mm0                ; mm4 keeps wD for the other weights

        ; ---- pixel D, weight wD: starts the accumulators mm5/mm6/mm7 ----
        mov     edx, [ebp+20]
        movq    mm5, [RedMask]
        movq    mm6, [GreenMask]
        movq    mm7, [BlueMask]
        pand    mm5, [edx]
        pand    mm6, [edx]
        pand    mm7, [edx]
        psrlw   mm5, 5                  ; red to bits 10-6 so red * 32 still fits 16 bits
        pmullw  mm5, mm0
        pmullw  mm6, mm0
        pmullw  mm7, mm0

        ; ---- pixel C, weight wC = y - wD ----
        mov     edx, [ebp+16]
        movq    mm0, [ebx]
        psubw   mm0, mm4
        movq    mm1, [RedMask]
        movq    mm2, [GreenMask]
        movq    mm3, [BlueMask]
        pand    mm1, [edx]
        pand    mm2, [edx]
        pand    mm3, [edx]
        psrlw   mm1, 5
        pmullw  mm1, mm0
        pmullw  mm2, mm0
        pmullw  mm3, mm0
        paddw   mm5, mm1
        paddw   mm6, mm2
        paddw   mm7, mm3

        ; ---- pixel B, weight wB = x - wD ----
        mov     edx, [ebp+12]
        movq    mm0, [eax]
        psubw   mm0, mm4
        movq    mm1, [RedMask]
        movq    mm2, [GreenMask]
        movq    mm3, [BlueMask]
        pand    mm1, [edx]
        pand    mm2, [edx]
        pand    mm3, [edx]
        psrlw   mm1, 5
        pmullw  mm1, mm0
        pmullw  mm2, mm0
        pmullw  mm3, mm0
        paddw   mm5, mm1
        paddw   mm6, mm2
        paddw   mm7, mm3

        ; ---- pixel A, weight wA = 32 + wD - (x + y) ----
        mov     edx, [ebp+8]
        movq    mm0, [All32s]
        paddw   mm0, mm4
        movq    mm2, [eax]
        paddw   mm2, [ebx]
        psubw   mm0, mm2
        movq    mm1, [RedMask]
        movq    mm2, [GreenMask]
        movq    mm3, [BlueMask]
        pand    mm1, [edx]
        pand    mm2, [edx]
        pand    mm3, [edx]
        psrlw   mm1, 5
        pmullw  mm1, mm0
        pmullw  mm2, mm0
        pmullw  mm3, mm0
        paddw   mm5, mm1
        paddw   mm6, mm2
        paddw   mm7, mm3

        ; ---- rescale green/blue, trim every channel, merge ----
        psrlw   mm6, 5
        psrlw   mm7, 5
        pand    mm5, [RedMask]
        pand    mm6, [GreenMask]
        pand    mm7, [BlueMask]
        por     mm5, mm6
        por     mm7, mm5                ; mm7 = four finished pixels

        ; ---- widen: zero in the low word, pixel in the high word ----
        pxor    mm0, mm0
        pxor    mm1, mm1
        punpcklwd mm0, mm7              ; mm0 = 0, p0, 0, p1
        punpckhwd mm1, mm7              ; mm1 = 0, p2, 0, p3

        mov     edx, [ebp+32]           ; destination address
%ifdef FAR_POINTER
        movq    [fs:edx], mm0
        movq    [fs:edx+8], mm1
%else
        movq    [edx], mm0
        movq    [edx+8], mm1
%endif

        pop     edx
        pop     ebx
        leave
        ret
411
412
413
;-----------------------------------------------------------------------
; _EndMMX
; Executes emms and returns; takes no arguments. Meant to be called once
; MMX work is finished, since the blend routines in this file do not
; issue emms themselves.
;-----------------------------------------------------------------------
NEWSYM _EndMMX
        emms
        ret
417
418	SECTION .data ALIGN = 32
419;Some constants
420RedMask       dd 0xF800F800, 0xF800F800
421BlueMask      dd 0x001F001F, 0x001F001F
422GreenMask     dd 0x07E007E0, 0x07E007E0
423All32s        dd 0x00200020, 0x00200020
424