; OpenBOR - http://www.LavaLit.com
; ----------------------------------------------------------------------
; All rights reserved, see LICENSE in OpenBOR root for details.
;
; Copyright (c) 2004 - 2012 OpenBOR Team

; Scales images by 2x using bilinear filtering.  This implementation uses the
; SIMD capabilities of MMX to operate on four source pixels at a time.  The
; GPL-encumbered bilinear filter implementation that was in OpenBOR before
; November 2012 also had an MMX implementation, but this code was written from
; scratch by Plombo and isn't related to the old MMX implementation.
12
; FUNCTION name
;
; Declares `name` as a GLOBAL symbol and places its label at the current
; position.  When LINUX is defined the symbol is emitted exactly as given;
; in every other build it is emitted with one extra leading underscore
; (presumably to match the C symbol decoration of those targets -- confirm
; against the build configuration).
%macro FUNCTION 1
%ifndef LINUX
        GLOBAL  _%1
_%1:
%else
        GLOBAL  %1
%1:
%endif
%endmacro
22
; ----------------------------------------------------------------------
; void _BilinearMMX(u8* srcPtr, u32 srcPitch, u8* dstPtr, u32 dstPitch,
;                   int width, int height)
;
; Writes a 2x-enlarged copy of an image made of 16-bit pixels.  For every
; source pixel P with right neighbour R, lower neighbour B and lower-right
; neighbour BR, the 2x2 destination cell is
;
;       P               avg(P, R)
;       avg(P, B)       avg(avg(P, R), avg(B, BR))
;
; Four source pixels (one 64-bit MMX register) are handled per inner-loop
; iteration, producing 8 pixels on each of two destination rows.
;
; ABI:   32-bit, all arguments on the stack, frame pointer in ebp.
; In:    [ebp+8]  srcPtr     first source pixel
;        [ebp+12] srcPitch   bytes between source rows
;        [ebp+16] dstPtr     first destination pixel
;        [ebp+20] dstPitch   bytes between destination rows
;        [ebp+24] width      source pixels per row; only (width >> 2) groups
;                            of four are processed, a remainder is ignored
;        [ebp+28] height     source rows
; Out:   nothing
; Clobb: eax, ecx, edx, mm0-mm5, mm7, flags.  ebx/esi/edi are used but
;        saved and restored.  The srcPtr, dstPtr and width argument slots
;        on the stack are overwritten with loop state.
;
; If width < 4 or height <= 0 the function returns without reading or
; writing any pixel.
;
; NOTE: the R and BR loads start 2 bytes after P and B, so each group reads
; one 16-bit pixel past its four, and the B/BR loads of the last source row
; read from srcPtr + height*srcPitch.  The caller must make sure that
; memory is readable (e.g. by padding the source buffer).
; ----------------------------------------------------------------------

; Clears bits 0, 5 and 11 of every 16-bit word, i.e. the lowest bit of each
; channel in a 5:6:5 layout, so that a following 1-bit right shift of the
; whole word cannot carry a bit from one channel into the next.
BILINEAR_LSB_MASK       equ     0xf7def7de

; Offsets of the stack arguments relative to ebp.
BILINEAR_ARG_SRC_PTR    equ     8
BILINEAR_ARG_SRC_PITCH  equ     12
BILINEAR_ARG_DST_PTR    equ     16
BILINEAR_ARG_DST_PITCH  equ     20
BILINEAR_ARG_WIDTH      equ     24
BILINEAR_ARG_HEIGHT     equ     28

FUNCTION _BilinearMMX
        push    ebp
        mov     ebp, esp
        push    ebx                             ; ebx/esi/edi are callee-saved
        push    esi
        push    edi

        ; Reject empty work up front: the loops below count down with
        ; dec/jnz and would wrap around on a zero counter.
        mov     ebx, [ebp + BILINEAR_ARG_WIDTH]
        sar     ebx, 2                          ; ebx = groups of 4 pixels per row
        test    ebx, ebx
        jle     .return                         ; width < 4 (or negative)
        cmp     dword [ebp + BILINEAR_ARG_HEIGHT], 0
        jle     .return                         ; height <= 0
        mov     [ebp + BILINEAR_ARG_WIDTH], ebx ; kept to reload ebx for each row

        mov     eax, BILINEAR_LSB_MASK
        movd    mm7, eax
        punpckldq mm7, mm7                      ; mm7 = mask replicated to 4 words

        mov     ecx, [ebp + BILINEAR_ARG_SRC_PTR]       ; ecx = source cursor
        mov     edx, [ebp + BILINEAR_ARG_DST_PTR]       ; edx = destination cursor
        mov     esi, [ebp + BILINEAR_ARG_SRC_PITCH]
        mov     edi, [ebp + BILINEAR_ARG_DST_PITCH]
        mov     eax, [ebp + BILINEAR_ARG_HEIGHT]        ; eax = rows remaining

.group_loop:
        movq    mm0, [ecx]                      ; mm0 = P  (also the top-left outputs)
        movq    mm1, [ecx + 2]                  ; mm1 = R
        movq    mm2, [ecx + esi]                ; mm2 = B
        movq    mm3, [ecx + esi + 2]            ; mm3 = BR

        ; ---- upper destination row: P interleaved with avg(P, R) ----
        movq    mm4, mm0
        pand    mm4, mm7
        psrlw   mm4, 1                          ; mm4 = P / 2
        pand    mm1, mm7
        psrlw   mm1, 1                          ; mm1 = R / 2
        paddw   mm1, mm4                        ; mm1 = avg(P, R)
        movq    mm5, mm0                        ; second copy of P for the high half
        punpcklwd mm0, mm1                      ; P0 H0 P1 H1
        punpckhwd mm5, mm1                      ; P2 H2 P3 H3
        movq    [edx], mm0
        movq    [edx + 8], mm5

        ; ---- lower destination row: avg(P, B) interleaved with the 4-way average ----
        pand    mm2, mm7
        psrlw   mm2, 1                          ; mm2 = B / 2
        paddw   mm4, mm2                        ; mm4 = avg(P, B)
        pand    mm3, mm7
        psrlw   mm3, 1                          ; mm3 = BR / 2
        paddw   mm2, mm3                        ; mm2 = avg(B, BR)
        pand    mm2, mm7
        psrlw   mm2, 1                          ; mm2 = avg(B, BR) / 2
        pand    mm1, mm7
        psrlw   mm1, 1                          ; mm1 = avg(P, R) / 2
        paddw   mm1, mm2                        ; mm1 = average of P, R, B, BR
        movq    mm0, mm4
        punpcklwd mm0, mm1
        punpckhwd mm4, mm1
        movq    [edx + edi], mm0
        movq    [edx + edi + 8], mm4

        ; next group of four source pixels in this row
        add     ecx, 8                          ; 4 source pixels consumed
        add     edx, 16                         ; 8 destination pixels written per row
        dec     ebx
        jnz     .group_loop

        ; next source row; the destination advances two rows because two
        ; destination rows were written for this one source row
        mov     ecx, [ebp + BILINEAR_ARG_SRC_PTR]
        add     ecx, esi
        mov     [ebp + BILINEAR_ARG_SRC_PTR], ecx
        mov     edx, [ebp + BILINEAR_ARG_DST_PTR]
        lea     edx, [edx + edi*2]
        mov     [ebp + BILINEAR_ARG_DST_PTR], edx
        mov     ebx, [ebp + BILINEAR_ARG_WIDTH] ; reload groups-per-row counter
        dec     eax
        jnz     .group_loop

        emms                                    ; leave MMX state before returning

.return:
        pop     edi
        pop     esi
        pop     ebx
        pop     ebp
        ret
95
96