; OpenBOR - http://www.LavaLit.com
; ----------------------------------------------------------------------
; All rights reserved, see LICENSE in OpenBOR root for details.
;
; Copyright (c) 2004 - 2012 OpenBOR Team

; Scales images by 2x using bilinear filtering.  This implementation uses the
; SIMD capabilities of MMX to operate on four source pixels at a time.  The
; GPL-encumbered bilinear filter implementation that was in OpenBOR before
; November 2012 also had an MMX implementation, but this code was written from
; scratch by Plombo and isn't related to the old MMX implementation.
;
; Syntax: NASM (Intel operand order), 32-bit x86, arguments passed on the stack.

; FUNCTION name
; Exports a global symbol and defines its entry label.  When LINUX is defined
; the symbol is emitted as given; otherwise one extra leading underscore is
; prepended.
%macro FUNCTION 1
%ifdef LINUX
GLOBAL %1
%1:
%else
GLOBAL _%1
_%1:
%endif
%endmacro

; Mask that clears the lowest bit of each 5/6/5-bit field of a 16-bit pixel
; (two pixels per dword), so that a following "psrlw 1" halves every field
; without a bit leaking into the neighbouring field.
%define HALVE_MASK      0xf7def7de

; Stack-argument addresses relative to ebp (after "push ebp / mov ebp, esp").
%define ARG_SRC         ebp+8           ; u8 *srcPtr
%define ARG_SRCPITCH    ebp+12          ; u32 srcPitch  (bytes per source row)
%define ARG_DST         ebp+16          ; u8 *dstPtr
%define ARG_DSTPITCH    ebp+20          ; u32 dstPitch  (bytes per dest row)
%define ARG_WIDTH       ebp+24          ; int width     (source pixels per row)
%define ARG_HEIGHT      ebp+28          ; int height    (source rows)

;-----------------------------------------------------------------------
; void _BilinearMMX(u8* srcPtr, u32 srcPitch, u8* dstPtr, u32 dstPitch,
;                   int width, int height)
;
; For every source pixel S0 with right neighbour S1, lower neighbour S2 and
; lower-right neighbour S3, writes a 2x2 destination cell:
;       D0 = S0                 D1 = S0/2 + S1/2
;       D3 = S0/2 + S2/2        D4 = D1/2 + (S2/2 + S3/2)/2
; where "/2" is the masked per-field halving described at HALVE_MASK.
;
; Pixels are 16 bits wide; four are processed per iteration, so only
; (width >> 2) * 4 pixels of each row are handled.  If width < 4 or
; height <= 0 the function returns without writing anything.
;
; Memory access: each iteration loads 8 bytes at +0 and at +2 from the
; current row and from the row srcPitch bytes below it, i.e. it reads one
; pixel past each 4-pixel group and one row past the current row.  The
; caller must make that memory readable.
;
; Registers inside the loops:
;       eax = rows remaining            ebx = 4-pixel groups remaining in row
;       ecx = source cursor             edx = destination cursor
;       esi = srcPitch                  edi = dstPitch
;       mm7 = HALVE_MASK replicated to 64 bits
; The incoming argument slots for srcPtr, dstPtr and width are overwritten
; and used as storage for the row base pointers and the per-row group count.
;
; Preserves ebx, esi, edi, ebp.  Clobbers eax, ecx, edx, mm0-mm5, mm7, flags.
; Leaves the MMX state cleared (emms) on every return path.
;-----------------------------------------------------------------------
FUNCTION _BilinearMMX
        push    ebp
        mov     ebp, esp
        push    ebx                     ; ebx/esi/edi are used below and must
        push    esi                     ; be handed back to the caller intact
        push    edi

        ; Reject empty work up front: the loops below are dec/jnz loops and
        ; would wrap around if entered with a count of zero (or less).
        mov     ebx, [ARG_WIDTH]
        sar     ebx, 2                  ; groups per row = width / 4
        test    ebx, ebx
        jle     .done
        mov     eax, [ARG_HEIGHT]
        test    eax, eax
        jle     .done
        mov     [ARG_WIDTH], ebx        ; keep the group count for later rows

        mov     ecx, [ARG_SRC]
        mov     edx, [ARG_DST]
        mov     esi, [ARG_SRCPITCH]
        mov     edi, [ARG_DSTPITCH]

        push    eax                     ; borrow eax to build the mask
        mov     eax, HALVE_MASK
        movd    mm7, eax
        punpckldq mm7, mm7              ; mm7 = 0xf7def7def7def7de
        pop     eax                     ; eax = rows remaining again

.loopstart:
        movq    mm0, [ecx]              ; mm0 = S0 (also D0)
        movq    mm1, [ecx+2]            ; mm1 = S1
        movq    mm2, [ecx+esi]          ; mm2 = S2
        movq    mm3, [ecx+esi+2]        ; mm3 = S3

        ; ---- upper destination row: D0 D1 D0 D1 ... ----
        movq    mm4, mm0
        pand    mm4, mm7
        psrlw   mm4, 1                  ; mm4 = S0/2
        pand    mm1, mm7
        psrlw   mm1, 1                  ; mm1 = S1/2
        paddw   mm1, mm4                ; mm1 = D1
        movq    mm5, mm0                ; mm5 = D0
        punpcklwd mm0, mm1              ; interleave low two D0/D1 pairs
        punpckhwd mm5, mm1              ; interleave high two D0/D1 pairs
        movq    [edx], mm0
        movq    [edx+8], mm5

        ; ---- lower destination row: D3 D4 D3 D4 ... ----
        pand    mm2, mm7
        psrlw   mm2, 1                  ; mm2 = S2/2
        paddw   mm4, mm2                ; mm4 = D3
        pand    mm3, mm7
        psrlw   mm3, 1                  ; mm3 = S3/2
        paddw   mm2, mm3                ; mm2 = S2/2 + S3/2
        pand    mm2, mm7
        psrlw   mm2, 1                  ; mm2 = (S2/2 + S3/2)/2
        pand    mm1, mm7
        psrlw   mm1, 1                  ; mm1 = D1/2
        paddw   mm1, mm2                ; mm1 = D4
        movq    mm0, mm4
        punpcklwd mm0, mm1              ; interleave low two D3/D4 pairs
        punpckhwd mm4, mm1              ; interleave high two D3/D4 pairs
        movq    [edx+edi], mm0
        movq    [edx+edi+8], mm4

        ; done with this group, go to the next four pixels in the row
        add     ecx, 8                  ; 4 source pixels * 2 bytes
        add     edx, 16                 ; 8 destination pixels * 2 bytes
        dec     ebx
        jnz     .loopstart

        ; done with row, go to next row in source image
        add     [ARG_SRC], esi          ; srcPtr += srcPitch
        add     [ARG_DST], edi          ; dstPtr += dstPitch
        add     [ARG_DST], edi          ; dstPtr += dstPitch (because we write
                                        ; 2 lines of dst for every 1 of src)
        mov     ecx, [ARG_SRC]
        mov     edx, [ARG_DST]
        mov     ebx, [ARG_WIDTH]        ; reload groups per row
        dec     eax
        jnz     .loopstart

.done:
        emms                            ; leave the FPU usable for the caller
        pop     edi
        pop     esi
        pop     ebx
        pop     ebp
        ret