/*
 * PROJECT:     Win32 subsystem
 * LICENSE:     See COPYING in the top level directory
 * FILE:        win32ss/gdi/dib/i386/dib24bpp_hline.s
 * PURPOSE:     ASM optimised 24bpp HLine
 * PROGRAMMERS: Magnus Olsen
 */

#include <asm.inc>

.code

PUBLIC _DIB_24BPP_HLine

/*
 * _DIB_24BPP_HLine - fill a horizontal run of 24bpp pixels with one colour.
 *
 * Syntax: Intel (via asm.inc).  Target: i386.  ABI: 32-bit cdecl, all
 * arguments on the stack, caller cleans up.
 *
 * C-equivalent prototype (as implied by the stack-argument usage):
 *     VOID DIB_24BPP_HLine(SURFOBJ *SurfObj, LONG x1, LONG x2, LONG y, ULONG c);
 *
 * Stack arguments after the three register pushes below:
 *     [esp+16] SurfObj  surface descriptor; the dword at offset 32 is used
 *                       as the base address of the bits and the dword at
 *                       offset 36 as the byte stride between scanlines
 *                       (pvScan0 / lDelta in the 32-bit SURFOBJ layout of
 *                       winddi.h)
 *     [esp+20] x1       first pixel to fill (inclusive)
 *     [esp+24] x2       end pixel (exclusive); x2 - x1 pixels are written
 *     [esp+28] y        scanline index
 *     [esp+32] c        colour; only bits 0..23 are stored
 *
 * Returns:  nothing.
 * Clobbers: eax, ecx, edx, flags.  ebx, esi and edi are saved and restored.
 *
 * Register roles in the body:
 *     edi = address of the next pixel to write
 *     esi = number of pixels still to write
 *
 * A span with x2 <= x1 (signed) writes nothing.
 */
_DIB_24BPP_HLine:
    push edi
    push esi
    push ebx

    mov ebx, [esp+16]                   /* ebx = SurfObj */
    mov ecx, [esp+20]                   /* ecx = x1 */
    mov esi, [esp+24]                   /* esi = x2 */

    /* edi = base + y * stride + x1 * 3 */
    mov edi, [ebx+36]
    imul edi, dword ptr [esp+28]
    add edi, [ebx+32]
    lea edi, [edi+ecx*2]
    add edi, ecx

    /*
     * esi = x2 - x1.  The signed test rejects empty and reversed spans;
     * treating a negative difference as an unsigned count would send it
     * into the bulk path below and overrun the surface.
     */
    sub esi, ecx
    jle hline24_done

    /* For small fills, don't bother doing anything fancy */
    cmp esi, 8
    jb hline24_tail

    /* ax = low 16 bits of c, dl = bits 16..23 of c */
    mov eax, [esp+32]
    mov edx, eax
    shr edx, 16

    /*
     * Write single pixels until the address is 4-byte aligned.  Each pixel
     * advances the address by 3, so at most 3 pixels are consumed here and
     * at least 5 of the original >= 8 remain for the bulk loop.
     */
hline24_align:
    test edi, 3
    jz hline24_bulk
    mov [edi], ax
    mov [edi+2], dl
    add edi, 3
    dec esi
    jmp hline24_align

    /*
     * If the colour is 0ABC, four consecutive pixels occupy three dwords.
     * In memory (little-endian):
     *
     *     |C.B.A|C.B.A|C.B.A|C.B.A|   <- pixel borders
     *     |C.B.A.C|B.A.C.B|A.C.B.A|   <- dword borders
     *
     * so the dword values to store are CABC, BCAB and ABCA.
     */
hline24_bulk:
    and eax, 16777215                   /* eax = 0ABC (mask 0x00FFFFFF) */

    mov ebx, eax
    shr ebx, 8
    mov edx, eax
    shl edx, 16
    or ebx, edx                         /* ebx = BCAB = (c >> 8) | (c << 16) */

    mov edx, eax
    shl edx, 8
    mov ecx, eax
    shr ecx, 16
    or edx, ecx                         /* edx = ABCA = (c << 8) | (c >> 16) */

    mov ecx, eax
    shl ecx, 24
    or eax, ecx                         /* eax = CABC = c | (c << 24) */

    mov ecx, esi
    shr ecx, 2                          /* ecx = groups of 4 pixels, >= 1 here */

hline24_bulk_loop:
    mov [edi], eax
    mov [edi+4], ebx
    mov [edi+8], edx
    add edi, 12
    dec ecx
    jnz hline24_bulk_loop

    /* Count = Count & 0x03; the 0..3 leftover pixels go through the tail */
    and esi, 3

    /*
     * Per-pixel fill of esi pixels at edi.  Reached with esi in 1..7 for
     * small fills, or 0..3 after the bulk loop.
     */
hline24_tail:
    test esi, esi
    jz hline24_done
    mov eax, [esp+32]                   /* ax = low 16 bits of c */
    mov edx, eax
    shr edx, 16                         /* dl = bits 16..23 of c */

hline24_tail_loop:
    mov [edi], ax
    mov [edi+2], dl
    add edi, 3
    dec esi
    jnz hline24_tail_loop

hline24_done:
    pop ebx
    pop esi
    pop edi
    ret

END