xref: /reactos/win32ss/gdi/dib/i386/dib24bpp_hline.s (revision 4561998a)
1/*
2 * PROJECT:         Win32 subsystem
3 * LICENSE:         See COPYING in the top level directory
4 * FILE:            win32ss/gdi/dib/i386/dib24bpp_hline.s
5 * PURPOSE:         ASM optimised 24bpp HLine
6 * PROGRAMMERS:     Magnus Olsen
7 */
8
9#include <asm.inc>
10
11.code
12
13PUBLIC _DIB_24BPP_HLine
14
/*
 * _DIB_24BPP_HLine
 *
 * Fills a horizontal run of 3-byte pixels with the low 24 bits of a color.
 *
 * All arguments are read from the stack and every exit is a plain `ret`,
 * so this routine does not remove its own arguments. Offsets below are
 * relative to esp after the prologue (3 pushes + 24 bytes of locals):
 *   [esp+40] arg1  pointer to a structure; the dword at +32 is used as the
 *                  base address and the dword at +36 as the per-row stride
 *                  (presumably SURFOBJ.pvScan0 / SURFOBJ.lDelta - confirm
 *                  against the structure definition)
 *   [esp+44] arg2  index of the first pixel to write
 *   [esp+48] arg3  end index; count = arg3 - arg2
 *   [esp+52] arg4  row index
 *   [esp+56] arg5  color; bytes 0..2 are what ends up in memory
 *
 * Locals:
 *   [esp]    pixel counter
 *   [esp+4]  current destination address
 *
 * edi, esi and ebx are saved and restored; eax, ecx and edx are overwritten.
 *
 * NOTE(review): the count is tested with an unsigned branch (ja), so a
 * negative count (arg3 < arg2) is treated as a very large one and takes
 * the dword-fill path. Callers presumably guarantee arg3 >= arg2 - verify.
 */
15      _DIB_24BPP_HLine:
16                         push    edi                        /* save the registers that are overwritten below */
17                         push    esi
18                         push    ebx
19                         sub     esp, 24                    /* local area; only [esp] and [esp+4] are used */
20                         mov     ebx, [esp+40]              /* ebx = arg1 (structure pointer) */
21                         mov     edi, [esp+52]              /* edi = arg4 (row index) */
22                         mov     ecx, [esp+44]              /* ecx = arg2 (first pixel index) */
23                         mov     eax, [ebx+36]              /* eax = row stride */
24                         mov     esi, [ebx+32]              /* esi = base address */
25                         mov     edx, [esp+48]              /* edx = arg3 (end pixel index) */
26                         imul    eax, edi                   /* eax = row * stride */
27                         sub     edx, ecx                   /* edx = count = arg3 - arg2 */
28                         mov     [esp], edx
29                         add     eax, esi
30                         lea     eax, [eax+ecx*2]
31                         add     eax, ecx                   /* eax = base + row*stride + first*3 */
32                         cmp     edx, 7
33                         mov     esi, edx                   /* esi = count; mov leaves the cmp flags intact */
34                         mov     [esp+4], eax               /* remember the destination address */
35                         ja      Align4byte                 /* unsigned count > 7: use the dword-fill path */
36                         lea     eax, [edx-1]
37                         mov     [esp], eax                 /* counter = count - 1 */
38                         inc     eax                        /* eax = count again; ZF set when count == 0 */
39                         jnz     small_fill
40                         add     esp, 24                    /* count == 0: nothing to write, just return */
41                         pop     ebx
42                         pop     esi
43                         pop     edi
44                         ret
45
46                        /* For small fills, don't bother doing anything fancy */
47               small_fill:
48                         movzx   ecx, word ptr [esp+58]     /* upper half of arg5; cl = bits 16..23 */
49                         mov     edx, [esp+4]               /* edx = address of this pixel */
50                         mov     esi, [esp+56]              /* esi = arg5; si = bits 0..15 */
51                         lea     eax, [edx+2]
52                         mov     [esp+4], eax
53                         mov     [edx+2], cl                /* third byte of the pixel */
54                         mov     eax, [esp]                 /* eax = pixels still to write after this one */
55                         inc     dword ptr [esp+4]          /* stored address is now this pixel + 3 */
56                         mov     [edx], si                  /* first two bytes of the pixel */
57                         dec     eax
58                         mov     [esp], eax                 /* counter for the next iteration */
59                         inc     eax                        /* back to the value read above; zero means done */
60                         jnz     small_fill
61                         add     esp, 24                    /* release locals and restore saved registers */
62                         pop     ebx
63                         pop     esi
64                         pop     edi
65                         ret
66
67               Align4byte:
68                         /* Align to 4-byte address */
69                         test    al, 3                      /* eax still holds the destination address */
70                         mov     ecx, eax
71                         jz      loop1                      /* already aligned; esi still holds the full count */
72                         lea     esi, [esi+0]               /* leaves esi unchanged; presumably padding before the loop */
73                         lea     edi, [edi+0]               /* leaves edi unchanged; presumably padding before the loop */
74
75           loopasmversion:
76                        /* This is about 30% faster than the generic C code below */
77                         movzx   edx, word ptr [esp+58]     /* upper half of arg5; dl = bits 16..23 */
78                         lea     edi, [ecx+2]
79                         mov     eax, [esp+56]              /* eax = arg5; ax = bits 0..15 */
80                         mov     [esp+4], edi
81                         mov     [ecx+2], dl                /* third byte of the pixel */
82                         mov     ebx, [esp+4]
83                         mov     [ecx], ax                  /* first two bytes of the pixel */
84                         mov     edx, [esp]                 /* edx = remaining count */
85                         inc     ebx                        /* ebx = address of the next pixel (ecx + 3) */
86                         mov     [esp+4], ebx
87                         dec     edx                        /* one pixel consumed */
88                         test    bl, 3                      /* is the next address 4-byte aligned? */
89                         mov     [esp], edx
90                         mov     ecx, ebx
91                         jnz     loopasmversion             /* branch uses the flags of the test above */
92                         mov     esi, edx                   /* esi = count left after alignment */
93
94                   loop1:
95                         mov     ecx, [esp+56]
96                         and     ecx, 16777215              /* c = arg5 & 0x00FFFFFF */
97                         mov     ebx, ecx
98                         shr     ebx, 8
99                         mov     eax, ecx
100                         shl     eax, 16
101                         or      ebx, eax                   /* ebx = (c >> 8) | (c << 16): second dword of the pattern */
102                         mov     edx, ecx
103                         shl     edx, 8
104                         mov     eax, ecx
105                         shr     eax, 16
106                         or      edx, eax                   /* edx = (c << 8) | (c >> 16): third dword of the pattern */
107                         mov     eax, ecx
108                         shl     eax, 24
109                         or      eax, ecx                   /* eax = c | (c << 24): first dword of the pattern */
110                         mov     ecx, [esp]
111                         shr     ecx, 2                     /* ecx = remaining count / 4 = number of 12-byte groups */
112                         mov     edi, [esp+4]               /* edi = aligned destination address */
113                    loop2:
114                         mov     [edi], eax                 /* three dwords = four 3-byte pixels */
115                         mov     [edi+4], ebx
116                         mov     [edi+8], edx
117                         add     edi, 12
118                         dec     ecx                        /* body runs before the test; ecx >= 1 here because the */
119                         jnz     loop2                      /* count was > 7 and alignment consumes at most 3 pixels */
120                         mov     [esp+4], edi               /* store the address reached by the dword fill */
121                         and     esi, 3                     /* pixels left over after the 4-pixel groups */
122                         lea     eax, [esi-1]
123                         mov     [esp], eax                 /* counter = leftover - 1 */
124                         inc     eax                        /* zero when there is no leftover */
125                         jnz     leftoverfromthemainloop
126                         add     esp, 24                    /* release locals and restore saved registers */
127                         pop     ebx
128                         pop     esi
129                         pop     edi
130                         ret
131
132  leftoverfromthemainloop:
133
134                        /*  Count = Count & 0x03; */
135                         mov     ecx, [esp+4]               /* ecx = address of this pixel */
136                         mov     ebx, [esp+56]              /* ebx = arg5 */
137                         lea     esi, [ecx+2]
138                         mov     [ecx], bx                  /* first two bytes of the pixel */
139                         shr     ebx, 16                    /* bl = bits 16..23 of arg5 */
140                         mov     [esp+4], esi
141                         mov     [ecx+2], bl                /* third byte of the pixel */
142                         mov     eax, [esp]                 /* eax = pixels still to write after this one */
143                         inc     dword ptr [esp+4]          /* stored address is now this pixel + 3 */
144                         dec     eax
145                         mov     [esp], eax                 /* counter for the next iteration */
146                         inc     eax                        /* back to the value read above; zero means done */
147                         jnz     leftoverfromthemainloop
148                         add     esp, 24                    /* release locals and restore saved registers */
149                         pop     ebx
150                         pop     esi
151                         pop     edi
152                         ret
153
154END
155