{
    x86 surface clear routines for HERMES
    Copyright (c) 1998 Christian Nentwich (c.nentwich@cs.ucl.ac.uk)

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
    License as published by the Free Software Foundation; either
    version 2.1 of the License, or (at your option) any later version
    with the following modification:

    As a special exception, the copyright holders of this library give you
    permission to link this library with independent modules to produce an
    executable, regardless of the license terms of these independent modules,and
    to copy and distribute the resulting executable under terms of your choice,
    provided that you also meet, for each linked independent module, the terms
    and conditions of the license of that module. An independent module is a
    module which is not derived from or based on this library. If you modify
    this library, you may extend this exception to your version of the library,
    but you are not obligated to do so. If you do not wish to do so, delete this
    exception statement from your version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public
    License along with this library; if not, write to the Free Software
    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

    (04/10/99) Modified ClearX86_8 <Mikko.Tiihonen@hut.fi>
}

{$ASMMODE att}

{
 --------------------------------------------------------------------------
 HermesClearInterface (ebp+..)
   0: char8 *dest
   4: int32 value
   8: unsigned int width (already checked to be >0!)
  12: unsigned int height (already checked to be >0!)
  16: int add

 All routines below are i386, AT&T syntax, cdecl.  Each one loads the hci
 pointer into ebp and then reads the fields listed above through it.
 Register roles shared by every routine:
   eax = fill pattern        ecx = rep counter
   edx = rows remaining      edi = destination cursor
   ebp = hci
 After each row the 'add' field (offset 16) is added to the destination
 cursor.
}

{
  ClearX86_32: stores 'width' dwords of 'value' per row, for 'height' rows.

  This routine is not nostackframe and reads its argument at 8(%ebp) on
  entry, so it assumes the compiler-generated entry code has already set up
  ebp as a frame pointer.  NOTE(review): it also leaves the preservation of
  edi to the compiler-generated entry/exit code - confirm for the targeted
  compiler version.
}
procedure ClearX86_32(hci: PHermesClearInterface); cdecl; assembler;
asm
        pushl %ebp                      // ebp is reused as the hci pointer below

        movl 8(%ebp),%ebp               // ebp = hci

        movl (%ebp),%edi                // destination
        movl 4(%ebp),%eax               // pixel value

        movl 12(%ebp),%edx              // height
.balign 4
.L_y:
        movl 8(%ebp),%ecx               // width, reloaded for every row
        rep
        stosl

        addl 16(%ebp),%edi              // apply the per-row add

        decl %edx
        jnz .L_y

        popl %ebp                       // back to the frame pointer
end;

{
  ClearX86_24: empty stub.  The body contains no instructions, so calling it
  leaves the surface untouched.
}
procedure ClearX86_24(hci: PHermesClearInterface); cdecl; assembler;
asm
end;

{
  ClearX86_16: stores 'width' 16-bit pixels per row, for 'height' rows.

  The low 16 bits of 'value' are duplicated into both halves of eax so that
  two pixels can be written per stosl.  If the row start is not a multiple
  of 4, one pixel is written first; an odd pixel left over after the dword
  run is written last.

  Extra register: ebx = scratch while building the pattern.
  Like ClearX86_32 this routine is not nostackframe and reads its argument
  at 8(%ebp) on entry.  NOTE(review): preservation of ebx and edi is left to
  the compiler-generated entry/exit code - confirm for the targeted
  compiler version.
}
procedure ClearX86_16(hci: PHermesClearInterface); cdecl; assembler;
asm
        pushl %ebp                      // ebp is reused as the hci pointer below

        movl 8(%ebp),%ebp               // ebp = hci

        movl (%ebp),%edi                // destination
        movl 4(%ebp),%eax               // pixel value

        movl 12(%ebp),%edx              // height
        movl %eax,%ebx

        shll $16,%eax                   // Duplicate pixel value
        andl $0x0ffff,%ebx

        orl %ebx,%eax                   // eax = pixel in both 16-bit halves
.L_y:
        movl 8(%ebp),%ecx               // width in pixels

        testl $3,%edi                   // Check if destination is aligned mod 4
        jz .L_aligned

        movw %ax,(%edi)                 // otherwise write one pixel
        addl $2,%edi

        decl %ecx
        jz .L_endline                   // that pixel was the whole row

.L_aligned:
        shrl $1,%ecx                    // ecx = dword count, CF = odd pixel left

        rep
        stosl                           // rep stos leaves the flags alone

        jnc .L_endline                  // CF still comes from the shrl above

        movw %ax,(%edi)                 // trailing odd pixel
        addl $2,%edi

.L_endline:
        addl 16(%ebp),%edi              // apply the per-row add

        decl %edx
        jnz .L_y

        popl %ebp                       // back to the frame pointer
end;

{
  ClearX86_8: stores 'width' bytes of the low byte of 'value' per row, for
  'height' rows.

  The byte is replicated into all four bytes of eax.  Rows wider than 5
  bytes are written as: leading bytes up to a 4-byte boundary, a run of
  dwords, then the remaining 0..3 bytes.  Rows of 5 bytes or fewer are
  written with a plain byte loop; the width test is done once up front so
  the main loop never has to deal with a row shorter than its alignment
  prefix.

  Extra register: ebx = bytes still to write in the current row.

  The routine is nostackframe, so it builds its own frame to reach the
  argument and saves ebx and edi itself: both are callee-saved under the
  i386 cdecl convention and nothing else would restore them.  The pushes
  happen after ebp is set, so the argument is still at 8(%ebp).
}
procedure ClearX86_8(hci: PHermesClearInterface); cdecl; nostackframe; assembler;
asm
        pushl %ebp
        movl %esp,%ebp
        pushl %ebx                      // callee-saved, used as row byte counter
        pushl %edi                      // callee-saved, used as destination cursor

        movl 8(%ebp),%ebp               // ebp = hci

        movl 4(%ebp),%eax               // pixel value
        movl 12(%ebp),%edx              // height

        movb %al,%ah
        movl (%ebp),%edi                // destination

        movl %eax,%ecx

        shll $16,%eax                   // Put the byte pixel value in all four bytes
        andl $0x0ffff,%ecx              // of eax

        movl 8(%ebp),%ebx               // width
        orl %ecx,%eax

        cmpl $5,%ebx                    // removes need for extra checks later
        jbe .L_short_y

.balign 4
.L_y:
        testl $3,%edi
        jz .L_aligned

        movl %edi,%ecx
        negl %ecx
        andl $3,%ecx                    // ecx = bytes up to the next 4-byte boundary

        subl %ecx,%ebx

        rep
        stosb

.L_aligned:
        movl %ebx,%ecx

        shrl $2,%ecx                    // dword count
        andl $3,%ebx                    // bytes left after the dwords

        rep
        stosl

        movl %ebx,%ecx
        rep
        stosb

        addl 16(%ebp),%edi              // apply the per-row add

        decl %edx
        movl 8(%ebp),%ebx               // reload width; mov keeps the flags of decl
        jnz .L_y

        popl %edi
        popl %ebx
        popl %ebp
        ret

// Short loop
.balign 4
.L_short_y:
        movl %ebx,%ecx                  // ebx still holds the width here

        rep
        stosb
        addl 16(%ebp),%edi              // apply the per-row add

        decl %edx
        jnz .L_short_y

        popl %edi
        popl %ebx
        popl %ebp
        ret                             // explicit, same as the main exit above
end;

{
  ClearX86_8 version 2,
  I am not sure whether this is faster or not...
  too many jumps could confuse the CPU branch guessing

  Same contract and register use as ClearX86_8.  The difference is that the
  1..3 leading bytes and the 1..3 trailing bytes of a row are written with
  individual byte stores instead of rep stosb.

  As in ClearX86_8, ebx and edi are saved and restored here because the
  routine is nostackframe and both registers are callee-saved under cdecl.
}
procedure ClearX86_8_2(hci: PHermesClearInterface); cdecl; nostackframe; assembler;
asm
        pushl %ebp
        movl %esp,%ebp
        pushl %ebx                      // callee-saved, used as row byte counter
        pushl %edi                      // callee-saved, used as destination cursor

        movl 8(%ebp),%ebp               // ebp = hci

        movl 4(%ebp),%eax               // pixel value
        movl 12(%ebp),%edx              // height

        movb %al,%ah
        movl (%ebp),%edi                // destination

        movl %eax,%ecx

        shll $16,%eax                   // Put the byte pixel value in all four bytes
        andl $0x0ffff,%ecx              // of eax

        movl 8(%ebp),%ebx               // width
        orl %ecx,%eax

        cmpl $5,%ebx                    // removes need for extra checks in main loop
        jbe .L_short_y


.balign 4
.L_y:
        testl $3,%edi
        jz .L_aligned

        movl %edi,%ecx
        negl %ecx
        andl $3,%ecx                    // ecx = 1..3 bytes up to the boundary

        movb %al,(%edi)                 // first leading byte
        subl %ecx,%ebx

        incl %edi

        decl %ecx
        jz .L_aligned

        movb %al,(%edi)                 // second leading byte
        incl %edi
        decl %ecx
        jz .L_aligned

        movb %al,(%edi)                 // third leading byte
        incl %edi

.L_aligned:
        movl %ebx,%ecx

        shrl $2,%ecx                    // dword count
        andl $3,%ebx                    // ZF set when no trailing bytes remain

        rep
        stosl                           // rep stos leaves the flags alone

        jz .L_endline                   // ZF still comes from the andl above

        // Write remaining (1,2 or 3) pixels, count in ebx
        movb %al,(%edi)
        incl %edi
        decl %ebx
        jz .L_endline

        movb %al,(%edi)
        incl %edi
        decl %ebx
        jz .L_endline

        movb %al,(%edi)                 // ebx was at most 3, so this is the last one
        incl %edi

.L_endline:
        addl 16(%ebp),%edi              // apply the per-row add

        decl %edx
        movl 8(%ebp),%ebx               // reload width; mov keeps the flags of decl
        jnz .L_y

        popl %edi
        popl %ebx
        popl %ebp
        ret

// Short loop
.balign 4
.L_short_y:
        movl %ebx,%ecx                  // ebx still holds the width here

        rep
        stosb
        addl 16(%ebp),%edi              // apply the per-row add

        decl %edx
        jnz .L_short_y

        popl %edi
        popl %ebx
        popl %ebp
        ret                             // explicit, same as the main exit above
end;