;/*
; * OpenBOR - http://www.LavaLit.com
; * -----------------------------------------------------------------------
; * All rights reserved, see LICENSE in OpenBOR root for details.
; *
; * Copyright (c) 2004 - 2011 OpenBOR Team
; */

;-----------------------------------------------------------------------
; MMX bilinear blend of four packed 16-bit pixels at a time.
;
; Syntax: NASM (Intel operand order), 32-bit code.
; Calling convention: arguments are read from the stack relative to EBP
; after a standard frame is built; all seven arguments are used as
; pointers:
;
;   [ebp+ 8]  A     -> 4 source pixels (8 bytes)
;   [ebp+12]  B     -> 4 source pixels
;   [ebp+16]  C     -> 4 source pixels
;   [ebp+20]  D     -> 4 source pixels
;   [ebp+24]  dx    -> 4 words; only the top 5 bits of each are used
;   [ebp+28]  dy    -> 4 words; only the top 5 bits of each are used
;   [ebp+32]  dest  -> output buffer
;
; Side effect: the words at dx and dy are OVERWRITTEN with their 5-bit
; reduced values (value >> 11).
;
; Per 16-bit lane, with x = dx >> 11, y = dy >> 11, xy = (x * y) >> 5,
; each colour field (selected by RedMask / GreenMask / BlueMask) becomes
;
;   (A*(32 + xy - x - y) + B*(x - xy) + C*(y - xy) + D*xy) / 32
;
; The four weights always add up to 32.
;
; Registers: EBX and EDX are saved and restored, EAX is left holding the
; dx pointer, MM0-MM7 are clobbered.  None of the blend routines executes
; EMMS; _EndMMX is provided for that.
;-----------------------------------------------------------------------

%include "macros.mac"

;%define FAR_POINTER

BITS 32

; Stack argument slots (valid once the EBP frame is set up).
%define BIL_ARG_A       ebp+8
%define BIL_ARG_B       ebp+12
%define BIL_ARG_C       ebp+16
%define BIL_ARG_D       ebp+20
%define BIL_ARG_DX      ebp+24
%define BIL_ARG_DY      ebp+28
%define BIL_ARG_DEST    ebp+32

; Number of fractional bits kept from dx/dy; weights therefore sum to 32.
%define BIL_FRAC_BITS   5

;-----------------------------------------------------------------------
; BIL_SPLIT_SCALE r, g, b
;   In:   edx -> 4 packed pixels, mm0 = per-lane weight
;   Out:  r = ((pixel & RedMask) >> 5) * weight
;         g =  (pixel & GreenMask)     * weight
;         b =  (pixel & BlueMask)      * weight
;   Red is shifted down before the multiply so that a full-scale red
;   field times the total weight of 32 still fits in a 16-bit lane
;   (pmullw keeps only the low 16 bits of each product).
;-----------------------------------------------------------------------
%macro BIL_SPLIT_SCALE 3
        movq        %1, [RedMask]
        movq        %2, [GreenMask]
        movq        %3, [BlueMask]
        pand        %1, [edx]
        pand        %2, [edx]
        pand        %3, [edx]
        psrlw       %1, BIL_FRAC_BITS
        pmullw      %1, mm0
        pmullw      %2, mm0
        pmullw      %3, mm0
%endmacro

;-----------------------------------------------------------------------
; BIL_ADD_TAP
;   In:   edx -> 4 packed pixels, mm0 = per-lane weight
;   Adds the weighted fields to the accumulators mm5 (red), mm6 (green)
;   and mm7 (blue).  Clobbers mm1-mm3.
;-----------------------------------------------------------------------
%macro BIL_ADD_TAP 0
        BIL_SPLIT_SCALE mm1, mm2, mm3
        paddw       mm5, mm1
        paddw       mm6, mm2
        paddw       mm7, mm3
%endmacro

;-----------------------------------------------------------------------
; BIL_BLEND
;   Function prologue plus the complete blend shared by all three entry
;   points.
;   Out:  mm7 = 4 blended 16-bit pixels, edx = dest pointer,
;         eax = dx pointer, ebx = dy pointer.
;   Leaves EBP frame, saved EBX and saved EDX on the stack; pair with
;   BIL_LEAVE.
;-----------------------------------------------------------------------
%macro BIL_BLEND 0
        push        ebp
        mov         ebp, esp
        push        ebx
        mov         eax, [BIL_ARG_DX]
        mov         ebx, [BIL_ARG_DY]
        push        edx

        ; x = dx >> 11, y = dy >> 11; the reduced values are written
        ; back because the weights below are rebuilt from memory.
        movq        mm0, [eax]
        movq        mm1, [ebx]
        psrlw       mm0, 16 - BIL_FRAC_BITS
        psrlw       mm1, 16 - BIL_FRAC_BITS
        movq        [eax], mm0
        movq        [ebx], mm1

        ; D, weight xy = (x * y) >> 5.  This tap initialises the
        ; accumulators; xy is kept in mm4 for the remaining taps.
        pmullw      mm0, mm1
        psrlw       mm0, BIL_FRAC_BITS
        mov         edx, [BIL_ARG_D]
        BIL_SPLIT_SCALE mm5, mm6, mm7
        movq        mm4, mm0

        ; C, weight y - xy
        mov         edx, [BIL_ARG_C]
        movq        mm0, [ebx]
        psubw       mm0, mm4
        BIL_ADD_TAP

        ; B, weight x - xy
        mov         edx, [BIL_ARG_B]
        movq        mm0, [eax]
        psubw       mm0, mm4
        BIL_ADD_TAP

        ; A, weight 32 + xy - (x + y)
        mov         edx, [BIL_ARG_A]
        movq        mm0, [All32s]
        movq        mm2, [eax]
        paddw       mm0, mm4
        paddw       mm2, [ebx]
        psubw       mm0, mm2
        BIL_ADD_TAP

        ; Divide by 32.  Red was pre-shifted before multiplying, so it is
        ; already in position; green and blue are shifted down here.
        ; The masks then drop the fractional bits left below each field.
        mov         edx, [BIL_ARG_DEST]
        psrlw       mm6, BIL_FRAC_BITS
        psrlw       mm7, BIL_FRAC_BITS
        pand        mm5, [RedMask]
        pand        mm6, [GreenMask]
        pand        mm7, [BlueMask]
        por         mm5, mm6
        por         mm7, mm5
%endmacro

;-----------------------------------------------------------------------
; BIL_LEAVE
;   Epilogue matching BIL_BLEND: restores EDX, EBX, ESP, EBP and returns.
;-----------------------------------------------------------------------
%macro BIL_LEAVE 0
        pop         edx
        pop         ebx
        mov         esp, ebp
        pop         ebp
        ret
%endmacro

SECTION .text

;-----------------------------------------------------------------------
; _BilinearMMX
;   Blends 4 pixels and stores them as 4 consecutive 16-bit words
;   (8 bytes) at dest.
;-----------------------------------------------------------------------
NEWSYM _BilinearMMX
        BIL_BLEND
%ifdef FAR_POINTER
        movq        [fs:edx], mm7
%else
        movq        [edx], mm7
%endif
        BIL_LEAVE


;-----------------------------------------------------------------------
; _BilinearMMXGrid0
;   Blends 4 pixels and stores 16 bytes at dest: every pixel is followed
;   by a zero word (pixel, 0, pixel, 0, ...).
;-----------------------------------------------------------------------
NEWSYM _BilinearMMXGrid0
        BIL_BLEND
        pxor        mm0, mm0
        movq        mm6, mm7
        punpcklwd   mm6, mm0            ; p0, 0, p1, 0
        punpckhwd   mm7, mm0            ; p2, 0, p3, 0
%ifdef FAR_POINTER
        movq        [fs:edx], mm6
        movq        [fs:edx+8], mm7
%else
        movq        [edx], mm6
        movq        [edx+8], mm7
%endif
        BIL_LEAVE


;-----------------------------------------------------------------------
; _BilinearMMXGrid1
;   Blends 4 pixels and stores 16 bytes at dest: every pixel is preceded
;   by a zero word (0, pixel, 0, pixel, ...).
;-----------------------------------------------------------------------
NEWSYM _BilinearMMXGrid1
        BIL_BLEND
        pxor        mm0, mm0
        pxor        mm1, mm1
        punpcklwd   mm0, mm7            ; 0, p0, 0, p1
        punpckhwd   mm1, mm7            ; 0, p2, 0, p3
%ifdef FAR_POINTER
        movq        [fs:edx], mm0
        movq        [fs:edx+8], mm1
%else
        movq        [edx], mm0
        movq        [edx+8], mm1
%endif
        BIL_LEAVE


;-----------------------------------------------------------------------
; _EndMMX
;   Executes EMMS so the x87 FPU can be used again after the MMX
;   routines above.
;-----------------------------------------------------------------------
NEWSYM _EndMMX
        emms
        ret

; NOTE(review): the alignment attribute is written with spaces around
; '='; confirm the assembler/output format in use actually honours it.
; The code only uses movq on these constants, which does not require
; alignment.
SECTION .data ALIGN = 32
;Some constants (each replicated across four 16-bit lanes)
RedMask     dd 0xF800F800, 0xF800F800   ; bits 15..11 of every word
BlueMask    dd 0x001F001F, 0x001F001F   ; bits  4..0  of every word
GreenMask   dd 0x07E007E0, 0x07E007E0   ; bits 10..5  of every word
All32s      dd 0x00200020, 0x00200020   ; 32 = 1 << BIL_FRAC_BITS