/*
 * rgbtoyuv.S
 *
 * Copyright (C) Peter Schlaile - February 2001
 *
 * This file is part of libdv, a free DV (IEC 61834/SMPTE 314M)
 * codec.
 *
 * libdv is free software; you can redistribute it and/or modify it
 * under the terms of the GNU Lesser Public License as published by
 * the Free Software Foundation; either version 2.1, or (at your
 * option) any later version.
 *
 * libdv is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser Public License for more details.
 *
 * You should have received a copy of the GNU Lesser Public License
 * along with libdv; see the file COPYING. If not, write to
 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 * The libdv homepage is http://libdv.sourceforge.net/.
 */

# The loop processes interleaved RGB values for 8 pixels.
# The notation in the comments that describe the data places
# the first byte on the right. For example, in a register containing
# G2R2B1G1R1B0G0R0, R0 is in the position of the least significant
# byte and G2 is in the position of the most significant byte.
# The output goes to separate Y, U, and V buffers. Inputs are bytes;
# outputs are words.

#define CONSTSHIFT 15
#define PRECISION  1
#define FIXPSHIFT  CONSTSHIFT-PRECISION

#define DV_WIDTH_SHORT      720*2
#define DV_WIDTH_BYTE       720
#define DV_WIDTH_SHORT_HALF 720
#define DV_WIDTH_BYTE_HALF  360

.global _dv_rgbtoycb_mmx
# .global yuvtoycb_mmx

.data

.align 8
ZEROSX: .word 0,0,0,0
ZEROS:  .long 0,0

ALLONE: .word 1,1,1,1

OFFSETDX: .word 0,64,0,64        #offset used before shift
OFFSETD:  .long 0,0
OFFSETWX: .word 128,0,128,0      #offset used before pack 32
OFFSETW:  .long 0,0
OFFSETBX: .word 128,128,128,128
OFFSETB:  .long 0,0
OFFSETY:  .word (16-128) << PRECISION
          .word (16-128) << PRECISION
          .word (16-128) << PRECISION
          .word (16-128) << PRECISION

TEMP0: .long 0,0
TEMPY: .long 0,0
TEMPU: .long 0,0
TEMPV: .long 0,0

#if 0 /* Original YUV */
YR0GRX: .word 9798,19235,0,9798
YBG0BX: .word 3736,0,19235,3736
YR0GR:  .long 0,0
YBG0B:  .long 0,0
UR0GRX: .word -4784,-9437,0,-4784
UBG0BX: .word 14221,0,-9437,14221
UR0GR:  .long 0,0
UBG0B:  .long 0,0
VR0GRX: .word 20218,-16941,0,20218
VBG0BX: .word -3277,0,-16941,-3277
VR0GR:  .long 0,0
VBG0B:  .long 0,0

YR0GRX: .word 8420,16529,0,8420
YBG0BX: .word 3203,0,16529,3203
YR0GR:  .long 0,0
YBG0B:  .long 0,0
UR0GRX: .word 14391,-12055,0,14391
UBG0BX: .word -2336,0,-12055,-2336
UR0GR:  .long 0,0
UBG0B:  .long 0,0
VR0GRX: .word -4857,-9534,0,-4857
VBG0BX: .word 14391,0,-9534,14391
VR0GR:  .long 0,0
VBG0B:  .long 0,0

#else
YR0GRX: .word 8414,16519,0,8414
YBG0BX: .word 3208,0,16519,3208
YR0GR:  .long 0,0
YBG0B:  .long 0,0
UR0GRX: .word 14392,-12061,0,14392
UBG0BX: .word -2332,0,-12061,-2332
UR0GR:  .long 0,0
UBG0B:  .long 0,0
VR0GRX: .word -4864,-9528,0,-4864
VBG0BX: .word 14392,0,-9528,14392
VR0GR:  .long 0,0
VBG0B:  .long 0,0

#endif
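# For reference: with the active #else tables, each pmaddwd pair in the
# loop below accumulates one of these CONSTSHIFT=15 fixed-point dot
# products per pixel,
#
#       Y =  8414*R + 16519*G +  3208*B
#       U = 14392*R - 12061*G -  2332*B
#       V = -4864*R -  9528*G + 14392*B
#
# which are then scaled down by FIXPSHIFT. Only Y gets the
# (16-128) << PRECISION bias from OFFSETY; U and V stay centered at 0.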
.section .note.GNU-stack, "", @progbits

.text

#define _inPtr   8
#define _rows    12
#define _columns 16
#define _outyPtr 20
#define _outuPtr 24
#define _outvPtr 28

.global _dv_rgbtoycb_mmx
.hidden _dv_rgbtoycb_mmx
.type   _dv_rgbtoycb_mmx,@function
_dv_rgbtoycb_mmx:

        pushl   %ebp
        movl    %esp, %ebp
        pushl   %eax
        pushl   %ebx
        pushl   %ecx
        pushl   %edx
        pushl   %esi
        pushl   %edi

        leal    ZEROSX, %eax    #This section gets around a bug
        movq    (%eax), %mm0    #unlikely to persist
        movq    %mm0, ZEROS
        leal    OFFSETDX, %eax
        movq    (%eax), %mm0
        movq    %mm0, OFFSETD
        leal    OFFSETWX, %eax
        movq    (%eax), %mm0
        movq    %mm0, OFFSETW
        leal    OFFSETBX, %eax
        movq    (%eax), %mm0
        movq    %mm0, OFFSETB
        leal    YR0GRX, %eax
        movq    (%eax), %mm0
        movq    %mm0, YR0GR
        leal    YBG0BX, %eax
        movq    (%eax), %mm0
        movq    %mm0, YBG0B
        leal    UR0GRX, %eax
        movq    (%eax), %mm0
        movq    %mm0, UR0GR
        leal    UBG0BX, %eax
        movq    (%eax), %mm0
        movq    %mm0, UBG0B
        leal    VR0GRX, %eax
        movq    (%eax), %mm0
        movq    %mm0, VR0GR
        leal    VBG0BX, %eax
        movq    (%eax), %mm0
        movq    %mm0, VBG0B

        movl    _rows(%ebp), %eax
        movl    _columns(%ebp), %ebx
        mull    %ebx            #number of pixels
        shrl    $3, %eax        #number of loops
        movl    %eax, %edi      #loop counter in edi
        movl    _inPtr(%ebp), %eax
        movl    _outyPtr(%ebp), %ebx
        movl    _outuPtr(%ebp), %ecx
        movl    _outvPtr(%ebp), %edx
rgbtoycb_mmx_loop:
        movq    (%eax), %mm1    #load G2R2B1G1R1B0G0R0
        pxor    %mm6, %mm6      #0 -> mm6

        movq    %mm1, %mm0      #G2R2B1G1R1B0G0R0 -> mm0
        psrlq   $16, %mm1       #00G2R2B1G1R1B0 -> mm1

        punpcklbw %mm6, %mm0    #R1B0G0R0 -> mm0
        movq    %mm1, %mm7      #00G2R2B1G1R1B0 -> mm7

        punpcklbw %mm6, %mm1    #B1G1R1B0 -> mm1
        movq    %mm0, %mm2      #R1B0G0R0 -> mm2

        pmaddwd YR0GR, %mm0     #yrR1,ygG0+yrR0 -> mm0
        movq    %mm1, %mm3      #B1G1R1B0 -> mm3

        pmaddwd YBG0B, %mm1     #ybB1+ygG1,ybB0 -> mm1
        movq    %mm2, %mm4      #R1B0G0R0 -> mm4

        pmaddwd UR0GR, %mm2     #urR1,ugG0+urR0 -> mm2
        movq    %mm3, %mm5      #B1G1R1B0 -> mm5

        pmaddwd UBG0B, %mm3     #ubB1+ugG1,ubB0 -> mm3
        punpckhbw %mm6, %mm7    #00G2R2 -> mm7

        pmaddwd VR0GR, %mm4     #vrR1,vgG0+vrR0 -> mm4
        paddd   %mm1, %mm0      #Y1Y0 -> mm0

        pmaddwd VBG0B, %mm5     #vbB1+vgG1,vbB0 -> mm5

        movq    8(%eax), %mm1   #R5B4G4R4B3G3R3B2 -> mm1
        paddd   %mm3, %mm2      #U1U0 -> mm2

        movq    %mm1, %mm6      #R5B4G4R4B3G3R3B2 -> mm6

        punpcklbw ZEROS, %mm1   #B3G3R3B2 -> mm1
        paddd   %mm5, %mm4      #V1V0 -> mm4

        movq    %mm1, %mm5      #B3G3R3B2 -> mm5
        psllq   $32, %mm1       #R3B200 -> mm1

        paddd   %mm7, %mm1      #R3B200+00G2R2=R3B2G2R2 -> mm1

        punpckhbw ZEROS, %mm6   #R5B4G4R4 -> mm6
        movq    %mm1, %mm3      #R3B2G2R2 -> mm3

        pmaddwd YR0GR, %mm1     #yrR3,ygG2+yrR2 -> mm1
        movq    %mm5, %mm7      #B3G3R3B2 -> mm7

        pmaddwd YBG0B, %mm5     #ybB3+ygG3,ybB2 -> mm5
        psrad   $FIXPSHIFT, %mm0 #32-bit scaled Y1Y0 -> mm0

        movq    %mm6, TEMP0     #R5B4G4R4 -> TEMP0
        movq    %mm3, %mm6      #R3B2G2R2 -> mm6
        pmaddwd UR0GR, %mm6     #urR3,ugG2+urR2 -> mm6
        psrad   $FIXPSHIFT, %mm2 #32-bit scaled U1U0 -> mm2

        paddd   %mm5, %mm1      #Y3Y2 -> mm1
        movq    %mm7, %mm5      #B3G3R3B2 -> mm5
        pmaddwd UBG0B, %mm7     #ubB3+ugG3,ubB2
        psrad   $FIXPSHIFT, %mm1 #32-bit scaled Y3Y2 -> mm1

        pmaddwd VR0GR, %mm3     #vrR3,vgG2+vrR2
        packssdw %mm1, %mm0     #Y3Y2Y1Y0 -> mm0

        pmaddwd VBG0B, %mm5     #vbB3+vgG3,vbB2 -> mm5
        psrad   $FIXPSHIFT, %mm4 #32-bit scaled V1V0 -> mm4

        movq    16(%eax), %mm1  #B7G7R7B6G6R6B5G5 -> mm1
        paddd   %mm7, %mm6      #U3U2 -> mm6

        movq    %mm1, %mm7      #B7G7R7B6G6R6B5G5 -> mm7
        psrad   $FIXPSHIFT, %mm6 #32-bit scaled U3U2 -> mm6

        paddd   %mm5, %mm3      #V3V2 -> mm3
        psllq   $16, %mm7       #R7B6G6R6B5G500 -> mm7

        movq    %mm7, %mm5      #R7B6G6R6B5G500 -> mm5
        psrad   $FIXPSHIFT, %mm3 #32-bit scaled V3V2 -> mm3

        paddw   OFFSETY, %mm0
        movq    %mm0, (%ebx)    #store Y3Y2Y1Y0
        packssdw %mm6, %mm2     #32-bit scaled U3U2U1U0 -> mm2
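        #At this point Y3..Y0 are stored and U3..U0 are packed to words;
        #U is parked in TEMPU just below while pixels 4-7 are converted.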
        movq    TEMP0, %mm0     #R5B4G4R4 -> mm0
        addl    $8, %ebx

        punpcklbw ZEROS, %mm7   #B5G500 -> mm7
        movq    %mm0, %mm6      #R5B4G4R4 -> mm6

        movq    %mm2, TEMPU     #32-bit scaled U3U2U1U0 -> TEMPU
        psrlq   $32, %mm0       #00R5B4 -> mm0

        paddw   %mm0, %mm7      #B5G5R5B4 -> mm7
        movq    %mm6, %mm2      #R5B4G4R4 -> mm2

        pmaddwd YR0GR, %mm2     #yrR5,ygG4+yrR4 -> mm2
        movq    %mm7, %mm0      #B5G5R5B4 -> mm0

        pmaddwd YBG0B, %mm7     #ybB5+ygG5,ybB4 -> mm7
        packssdw %mm3, %mm4     #32-bit scaled V3V2V1V0 -> mm4

        addl    $24, %eax       #increment RGB count

        movq    %mm4, TEMPV     #32-bit scaled V3V2V1V0 -> TEMPV
        movq    %mm6, %mm4      #R5B4G4R4 -> mm4

        pmaddwd UR0GR, %mm6     #urR5,ugG4+urR4
        movq    %mm0, %mm3      #B5G5R5B4 -> mm3

        pmaddwd UBG0B, %mm0     #ubB5+ugG5,ubB4
        paddd   %mm7, %mm2      #Y5Y4 -> mm2

        pmaddwd VR0GR, %mm4     #vrR5,vgG4+vrR4 -> mm4
        pxor    %mm7, %mm7      #0 -> mm7

        pmaddwd VBG0B, %mm3     #vbB5+vgG5,vbB4 -> mm3
        punpckhbw %mm7, %mm1    #B7G7R7B6 -> mm1

        paddd   %mm6, %mm0      #U5U4 -> mm0
        movq    %mm1, %mm6      #B7G7R7B6 -> mm6

        pmaddwd YBG0B, %mm6     #ybB7+ygG7,ybB6 -> mm6
        punpckhbw %mm7, %mm5    #R7B6G6R6 -> mm5

        movq    %mm5, %mm7      #R7B6G6R6 -> mm7
        paddd   %mm4, %mm3      #V5V4 -> mm3

        pmaddwd YR0GR, %mm5     #yrR7,ygG6+yrR6 -> mm5
        movq    %mm1, %mm4      #B7G7R7B6 -> mm4

        pmaddwd UBG0B, %mm4     #ubB7+ugG7,ubB6 -> mm4
        psrad   $FIXPSHIFT, %mm0 #32-bit scaled U5U4 -> mm0

        psrad   $FIXPSHIFT, %mm2 #32-bit scaled Y5Y4 -> mm2

        paddd   %mm5, %mm6      #Y7Y6 -> mm6
        movq    %mm7, %mm5      #R7B6G6R6 -> mm5

        pmaddwd UR0GR, %mm7     #urR7,ugG6+urR6 -> mm7
        psrad   $FIXPSHIFT, %mm3 #32-bit scaled V5V4 -> mm3

        pmaddwd VBG0B, %mm1     #vbB7+vgG7,vbB6 -> mm1
        psrad   $FIXPSHIFT, %mm6 #32-bit scaled Y7Y6 -> mm6

        packssdw %mm6, %mm2     #Y7Y6Y5Y4 -> mm2

        pmaddwd VR0GR, %mm5     #vrR7,vgG6+vrR6 -> mm5
        paddd   %mm4, %mm7      #U7U6 -> mm7

        psrad   $FIXPSHIFT, %mm7 #32-bit scaled U7U6 -> mm7
        paddw   OFFSETY, %mm2
        movq    %mm2, (%ebx)    #store Y7Y6Y5Y4

        movq    ALLONE, %mm6
        packssdw %mm7, %mm0     #32-bit scaled U7U6U5U4 -> mm0

        movq    TEMPU, %mm4     #32-bit scaled U3U2U1U0 -> mm4
        pmaddwd %mm6, %mm0      #U7U6U5U4 averaged -> (U7U6)(U5U4)=UU3 UU2 -> mm0

        pmaddwd %mm6, %mm4      #U3U2U1U0 averaged -> (U3U2)(U1U0)=UU1 UU0 -> mm4

        paddd   %mm5, %mm1      #V7V6 -> mm1
        packssdw %mm0, %mm4     #UU3UU2UU1UU0 -> mm4

        psrad   $FIXPSHIFT, %mm1 #32-bit scaled V7V6 -> mm1
        psraw   $1, %mm4        #divide UU3 UU2 UU1 UU0 by 2 -> mm4

        movq    TEMPV, %mm5     #32-bit scaled V3V2V1V0 -> mm5

        movq    %mm4, (%ecx)    #store U

        pmaddwd %mm6, %mm5      #V3V2V1V0 averaged -> VV1 VV0 -> mm5
        packssdw %mm1, %mm3     #V7V6V5V4 -> mm3

        pmaddwd %mm6, %mm3      #V7V6V5V4 averaged -> VV3 VV2 -> mm3

        packssdw %mm3, %mm5     #VV3 VV2 VV1 VV0 -> mm5
        psraw   $1, %mm5        #divide by 2

        addl    $8, %ebx        #increment Y count
        addl    $8, %ecx        #increment U count

        movq    %mm5, (%edx)    #store V

        addl    $8, %edx        #increment V count

        decl    %edi            #decrement loop counter
        jnz     rgbtoycb_mmx_loop #do 24 more bytes if not 0

        popl    %edi
        popl    %esi
        popl    %edx
        popl    %ecx
        popl    %ebx
        popl    %eax
        popl    %ebp

        ret
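/*
 * For orientation, a plain-C sketch of the conversion the MMX loop above
 * performs (illustrative only; rgbtoycb_ref is a hypothetical name, and
 * this scalar version ignores the saturation done by packssdw). Chroma
 * comes out at half horizontal resolution: each pair of pixel values is
 * averaged, exactly as the pmaddwd-with-ALLONE / psraw $1 sequence does.
 *
 *   void rgbtoycb_ref(const unsigned char *rgb, int pixels,
 *                     short *y, short *u, short *v)
 *   {
 *       int i;
 *       for (i = 0; i < pixels; i += 2) {
 *           int r0 = rgb[3*i + 0], g0 = rgb[3*i + 1], b0 = rgb[3*i + 2];
 *           int r1 = rgb[3*i + 3], g1 = rgb[3*i + 4], b1 = rgb[3*i + 5];
 *
 *           y[i]   = (short)(((8414*r0 + 16519*g0 + 3208*b0) >> FIXPSHIFT)
 *                            + ((16 - 128) << PRECISION));
 *           y[i+1] = (short)(((8414*r1 + 16519*g1 + 3208*b1) >> FIXPSHIFT)
 *                            + ((16 - 128) << PRECISION));
 *
 *           u[i/2] = (short)((((14392*r0 - 12061*g0 - 2332*b0) >> FIXPSHIFT)
 *                           + ((14392*r1 - 12061*g1 - 2332*b1) >> FIXPSHIFT)) >> 1);
 *           v[i/2] = (short)((((-4864*r0 - 9528*g0 + 14392*b0) >> FIXPSHIFT)
 *                           + ((-4864*r1 - 9528*g1 + 14392*b1) >> FIXPSHIFT)) >> 1);
 *       }
 *   }
 */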
.global _dv_ppm_copy_y_block_mmx
.hidden _dv_ppm_copy_y_block_mmx
.type   _dv_ppm_copy_y_block_mmx,@function
_dv_ppm_copy_y_block_mmx:

        pushl   %ebp
        movl    %esp, %ebp
        pushl   %esi
        pushl   %edi

        movl    8(%ebp), %edi   # dest
        movl    12(%ebp), %esi  # src

        movq    (%esi), %mm0
        movq    8(%esi), %mm1
        movq    %mm0, 0*8(%edi)
        movq    %mm1, 1*8(%edi)
        movq    DV_WIDTH_SHORT(%esi), %mm2
        movq    DV_WIDTH_SHORT+8(%esi), %mm3
        movq    %mm2, 2*8(%edi)
        movq    %mm3, 3*8(%edi)
        movq    DV_WIDTH_SHORT*2(%esi), %mm4
        movq    DV_WIDTH_SHORT*2+8(%esi), %mm5
        movq    %mm4, 4*8(%edi)
        movq    %mm5, 5*8(%edi)
        movq    DV_WIDTH_SHORT*3(%esi), %mm6
        movq    DV_WIDTH_SHORT*3+8(%esi), %mm7
        movq    %mm6, 6*8(%edi)
        movq    %mm7, 7*8(%edi)

        movq    DV_WIDTH_SHORT*4(%esi), %mm0
        movq    DV_WIDTH_SHORT*4+8(%esi), %mm1
        movq    %mm0, 8*8(%edi)
        movq    %mm1, 9*8(%edi)
        movq    DV_WIDTH_SHORT*5(%esi), %mm2
        movq    DV_WIDTH_SHORT*5+8(%esi), %mm3
        movq    %mm2, 10*8(%edi)
        movq    %mm3, 11*8(%edi)
        movq    DV_WIDTH_SHORT*6(%esi), %mm4
        movq    DV_WIDTH_SHORT*6+8(%esi), %mm5
        movq    %mm4, 12*8(%edi)
        movq    %mm5, 13*8(%edi)
        movq    DV_WIDTH_SHORT*7(%esi), %mm6
        movq    DV_WIDTH_SHORT*7+8(%esi), %mm7
        movq    %mm6, 14*8(%edi)
        movq    %mm7, 15*8(%edi)

        pop     %edi
        pop     %esi
        pop     %ebp
        ret
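/*
 * _dv_pgm_copy_y_block_mmx below expands one 8x8 block of 8-bit luma to
 * the 16-bit working format: zero-extend, shift left by PRECISION, and
 * add the OFFSETY bias. A plain-C sketch (illustrative only; the name
 * is hypothetical):
 *
 *   void pgm_copy_y_block_ref(short *dest, const unsigned char *src)
 *   {
 *       int i, j;
 *       for (i = 0; i < 8; i++)
 *           for (j = 0; j < 8; j++)
 *               dest[8*i + j] = (short)((src[DV_WIDTH_BYTE*i + j] << PRECISION)
 *                                       + ((16 - 128) << PRECISION));
 *   }
 */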
.global _dv_pgm_copy_y_block_mmx
.hidden _dv_pgm_copy_y_block_mmx
.type   _dv_pgm_copy_y_block_mmx,@function
_dv_pgm_copy_y_block_mmx:

        pushl   %ebp
        movl    %esp, %ebp
        pushl   %esi
        pushl   %edi

        movl    8(%ebp), %edi   # dest
        movl    12(%ebp), %esi  # src

        movq    OFFSETY, %mm7
        pxor    %mm6, %mm6

        movq    (%esi), %mm0
        movq    DV_WIDTH_BYTE(%esi), %mm1

        movq    %mm0, %mm2
        movq    %mm1, %mm3

        punpcklbw %mm6, %mm0
        punpcklbw %mm6, %mm1

        punpckhbw %mm6, %mm2
        punpckhbw %mm6, %mm3

#if PRECISION > 0
        psllw   $PRECISION, %mm0
        psllw   $PRECISION, %mm1
        psllw   $PRECISION, %mm2
        psllw   $PRECISION, %mm3
#endif

        paddw   %mm7, %mm0
        paddw   %mm7, %mm1
        paddw   %mm7, %mm2
        paddw   %mm7, %mm3

        movq    %mm0, (%edi)
        movq    %mm2, 8(%edi)
        movq    %mm1, 16(%edi)
        movq    %mm3, 24(%edi)

        addl    $2*DV_WIDTH_BYTE, %esi
        addl    $32, %edi

        movq    (%esi), %mm0
        movq    DV_WIDTH_BYTE(%esi), %mm1

        movq    %mm0, %mm2
        movq    %mm1, %mm3

        punpcklbw %mm6, %mm0
        punpcklbw %mm6, %mm1

        punpckhbw %mm6, %mm2
        punpckhbw %mm6, %mm3

#if PRECISION > 0
        psllw   $PRECISION, %mm0
        psllw   $PRECISION, %mm1
        psllw   $PRECISION, %mm2
        psllw   $PRECISION, %mm3
#endif

        paddw   %mm7, %mm0
        paddw   %mm7, %mm1
        paddw   %mm7, %mm2
        paddw   %mm7, %mm3

        movq    %mm0, (%edi)
        movq    %mm2, 8(%edi)
        movq    %mm1, 16(%edi)
        movq    %mm3, 24(%edi)

        addl    $2*DV_WIDTH_BYTE, %esi
        addl    $32, %edi

        movq    (%esi), %mm0
        movq    DV_WIDTH_BYTE(%esi), %mm1

        movq    %mm0, %mm2
        movq    %mm1, %mm3

        punpcklbw %mm6, %mm0
        punpcklbw %mm6, %mm1

        punpckhbw %mm6, %mm2
        punpckhbw %mm6, %mm3

#if PRECISION > 0
        psllw   $PRECISION, %mm0
        psllw   $PRECISION, %mm1
        psllw   $PRECISION, %mm2
        psllw   $PRECISION, %mm3
#endif

        paddw   %mm7, %mm0
        paddw   %mm7, %mm1
        paddw   %mm7, %mm2
        paddw   %mm7, %mm3

        movq    %mm0, (%edi)
        movq    %mm2, 8(%edi)
        movq    %mm1, 16(%edi)
        movq    %mm3, 24(%edi)

        addl    $2*DV_WIDTH_BYTE, %esi
        addl    $32, %edi

        movq    (%esi), %mm0
        movq    DV_WIDTH_BYTE(%esi), %mm1

        movq    %mm0, %mm2
        movq    %mm1, %mm3

        punpcklbw %mm6, %mm0
        punpcklbw %mm6, %mm1

        punpckhbw %mm6, %mm2
        punpckhbw %mm6, %mm3

#if PRECISION > 0
        psllw   $PRECISION, %mm0
        psllw   $PRECISION, %mm1
        psllw   $PRECISION, %mm2
        psllw   $PRECISION, %mm3
#endif

        paddw   %mm7, %mm0
        paddw   %mm7, %mm1
        paddw   %mm7, %mm2
        paddw   %mm7, %mm3

        movq    %mm0, (%edi)
        movq    %mm2, 8(%edi)
        movq    %mm1, 16(%edi)
        movq    %mm3, 24(%edi)

        pop     %edi
        pop     %esi
        pop     %ebp
        ret

.global _dv_video_copy_y_block_mmx
.hidden _dv_video_copy_y_block_mmx
.type   _dv_video_copy_y_block_mmx,@function
_dv_video_copy_y_block_mmx:

        pushl   %ebp
        movl    %esp, %ebp
        pushl   %esi
        pushl   %edi

        movl    8(%ebp), %edi   # dest
        movl    12(%ebp), %esi  # src

        movq    OFFSETBX, %mm7
        pxor    %mm6, %mm6

        movq    (%esi), %mm0
        movq    DV_WIDTH_BYTE(%esi), %mm1

        movq    %mm0, %mm2
        movq    %mm1, %mm3

        punpcklbw %mm6, %mm0
        punpcklbw %mm6, %mm1

        punpckhbw %mm6, %mm2
        punpckhbw %mm6, %mm3

        psubw   %mm7, %mm0
        psubw   %mm7, %mm1
        psubw   %mm7, %mm2
        psubw   %mm7, %mm3

#if PRECISION > 0
        psllw   $PRECISION, %mm0
        psllw   $PRECISION, %mm1
        psllw   $PRECISION, %mm2
        psllw   $PRECISION, %mm3
#endif

        movq    %mm0, (%edi)
        movq    %mm2, 8(%edi)
        movq    %mm1, 16(%edi)
        movq    %mm3, 24(%edi)

        addl    $2*DV_WIDTH_BYTE, %esi
        addl    $32, %edi

        movq    (%esi), %mm0
        movq    DV_WIDTH_BYTE(%esi), %mm1

        movq    %mm0, %mm2
        movq    %mm1, %mm3

        punpcklbw %mm6, %mm0
        punpcklbw %mm6, %mm1

        punpckhbw %mm6, %mm2
        punpckhbw %mm6, %mm3

        psubw   %mm7, %mm0
        psubw   %mm7, %mm1
        psubw   %mm7, %mm2
        psubw   %mm7, %mm3

#if PRECISION > 0
        psllw   $PRECISION, %mm0
        psllw   $PRECISION, %mm1
        psllw   $PRECISION, %mm2
        psllw   $PRECISION, %mm3
#endif

        movq    %mm0, (%edi)
        movq    %mm2, 8(%edi)
        movq    %mm1, 16(%edi)
        movq    %mm3, 24(%edi)

        addl    $2*DV_WIDTH_BYTE, %esi
        addl    $32, %edi

        movq    (%esi), %mm0
        movq    DV_WIDTH_BYTE(%esi), %mm1

        movq    %mm0, %mm2
        movq    %mm1, %mm3

        punpcklbw %mm6, %mm0
        punpcklbw %mm6, %mm1

        punpckhbw %mm6, %mm2
        punpckhbw %mm6, %mm3

        psubw   %mm7, %mm0
        psubw   %mm7, %mm1
        psubw   %mm7, %mm2
        psubw   %mm7, %mm3

#if PRECISION > 0
        psllw   $PRECISION, %mm0
        psllw   $PRECISION, %mm1
        psllw   $PRECISION, %mm2
        psllw   $PRECISION, %mm3
#endif

        movq    %mm0, (%edi)
        movq    %mm2, 8(%edi)
        movq    %mm1, 16(%edi)
        movq    %mm3, 24(%edi)

        addl    $2*DV_WIDTH_BYTE, %esi
        addl    $32, %edi

        movq    (%esi), %mm0
        movq    DV_WIDTH_BYTE(%esi), %mm1

        movq    %mm0, %mm2
        movq    %mm1, %mm3

        punpcklbw %mm6, %mm0
        punpcklbw %mm6, %mm1

        punpckhbw %mm6, %mm2
        punpckhbw %mm6, %mm3

        psubw   %mm7, %mm0
        psubw   %mm7, %mm1
        psubw   %mm7, %mm2
        psubw   %mm7, %mm3

#if PRECISION > 0
        psllw   $PRECISION, %mm0
        psllw   $PRECISION, %mm1
        psllw   $PRECISION, %mm2
        psllw   $PRECISION, %mm3
#endif

        movq    %mm0, (%edi)
        movq    %mm2, 8(%edi)
        movq    %mm1, 16(%edi)
        movq    %mm3, 24(%edi)

        pop     %edi
        pop     %esi
        pop     %ebp
        ret
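/*
 * The PAL chroma copies below build one 8x8 chroma block by averaging
 * vertically adjacent source lines (PAL DV is 4:2:0). For
 * _dv_ppm_copy_pal_c_block_mmx the source is already 16-bit, with rows
 * DV_WIDTH_SHORT_HALF bytes (360 shorts) apart. A plain-C sketch
 * (illustrative only; the name is hypothetical):
 *
 *   void ppm_copy_pal_c_block_ref(short *dest, const short *src)
 *   {
 *       int i, j;
 *       for (i = 0; i < 8; i++)
 *           for (j = 0; j < 8; j++)
 *               dest[8*i + j] = (short)((src[(2*i)*360 + j]
 *                                      + src[(2*i + 1)*360 + j]) >> 1);
 *   }
 */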
.global _dv_ppm_copy_pal_c_block_mmx
.hidden _dv_ppm_copy_pal_c_block_mmx
.type   _dv_ppm_copy_pal_c_block_mmx,@function
_dv_ppm_copy_pal_c_block_mmx:

        pushl   %ebp
        movl    %esp, %ebp
        pushl   %esi
        pushl   %edi
        pushl   %ebx

        movl    8(%ebp), %edi   # dest
        movl    12(%ebp), %esi  # src

        movq    (%esi), %mm0
        movq    DV_WIDTH_SHORT_HALF(%esi), %mm1
        movq    8(%esi), %mm2
        movq    DV_WIDTH_SHORT_HALF+8(%esi), %mm3

        paddw   %mm0, %mm1
        paddw   %mm2, %mm3
        psraw   $1, %mm1
        psraw   $1, %mm3

        movq    %mm1, 0*8(%edi)
        movq    %mm3, 1*8(%edi)

        addl    $DV_WIDTH_SHORT, %esi
        addl    $16, %edi

        movq    (%esi), %mm0
        movq    DV_WIDTH_SHORT_HALF(%esi), %mm1
        movq    8(%esi), %mm2
        movq    DV_WIDTH_SHORT_HALF+8(%esi), %mm3

        paddw   %mm0, %mm1
        paddw   %mm2, %mm3
        psraw   $1, %mm1
        psraw   $1, %mm3

        movq    %mm1, 0*8(%edi)
        movq    %mm3, 1*8(%edi)

        addl    $DV_WIDTH_SHORT, %esi
        addl    $16, %edi

        movq    (%esi), %mm0
        movq    DV_WIDTH_SHORT_HALF(%esi), %mm1
        movq    8(%esi), %mm2
        movq    DV_WIDTH_SHORT_HALF+8(%esi), %mm3

        paddw   %mm0, %mm1
        paddw   %mm2, %mm3
        psraw   $1, %mm1
        psraw   $1, %mm3

        movq    %mm1, 0*8(%edi)
        movq    %mm3, 1*8(%edi)

        addl    $DV_WIDTH_SHORT, %esi
        addl    $16, %edi

        movq    (%esi), %mm0
        movq    DV_WIDTH_SHORT_HALF(%esi), %mm1
        movq    8(%esi), %mm2
        movq    DV_WIDTH_SHORT_HALF+8(%esi), %mm3

        paddw   %mm0, %mm1
        paddw   %mm2, %mm3
        psraw   $1, %mm1
        psraw   $1, %mm3

        movq    %mm1, 0*8(%edi)
        movq    %mm3, 1*8(%edi)

        addl    $DV_WIDTH_SHORT, %esi
        addl    $16, %edi

        movq    (%esi), %mm0
        movq    DV_WIDTH_SHORT_HALF(%esi), %mm1
        movq    8(%esi), %mm2
        movq    DV_WIDTH_SHORT_HALF+8(%esi), %mm3

        paddw   %mm0, %mm1
        paddw   %mm2, %mm3
        psraw   $1, %mm1
        psraw   $1, %mm3

        movq    %mm1, 0*8(%edi)
        movq    %mm3, 1*8(%edi)

        addl    $DV_WIDTH_SHORT, %esi
        addl    $16, %edi

        movq    (%esi), %mm0
        movq    DV_WIDTH_SHORT_HALF(%esi), %mm1
        movq    8(%esi), %mm2
        movq    DV_WIDTH_SHORT_HALF+8(%esi), %mm3

        paddw   %mm0, %mm1
        paddw   %mm2, %mm3
        psraw   $1, %mm1
        psraw   $1, %mm3

        movq    %mm1, 0*8(%edi)
        movq    %mm3, 1*8(%edi)

        addl    $DV_WIDTH_SHORT, %esi
        addl    $16, %edi

        movq    (%esi), %mm0
        movq    DV_WIDTH_SHORT_HALF(%esi), %mm1
        movq    8(%esi), %mm2
        movq    DV_WIDTH_SHORT_HALF+8(%esi), %mm3

        paddw   %mm0, %mm1
        paddw   %mm2, %mm3
        psraw   $1, %mm1
        psraw   $1, %mm3

        movq    %mm1, 0*8(%edi)
        movq    %mm3, 1*8(%edi)

        addl    $DV_WIDTH_SHORT, %esi
        addl    $16, %edi

        movq    (%esi), %mm0
        movq    DV_WIDTH_SHORT_HALF(%esi), %mm1
        movq    8(%esi), %mm2
        movq    DV_WIDTH_SHORT_HALF+8(%esi), %mm3

        paddw   %mm0, %mm1
        paddw   %mm2, %mm3
        psraw   $1, %mm1
        psraw   $1, %mm3

        movq    %mm1, 0*8(%edi)
        movq    %mm3, 1*8(%edi)

        pop     %ebx
        pop     %edi
        pop     %esi
        pop     %ebp
        ret
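/*
 * _dv_pgm_copy_pal_c_block_mmx below fills the block from 8-bit chroma
 * samples without any averaging: zero-extend, subtract the 128 bias
 * (OFFSETBX), and shift left by PRECISION. Per sample, as a sketch:
 *
 *     dest[8*i + j] = (short)((src[DV_WIDTH_BYTE*i + j] - 128) << PRECISION);
 */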
.global _dv_pgm_copy_pal_c_block_mmx
.hidden _dv_pgm_copy_pal_c_block_mmx
.type   _dv_pgm_copy_pal_c_block_mmx,@function
_dv_pgm_copy_pal_c_block_mmx:

        pushl   %ebp
        movl    %esp, %ebp
        pushl   %esi
        pushl   %edi
        pushl   %ebx

        movl    8(%ebp), %edi   # dest
        movl    12(%ebp), %esi  # src

        movq    OFFSETBX, %mm7
        pxor    %mm6, %mm6

        movq    (%esi), %mm0
        movq    DV_WIDTH_BYTE(%esi), %mm1

        movq    %mm0, %mm2
        movq    %mm1, %mm3

        punpcklbw %mm6, %mm0
        punpcklbw %mm6, %mm1

        punpckhbw %mm6, %mm2
        punpckhbw %mm6, %mm3

        psubw   %mm7, %mm0
        psubw   %mm7, %mm1
        psubw   %mm7, %mm2
        psubw   %mm7, %mm3

#if PRECISION > 0
        psllw   $PRECISION, %mm0
        psllw   $PRECISION, %mm1
        psllw   $PRECISION, %mm2
        psllw   $PRECISION, %mm3
#endif

        movq    %mm0, (%edi)
        movq    %mm2, 8(%edi)
        movq    %mm1, 16(%edi)
        movq    %mm3, 24(%edi)

        addl    $2*DV_WIDTH_BYTE, %esi
        addl    $32, %edi

        movq    (%esi), %mm0
        movq    DV_WIDTH_BYTE(%esi), %mm1

        movq    %mm0, %mm2
        movq    %mm1, %mm3

        punpcklbw %mm6, %mm0
        punpcklbw %mm6, %mm1

        punpckhbw %mm6, %mm2
        punpckhbw %mm6, %mm3

        psubw   %mm7, %mm0
        psubw   %mm7, %mm1
        psubw   %mm7, %mm2
        psubw   %mm7, %mm3

#if PRECISION > 0
        psllw   $PRECISION, %mm0
        psllw   $PRECISION, %mm1
        psllw   $PRECISION, %mm2
        psllw   $PRECISION, %mm3
#endif

        movq    %mm0, (%edi)
        movq    %mm2, 8(%edi)
        movq    %mm1, 16(%edi)
        movq    %mm3, 24(%edi)

        addl    $2*DV_WIDTH_BYTE, %esi
        addl    $32, %edi

        movq    (%esi), %mm0
        movq    DV_WIDTH_BYTE(%esi), %mm1

        movq    %mm0, %mm2
        movq    %mm1, %mm3

        punpcklbw %mm6, %mm0
        punpcklbw %mm6, %mm1

        punpckhbw %mm6, %mm2
        punpckhbw %mm6, %mm3

        psubw   %mm7, %mm0
        psubw   %mm7, %mm1
        psubw   %mm7, %mm2
        psubw   %mm7, %mm3

#if PRECISION > 0
        psllw   $PRECISION, %mm0
        psllw   $PRECISION, %mm1
        psllw   $PRECISION, %mm2
        psllw   $PRECISION, %mm3
#endif

        movq    %mm0, (%edi)
        movq    %mm2, 8(%edi)
        movq    %mm1, 16(%edi)
        movq    %mm3, 24(%edi)

        addl    $2*DV_WIDTH_BYTE, %esi
        addl    $32, %edi

        movq    (%esi), %mm0
        movq    DV_WIDTH_BYTE(%esi), %mm1

        movq    %mm0, %mm2
        movq    %mm1, %mm3

        punpcklbw %mm6, %mm0
        punpcklbw %mm6, %mm1

        punpckhbw %mm6, %mm2
        punpckhbw %mm6, %mm3

        psubw   %mm7, %mm0
        psubw   %mm7, %mm1
        psubw   %mm7, %mm2
        psubw   %mm7, %mm3

#if PRECISION > 0
        psllw   $PRECISION, %mm0
        psllw   $PRECISION, %mm1
        psllw   $PRECISION, %mm2
        psllw   $PRECISION, %mm3
#endif

        movq    %mm0, (%edi)
        movq    %mm2, 8(%edi)
        movq    %mm1, 16(%edi)
        movq    %mm3, 24(%edi)

        pop     %ebx
        pop     %edi
        pop     %esi
        pop     %ebp
        ret

.global _dv_video_copy_pal_c_block_mmx
.hidden _dv_video_copy_pal_c_block_mmx
.type   _dv_video_copy_pal_c_block_mmx,@function
_dv_video_copy_pal_c_block_mmx:

        pushl   %ebp
        movl    %esp, %ebp
        pushl   %esi
        pushl   %edi
        pushl   %ebx

        movl    8(%ebp), %edi   # dest
        movl    12(%ebp), %esi  # src

        movq    OFFSETBX, %mm7
        paddw   %mm7, %mm7
        pxor    %mm6, %mm6

        movl    $4, %ebx

video_copy_pal_c_block_mmx_loop:

        movq    (%esi), %mm0
        movq    DV_WIDTH_BYTE_HALF(%esi), %mm2

        movq    %mm0, %mm1
        movq    %mm2, %mm3

        punpcklbw %mm6, %mm0
        punpcklbw %mm6, %mm2

        punpckhbw %mm6, %mm1
        punpckhbw %mm6, %mm3

        paddw   %mm0, %mm2
        paddw   %mm1, %mm3

        psubw   %mm7, %mm2
        psubw   %mm7, %mm3

#if PRECISION == 0
        psraw   $1, %mm2
        psraw   $1, %mm3
#else
#if PRECISION > 1
        psllw   $PRECISION-1, %mm2
        psllw   $PRECISION-1, %mm3
#endif
#endif
        movq    %mm2, 0*8(%edi)
        movq    %mm3, 1*8(%edi)

        addl    $DV_WIDTH_BYTE, %esi
        addl    $16, %edi

        movq    (%esi), %mm0
        movq    DV_WIDTH_BYTE_HALF(%esi), %mm2

        movq    %mm0, %mm1
        movq    %mm2, %mm3

        punpcklbw %mm6, %mm0
        punpcklbw %mm6, %mm2

        punpckhbw %mm6, %mm1
        punpckhbw %mm6, %mm3

        paddw   %mm0, %mm2
        paddw   %mm1, %mm3

        psubw   %mm7, %mm2
        psubw   %mm7, %mm3

#if PRECISION == 0
        psraw   $1, %mm2
        psraw   $1, %mm3
#else
#if PRECISION > 1
        psllw   $PRECISION-1, %mm2
        psllw   $PRECISION-1, %mm3
#endif
#endif
        movq    %mm2, 0*8(%edi)
        movq    %mm3, 1*8(%edi)

        addl    $DV_WIDTH_BYTE, %esi
        addl    $16, %edi

        decl    %ebx
        jnz     video_copy_pal_c_block_mmx_loop

        pop     %ebx
        pop     %edi
        pop     %esi
        pop     %ebp
        ret
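/*
 * NTSC DV chroma is 4:1:1, so the NTSC block copies below average
 * horizontally instead of vertically. _dv_ppm_copy_ntsc_c_block_mmx
 * averages adjacent pairs of 16-bit samples (pmaddwd with ALLONE, then
 * psraw $1); source rows are DV_WIDTH_SHORT_HALF bytes (360 shorts)
 * apart. A plain-C sketch (illustrative only; the name is hypothetical):
 *
 *   void ppm_copy_ntsc_c_block_ref(short *dest, const short *src)
 *   {
 *       int i, j;
 *       for (i = 0; i < 8; i++)
 *           for (j = 0; j < 8; j++)
 *               dest[8*i + j] = (short)((src[360*i + 2*j]
 *                                      + src[360*i + 2*j + 1]) >> 1);
 *   }
 */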
.global _dv_ppm_copy_ntsc_c_block_mmx
.hidden _dv_ppm_copy_ntsc_c_block_mmx
.type   _dv_ppm_copy_ntsc_c_block_mmx,@function
_dv_ppm_copy_ntsc_c_block_mmx:

        pushl   %ebp
        movl    %esp, %ebp
        pushl   %esi
        pushl   %edi
        pushl   %ebx

        movl    8(%ebp), %edi   # dest
        movl    12(%ebp), %esi  # src

        movl    $4, %ebx

        movq    ALLONE, %mm6

ppm_copy_ntsc_c_block_mmx_loop:

        movq    (%esi), %mm0
        movq    8(%esi), %mm1
        movq    16(%esi), %mm2
        movq    24(%esi), %mm3

        pmaddwd %mm6, %mm0
        pmaddwd %mm6, %mm1

        pmaddwd %mm6, %mm2
        pmaddwd %mm6, %mm3

        packssdw %mm1, %mm0
        packssdw %mm3, %mm2

        psraw   $1, %mm0
        psraw   $1, %mm2

        movq    %mm0, 0*8(%edi)
        movq    %mm2, 1*8(%edi)

        addl    $DV_WIDTH_SHORT_HALF, %esi
        addl    $16, %edi

        movq    (%esi), %mm0
        movq    8(%esi), %mm1
        movq    16(%esi), %mm2
        movq    24(%esi), %mm3

        pmaddwd %mm6, %mm0
        pmaddwd %mm6, %mm1

        pmaddwd %mm6, %mm2
        pmaddwd %mm6, %mm3

        packssdw %mm1, %mm0
        packssdw %mm3, %mm2

        psraw   $1, %mm0
        psraw   $1, %mm2

        movq    %mm0, 0*8(%edi)
        movq    %mm2, 1*8(%edi)

        addl    $DV_WIDTH_SHORT_HALF, %esi
        addl    $16, %edi

        decl    %ebx
        jnz     ppm_copy_ntsc_c_block_mmx_loop

        pop     %ebx
        pop     %edi
        pop     %esi
        pop     %ebp
        ret
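# The two byte-source NTSC variants below zero-extend each 8-byte group
# to words, add the low and high halves of the group pairwise, and
# subtract the doubled 128 bias (OFFSETBX added to itself, since two
# biased samples were summed). With PRECISION == 1 no extra shift is
# needed: the pairwise sum already carries the factor of two.
# _dv_pgm_copy_ntsc_c_block_mmx additionally stores each result row
# twice, filling two block rows per source row.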
.global _dv_pgm_copy_ntsc_c_block_mmx
.hidden _dv_pgm_copy_ntsc_c_block_mmx
.type   _dv_pgm_copy_ntsc_c_block_mmx,@function
_dv_pgm_copy_ntsc_c_block_mmx:

        pushl   %ebp
        movl    %esp, %ebp
        pushl   %esi
        pushl   %edi

        movl    8(%ebp), %edi   # dest
        movl    12(%ebp), %esi  # src

        movq    OFFSETBX, %mm7
        paddw   %mm7, %mm7
        pxor    %mm6, %mm6

        movq    (%esi), %mm0
        movq    8(%esi), %mm2

        movq    %mm0, %mm1
        movq    %mm2, %mm3

        punpcklbw %mm6, %mm0
        punpcklbw %mm6, %mm2

        punpckhbw %mm6, %mm1
        punpckhbw %mm6, %mm3

        paddw   %mm0, %mm1
        paddw   %mm2, %mm3

        psubw   %mm7, %mm1
        psubw   %mm7, %mm3

#if PRECISION == 0
        psraw   $1, %mm1
        psraw   $1, %mm3
#else
#if PRECISION > 1
        psllw   $PRECISION-1, %mm1
        psllw   $PRECISION-1, %mm3
#endif
#endif
        movq    %mm1, 0*8(%edi)
        movq    %mm3, 1*8(%edi)
        movq    %mm1, 2*8(%edi)
        movq    %mm3, 3*8(%edi)

        addl    $DV_WIDTH_BYTE, %esi
        addl    $32, %edi

        movq    (%esi), %mm0
        movq    8(%esi), %mm2

        movq    %mm0, %mm1
        movq    %mm2, %mm3

        punpcklbw %mm6, %mm0
        punpcklbw %mm6, %mm2

        punpckhbw %mm6, %mm1
        punpckhbw %mm6, %mm3

        paddw   %mm0, %mm1
        paddw   %mm2, %mm3

        psubw   %mm7, %mm1
        psubw   %mm7, %mm3

#if PRECISION == 0
        psraw   $1, %mm1
        psraw   $1, %mm3
#else
#if PRECISION > 1
        psllw   $PRECISION-1, %mm1
        psllw   $PRECISION-1, %mm3
#endif
#endif
        movq    %mm1, 0*8(%edi)
        movq    %mm3, 1*8(%edi)
        movq    %mm1, 2*8(%edi)
        movq    %mm3, 3*8(%edi)

        addl    $DV_WIDTH_BYTE, %esi
        addl    $32, %edi

        movq    (%esi), %mm0
        movq    8(%esi), %mm2

        movq    %mm0, %mm1
        movq    %mm2, %mm3

        punpcklbw %mm6, %mm0
        punpcklbw %mm6, %mm2

        punpckhbw %mm6, %mm1
        punpckhbw %mm6, %mm3

        paddw   %mm0, %mm1
        paddw   %mm2, %mm3

        psubw   %mm7, %mm1
        psubw   %mm7, %mm3

#if PRECISION == 0
        psraw   $1, %mm1
        psraw   $1, %mm3
#else
#if PRECISION > 1
        psllw   $PRECISION-1, %mm1
        psllw   $PRECISION-1, %mm3
#endif
#endif
        movq    %mm1, 0*8(%edi)
        movq    %mm3, 1*8(%edi)
        movq    %mm1, 2*8(%edi)
        movq    %mm3, 3*8(%edi)

        addl    $DV_WIDTH_BYTE, %esi
        addl    $32, %edi

        movq    (%esi), %mm0
        movq    8(%esi), %mm2

        movq    %mm0, %mm1
        movq    %mm2, %mm3

        punpcklbw %mm6, %mm0
        punpcklbw %mm6, %mm2

        punpckhbw %mm6, %mm1
        punpckhbw %mm6, %mm3

        paddw   %mm0, %mm1
        paddw   %mm2, %mm3

        psubw   %mm7, %mm1
        psubw   %mm7, %mm3

#if PRECISION == 0
        psraw   $1, %mm1
        psraw   $1, %mm3
#else
#if PRECISION > 1
        psllw   $PRECISION-1, %mm1
        psllw   $PRECISION-1, %mm3
#endif
#endif
        movq    %mm1, 0*8(%edi)
        movq    %mm3, 1*8(%edi)
        movq    %mm1, 2*8(%edi)
        movq    %mm3, 3*8(%edi)

        pop     %edi
        pop     %esi
        pop     %ebp
        ret

.global _dv_video_copy_ntsc_c_block_mmx
.hidden _dv_video_copy_ntsc_c_block_mmx
.type   _dv_video_copy_ntsc_c_block_mmx,@function
_dv_video_copy_ntsc_c_block_mmx:

        pushl   %ebp
        movl    %esp, %ebp
        pushl   %esi
        pushl   %edi
        pushl   %ebx

        movl    8(%ebp), %edi   # dest
        movl    12(%ebp), %esi  # src

        movq    OFFSETBX, %mm7
        paddw   %mm7, %mm7
        pxor    %mm6, %mm6

        movl    $4, %ebx

video_copy_ntsc_c_block_mmx_loop:

        movq    (%esi), %mm0
        movq    8(%esi), %mm2

        movq    %mm0, %mm1
        movq    %mm2, %mm3

        punpcklbw %mm6, %mm0
        punpcklbw %mm6, %mm2

        punpckhbw %mm6, %mm1
        punpckhbw %mm6, %mm3

        paddw   %mm0, %mm1
        paddw   %mm2, %mm3

        psubw   %mm7, %mm1
        psubw   %mm7, %mm3

#if PRECISION == 0
        psraw   $1, %mm1
        psraw   $1, %mm3
#else
#if PRECISION > 1
        psllw   $PRECISION-1, %mm1
        psllw   $PRECISION-1, %mm3
#endif
#endif
        movq    %mm1, 0*8(%edi)
        movq    %mm3, 1*8(%edi)

        addl    $DV_WIDTH_BYTE_HALF, %esi
        addl    $16, %edi

        movq    (%esi), %mm0
        movq    8(%esi), %mm2

        movq    %mm0, %mm1
        movq    %mm2, %mm3

        punpcklbw %mm6, %mm0
        punpcklbw %mm6, %mm2

        punpckhbw %mm6, %mm1
        punpckhbw %mm6, %mm3

        paddw   %mm0, %mm1
        paddw   %mm2, %mm3

        psubw   %mm7, %mm1
        psubw   %mm7, %mm3

#if PRECISION == 0
        psraw   $1, %mm1
        psraw   $1, %mm3
#else
#if PRECISION > 1
        psllw   $PRECISION-1, %mm1
        psllw   $PRECISION-1, %mm3
#endif
#endif
        movq    %mm1, 0*8(%edi)
        movq    %mm3, 1*8(%edi)

        addl    $DV_WIDTH_BYTE_HALF, %esi
        addl    $16, %edi

        decl    %ebx
        jnz     video_copy_ntsc_c_block_mmx_loop

        pop     %ebx
        pop     %edi
        pop     %esi
        pop     %ebp
        ret