1;/****************************************************************************
2; *
3; *  XVID MPEG-4 VIDEO CODEC
4; *  - MMX CBP computation -
5; *
6; *  Copyright (C) 2005      Carlo Bramini <carlo.bramix@libero.it>
7; *                2001-2003 Peter Ross <pross@xvid.org>
8; *                2002-2003 Pascal Massimino <skal@planet-d.net>
9; *
10; *  This program is free software ; you can redistribute it and/or modify
11; *  it under the terms of the GNU General Public License as published by
12; *  the Free Software Foundation ; either version 2 of the License, or
13; *  (at your option) any later version.
14; *
15; *  This program is distributed in the hope that it will be useful,
16; *  but WITHOUT ANY WARRANTY ; without even the implied warranty of
17; *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18; *  GNU General Public License for more details.
19; *
20; *  You should have received a copy of the GNU General Public License
21; *  along with this program ; if not, write to the Free Software
22; *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
23; *
24; * $Id: cbp_mmx.asm,v 1.19 2009-09-16 17:07:58 Isibaar Exp $
25; *
26; ***************************************************************************/
27
28;=============================================================================
29; Macros
30;=============================================================================
31
32%include "nasm.inc"
33
34;=============================================================================
35; Local data
36;=============================================================================
37
DATA

ALIGN SECTION_ALIGN

; Multipliers for the pmaddwd step of calc_cbp_mmx.  The eight bytes are read
; there as four 16-bit words; combined with the final "shr 8" they move the
; 0/1 flag of block 0 to result bit 5, block 1 to bit 4, ... block 5 to bit 0.
mult_mask:
  db 0x10, 0x20                ; word 0 (0x2010): pairs with blocks 0 and 1
  db 0x04, 0x08                ; word 1 (0x0804): pairs with blocks 2 and 3
  db 0x01, 0x02                ; word 2 (0x0201): pairs with blocks 4 and 5
  db 0x00, 0x00                ; word 3: unused lane, contributes nothing

; AND mask for the first quadword of a block: clears word 0 (the first
; coefficient, i.e. the DC term the label refers to) and keeps words 1..3.
ignore_dc:
  dw 0x0000, 0xFFFF, 0xFFFF, 0xFFFF
46
47;=============================================================================
48; Code
49;=============================================================================
50
TEXT

cglobal calc_cbp_mmx

;-----------------------------------------------------------------------------
; uint32_t calc_cbp_mmx(const int16_t coeff[6][64]);
;-----------------------------------------------------------------------------

;-----------------------------------------------------------------------------
; MAKE_LOAD idx, base
;
;   idx  : quadword index (assembly-time constant), scaled by 8 bytes
;   base : address register
;
; ORs six quadwords into mm0..mm5, one per register.  The six addresses are
; 128 bytes apart, starting at base-128 for mm0 and ending at base+512 for
; mm5, each advanced by idx*8.  Only mm0..mm5 are modified.
;-----------------------------------------------------------------------------
%macro MAKE_LOAD 2
  por mm0, [%2 + 8*(%1) - 1*128]
  por mm1, [%2 + 8*(%1) + 0*128]
  por mm2, [%2 + 8*(%1) + 1*128]
  por mm3, [%2 + 8*(%1) + 2*128]
  por mm4, [%2 + 8*(%1) + 3*128]
  por mm5, [%2 + 8*(%1) + 4*128]
%endmacro
67
;-----------------------------------------------------------------------------
; calc_cbp_mmx
;
; In:   prm1 = coeff, six consecutive blocks of 64 16-bit coefficients
;              (128 bytes per block)
; Out:  eax  = 6-bit pattern; bit (5-n) is 1 when block n holds at least one
;              nonzero coefficient other than its first (DC) one
; Uses: _EAX, mm0-mm7, flags.  No EMMS is issued here.
;-----------------------------------------------------------------------------
ALIGN SECTION_ALIGN
calc_cbp_mmx:
  mov _EAX, prm1               ; _EAX = coeff

  movq mm7, [ignore_dc]        ; mask that drops the first word of a quadword
  pxor mm6, mm6                ; mm6 = 0, comparison operand further down

  ; First quadword of every block, with the DC word masked out.
  movq mm0, [_EAX + 0*128]
  pand mm0, mm7
  movq mm1, [_EAX + 1*128]
  pand mm1, mm7
  movq mm2, [_EAX + 2*128]
  pand mm2, mm7
  movq mm3, [_EAX + 3*128]
  pand mm3, mm7
  movq mm4, [_EAX + 4*128]
  pand mm4, mm7
  movq mm5, [_EAX + 5*128]
  pand mm5, mm7

  ; Re-base to quadword 1 of block 1 so that the block 0 and block 1 accesses
  ; made by MAKE_LOAD stay within -128..+127 of the pointer.
  add _EAX, 128+8

  ; OR the remaining 15 quadwords of every block into mm0..mm5.
%assign CBP_QW 0
%rep 15
  MAKE_LOAD CBP_QW, _EAX
%assign CBP_QW CBP_QW+1
%endrep
%undef CBP_QW

  ; Squeeze each 64-bit accumulator down to a single word.  Signed saturation
  ; never turns a nonzero value into zero, so "word != 0" still means
  ; "block has a nonzero AC coefficient".
  packssdw mm0, mm1
  packssdw mm2, mm3
  packssdw mm0, mm2            ; mm0 words: block 0, 1, 2, 3
  packssdw mm4, mm5
  packssdw mm4, mm6            ; mm4 words: block 4, 5, 0, 0

  ; Comparing against zero twice yields 0xFFFF exactly where the word was
  ; nonzero; the shift then leaves a 0/1 flag per word.
  pcmpeqw mm0, mm6
  pcmpeqw mm0, mm6
  psrlw mm0, 15
  pcmpeqw mm4, mm6
  pcmpeqw mm4, mm6
  psrlw mm4, 15
  packuswb mm0, mm4            ; bytes: flag of block 0..5, then two zeros

  ; Weight the flags; the wanted bits end up in byte 1 of each dword.
  movq mm7, [mult_mask]
  pmaddwd mm0, mm7

  ; Fold the upper dword onto the lower one.  The two halves use disjoint
  ; bits in byte 1, so the saturating byte add cannot saturate there.
  movq mm1, mm0
  psrlq mm1, 32
  paddusb mm0, mm1

  movd eax, mm0
  shr _EAX, 8                  ; bring byte 1 down to bits 0..7
  and _EAX, 0x3F               ; keep the six block flags only
  ret
ENDFUNC

NON_EXEC_STACK
130