1 /*
2 *
3 * This file is part of libmpeg3
4 *
5 * LibMPEG3
6 * Author: Adam Williams <broadcast@earthling.net>
7 * Page: heroine.linuxbox.com
8 * Page: http://www.smalltalkconsulting.com/html/mpeg3source.html (for Squeak)
9 *
10 LibMPEG3 was originally licenced under GPL. It was relicensed by
11 the author under the LGPL and the Squeak license on Nov 1st, 2000
12
13 This library is free software; you can redistribute it and/or
14 modify it under the terms of the GNU Lesser General Public
15 License as published by the Free Software Foundation; either
16 version 2.1 of the License, or (at your option) any later version.
17
18 This library is distributed in the hope that it will be useful,
19 but WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 Lesser General Public License for more details.
22
23 You should have received a copy of the GNU Lesser General Public
24 License along with this library; if not, write to the Free Software
25 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26
27 Also licensed under the Squeak license.
28 http://www.squeak.org/license.html
29 */
#include "mpeg3video.h"
#include <stdio.h>
#include <string.h>
32
33 #ifdef HAVE_MMX
34
35 #ifdef HAVE_3Dnow
/*
 * recva_mmx - 16-pixel-wide vertical half-pel prediction averaged into the
 * destination (3DNow! PAVGUSB version).
 *
 * For every row, each of the 16 destination bytes becomes
 * avg(avg(s, s + lx), d), where avg(a, b) = (a + b + 1) >> 1 is the
 * rounded byte average computed by PAVGUSB.
 *
 *	s	first source row
 *	d	destination row; read and rewritten in place
 *	lx	byte offset from s to the second source row
 *	lx2	byte step applied to all three pointers after every row
 *	h	row count; the function returns at once when h <= 0
 *
 * The template advances the pointers and counts ECX down, so those operands
 * are declared read-write ("+"); the stores through d are covered by the
 * "memory" clobber.  lx2 is never written by the template: it is listed as
 * read-write only so that the %3/%4 operand numbering stays unchanged.
 * The template uses 32-bit ADDL on pointers (32-bit x86 only) and does not
 * execute EMMS/FEMMS.
 */
static inline void recva_mmx(unsigned char *s, unsigned char *d, int lx, int lx2, int h)
{
	unsigned char *s2 = s + lx;	/* second source row */

	/* LOOP decrements ECX before testing it, so h <= 0 must not enter. */
	if(h <= 0) return;

	__asm__ __volatile__(
		".align 8\n"
		"1:"
		"movq	(%1), %%mm0\n"		/* 8 s */
		"movq	8(%1), %%mm1\n"		/* 8 s */
		"movq	(%4), %%mm2\n"		/* 8 s + lx */
		"movq	8(%4), %%mm3\n"		/* 8 s + lx */

		"pavgusb %%mm2, %%mm0\n"
		"addl	%3, %1\n"
		"pavgusb %%mm3, %%mm1\n"

		"movq	(%2), %%mm2\n"		/* 8 d */
		"movq	8(%2), %%mm3\n"		/* 8 d */
		"pavgusb %%mm2, %%mm0\n"
		"addl	%3, %4\n"
		"pavgusb %%mm3, %%mm1\n"

		"movq	%%mm0, (%2)\n"
		"movq	%%mm1, 8(%2)\n"
		"addl	%3, %2\n"
		"loop	1b\n"
		: "+c" (h), "+r" (s), "+r" (d), "+r" (lx2), "+r" (s2)
		:
		: "memory", "cc", "mm0", "mm1", "mm2", "mm3"
	);
}
64
/*
 * recvac_mmx - 8-pixel-wide vertical half-pel prediction averaged into the
 * destination (3DNow! PAVGUSB version).
 *
 * For every row, each of the 8 destination bytes becomes
 * avg(avg(s, s + lx), d), with avg(a, b) = (a + b + 1) >> 1 (PAVGUSB).
 *
 *	s	first source row
 *	d	destination row; read and rewritten in place
 *	lx	byte offset from s to the second source row
 *	lx2	byte step applied to all three pointers after every row
 *	h	row count; the function returns at once when h <= 0
 *
 * Pointers and the ECX counter are changed by the template and are therefore
 * read-write operands; lx2 is not written and is listed as read-write only
 * to keep the %3/%4 numbering of the template.  32-bit x86 only (ADDL on
 * pointers); no EMMS/FEMMS is executed here.
 */
static inline void recvac_mmx(unsigned char *s, unsigned char *d, int lx,int lx2, int h)
{
	unsigned char *s2 = s + lx;	/* second source row */

	/* LOOP decrements ECX before testing it, so h <= 0 must not enter. */
	if(h <= 0) return;

	__asm__ __volatile__(
		".align 8\n"
		"1:"
		"movq	(%1), %%mm0\n"		/* 8 s */
		"movq	(%4), %%mm2\n"		/* 8 s + lx */
		"addl	%3, %1\n"
		"pavgusb %%mm2, %%mm0\n"
		"movq	(%2), %%mm3\n"		/* 8 d */
		"addl	%3, %4\n"
		"pavgusb %%mm3, %%mm0\n"
		"movq	%%mm0, (%2)\n"
		"addl	%3, %2\n"
		"loop	1b\n"
		: "+c" (h), "+r" (s), "+r" (d), "+r" (lx2), "+r" (s2)
		:
		: "memory", "cc", "mm0", "mm2", "mm3"
	);
}
84
/*
 * rech_mmx - 16-pixel-wide horizontal half-pel prediction (3DNow! version).
 *
 * For every row, d[i] = avg(s[i], s[i + 1]) for i = 0..15, where
 * avg(a, b) = (a + b + 1) >> 1 is computed by PAVGUSB.  Seventeen source
 * bytes are read per row.
 *
 *	s	source row
 *	d	destination row (overwritten)
 *	lx2	byte step applied to both pointers after every row
 *	h	row count; the function returns at once when h <= 0
 *
 * s, d and the ECX counter are modified by the template and are therefore
 * read-write operands; the stores are covered by the "memory" clobber.
 * 32-bit x86 only (ADDL on pointers); no EMMS/FEMMS is executed here.
 */
static inline void rech_mmx(unsigned char *s, unsigned char *d, int lx2, int h)
{
	/* LOOP decrements ECX before testing it, so h <= 0 must not enter. */
	if(h <= 0) return;

	__asm__ __volatile__(
		".align 8\n"
		"1:"
		"movq	(%1), %%mm0\n"		/* 8 s */
		"movq	8(%1), %%mm1\n"		/* 8 s */
		"movq	1(%1), %%mm2\n"		/* 8 s + 1 */
		"movq	9(%1), %%mm3\n"		/* 8 s + 1 */

		"pavgusb %%mm2, %%mm0\n"
		"addl	%3, %1\n"
		"pavgusb %%mm3, %%mm1\n"

		"movq	%%mm0, (%2)\n"
		"movq	%%mm1, 8(%2)\n"
		"addl	%3, %2\n"
		"loop	1b\n"
		: "+c" (h), "+r" (s), "+r" (d)
		: "r" (lx2)
		: "memory", "cc", "mm0", "mm1", "mm2", "mm3"
	);
}
107
/*
 * rechc_mmx - 8-pixel-wide horizontal half-pel prediction (3DNow! version).
 *
 * For every row, d[i] = avg(s[i], s[i + 1]) for i = 0..7, where
 * avg(a, b) = (a + b + 1) >> 1 is computed by PAVGUSB.  Nine source bytes
 * are read per row.
 *
 *	s	source row
 *	d	destination row (overwritten)
 *	lx2	byte step applied to both pointers after every row
 *	h	row count; the function returns at once when h <= 0
 *
 * s, d and the ECX counter are modified by the template and are therefore
 * read-write operands; the stores are covered by the "memory" clobber.
 * 32-bit x86 only (ADDL on pointers); no EMMS/FEMMS is executed here.
 */
static inline void rechc_mmx(unsigned char *s, unsigned char *d, int lx2, int h)
{
	/* LOOP decrements ECX before testing it, so h <= 0 must not enter. */
	if(h <= 0) return;

	__asm__ __volatile__(
		".align 8\n"
		"1:"
		"movq	(%1), %%mm0\n"		/* 8 s */
		"movq	1(%1), %%mm2\n"		/* 8 s + 1 */
		"addl	%3, %1\n"
		"pavgusb %%mm2, %%mm0\n"
		"movq	%%mm0, (%2)\n"
		"addl	%3, %2\n"
		"loop	1b\n"
		: "+c" (h), "+r" (s), "+r" (d)
		: "r" (lx2)
		: "memory", "cc", "mm0", "mm2"
	);
}
124
/*
 * recha_mmx - 16-pixel-wide horizontal half-pel prediction averaged into
 * the destination (3DNow! version).
 *
 * For every row, d[i] = avg(avg(s[i], s[i + 1]), d[i]) for i = 0..15, where
 * avg(a, b) = (a + b + 1) >> 1 is computed by PAVGUSB.
 *
 *	s	source row (17 bytes are read)
 *	d	destination row; read and rewritten in place
 *	lx2	byte step applied to both pointers after every row
 *	h	row count; the function returns at once when h <= 0
 *
 * s, d and the ECX counter are modified by the template and are therefore
 * read-write operands; the stores are covered by the "memory" clobber.
 * 32-bit x86 only (ADDL on pointers); no EMMS/FEMMS is executed here.
 */
static inline void recha_mmx(unsigned char *s, unsigned char *d,int lx2, int h)
{
	/* LOOP decrements ECX before testing it, so h <= 0 must not enter. */
	if(h <= 0) return;

	__asm__ __volatile__(
		".align 8\n"
		"1:"
		"movq	(%1), %%mm0\n"		/* 8 s */
		"movq	8(%1), %%mm1\n"		/* 8 s */
		"movq	1(%1), %%mm2\n"		/* 8 s + 1 */
		"movq	9(%1), %%mm3\n"		/* 8 s + 1 */

		"pavgusb %%mm2, %%mm0\n"
		"addl	%3, %1\n"
		"pavgusb %%mm3, %%mm1\n"

		"movq	(%2), %%mm2\n"		/* 8 d */
		"movq	8(%2), %%mm3\n"		/* 8 d */
		"pavgusb %%mm2, %%mm0\n"
		"pavgusb %%mm3, %%mm1\n"

		"movq	%%mm0, (%2)\n"
		"movq	%%mm1, 8(%2)\n"
		"addl	%3, %2\n"
		"loop	1b\n"
		: "+c" (h), "+r" (s), "+r" (d)
		: "r" (lx2)
		: "memory", "cc", "mm0", "mm1", "mm2", "mm3"
	);
}
152
/*
 * rechac_mmx - 8-pixel-wide horizontal half-pel prediction averaged into
 * the destination (3DNow! version).
 *
 * For every row, d[i] = avg(avg(s[i], s[i + 1]), d[i]) for i = 0..7, where
 * avg(a, b) = (a + b + 1) >> 1 is computed by PAVGUSB.
 *
 *	s	source row (9 bytes are read)
 *	d	destination row; read and rewritten in place
 *	lx2	byte step applied to both pointers after every row
 *	h	row count; the function returns at once when h <= 0
 *
 * s, d and the ECX counter are modified by the template and are therefore
 * read-write operands; the stores are covered by the "memory" clobber.
 * 32-bit x86 only (ADDL on pointers); no EMMS/FEMMS is executed here.
 */
static inline void rechac_mmx(unsigned char *s,unsigned char *d, int lx2, int h)
{
	/* LOOP decrements ECX before testing it, so h <= 0 must not enter. */
	if(h <= 0) return;

	__asm__ __volatile__(
		".align 8\n"
		"1:"
		"movq	(%1), %%mm0\n"		/* 8 s */
		"movq	1(%1), %%mm2\n"		/* 8 s + 1 */

		"addl	%3, %1\n"
		"pavgusb %%mm2, %%mm0\n"

		"movq	(%2), %%mm1\n"		/* 8 d */
		"pavgusb %%mm1, %%mm0\n"

		"movq	%%mm0, (%2)\n"
		"addl	%3, %2\n"
		"loop	1b\n"
		: "+c" (h), "+r" (s), "+r" (d)
		: "r" (lx2)
		: "memory", "cc", "mm0", "mm1", "mm2"
	);
}
174
/*
 * rec4_mmx - 16-pixel-wide horizontal+vertical half-pel prediction
 * (3DNow! PAVGUSB version).
 *
 *	s	first source row (17 bytes are read, once, before the loop)
 *	d	destination row (overwritten)
 *	lx	byte offset from s to the second source row
 *	lx2	byte step applied to d and to the second-row pointer per row
 *	h	row count; the function returns at once when h <= 0
 *
 * NOTE(review): the four samples are combined by cascaded PAVGUSB,
 * avg(avg(avg(s[i], s[i+1]), s2[i]), s2[i+1]), which does not weight them
 * equally and therefore differs from the (a + b + c + d + 2) >> 2 result of
 * the C routine rec4().
 * NOTE(review): s itself is never advanced; each iteration reuses the
 * previous iteration's second row as its first row, which matches
 * s + lx2 only when lx == lx2.
 * Neither point is changed here; only the operand constraints are fixed.
 *
 * ECX, d and the second-row pointer are modified by the template and are
 * read-write operands.  s and lx2 are not written; they are listed as
 * read-write only so that the %1..%4 numbering of the template is kept.
 * 32-bit x86 only (ADDL on pointers); no EMMS/FEMMS is executed here.
 */
static inline void rec4_mmx(unsigned char *s, unsigned char *d, int lx, int lx2, int h)
{
	unsigned char *s2 = s + lx;	/* second source row */

	/* LOOP decrements ECX before testing it, so h <= 0 must not enter. */
	if(h <= 0) return;

	__asm__ __volatile__(
		"movq	(%1), %%mm0\n"		/* 8 s */
		"movq	8(%1), %%mm1\n"		/* 8 s */
		"movq	1(%1), %%mm2\n"		/* 8 s + 1 */
		"movq	9(%1), %%mm3\n"		/* 8 s + 1 */
		".align 8\n"
		"1:"
		"movq	(%4), %%mm4\n"		/* 8 s + lx */
		"pavgusb %%mm2, %%mm0\n"
		"movq	8(%4), %%mm5\n"		/* 8 s + lx */
		"pavgusb %%mm3, %%mm1\n"

		"movq	1(%4), %%mm6\n"		/* 8 s + lx + 1 */
		"pavgusb %%mm4, %%mm0\n"
		"movq	9(%4), %%mm7\n"		/* 8 s + lx + 1 */
		"pavgusb %%mm5, %%mm1\n"

		"pavgusb %%mm6, %%mm0\n"
		"addl	%3, %4\n"
		"pavgusb %%mm7, %%mm1\n"
		"movq	%%mm0, (%2)\n"
		"movq	%%mm6, %%mm2\n"		/* carry this row over as the */
		"movq	%%mm7, %%mm3\n"		/* next iteration's first row */
		"movq	%%mm1, 8(%2)\n"
		"movq	%%mm4, %%mm0\n"
		"movq	%%mm5, %%mm1\n"
		"addl	%3, %2\n"
		"loop	1b\n"
		: "+c" (h), "+r" (s), "+r" (d), "+r" (lx2), "+r" (s2)
		:
		: "memory", "cc", "mm0", "mm1", "mm2", "mm3",
		  "mm4", "mm5", "mm6", "mm7"
	);
}
209
/*
 * rec4c_mmx - 8-pixel-wide horizontal+vertical half-pel prediction
 * (3DNow! PAVGUSB version).
 *
 *	s	first source row (9 bytes are read, once, before the loop)
 *	d	destination row (overwritten)
 *	lx	byte offset from s to the second source row
 *	lx2	byte step applied to d and to the second-row pointer per row
 *	h	row count; the function returns at once when h <= 0
 *
 * NOTE(review): the four samples are combined by cascaded PAVGUSB,
 * avg(avg(avg(s[i], s[i+1]), s2[i]), s2[i+1]), which does not weight them
 * equally and therefore differs from the (a + b + c + d + 2) >> 2 result of
 * the C routine rec4c().
 * NOTE(review): s itself is never advanced; each iteration reuses the
 * previous iteration's second row as its first row, which matches
 * s + lx2 only when lx == lx2.
 * Neither point is changed here; only the operand constraints are fixed.
 *
 * ECX, d and the second-row pointer are modified by the template and are
 * read-write operands.  s and lx2 are not written; they are listed as
 * read-write only so that the %1..%4 numbering of the template is kept.
 * 32-bit x86 only (ADDL on pointers); no EMMS/FEMMS is executed here.
 */
static inline void rec4c_mmx(unsigned char *s, unsigned char *d, int lx, int lx2, int h)
{
	unsigned char *s2 = s + lx;	/* second source row */

	/* LOOP decrements ECX before testing it, so h <= 0 must not enter. */
	if(h <= 0) return;

	__asm__ __volatile__(
		"movq	(%1), %%mm0\n"		/* 8 s */
		"movq	1(%1), %%mm2\n"		/* 8 s + 1 */
		".align 8\n"
		"1:"
		"movq	(%4), %%mm4\n"		/* 8 s + lx */
		"pavgusb %%mm2, %%mm0\n"

		"movq	1(%4), %%mm6\n"		/* 8 s + lx + 1 */
		"pavgusb %%mm4, %%mm0\n"

		"addl	%3, %4\n"
		"pavgusb %%mm6, %%mm0\n"
		"movq	%%mm0, (%2)\n"
		"movq	%%mm6, %%mm2\n"		/* carry this row over as the */
		"movq	%%mm4, %%mm0\n"		/* next iteration's first row */
		"addl	%3, %2\n"
		"loop	1b\n"
		: "+c" (h), "+r" (s), "+r" (d), "+r" (lx2), "+r" (s2)
		:
		: "memory", "cc", "mm0", "mm2", "mm4", "mm6"
	);
}
234
/*
 * rec4a_mmx - 16-pixel-wide horizontal+vertical half-pel prediction
 * averaged into the destination (3DNow! PAVGUSB version).
 *
 *	s	first source row (17 bytes are read, once, before the loop)
 *	d	destination row; read and rewritten in place
 *	lx	byte offset from s to the second source row
 *	lx2	byte step applied to d and to the second-row pointer per row
 *	h	row count; the function returns at once when h <= 0
 *
 * Fix: the upper 8 bytes used to be averaged with the destination before
 * the fourth source sample (s + lx + 9) was folded in, while the lower
 * 8 bytes did it afterwards.  Both halves now average the destination in
 * last.
 *
 * NOTE(review): the four source samples are still combined by cascaded
 * PAVGUSB, avg(avg(avg(s[i], s[i+1]), s2[i]), s2[i+1]), which does not
 * weight them equally and so differs from the C routine rec4a().
 * NOTE(review): s itself is never advanced; each iteration reuses the
 * previous iteration's second row as its first row, which matches
 * s + lx2 only when lx == lx2.
 *
 * ECX, d and the second-row pointer are modified by the template and are
 * read-write operands.  s and lx2 are not written; they are listed as
 * read-write only so that the %1..%4 numbering of the template is kept.
 * 32-bit x86 only (ADDL on pointers); no EMMS/FEMMS is executed here.
 */
static inline void rec4a_mmx(unsigned char *s, unsigned char *d, int lx, int lx2, int h)
{
	unsigned char *s2 = s + lx;	/* second source row */

	/* LOOP decrements ECX before testing it, so h <= 0 must not enter. */
	if(h <= 0) return;

	__asm__ __volatile__(
		"movq	(%1), %%mm0\n"		/* 8 s */
		"movq	8(%1), %%mm1\n"		/* 8 s */
		"movq	1(%1), %%mm2\n"		/* 8 s + 1 */
		"movq	9(%1), %%mm3\n"		/* 8 s + 1 */
		".align 8\n"
		"1:"
		"movq	(%4), %%mm4\n"		/* 8 s + lx */
		"pavgusb %%mm2, %%mm0\n"
		"movq	8(%4), %%mm5\n"		/* 8 s + lx */
		"pavgusb %%mm3, %%mm1\n"

		"movq	1(%4), %%mm6\n"		/* 8 s + lx + 1 */
		"pavgusb %%mm4, %%mm0\n"
		"movq	9(%4), %%mm7\n"		/* 8 s + lx + 1 */
		"pavgusb %%mm5, %%mm1\n"
		"movq	(%2), %%mm2\n"		/* 8 d */
		"pavgusb %%mm6, %%mm0\n"
		"movq	8(%2), %%mm3\n"		/* 8 d */

		"pavgusb %%mm2, %%mm0\n"
		"addl	%3, %4\n"
		"pavgusb %%mm7, %%mm1\n"	/* fourth sample first ... */
		"movq	%%mm0, (%2)\n"

		"pavgusb %%mm3, %%mm1\n"	/* ... destination last */
		"movq	%%mm6, %%mm2\n"		/* carry this row over as the */
		"movq	%%mm7, %%mm3\n"		/* next iteration's first row */
		"movq	%%mm1, 8(%2)\n"
		"movq	%%mm4, %%mm0\n"
		"movq	%%mm5, %%mm1\n"
		"addl	%3, %2\n"
		"loop	1b\n"
		: "+c" (h), "+r" (s), "+r" (d), "+r" (lx2), "+r" (s2)
		:
		: "memory", "cc", "mm0", "mm1", "mm2", "mm3",
		  "mm4", "mm5", "mm6", "mm7"
	);
}
274
/*
 * rec4ac_mmx - 8-pixel-wide horizontal+vertical half-pel prediction
 * averaged into the destination (3DNow! PAVGUSB version).
 *
 *	s	first source row (9 bytes are read, once, before the loop)
 *	d	destination row; read and rewritten in place
 *	lx	byte offset from s to the second source row
 *	lx2	byte step applied to d and to the second-row pointer per row
 *	h	row count; the function returns at once when h <= 0
 *
 * NOTE(review): the four source samples are combined by cascaded PAVGUSB,
 * avg(avg(avg(s[i], s[i+1]), s2[i]), s2[i+1]), which does not weight them
 * equally and so differs from the C routine rec4ac().
 * NOTE(review): s itself is never advanced; each iteration reuses the
 * previous iteration's second row as its first row, which matches
 * s + lx2 only when lx == lx2.
 * Neither point is changed here; only the operand constraints are fixed.
 *
 * ECX, d and the second-row pointer are modified by the template and are
 * read-write operands.  s and lx2 are not written; they are listed as
 * read-write only so that the %1..%4 numbering of the template is kept.
 * 32-bit x86 only (ADDL on pointers); no EMMS/FEMMS is executed here.
 */
static inline void rec4ac_mmx(unsigned char *s, unsigned char *d, int lx, int lx2, int h)
{
	unsigned char *s2 = s + lx;	/* second source row */

	/* LOOP decrements ECX before testing it, so h <= 0 must not enter. */
	if(h <= 0) return;

	__asm__ __volatile__(
		"movq	(%1), %%mm0\n"		/* 8 s */
		"movq	1(%1), %%mm2\n"		/* 8 s + 1 */
		".align 8\n"
		"1:"
		"movq	(%4), %%mm4\n"		/* 8 s + lx */
		"pavgusb %%mm2, %%mm0\n"

		"movq	1(%4), %%mm6\n"		/* 8 s + lx + 1 */
		"pavgusb %%mm4, %%mm0\n"
		"movq	(%2), %%mm1\n"		/* 8 d */
		"pavgusb %%mm6, %%mm0\n"
		"addl	%3, %4\n"
		"pavgusb %%mm1, %%mm0\n"
		"movq	%%mm6, %%mm2\n"		/* carry this row over as the */
		"movq	%%mm0, (%2)\n"
		"movq	%%mm4, %%mm0\n"		/* next iteration's first row */
		"addl	%3, %2\n"
		"loop	1b\n"
		: "+c" (h), "+r" (s), "+r" (d), "+r" (lx2), "+r" (s2)
		:
		: "memory", "cc", "mm0", "mm1", "mm2", "mm4", "mm6"
	);
}
300
301 #else // HAVE_3DNOW
/*
 * Byte-replicated constants for the plain-MMX (non-3DNow!) byte-averaging
 * code: ADD_1 holds 0x01 in each of its eight bytes, MASK_AND holds 0x7f in
 * each of its eight bytes.
 */
static long long ADD_1 = 0x0101010101010101LL;
static long long MASK_AND = 0x7f7f7f7f7f7f7f7fLL;
304 #endif
305
/*
 * rec_mmx - copy a 16-pixel-wide block with MMX loads and stores.
 *
 * For every row, 16 bytes are copied from s to d.
 *
 *	s	source row
 *	d	destination row (overwritten)
 *	lx2	byte step applied to both pointers after every row
 *	h	row count; the function returns at once when h <= 0
 *
 * The template changes the row counter and both pointers, so they are
 * read-write ("+") operands; the stores are covered by the "memory" clobber.
 * LEAL is used for the last pointer update because it leaves the flags set
 * by DECL intact for JNZ.  32-bit x86 only (ADDL/LEAL on pointers); no EMMS
 * is executed here.
 */
static inline void rec_mmx(unsigned char *s, unsigned char *d, int lx2, int h)
{
	/* DECL/JNZ tests the counter after decrementing; h <= 0 must not enter. */
	if(h <= 0) return;

	__asm__ __volatile__(
		".align 8\n"
		"1:\t"
		"movq	( %1 ), %%mm0\n"	/* 8 s */
		"movq	8( %1 ), %%mm2\n"	/* 16 s */
		"movq	%%mm0, ( %2 )\n"
		"addl	%3, %1\n"
		"movq	%%mm2, 8( %2 )\n"
		"decl	%0\n"
		"leal	(%2, %3), %2\n"
		"jnz	1b"
		: "+c" (h), "+r" (s), "+r" (d)
		: "r" (lx2)
		: "memory", "cc", "mm0", "mm2"
	);
}
323
324
/*
 * recc_mmx - copy an 8-pixel-wide block with MMX loads and stores.
 *
 * For every row, 8 bytes are copied from s to d.
 *
 *	s	source row
 *	d	destination row (overwritten)
 *	lx2	byte step applied to both pointers after every row
 *	h	row count; the function returns at once when h <= 0
 *
 * The template changes the row counter and both pointers, so they are
 * read-write ("+") operands; the stores are covered by the "memory" clobber.
 * LEAL is used for the last pointer update because it leaves the flags set
 * by DECL intact for JNZ.  32-bit x86 only (ADDL/LEAL on pointers); no EMMS
 * is executed here.
 */
static inline void recc_mmx(unsigned char *s, unsigned char *d, int lx2, int h)
{
	/* DECL/JNZ tests the counter after decrementing; h <= 0 must not enter. */
	if(h <= 0) return;

	__asm__ __volatile__(
		".align 8\n"
		"1:\t"
		"movq	( %1 ), %%mm0\n"
		"addl	%3, %1\n"
		"movq	%%mm0, ( %2 )\n"
		"decl	%0\n"
		"leal	(%2, %3), %2\n"
		"jnz	1b"
		: "+c" (h), "+r" (s), "+r" (d)
		: "r" (lx2)
		: "memory", "cc", "mm0"
	);
}
340
341
/*
 * reca_mmx - average a 16-pixel-wide source block into the destination.
 *
 * For every row, d[i] = (s[i] + d[i] + 1) >> 1 for i = 0..15.
 *
 *	s	source row
 *	d	destination row; read and rewritten in place
 *	lx2	byte step applied to both pointers after every row
 *	h	row count; the function returns at once when h <= 0
 *
 * With HAVE_3Dnow the rounded average is a single PAVGUSB.  The plain-MMX
 * path builds it as (a >> 1) + (b >> 1) + ((a | b) & 1): the word shift is
 * masked with 0x7f so no bit leaks in from the neighbouring byte, and the
 * last term supplies the rounding bit.  (The previous code added a constant
 * 1 instead, which is one too high whenever both bytes are even.)
 *
 * The constants are passed as memory operands rather than referenced by a
 * hard-coded "_NAME" symbol, so the code does not depend on the target's
 * symbol prefix.  Counter and pointers are modified by the templates and
 * are read-write operands.  32-bit x86 only (ADDL/LEAL on pointers); no
 * EMMS/FEMMS is executed here.
 */
static inline void reca_mmx(unsigned char *s, unsigned char *d, int lx2, int h)
{
#ifndef HAVE_3Dnow
	static const unsigned long long low7 = 0x7f7f7f7f7f7f7f7fULL;
	static const unsigned long long lsb = 0x0101010101010101ULL;
#endif

	/* Both loops test the counter after decrementing; h <= 0 must not enter. */
	if(h <= 0) return;

#ifdef HAVE_3Dnow
	__asm__ __volatile__(
		".align 8\n"
		"1:"
		"movq	(%1), %%mm0\n"		/* 8 s */
		"movq	(%2), %%mm2\n"		/* 8 d */
		"movq	8(%1), %%mm1\n"		/* 8 s */
		"movq	8(%2), %%mm3\n"		/* 8 d */
		"pavgusb %%mm2, %%mm0\n"
		"addl	%3, %1\n"
		"pavgusb %%mm3, %%mm1\n"

		"movq	%%mm0, (%2)\n"
		"movq	%%mm1, 8(%2)\n"
		"addl	%3, %2\n"
		"loop	1b\n"
		: "+c" (h), "+r" (s), "+r" (d)
		: "r" (lx2)
		: "memory", "cc", "mm0", "mm1", "mm2", "mm3"
	);
#else	/* No 3dnow */
	__asm__ __volatile__(
		"movq	%4, %%mm5\n"		/* 0x7f in every byte */
		"movq	%5, %%mm6\n"		/* 0x01 in every byte */
		"1:\t"
		"movq	(%1),%%mm0\n"		/* Load 16 pixels from each row */
		"movq	(%2),%%mm1\n"
		"movq	8(%1),%%mm2\n"
		"movq	8(%2),%%mm3\n"
		"movq	%%mm0,%%mm4\n"
		"movq	%%mm2,%%mm7\n"
		"por	%%mm1,%%mm4\n"
		"por	%%mm3,%%mm7\n"
		"pand	%%mm6,%%mm4\n"		/* rounding bit (a | b) & 1 */
		"pand	%%mm6,%%mm7\n"
		"psrlw	$1,%%mm0\n"		/* Shift pixels down */
		"psrlw	$1,%%mm1\n"
		"pand	%%mm5,%%mm0\n"		/* Zero out significant bit */
		"psrlw	$1,%%mm2\n"
		"pand	%%mm5,%%mm1\n"
		"psrlw	$1,%%mm3\n"
		"pand	%%mm5,%%mm2\n"
		"paddusb %%mm1,%%mm0\n"		/* Add pixels */
		"pand	%%mm5,%%mm3\n"
		"paddusb %%mm3,%%mm2\n"
		"paddusb %%mm4,%%mm0\n"		/* Add rounding bit */
		"paddusb %%mm7,%%mm2\n"
		"movq	%%mm0,(%2)\n"
		"addl	%3,%1\n"
		"movq	%%mm2, 8(%2)\n"
		"decl	%0\n"
		"leal	(%2, %3), %2\n"
		"jnz	1b\n"
		: "+c" (h), "+r" (s), "+r" (d)
		: "r" (lx2), "m" (low7), "m" (lsb)
		: "memory", "cc", "mm0", "mm1", "mm2", "mm3",
		  "mm4", "mm5", "mm6", "mm7"
	);
#endif
}
395
396
/*
 * recac_mmx - average an 8-pixel-wide source block into the destination.
 *
 * For every row, d[i] = (s[i] + d[i] + 1) >> 1 for i = 0..7.
 *
 *	s	source row
 *	d	destination row; read and rewritten in place
 *	lx2	byte step applied to both pointers after every row
 *	h	row count; the function returns at once when h <= 0
 *
 * With HAVE_3Dnow the rounded average is a single PAVGUSB.  The plain-MMX
 * path builds it as (a >> 1) + (b >> 1) + ((a | b) & 1): the word shift is
 * masked with 0x7f so no bit leaks in from the neighbouring byte, and the
 * last term supplies the rounding bit.  (The previous code added a constant
 * 1 instead, which is one too high whenever both bytes are even.)
 *
 * The constants are passed as memory operands rather than referenced by a
 * hard-coded "_NAME" symbol.  Counter and pointers are modified by the
 * templates and are read-write operands.  32-bit x86 only (ADDL/LEAL on
 * pointers); no EMMS/FEMMS is executed here.
 */
static inline void recac_mmx(unsigned char *s, unsigned char *d, int lx2, int h)
{
#ifndef HAVE_3Dnow
	static const unsigned long long low7 = 0x7f7f7f7f7f7f7f7fULL;
	static const unsigned long long lsb = 0x0101010101010101ULL;
#endif

	/* Both loops test the counter after decrementing; h <= 0 must not enter. */
	if(h <= 0) return;

#ifdef HAVE_3Dnow
	__asm__ __volatile__(
		".align 8\n"
		"1:"
		"movq	(%1), %%mm0\n"		/* 8 s */
		"movq	(%2), %%mm2\n"		/* 8 d */
		"pavgusb %%mm2, %%mm0\n"
		"addl	%3, %1\n"
		"movq	%%mm0, (%2)\n"
		"addl	%3, %2\n"
		"loop	1b\n"
		: "+c" (h), "+r" (s), "+r" (d)
		: "r" (lx2)
		: "memory", "cc", "mm0", "mm2"
	);
#else	/* No 3dnow */
	__asm__ __volatile__(
		"movq	%4, %%mm5\n"		/* 0x7f in every byte */
		"movq	%5, %%mm6\n"		/* 0x01 in every byte */
		"1:\t"
		"movq	(%1),%%mm0\n"
		"movq	(%2),%%mm1\n"
		"movq	%%mm0,%%mm4\n"
		"por	%%mm1,%%mm4\n"
		"pand	%%mm6,%%mm4\n"		/* rounding bit (a | b) & 1 */
		"psrlw	$1,%%mm0\n"
		"psrlw	$1,%%mm1\n"
		"pand	%%mm5,%%mm0\n"
		"pand	%%mm5,%%mm1\n"
		"paddusb %%mm1,%%mm0\n"
		"paddusb %%mm4,%%mm0\n"
		"addl	%3,%1\n"
		"movq	%%mm0,(%2)\n"
		"decl	%0\n"
		"leal	(%2, %3), %2\n"
		"jnz	1b\n"
		: "+c" (h), "+r" (s), "+r" (d)
		: "r" (lx2), "m" (low7), "m" (lsb)
		: "memory", "cc", "mm0", "mm1", "mm4", "mm5", "mm6"
	);
#endif
}
436
437
/*
 * recv_mmx - 16-pixel-wide vertical half-pel prediction.
 *
 * For every row, d[i] = (s[i] + s[i + lx] + 1) >> 1 for i = 0..15.
 *
 *	s	first source row
 *	d	destination row (overwritten)
 *	lx	byte offset from s to the second source row
 *	lx2	byte step applied to all three pointers after every row
 *	h	row count; the function returns at once when h <= 0
 *
 * With HAVE_3Dnow the rounded average is a single PAVGUSB.  The plain-MMX
 * path builds it as (a >> 1) + (b >> 1) + ((a | b) & 1): the word shift is
 * masked with 0x7f so no bit leaks in from the neighbouring byte, and the
 * last term supplies the rounding bit.  (The previous code added a constant
 * 1 instead, which is one too high whenever both bytes are even.)
 *
 * The constants are passed as memory operands rather than referenced by a
 * hard-coded "_NAME" symbol.  Counter and pointers are modified by the
 * templates and are read-write operands; lx2 is not written and is listed
 * as read-write only to keep the %3/%4 numbering of the templates.
 * 32-bit x86 only (ADDL/LEAL on pointers); no EMMS/FEMMS is executed here.
 */
static inline void recv_mmx(unsigned char *s, unsigned char *d, int lx, int lx2, int h)
{
	unsigned char *s2 = s + lx;	/* second source row */
#ifndef HAVE_3Dnow
	static const unsigned long long low7 = 0x7f7f7f7f7f7f7f7fULL;
	static const unsigned long long lsb = 0x0101010101010101ULL;
#endif

	/* Both loops test the counter after decrementing; h <= 0 must not enter. */
	if(h <= 0) return;

#ifdef HAVE_3Dnow
	__asm__ __volatile__(
		".align 8\n"
		"1:"
		"movq	(%1), %%mm0\n"		/* 8 s */
		"movq	(%4), %%mm2\n"		/* 8 s + lx */
		"movq	8(%1), %%mm1\n"		/* 8 s */
		"movq	8(%4), %%mm3\n"		/* 8 s + lx */

		"pavgusb %%mm2, %%mm0\n"
		"addl	%3, %1\n"
		"pavgusb %%mm3, %%mm1\n"

		"movq	%%mm0, (%2)\n"
		"addl	%3, %4\n"
		"movq	%%mm1, 8(%2)\n"
		"addl	%3, %2\n"
		"loop	1b\n"
		: "+c" (h), "+r" (s), "+r" (d), "+r" (lx2), "+r" (s2)
		:
		: "memory", "cc", "mm0", "mm1", "mm2", "mm3"
	);
#else
	__asm__ __volatile__(
		"movq	%5, %%mm5\n"		/* 0x7f in every byte */
		"movq	%6, %%mm6\n"		/* 0x01 in every byte */
		"1:\t"
		"movq	(%1), %%mm0\n"		/* 8 s */
		"movq	(%4), %%mm1\n"		/* 8 s + lx */
		"movq	8(%1), %%mm2\n"		/* 8 s */
		"movq	8(%4), %%mm3\n"		/* 8 s + lx */
		"movq	%%mm0,%%mm4\n"
		"movq	%%mm2,%%mm7\n"
		"por	%%mm1,%%mm4\n"
		"por	%%mm3,%%mm7\n"
		"pand	%%mm6,%%mm4\n"		/* rounding bit (a | b) & 1 */
		"pand	%%mm6,%%mm7\n"
		"psrlw	$1,%%mm0\n"
		"psrlw	$1,%%mm1\n"
		"pand	%%mm5,%%mm0\n"
		"psrlw	$1,%%mm2\n"
		"pand	%%mm5,%%mm1\n"
		"psrlw	$1,%%mm3\n"
		"pand	%%mm5,%%mm2\n"
		"paddusb %%mm1,%%mm0\n"
		"pand	%%mm5,%%mm3\n"
		"paddusb %%mm3,%%mm2\n"
		"paddusb %%mm4,%%mm0\n"
		"paddusb %%mm7,%%mm2\n"
		"movq	%%mm0,(%2)\n"
		"addl	%3,%1\n"
		"movq	%%mm2, 8(%2)\n"
		"addl	%3,%4\n"
		"decl	%0\n"
		"leal	(%2, %3), %2\n"
		"jnz	1b\n"
		: "+c" (h), "+r" (s), "+r" (d), "+r" (lx2), "+r" (s2)
		: "m" (low7), "m" (lsb)
		: "memory", "cc", "mm0", "mm1", "mm2", "mm3",
		  "mm4", "mm5", "mm6", "mm7"
	);
#endif
}
494
495
/*
 * recvc_mmx - 8-pixel-wide vertical half-pel prediction.
 *
 * For every row, d[i] = (s[i] + s[i + lx] + 1) >> 1 for i = 0..7.
 *
 *	s	first source row
 *	d	destination row (overwritten)
 *	lx	byte offset from s to the second source row
 *	lx2	byte step applied to all three pointers after every row
 *	h	row count; the function returns at once when h <= 0
 *
 * With HAVE_3Dnow the rounded average is a single PAVGUSB.  The plain-MMX
 * path builds it as (a >> 1) + (b >> 1) + ((a | b) & 1): the word shift is
 * masked with 0x7f so no bit leaks in from the neighbouring byte, and the
 * last term supplies the rounding bit.  (The previous code added a constant
 * 1 instead, which is one too high whenever both bytes are even.)
 *
 * The constants are passed as memory operands rather than referenced by a
 * hard-coded "_NAME" symbol.  Counter and pointers are modified by the
 * templates and are read-write operands; lx2 is not written and is listed
 * as read-write only to keep the %3/%4 numbering of the templates.
 * 32-bit x86 only (ADDL/LEAL on pointers); no EMMS/FEMMS is executed here.
 */
static inline void recvc_mmx(unsigned char *s, unsigned char *d, int lx, int lx2, int h)
{
	unsigned char *s2 = s + lx;	/* second source row */
#ifndef HAVE_3Dnow
	static const unsigned long long low7 = 0x7f7f7f7f7f7f7f7fULL;
	static const unsigned long long lsb = 0x0101010101010101ULL;
#endif

	/* Both loops test the counter after decrementing; h <= 0 must not enter. */
	if(h <= 0) return;

#ifdef HAVE_3Dnow
	__asm__ __volatile__(
		".align 8\n"
		"1:"
		"movq	(%1), %%mm0\n"		/* 8 s */
		"movq	(%4), %%mm2\n"		/* 8 s + lx */
		"addl	%3, %1\n"
		"pavgusb %%mm2, %%mm0\n"
		"addl	%3, %4\n"
		"movq	%%mm0, (%2)\n"
		"addl	%3, %2\n"
		"loop	1b\n"
		: "+c" (h), "+r" (s), "+r" (d), "+r" (lx2), "+r" (s2)
		:
		: "memory", "cc", "mm0", "mm2"
	);
#else
	__asm__ __volatile__(
		"movq	%5, %%mm5\n"		/* 0x7f in every byte */
		"movq	%6, %%mm6\n"		/* 0x01 in every byte */
		"1:\t"
		"movq	(%1), %%mm0\n"		/* 8 s */
		"movq	(%4), %%mm1\n"		/* 8 s + lx */
		"movq	%%mm0,%%mm4\n"
		"por	%%mm1,%%mm4\n"
		"pand	%%mm6,%%mm4\n"		/* rounding bit (a | b) & 1 */
		"psrlw	$1,%%mm0\n"
		"psrlw	$1,%%mm1\n"
		"pand	%%mm5,%%mm0\n"
		"pand	%%mm5,%%mm1\n"
		"paddusb %%mm1,%%mm0\n"
		"addl	%3,%1\n"
		"paddusb %%mm4,%%mm0\n"
		"addl	%3,%4\n"
		"movq	%%mm0,(%2)\n"
		"decl	%0\n"
		"leal	(%2, %3), %2\n"
		"jnz	1b\n"
		: "+c" (h), "+r" (s), "+r" (d), "+r" (lx2), "+r" (s2)
		: "m" (low7), "m" (lsb)
		: "memory", "cc", "mm0", "mm1", "mm4", "mm5", "mm6"
	);
#endif
}
537
538 #endif // HAVE_MMX
539
/*
 * rec - copy a 16-pixel-wide block.
 *
 * For each of the h rows, 16 bytes are copied from s to d; both pointers
 * then advance by lx2 bytes.  Nothing is done when h <= 0.
 *
 * The former __POWERPC__ path copied through "unsigned long *" casts, which
 * assumes suitably aligned rows, breaks the aliasing rules, and moves 32
 * bytes instead of 16 where long is 64 bits wide.  A fixed-size memmove()
 * copies exactly 16 bytes on every target.
 */
static inline void rec(unsigned char *s, unsigned char *d, int lx2, int h)
{
	int row;

	for(row = 0; row < h; row++)
	{
		memmove(d, s, 16);
		s += lx2;
		d += lx2;
	}
}
565
566
567
/*
 * recc - copy an 8-pixel-wide block.
 *
 * For each of the h rows, 8 bytes are copied from s to d; both pointers
 * then advance by lx2 bytes.  Nothing is done when h <= 0.
 *
 * The former __POWERPC__ path copied through "long *" casts, which assumes
 * suitably aligned rows, breaks the aliasing rules, and moves 16 bytes
 * instead of 8 where long is 64 bits wide.  A fixed-size memmove() copies
 * exactly 8 bytes on every target.
 */
static inline void recc(unsigned char *s, unsigned char *d, int lx2, int h)
{
	int row;

	for(row = 0; row < h; row++)
	{
		memmove(d, s, 8);
		s += lx2;
		d += lx2;
	}
}
587
/*
 * reca - average a 16-pixel-wide source block into the destination.
 *
 * For each of the h rows, d[i] = (d[i] + s[i] + 1) >> 1 for i = 0..15;
 * both pointers then advance by lx2 bytes.
 */
static inline void reca(unsigned char *s, unsigned char *d, int lx2, int h)
{
	int row, col;

	for(row = 0; row < h; row++)
	{
		for(col = 0; col < 16; col++)
		{
			unsigned int sum = (unsigned int)(d[col] + s[col] + 1);
			d[col] = sum >> 1;
		}
		s += lx2;
		d += lx2;
	}
}
611
/*
 * recac - average an 8-pixel-wide source block into the destination.
 *
 * For each of the h rows, d[i] = (d[i] + s[i] + 1) >> 1 for i = 0..7;
 * both pointers then advance by lx2 bytes.
 */
static inline void recac(unsigned char *s, unsigned char *d, int lx2, int h)
{
	int row, col;

	for(row = 0; row < h; row++)
	{
		for(col = 0; col < 8; col++)
		{
			unsigned int sum = (unsigned int)(d[col] + s[col] + 1);
			d[col] = sum >> 1;
		}
		s += lx2;
		d += lx2;
	}
}
627
/*
 * recv - 16-pixel-wide vertical half-pel prediction.
 *
 * For each of the h rows, d[i] = (s[i] + s[i + lx] + 1) >> 1 for
 * i = 0..15; all row pointers then advance by lx2 bytes.
 */
static inline void recv(unsigned char *s, unsigned char *d, int lx, int lx2, int h)
{
	unsigned char *upper = s;
	unsigned char *lower = s + lx;
	unsigned char *out = d;
	int row, col;

	for(row = 0; row < h; row++)
	{
		for(col = 0; col < 16; col++)
		{
			unsigned int sum = (unsigned int)(upper[col] + lower[col] + 1);
			out[col] = sum >> 1;
		}
		upper += lx2;
		lower += lx2;
		out += lx2;
	}
}
658
/*
 * recvc - 8-pixel-wide vertical half-pel prediction.
 *
 * For each of the h rows, d[i] = (s[i] + s[i + lx] + 1) >> 1 for
 * i = 0..7; all row pointers then advance by lx2 bytes.
 */
static inline void recvc(unsigned char *s, unsigned char *d, int lx, int lx2, int h)
{
	unsigned char *upper = s;
	unsigned char *lower = s + lx;
	unsigned char *out = d;
	int row, col;

	for(row = 0; row < h; row++)
	{
		for(col = 0; col < 8; col++)
		{
			unsigned int sum = (unsigned int)(upper[col] + lower[col] + 1);
			out[col] = sum >> 1;
		}
		upper += lx2;
		lower += lx2;
		out += lx2;
	}
}
682
683
/*
 * recva - 16-pixel-wide vertical half-pel prediction averaged into the
 * destination.
 *
 * For each of the h rows and i = 0..15:
 *	p    = (s[i] + s[i + lx] + 1) >> 1
 *	d[i] = (d[i] + p + 1) >> 1
 * All row pointers then advance by lx2 bytes.
 */
static inline void recva(unsigned char *s, unsigned char *d, int lx, int lx2, int h)
{
	unsigned char *upper = s;
	unsigned char *lower = s + lx;
	unsigned char *out = d;
	int row, col;

	for(row = 0; row < h; row++)
	{
		for(col = 0; col < 16; col++)
		{
			unsigned int pred = (unsigned int)(upper[col] + lower[col] + 1) >> 1;
			out[col] = (out[col] + pred + 1) >> 1;
		}
		upper += lx2;
		lower += lx2;
		out += lx2;
	}
}
714
715
/*
 * recvac - 8-pixel-wide vertical half-pel prediction averaged into the
 * destination.
 *
 * For each of the h rows and i = 0..7:
 *	p    = (s[i] + s[i + lx] + 1) >> 1
 *	d[i] = (d[i] + p + 1) >> 1
 * All row pointers then advance by lx2 bytes.
 */
static inline void recvac(unsigned char *s, unsigned char *d, int lx,int lx2, int h)
{
	unsigned char *upper = s;
	unsigned char *lower = s + lx;
	unsigned char *out = d;
	int row, col;

	for(row = 0; row < h; row++)
	{
		for(col = 0; col < 8; col++)
		{
			unsigned int pred = (unsigned int)(upper[col] + lower[col] + 1) >> 1;
			out[col] = (out[col] + pred + 1) >> 1;
		}
		upper += lx2;
		lower += lx2;
		out += lx2;
	}
}
737
738
/*
 * rech - 16-pixel-wide horizontal half-pel prediction.
 *
 * For each of the h rows, d[i] = (s[i] + s[i + 1] + 1) >> 1 for
 * i = 0..15 (17 source bytes are read); both pointers then advance by lx2.
 * Each source byte is loaded once and carried over to the next column.
 */
static inline void rech(unsigned char *s, unsigned char *d, int lx2, int h)
{
	unsigned char *in = s;
	unsigned char *out = d;
	int row, col;

	for(row = 0; row < h; row++)
	{
		unsigned int left = in[0];

		for(col = 0; col < 16; col++)
		{
			unsigned int right = in[col + 1];

			out[col] = (unsigned int)(left + right + 1) >> 1;
			left = right;
		}
		in += lx2;
		out += lx2;
	}
}
768
769
/*
 * rechc - 8-pixel-wide horizontal half-pel prediction.
 *
 * For each of the h rows, d[i] = (s[i] + s[i + 1] + 1) >> 1 for
 * i = 0..7 (9 source bytes are read); both pointers then advance by lx2.
 * Each source byte is loaded once and carried over to the next column.
 */
static inline void rechc(unsigned char *s,unsigned char *d, int lx2, int h)
{
	unsigned char *in = s;
	unsigned char *out = d;
	int row, col;

	for(row = 0; row < h; row++)
	{
		unsigned int left = in[0];

		for(col = 0; col < 8; col++)
		{
			unsigned int right = in[col + 1];

			out[col] = (unsigned int)(left + right + 1) >> 1;
			left = right;
		}
		in += lx2;
		out += lx2;
	}
}
791
/*
 * recha - 16-pixel-wide horizontal half-pel prediction averaged into the
 * destination.
 *
 * For each of the h rows and i = 0..15:
 *	p    = (s[i] + s[i + 1] + 1) >> 1
 *	d[i] = (d[i] + p + 1) >> 1
 * Both pointers then advance by lx2 bytes.  Each source byte is loaded
 * once and carried over to the next column.
 */
static inline void recha(unsigned char *s, unsigned char *d,int lx2, int h)
{
	unsigned char *in = s;
	unsigned char *out = d;
	int row, col;

	for(row = 0; row < h; row++)
	{
		unsigned int left = in[0];

		for(col = 0; col < 16; col++)
		{
			unsigned int right = in[col + 1];
			unsigned int pred = (unsigned int)(left + right + 1) >> 1;

			out[col] = (out[col] + pred + 1) >> 1;
			left = right;
		}
		in += lx2;
		out += lx2;
	}
}
823
824
/*
 * rechac - 8-pixel-wide horizontal half-pel prediction averaged into the
 * destination.
 *
 * For each of the h rows and i = 0..7:
 *	p    = (s[i] + s[i + 1] + 1) >> 1
 *	d[i] = (d[i] + p + 1) >> 1
 * Both pointers then advance by lx2 bytes.  Each source byte is loaded
 * once and carried over to the next column.
 */
static inline void rechac(unsigned char *s,unsigned char *d, int lx2, int h)
{
	unsigned char *in = s;
	unsigned char *out = d;
	int row, col;

	for(row = 0; row < h; row++)
	{
		unsigned int left = in[0];

		for(col = 0; col < 8; col++)
		{
			unsigned int right = in[col + 1];
			unsigned int pred = (unsigned int)(left + right + 1) >> 1;

			out[col] = (out[col] + pred + 1) >> 1;
			left = right;
		}
		in += lx2;
		out += lx2;
	}
}
848
849
/*
 * rec4 - 16-pixel-wide horizontal+vertical half-pel prediction.
 *
 * For each of the h rows and i = 0..15:
 *	d[i] = (s[i] + s[i + 1] + s[i + lx] + s[i + lx + 1] + 2) >> 2
 * All row pointers then advance by lx2 bytes.  Each source byte is loaded
 * once and carried over to the next column.
 */
static inline void rec4(unsigned char *s, unsigned char *d, int lx, int lx2, int h)
{
	unsigned char *upper = s;
	unsigned char *lower = s + lx;
	unsigned char *out = d;
	int row, col;

	for(row = 0; row < h; row++)
	{
		unsigned int up_left = upper[0];
		unsigned int low_left = lower[0];

		for(col = 0; col < 16; col++)
		{
			unsigned int up_right = upper[col + 1];
			unsigned int low_right = lower[col + 1];

			out[col] = (unsigned int)(up_left + up_right + low_left + low_right + 2) >> 2;
			up_left = up_right;
			low_left = low_right;
		}
		upper += lx2;
		lower += lx2;
		out += lx2;
	}
}
882
883
/*
 * rec4c - 8-pixel-wide horizontal+vertical half-pel prediction.
 *
 * For each of the h rows and i = 0..7:
 *	d[i] = (s[i] + s[i + 1] + s[i + lx] + s[i + lx + 1] + 2) >> 2
 * All row pointers then advance by lx2 bytes.  Each source byte is loaded
 * once and carried over to the next column.
 */
static inline void rec4c(unsigned char *s,unsigned char *d, int lx, int lx2, int h)
{
	unsigned char *upper = s;
	unsigned char *lower = s + lx;
	unsigned char *out = d;
	int row, col;

	for(row = 0; row < h; row++)
	{
		unsigned int up_left = upper[0];
		unsigned int low_left = lower[0];

		for(col = 0; col < 8; col++)
		{
			unsigned int up_right = upper[col + 1];
			unsigned int low_right = lower[col + 1];

			out[col] = (unsigned int)(up_left + up_right + low_left + low_right + 2) >> 2;
			up_left = up_right;
			low_left = low_right;
		}
		upper += lx2;
		lower += lx2;
		out += lx2;
	}
}
908
909
/*
 * rec4a - 16-pixel-wide horizontal+vertical half-pel prediction averaged
 * into the destination.
 *
 * For each of the h rows and i = 0..15:
 *	p    = (s[i] + s[i + 1] + s[i + lx] + s[i + lx + 1] + 2) >> 2
 *	d[i] = (d[i] + p + 1) >> 1
 * All row pointers then advance by lx2 bytes.  Each source byte is loaded
 * once and carried over to the next column.
 */
static inline void rec4a(unsigned char *s,unsigned char *d, int lx, int lx2, int h)
{
	unsigned char *upper = s;
	unsigned char *lower = s + lx;
	unsigned char *out = d;
	int row, col;

	for(row = 0; row < h; row++)
	{
		unsigned int up_left = upper[0];
		unsigned int low_left = lower[0];

		for(col = 0; col < 16; col++)
		{
			unsigned int up_right = upper[col + 1];
			unsigned int low_right = lower[col + 1];
			unsigned int pred = (unsigned int)(up_left + up_right + low_left + low_right + 2) >> 2;

			out[col] = (out[col] + pred + 1) >> 1;
			up_left = up_right;
			low_left = low_right;
		}
		upper += lx2;
		lower += lx2;
		out += lx2;
	}
}
944
945
/*
 * rec4ac - 8-pixel-wide horizontal+vertical half-pel prediction averaged
 * into the destination.
 *
 * For each of the h rows and i = 0..7:
 *	p    = (s[i] + s[i + 1] + s[i + lx] + s[i + lx + 1] + 2) >> 2
 *	d[i] = (d[i] + p + 1) >> 1
 * All row pointers then advance by lx2 bytes.  Each source byte is loaded
 * once and carried over to the next column.
 */
static inline void rec4ac(unsigned char *s,unsigned char *d, int lx, int lx2, int h)
{
	unsigned char *upper = s;
	unsigned char *lower = s + lx;
	unsigned char *out = d;
	int row, col;

	for(row = 0; row < h; row++)
	{
		unsigned int up_left = upper[0];
		unsigned int low_left = lower[0];

		for(col = 0; col < 8; col++)
		{
			unsigned int up_right = upper[col + 1];
			unsigned int low_right = lower[col + 1];
			unsigned int pred = (unsigned int)(up_left + up_right + low_left + low_right + 2) >> 2;

			out[col] = (out[col] + pred + 1) >> 1;
			up_left = up_right;
			low_left = low_right;
		}
		upper += lx2;
		lower += lx2;
		out += lx2;
	}
}
973
974 static inline
recon_comp(mpeg3video_t * video,unsigned char * src,unsigned char * dst,int lx,int lx2,int w,int h,int x,int y,int dx,int dy,int addflag)975 void recon_comp(mpeg3video_t *video,
976 unsigned char *src,
977 unsigned char *dst,
978 int lx,
979 int lx2,
980 int w,
981 int h,
982 int x,
983 int y,
984 int dx,
985 int dy,
986 int addflag)
987 {
988 int switcher;
989 unsigned char *s, *d;
990
991 /* half pel scaling */
992 switcher = (dx & 1) << 3 | (dy & 1) << 2 | w;
993 if(addflag) switcher |= 2;
994 /* origins */
995 s = src + lx * (y + (dy >> 1)) + x + (dx >> 1);
996 d = dst + lx * y + x;
997
998 // Accelerated functions
999 #ifdef HAVE_MMX
1000 if(video->have_mmx)
1001 {
1002 switch(switcher)
1003 {
1004 case 0x3: reca_mmx(s, d, lx2, h); break;
1005 case 0x2: recac_mmx(s, d, lx2, h); break;
1006 case 0x1: rec_mmx(s, d, lx2, h); break;
1007 case 0x0: recc_mmx(s, d, lx2, h); break;
1008 case 0x7: recva(s, d, lx, lx2, h); break;
1009 case 0x6: recvac(s, d, lx, lx2, h); break;
1010 case 0x5: recv_mmx(s, d, lx, lx2, h); break;
1011 case 0x4: recvc_mmx(s, d, lx, lx2, h); break;
1012 case 0x9: rech(s, d, lx2, h); break;
1013 case 0x8: rechc(s, d, lx2, h); break;
1014 }
1015 }
1016 else
1017 #endif
1018 {
1019 switch(switcher)
1020 {
1021 case 0x3: reca(s, d, lx2, h); break;
1022 case 0x2: recac(s, d, lx2, h); break;
1023 case 0x1: rec(s, d, lx2, h); break;
1024 case 0x0: recc(s, d, lx2, h); break;
1025 case 0x7: recva(s, d, lx, lx2, h); break;
1026 case 0x6: recvac(s, d, lx, lx2, h); break;
1027 case 0x5: recv(s, d, lx, lx2, h); break;
1028 case 0x4: recvc(s, d, lx, lx2, h); break;
1029 case 0x9: rech(s, d, lx2, h); break;
1030 case 0x8: rechc(s, d, lx2, h); break;
1031 }
1032 }
1033
1034 // Unaccelerated functions
1035 switch(switcher)
1036 {
1037 case 0xb: recha(s, d, lx2, h); break;
1038 case 0xa: rechac(s, d, lx2, h); break;
1039 case 0xf: rec4a(s, d, lx, lx2, h); break;
1040 case 0xe: rec4ac(s, d, lx, lx2, h); break;
1041 case 0xd: rec4(s, d, lx, lx2, h); break;
1042 case 0xc: rec4c(s, d, lx, lx2, h); break;
1043 }
1044 }
1045
1046 /*
1047 unsigned char *src[]; * prediction source buffer *
1048 int sfield; * prediction source field number (0 or 1) *
1049 unsigned char *dst[]; * prediction destination buffer *
1050 int dfield; * prediction destination field number (0 or 1)*
1051 int lx,lx2; * horizontal offsets *
1052 int w,h; * prediction block/sub-block width, height *
1053 int x,y; * pixel co-ordinates of top-left sample in current MB *
1054 int dx,dy; * horizontal, vertical motion vector *
1055 int addflag; * add prediction error to prediction ? *
1056 */
recon(mpeg3video_t * video,unsigned char * src[],int sfield,unsigned char * dst[],int dfield,int lx,int lx2,int w,int h,int x,int y,int dx,int dy,int addflag)1057 static void recon(mpeg3video_t *video,
1058 unsigned char *src[],
1059 int sfield,
1060 unsigned char *dst[],
1061 int dfield,
1062 int lx,
1063 int lx2,
1064 int w,
1065 int h,
1066 int x,
1067 int y,
1068 int dx,
1069 int dy,
1070 int addflag)
1071 {
1072
1073 /* Y */
1074 recon_comp(video, (src[0] + (sfield ? (lx2 >> 1) : 0)),
1075 dst[0] + (dfield ? (lx2 >> 1) : 0),
1076 lx, lx2, w, h, x, y, dx, dy, addflag);
1077
1078 if(video->chroma_format != CHROMA444)
1079 {
1080 lx >>= 1;
1081 dx /= 2;
1082 lx2 >>= 1;
1083 w = 0;
1084 x >>= 1;
1085 }
1086
1087 if(video->chroma_format == CHROMA420)
1088 {
1089 h >>= 1;
1090 dy /= 2;
1091 y >>= 1;
1092 }
1093
1094 /* Cb */
1095 recon_comp(video, (src[1] + (sfield ? (lx2 >> 1) : 0)),
1096 dst[1] + (dfield ? (lx2 >> 1) : 0),
1097 lx, lx2, w, h, x, y, dx, dy, addflag);
1098
1099 /* Cr */
1100 recon_comp(video, (src[2] + (sfield ? (lx2 >> 1) : 0)),
1101 dst[2] + (dfield ? (lx2 >> 1) : 0),
1102 lx, lx2, w, h, x, y, dx, dy, addflag);
1103 }
1104
1105 #define WIDTH 1
1106
mpeg3video_reconstruct(mpeg3video_t * video,int bx,int by,int mb_type,int motion_type,int PMV[2][2][2],int mv_field_sel[2][2],int dmvector[2],int stwtype)1107 int mpeg3video_reconstruct(mpeg3video_t *video,
1108 int bx,
1109 int by,
1110 int mb_type,
1111 int motion_type,
1112 int PMV[2][2][2],
1113 int mv_field_sel[2][2],
1114 int dmvector[2],
1115 int stwtype)
1116 {
1117 int currentfield;
1118 unsigned char **predframe;
1119 int DMV[2][2];
1120 int stwtop, stwbot;
1121
1122 stwtop = stwtype % 3; /* 0:temporal, 1 : (spat+temp) / 2, 2 : spatial */
1123 stwbot = stwtype / 3;
1124
1125 if((mb_type & MB_FORWARD) || (video->pict_type == P_TYPE))
1126 {
1127 if(video->pict_struct == FRAME_PICTURE)
1128 {
1129 if((motion_type == MC_FRAME) || !(mb_type & MB_FORWARD))
1130 {
1131 /* frame-based prediction */
1132 {
1133 if(stwtop < 2)
1134 recon(video, video->oldrefframe, 0, video->newframe, 0,
1135 video->coded_picture_width, video->coded_picture_width << 1, WIDTH, 8, bx, by,
1136 PMV[0][0][0], PMV[0][0][1], stwtop);
1137
1138 if(stwbot < 2)
1139 recon(video, video->oldrefframe, 1, video->newframe, 1,
1140 video->coded_picture_width, video->coded_picture_width << 1, WIDTH, 8, bx, by,
1141 PMV[0][0][0], PMV[0][0][1], stwbot);
1142 }
1143 }
1144 else if(motion_type == MC_FIELD) /* field-based prediction */
1145 {
1146 /* top field prediction */
1147 if(stwtop < 2)
1148 recon(video, video->oldrefframe, mv_field_sel[0][0], video->newframe, 0,
1149 video->coded_picture_width << 1, video->coded_picture_width << 1, WIDTH, 8, bx, by >> 1,
1150 PMV[0][0][0], PMV[0][0][1] >> 1, stwtop);
1151
1152 /* bottom field prediction */
1153 if(stwbot < 2)
1154 recon(video, video->oldrefframe, mv_field_sel[1][0], video->newframe, 1,
1155 video->coded_picture_width << 1, video->coded_picture_width << 1, WIDTH, 8, bx, by >> 1,
1156 PMV[1][0][0], PMV[1][0][1] >> 1, stwbot);
1157 }
1158 else if(motion_type == MC_DMV)
1159 {
1160 /* dual prime prediction */
1161 /* calculate derived motion vectors */
1162 mpeg3video_calc_dmv(video,
1163 DMV,
1164 dmvector,
1165 PMV[0][0][0],
1166 PMV[0][0][1] >> 1);
1167
1168 if(stwtop < 2)
1169 {
1170 /* predict top field from top field */
1171 recon(video, video->oldrefframe, 0, video->newframe, 0,
1172 video->coded_picture_width << 1, video->coded_picture_width << 1, WIDTH, 8, bx, by>>1,
1173 PMV[0][0][0], PMV[0][0][1] >> 1, 0);
1174
1175 /* predict and add to top field from bottom field */
1176 recon(video, video->oldrefframe, 1, video->newframe, 0,
1177 video->coded_picture_width << 1, video->coded_picture_width << 1, WIDTH, 8, bx, by>>1,
1178 DMV[0][0], DMV[0][1], 1);
1179 }
1180
1181 if(stwbot < 2)
1182 {
1183 /* predict bottom field from bottom field */
1184 recon(video, video->oldrefframe, 1, video->newframe, 1,
1185 video->coded_picture_width << 1, video->coded_picture_width << 1, WIDTH, 8, bx, by>>1,
1186 PMV[0][0][0], PMV[0][0][1]>>1, 0);
1187
1188 /* predict and add to bottom field from top field */
1189 recon(video, video->oldrefframe, 0, video->newframe, 1,
1190 video->coded_picture_width << 1, video->coded_picture_width<<1, WIDTH, 8, bx, by>>1,
1191 DMV[1][0], DMV[1][1], 1);
1192 }
1193 }
1194 else
1195 /* invalid motion_type */
1196 /* fprintf(stderr, "reconstruct: invalid motion_type\n"); */
1197 ;
1198 }
1199 else
1200 {
1201 /* TOP_FIELD or BOTTOM_FIELD */
1202 /* field picture */
1203 currentfield = (video->pict_struct == BOTTOM_FIELD);
1204
1205 /* determine which frame to use for prediction */
1206 if((video->pict_type == P_TYPE) && video->secondfield
1207 && (currentfield != mv_field_sel[0][0]))
1208 predframe = video->refframe; /* same frame */
1209 else
1210 predframe = video->oldrefframe; /* previous frame */
1211
1212 if((motion_type == MC_FIELD) || !(mb_type & MB_FORWARD))
1213 {
1214 /* field-based prediction */
1215 if(stwtop < 2)
1216 recon(video, predframe,mv_field_sel[0][0],video->newframe,0,
1217 video->coded_picture_width << 1,video->coded_picture_width << 1,WIDTH,16,bx,by,
1218 PMV[0][0][0],PMV[0][0][1],stwtop);
1219 }
1220 else
1221 if(motion_type == MC_16X8)
1222 {
1223 if(stwtop < 2)
1224 {
1225 recon(video, predframe, mv_field_sel[0][0], video->newframe, 0,
1226 video->coded_picture_width << 1, video->coded_picture_width << 1, WIDTH, 8, bx, by,
1227 PMV[0][0][0], PMV[0][0][1], stwtop);
1228
1229 /* determine which frame to use for lower half prediction */
1230 if((video->pict_type==P_TYPE) && video->secondfield
1231 && (currentfield!=mv_field_sel[1][0]))
1232 predframe = video->refframe; /* same frame */
1233 else
1234 predframe = video->oldrefframe; /* previous frame */
1235
1236 recon(video, predframe, mv_field_sel[1][0], video->newframe, 0,
1237 video->coded_picture_width << 1, video->coded_picture_width << 1, WIDTH, 8, bx, by+8,
1238 PMV[1][0][0], PMV[1][0][1], stwtop);
1239 }
1240 }
1241 else
1242 if(motion_type == MC_DMV) /* dual prime prediction */
1243 {
1244 if(video->secondfield)
1245 predframe = video->refframe; /* same frame */
1246 else
1247 predframe = video->oldrefframe; /* previous frame */
1248
1249 /* calculate derived motion vectors */
1250 mpeg3video_calc_dmv(video,
1251 DMV,
1252 dmvector,
1253 PMV[0][0][0],
1254 PMV[0][0][1]);
1255
1256 /* predict from field of same parity */
1257 recon(video, video->oldrefframe, currentfield, video->newframe, 0,
1258 video->coded_picture_width << 1, video->coded_picture_width << 1, WIDTH, 16, bx, by,
1259 PMV[0][0][0], PMV[0][0][1], 0);
1260
1261 /* predict from field of opposite parity */
1262 recon(video, predframe, !currentfield, video->newframe, 0,
1263 video->coded_picture_width << 1, video->coded_picture_width << 1, WIDTH, 16, bx, by,
1264 DMV[0][0], DMV[0][1], 1);
1265 }
1266 else
1267 /* invalid motion_type */
1268 /* fprintf(stderr, "reconstruct: invalid motion_type\n"); */
1269 ;
1270 }
1271 stwtop = stwbot = 1;
1272 }
1273
1274 if(mb_type & MB_BACKWARD)
1275 {
1276 if(video->pict_struct == FRAME_PICTURE)
1277 {
1278 if(motion_type == MC_FRAME)
1279 {
1280 /* frame-based prediction */
1281 if(stwtop < 2)
1282 recon(video, video->refframe, 0, video->newframe, 0,
1283 video->coded_picture_width, video->coded_picture_width << 1, WIDTH, 8, bx, by,
1284 PMV[0][1][0], PMV[0][1][1], stwtop);
1285
1286 if(stwbot < 2)
1287 recon(video, video->refframe, 1, video->newframe, 1,
1288 video->coded_picture_width, video->coded_picture_width << 1, WIDTH, 8, bx, by,
1289 PMV[0][1][0], PMV[0][1][1], stwbot);
1290 }
1291 else
1292 {
1293 /* field-based prediction */
1294 /* top field prediction */
1295 if(stwtop < 2)
1296 {
1297 recon(video, video->refframe, mv_field_sel[0][1], video->newframe, 0,
1298 (video->coded_picture_width << 1), (video->coded_picture_width<<1), WIDTH, 8, bx, (by >> 1),
1299 PMV[0][1][0], (PMV[0][1][1] >> 1), stwtop);
1300 }
1301
1302 /* bottom field prediction */
1303 if(stwbot < 2)
1304 {
1305 recon(video, video->refframe, mv_field_sel[1][1], video->newframe, 1, (video->coded_picture_width << 1),
1306 (video->coded_picture_width << 1), WIDTH, 8, bx, (by>>1),
1307 PMV[1][1][0], (PMV[1][1][1]>>1), stwbot);
1308 }
1309 }
1310 }
1311 else
1312 {
1313 /* TOP_FIELD or BOTTOM_FIELD */
1314 /* field picture */
1315 if(motion_type == MC_FIELD)
1316 {
1317 /* field-based prediction */
1318 recon(video, video->refframe, mv_field_sel[0][1], video->newframe, 0,
1319 video->coded_picture_width << 1, video->coded_picture_width << 1, WIDTH, 16, bx, by,
1320 PMV[0][1][0], PMV[0][1][1], stwtop);
1321 }
1322 else if(motion_type==MC_16X8)
1323 {
1324 recon(video, video->refframe, mv_field_sel[0][1], video->newframe, 0,
1325 video->coded_picture_width << 1, video->coded_picture_width << 1, WIDTH, 8, bx, by,
1326 PMV[0][1][0], PMV[0][1][1], stwtop);
1327
1328 recon(video, video->refframe, mv_field_sel[1][1], video->newframe, 0,
1329 video->coded_picture_width << 1, video->coded_picture_width << 1, WIDTH, 8, bx, by+8,
1330 PMV[1][1][0], PMV[1][1][1], stwtop);
1331 }
1332 else
1333 /* invalid motion_type */
1334 /* fprintf(stderr, "reconstruct: invalid motion_type\n"); */
1335 ;
1336 }
1337 } /* mb_type & MB_BACKWARD */
1338 return 0;
1339 }
1340
1341
1342