1 /*****************************************************************************
2  * predict.c: intra prediction
3  *****************************************************************************
4  * Copyright (C) 2003-2014 x264 project
5  *
6  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
7  *          Loren Merritt <lorenm@u.washington.edu>
8  *          Fiona Glaser <fiona@x264.com>
9  *          Henrik Gramner <henrik@gramner.com>
10  *
11  * This program is free software; you can redistribute it and/or modify
12  * it under the terms of the GNU General Public License as published by
13  * the Free Software Foundation; either version 2 of the License, or
14  * (at your option) any later version.
15  *
16  * This program is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19  * GNU General Public License for more details.
20  *
21  * You should have received a copy of the GNU General Public License
22  * along with this program; if not, write to the Free Software
23  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
24  *
25  * This program is also available under a commercial proprietary license.
26  * For more information, contact us at licensing@x264.com.
27  *****************************************************************************/
28 
29 /* predict4x4 are inspired from ffmpeg h264 decoder */
30 
31 
32 #include "common.h"
33 
34 #if HAVE_MMX
35 #   include "x86/predict.h"
36 #endif
37 #if ARCH_PPC
38 #   include "ppc/predict.h"
39 #endif
40 #if ARCH_ARM
41 #   include "arm/predict.h"
42 #endif
43 #if ARCH_AARCH64
44 #   include "aarch64/predict.h"
45 #endif
46 
47 /****************************************************************************
48  * 16x16 prediction for intra luma block
49  ****************************************************************************/
50 
/* Fill the whole 16x16 block with the splatted DC value v,
 * writing four packed pixels at a time. */
#define PREDICT_16x16_DC(v)\
    for( int i = 0; i < 16; i++ )\
    {\
        MPIXEL_X4( src+ 0 ) = v;\
        MPIXEL_X4( src+ 4 ) = v;\
        MPIXEL_X4( src+ 8 ) = v;\
        MPIXEL_X4( src+12 ) = v;\
        src += FDEC_STRIDE;\
    }
60 
/* 16x16 DC prediction: fill with the rounded average of the 16 left
 * and 16 top neighbouring pixels. */
void x264_predict_16x16_dc_c( pixel *src )
{
    int sum = 0;
    int k;
    pixel4 splat;

    /* Accumulate the left column and the top row. */
    for( k = 0; k < 16; k++ )
    {
        sum += src[k * FDEC_STRIDE - 1];
        sum += src[k - FDEC_STRIDE];
    }
    splat = PIXEL_SPLAT_X4( ( sum + 16 ) >> 5 );

    PREDICT_16x16_DC( splat );
}
76 
/* 16x16 DC prediction using only the left column (top row unavailable). */
static void x264_predict_16x16_dc_left_c( pixel *src )
{
    int sum = 0;
    int row;
    pixel4 splat;

    for( row = 0; row < 16; row++ )
        sum += src[row * FDEC_STRIDE - 1];

    splat = PIXEL_SPLAT_X4( ( sum + 8 ) >> 4 );

    PREDICT_16x16_DC( splat );
}
90 
/* 16x16 DC prediction using only the top row (left column unavailable). */
static void x264_predict_16x16_dc_top_c( pixel *src )
{
    int sum = 0;
    int col;
    pixel4 splat;

    for( col = 0; col < 16; col++ )
        sum += src[col - FDEC_STRIDE];

    splat = PIXEL_SPLAT_X4( ( sum + 8 ) >> 4 );

    PREDICT_16x16_DC( splat );
}
103 
/* 16x16 DC prediction with no neighbours: fill with mid-grey, 2^(BIT_DEPTH-1). */
static void x264_predict_16x16_dc_128_c( pixel *src )
{
    const pixel4 grey = PIXEL_SPLAT_X4( 1 << (BIT_DEPTH-1) );
    PREDICT_16x16_DC( grey );
}
108 
/* 16x16 horizontal prediction: replicate each row's left neighbour
 * across the row. */
void x264_predict_16x16_h_c( pixel *src )
{
    int row;

    for( row = 0; row < 16; row++ )
    {
        const pixel4 left = PIXEL_SPLAT_X4( src[-1] );

        MPIXEL_X4( src+ 0 ) = left;
        MPIXEL_X4( src+ 4 ) = left;
        MPIXEL_X4( src+ 8 ) = left;
        MPIXEL_X4( src+12 ) = left;
        src += FDEC_STRIDE;
    }
}
123 
/* 16x16 vertical prediction: copy the top neighbour row into all 16 rows. */
void x264_predict_16x16_v_c( pixel *src )
{
    pixel4 top[4];
    int row;

    top[0] = MPIXEL_X4( &src[ 0-FDEC_STRIDE] );
    top[1] = MPIXEL_X4( &src[ 4-FDEC_STRIDE] );
    top[2] = MPIXEL_X4( &src[ 8-FDEC_STRIDE] );
    top[3] = MPIXEL_X4( &src[12-FDEC_STRIDE] );

    for( row = 0; row < 16; row++ )
    {
        MPIXEL_X4( src+ 0 ) = top[0];
        MPIXEL_X4( src+ 4 ) = top[1];
        MPIXEL_X4( src+ 8 ) = top[2];
        MPIXEL_X4( src+12 ) = top[3];
        src += FDEC_STRIDE;
    }
}
/* 16x16 plane (planar) prediction: fit a linear gradient to the border
 * pixels and extrapolate it over the block. */
void x264_predict_16x16_p_c( pixel *src )
{
    int H = 0, V = 0;
    int a, b, c;
    int i00;
    int k, x, y;

    /* Weighted differences of mirrored border pixels give the
     * horizontal (H) and vertical (V) gradient estimates. */
    for( k = 0; k <= 7; k++ )
    {
        H += ( k + 1 ) * ( src[8 + k - FDEC_STRIDE] - src[6 - k - FDEC_STRIDE] );
        V += ( k + 1 ) * ( src[-1 + (8+k)*FDEC_STRIDE] - src[-1 + (6-k)*FDEC_STRIDE] );
    }

    a = 16 * ( src[-1 + 15*FDEC_STRIDE] + src[15 - FDEC_STRIDE] );
    b = ( 5 * H + 32 ) >> 6;
    c = ( 5 * V + 32 ) >> 6;

    /* Value at the top-left sample, in 1/32 pixel units. */
    i00 = a - 7*b - 7*c + 16;

    for( y = 0; y < 16; y++ )
    {
        int pix = i00;

        for( x = 0; x < 16; x++ )
        {
            src[x] = x264_clip_pixel( pix >> 5 );
            pix += b;
        }
        src += FDEC_STRIDE;
        i00 += c;
    }
}
178 
179 
180 /****************************************************************************
181  * 8x8 prediction for intra chroma block (4:2:0)
182  ****************************************************************************/
183 
/* 8x8 chroma DC prediction with no neighbours: fill with mid-grey. */
static void x264_predict_8x8c_dc_128_c( pixel *src )
{
    const pixel4 grey = PIXEL_SPLAT_X4( 1 << (BIT_DEPTH-1) );
    int row;

    for( row = 0; row < 8; row++ )
    {
        MPIXEL_X4( src+0 ) = grey;
        MPIXEL_X4( src+4 ) = grey;
        src += FDEC_STRIDE;
    }
}
/* 8x8 chroma DC prediction from the left column only: the top and
 * bottom 4-row halves each get an independent DC. */
static void x264_predict_8x8c_dc_left_c( pixel *src )
{
    int sum_top = 0, sum_bot = 0;
    int row;
    pixel4 splat_top;
    pixel4 splat_bot;

    for( row = 0; row < 4; row++ )
    {
        sum_top += src[row * FDEC_STRIDE - 1];
        sum_bot += src[(row+4) * FDEC_STRIDE - 1];
    }
    splat_top = PIXEL_SPLAT_X4( ( sum_top + 2 ) >> 2 );
    splat_bot = PIXEL_SPLAT_X4( ( sum_bot + 2 ) >> 2 );

    for( row = 0; row < 4; row++ )
    {
        MPIXEL_X4( src+0 ) = splat_top;
        MPIXEL_X4( src+4 ) = splat_top;
        src += FDEC_STRIDE;
    }
    for( row = 0; row < 4; row++ )
    {
        MPIXEL_X4( src+0 ) = splat_bot;
        MPIXEL_X4( src+4 ) = splat_bot;
        src += FDEC_STRIDE;
    }
}
/* 8x8 chroma DC prediction from the top row only: the left and right
 * 4-column halves each get an independent DC. */
static void x264_predict_8x8c_dc_top_c( pixel *src )
{
    int sum_l = 0, sum_r = 0;
    int col, row;
    pixel4 splat_l;
    pixel4 splat_r;

    for( col = 0; col < 4; col++ )
    {
        sum_l += src[col     - FDEC_STRIDE];
        sum_r += src[col + 4 - FDEC_STRIDE];
    }
    splat_l = PIXEL_SPLAT_X4( ( sum_l + 2 ) >> 2 );
    splat_r = PIXEL_SPLAT_X4( ( sum_r + 2 ) >> 2 );

    for( row = 0; row < 8; row++ )
    {
        MPIXEL_X4( src+0 ) = splat_l;
        MPIXEL_X4( src+4 ) = splat_r;
        src += FDEC_STRIDE;
    }
}
247 
/* 8x8 chroma DC prediction: four 4x4 quadrants, each averaging the
 * border sums available on its side(s) per the H.264 rules. */
void x264_predict_8x8c_dc_c( pixel *src )
{
    int s0 = 0, s1 = 0, s2 = 0, s3 = 0;
    int k, row;
    pixel4 dc0, dc1, dc2, dc3;

    /*
          s0 s1
       s2
       s3
    */
    for( k = 0; k < 4; k++ )
    {
        s0 += src[k - FDEC_STRIDE];
        s1 += src[k + 4 - FDEC_STRIDE];
        s2 += src[k * FDEC_STRIDE - 1];
        s3 += src[(k+4) * FDEC_STRIDE - 1];
    }
    /*
       dc0 dc1
       dc2 dc3
    */
    dc0 = PIXEL_SPLAT_X4( ( s0 + s2 + 4 ) >> 3 );
    dc1 = PIXEL_SPLAT_X4( ( s1 + 2 ) >> 2 );
    dc2 = PIXEL_SPLAT_X4( ( s3 + 2 ) >> 2 );
    dc3 = PIXEL_SPLAT_X4( ( s1 + s3 + 4 ) >> 3 );

    for( row = 0; row < 4; row++ )
    {
        MPIXEL_X4( src+0 ) = dc0;
        MPIXEL_X4( src+4 ) = dc1;
        src += FDEC_STRIDE;
    }
    for( row = 0; row < 4; row++ )
    {
        MPIXEL_X4( src+0 ) = dc2;
        MPIXEL_X4( src+4 ) = dc3;
        src += FDEC_STRIDE;
    }
}
293 
/* 8x8 chroma horizontal prediction: replicate each row's left neighbour. */
void x264_predict_8x8c_h_c( pixel *src )
{
    int row;

    for( row = 0; row < 8; row++ )
    {
        const pixel4 left = PIXEL_SPLAT_X4( src[-1] );

        MPIXEL_X4( src+0 ) = left;
        MPIXEL_X4( src+4 ) = left;
        src += FDEC_STRIDE;
    }
}
/* 8x8 chroma vertical prediction: copy the top row into all 8 rows. */
void x264_predict_8x8c_v_c( pixel *src )
{
    const pixel4 top_lo = MPIXEL_X4( src+0-FDEC_STRIDE );
    const pixel4 top_hi = MPIXEL_X4( src+4-FDEC_STRIDE );
    int row;

    for( row = 0; row < 8; row++ )
    {
        MPIXEL_X4( src+0 ) = top_lo;
        MPIXEL_X4( src+4 ) = top_hi;
        src += FDEC_STRIDE;
    }
}
/* 8x8 chroma plane prediction: linear gradient fit to the borders. */
void x264_predict_8x8c_p_c( pixel *src )
{
    int H = 0, V = 0;
    int a, b, c;
    int i00;
    int k, x, y;

    for( k = 0; k < 4; k++ )
    {
        H += ( k + 1 ) * ( src[4 + k - FDEC_STRIDE] - src[2 - k - FDEC_STRIDE] );
        V += ( k + 1 ) * ( src[-1 + (k+4)*FDEC_STRIDE] - src[-1 + (2-k)*FDEC_STRIDE] );
    }

    a = 16 * ( src[-1 + 7*FDEC_STRIDE] + src[7 - FDEC_STRIDE] );
    b = ( 17 * H + 16 ) >> 5;
    c = ( 17 * V + 16 ) >> 5;
    i00 = a - 3*b - 3*c + 16;  /* top-left sample, 1/32 pixel units */

    for( y = 0; y < 8; y++ )
    {
        int pix = i00;

        for( x = 0; x < 8; x++ )
        {
            src[x] = x264_clip_pixel( pix >> 5 );
            pix += b;
        }
        src += FDEC_STRIDE;
        i00 += c;
    }
}
353 
354 /****************************************************************************
355  * 8x16 prediction for intra chroma block (4:2:2)
356  ****************************************************************************/
357 
/* 8x16 chroma (4:2:2) DC prediction with no neighbours: mid-grey fill. */
static void x264_predict_8x16c_dc_128_c( pixel *src )
{
    const pixel4 grey = PIXEL_SPLAT_X4( 1 << (BIT_DEPTH-1) );
    int row;

    for( row = 0; row < 16; row++ )
    {
        MPIXEL_X4( src+0 ) = grey;
        MPIXEL_X4( src+4 ) = grey;
        src += FDEC_STRIDE;
    }
}
/* 8x16 chroma DC prediction from the left column only: each 4-row group
 * gets an independent DC from its four left neighbours. */
static void x264_predict_8x16c_dc_left_c( pixel *src )
{
    int grp;

    for( grp = 0; grp < 4; grp++ )
    {
        int sum = 0;
        int row;
        pixel4 splat;

        for( row = 0; row < 4; row++ )
            sum += src[row*FDEC_STRIDE - 1];

        splat = PIXEL_SPLAT_X4( (sum + 2) >> 2 );

        for( row = 0; row < 4; row++ )
        {
            MPIXEL_X4( src+0 ) = splat;
            MPIXEL_X4( src+4 ) = splat;
            src += FDEC_STRIDE;
        }
    }
}
392 
/* 8x16 chroma DC prediction from the top row only: left and right
 * 4-column halves each get an independent DC. */
static void x264_predict_8x16c_dc_top_c( pixel *src )
{
    int sum_l = 0, sum_r = 0;
    int col, row;
    pixel4 splat_l;
    pixel4 splat_r;

    for( col = 0; col < 4; col++ )
    {
        sum_l += src[col     - FDEC_STRIDE];
        sum_r += src[col + 4 - FDEC_STRIDE];
    }
    splat_l = PIXEL_SPLAT_X4( ( sum_l + 2 ) >> 2 );
    splat_r = PIXEL_SPLAT_X4( ( sum_r + 2 ) >> 2 );

    for( row = 0; row < 16; row++ )
    {
        MPIXEL_X4( src+0 ) = splat_l;
        MPIXEL_X4( src+4 ) = splat_r;
        src += FDEC_STRIDE;
    }
}
416 
/* 8x16 chroma DC prediction: eight 4x4 quadrants. Left-half quadrants
 * average top+left sums where both are usable; right-half quadrants use
 * the top-right sum (s1) combined with the adjacent left sum. */
void x264_predict_8x16c_dc_c( pixel *src )
{
    int s0 = 0, s1 = 0, s2 = 0, s3 = 0, s4 = 0, s5 = 0;
    int k, grp, row;
    pixel4 dcl[4];  /* DC of the left 4x4 in each 4-row group */
    pixel4 dcr[4];  /* DC of the right 4x4 in each 4-row group */

    /*
          s0 s1
       s2
       s3
       s4
       s5
    */
    for( k = 0; k < 4; k++ )
    {
        s0 += src[k+0 - FDEC_STRIDE];
        s1 += src[k+4 - FDEC_STRIDE];
        s2 += src[(k+0)  * FDEC_STRIDE - 1];
        s3 += src[(k+4)  * FDEC_STRIDE - 1];
        s4 += src[(k+8)  * FDEC_STRIDE - 1];
        s5 += src[(k+12) * FDEC_STRIDE - 1];
    }
    /*
       dcl[0] dcr[0]
       dcl[1] dcr[1]
       dcl[2] dcr[2]
       dcl[3] dcr[3]
    */
    dcl[0] = PIXEL_SPLAT_X4( ( s0 + s2 + 4 ) >> 3 );
    dcr[0] = PIXEL_SPLAT_X4( ( s1 + 2 ) >> 2 );
    dcl[1] = PIXEL_SPLAT_X4( ( s3 + 2 ) >> 2 );
    dcr[1] = PIXEL_SPLAT_X4( ( s1 + s3 + 4 ) >> 3 );
    dcl[2] = PIXEL_SPLAT_X4( ( s4 + 2 ) >> 2 );
    dcr[2] = PIXEL_SPLAT_X4( ( s1 + s4 + 4 ) >> 3 );
    dcl[3] = PIXEL_SPLAT_X4( ( s5 + 2 ) >> 2 );
    dcr[3] = PIXEL_SPLAT_X4( ( s1 + s5 + 4 ) >> 3 );

    for( grp = 0; grp < 4; grp++ )
        for( row = 0; row < 4; row++ )
        {
            MPIXEL_X4( src+0 ) = dcl[grp];
            MPIXEL_X4( src+4 ) = dcr[grp];
            src += FDEC_STRIDE;
        }
}
487 
/* 8x16 chroma horizontal prediction: replicate each row's left neighbour. */
void x264_predict_8x16c_h_c( pixel *src )
{
    int row;

    for( row = 0; row < 16; row++ )
    {
        const pixel4 left = PIXEL_SPLAT_X4( src[-1] );

        MPIXEL_X4( src+0 ) = left;
        MPIXEL_X4( src+4 ) = left;
        src += FDEC_STRIDE;
    }
}
/* 8x16 chroma vertical prediction: copy the top row into all 16 rows. */
void x264_predict_8x16c_v_c( pixel *src )
{
    const pixel4 top_lo = MPIXEL_X4( src+0-FDEC_STRIDE );
    const pixel4 top_hi = MPIXEL_X4( src+4-FDEC_STRIDE );
    int row;

    for( row = 0; row < 16; row++ )
    {
        MPIXEL_X4( src+0 ) = top_lo;
        MPIXEL_X4( src+4 ) = top_hi;
        src += FDEC_STRIDE;
    }
}
513 
/* 8x16 chroma plane prediction: horizontal gradient from 4 top samples,
 * vertical gradient from 8 left samples. */
void x264_predict_8x16c_p_c( pixel *src )
{
    int H = 0, V = 0;
    int a, b, c;
    int i00;
    int k, x, y;

    for( k = 0; k < 4; k++ )
        H += ( k + 1 ) * ( src[4 + k - FDEC_STRIDE] - src[2 - k - FDEC_STRIDE] );
    for( k = 0; k < 8; k++ )
        V += ( k + 1 ) * ( src[-1 + (k+8)*FDEC_STRIDE] - src[-1 + (6-k)*FDEC_STRIDE] );

    a = 16 * ( src[-1 + 15*FDEC_STRIDE] + src[7 - FDEC_STRIDE] );
    b = ( 17 * H + 16 ) >> 5;
    c = ( 5 * V + 32 ) >> 6;
    i00 = a - 3*b - 7*c + 16;  /* top-left sample, 1/32 pixel units */

    for( y = 0; y < 16; y++ )
    {
        int pix = i00;

        for( x = 0; x < 8; x++ )
        {
            src[x] = x264_clip_pixel( pix >> 5 );
            pix += b;
        }
        src += FDEC_STRIDE;
        i00 += c;
    }
}
548 
549 /****************************************************************************
550  * 4x4 prediction for intra luma block
551  ****************************************************************************/
552 
/* Pixel accessors relative to the block origin; negative coordinates
 * address the neighbouring (already decoded) border pixels. */
#define SRC(x,y) src[(x)+(y)*FDEC_STRIDE]
#define SRC_X4(x,y) MPIXEL_X4( &SRC(x,y) )

/* Fill the whole 4x4 block with the packed value v (v evaluated once). */
#define PREDICT_4x4_DC(v)\
    SRC_X4(0,0) = SRC_X4(0,1) = SRC_X4(0,2) = SRC_X4(0,3) = v;
558 
/* 4x4 DC prediction with no neighbours: fill with mid-grey. */
static void x264_predict_4x4_dc_128_c( pixel *src )
{
    const pixel4 grey = PIXEL_SPLAT_X4( 1 << (BIT_DEPTH-1) );
    PREDICT_4x4_DC( grey );
}
/* 4x4 DC prediction from the four left neighbours only. */
static void x264_predict_4x4_dc_left_c( pixel *src )
{
    int sum = SRC(-1,0) + SRC(-1,1) + SRC(-1,2) + SRC(-1,3);
    pixel4 dc = PIXEL_SPLAT_X4( (sum + 2) >> 2 );
    PREDICT_4x4_DC( dc );
}
/* 4x4 DC prediction from the four top neighbours only. */
static void x264_predict_4x4_dc_top_c( pixel *src )
{
    int sum = SRC(0,-1) + SRC(1,-1) + SRC(2,-1) + SRC(3,-1);
    pixel4 dc = PIXEL_SPLAT_X4( (sum + 2) >> 2 );
    PREDICT_4x4_DC( dc );
}
/* 4x4 DC prediction: average of the four left and four top neighbours. */
void x264_predict_4x4_dc_c( pixel *src )
{
    int sum = SRC(-1,0) + SRC(-1,1) + SRC(-1,2) + SRC(-1,3)
            + SRC(0,-1) + SRC(1,-1) + SRC(2,-1) + SRC(3,-1);
    pixel4 dc = PIXEL_SPLAT_X4( (sum + 4) >> 3 );
    PREDICT_4x4_DC( dc );
}
/* 4x4 horizontal prediction: replicate each row's left neighbour. */
void x264_predict_4x4_h_c( pixel *src )
{
    int y;

    for( y = 0; y < 4; y++ )
        SRC_X4(0,y) = PIXEL_SPLAT_X4( SRC(-1,y) );
}
/* 4x4 vertical prediction: copy the top neighbour row into all 4 rows. */
void x264_predict_4x4_v_c( pixel *src )
{
    const pixel4 top = SRC_X4(0,-1);
    PREDICT_4x4_DC( top );
}
590 
/* Load the left neighbours into l0..l3 (some modes leave l3 unused). */
#define PREDICT_4x4_LOAD_LEFT\
    int l0 = SRC(-1,0);\
    int l1 = SRC(-1,1);\
    int l2 = SRC(-1,2);\
    UNUSED int l3 = SRC(-1,3);

/* Load the top neighbours into t0..t3. */
#define PREDICT_4x4_LOAD_TOP\
    int t0 = SRC(0,-1);\
    int t1 = SRC(1,-1);\
    int t2 = SRC(2,-1);\
    UNUSED int t3 = SRC(3,-1);

/* Load the top-right neighbours into t4..t7. */
#define PREDICT_4x4_LOAD_TOP_RIGHT\
    int t4 = SRC(4,-1);\
    int t5 = SRC(5,-1);\
    int t6 = SRC(6,-1);\
    UNUSED int t7 = SRC(7,-1);

/* Rounded 2-tap and 1-2-1 3-tap averaging filters used by the
 * directional intra modes. */
#define F1(a,b)   (((a)+(b)+1)>>1)
#define F2(a,b,c) (((a)+2*(b)+(c)+2)>>2)
611 
/* 4x4 diagonal-down-left prediction: each 45-degree anti-diagonal is
 * filled with a 3-tap average of consecutive top/top-right neighbours. */
static void x264_predict_4x4_ddl_c( pixel *src )
{
    PREDICT_4x4_LOAD_TOP
    PREDICT_4x4_LOAD_TOP_RIGHT
    SRC(0,0)= F2(t0,t1,t2);
    SRC(1,0)=SRC(0,1)= F2(t1,t2,t3);
    SRC(2,0)=SRC(1,1)=SRC(0,2)= F2(t2,t3,t4);
    SRC(3,0)=SRC(2,1)=SRC(1,2)=SRC(0,3)= F2(t3,t4,t5);
    SRC(3,1)=SRC(2,2)=SRC(1,3)= F2(t4,t5,t6);
    SRC(3,2)=SRC(2,3)= F2(t5,t6,t7);
    SRC(3,3)= F2(t6,t7,t7);  /* last tap duplicated: no pixel past t7 */
}
/* 4x4 diagonal-down-right prediction: diagonals filled from the
 * left column, the top-left corner, and the top row. */
static void x264_predict_4x4_ddr_c( pixel *src )
{
    int lt = SRC(-1,-1);
    PREDICT_4x4_LOAD_LEFT
    PREDICT_4x4_LOAD_TOP
    SRC(3,0)= F2(t3,t2,t1);
    SRC(2,0)=SRC(3,1)= F2(t2,t1,t0);
    SRC(1,0)=SRC(2,1)=SRC(3,2)= F2(t1,t0,lt);
    SRC(0,0)=SRC(1,1)=SRC(2,2)=SRC(3,3)= F2(t0,lt,l0);  /* main diagonal */
    SRC(0,1)=SRC(1,2)=SRC(2,3)= F2(lt,l0,l1);
    SRC(0,2)=SRC(1,3)= F2(l0,l1,l2);
    SRC(0,3)= F2(l1,l2,l3);
}
637 
/* 4x4 vertical-right prediction: near-vertical diagonals alternating
 * 2-tap (F1) and 3-tap (F2) interpolation of the top/left border. */
static void x264_predict_4x4_vr_c( pixel *src )
{
    int lt = SRC(-1,-1);
    PREDICT_4x4_LOAD_LEFT
    PREDICT_4x4_LOAD_TOP
    SRC(0,3)= F2(l2,l1,l0);
    SRC(0,2)= F2(l1,l0,lt);
    SRC(0,1)=SRC(1,3)= F2(l0,lt,t0);
    SRC(0,0)=SRC(1,2)= F1(lt,t0);
    SRC(1,1)=SRC(2,3)= F2(lt,t0,t1);
    SRC(1,0)=SRC(2,2)= F1(t0,t1);
    SRC(2,1)=SRC(3,3)= F2(t0,t1,t2);
    SRC(2,0)=SRC(3,2)= F1(t1,t2);
    SRC(3,1)= F2(t1,t2,t3);
    SRC(3,0)= F1(t2,t3);
}
654 
/* 4x4 horizontal-down prediction: near-horizontal diagonals alternating
 * 2-tap (F1) and 3-tap (F2) interpolation of the left/top border. */
static void x264_predict_4x4_hd_c( pixel *src )
{
    int lt= SRC(-1,-1);
    PREDICT_4x4_LOAD_LEFT
    PREDICT_4x4_LOAD_TOP
    SRC(0,3)= F1(l2,l3);
    SRC(1,3)= F2(l1,l2,l3);
    SRC(0,2)=SRC(2,3)= F1(l1,l2);
    SRC(1,2)=SRC(3,3)= F2(l0,l1,l2);
    SRC(0,1)=SRC(2,2)= F1(l0,l1);
    SRC(1,1)=SRC(3,2)= F2(lt,l0,l1);
    SRC(0,0)=SRC(2,1)= F1(lt,l0);
    SRC(1,0)=SRC(3,1)= F2(t0,lt,l0);
    SRC(2,0)= F2(t1,t0,lt);
    SRC(3,0)= F2(t2,t1,t0);
}
671 
/* 4x4 vertical-left prediction: interpolates down-left from the top and
 * top-right neighbours, alternating F1/F2 between row parities. */
static void x264_predict_4x4_vl_c( pixel *src )
{
    PREDICT_4x4_LOAD_TOP
    PREDICT_4x4_LOAD_TOP_RIGHT
    SRC(0,0)= F1(t0,t1);
    SRC(0,1)= F2(t0,t1,t2);
    SRC(1,0)=SRC(0,2)= F1(t1,t2);
    SRC(1,1)=SRC(0,3)= F2(t1,t2,t3);
    SRC(2,0)=SRC(1,2)= F1(t2,t3);
    SRC(2,1)=SRC(1,3)= F2(t2,t3,t4);
    SRC(3,0)=SRC(2,2)= F1(t3,t4);
    SRC(3,1)=SRC(2,3)= F2(t3,t4,t5);
    SRC(3,2)= F1(t4,t5);
    SRC(3,3)= F2(t4,t5,t6);
}
687 
/* 4x4 horizontal-up prediction: interpolates upward from the left
 * neighbours; pixels past the last neighbour are clamped to l3. */
static void x264_predict_4x4_hu_c( pixel *src )
{
    PREDICT_4x4_LOAD_LEFT
    SRC(0,0)= F1(l0,l1);
    SRC(1,0)= F2(l0,l1,l2);
    SRC(2,0)=SRC(0,1)= F1(l1,l2);
    SRC(3,0)=SRC(1,1)= F2(l1,l2,l3);
    SRC(2,1)=SRC(0,2)= F1(l2,l3);
    SRC(3,1)=SRC(1,2)= F2(l2,l3,l3);
    /* Remaining pixels extend the bottom-left neighbour. */
    SRC(3,2)=SRC(1,3)=SRC(0,3)=
    SRC(2,2)=SRC(2,3)=SRC(3,3)= l3;
}
700 
701 /****************************************************************************
702  * 8x8 prediction for intra luma block
703  ****************************************************************************/
704 
/* Store one 1-2-1 filtered left (PL) or top (PT) border pixel into the
 * packed edge[] buffer used by the 8x8 luma predictors. */
#define PL(y) \
    edge[14-y] = F2(SRC(-1,y-1), SRC(-1,y), SRC(-1,y+1));
#define PT(x) \
    edge[16+x] = F2(SRC(x-1,-1), SRC(x,-1), SRC(x+1,-1));
709 
/* Build the low-pass filtered border buffer for 8x8 luma intra
 * prediction.  i_neighbor says which neighbours exist (MB_TOPLEFT /
 * MB_TOPRIGHT); i_filters selects which edges to produce. */
static void x264_predict_8x8_filter_c( pixel *src, pixel edge[36], int i_neighbor, int i_filters )
{
    /* edge[7..14] = l7..l0
     * edge[15] = lt
     * edge[16..31] = t0 .. t15
     * edge[32] = t15 */

    int have_lt = i_neighbor & MB_TOPLEFT;
    if( i_filters & MB_LEFT )
    {
        edge[15] = (SRC(0,-1) + 2*SRC(-1,-1) + SRC(-1,0) + 2) >> 2;
        /* l0 filter substitutes l0 for the missing top-left corner. */
        edge[14] = ((have_lt ? SRC(-1,-1) : SRC(-1,0))
                 + 2*SRC(-1,0) + SRC(-1,1) + 2) >> 2;
        PL(1) PL(2) PL(3) PL(4) PL(5) PL(6)
        /* l7 has no pixel below it: weight it 3x instead. */
        edge[6] =
        edge[7] = (SRC(-1,6) + 3*SRC(-1,7) + 2) >> 2;
    }

    if( i_filters & MB_TOP )
    {
        int have_tr = i_neighbor & MB_TOPRIGHT;
        /* t0 filter substitutes t0 for the missing top-left corner. */
        edge[16] = ((have_lt ? SRC(-1,-1) : SRC(0,-1))
                 + 2*SRC(0,-1) + SRC(1,-1) + 2) >> 2;
        PT(1) PT(2) PT(3) PT(4) PT(5) PT(6)
        /* t7 filter substitutes t7 for a missing top-right pixel. */
        edge[23] = (SRC(6,-1) + 2*SRC(7,-1)
                 + (have_tr ? SRC(8,-1) : SRC(7,-1)) + 2) >> 2;

        if( i_filters & MB_TOPRIGHT )
        {
            if( have_tr )
            {
                PT(8) PT(9) PT(10) PT(11) PT(12) PT(13) PT(14)
                edge[31] =
                edge[32] = (SRC(14,-1) + 3*SRC(15,-1) + 2) >> 2;
            }
            else
            {
                /* No top-right macroblock: replicate t7 across t8..t15. */
                MPIXEL_X4( edge+24 ) = PIXEL_SPLAT_X4( SRC(7,-1) );
                MPIXEL_X4( edge+28 ) = PIXEL_SPLAT_X4( SRC(7,-1) );
                edge[32] = SRC(7,-1);
            }
        }
    }
}
754 
#undef PL
#undef PT

/* Read the filtered neighbours back out of edge[] (layout documented in
 * x264_predict_8x8_filter_c) into local variables l0..l7 / t0..t15 / lt. */
#define PL(y) \
    UNUSED int l##y = edge[14-y];
#define PT(x) \
    UNUSED int t##x = edge[16+x];
#define PREDICT_8x8_LOAD_TOPLEFT \
    int lt = edge[15];
#define PREDICT_8x8_LOAD_LEFT \
    PL(0) PL(1) PL(2) PL(3) PL(4) PL(5) PL(6) PL(7)
#define PREDICT_8x8_LOAD_TOP \
    PT(0) PT(1) PT(2) PT(3) PT(4) PT(5) PT(6) PT(7)
#define PREDICT_8x8_LOAD_TOPRIGHT \
    PT(8) PT(9) PT(10) PT(11) PT(12) PT(13) PT(14) PT(15)

/* Fill the whole 8x8 block with the splatted DC value v. */
#define PREDICT_8x8_DC(v) \
    for( int y = 0; y < 8; y++ ) { \
        MPIXEL_X4( src+0 ) = v; \
        MPIXEL_X4( src+4 ) = v; \
        src += FDEC_STRIDE; \
    }
777 
/* 8x8 luma DC prediction with no neighbours: mid-grey fill.
 * edge[] is unused in this mode but kept for a uniform signature. */
static void x264_predict_8x8_dc_128_c( pixel *src, pixel edge[36] )
{
    const pixel4 grey = PIXEL_SPLAT_X4( 1 << (BIT_DEPTH-1) );
    PREDICT_8x8_DC( grey );
}
782 
/* 8x8 luma DC prediction from the eight filtered left neighbours. */
static void x264_predict_8x8_dc_left_c( pixel *src, pixel edge[36] )
{
    PREDICT_8x8_LOAD_LEFT
    const pixel4 splat = PIXEL_SPLAT_X4( (l0+l1+l2+l3+l4+l5+l6+l7+4) >> 3 );
    PREDICT_8x8_DC( splat );
}
789 
/* 8x8 luma DC prediction from the eight filtered top neighbours. */
static void x264_predict_8x8_dc_top_c( pixel *src, pixel edge[36] )
{
    PREDICT_8x8_LOAD_TOP
    const pixel4 splat = PIXEL_SPLAT_X4( (t0+t1+t2+t3+t4+t5+t6+t7+4) >> 3 );
    PREDICT_8x8_DC( splat );
}
796 
/* 8x8 luma DC prediction from all sixteen filtered left+top neighbours. */
void x264_predict_8x8_dc_c( pixel *src, pixel edge[36] )
{
    PREDICT_8x8_LOAD_LEFT
    PREDICT_8x8_LOAD_TOP
    const pixel4 splat =
        PIXEL_SPLAT_X4( (l0+l1+l2+l3+l4+l5+l6+l7+t0+t1+t2+t3+t4+t5+t6+t7+8) >> 4 );
    PREDICT_8x8_DC( splat );
}
804 
/* 8x8 luma horizontal prediction: replicate each filtered left
 * neighbour across its row.  edge[14-y] holds l##y (see edge layout). */
void x264_predict_8x8_h_c( pixel *src, pixel edge[36] )
{
    int y;

    for( y = 0; y < 8; y++ )
    {
        const pixel4 left = PIXEL_SPLAT_X4( edge[14-y] );

        MPIXEL_X4( src+y*FDEC_STRIDE+0 ) = left;
        MPIXEL_X4( src+y*FDEC_STRIDE+4 ) = left;
    }
}
813 
/* 8x8 luma vertical prediction: copy the filtered top row (edge[16..23])
 * into all eight rows. */
void x264_predict_8x8_v_c( pixel *src, pixel edge[36] )
{
    const pixel4 top_lo = MPIXEL_X4( edge+16 );
    const pixel4 top_hi = MPIXEL_X4( edge+20 );
    int y;

    for( y = 0; y < 8; y++ )
    {
        MPIXEL_X4( src+y*FDEC_STRIDE+0 ) = top_lo;
        MPIXEL_X4( src+y*FDEC_STRIDE+4 ) = top_hi;
    }
}
826 
/* 8x8 diagonal-down-left prediction: each anti-diagonal filled with a
 * 3-tap average of consecutive filtered top/top-right neighbours. */
static void x264_predict_8x8_ddl_c( pixel *src, pixel edge[36] )
{
    PREDICT_8x8_LOAD_TOP
    PREDICT_8x8_LOAD_TOPRIGHT
    SRC(0,0)= F2(t0,t1,t2);
    SRC(0,1)=SRC(1,0)= F2(t1,t2,t3);
    SRC(0,2)=SRC(1,1)=SRC(2,0)= F2(t2,t3,t4);
    SRC(0,3)=SRC(1,2)=SRC(2,1)=SRC(3,0)= F2(t3,t4,t5);
    SRC(0,4)=SRC(1,3)=SRC(2,2)=SRC(3,1)=SRC(4,0)= F2(t4,t5,t6);
    SRC(0,5)=SRC(1,4)=SRC(2,3)=SRC(3,2)=SRC(4,1)=SRC(5,0)= F2(t5,t6,t7);
    SRC(0,6)=SRC(1,5)=SRC(2,4)=SRC(3,3)=SRC(4,2)=SRC(5,1)=SRC(6,0)= F2(t6,t7,t8);
    SRC(0,7)=SRC(1,6)=SRC(2,5)=SRC(3,4)=SRC(4,3)=SRC(5,2)=SRC(6,1)=SRC(7,0)= F2(t7,t8,t9);
    SRC(1,7)=SRC(2,6)=SRC(3,5)=SRC(4,4)=SRC(5,3)=SRC(6,2)=SRC(7,1)= F2(t8,t9,t10);
    SRC(2,7)=SRC(3,6)=SRC(4,5)=SRC(5,4)=SRC(6,3)=SRC(7,2)= F2(t9,t10,t11);
    SRC(3,7)=SRC(4,6)=SRC(5,5)=SRC(6,4)=SRC(7,3)= F2(t10,t11,t12);
    SRC(4,7)=SRC(5,6)=SRC(6,5)=SRC(7,4)= F2(t11,t12,t13);
    SRC(5,7)=SRC(6,6)=SRC(7,5)= F2(t12,t13,t14);
    SRC(6,7)=SRC(7,6)= F2(t13,t14,t15);
    SRC(7,7)= F2(t14,t15,t15);  /* last tap duplicated: no pixel past t15 */
}
/* 8x8 diagonal-down-right prediction: diagonals filled from the filtered
 * left column, top-left corner, and top row. */
static void x264_predict_8x8_ddr_c( pixel *src, pixel edge[36] )
{
    PREDICT_8x8_LOAD_TOP
    PREDICT_8x8_LOAD_LEFT
    PREDICT_8x8_LOAD_TOPLEFT
    SRC(0,7)= F2(l7,l6,l5);
    SRC(0,6)=SRC(1,7)= F2(l6,l5,l4);
    SRC(0,5)=SRC(1,6)=SRC(2,7)= F2(l5,l4,l3);
    SRC(0,4)=SRC(1,5)=SRC(2,6)=SRC(3,7)= F2(l4,l3,l2);
    SRC(0,3)=SRC(1,4)=SRC(2,5)=SRC(3,6)=SRC(4,7)= F2(l3,l2,l1);
    SRC(0,2)=SRC(1,3)=SRC(2,4)=SRC(3,5)=SRC(4,6)=SRC(5,7)= F2(l2,l1,l0);
    SRC(0,1)=SRC(1,2)=SRC(2,3)=SRC(3,4)=SRC(4,5)=SRC(5,6)=SRC(6,7)= F2(l1,l0,lt);
    /* Main diagonal crosses the top-left corner. */
    SRC(0,0)=SRC(1,1)=SRC(2,2)=SRC(3,3)=SRC(4,4)=SRC(5,5)=SRC(6,6)=SRC(7,7)= F2(l0,lt,t0);
    SRC(1,0)=SRC(2,1)=SRC(3,2)=SRC(4,3)=SRC(5,4)=SRC(6,5)=SRC(7,6)= F2(lt,t0,t1);
    SRC(2,0)=SRC(3,1)=SRC(4,2)=SRC(5,3)=SRC(6,4)=SRC(7,5)= F2(t0,t1,t2);
    SRC(3,0)=SRC(4,1)=SRC(5,2)=SRC(6,3)=SRC(7,4)= F2(t1,t2,t3);
    SRC(4,0)=SRC(5,1)=SRC(6,2)=SRC(7,3)= F2(t2,t3,t4);
    SRC(5,0)=SRC(6,1)=SRC(7,2)= F2(t3,t4,t5);
    SRC(6,0)=SRC(7,1)= F2(t4,t5,t6);
    SRC(7,0)= F2(t5,t6,t7);

}
/* 8x8 vertical-right prediction: near-vertical diagonals alternating
 * 2-tap (F1) and 3-tap (F2) interpolation of the filtered border. */
static void x264_predict_8x8_vr_c( pixel *src, pixel edge[36] )
{
    PREDICT_8x8_LOAD_TOP
    PREDICT_8x8_LOAD_LEFT
    PREDICT_8x8_LOAD_TOPLEFT
    SRC(0,6)= F2(l5,l4,l3);
    SRC(0,7)= F2(l6,l5,l4);
    SRC(0,4)=SRC(1,6)= F2(l3,l2,l1);
    SRC(0,5)=SRC(1,7)= F2(l4,l3,l2);
    SRC(0,2)=SRC(1,4)=SRC(2,6)= F2(l1,l0,lt);
    SRC(0,3)=SRC(1,5)=SRC(2,7)= F2(l2,l1,l0);
    SRC(0,1)=SRC(1,3)=SRC(2,5)=SRC(3,7)= F2(l0,lt,t0);
    SRC(0,0)=SRC(1,2)=SRC(2,4)=SRC(3,6)= F1(lt,t0);
    SRC(1,1)=SRC(2,3)=SRC(3,5)=SRC(4,7)= F2(lt,t0,t1);
    SRC(1,0)=SRC(2,2)=SRC(3,4)=SRC(4,6)= F1(t0,t1);
    SRC(2,1)=SRC(3,3)=SRC(4,5)=SRC(5,7)= F2(t0,t1,t2);
    SRC(2,0)=SRC(3,2)=SRC(4,4)=SRC(5,6)= F1(t1,t2);
    SRC(3,1)=SRC(4,3)=SRC(5,5)=SRC(6,7)= F2(t1,t2,t3);
    SRC(3,0)=SRC(4,2)=SRC(5,4)=SRC(6,6)= F1(t2,t3);
    SRC(4,1)=SRC(5,3)=SRC(6,5)=SRC(7,7)= F2(t2,t3,t4);
    SRC(4,0)=SRC(5,2)=SRC(6,4)=SRC(7,6)= F1(t3,t4);
    SRC(5,1)=SRC(6,3)=SRC(7,5)= F2(t3,t4,t5);
    SRC(5,0)=SRC(6,2)=SRC(7,4)= F1(t4,t5);
    SRC(6,1)=SRC(7,3)= F2(t4,t5,t6);
    SRC(6,0)=SRC(7,2)= F1(t5,t6);
    SRC(7,1)= F2(t5,t6,t7);
    SRC(7,0)= F1(t6,t7);
}
/* 8x8 horizontal-down prediction.  Pairs of predicted pixels are packed
 * with pack_pixel_1to2/2to4 so each 4-pixel group is stored in one write. */
static void x264_predict_8x8_hd_c( pixel *src, pixel edge[36] )
{
    PREDICT_8x8_LOAD_TOP
    PREDICT_8x8_LOAD_LEFT
    PREDICT_8x8_LOAD_TOPLEFT
    int p1 = pack_pixel_1to2(F1(l6,l7), F2(l5,l6,l7));
    int p2 = pack_pixel_1to2(F1(l5,l6), F2(l4,l5,l6));
    int p3 = pack_pixel_1to2(F1(l4,l5), F2(l3,l4,l5));
    int p4 = pack_pixel_1to2(F1(l3,l4), F2(l2,l3,l4));
    int p5 = pack_pixel_1to2(F1(l2,l3), F2(l1,l2,l3));
    int p6 = pack_pixel_1to2(F1(l1,l2), F2(l0,l1,l2));
    int p7 = pack_pixel_1to2(F1(l0,l1), F2(lt,l0,l1));
    int p8 = pack_pixel_1to2(F1(lt,l0), F2(l0,lt,t0));
    int p9 = pack_pixel_1to2(F2(t1,t0,lt), F2(t2,t1,t0));
    int p10 = pack_pixel_1to2(F2(t3,t2,t1), F2(t4,t3,t2));
    int p11 = pack_pixel_1to2(F2(t5,t4,t3), F2(t6,t5,t4));
    /* Each row shifts the packed pixel stream by one pair. */
    SRC_X4(0,7)= pack_pixel_2to4(p1,p2);
    SRC_X4(0,6)= pack_pixel_2to4(p2,p3);
    SRC_X4(4,7)=SRC_X4(0,5)= pack_pixel_2to4(p3,p4);
    SRC_X4(4,6)=SRC_X4(0,4)= pack_pixel_2to4(p4,p5);
    SRC_X4(4,5)=SRC_X4(0,3)= pack_pixel_2to4(p5,p6);
    SRC_X4(4,4)=SRC_X4(0,2)= pack_pixel_2to4(p6,p7);
    SRC_X4(4,3)=SRC_X4(0,1)= pack_pixel_2to4(p7,p8);
    SRC_X4(4,2)=SRC_X4(0,0)= pack_pixel_2to4(p8,p9);
    SRC_X4(4,1)= pack_pixel_2to4(p9,p10);
    SRC_X4(4,0)= pack_pixel_2to4(p10,p11);
}
/* 8x8 vertical-left prediction: interpolates down-left from the filtered
 * top and top-right neighbours, alternating F1/F2 between row parities. */
static void x264_predict_8x8_vl_c( pixel *src, pixel edge[36] )
{
    PREDICT_8x8_LOAD_TOP
    PREDICT_8x8_LOAD_TOPRIGHT
    SRC(0,0)= F1(t0,t1);
    SRC(0,1)= F2(t0,t1,t2);
    SRC(0,2)=SRC(1,0)= F1(t1,t2);
    SRC(0,3)=SRC(1,1)= F2(t1,t2,t3);
    SRC(0,4)=SRC(1,2)=SRC(2,0)= F1(t2,t3);
    SRC(0,5)=SRC(1,3)=SRC(2,1)= F2(t2,t3,t4);
    SRC(0,6)=SRC(1,4)=SRC(2,2)=SRC(3,0)= F1(t3,t4);
    SRC(0,7)=SRC(1,5)=SRC(2,3)=SRC(3,1)= F2(t3,t4,t5);
    SRC(1,6)=SRC(2,4)=SRC(3,2)=SRC(4,0)= F1(t4,t5);
    SRC(1,7)=SRC(2,5)=SRC(3,3)=SRC(4,1)= F2(t4,t5,t6);
    SRC(2,6)=SRC(3,4)=SRC(4,2)=SRC(5,0)= F1(t5,t6);
    SRC(2,7)=SRC(3,5)=SRC(4,3)=SRC(5,1)= F2(t5,t6,t7);
    SRC(3,6)=SRC(4,4)=SRC(5,2)=SRC(6,0)= F1(t6,t7);
    SRC(3,7)=SRC(4,5)=SRC(5,3)=SRC(6,1)= F2(t6,t7,t8);
    SRC(4,6)=SRC(5,4)=SRC(6,2)=SRC(7,0)= F1(t7,t8);
    SRC(4,7)=SRC(5,5)=SRC(6,3)=SRC(7,1)= F2(t7,t8,t9);
    SRC(5,6)=SRC(6,4)=SRC(7,2)= F1(t8,t9);
    SRC(5,7)=SRC(6,5)=SRC(7,3)= F2(t8,t9,t10);
    SRC(6,6)=SRC(7,4)= F1(t9,t10);
    SRC(6,7)=SRC(7,5)= F2(t9,t10,t11);
    SRC(7,6)= F1(t10,t11);
    SRC(7,7)= F2(t10,t11,t12);
}
/* 8x8 horizontal-up prediction: interpolates upward from the filtered
 * left neighbours; the tail is clamped to l7.  Pixel pairs are packed so
 * each 4-pixel group is stored in one write. */
static void x264_predict_8x8_hu_c( pixel *src, pixel edge[36] )
{
    PREDICT_8x8_LOAD_LEFT
    int p1 = pack_pixel_1to2(F1(l0,l1), F2(l0,l1,l2));
    int p2 = pack_pixel_1to2(F1(l1,l2), F2(l1,l2,l3));
    int p3 = pack_pixel_1to2(F1(l2,l3), F2(l2,l3,l4));
    int p4 = pack_pixel_1to2(F1(l3,l4), F2(l3,l4,l5));
    int p5 = pack_pixel_1to2(F1(l4,l5), F2(l4,l5,l6));
    int p6 = pack_pixel_1to2(F1(l5,l6), F2(l5,l6,l7));
    int p7 = pack_pixel_1to2(F1(l6,l7), F2(l6,l7,l7));
    int p8 = pack_pixel_1to2(l7,l7);  /* past the last neighbour: flat l7 */
    SRC_X4(0,0)= pack_pixel_2to4(p1,p2);
    SRC_X4(0,1)= pack_pixel_2to4(p2,p3);
    SRC_X4(4,0)=SRC_X4(0,2)= pack_pixel_2to4(p3,p4);
    SRC_X4(4,1)=SRC_X4(0,3)= pack_pixel_2to4(p4,p5);
    SRC_X4(4,2)=SRC_X4(0,4)= pack_pixel_2to4(p5,p6);
    SRC_X4(4,3)=SRC_X4(0,5)= pack_pixel_2to4(p6,p7);
    SRC_X4(4,4)=SRC_X4(0,6)= pack_pixel_2to4(p7,p8);
    SRC_X4(4,5)=SRC_X4(4,6)= SRC_X4(0,7) = SRC_X4(4,7) = pack_pixel_2to4(p8,p8);
}
971 
972 /****************************************************************************
973  * Exported functions:
974  ****************************************************************************/
/* Populate the 16x16 luma intra prediction function table.  The plain C
 * reference implementations go in first; each platform-specific init is
 * then given the chance to overwrite entries with optimized versions
 * selected by the cpu capability flags. */
void x264_predict_16x16_init( int cpu, x264_predict_t pf[7] )
{
    /* DC family */
    pf[I_PRED_16x16_DC]     = x264_predict_16x16_dc_c;
    pf[I_PRED_16x16_DC_LEFT]= x264_predict_16x16_dc_left_c;
    pf[I_PRED_16x16_DC_TOP ]= x264_predict_16x16_dc_top_c;
    pf[I_PRED_16x16_DC_128 ]= x264_predict_16x16_dc_128_c;
    /* Directional + planar */
    pf[I_PRED_16x16_V ]     = x264_predict_16x16_v_c;
    pf[I_PRED_16x16_H ]     = x264_predict_16x16_h_c;
    pf[I_PRED_16x16_P ]     = x264_predict_16x16_p_c;

#if HAVE_MMX
    x264_predict_16x16_init_mmx( cpu, pf );
#endif

#if HAVE_ALTIVEC
    if( cpu&X264_CPU_ALTIVEC )
        x264_predict_16x16_init_altivec( pf );
#endif

#if HAVE_ARMV6
    x264_predict_16x16_init_arm( cpu, pf );
#endif

#if ARCH_AARCH64
    x264_predict_16x16_init_aarch64( cpu, pf );
#endif
}
1002 
/* Populate the 8x8 chroma intra prediction function table with the C
 * reference implementations, then let each available platform init
 * replace entries with optimized versions based on the cpu flags. */
void x264_predict_8x8c_init( int cpu, x264_predict_t pf[7] )
{
    /* DC family */
    pf[I_PRED_CHROMA_DC]     = x264_predict_8x8c_dc_c;
    pf[I_PRED_CHROMA_DC_LEFT]= x264_predict_8x8c_dc_left_c;
    pf[I_PRED_CHROMA_DC_TOP ]= x264_predict_8x8c_dc_top_c;
    pf[I_PRED_CHROMA_DC_128 ]= x264_predict_8x8c_dc_128_c;
    /* Directional + planar */
    pf[I_PRED_CHROMA_V ]     = x264_predict_8x8c_v_c;
    pf[I_PRED_CHROMA_H ]     = x264_predict_8x8c_h_c;
    pf[I_PRED_CHROMA_P ]     = x264_predict_8x8c_p_c;

#if HAVE_MMX
    x264_predict_8x8c_init_mmx( cpu, pf );
#endif

#if HAVE_ALTIVEC
    if( cpu&X264_CPU_ALTIVEC )
        x264_predict_8x8c_init_altivec( pf );
#endif

#if HAVE_ARMV6
    x264_predict_8x8c_init_arm( cpu, pf );
#endif

#if ARCH_AARCH64
    x264_predict_8x8c_init_aarch64( cpu, pf );
#endif
}
1030 
/* Populate the 8x16 chroma (4:2:2) intra prediction function table.
 * C reference implementations first; only x86 currently provides
 * optimized replacements for this block size. */
void x264_predict_8x16c_init( int cpu, x264_predict_t pf[7] )
{
    /* DC family */
    pf[I_PRED_CHROMA_DC]     = x264_predict_8x16c_dc_c;
    pf[I_PRED_CHROMA_DC_LEFT]= x264_predict_8x16c_dc_left_c;
    pf[I_PRED_CHROMA_DC_TOP ]= x264_predict_8x16c_dc_top_c;
    pf[I_PRED_CHROMA_DC_128 ]= x264_predict_8x16c_dc_128_c;
    /* Directional + planar */
    pf[I_PRED_CHROMA_V ]     = x264_predict_8x16c_v_c;
    pf[I_PRED_CHROMA_H ]     = x264_predict_8x16c_h_c;
    pf[I_PRED_CHROMA_P ]     = x264_predict_8x16c_p_c;

#if HAVE_MMX
    x264_predict_8x16c_init_mmx( cpu, pf );
#endif
}
1045 
/* Populate the 8x8 luma intra prediction table and the edge-filter hook
 * with C reference implementations, then let each platform init swap in
 * optimized routines according to the cpu capability flags. */
void x264_predict_8x8_init( int cpu, x264_predict8x8_t pf[12], x264_predict_8x8_filter_t *predict_filter )
{
    /* Directional modes */
    pf[I_PRED_8x8_V]      = x264_predict_8x8_v_c;
    pf[I_PRED_8x8_H]      = x264_predict_8x8_h_c;
    pf[I_PRED_8x8_DDL]    = x264_predict_8x8_ddl_c;
    pf[I_PRED_8x8_DDR]    = x264_predict_8x8_ddr_c;
    pf[I_PRED_8x8_VR]     = x264_predict_8x8_vr_c;
    pf[I_PRED_8x8_HD]     = x264_predict_8x8_hd_c;
    pf[I_PRED_8x8_VL]     = x264_predict_8x8_vl_c;
    pf[I_PRED_8x8_HU]     = x264_predict_8x8_hu_c;
    /* DC family */
    pf[I_PRED_8x8_DC]     = x264_predict_8x8_dc_c;
    pf[I_PRED_8x8_DC_LEFT]= x264_predict_8x8_dc_left_c;
    pf[I_PRED_8x8_DC_TOP] = x264_predict_8x8_dc_top_c;
    pf[I_PRED_8x8_DC_128] = x264_predict_8x8_dc_128_c;
    /* Neighbour-edge preprocessing used by the 8x8 modes */
    *predict_filter       = x264_predict_8x8_filter_c;

#if HAVE_MMX
    x264_predict_8x8_init_mmx( cpu, pf, predict_filter );
#endif

#if HAVE_ARMV6
    x264_predict_8x8_init_arm( cpu, pf, predict_filter );
#endif

#if ARCH_AARCH64
    x264_predict_8x8_init_aarch64( cpu, pf, predict_filter );
#endif
}
1074 
/* Populate the 4x4 luma intra prediction function table with the C
 * reference implementations, then give each platform init the chance
 * to install optimized versions selected by the cpu flags. */
void x264_predict_4x4_init( int cpu, x264_predict_t pf[12] )
{
    /* Directional modes */
    pf[I_PRED_4x4_V]      = x264_predict_4x4_v_c;
    pf[I_PRED_4x4_H]      = x264_predict_4x4_h_c;
    pf[I_PRED_4x4_DDL]    = x264_predict_4x4_ddl_c;
    pf[I_PRED_4x4_DDR]    = x264_predict_4x4_ddr_c;
    pf[I_PRED_4x4_VR]     = x264_predict_4x4_vr_c;
    pf[I_PRED_4x4_HD]     = x264_predict_4x4_hd_c;
    pf[I_PRED_4x4_VL]     = x264_predict_4x4_vl_c;
    pf[I_PRED_4x4_HU]     = x264_predict_4x4_hu_c;
    /* DC family */
    pf[I_PRED_4x4_DC]     = x264_predict_4x4_dc_c;
    pf[I_PRED_4x4_DC_LEFT]= x264_predict_4x4_dc_left_c;
    pf[I_PRED_4x4_DC_TOP] = x264_predict_4x4_dc_top_c;
    pf[I_PRED_4x4_DC_128] = x264_predict_4x4_dc_128_c;

#if HAVE_MMX
    x264_predict_4x4_init_mmx( cpu, pf );
#endif

#if HAVE_ARMV6
    x264_predict_4x4_init_arm( cpu, pf );
#endif

#if ARCH_AARCH64
    x264_predict_4x4_init_aarch64( cpu, pf );
#endif
}
1102 
1103