/*****************************************************************************
 * rectangle.h: rectangle filling
 *****************************************************************************
 * Copyright (C) 2003-2021 x264 project
 *
 * Authors: Fiona Glaser <fiona@x264.com>
 *          Loren Merritt <lorenm@u.washington.edu>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
 *
 * This program is also available under a commercial proprietary license.
 * For more information, contact us at licensing@x264.com.
 *****************************************************************************/
26
27 /* This function should only be called with constant w / h / s arguments! */
x264_macroblock_cache_rect(void * dst,int w,int h,int s,uint32_t v)28 static ALWAYS_INLINE void x264_macroblock_cache_rect( void *dst, int w, int h, int s, uint32_t v )
29 {
30 uint8_t *d = dst;
31 uint16_t v2 = s == 2 ? v : v * 0x101;
32 uint32_t v4 = s == 4 ? v : s == 2 ? v * 0x10001 : v * 0x1010101;
33 uint64_t v8 = v4 + ((uint64_t)v4 << 32);
34 s *= 8;
35
36 if( w == 2 )
37 {
38 M16( d+s*0 ) = v2;
39 if( h == 1 ) return;
40 M16( d+s*1 ) = v2;
41 if( h == 2 ) return;
42 M16( d+s*2 ) = v2;
43 M16( d+s*3 ) = v2;
44 }
45 else if( w == 4 )
46 {
47 M32( d+s*0 ) = v4;
48 if( h == 1 ) return;
49 M32( d+s*1 ) = v4;
50 if( h == 2 ) return;
51 M32( d+s*2 ) = v4;
52 M32( d+s*3 ) = v4;
53 }
54 else if( w == 8 )
55 {
56 if( WORD_SIZE == 8 )
57 {
58 M64( d+s*0 ) = v8;
59 if( h == 1 ) return;
60 M64( d+s*1 ) = v8;
61 if( h == 2 ) return;
62 M64( d+s*2 ) = v8;
63 M64( d+s*3 ) = v8;
64 }
65 else
66 {
67 M32( d+s*0+0 ) = v4;
68 M32( d+s*0+4 ) = v4;
69 if( h == 1 ) return;
70 M32( d+s*1+0 ) = v4;
71 M32( d+s*1+4 ) = v4;
72 if( h == 2 ) return;
73 M32( d+s*2+0 ) = v4;
74 M32( d+s*2+4 ) = v4;
75 M32( d+s*3+0 ) = v4;
76 M32( d+s*3+4 ) = v4;
77 }
78 }
79 else if( w == 16 )
80 {
81 /* height 1, width 16 doesn't occur */
82 assert( h != 1 );
83 #if HAVE_VECTOREXT && defined(__SSE__)
84 v4si v16 = {v,v,v,v};
85
86 M128( d+s*0+0 ) = (__m128)v16;
87 M128( d+s*1+0 ) = (__m128)v16;
88 if( h == 2 ) return;
89 M128( d+s*2+0 ) = (__m128)v16;
90 M128( d+s*3+0 ) = (__m128)v16;
91 #else
92 if( WORD_SIZE == 8 )
93 {
94 do
95 {
96 M64( d+s*0+0 ) = v8;
97 M64( d+s*0+8 ) = v8;
98 M64( d+s*1+0 ) = v8;
99 M64( d+s*1+8 ) = v8;
100 h -= 2;
101 d += s*2;
102 } while( h );
103 }
104 else
105 {
106 do
107 {
108 M32( d+ 0 ) = v4;
109 M32( d+ 4 ) = v4;
110 M32( d+ 8 ) = v4;
111 M32( d+12 ) = v4;
112 d += s;
113 } while( --h );
114 }
115 #endif
116 }
117 else
118 assert(0);
119 }
120
121 #define x264_cache_mv_func_table x264_template(cache_mv_func_table)
122 extern void (*x264_cache_mv_func_table[10])(void *, uint32_t);
123 #define x264_cache_mvd_func_table x264_template(cache_mvd_func_table)
124 extern void (*x264_cache_mvd_func_table[10])(void *, uint32_t);
125 #define x264_cache_ref_func_table x264_template(cache_ref_func_table)
126 extern void (*x264_cache_ref_func_table[10])(void *, uint32_t);
127
/* Same as x264_macroblock_cache_mv(), except that the value stored is M32( mv ).
 * NOTE(review): mv is presumably a pointer to a packed motion vector that M32()
 * reads as one 32-bit word - confirm against the definition of M32. */
#define x264_macroblock_cache_mv_ptr( a, x, y, w, h, l, mv )\
    x264_macroblock_cache_mv( a, x, y, w, h, l, M32( mv ) )
x264_macroblock_cache_mv(x264_t * h,int x,int y,int width,int height,int i_list,uint32_t mv)129 static ALWAYS_INLINE void x264_macroblock_cache_mv( x264_t *h, int x, int y, int width, int height, int i_list, uint32_t mv )
130 {
131 void *mv_cache = &h->mb.cache.mv[i_list][X264_SCAN8_0+x+8*y];
132 if( x264_nonconstant_p( width ) || x264_nonconstant_p( height ) )
133 x264_cache_mv_func_table[width + (height<<1)-3]( mv_cache, mv );
134 else
135 x264_macroblock_cache_rect( mv_cache, width*4, height, 4, mv );
136 }
x264_macroblock_cache_mvd(x264_t * h,int x,int y,int width,int height,int i_list,uint16_t mvd)137 static ALWAYS_INLINE void x264_macroblock_cache_mvd( x264_t *h, int x, int y, int width, int height, int i_list, uint16_t mvd )
138 {
139 void *mvd_cache = &h->mb.cache.mvd[i_list][X264_SCAN8_0+x+8*y];
140 if( x264_nonconstant_p( width ) || x264_nonconstant_p( height ) )
141 x264_cache_mvd_func_table[width + (height<<1)-3]( mvd_cache, mvd );
142 else
143 x264_macroblock_cache_rect( mvd_cache, width*2, height, 2, mvd );
144 }
x264_macroblock_cache_ref(x264_t * h,int x,int y,int width,int height,int i_list,uint8_t ref)145 static ALWAYS_INLINE void x264_macroblock_cache_ref( x264_t *h, int x, int y, int width, int height, int i_list, uint8_t ref )
146 {
147 void *ref_cache = &h->mb.cache.ref[i_list][X264_SCAN8_0+x+8*y];
148 if( x264_nonconstant_p( width ) || x264_nonconstant_p( height ) )
149 x264_cache_ref_func_table[width + (height<<1)-3]( ref_cache, ref );
150 else
151 x264_macroblock_cache_rect( ref_cache, width, height, 1, ref );
152 }
x264_macroblock_cache_skip(x264_t * h,int x,int y,int width,int height,int b_skip)153 static ALWAYS_INLINE void x264_macroblock_cache_skip( x264_t *h, int x, int y, int width, int height, int b_skip )
154 {
155 x264_macroblock_cache_rect( &h->mb.cache.skip[X264_SCAN8_0+x+8*y], width, height, 1, b_skip );
156 }
x264_macroblock_cache_intra8x8_pred(x264_t * h,int x,int y,int i_mode)157 static ALWAYS_INLINE void x264_macroblock_cache_intra8x8_pred( x264_t *h, int x, int y, int i_mode )
158 {
159 x264_macroblock_cache_rect( &h->mb.cache.intra4x4_pred_mode[X264_SCAN8_0+x+8*y], 2, 2, 1, i_mode );
160 }
161