1 /*
2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 #include "libavutil/intmath.h"
22 #include "libavutil/log.h"
23 #include "libavutil/opt.h"
24 #include "avcodec.h"
25 #include "me_cmp.h"
26 #include "snow_dwt.h"
27 #include "internal.h"
28 #include "snow.h"
29 #include "snowdata.h"
30
31 #include "rangecoder.h"
32 #include "mathops.h"
33 #include "h263.h"
34
35
/**
 * Combine four prediction blocks with OBMC weights and apply the result to
 * one block of the slice buffer.
 *
 * For every pixel the four predictions block[0..3] are multiplied by the
 * matching weights taken from the four quadrants of the obmc table and summed.
 * With add == 0 the weighted sum is subtracted from the slice buffer line.
 * With add != 0 it is added to the slice buffer value, rounded down to
 * integer precision, clipped to 0..255 and written to dst8 (the slice buffer
 * itself is left untouched in that case).
 *
 * @param obmc        weight table; the four quadrants are addressed through
 *                    obmc_stride (see FIXME below)
 * @param obmc_stride line size of the weight table
 * @param block       four prediction blocks, each with line size src_stride
 * @param b_w         number of columns to process
 * @param b_h         number of rows to process
 * @param src_x       horizontal offset inside the slice buffer line
 * @param src_y       first slice buffer line
 * @param src_stride  line size of block[] and dst8
 * @param sb          slice buffer holding the IDWT samples
 * @param add         selects add-and-store (non-zero) or subtract (zero)
 * @param dst8        8-bit output, only written when add is non-zero
 */
void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
                              int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
    const int half = obmc_stride >> 1;
    int row, col;

    for (row = 0; row < b_h; row++) {
        /* FIXME ugly misuse of obmc_stride */
        const uint8_t *w_a = obmc + row * obmc_stride;
        const uint8_t *w_b = w_a + half;
        const uint8_t *w_c = w_a + obmc_stride * half;
        const uint8_t *w_d = w_c + half;
        const int pix_off  = row * src_stride;
        IDWTELEM *line     = slice_buffer_get_line(sb, src_y + row);

        for (col = 0; col < b_w; col++) {
            const int idx = col + pix_off;
            int v = w_a[col] * block[3][idx]
                  + w_b[col] * block[2][idx]
                  + w_c[col] * block[1][idx]
                  + w_d[col] * block[0][idx];

            /* Bring the weighted sum to FRAC_BITS fractional bits. */
            v <<= 8 - LOG2_OBMC_MAX;
            if (FRAC_BITS != 8)
                v >>= 8 - FRAC_BITS;

            if (!add) {
                line[col + src_x] -= v;
                continue;
            }

            v += line[col + src_x];
            v  = (v + (1 << (FRAC_BITS - 1))) >> FRAC_BITS;
            /* Out of 0..255: negative values become 0, large ones 255. */
            if (v & ~255)
                v = ~(v >> 31);
            dst8[idx] = v;
        }
    }
}
68
/**
 * Get a reference-counted buffer for frame.
 *
 * When the codec is an encoder the buffer is requested 2 * EDGE_WIDTH larger
 * in both dimensions; afterwards each data pointer is advanced past the
 * top/left border (scaled by the chroma shifts for planes other than 0) and
 * the frame dimensions are set back to the coded picture size.
 *
 * @return 0 on success, the negative error code of ff_get_buffer() otherwise
 */
int ff_snow_get_buffer(SnowContext *s, AVFrame *frame)
{
    AVCodecContext *const avctx = s->avctx;
    const int with_edges = av_codec_is_encoder(avctx->codec);
    const int pad        = with_edges ? 2 * EDGE_WIDTH : 0;
    int plane, err;

    frame->width  = avctx->width  + pad;
    frame->height = avctx->height + pad;

    err = ff_get_buffer(avctx, frame, AV_GET_BUFFER_FLAG_REF);
    if (err < 0)
        return err;

    if (!with_edges)
        return 0;

    for (plane = 0; frame->data[plane]; plane++) {
        const int vshift = plane ? s->chroma_v_shift : 0;
        const int hshift = plane ? s->chroma_h_shift : 0;
        const int skip   = (EDGE_WIDTH >> vshift) * frame->linesize[plane] +
                           (EDGE_WIDTH >> hshift);

        frame->data[plane] += skip;
    }
    frame->width  = avctx->width;
    frame->height = avctx->height;

    return 0;
}
95
/**
 * Reset all adaptive coder states to MID_STATE: the per-subband states of
 * the first three planes as well as the header and block states.
 */
void ff_snow_reset_contexts(SnowContext *s){ //FIXME better initial contexts
    int p, lvl, ori;

    for (p = 0; p < 3; p++) {
        for (lvl = 0; lvl < MAX_DECOMPOSITIONS; lvl++) {
            /* orientation 0 is only visited at level 0 */
            for (ori = lvl ? 1 : 0; ori < 4; ori++) {
                SubBand *band = &s->plane[p].band[lvl][ori];

                memset(band->state, MID_STATE, sizeof(band->state));
            }
        }
    }

    memset(s->header_state, MID_STATE, sizeof(s->header_state));
    memset(s->block_state,  MID_STATE, sizeof(s->block_state));
}
109
/**
 * (Re)allocate the zero-initialised block array for the current picture size.
 *
 * The picture dimensions are rounded up to whole blocks of
 * 1 << LOG2_MB_SIZE pixels and stored in s->b_width / s->b_height; each of
 * those blocks gets room for 4^block_max_depth BlockNode entries.
 *
 * @return 0 on success, AVERROR(ENOMEM) if the allocation fails
 */
int ff_snow_alloc_blocks(SnowContext *s){
    const int blocks_x = AV_CEIL_RSHIFT(s->avctx->width,  LOG2_MB_SIZE);
    const int blocks_y = AV_CEIL_RSHIFT(s->avctx->height, LOG2_MB_SIZE);

    s->b_width  = blocks_x;
    s->b_height = blocks_y;

    av_free(s->block);
    s->block = av_mallocz_array(blocks_x * blocks_y,
                                sizeof(BlockNode) << (s->block_max_depth*2));

    return s->block ? 0 : AVERROR(ENOMEM);
}
124
init_qexp(void)125 static av_cold void init_qexp(void){
126 int i;
127 double v=128;
128
129 for(i=0; i<QROOT; i++){
130 ff_qexp[i]= lrintf(v);
131 v *= pow(2, 1.0 / QROOT);
132 }
133 }
/**
 * Sub-pel motion compensation of one b_w x b_h block.
 *
 * Up to three half-pel planes (horizontally filtered, vertically filtered,
 * and filtered in both directions) are built in temporary buffers, but only
 * those that the requested position needs. The output is then either a
 * two-point weighted average of two of the planes, or a bilinear blend of
 * four of them. The bilinear blend is used when the lookup tables ask for it
 * and always when p is given and p->diag_mc is zero.
 *
 * The filter is the fixed 6-tap (1,-5,20,20,-5,1) kernel when p is NULL or
 * p->fast_mc is set, otherwise the 8-tap kernel described by p->hcoeff[].
 *
 * @param p      plane parameters, may be NULL
 * @param dst    output block, line size stride
 * @param src    reference pixels; reads begin at src itself and extend
 *               HTAPS_MAX-1 rows/columns beyond the block
 * @param stride line size of dst and src
 * @param b_w    block width
 * @param b_h    block height
 * @param dx     horizontal sub-pel position, must be < 16
 * @param dy     vertical sub-pel position, must be < 16
 */
static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, int stride, int b_w, int b_h, int dx, int dy){
    /* Weight of the first source in the two-point blend, indexed by
     * (dx&7) + 8*(dy&7); the second source gets 8 minus this value. */
    static const uint8_t weight[64]={
        8,7,6,5,4,3,2,1,
        7,7,0,0,0,0,0,1,
        6,0,6,0,0,0,2,0,
        5,0,0,5,0,3,0,0,
        4,0,0,0,4,0,0,0,
        3,0,0,5,0,3,0,0,
        2,0,6,0,0,0,2,0,
        1,7,0,0,0,0,0,1,
    };

    /* Indexed by dx + 16*dy: high nibble and low nibble are the hpel[]
     * indices of the two planes to blend. */
    static const uint8_t brane[256]={
        0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
        0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
        0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
        0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
        0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
        0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
        0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
        0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
        0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
        0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
        0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
        0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
        0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
        0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
        0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
        0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
    };

    /* Per hpel[] index: which filter passes must run for that plane
     * (1 = horizontal, 2 = vertical, 4 = both); 15 selects the bilinear path. */
    static const uint8_t needs[16]={
        0,1,0,0,
        2,4,2,0,
        0,1,0,0,
        15
    };

    int16_t tmpIt   [64*(32+HTAPS_MAX)];   /* unrounded horizontal filter output */
    uint8_t tmp2t[3][64*(32+HTAPS_MAX)];   /* the three filtered planes, line size 64 */
    const uint8_t *hpel[11];
    const int use_fast = !p || p->fast_mc;
    int x, y, passes, left, right;

    av_assert2(dx<16 && dy<16);
    right = brane[dx + 16*dy] & 15;
    left  = brane[dx + 16*dy] >> 4;

    passes = needs[left] | needs[right];
    if (p && !p->diag_mc)
        passes = 15;

    /* Horizontal pass; also feeds the two-dimensional pass below, hence
     * HTAPS_MAX-1 extra rows. */
    if (passes & 5) {
        const uint8_t *row = src;
        int16_t *acc_row   = tmpIt;
        uint8_t *out_row   = tmp2t[0];

        for (y = 0; y < b_h+HTAPS_MAX-1; y++) {
            for (x = 0; x < b_w; x++) {
                const uint8_t *t = row + x + HTAPS_MAX/2;
                int am;

                if (use_fast) {
                    am = 20*(t[-1]+t[0]) - 5*(t[-2]+t[1]) + (t[-3]+t[2]);
                    acc_row[x] = am;
                    am = (am+16)>>5;
                } else {
                    am = p->hcoeff[0]*(t[-1]+t[0]) + p->hcoeff[1]*(t[-2]+t[1])
                       + p->hcoeff[2]*(t[-3]+t[2]) + p->hcoeff[3]*(t[-4]+t[3]);
                    acc_row[x] = am;
                    am = (am+32)>>6;
                }

                if (am & ~255)
                    am = ~(am>>31);
                out_row[x] = am;
            }
            acc_row += 64;
            out_row += 64;
            row     += stride;
        }
    }
    src += HTAPS_MAX/2 - 1;

    /* Vertical pass on the source pixels; one extra column is produced. */
    if (passes & 2) {
        const uint8_t *row = src;
        uint8_t *out_row   = tmp2t[1];

        for (y = 0; y < b_h; y++) {
            for (x = 0; x < b_w+1; x++) {
                const uint8_t *c = row + x + (HTAPS_MAX/2)*stride;
                int am;

                if (use_fast)
                    am = (20*(c[-stride]+c[0]) - 5*(c[-2*stride]+c[stride])
                          + (c[-3*stride]+c[2*stride]) + 16)>>5;
                else
                    am = (p->hcoeff[0]*(c[-stride]+c[0]) + p->hcoeff[1]*(c[-2*stride]+c[stride])
                          + p->hcoeff[2]*(c[-3*stride]+c[2*stride]) + p->hcoeff[3]*(c[-4*stride]+c[3*stride]) + 32)>>6;

                if (am & ~255)
                    am = ~(am>>31);
                out_row[x] = am;
            }
            row     += stride;
            out_row += 64;
        }
    }
    src += stride*(HTAPS_MAX/2 - 1);

    /* Vertical pass on the unrounded horizontal results. */
    if (passes & 4) {
        const int16_t *acc_row = tmpIt;
        uint8_t *out_row       = tmp2t[2];

        for (y = 0; y < b_h; y++) {
            for (x = 0; x < b_w; x++) {
                const int16_t *c = acc_row + x + (HTAPS_MAX/2)*64;
                int am;

                if (use_fast)
                    am = (20*(c[-1*64]+c[0]) - 5*(c[-2*64]+c[1*64])
                          + (c[-3*64]+c[2*64]) + 512)>>10;
                else
                    am = (p->hcoeff[0]*(c[-1*64]+c[0]) + p->hcoeff[1]*(c[-2*64]+c[1*64])
                          + p->hcoeff[2]*(c[-3*64]+c[2*64]) + p->hcoeff[3]*(c[-4*64]+c[3*64]) + 2048)>>12;

                if (am & ~255)
                    am = ~(am>>31);
                out_row[x] = am;
            }
            acc_row += 64;
            out_row += 64;
        }
    }

    /* 3x3 grid of full/half-pel planes, four entries per grid row;
     * slots 3 and 7 are never referenced. */
    hpel[ 0]= src;
    hpel[ 1]= tmp2t[0] + 64*(HTAPS_MAX/2-1);
    hpel[ 2]= src + 1;

    hpel[ 4]= tmp2t[1];
    hpel[ 5]= tmp2t[2];
    hpel[ 6]= tmp2t[1] + 1;

    hpel[ 8]= src + stride;
    hpel[ 9]= hpel[1] + 64;
    hpel[10]= hpel[8] + 1;

/* Planes living in the temporary buffers use a line size of 64. */
#define MC_STRIDE(x) (needs[x] ? 64 : stride)

    if (passes == 15) {
        /* Bilinear blend of the four planes surrounding the position. */
        const int quad = dx / 8 + dy / 8 * 4;
        const uint8_t *s00 = hpel[quad    ];
        const uint8_t *s01 = hpel[quad + 1];
        const uint8_t *s10 = hpel[quad + 4];
        const uint8_t *s11 = hpel[quad + 5];
        const int stride00 = MC_STRIDE(quad);
        const int stride01 = MC_STRIDE(quad + 1);
        const int stride10 = MC_STRIDE(quad + 4);
        const int stride11 = MC_STRIDE(quad + 5);
        const int fx = dx & 7;
        const int fy = dy & 7;

        for (y = 0; y < b_h; y++) {
            for (x = 0; x < b_w; x++) {
                dst[x] = ((8-fx)*(8-fy)*s00[x] + fx*(8-fy)*s01[x] +
                          (8-fx)*   fy *s10[x] + fx*   fy *s11[x] + 32)>>6;
            }
            s00 += stride00;
            s01 += stride01;
            s10 += stride10;
            s11 += stride11;
            dst += stride;
        }
    } else {
        /* Weighted average of the two planes selected by brane[]. */
        const uint8_t *src_l = hpel[left];
        const uint8_t *src_r = hpel[right];
        const int stride_l   = MC_STRIDE(left);
        const int stride_r   = MC_STRIDE(right);
        const int w_l        = weight[((dx&7) + (8*(dy&7)))];
        const int w_r        = 8 - w_l;

        for (y = 0; y < b_h; y++) {
            for (x = 0; x < b_w; x++)
                dst[x] = (w_l*src_l[x] + w_r*src_r[x] + 4)>>3;
            src_l += stride_l;
            src_r += stride_r;
            dst   += stride;
        }
    }
}
326
/**
 * Write the prediction for one block into dst.
 *
 * Intra blocks are filled with the block's constant color for the plane.
 * Inter blocks are motion compensated from s->last_picture[block->ref]:
 * the motion vector is scaled to 1/16 pel, the source area is replaced by an
 * edge-emulated copy in tmp + MB_SIZE when it is not safely inside the
 * w x h plane, and the block is produced either by mc_block() or, for
 * quarter-pel positions and power-of-two block shapes with fast_mc enabled,
 * by the H.264 qpel functions.
 *
 * @param s           codec context
 * @param dst         destination block, line size stride
 * @param tmp         scratch memory for edge emulation
 * @param stride      line size of dst and of the reference plane
 * @param sx          block x position inside the plane
 * @param sy          block y position inside the plane
 * @param b_w         block width
 * @param b_h         block height
 * @param block       block description (type, color, ref, mx, my)
 * @param plane_index plane being predicted
 * @param w           plane width used for the border test
 * @param h           plane height used for the border test
 */
void ff_snow_pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, ptrdiff_t stride, int sx, int sy, int b_w, int b_h, const BlockNode *block, int plane_index, int w, int h){
    if(block->type & BLOCK_INTRA){
        /* Fill every row with the color byte. This replaces stores through
         * a uint32_t pointer cast from dst, which were neither guaranteed to
         * be aligned nor valid under the effective type rules, and covers
         * every block width with one code path.
         * NOTE(review): assumes color[] holds 8-bit values, as the original
         * byte replication did. */
        const unsigned color = block->color[plane_index];
        int y;

        if (b_w > 0) {
            for (y = 0; y < b_h; y++)
                memset(dst + y*stride, color, b_w);
        }
    }else{
        uint8_t *src= s->last_picture[block->ref]->data[plane_index];
        /* chroma vectors are scaled down by the horizontal chroma shift */
        const int scale= plane_index ? (2*s->mv_scale)>>s->chroma_h_shift : 2*s->mv_scale;
        const int mx= block->mx*scale;
        const int my= block->my*scale;
        const int dx= mx&15;
        const int dy= my&15;
        const int qpel_idx= dy+(dx>>2);
        const int tab_index= 3 - (b_w>>2) + (b_w>>4);

        /* move to the top-left corner of the filter support */
        sx += (mx>>4) - (HTAPS_MAX/2-1);
        sy += (my>>4) - (HTAPS_MAX/2-1);
        src += sx + sy*stride;
        if(   (unsigned)sx >= FFMAX(w - b_w - (HTAPS_MAX-2), 0)
           || (unsigned)sy >= FFMAX(h - b_h - (HTAPS_MAX-2), 0)){
            s->vdsp.emulated_edge_mc(tmp + MB_SIZE, src,
                                     stride, stride,
                                     b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1,
                                     sx, sy, w, h);
            src= tmp + MB_SIZE;
        }

        av_assert2(s->chroma_h_shift == s->chroma_v_shift); // only one mv_scale

        av_assert2((tab_index>=0 && tab_index<4) || b_w==32);
        if(    (dx&3) || (dy&3)
            || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h)
            || (b_w&(b_w-1))
            || b_w == 1
            || b_h == 1
            || !s->plane[plane_index].fast_mc ){
            /* generic path: any position, any block shape */
            mc_block(&s->plane[plane_index], dst, src, stride, b_w, b_h, dx, dy);
        }else if(b_w==32){
            /* covered with 16x16 calls, two per 16-row band */
            int y;
            for(y=0; y<b_h; y+=16){
                s->h264qpel.put_h264_qpel_pixels_tab[0][qpel_idx](dst      + y*stride, src +  3 + (y+3)*stride, stride);
                s->h264qpel.put_h264_qpel_pixels_tab[0][qpel_idx](dst + 16 + y*stride, src + 19 + (y+3)*stride, stride);
            }
        }else if(b_w==b_h){
            s->h264qpel.put_h264_qpel_pixels_tab[tab_index  ][qpel_idx](dst, src + 3 + 3*stride, stride);
        }else if(b_w==2*b_h){
            /* wide block: two square halves side by side */
            s->h264qpel.put_h264_qpel_pixels_tab[tab_index+1][qpel_idx](dst    , src + 3       + 3*stride, stride);
            s->h264qpel.put_h264_qpel_pixels_tab[tab_index+1][qpel_idx](dst+b_h, src + 3 + b_h + 3*stride, stride);
        }else{
            /* tall block: two square halves stacked vertically */
            av_assert2(2*b_w==b_h);
            s->h264qpel.put_h264_qpel_pixels_tab[tab_index  ][qpel_idx](dst           , src + 3 + 3*stride           , stride);
            s->h264qpel.put_h264_qpel_pixels_tab[tab_index  ][qpel_idx](dst+b_w*stride, src + 3 + 3*stride+b_w*stride, stride);
        }
    }
}
414
415 #define mca(dx,dy,b_w)\
416 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, ptrdiff_t stride, int h){\
417 av_assert2(h==b_w);\
418 mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, stride, b_w, b_w, dx, dy);\
419 }
420
421 mca( 0, 0,16)
422 mca( 8, 0,16)
423 mca( 0, 8,16)
424 mca( 8, 8,16)
425 mca( 0, 0,8)
426 mca( 8, 0,8)
427 mca( 0, 8,8)
428 mca( 8, 8,8)
429
ff_snow_common_init(AVCodecContext * avctx)430 av_cold int ff_snow_common_init(AVCodecContext *avctx){
431 SnowContext *s = avctx->priv_data;
432 int width, height;
433 int i, j;
434
435 s->avctx= avctx;
436 s->max_ref_frames=1; //just make sure it's not an invalid value in case of no initial keyframe
437 s->spatial_decomposition_count = 1;
438
439 ff_me_cmp_init(&s->mecc, avctx);
440 ff_hpeldsp_init(&s->hdsp, avctx->flags);
441 ff_videodsp_init(&s->vdsp, 8);
442 ff_dwt_init(&s->dwt);
443 ff_h264qpel_init(&s->h264qpel, 8);
444
445 #define mcf(dx,dy)\
446 s->qdsp.put_qpel_pixels_tab [0][dy+dx/4]=\
447 s->qdsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
448 s->h264qpel.put_h264_qpel_pixels_tab[0][dy+dx/4];\
449 s->qdsp.put_qpel_pixels_tab [1][dy+dx/4]=\
450 s->qdsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
451 s->h264qpel.put_h264_qpel_pixels_tab[1][dy+dx/4];
452
453 mcf( 0, 0)
454 mcf( 4, 0)
455 mcf( 8, 0)
456 mcf(12, 0)
457 mcf( 0, 4)
458 mcf( 4, 4)
459 mcf( 8, 4)
460 mcf(12, 4)
461 mcf( 0, 8)
462 mcf( 4, 8)
463 mcf( 8, 8)
464 mcf(12, 8)
465 mcf( 0,12)
466 mcf( 4,12)
467 mcf( 8,12)
468 mcf(12,12)
469
470 #define mcfh(dx,dy)\
471 s->hdsp.put_pixels_tab [0][dy/4+dx/8]=\
472 s->hdsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
473 mc_block_hpel ## dx ## dy ## 16;\
474 s->hdsp.put_pixels_tab [1][dy/4+dx/8]=\
475 s->hdsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
476 mc_block_hpel ## dx ## dy ## 8;
477
478 mcfh(0, 0)
479 mcfh(8, 0)
480 mcfh(0, 8)
481 mcfh(8, 8)
482
483 init_qexp();
484
485 // dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
486
487 width= s->avctx->width;
488 height= s->avctx->height;
489
490 if (!FF_ALLOCZ_TYPED_ARRAY(s->spatial_idwt_buffer, width * height) ||
491 !FF_ALLOCZ_TYPED_ARRAY(s->spatial_dwt_buffer, width * height) || //FIXME this does not belong here
492 !FF_ALLOCZ_TYPED_ARRAY(s->temp_dwt_buffer, width) ||
493 !FF_ALLOCZ_TYPED_ARRAY(s->temp_idwt_buffer, width) ||
494 !FF_ALLOCZ_TYPED_ARRAY(s->run_buffer, ((width + 1) >> 1) * ((height + 1) >> 1)))
495 return AVERROR(ENOMEM);
496
497 for(i=0; i<MAX_REF_FRAMES; i++) {
498 for(j=0; j<MAX_REF_FRAMES; j++)
499 ff_scale_mv_ref[i][j] = 256*(i+1)/(j+1);
500 s->last_picture[i] = av_frame_alloc();
501 if (!s->last_picture[i])
502 return AVERROR(ENOMEM);
503 }
504
505 s->mconly_picture = av_frame_alloc();
506 s->current_picture = av_frame_alloc();
507 if (!s->mconly_picture || !s->current_picture)
508 return AVERROR(ENOMEM);
509
510 return 0;
511 }
512
/**
 * Initialisation that depends on parameters known only after the header.
 *
 * On the first call (s->scratchbuf not yet allocated) a buffer is obtained
 * for s->mconly_picture and the scratch and edge-emulation buffers are
 * allocated. On every call the plane sizes and the geometry of all subbands
 * are recomputed and each subband's x_coeff array is reallocated.
 *
 * @return 0 on success, AVERROR_INVALIDDATA if the pixel format no longer
 *         matches s->mconly_picture, AVERROR(ENOMEM) or the error code of
 *         ff_get_buffer() otherwise
 */
int ff_snow_common_init_after_header(AVCodecContext *avctx) {
    SnowContext *s = avctx->priv_data;
    int pl, lvl, ori;
    int err;

    if (!s->scratchbuf) {
        int line, emu_buf_size;

        err = ff_get_buffer(s->avctx, s->mconly_picture, AV_GET_BUFFER_FLAG_REF);
        if (err < 0)
            return err;

        line         = FFMAX(s->mconly_picture->linesize[0], 2*avctx->width+256);
        emu_buf_size = line * (2 * MB_SIZE + HTAPS_MAX - 1);
        if (!FF_ALLOCZ_TYPED_ARRAY(s->scratchbuf,      line * 7 * MB_SIZE) ||
            !FF_ALLOCZ_TYPED_ARRAY(s->emu_edge_buffer, emu_buf_size))
            return AVERROR(ENOMEM);
    }

    if (s->mconly_picture->format != avctx->pix_fmt) {
        av_log(avctx, AV_LOG_ERROR, "pixel format changed\n");
        return AVERROR_INVALIDDATA;
    }

    for (pl = 0; pl < s->nb_planes; pl++) {
        Plane *plane = &s->plane[pl];
        int w = s->avctx->width;
        int h = s->avctx->height;

        if (pl) {
            w = AV_CEIL_RSHIFT(w, s->chroma_h_shift);
            h = AV_CEIL_RSHIFT(h, s->chroma_v_shift);
        }
        plane->width  = w;
        plane->height = h;

        /* w and h are halved (rounding up) after every level */
        for (lvl = s->spatial_decomposition_count - 1; lvl >= 0; lvl--) {
            const int shift = s->spatial_decomposition_count - lvl;

            for (ori = lvl ? 1 : 0; ori < 4; ori++) {
                SubBand *b      = &plane->band[lvl][ori];
                const int off_x = ori & 1;   /* band starts (w+1)>>1 samples into the line */
                const int off_y = ori > 1;   /* band starts half a band stride further on */

                b->buf    = s->spatial_dwt_buffer;
                b->level  = lvl;
                b->stride = plane->width << shift;
                b->width  = (w + !off_x) >> 1;
                b->height = (h + !off_y) >> 1;

                b->stride_line  = 1 << shift;
                b->buf_x_offset = 0;
                b->buf_y_offset = 0;

                if (off_x) {
                    b->buf         += (w+1)>>1;
                    b->buf_x_offset = (w+1)>>1;
                }
                if (off_y) {
                    b->buf         += b->stride>>1;
                    b->buf_y_offset = b->stride_line >> 1;
                }
                /* same position inside the IDWT buffer */
                b->ibuf = s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer);

                if (lvl)
                    b->parent = &plane->band[lvl-1][ori];

                //FIXME avoid this realloc
                av_freep(&b->x_coeff);
                b->x_coeff = av_mallocz_array(((b->width+1) * b->height+1), sizeof(x_and_coeff));
                if (!b->x_coeff)
                    return AVERROR(ENOMEM);
            }
            w = (w+1)>>1;
            h = (h+1)>>1;
        }
    }

    return 0;
}
583
584 #define USE_HALFPEL_PLANE 0
585
halfpel_interpol(SnowContext * s,uint8_t * halfpel[4][4],AVFrame * frame)586 static int halfpel_interpol(SnowContext *s, uint8_t *halfpel[4][4], AVFrame *frame){
587 int p,x,y;
588
589 for(p=0; p < s->nb_planes; p++){
590 int is_chroma= !!p;
591 int w= is_chroma ? AV_CEIL_RSHIFT(s->avctx->width, s->chroma_h_shift) : s->avctx->width;
592 int h= is_chroma ? AV_CEIL_RSHIFT(s->avctx->height, s->chroma_v_shift) : s->avctx->height;
593 int ls= frame->linesize[p];
594 uint8_t *src= frame->data[p];
595
596 halfpel[1][p] = av_malloc_array(ls, (h + 2 * EDGE_WIDTH));
597 halfpel[2][p] = av_malloc_array(ls, (h + 2 * EDGE_WIDTH));
598 halfpel[3][p] = av_malloc_array(ls, (h + 2 * EDGE_WIDTH));
599 if (!halfpel[1][p] || !halfpel[2][p] || !halfpel[3][p]) {
600 av_freep(&halfpel[1][p]);
601 av_freep(&halfpel[2][p]);
602 av_freep(&halfpel[3][p]);
603 return AVERROR(ENOMEM);
604 }
605 halfpel[1][p] += EDGE_WIDTH * (1 + ls);
606 halfpel[2][p] += EDGE_WIDTH * (1 + ls);
607 halfpel[3][p] += EDGE_WIDTH * (1 + ls);
608
609 halfpel[0][p]= src;
610 for(y=0; y<h; y++){
611 for(x=0; x<w; x++){
612 int i= y*ls + x;
613
614 halfpel[1][p][i]= (20*(src[i] + src[i+1]) - 5*(src[i-1] + src[i+2]) + (src[i-2] + src[i+3]) + 16 )>>5;
615 }
616 }
617 for(y=0; y<h; y++){
618 for(x=0; x<w; x++){
619 int i= y*ls + x;
620
621 halfpel[2][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
622 }
623 }
624 src= halfpel[1][p];
625 for(y=0; y<h; y++){
626 for(x=0; x<w; x++){
627 int i= y*ls + x;
628
629 halfpel[3][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
630 }
631 }
632
633 //FIXME border!
634 }
635 return 0;
636 }
637
/**
 * Drop the oldest reference picture (index max_ref_frames - 1), if it holds
 * data, together with its half-pel planes 1..3 of the first three picture
 * planes. The half-pel pointers are moved back by
 * EDGE_WIDTH * (1 + linesize), taken from s->current_picture, before being
 * freed, and are then cleared.
 */
void ff_snow_release_buffer(AVCodecContext *avctx)
{
    SnowContext *s   = avctx->priv_data;
    const int oldest = s->max_ref_frames - 1;
    int k, plane;

    if (!s->last_picture[oldest]->data[0])
        return;

    av_frame_unref(s->last_picture[oldest]);

    for (k = 1; k <= 3; k++) {
        for (plane = 0; plane < 3; plane++) {
            uint8_t **slot = &s->halfpel_plane[oldest][k][plane];

            if (*slot) {
                av_free(*slot - EDGE_WIDTH*(1+s->current_picture->linesize[plane]));
                *slot = NULL;
            }
        }
    }
}
652
/**
 * Prepare the picture buffers for coding a new frame.
 *
 * The oldest reference is released, the reference list and the half-pel
 * plane table are shifted by one position, the previous current picture
 * becomes reference 0, and the released frame is reused as the new current
 * picture, for which a fresh buffer is obtained. For non-keyframes
 * s->ref_frames is set to the number of usable references: counting stops
 * at the first empty slot or right after a reference marked as key frame.
 *
 * @return 0 on success, AVERROR_INVALIDDATA if a non-keyframe has no
 *         reference, or a negative error code from buffer handling
 */
int ff_snow_frame_start(SnowContext *s){
    AVFrame *recycled;
    int idx, err;

    ff_snow_release_buffer(s->avctx);

    recycled = s->last_picture[s->max_ref_frames-1];
    for (idx = s->max_ref_frames-1; idx > 0; idx--)
        s->last_picture[idx] = s->last_picture[idx-1];
    memmove(s->halfpel_plane+1, s->halfpel_plane, (s->max_ref_frames-1)*sizeof(void*)*4*4);

    if (USE_HALFPEL_PLANE && s->current_picture->data[0]) {
        err = halfpel_interpol(s, s->halfpel_plane[0], s->current_picture);
        if (err < 0)
            return err;
    }
    s->last_picture[0] = s->current_picture;
    s->current_picture = recycled;

    if (s->keyframe) {
        s->ref_frames = 0;
    } else {
        int usable = 0;

        while (usable < s->max_ref_frames && s->last_picture[usable]->data[0]) {
            if (usable && s->last_picture[usable-1]->key_frame)
                break;
            usable++;
        }
        s->ref_frames = usable;

        if (!usable) {
            av_log(s->avctx,AV_LOG_ERROR, "No reference frames\n");
            return AVERROR_INVALIDDATA;
        }
    }

    err = ff_snow_get_buffer(s, s->current_picture);
    if (err < 0)
        return err;

    s->current_picture->key_frame = s->keyframe;

    return 0;
}
690
ff_snow_common_end(SnowContext * s)691 av_cold void ff_snow_common_end(SnowContext *s)
692 {
693 int plane_index, level, orientation, i;
694
695 av_freep(&s->spatial_dwt_buffer);
696 av_freep(&s->temp_dwt_buffer);
697 av_freep(&s->spatial_idwt_buffer);
698 av_freep(&s->temp_idwt_buffer);
699 av_freep(&s->run_buffer);
700
701 s->m.me.temp= NULL;
702 av_freep(&s->m.me.scratchpad);
703 av_freep(&s->m.me.map);
704 av_freep(&s->m.me.score_map);
705 av_freep(&s->m.sc.obmc_scratchpad);
706
707 av_freep(&s->block);
708 av_freep(&s->scratchbuf);
709 av_freep(&s->emu_edge_buffer);
710
711 for(i=0; i<MAX_REF_FRAMES; i++){
712 av_freep(&s->ref_mvs[i]);
713 av_freep(&s->ref_scores[i]);
714 if(s->last_picture[i] && s->last_picture[i]->data[0]) {
715 av_assert0(s->last_picture[i]->data[0] != s->current_picture->data[0]);
716 }
717 av_frame_free(&s->last_picture[i]);
718 }
719
720 for(plane_index=0; plane_index < MAX_PLANES; plane_index++){
721 for(level=MAX_DECOMPOSITIONS-1; level>=0; level--){
722 for(orientation=level ? 1 : 0; orientation<4; orientation++){
723 SubBand *b= &s->plane[plane_index].band[level][orientation];
724
725 av_freep(&b->x_coeff);
726 }
727 }
728 }
729 av_frame_free(&s->mconly_picture);
730 av_frame_free(&s->current_picture);
731 }
732