1 /*
2  * H.265 video codec.
3  * Copyright (c) 2013-2014 struktur AG, Dirk Farin <farin@struktur.de>
4  *
5  * This file is part of libde265.
6  *
7  * libde265 is free software: you can redistribute it and/or modify
8  * it under the terms of the GNU Lesser General Public License as
9  * published by the Free Software Foundation, either version 3 of
10  * the License, or (at your option) any later version.
11  *
12  * libde265 is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public License
18  * along with libde265.  If not, see <http://www.gnu.org/licenses/>.
19  */
20 
21 #include "motion.h"
22 #include "decctx.h"
23 #include "util.h"
24 #include "dpb.h"
25 #include <assert.h>
26 
27 
28 #include <sys/types.h>
29 #include <signal.h>
30 #include <string.h>
31 
32 #if defined(_MSC_VER) || defined(__MINGW32__)
33 # include <malloc.h>
34 #elif defined(HAVE_ALLOCA_H)
35 # include <alloca.h>
36 #endif
37 
38 
// Largest block edge length (in samples) that the motion compensation scratch
// buffers in this file are dimensioned for.
#define MAX_CU_SIZE 64


// Number of additional reference samples that mc_luma() fetches before
// (left/top) and after (right/bottom) the prediction block.
// Both tables are indexed by the quarter-sample fraction of the motion
// vector component (mv & 3); index 0 is the full-sample position, which needs
// no extra samples. The tables are read-only lookup data.
static const int extra_before[4] = { 0,3,3,2 };
static const int extra_after [4] = { 0,3,4,4 };
44 
45 
46 
47 template <class pixel_t>
mc_luma(const base_context * ctx,const seq_parameter_set * sps,int mv_x,int mv_y,int xP,int yP,int16_t * out,int out_stride,const pixel_t * ref,int ref_stride,int nPbW,int nPbH,int bitDepth_L)48 void mc_luma(const base_context* ctx,
49              const seq_parameter_set* sps, int mv_x, int mv_y,
50              int xP,int yP,
51              int16_t* out, int out_stride,
52              const pixel_t* ref, int ref_stride,
53              int nPbW, int nPbH, int bitDepth_L)
54 {
55   int xFracL = mv_x & 3;
56   int yFracL = mv_y & 3;
57 
58   int xIntOffsL = xP + (mv_x>>2);
59   int yIntOffsL = yP + (mv_y>>2);
60 
61   // luma sample interpolation process (8.5.3.2.2.1)
62 
63   //const int shift1 = sps->BitDepth_Y-8;
64   //const int shift2 = 6;
65   const int shift3 = 14 - sps->BitDepth_Y;
66 
67   int w = sps->pic_width_in_luma_samples;
68   int h = sps->pic_height_in_luma_samples;
69 
70   ALIGNED_16(int16_t) mcbuffer[MAX_CU_SIZE * (MAX_CU_SIZE+7)];
71 
72   if (xFracL==0 && yFracL==0) {
73 
74     if (xIntOffsL >= 0 && yIntOffsL >= 0 &&
75         nPbW+xIntOffsL <= w && nPbH+yIntOffsL <= h) {
76 
77       ctx->acceleration.put_hevc_qpel(out, out_stride,
78                                       &ref[yIntOffsL*ref_stride + xIntOffsL],
79                                       ref_stride /* sizeof(pixel_t)*/,
80                                       nPbW,nPbH, mcbuffer, 0,0, bitDepth_L);
81     }
82     else {
83       for (int y=0;y<nPbH;y++)
84         for (int x=0;x<nPbW;x++) {
85 
86           int xA = Clip3(0,w-1,x + xIntOffsL);
87           int yA = Clip3(0,h-1,y + yIntOffsL);
88 
89           out[y*out_stride+x] = ref[ xA + yA*ref_stride ] << shift3;
90         }
91     }
92 
93 #ifdef DE265_LOG_TRACE
94     logtrace(LogMotion,"---MC luma %d %d = direct---\n",xFracL,yFracL);
95 
96     for (int y=0;y<nPbH;y++) {
97       for (int x=0;x<nPbW;x++) {
98 
99         int xA = Clip3(0,w-1,x + xIntOffsL);
100         int yA = Clip3(0,h-1,y + yIntOffsL);
101 
102         logtrace(LogMotion,"%02x ", ref[ xA + yA*ref_stride ]);
103       }
104       logtrace(LogMotion,"\n");
105     }
106 
107     logtrace(LogMotion," -> \n");
108 
109     for (int y=0;y<nPbH;y++) {
110       for (int x=0;x<nPbW;x++) {
111 
112         logtrace(LogMotion,"%02x ",out[y*out_stride+x] >> 6); // 6 will be used when summing predictions
113       }
114       logtrace(LogMotion,"\n");
115     }
116 #endif
117   }
118   else {
119     int extra_left   = extra_before[xFracL];
120     int extra_right  = extra_after [xFracL];
121     int extra_top    = extra_before[yFracL];
122     int extra_bottom = extra_after [yFracL];
123 
124     //int nPbW_extra = extra_left + nPbW + extra_right;
125     //int nPbH_extra = extra_top  + nPbH + extra_bottom;
126 
127 
128     pixel_t padbuf[(MAX_CU_SIZE+16)*(MAX_CU_SIZE+7)];
129 
130     const pixel_t* src_ptr;
131     int src_stride;
132 
133     if (-extra_left + xIntOffsL >= 0 &&
134         -extra_top  + yIntOffsL >= 0 &&
135         nPbW+extra_right  + xIntOffsL < w &&
136         nPbH+extra_bottom + yIntOffsL < h) {
137       src_ptr = &ref[xIntOffsL + yIntOffsL*ref_stride];
138       src_stride = ref_stride;
139     }
140     else {
141       for (int y=-extra_top;y<nPbH+extra_bottom;y++) {
142         for (int x=-extra_left;x<nPbW+extra_right;x++) {
143 
144           int xA = Clip3(0,w-1,x + xIntOffsL);
145           int yA = Clip3(0,h-1,y + yIntOffsL);
146 
147           padbuf[x+extra_left + (y+extra_top)*(MAX_CU_SIZE+16)] = ref[ xA + yA*ref_stride ];
148         }
149       }
150 
151       src_ptr = &padbuf[extra_top*(MAX_CU_SIZE+16) + extra_left];
152       src_stride = MAX_CU_SIZE+16;
153     }
154 
155     ctx->acceleration.put_hevc_qpel(out, out_stride,
156                                     src_ptr, src_stride /* sizeof(pixel_t) */,
157                                     nPbW,nPbH, mcbuffer, xFracL,yFracL, bitDepth_L);
158 
159 
160     logtrace(LogMotion,"---V---\n");
161     for (int y=0;y<nPbH;y++) {
162       for (int x=0;x<nPbW;x++) {
163         logtrace(LogMotion,"%04x ",out[x+y*out_stride]);
164       }
165       logtrace(LogMotion,"\n");
166     }
167   }
168 }
169 
170 
171 
172 template <class pixel_t>
mc_chroma(const base_context * ctx,const seq_parameter_set * sps,int mv_x,int mv_y,int xP,int yP,int16_t * out,int out_stride,const pixel_t * ref,int ref_stride,int nPbWC,int nPbHC,int bit_depth_C)173 void mc_chroma(const base_context* ctx,
174                const seq_parameter_set* sps,
175                int mv_x, int mv_y,
176                int xP,int yP,
177                int16_t* out, int out_stride,
178                const pixel_t* ref, int ref_stride,
179                int nPbWC, int nPbHC, int bit_depth_C)
180 {
181   // chroma sample interpolation process (8.5.3.2.2.2)
182 
183   //const int shift1 = sps->BitDepth_C-8;
184   //const int shift2 = 6;
185   const int shift3 = 14 - sps->BitDepth_C;
186 
187   int wC = sps->pic_width_in_luma_samples /sps->SubWidthC;
188   int hC = sps->pic_height_in_luma_samples/sps->SubHeightC;
189 
190   mv_x *= 2 / sps->SubWidthC;
191   mv_y *= 2 / sps->SubHeightC;
192 
193   int xFracC = mv_x & 7;
194   int yFracC = mv_y & 7;
195 
196   int xIntOffsC = xP/sps->SubWidthC  + (mv_x>>3);
197   int yIntOffsC = yP/sps->SubHeightC + (mv_y>>3);
198 
199   ALIGNED_32(int16_t mcbuffer[MAX_CU_SIZE*(MAX_CU_SIZE+7)]);
200 
201   if (xFracC == 0 && yFracC == 0) {
202     if (xIntOffsC>=0 && nPbWC+xIntOffsC<=wC &&
203         yIntOffsC>=0 && nPbHC+yIntOffsC<=hC) {
204       ctx->acceleration.put_hevc_epel(out, out_stride,
205                                       &ref[xIntOffsC + yIntOffsC*ref_stride], ref_stride,
206                                       nPbWC,nPbHC, 0,0, NULL, bit_depth_C);
207     }
208     else
209       {
210         for (int y=0;y<nPbHC;y++)
211           for (int x=0;x<nPbWC;x++) {
212 
213             int xB = Clip3(0,wC-1,x + xIntOffsC);
214             int yB = Clip3(0,hC-1,y + yIntOffsC);
215 
216             out[y*out_stride+x] = ref[ xB + yB*ref_stride ] << shift3;
217           }
218       }
219   }
220   else {
221     pixel_t padbuf[(MAX_CU_SIZE+16)*(MAX_CU_SIZE+3)];
222 
223     const pixel_t* src_ptr;
224     int src_stride;
225 
226     int extra_top  = 1;
227     int extra_left = 1;
228     int extra_right  = 2;
229     int extra_bottom = 2;
230 
231     if (xIntOffsC>=1 && nPbWC+xIntOffsC<=wC-2 &&
232         yIntOffsC>=1 && nPbHC+yIntOffsC<=hC-2) {
233       src_ptr = &ref[xIntOffsC + yIntOffsC*ref_stride];
234       src_stride = ref_stride;
235     }
236     else {
237       for (int y=-extra_top;y<nPbHC+extra_bottom;y++) {
238         for (int x=-extra_left;x<nPbWC+extra_right;x++) {
239 
240           int xA = Clip3(0,wC-1,x + xIntOffsC);
241           int yA = Clip3(0,hC-1,y + yIntOffsC);
242 
243           padbuf[x+extra_left + (y+extra_top)*(MAX_CU_SIZE+16)] = ref[ xA + yA*ref_stride ];
244         }
245       }
246 
247       src_ptr = &padbuf[extra_left + extra_top*(MAX_CU_SIZE+16)];
248       src_stride = MAX_CU_SIZE+16;
249     }
250 
251 
252     if (xFracC && yFracC) {
253       ctx->acceleration.put_hevc_epel_hv(out, out_stride,
254                                          src_ptr, src_stride,
255                                          nPbWC,nPbHC, xFracC,yFracC, mcbuffer, bit_depth_C);
256     }
257     else if (xFracC) {
258       ctx->acceleration.put_hevc_epel_h(out, out_stride,
259                                         src_ptr, src_stride,
260                                         nPbWC,nPbHC, xFracC,yFracC, mcbuffer, bit_depth_C);
261     }
262     else if (yFracC) {
263       ctx->acceleration.put_hevc_epel_v(out, out_stride,
264                                         src_ptr, src_stride,
265                                         nPbWC,nPbHC, xFracC,yFracC, mcbuffer, bit_depth_C);
266     }
267     else {
268       assert(false); // full-pel shifts are handled above
269     }
270   }
271 }
272 
273 
274 
275 // 8.5.3.2
276 // NOTE: for full-pel shifts, we can introduce a fast path, simply copying without shifts
generate_inter_prediction_samples(base_context * ctx,const slice_segment_header * shdr,de265_image * img,int xC,int yC,int xB,int yB,int nCS,int nPbW,int nPbH,const MotionVectorSpec * vi)277 void generate_inter_prediction_samples(base_context* ctx,
278                                        const slice_segment_header* shdr,
279                                        de265_image* img,
280                                        int xC,int yC,
281                                        int xB,int yB,
282                                        int nCS, int nPbW,int nPbH,
283                                        const MotionVectorSpec* vi)
284 {
285   int xP = xC+xB;
286   int yP = yC+yB;
287 
288   void*  pixels[3];
289   int    stride[3];
290 
291   const int SubWidthC  = img->sps.SubWidthC;
292   const int SubHeightC = img->sps.SubHeightC;
293 
294   pixels[0] = img->get_image_plane_at_pos_any_depth(0,xP,yP);
295   stride[0] = img->get_image_stride(0);
296 
297   pixels[1] = img->get_image_plane_at_pos_any_depth(1,xP/SubWidthC,yP/SubHeightC);
298   stride[1] = img->get_image_stride(1);
299 
300   pixels[2] = img->get_image_plane_at_pos_any_depth(2,xP/SubWidthC,yP/SubHeightC);
301   stride[2] = img->get_image_stride(2);
302 
303 
304   ALIGNED_16(int16_t) predSamplesL                 [2 /* LX */][MAX_CU_SIZE* MAX_CU_SIZE];
305   ALIGNED_16(int16_t) predSamplesC[2 /* chroma */ ][2 /* LX */][MAX_CU_SIZE* MAX_CU_SIZE];
306 
307   //int xP = xC+xB;
308   //int yP = yC+yB;
309 
310   int predFlag[2];
311   predFlag[0] = vi->predFlag[0];
312   predFlag[1] = vi->predFlag[1];
313 
314   const int bit_depth_L = img->sps.BitDepth_Y;
315   const int bit_depth_C = img->sps.BitDepth_C;
316 
317   // Some encoders use bi-prediction with two similar MVs.
318   // Identify this case and use only one MV.
319 
320   // do this only without weighted prediction, because the weights/offsets may be different
321   if (img->pps.weighted_pred_flag==0) {
322     if (predFlag[0] && predFlag[1]) {
323       if (vi->mv[0].x == vi->mv[1].x &&
324           vi->mv[0].y == vi->mv[1].y &&
325           shdr->RefPicList[0][vi->refIdx[0]] ==
326           shdr->RefPicList[1][vi->refIdx[1]]) {
327         predFlag[1] = 0;
328       }
329     }
330   }
331 
332 
333   for (int l=0;l<2;l++) {
334     if (predFlag[l]) {
335       // 8.5.3.2.1
336 
337       if (vi->refIdx[l] >= MAX_NUM_REF_PICS) {
338         img->integrity = INTEGRITY_DECODING_ERRORS;
339         ctx->add_warning(DE265_WARNING_NONEXISTING_REFERENCE_PICTURE_ACCESSED, false);
340         return;
341       }
342 
343       const de265_image* refPic = ctx->get_image(shdr->RefPicList[l][vi->refIdx[l]]);
344 
345       logtrace(LogMotion, "refIdx: %d -> dpb[%d]\n", vi->refIdx[l], shdr->RefPicList[l][vi->refIdx[l]]);
346 
347       if (refPic->PicState == UnusedForReference) {
348         img->integrity = INTEGRITY_DECODING_ERRORS;
349         ctx->add_warning(DE265_WARNING_NONEXISTING_REFERENCE_PICTURE_ACCESSED, false);
350 
351         // TODO: fill predSamplesC with black or grey
352       }
353       else {
354         // 8.5.3.2.2
355 
356         logtrace(LogMotion,"do MC: L%d,MV=%d;%d RefPOC=%d\n",
357                  l,vi->mv[l].x,vi->mv[l].y,refPic->PicOrderCntVal);
358 
359 
360         // TODO: must predSamples stride really be nCS or can it be somthing smaller like nPbW?
361 
362         if (img->high_bit_depth(0)) {
363           mc_luma(ctx, &img->sps, vi->mv[l].x, vi->mv[l].y, xP,yP,
364                   predSamplesL[l],nCS,
365                   (const uint16_t*)refPic->get_image_plane(0),
366                   refPic->get_luma_stride(), nPbW,nPbH, bit_depth_L);
367         }
368         else {
369           mc_luma(ctx, &img->sps, vi->mv[l].x, vi->mv[l].y, xP,yP,
370                   predSamplesL[l],nCS,
371                   (const uint8_t*)refPic->get_image_plane(0),
372                   refPic->get_luma_stride(), nPbW,nPbH, bit_depth_L);
373         }
374 
375         if (img->high_bit_depth(0)) {
376           mc_chroma(ctx, &img->sps, vi->mv[l].x, vi->mv[l].y, xP,yP,
377                     predSamplesC[0][l],nCS, (const uint16_t*)refPic->get_image_plane(1),
378                     refPic->get_chroma_stride(), nPbW/SubWidthC,nPbH/SubHeightC, bit_depth_C);
379           mc_chroma(ctx, &img->sps, vi->mv[l].x, vi->mv[l].y, xP,yP,
380                     predSamplesC[1][l],nCS, (const uint16_t*)refPic->get_image_plane(2),
381                     refPic->get_chroma_stride(), nPbW/SubWidthC,nPbH/SubHeightC, bit_depth_C);
382         }
383         else {
384           mc_chroma(ctx, &img->sps, vi->mv[l].x, vi->mv[l].y, xP,yP,
385                     predSamplesC[0][l],nCS, (const uint8_t*)refPic->get_image_plane(1),
386                     refPic->get_chroma_stride(), nPbW/SubWidthC,nPbH/SubHeightC, bit_depth_C);
387           mc_chroma(ctx, &img->sps, vi->mv[l].x, vi->mv[l].y, xP,yP,
388                     predSamplesC[1][l],nCS, (const uint8_t*)refPic->get_image_plane(2),
389                     refPic->get_chroma_stride(), nPbW/SubWidthC,nPbH/SubHeightC, bit_depth_C);
390         }
391       }
392     }
393   }
394 
395 
396   // weighted sample prediction  (8.5.3.2.3)
397 
398   const int shift1_L = libde265_max(2,14-img->sps.BitDepth_Y);
399   const int offset_shift1_L = img->sps.WpOffsetBdShiftY;
400   const int shift1_C = libde265_max(2,14-img->sps.BitDepth_C);
401   const int offset_shift1_C = img->sps.WpOffsetBdShiftC;
402 
403   /*
404   const int shift1_L = 14-img->sps.BitDepth_Y;
405   const int offset_shift1_L = img->sps.BitDepth_Y-8;
406   const int shift1_C = 14-img->sps.BitDepth_C;
407   const int offset_shift1_C = img->sps.BitDepth_C-8;
408   */
409 
410   /*
411   if (0)
412   printf("%d/%d %d/%d %d/%d %d/%d\n",
413          shift1_L,
414          Nshift1_L,
415          offset_shift1_L,
416          Noffset_shift1_L,
417          shift1_C,
418          Nshift1_C,
419          offset_shift1_C,
420          Noffset_shift1_C);
421 
422   assert(shift1_L==
423          Nshift1_L);
424   assert(offset_shift1_L==
425          Noffset_shift1_L);
426   assert(shift1_C==
427          Nshift1_C);
428   assert(offset_shift1_C==
429          Noffset_shift1_C);
430   */
431 
432 
433   logtrace(LogMotion,"predFlags (modified): %d %d\n", predFlag[0], predFlag[1]);
434 
435   if (shdr->slice_type == SLICE_TYPE_P) {
436     if (img->pps.weighted_pred_flag==0) {
437       if (predFlag[0]==1 && predFlag[1]==0) {
438         ctx->acceleration.put_unweighted_pred(pixels[0], stride[0],
439                                               predSamplesL[0],nCS, nPbW,nPbH, bit_depth_L);
440         ctx->acceleration.put_unweighted_pred(pixels[1], stride[1],
441                                               predSamplesC[0][0],nCS,
442                                               nPbW/SubWidthC,nPbH/SubHeightC, bit_depth_C);
443         ctx->acceleration.put_unweighted_pred(pixels[2], stride[2],
444                                               predSamplesC[1][0],nCS,
445                                               nPbW/SubWidthC,nPbH/SubHeightC, bit_depth_C);
446       }
447       else {
448         ctx->add_warning(DE265_WARNING_BOTH_PREDFLAGS_ZERO, false);
449         img->integrity = INTEGRITY_DECODING_ERRORS;
450       }
451     }
452     else {
453       // weighted prediction
454 
455       if (predFlag[0]==1 && predFlag[1]==0) {
456 
457         int refIdx0 = vi->refIdx[0];
458 
459         int luma_log2WD   = shdr->luma_log2_weight_denom + shift1_L;
460         int chroma_log2WD = shdr->ChromaLog2WeightDenom  + shift1_C;
461 
462         int luma_w0 = shdr->LumaWeight[0][refIdx0];
463         int luma_o0 = shdr->luma_offset[0][refIdx0] * (1<<(offset_shift1_L));
464 
465         int chroma0_w0 = shdr->ChromaWeight[0][refIdx0][0];
466         int chroma0_o0 = shdr->ChromaOffset[0][refIdx0][0] * (1<<(offset_shift1_C));
467         int chroma1_w0 = shdr->ChromaWeight[0][refIdx0][1];
468         int chroma1_o0 = shdr->ChromaOffset[0][refIdx0][1] * (1<<(offset_shift1_C));
469 
470         logtrace(LogMotion,"weighted-0 [%d] %d %d %d  %dx%d\n", refIdx0, luma_log2WD-6,luma_w0,luma_o0,nPbW,nPbH);
471 
472         ctx->acceleration.put_weighted_pred(pixels[0], stride[0],
473                                             predSamplesL[0],nCS, nPbW,nPbH,
474                                             luma_w0, luma_o0, luma_log2WD, bit_depth_L);
475         ctx->acceleration.put_weighted_pred(pixels[1], stride[1],
476                                             predSamplesC[0][0],nCS, nPbW/SubWidthC,nPbH/SubHeightC,
477                                             chroma0_w0, chroma0_o0, chroma_log2WD, bit_depth_C);
478         ctx->acceleration.put_weighted_pred(pixels[2], stride[2],
479                                             predSamplesC[1][0],nCS, nPbW/SubWidthC,nPbH/SubHeightC,
480                                             chroma1_w0, chroma1_o0, chroma_log2WD, bit_depth_C);
481       }
482       else {
483         ctx->add_warning(DE265_WARNING_BOTH_PREDFLAGS_ZERO, false);
484         img->integrity = INTEGRITY_DECODING_ERRORS;
485       }
486     }
487   }
488   else {
489     assert(shdr->slice_type == SLICE_TYPE_B);
490 
491     if (predFlag[0]==1 && predFlag[1]==1) {
492       if (img->pps.weighted_bipred_flag==0) {
493         //const int shift2  = 15-8; // TODO: real bit depth
494         //const int offset2 = 1<<(shift2-1);
495 
496         int16_t* in0 = predSamplesL[0];
497         int16_t* in1 = predSamplesL[1];
498 
499         ctx->acceleration.put_weighted_pred_avg(pixels[0], stride[0],
500                                                 in0,in1, nCS, nPbW, nPbH, bit_depth_L);
501 
502         int16_t* in00 = predSamplesC[0][0];
503         int16_t* in01 = predSamplesC[0][1];
504         int16_t* in10 = predSamplesC[1][0];
505         int16_t* in11 = predSamplesC[1][1];
506 
507         ctx->acceleration.put_weighted_pred_avg(pixels[1], stride[1],
508                                                 in00,in01, nCS,
509                                                 nPbW/SubWidthC, nPbH/SubHeightC, bit_depth_C);
510         ctx->acceleration.put_weighted_pred_avg(pixels[2], stride[2],
511                                                 in10,in11, nCS,
512                                                 nPbW/SubWidthC, nPbH/SubHeightC, bit_depth_C);
513       }
514       else {
515         // weighted prediction
516 
517         int refIdx0 = vi->refIdx[0];
518         int refIdx1 = vi->refIdx[1];
519 
520         int luma_log2WD   = shdr->luma_log2_weight_denom + shift1_L;
521         int chroma_log2WD = shdr->ChromaLog2WeightDenom + shift1_C;
522 
523         int luma_w0 = shdr->LumaWeight[0][refIdx0];
524         int luma_o0 = shdr->luma_offset[0][refIdx0] * (1<<(offset_shift1_L));
525         int luma_w1 = shdr->LumaWeight[1][refIdx1];
526         int luma_o1 = shdr->luma_offset[1][refIdx1] * (1<<(offset_shift1_L));
527 
528         int chroma0_w0 = shdr->ChromaWeight[0][refIdx0][0];
529         int chroma0_o0 = shdr->ChromaOffset[0][refIdx0][0] * (1<<(offset_shift1_C));
530         int chroma1_w0 = shdr->ChromaWeight[0][refIdx0][1];
531         int chroma1_o0 = shdr->ChromaOffset[0][refIdx0][1] * (1<<(offset_shift1_C));
532         int chroma0_w1 = shdr->ChromaWeight[1][refIdx1][0];
533         int chroma0_o1 = shdr->ChromaOffset[1][refIdx1][0] * (1<<(offset_shift1_C));
534         int chroma1_w1 = shdr->ChromaWeight[1][refIdx1][1];
535         int chroma1_o1 = shdr->ChromaOffset[1][refIdx1][1] * (1<<(offset_shift1_C));
536 
537         logtrace(LogMotion,"weighted-BI-0 [%d] %d %d %d  %dx%d\n", refIdx0, luma_log2WD-6,luma_w0,luma_o0,nPbW,nPbH);
538         logtrace(LogMotion,"weighted-BI-1 [%d] %d %d %d  %dx%d\n", refIdx1, luma_log2WD-6,luma_w1,luma_o1,nPbW,nPbH);
539 
540         int16_t* in0 = predSamplesL[0];
541         int16_t* in1 = predSamplesL[1];
542 
543         ctx->acceleration.put_weighted_bipred(pixels[0], stride[0],
544                                               in0,in1, nCS, nPbW, nPbH,
545                                               luma_w0,luma_o0,
546                                               luma_w1,luma_o1,
547                                               luma_log2WD, bit_depth_L);
548 
549         int16_t* in00 = predSamplesC[0][0];
550         int16_t* in01 = predSamplesC[0][1];
551         int16_t* in10 = predSamplesC[1][0];
552         int16_t* in11 = predSamplesC[1][1];
553 
554         ctx->acceleration.put_weighted_bipred(pixels[1], stride[1],
555                                               in00,in01, nCS, nPbW/SubWidthC, nPbH/SubHeightC,
556                                               chroma0_w0,chroma0_o0,
557                                               chroma0_w1,chroma0_o1,
558                                               chroma_log2WD, bit_depth_C);
559         ctx->acceleration.put_weighted_bipred(pixels[2], stride[2],
560                                               in10,in11, nCS, nPbW/SubWidthC, nPbH/SubHeightC,
561                                               chroma1_w0,chroma1_o0,
562                                               chroma1_w1,chroma1_o1,
563                                               chroma_log2WD, bit_depth_C);
564       }
565     }
566     else if (predFlag[0]==1 || predFlag[1]==1) {
567       int l = predFlag[0] ? 0 : 1;
568 
569       if (img->pps.weighted_bipred_flag==0) {
570         ctx->acceleration.put_unweighted_pred(pixels[0], stride[0],
571                                               predSamplesL[l],nCS, nPbW,nPbH, bit_depth_L);
572         ctx->acceleration.put_unweighted_pred(pixels[1], stride[1],
573                                               predSamplesC[0][l],nCS,
574                                               nPbW/SubWidthC,nPbH/SubHeightC, bit_depth_C);
575         ctx->acceleration.put_unweighted_pred(pixels[2], stride[2],
576                                               predSamplesC[1][l],nCS,
577                                               nPbW/SubWidthC,nPbH/SubHeightC, bit_depth_C);
578       }
579       else {
580         int refIdx = vi->refIdx[l];
581 
582         int luma_log2WD   = shdr->luma_log2_weight_denom + shift1_L;
583         int chroma_log2WD = shdr->ChromaLog2WeightDenom  + shift1_C;
584 
585         int luma_w = shdr->LumaWeight[l][refIdx];
586         int luma_o = shdr->luma_offset[l][refIdx] * (1<<(offset_shift1_L));
587 
588         int chroma0_w = shdr->ChromaWeight[l][refIdx][0];
589         int chroma0_o = shdr->ChromaOffset[l][refIdx][0] * (1<<(offset_shift1_C));
590         int chroma1_w = shdr->ChromaWeight[l][refIdx][1];
591         int chroma1_o = shdr->ChromaOffset[l][refIdx][1] * (1<<(offset_shift1_C));
592 
593         logtrace(LogMotion,"weighted-B-L%d [%d] %d %d %d  %dx%d\n", l, refIdx, luma_log2WD-6,luma_w,luma_o,nPbW,nPbH);
594 
595         ctx->acceleration.put_weighted_pred(pixels[0], stride[0],
596                                             predSamplesL[l],nCS, nPbW,nPbH,
597                                             luma_w, luma_o, luma_log2WD, bit_depth_L);
598         ctx->acceleration.put_weighted_pred(pixels[1], stride[1],
599                                             predSamplesC[0][l],nCS,
600                                             nPbW/SubWidthC,nPbH/SubHeightC,
601                                             chroma0_w, chroma0_o, chroma_log2WD, bit_depth_C);
602         ctx->acceleration.put_weighted_pred(pixels[2], stride[2],
603                                             predSamplesC[1][l],nCS,
604                                             nPbW/SubWidthC,nPbH/SubHeightC,
605                                             chroma1_w, chroma1_o, chroma_log2WD, bit_depth_C);
606       }
607     }
608     else {
609       // TODO: check why it can actually happen that both predFlags[] are false.
610       // For now, we ignore this and continue decoding.
611 
612       ctx->add_warning(DE265_WARNING_BOTH_PREDFLAGS_ZERO, false);
613       img->integrity = INTEGRITY_DECODING_ERRORS;
614     }
615   }
616 
617 #if defined(DE265_LOG_TRACE) && 0
618   logtrace(LogTransform,"MC pixels (luma), position %d %d:\n", xP,yP);
619 
620   for (int y=0;y<nPbH;y++) {
621     logtrace(LogTransform,"MC-y-%d-%d ",xP,yP+y);
622 
623     for (int x=0;x<nPbW;x++) {
624       logtrace(LogTransform,"*%02x ", pixels[0][x+y*stride[0]]);
625     }
626 
627     logtrace(LogTransform,"*\n");
628   }
629 
630 
631   logtrace(LogTransform,"MC pixels (chroma cb), position %d %d:\n", xP/2,yP/2);
632 
633   for (int y=0;y<nPbH/2;y++) {
634     logtrace(LogTransform,"MC-cb-%d-%d ",xP/2,yP/2+y);
635 
636     for (int x=0;x<nPbW/2;x++) {
637       logtrace(LogTransform,"*%02x ", pixels[1][x+y*stride[1]]);
638     }
639 
640     logtrace(LogTransform,"*\n");
641   }
642 
643 
644   logtrace(LogTransform,"MC pixels (chroma cr), position %d %d:\n", xP/2,yP/2);
645 
646   for (int y=0;y<nPbH/2;y++) {
647     logtrace(LogTransform,"MC-cr-%d-%d ",xP/2,yP/2+y);
648 
649     for (int x=0;x<nPbW/2;x++) {
650       logtrace(LogTransform,"*%02x ", pixels[2][x+y*stride[2]]);
651     }
652 
653     logtrace(LogTransform,"*\n");
654   }
655 #endif
656 }
657 
658 
#ifdef DE265_LOG_TRACE
// Write both list entries of a motion vector candidate to the motion trace.
// Vector and reference index are only printed for lists whose predFlag is set.
void logmvcand(const MotionVectorSpec& p)
{
  int list = 0;

  while (list<2) {
    const char* used = p.predFlag[list] ? "yes":"no ";

    if (!p.predFlag[list]) {
      logtrace(LogMotion,"  %d: %s  --;-- ref=--\n", list, used);
    }
    else {
      logtrace(LogMotion,"  %d: %s  %d;%d ref=%d\n", list, used,
               p.mv[list].x,p.mv[list].y, p.refIdx[list]);
    }

    list++;
  }
}
#else
// tracing disabled: calls expand to nothing
#define logmvcand(p)
#endif
674 
675 
equal_cand_MV(const MotionVectorSpec * a,const MotionVectorSpec * b)676 LIBDE265_INLINE static bool equal_cand_MV(const MotionVectorSpec* a, const MotionVectorSpec* b)
677 {
678   // TODO: is this really correct? no check for predFlag? Standard says so... (p.127)
679 
680   for (int i=0;i<2;i++) {
681     if (a->predFlag[i] != b->predFlag[i]) return false;
682 
683     if (a->predFlag[i]) {
684       if (a->mv[i].x != b->mv[i].x) return false;
685       if (a->mv[i].y != b->mv[i].y) return false;
686       if (a->refIdx[i] != b->refIdx[i]) return false;
687     }
688   }
689 
690   return true;
691 }
692 
693 
694 /*
695   +--+                +--+--+
696   |B2|                |B1|B0|
697   +--+----------------+--+--+
698      |                   |
699      |                   |
700      |                   |
701      |                   |
702      |        PB         |
703      |                   |
704      |                   |
705   +--+                   |
706   |A1|                   |
707   +--+-------------------+
708   |A0|
709   +--+
710 */
711 
712 
713 // 8.5.3.1.2
714 // TODO: check: can we fill the candidate list directly in this function and omit to copy later
715 /*
716   xC/yC:  CB position
717   nCS:    CB size                 (probably modified because of singleMCLFlag)
718   xP/yP:  PB position (absolute)  (probably modified because of singleMCLFlag)
719   singleMCLFlag
720   nPbW/nPbH: PB size
721   partIdx
722   out_cand: merging candidate vectors
723 
724   Add these candidates:
725   - A1
726   - B1  (if != A1)
727   - B0  (if != B1)
728   - A0  (if != A1)
729   - B2  (if != A1 and != B1)
730 
731   A maximum of 4 candidates are generated.
732 
  Note 1: For a CB split into two PBs, it does not make sense to merge the
  second part with the parameters of the first part, since we could then use
  2Nx2N right away. -> Exclude this candidate.
736 */
derive_spatial_merging_candidates(const de265_image * img,int xC,int yC,int nCS,int xP,int yP,uint8_t singleMCLFlag,int nPbW,int nPbH,int partIdx,MotionVectorSpec * out_cand,int maxCandidates)737 int derive_spatial_merging_candidates(const de265_image* img,
738                                       int xC, int yC, int nCS, int xP, int yP,
739                                       uint8_t singleMCLFlag,
740                                       int nPbW, int nPbH,
741                                       int partIdx,
742                                       MotionVectorSpec* out_cand,
743                                       int maxCandidates)
744 {
745   const pic_parameter_set* pps = &img->pps;
746   const int log2_parallel_merge_level = pps->log2_parallel_merge_level;
747 
748   enum PartMode PartMode = img->get_PartMode(xC,yC);
749 
750   /*
751   const int A0 = SpatialMergingCandidates::PRED_A0;
752   const int A1 = SpatialMergingCandidates::PRED_A1;
753   const int B0 = SpatialMergingCandidates::PRED_B0;
754   const int B1 = SpatialMergingCandidates::PRED_B1;
755   const int B2 = SpatialMergingCandidates::PRED_B2;
756   */
757 
758   // --- A1 ---
759 
760   // a pixel within A1 (bottom right of A1)
761   int xA1 = xP-1;
762   int yA1 = yP+nPbH-1;
763 
764   bool availableA1;
765   int idxA1;
766 
767   int computed_candidates = 0;
768 
769   // check if candidate is in same motion-estimation region (MER) -> discard
770   if ((xP>>log2_parallel_merge_level) == (xA1>>log2_parallel_merge_level) &&
771       (yP>>log2_parallel_merge_level) == (yA1>>log2_parallel_merge_level)) {
772     availableA1 = false;
773     logtrace(LogMotion,"spatial merging candidate A1: below parallel merge level\n");
774   }
775   // redundant candidate? (Note 1) -> discard
776   else if (// !singleMCLFlag &&    automatically true when partIdx==1
777            partIdx==1 &&
778            (PartMode==PART_Nx2N ||
779             PartMode==PART_nLx2N ||
780             PartMode==PART_nRx2N)) {
781     availableA1 = false;
782     logtrace(LogMotion,"spatial merging candidate A1: second part ignore\n");
783   }
784   // MV available in A1
785   else {
786     availableA1 = img->available_pred_blk(xC,yC, nCS, xP,yP, nPbW,nPbH,partIdx, xA1,yA1);
787     if (!availableA1) logtrace(LogMotion,"spatial merging candidate A1: unavailable\n");
788   }
789 
790   if (availableA1) {
791     idxA1 = computed_candidates++;
792     out_cand[idxA1] = *img->get_mv_info(xA1,yA1);
793 
794     logtrace(LogMotion,"spatial merging candidate A1:\n");
795     logmvcand(out_cand[idxA1]);
796   }
797 
798   if (computed_candidates>=maxCandidates) return computed_candidates;
799 
800 
801   // --- B1 ---
802 
803   int xB1 = xP+nPbW-1;
804   int yB1 = yP-1;
805 
806   bool availableB1;
807   int idxB1;
808 
809   // same MER -> discard
810   if ((xP>>log2_parallel_merge_level) == (xB1>>log2_parallel_merge_level) &&
811       (yP>>log2_parallel_merge_level) == (yB1>>log2_parallel_merge_level)) {
812     availableB1 = false;
813     logtrace(LogMotion,"spatial merging candidate B1: below parallel merge level\n");
814   }
815   // redundant candidate (Note 1) -> discard
816   else if (// !singleMCLFlag &&    automatically true when partIdx==1
817            partIdx==1 &&
818            (PartMode==PART_2NxN ||
819             PartMode==PART_2NxnU ||
820             PartMode==PART_2NxnD)) {
821     availableB1 = false;
822     logtrace(LogMotion,"spatial merging candidate B1: second part ignore\n");
823   }
824   // MV available in B1
825   else {
826     availableB1 = img->available_pred_blk(xC,yC, nCS, xP,yP, nPbW,nPbH,partIdx, xB1,yB1);
827     if (!availableB1) logtrace(LogMotion,"spatial merging candidate B1: unavailable\n");
828   }
829 
830   if (availableB1) {
831     const MotionVectorSpec* b1 = img->get_mv_info(xB1,yB1);
832 
833     // B1 == A1 -> discard B1
834     if (availableA1 &&
835         equal_cand_MV(&out_cand[idxA1], b1)) {
836       idxB1 = idxA1;
837       logtrace(LogMotion,"spatial merging candidate B1: redundant to A1\n");
838     }
839     else {
840       idxB1 = computed_candidates++;
841       out_cand[idxB1] = *b1;
842 
843       logtrace(LogMotion,"spatial merging candidate B1:\n");
844       logmvcand(out_cand[idxB1]);
845     }
846   }
847 
848   if (computed_candidates>=maxCandidates) return computed_candidates;
849 
850 
851   // --- B0 ---
852 
853   int xB0 = xP+nPbW;
854   int yB0 = yP-1;
855 
856   bool availableB0;
857   int idxB0;
858 
859   if ((xP>>log2_parallel_merge_level) == (xB0>>log2_parallel_merge_level) &&
860       (yP>>log2_parallel_merge_level) == (yB0>>log2_parallel_merge_level)) {
861     availableB0 = false;
862     logtrace(LogMotion,"spatial merging candidate B0: below parallel merge level\n");
863   }
864   else {
865     availableB0 = img->available_pred_blk(xC,yC, nCS, xP,yP, nPbW,nPbH,partIdx, xB0,yB0);
866     if (!availableB0) logtrace(LogMotion,"spatial merging candidate B0: unavailable\n");
867   }
868 
869   if (availableB0) {
870     const MotionVectorSpec* b0 = img->get_mv_info(xB0,yB0);
871 
872     // B0 == B1 -> discard B0
873     if (availableB1 &&
874         equal_cand_MV(&out_cand[idxB1], b0)) {
875       idxB0 = idxB1;
876       logtrace(LogMotion,"spatial merging candidate B0: redundant to B1\n");
877     }
878     else {
879       idxB0 = computed_candidates++;
880       out_cand[idxB0] = *b0;
881       logtrace(LogMotion,"spatial merging candidate B0:\n");
882       logmvcand(out_cand[idxB0]);
883     }
884   }
885 
886   if (computed_candidates>=maxCandidates) return computed_candidates;
887 
888 
889   // --- A0 ---
890 
891   int xA0 = xP-1;
892   int yA0 = yP+nPbH;
893 
894   bool availableA0;
895   int idxA0;
896 
897   if ((xP>>log2_parallel_merge_level) == (xA0>>log2_parallel_merge_level) &&
898       (yP>>log2_parallel_merge_level) == (yA0>>log2_parallel_merge_level)) {
899     availableA0 = false;
900     logtrace(LogMotion,"spatial merging candidate A0: below parallel merge level\n");
901   }
902   else {
903     availableA0 = img->available_pred_blk(xC,yC, nCS, xP,yP, nPbW,nPbH,partIdx, xA0,yA0);
904     if (!availableA0) logtrace(LogMotion,"spatial merging candidate A0: unavailable\n");
905   }
906 
907   if (availableA0) {
908     const MotionVectorSpec* a0 = img->get_mv_info(xA0,yA0);
909 
910     // A0 == A1 -> discard A0
911     if (availableA1 &&
912         equal_cand_MV(&out_cand[idxA1], a0)) {
913       idxA0 = idxA1;
914       logtrace(LogMotion,"spatial merging candidate A0: redundant to A1\n");
915     }
916     else {
917       idxA0 = computed_candidates++;
918       out_cand[idxA0] = *a0;
919       logtrace(LogMotion,"spatial merging candidate A0:\n");
920       logmvcand(out_cand[idxA0]);
921     }
922   }
923 
924   if (computed_candidates>=maxCandidates) return computed_candidates;
925 
926 
927   // --- B2 ---
928 
929   int xB2 = xP-1;
930   int yB2 = yP-1;
931 
932   bool availableB2;
933   int idxB2;
934 
935   // if we already have four candidates, do not consider B2 anymore
936   if (computed_candidates==4) {
937     availableB2 = false;
938     logtrace(LogMotion,"spatial merging candidate B2: ignore\n");
939   }
940   else if ((xP>>log2_parallel_merge_level) == (xB2>>log2_parallel_merge_level) &&
941            (yP>>log2_parallel_merge_level) == (yB2>>log2_parallel_merge_level)) {
942     availableB2 = false;
943     logtrace(LogMotion,"spatial merging candidate B2: below parallel merge level\n");
944   }
945   else {
946     availableB2 = img->available_pred_blk(xC,yC, nCS, xP,yP, nPbW,nPbH,partIdx, xB2,yB2);
947     if (!availableB2) logtrace(LogMotion,"spatial merging candidate B2: unavailable\n");
948   }
949 
950   if (availableB2) {
951     const MotionVectorSpec* b2 = img->get_mv_info(xB2,yB2);
952 
953     // B2 == B1 -> discard B2
954     if (availableB1 &&
955         equal_cand_MV(&out_cand[idxB1], b2)) {
956       idxB2 = idxB1;
957       logtrace(LogMotion,"spatial merging candidate B2: redundant to B1\n");
958     }
959     // B2 == A1 -> discard B2
960     else if (availableA1 &&
961              equal_cand_MV(&out_cand[idxA1], b2)) {
962       idxB2 = idxA1;
963       logtrace(LogMotion,"spatial merging candidate B2: redundant to A1\n");
964     }
965     else {
966       idxB2 = computed_candidates++;
967       out_cand[idxB2] = *b2;
968       logtrace(LogMotion,"spatial merging candidate B2:\n");
969       logmvcand(out_cand[idxB2]);
970     }
971   }
972 
973   return computed_candidates;
974 }
975 
976 
977 // 8.5.3.1.4
derive_zero_motion_vector_candidates(const slice_segment_header * shdr,MotionVectorSpec * out_mergeCandList,int * inout_numCurrMergeCand,int maxCandidates)978 void derive_zero_motion_vector_candidates(const slice_segment_header* shdr,
979                                           MotionVectorSpec* out_mergeCandList,
980                                           int* inout_numCurrMergeCand,
981                                           int maxCandidates)
982 {
983   logtrace(LogMotion,"derive_zero_motion_vector_candidates\n");
984 
985   int numRefIdx;
986 
987   if (shdr->slice_type==SLICE_TYPE_P) {
988     numRefIdx = shdr->num_ref_idx_l0_active;
989   }
990   else {
991     numRefIdx = libde265_min(shdr->num_ref_idx_l0_active,
992                              shdr->num_ref_idx_l1_active);
993   }
994 
995 
996   //int numInputMergeCand = *inout_numMergeCand;
997   int zeroIdx = 0;
998 
999   while (*inout_numCurrMergeCand < maxCandidates) {
1000     // 1.
1001 
1002     logtrace(LogMotion,"zeroIdx:%d numRefIdx:%d\n", zeroIdx, numRefIdx);
1003 
1004     MotionVectorSpec* newCand = &out_mergeCandList[*inout_numCurrMergeCand];
1005 
1006     const int refIdx = (zeroIdx < numRefIdx) ? zeroIdx : 0;
1007 
1008     if (shdr->slice_type==SLICE_TYPE_P) {
1009       newCand->refIdx[0] = refIdx;
1010       newCand->refIdx[1] = -1;
1011       newCand->predFlag[0] = 1;
1012       newCand->predFlag[1] = 0;
1013     }
1014     else {
1015       newCand->refIdx[0] = refIdx;
1016       newCand->refIdx[1] = refIdx;
1017       newCand->predFlag[0] = 1;
1018       newCand->predFlag[1] = 1;
1019     }
1020 
1021     newCand->mv[0].x = 0;
1022     newCand->mv[0].y = 0;
1023     newCand->mv[1].x = 0;
1024     newCand->mv[1].y = 0;
1025 
1026     (*inout_numCurrMergeCand)++;
1027 
1028     // 2.
1029 
1030     zeroIdx++;
1031   }
1032 }
1033 
1034 
scale_mv(MotionVector * out_mv,MotionVector mv,int colDist,int currDist)1035 bool scale_mv(MotionVector* out_mv, MotionVector mv, int colDist, int currDist)
1036 {
1037   int td = Clip3(-128,127, colDist);
1038   int tb = Clip3(-128,127, currDist);
1039 
1040   if (td==0) {
1041     *out_mv = mv;
1042     return false;
1043   }
1044   else {
1045     int tx = (16384 + (abs_value(td)>>1)) / td;
1046     int distScaleFactor = Clip3(-4096,4095, (tb*tx+32)>>6);
1047     out_mv->x = Clip3(-32768,32767,
1048                       Sign(distScaleFactor*mv.x)*((abs_value(distScaleFactor*mv.x)+127)>>8));
1049     out_mv->y = Clip3(-32768,32767,
1050                       Sign(distScaleFactor*mv.y)*((abs_value(distScaleFactor*mv.y)+127)>>8));
1051     return true;
1052   }
1053 }
1054 
1055 
1056 // (L1003) 8.5.3.2.8
1057 
derive_collocated_motion_vectors(base_context * ctx,de265_image * img,const slice_segment_header * shdr,int xP,int yP,int colPic,int xColPb,int yColPb,int refIdxLX,int X,MotionVector * out_mvLXCol,uint8_t * out_availableFlagLXCol)1058 void derive_collocated_motion_vectors(base_context* ctx,
1059                                       de265_image* img,
1060                                       const slice_segment_header* shdr,
1061                                       int xP,int yP,
1062                                       int colPic,
1063                                       int xColPb,int yColPb,
1064                                       int refIdxLX,  // (always 0 for merge mode)
1065                                       int X,
1066                                       MotionVector* out_mvLXCol,
1067                                       uint8_t* out_availableFlagLXCol)
1068 {
1069   logtrace(LogMotion,"derive_collocated_motion_vectors %d;%d\n",xP,yP);
1070 
1071 
1072   // get collocated image and the prediction mode at the collocated position
1073 
1074   assert(ctx->has_image(colPic));
1075   const de265_image* colImg = ctx->get_image(colPic);
1076 
1077   // check for access outside image area
1078 
1079   if (xColPb >= colImg->get_width() ||
1080       yColPb >= colImg->get_height()) {
1081     ctx->add_warning(DE265_WARNING_COLLOCATED_MOTION_VECTOR_OUTSIDE_IMAGE_AREA, false);
1082     *out_availableFlagLXCol = 0;
1083     return;
1084   }
1085 
1086   enum PredMode predMode = colImg->get_pred_mode(xColPb,yColPb);
1087 
1088 
1089   // collocated block is Intra -> no collocated MV
1090 
1091   if (predMode == MODE_INTRA) {
1092     out_mvLXCol->x = 0;
1093     out_mvLXCol->y = 0;
1094     *out_availableFlagLXCol = 0;
1095     return;
1096   }
1097 
1098 
1099   logtrace(LogMotion,"colPic:%d (POC=%d) X:%d refIdxLX:%d refpiclist:%d\n",
1100            colPic,
1101            colImg->PicOrderCntVal,
1102            X,refIdxLX,shdr->RefPicList[X][refIdxLX]);
1103 
1104 
1105   // collocated reference image is unavailable -> no collocated MV
1106 
1107   if (colImg->integrity == INTEGRITY_UNAVAILABLE_REFERENCE) {
1108     out_mvLXCol->x = 0;
1109     out_mvLXCol->y = 0;
1110     *out_availableFlagLXCol = 0;
1111     return;
1112   }
1113 
1114 
1115   // get the collocated MV
1116 
1117   const MotionVectorSpec* mvi = colImg->get_mv_info(xColPb,yColPb);
1118   int listCol;
1119   int refIdxCol;
1120   MotionVector mvCol;
1121 
1122   logtrace(LogMotion,"read MVI %d;%d:\n",xColPb,yColPb);
1123   logmvcand(*mvi);
1124 
1125 
1126   // collocated MV uses only L1 -> use L1
1127   if (mvi->predFlag[0]==0) {
1128     mvCol = mvi->mv[1];
1129     refIdxCol = mvi->refIdx[1];
1130     listCol = 1;
1131   }
1132   // collocated MV uses only L0 -> use L0
1133   else if (mvi->predFlag[1]==0) {
1134     mvCol = mvi->mv[0];
1135     refIdxCol = mvi->refIdx[0];
1136     listCol = 0;
1137   }
1138   // collocated MV uses L0 and L1
1139   else {
1140     bool allRefFramesBeforeCurrentFrame = true;
1141 
1142     const int currentPOC = img->PicOrderCntVal;
1143 
1144     // all reference POCs earlier than current POC (list 1)
1145     // Test L1 first, because there is a higher change to find a future reference frame.
1146 
1147     for (int rIdx=0; rIdx<shdr->num_ref_idx_l1_active && allRefFramesBeforeCurrentFrame; rIdx++)
1148       {
1149         const de265_image* refimg = ctx->get_image(shdr->RefPicList[1][rIdx]);
1150         int refPOC = refimg->PicOrderCntVal;
1151 
1152         if (refPOC > currentPOC) {
1153           allRefFramesBeforeCurrentFrame = false;
1154         }
1155       }
1156 
1157     // all reference POCs earlier than current POC (list 0)
1158 
1159     for (int rIdx=0; rIdx<shdr->num_ref_idx_l0_active && allRefFramesBeforeCurrentFrame; rIdx++)
1160       {
1161         const de265_image* refimg = ctx->get_image(shdr->RefPicList[0][rIdx]);
1162         int refPOC = refimg->PicOrderCntVal;
1163 
1164         if (refPOC > currentPOC) {
1165           allRefFramesBeforeCurrentFrame = false;
1166         }
1167       }
1168 
1169 
1170     /* TODO: What is the rationale behind this ???
1171 
1172        My guess:
1173        when there are images before the current frame (most probably in L0) and images after
1174        the current frame (most probably in L1), we take the reference in the opposite
1175        direction than where the collocated frame is positioned in the hope that the distance
1176        to the current frame will be smaller and thus give a better prediction.
1177 
1178        If all references point into the past, we cannot say much about the temporal order or
1179        L0,L1 and thus take over both parts.
1180      */
1181 
1182     if (allRefFramesBeforeCurrentFrame) {
1183       mvCol = mvi->mv[X];
1184       refIdxCol = mvi->refIdx[X];
1185       listCol = X;
1186     }
1187     else {
1188       int N = shdr->collocated_from_l0_flag;
1189       mvCol = mvi->mv[N];
1190       refIdxCol = mvi->refIdx[N];
1191       listCol = N;
1192     }
1193   }
1194 
1195 
1196 
1197   const slice_segment_header* colShdr = colImg->slices[ colImg->get_SliceHeaderIndex(xColPb,yColPb) ];
1198 
1199   if (shdr->LongTermRefPic[X][refIdxLX] !=
1200       colShdr->LongTermRefPic[listCol][refIdxCol]) {
1201     *out_availableFlagLXCol = 0;
1202     out_mvLXCol->x = 0;
1203     out_mvLXCol->y = 0;
1204   }
1205   else {
1206     *out_availableFlagLXCol = 1;
1207 
1208     const bool isLongTerm = shdr->LongTermRefPic[X][refIdxLX];
1209 
1210     int colDist  = colImg->PicOrderCntVal - colShdr->RefPicList_POC[listCol][refIdxCol];
1211     int currDist = img->PicOrderCntVal - shdr->RefPicList_POC[X][refIdxLX];
1212 
1213     logtrace(LogMotion,"COLPOCDIFF %d %d [%d %d / %d %d]\n",colDist, currDist,
1214              colImg->PicOrderCntVal, colShdr->RefPicList_POC[listCol][refIdxCol],
1215              img->PicOrderCntVal, shdr->RefPicList_POC[X][refIdxLX]
1216              );
1217 
1218     if (isLongTerm || colDist == currDist) {
1219       *out_mvLXCol = mvCol;
1220     }
1221     else {
1222       if (!scale_mv(out_mvLXCol, mvCol, colDist, currDist)) {
1223         ctx->add_warning(DE265_WARNING_INCORRECT_MOTION_VECTOR_SCALING, false);
1224         img->integrity = INTEGRITY_DECODING_ERRORS;
1225       }
1226 
1227       logtrace(LogMotion,"scale: %d;%d to %d;%d\n",
1228                mvCol.x,mvCol.y, out_mvLXCol->x,out_mvLXCol->y);
1229     }
1230   }
1231 }
1232 
1233 
1234 // 8.5.3.1.7
derive_temporal_luma_vector_prediction(base_context * ctx,de265_image * img,const slice_segment_header * shdr,int xP,int yP,int nPbW,int nPbH,int refIdxL,int X,MotionVector * out_mvLXCol,uint8_t * out_availableFlagLXCol)1235 void derive_temporal_luma_vector_prediction(base_context* ctx,
1236                                             de265_image* img,
1237                                             const slice_segment_header* shdr,
1238                                             int xP,int yP,
1239                                             int nPbW,int nPbH,
1240                                             int refIdxL,
1241                                             int X, // which MV (L0/L1) to get
1242                                             MotionVector* out_mvLXCol,
1243                                             uint8_t*      out_availableFlagLXCol)
1244 {
1245   // --- no temporal MVP -> exit ---
1246 
1247   if (shdr->slice_temporal_mvp_enabled_flag == 0) {
1248     out_mvLXCol->x = 0;
1249     out_mvLXCol->y = 0;
1250     *out_availableFlagLXCol = 0;
1251     return;
1252   }
1253 
1254 
1255   // --- find collocated reference image ---
1256 
1257   int Log2CtbSizeY = img->sps.Log2CtbSizeY;
1258 
1259   int colPic; // TODO: this is the same for the whole slice. We can precompute it.
1260 
1261   if (shdr->slice_type == SLICE_TYPE_B &&
1262       shdr->collocated_from_l0_flag == 0)
1263     {
1264       logtrace(LogMotion,"collocated L1 ref_idx=%d\n",shdr->collocated_ref_idx);
1265 
1266       colPic = shdr->RefPicList[1][ shdr->collocated_ref_idx ];
1267     }
1268   else
1269     {
1270       logtrace(LogMotion,"collocated L0 ref_idx=%d\n",shdr->collocated_ref_idx);
1271 
1272       colPic = shdr->RefPicList[0][ shdr->collocated_ref_idx ];
1273     }
1274 
1275 
1276   // check whether collocated reference picture exists
1277 
1278   if (!ctx->has_image(colPic)) {
1279     out_mvLXCol->x = 0;
1280     out_mvLXCol->y = 0;
1281     *out_availableFlagLXCol = 0;
1282 
1283     ctx->add_warning(DE265_WARNING_NONEXISTING_REFERENCE_PICTURE_ACCESSED, false);
1284     return;
1285   }
1286 
1287 
1288   // --- get collocated MV either at bottom-right corner or from center of PB ---
1289 
1290   int xColPb,yColPb;
1291   int yColBr = yP + nPbH; // bottom right collocated motion vector position
1292   int xColBr = xP + nPbW;
1293 
1294   /* If neighboring pixel at bottom-right corner is in the same CTB-row and inside the image,
1295      use this (reduced down to 16 pixels resolution) as collocated MV position.
1296 
1297      Note: see 2014, Sze, Sect. 5.2.1.2 why candidate C0 is excluded when on another CTB-row.
1298      This is to reduce the memory bandwidth requirements.
1299    */
1300   if ((yP>>Log2CtbSizeY) == (yColBr>>Log2CtbSizeY) &&
1301       xColBr < img->sps.pic_width_in_luma_samples &&
1302       yColBr < img->sps.pic_height_in_luma_samples)
1303     {
1304       xColPb = xColBr & ~0x0F; // reduce resolution of collocated motion-vectors to 16 pixels grid
1305       yColPb = yColBr & ~0x0F;
1306 
1307       derive_collocated_motion_vectors(ctx,img,shdr, xP,yP, colPic, xColPb,yColPb, refIdxL, X,
1308                                        out_mvLXCol, out_availableFlagLXCol);
1309     }
1310   else
1311     {
1312       out_mvLXCol->x = 0;
1313       out_mvLXCol->y = 0;
1314       *out_availableFlagLXCol = 0;
1315     }
1316 
1317 
1318   if (*out_availableFlagLXCol==0) {
1319 
1320     int xColCtr = xP+(nPbW>>1);
1321     int yColCtr = yP+(nPbH>>1);
1322 
1323     xColPb = xColCtr & ~0x0F; // reduce resolution of collocated motion-vectors to 16 pixels grid
1324     yColPb = yColCtr & ~0x0F;
1325 
1326     derive_collocated_motion_vectors(ctx,img,shdr, xP,yP, colPic, xColPb,yColPb, refIdxL, X,
1327                                      out_mvLXCol, out_availableFlagLXCol);
1328   }
1329 }
1330 
1331 
/* Candidate index pairs for the combined bi-predictive merging candidates:
   for a given combIdx, row 0 holds l0CandIdx and row 1 holds l1CandIdx.
   (Presumably Table 8-19 of the H.265 specification, as the name suggests.)
   Read-only lookup data, hence const. */
static const int table_8_19[2][12] = {
  { 0,1,0,2,1,2,0,3,1,3,2,3 },
  { 1,0,2,0,2,1,3,0,3,1,3,2 }
  };
1336 
1337 // 8.5.3.1.3
/* Note (TODO): during decoding, we know which of the candidates we will select.
   Hence, we do not really have to generate the other ones...
 */
derive_combined_bipredictive_merging_candidates(const base_context * ctx,const slice_segment_header * shdr,MotionVectorSpec * inout_mergeCandList,int * inout_numMergeCand,int maxCandidates)1341 void derive_combined_bipredictive_merging_candidates(const base_context* ctx,
1342                                                      const slice_segment_header* shdr,
1343                                                      MotionVectorSpec* inout_mergeCandList,
1344                                                      int* inout_numMergeCand,
1345                                                      int maxCandidates)
1346 {
1347   if (*inout_numMergeCand>1 && *inout_numMergeCand < maxCandidates) {
1348     int numOrigMergeCand = *inout_numMergeCand;
1349 
1350     int numInputMergeCand = *inout_numMergeCand;
1351     int combIdx = 0;
1352     uint8_t combStop = false;
1353 
1354     while (!combStop) {
1355       int l0CandIdx = table_8_19[0][combIdx];
1356       int l1CandIdx = table_8_19[1][combIdx];
1357 
1358       if (l0CandIdx >= numInputMergeCand ||
1359           l1CandIdx >= numInputMergeCand) {
1360         assert(false); // bitstream error -> TODO: conceal error
1361       }
1362 
1363       MotionVectorSpec* l0Cand = &inout_mergeCandList[l0CandIdx];
1364       MotionVectorSpec* l1Cand = &inout_mergeCandList[l1CandIdx];
1365 
1366       logtrace(LogMotion,"add bipredictive merging candidate (combIdx:%d)\n",combIdx);
1367       logtrace(LogMotion,"l0Cand:\n"); logmvcand(*l0Cand);
1368       logtrace(LogMotion,"l1Cand:\n"); logmvcand(*l1Cand);
1369 
1370       const de265_image* img0 = l0Cand->predFlag[0] ? ctx->get_image(shdr->RefPicList[0][l0Cand->refIdx[0]]) : NULL;
1371       const de265_image* img1 = l1Cand->predFlag[1] ? ctx->get_image(shdr->RefPicList[1][l1Cand->refIdx[1]]) : NULL;
1372 
1373       if (l0Cand->predFlag[0] && !img0) {
1374         return; // TODO error
1375       }
1376 
1377       if (l1Cand->predFlag[1] && !img1) {
1378         return; // TODO error
1379       }
1380 
1381       if (l0Cand->predFlag[0] && l1Cand->predFlag[1] &&
1382           (img0->PicOrderCntVal != img1->PicOrderCntVal     ||
1383            l0Cand->mv[0].x != l1Cand->mv[1].x ||
1384            l0Cand->mv[0].y != l1Cand->mv[1].y)) {
1385         MotionVectorSpec* p = &inout_mergeCandList[ *inout_numMergeCand ];
1386         p->refIdx[0] = l0Cand->refIdx[0];
1387         p->refIdx[1] = l1Cand->refIdx[1];
1388         p->predFlag[0] = l0Cand->predFlag[0];
1389         p->predFlag[1] = l1Cand->predFlag[1];
1390         p->mv[0] = l0Cand->mv[0];
1391         p->mv[1] = l1Cand->mv[1];
1392         (*inout_numMergeCand)++;
1393 
1394         logtrace(LogMotion,"result:\n");
1395         logmvcand(*p);
1396       }
1397 
1398       combIdx++;
1399       if (combIdx == numOrigMergeCand*(numOrigMergeCand-1) ||
1400           *inout_numMergeCand == maxCandidates) {
1401         combStop = true;
1402       }
1403     }
1404   }
1405 }
1406 
1407 
1408 // 8.5.3.1.1
get_merge_candidate_list_without_step_9(base_context * ctx,const slice_segment_header * shdr,de265_image * img,int xC,int yC,int xP,int yP,int nCS,int nPbW,int nPbH,int partIdx,int max_merge_idx,MotionVectorSpec * mergeCandList)1409 static void get_merge_candidate_list_without_step_9(base_context* ctx,
1410                                                     const slice_segment_header* shdr,
1411                                                     de265_image* img,
1412                                                     int xC,int yC, int xP,int yP,
1413                                                     int nCS, int nPbW,int nPbH, int partIdx,
1414                                                     int max_merge_idx,
1415                                                     MotionVectorSpec* mergeCandList)
1416 {
1417 
1418   //int xOrigP = xP;
1419   //int yOrigP = yP;
1420   int nOrigPbW = nPbW;
1421   int nOrigPbH = nPbH;
1422 
1423   int singleMCLFlag; // single merge-candidate-list (MCL) flag
1424 
1425   /* Use single MCL for CBs of size 8x8, except when parallel-merge-level is at 4x4.
1426      Without this flag, PBs smaller than 8x8 would not receive as much merging candidates.
1427      Having additional candidates might have these advantages:
1428      - coding MVs for these small PBs is expensive, and
1429      - since the PBs are not far away from a proper (neighboring) merging candidate,
1430        the quality of the candidates will still be good.
1431    */
1432   singleMCLFlag = (img->pps.log2_parallel_merge_level > 2 && nCS==8);
1433 
1434   if (singleMCLFlag) {
1435     xP=xC;
1436     yP=yC;
1437     nPbW=nCS;
1438     nPbH=nCS;
1439     partIdx=0;
1440   }
1441 
1442   int maxCandidates = max_merge_idx+1;
1443   //MotionVectorSpec mergeCandList[5];
1444   int numMergeCand=0;
1445 
1446   // --- spatial merge candidates
1447 
1448   numMergeCand = derive_spatial_merging_candidates(img, xC,yC, nCS, xP,yP, singleMCLFlag,
1449                                                    nPbW,nPbH,partIdx, mergeCandList,
1450                                                    maxCandidates);
1451 
1452   // --- collocated merge candidate
1453   if (numMergeCand < maxCandidates) {
1454     int refIdxCol[2] = { 0,0 };
1455 
1456     MotionVector mvCol[2];
1457     uint8_t predFlagLCol[2];
1458     derive_temporal_luma_vector_prediction(ctx,img,shdr, xP,yP,nPbW,nPbH,
1459                                            refIdxCol[0],0, &mvCol[0],
1460                                            &predFlagLCol[0]);
1461 
1462     uint8_t availableFlagCol = predFlagLCol[0];
1463     predFlagLCol[1] = 0;
1464 
1465     if (shdr->slice_type == SLICE_TYPE_B) {
1466       derive_temporal_luma_vector_prediction(ctx,img,shdr,
1467                                              xP,yP,nPbW,nPbH, refIdxCol[1],1, &mvCol[1],
1468                                              &predFlagLCol[1]);
1469       availableFlagCol |= predFlagLCol[1];
1470     }
1471 
1472 
1473     if (availableFlagCol) {
1474       MotionVectorSpec* colVec = &mergeCandList[numMergeCand++];
1475 
1476       colVec->mv[0] = mvCol[0];
1477       colVec->mv[1] = mvCol[1];
1478       colVec->predFlag[0] = predFlagLCol[0];
1479       colVec->predFlag[1] = predFlagLCol[1];
1480       colVec->refIdx[0] = refIdxCol[0];
1481       colVec->refIdx[1] = refIdxCol[1];
1482     }
1483   }
1484 
1485 
1486   // --- bipredictive merge candidates ---
1487 
1488   if (shdr->slice_type == SLICE_TYPE_B) {
1489     derive_combined_bipredictive_merging_candidates(ctx, shdr,
1490                                                     mergeCandList, &numMergeCand, maxCandidates);
1491   }
1492 
1493 
1494   // --- zero-vector merge candidates ---
1495 
1496   derive_zero_motion_vector_candidates(shdr, mergeCandList, &numMergeCand, maxCandidates);
1497 
1498 
1499   logtrace(LogMotion,"mergeCandList:\n");
1500   for (int i=0;i<shdr->MaxNumMergeCand;i++)
1501     {
1502       //logtrace(LogMotion, " %d:%s\n", i, i==merge_idx ? " SELECTED":"");
1503       logmvcand(mergeCandList[i]);
1504     }
1505 }
1506 
1507 
1508 
get_merge_candidate_list(base_context * ctx,const slice_segment_header * shdr,de265_image * img,int xC,int yC,int xP,int yP,int nCS,int nPbW,int nPbH,int partIdx,MotionVectorSpec * mergeCandList)1509 void get_merge_candidate_list(base_context* ctx,
1510                               const slice_segment_header* shdr,
1511                               de265_image* img,
1512                               int xC,int yC, int xP,int yP,
1513                               int nCS, int nPbW,int nPbH, int partIdx,
1514                               MotionVectorSpec* mergeCandList)
1515 {
1516   int max_merge_idx = 5-shdr->five_minus_max_num_merge_cand -1;
1517 
1518   get_merge_candidate_list_without_step_9(ctx, shdr, img,
1519                                           xC,yC,xP,yP,nCS,nPbW,nPbH, partIdx,
1520                                           max_merge_idx, mergeCandList);
1521 
1522   // 9. for encoder: modify all merge candidates
1523 
1524   for (int i=0;i<=max_merge_idx;i++) {
1525     if (mergeCandList[i].predFlag[0] &&
1526         mergeCandList[i].predFlag[1] &&
1527         nPbW+nPbH==12)
1528       {
1529         mergeCandList[i].refIdx[1]   = -1;
1530         mergeCandList[i].predFlag[1] = 0;
1531       }
1532   }
1533 }
1534 
1535 
1536 
derive_luma_motion_merge_mode(base_context * ctx,const slice_segment_header * shdr,de265_image * img,int xC,int yC,int xP,int yP,int nCS,int nPbW,int nPbH,int partIdx,int merge_idx,MotionVectorSpec * out_vi)1537 void derive_luma_motion_merge_mode(base_context* ctx,
1538                                    const slice_segment_header* shdr,
1539                                    de265_image* img,
1540                                    int xC,int yC, int xP,int yP,
1541                                    int nCS, int nPbW,int nPbH, int partIdx,
1542                                    int merge_idx,
1543                                    MotionVectorSpec* out_vi)
1544 {
1545   MotionVectorSpec mergeCandList[5];
1546 
1547   get_merge_candidate_list_without_step_9(ctx, shdr, img,
1548                                           xC,yC,xP,yP,nCS,nPbW,nPbH, partIdx,
1549                                           merge_idx, mergeCandList);
1550 
1551 
1552   *out_vi = mergeCandList[merge_idx];
1553 
1554   // 8.5.3.1.1 / 9.
1555 
1556   if (out_vi->predFlag[0] && out_vi->predFlag[1] && nPbW+nPbH==12) {
1557     out_vi->refIdx[1] = -1;
1558     out_vi->predFlag[1] = 0;
1559   }
1560 }
1561 
1562 
1563 // 8.5.3.1.6
derive_spatial_luma_vector_prediction(base_context * ctx,de265_image * img,const slice_segment_header * shdr,int xC,int yC,int nCS,int xP,int yP,int nPbW,int nPbH,int X,int refIdxLX,int partIdx,uint8_t out_availableFlagLXN[2],MotionVector out_mvLXN[2])1564 void derive_spatial_luma_vector_prediction(base_context* ctx,
1565                                            de265_image* img,
1566                                            const slice_segment_header* shdr,
1567                                            int xC,int yC,int nCS,int xP,int yP,
1568                                            int nPbW,int nPbH, int X,
1569                                            int refIdxLX, int partIdx,
1570                                            uint8_t out_availableFlagLXN[2],
1571                                            MotionVector out_mvLXN[2])
1572 {
1573   int isScaledFlagLX = 0;
1574 
1575   const int A=0;
1576   const int B=1;
1577 
1578   out_availableFlagLXN[A] = 0;
1579   out_availableFlagLXN[B] = 0;
1580 
1581 
1582   // --- A ---
1583 
1584   // 1.
1585 
1586   int xA[2], yA[2];
1587   xA[0] = xP-1;
1588   yA[0] = yP + nPbH;
1589   xA[1] = xA[0];
1590   yA[1] = yA[0]-1;
1591 
1592   // 2.
1593 
1594   out_availableFlagLXN[A] = 0;
1595   out_mvLXN[A].x = 0;
1596   out_mvLXN[A].y = 0;
1597 
1598   // 3. / 4.
1599 
1600   bool availableA[2];
1601   availableA[0] = img->available_pred_blk(xC,yC, nCS, xP,yP, nPbW,nPbH,partIdx, xA[0],yA[0]);
1602   availableA[1] = img->available_pred_blk(xC,yC, nCS, xP,yP, nPbW,nPbH,partIdx, xA[1],yA[1]);
1603 
1604   // 5.
1605 
1606   if (availableA[0] || availableA[1]) {
1607     isScaledFlagLX = 1;
1608   }
1609 
1610   // 6.  test A0 and A1  (Ak)
1611 
1612   int refIdxA=-1;
1613 
1614   // the POC we want to reference in this PB
1615   const de265_image* tmpimg = ctx->get_image(shdr->RefPicList[X][ refIdxLX ]);
1616   if (tmpimg==NULL) { return; }
1617   const int referenced_POC = tmpimg->PicOrderCntVal;
1618 
1619   for (int k=0;k<=1;k++) {
1620     if (availableA[k] &&
1621         out_availableFlagLXN[A]==0 && // no A?-predictor so far
1622         img->get_pred_mode(xA[k],yA[k]) != MODE_INTRA) {
1623 
1624       int Y=1-X;
1625 
1626       const MotionVectorSpec* vi = img->get_mv_info(xA[k],yA[k]);
1627       logtrace(LogMotion,"MVP A%d=\n",k);
1628       logmvcand(*vi);
1629 
1630       const de265_image* imgX = NULL;
1631       if (vi->predFlag[X]) imgX = ctx->get_image(shdr->RefPicList[X][ vi->refIdx[X] ]);
1632       const de265_image* imgY = NULL;
1633       if (vi->predFlag[Y]) imgY = ctx->get_image(shdr->RefPicList[Y][ vi->refIdx[Y] ]);
1634 
1635       // check whether the predictor X is available and references the same POC
1636       if (vi->predFlag[X] && imgX && imgX->PicOrderCntVal == referenced_POC) {
1637 
1638         logtrace(LogMotion,"take A%d/L%d as A candidate with same POC\n",k,X);
1639 
1640         out_availableFlagLXN[A]=1;
1641         out_mvLXN[A] = vi->mv[X];
1642         refIdxA = vi->refIdx[X];
1643       }
1644       // check whether the other predictor (Y) is available and references the same POC
1645       else if (vi->predFlag[Y] && imgY && imgY->PicOrderCntVal == referenced_POC) {
1646 
1647         logtrace(LogMotion,"take A%d/L%d as A candidate with same POC\n",k,Y);
1648 
1649         out_availableFlagLXN[A]=1;
1650         out_mvLXN[A] = vi->mv[Y];
1651         refIdxA = vi->refIdx[Y];
1652       }
1653     }
1654   }
1655 
1656   // 7. If there is no predictor referencing the same POC, we take any other reference as
1657   //    long as it is the same type of reference (long-term / short-term)
1658 
1659   for (int k=0 ; k<=1 && out_availableFlagLXN[A]==0 ; k++) {
1660     int refPicList=-1;
1661 
1662     if (availableA[k] &&
1663         // TODO: we could remove this call by storing the result of the similar computation above
1664         img->get_pred_mode(xA[k],yA[k]) != MODE_INTRA) {
1665 
1666       int Y=1-X;
1667 
1668       const MotionVectorSpec* vi = img->get_mv_info(xA[k],yA[k]);
1669       if (vi->predFlag[X]==1 &&
1670           shdr->LongTermRefPic[X][refIdxLX] == shdr->LongTermRefPic[X][ vi->refIdx[X] ]) {
1671 
1672         logtrace(LogMotion,"take A%D/L%d as A candidate with different POCs\n",k,X);
1673 
1674         out_availableFlagLXN[A]=1;
1675         out_mvLXN[A] = vi->mv[X];
1676         refIdxA = vi->refIdx[X];
1677         refPicList = X;
1678       }
1679       else if (vi->predFlag[Y]==1 &&
1680                shdr->LongTermRefPic[X][refIdxLX] == shdr->LongTermRefPic[Y][ vi->refIdx[Y] ]) {
1681 
1682         logtrace(LogMotion,"take A%d/L%d as A candidate with different POCs\n",k,Y);
1683 
1684         out_availableFlagLXN[A]=1;
1685         out_mvLXN[A] = vi->mv[Y];
1686         refIdxA = vi->refIdx[Y];
1687         refPicList = Y;
1688       }
1689     }
1690 
1691     if (out_availableFlagLXN[A]==1) {
1692       if (refIdxA<0) {
1693         out_availableFlagLXN[0] = out_availableFlagLXN[1] = false;
1694         return; // error
1695       }
1696 
1697       assert(refIdxA>=0);
1698       assert(refPicList>=0);
1699 
1700       const de265_image* refPicA = ctx->get_image(shdr->RefPicList[refPicList][refIdxA ]);
1701       const de265_image* refPicX = ctx->get_image(shdr->RefPicList[X         ][refIdxLX]);
1702 
1703       //int picStateA = shdr->RefPicList_PicState[refPicList][refIdxA ];
1704       //int picStateX = shdr->RefPicList_PicState[X         ][refIdxLX];
1705 
1706       int isLongTermA = shdr->LongTermRefPic[refPicList][refIdxA ];
1707       int isLongTermX = shdr->LongTermRefPic[X         ][refIdxLX];
1708 
1709       logtrace(LogMotion,"scale MVP A: A-POC:%d X-POC:%d\n",
1710                refPicA->PicOrderCntVal,refPicX->PicOrderCntVal);
1711 
1712       if (!isLongTermA && !isLongTermX)
1713       /*
1714       if (picStateA == UsedForShortTermReference &&
1715           picStateX == UsedForShortTermReference)
1716       */
1717         {
1718           int distA = img->PicOrderCntVal - refPicA->PicOrderCntVal;
1719           int distX = img->PicOrderCntVal - referenced_POC;
1720 
1721           if (!scale_mv(&out_mvLXN[A], out_mvLXN[A], distA, distX)) {
1722             ctx->add_warning(DE265_WARNING_INCORRECT_MOTION_VECTOR_SCALING, false);
1723             img->integrity = INTEGRITY_DECODING_ERRORS;
1724           }
1725         }
1726     }
1727   }
1728 
1729 
1730   // --- B ---
1731 
1732   // 1.
1733 
1734   int xB[3], yB[3];
1735   xB[0] = xP+nPbW;
1736   yB[0] = yP-1;
1737   xB[1] = xB[0]-1;
1738   yB[1] = yP-1;
1739   xB[2] = xP-1;
1740   yB[2] = yP-1;
1741 
1742   // 2.
1743 
1744   out_availableFlagLXN[B] = 0;
1745   out_mvLXN[B].x = 0;
1746   out_mvLXN[B].y = 0;
1747 
1748   // 3. test B0,B1,B2 (Bk)
1749 
1750   int refIdxB=-1;
1751 
1752   bool availableB[3];
1753   for (int k=0;k<3;k++) {
1754     availableB[k] = img->available_pred_blk(xC,yC, nCS, xP,yP, nPbW,nPbH,partIdx, xB[k],yB[k]);
1755 
1756     if (availableB[k] && out_availableFlagLXN[B]==0) {
1757 
1758       int Y=1-X;
1759 
1760       const MotionVectorSpec* vi = img->get_mv_info(xB[k],yB[k]);
1761       logtrace(LogMotion,"MVP B%d=\n",k);
1762       logmvcand(*vi);
1763 
1764 
1765       const de265_image* imgX = NULL;
1766       if (vi->predFlag[X]) imgX = ctx->get_image(shdr->RefPicList[X][ vi->refIdx[X] ]);
1767       const de265_image* imgY = NULL;
1768       if (vi->predFlag[Y]) imgY = ctx->get_image(shdr->RefPicList[Y][ vi->refIdx[Y] ]);
1769 
1770       if (vi->predFlag[X] && imgX && imgX->PicOrderCntVal == referenced_POC) {
1771         logtrace(LogMotion,"a) take B%d/L%d as B candidate with same POC\n",k,X);
1772 
1773         out_availableFlagLXN[B]=1;
1774         out_mvLXN[B] = vi->mv[X];
1775         refIdxB = vi->refIdx[X];
1776       }
1777       else if (vi->predFlag[Y] && imgY && imgY->PicOrderCntVal == referenced_POC) {
1778         logtrace(LogMotion,"b) take B%d/L%d as B candidate with same POC\n",k,Y);
1779 
1780         out_availableFlagLXN[B]=1;
1781         out_mvLXN[B] = vi->mv[Y];
1782         refIdxB = vi->refIdx[Y];
1783       }
1784     }
1785   }
1786 
1787   // 4.
1788 
1789   if (isScaledFlagLX==0 &&      // no A predictor,
1790       out_availableFlagLXN[B])  // but an unscaled B predictor
1791     {
1792       // use unscaled B predictor as A predictor
1793 
1794       logtrace(LogMotion,"copy the same-POC B candidate as additional A candidate\n");
1795 
1796       out_availableFlagLXN[A]=1;
1797       out_mvLXN[A] = out_mvLXN[B];
1798       refIdxA = refIdxB;
1799     }
1800 
1801   // 5.
1802 
1803   // If no A predictor, we output the unscaled B as the A predictor (above)
1804   // and also add a scaled B predictor here.
1805   // If there is (probably) an A predictor, no differing-POC B predictor is generated.
1806   if (isScaledFlagLX==0) {
1807     out_availableFlagLXN[B]=0;
1808 
1809     for (int k=0 ; k<=2 && out_availableFlagLXN[B]==0 ; k++) {
1810       int refPicList=-1;
1811 
1812       if (availableB[k]) {
1813         int Y=1-X;
1814 
1815         const MotionVectorSpec* vi = img->get_mv_info(xB[k],yB[k]);
1816 
1817         if (vi->predFlag[X]==1 &&
1818             shdr->LongTermRefPic[X][refIdxLX] == shdr->LongTermRefPic[X][ vi->refIdx[X] ]) {
1819           out_availableFlagLXN[B]=1;
1820           out_mvLXN[B] = vi->mv[X];
1821           refIdxB = vi->refIdx[X];
1822           refPicList = X;
1823         }
1824         else if (vi->predFlag[Y]==1 &&
1825                  shdr->LongTermRefPic[X][refIdxLX] == shdr->LongTermRefPic[Y][ vi->refIdx[Y] ]) {
1826           out_availableFlagLXN[B]=1;
1827           out_mvLXN[B] = vi->mv[Y];
1828           refIdxB = vi->refIdx[Y];
1829           refPicList = Y;
1830         }
1831       }
1832 
1833       if (out_availableFlagLXN[B]==1) {
1834         if (refIdxB<0) {
1835           out_availableFlagLXN[0] = out_availableFlagLXN[1] = false;
1836           return; // error
1837         }
1838 
1839         assert(refPicList>=0);
1840         assert(refIdxB>=0);
1841 
1842         const de265_image* refPicB=ctx->get_image(shdr->RefPicList[refPicList][refIdxB ]);
1843         const de265_image* refPicX=ctx->get_image(shdr->RefPicList[X         ][refIdxLX]);
1844 
1845         int isLongTermB = shdr->LongTermRefPic[refPicList][refIdxB ];
1846         int isLongTermX = shdr->LongTermRefPic[X         ][refIdxLX];
1847 
1848         if (refPicB==NULL || refPicX==NULL) {
1849           img->decctx->add_warning(DE265_WARNING_NONEXISTING_REFERENCE_PICTURE_ACCESSED,false);
1850           img->integrity = INTEGRITY_DECODING_ERRORS;
1851         }
1852         else if (refPicB->PicOrderCntVal != refPicX->PicOrderCntVal &&
1853                  !isLongTermB && !isLongTermX) {
1854           int distB = img->PicOrderCntVal - refPicB->PicOrderCntVal;
1855           int distX = img->PicOrderCntVal - referenced_POC;
1856 
1857           logtrace(LogMotion,"scale MVP B: B-POC:%d X-POC:%d\n",refPicB->PicOrderCntVal,refPicX->PicOrderCntVal);
1858 
1859           if (!scale_mv(&out_mvLXN[B], out_mvLXN[B], distB, distX)) {
1860             ctx->add_warning(DE265_WARNING_INCORRECT_MOTION_VECTOR_SCALING, false);
1861             img->integrity = INTEGRITY_DECODING_ERRORS;
1862           }
1863         }
1864       }
1865     }
1866   }
1867 }
1868 
1869 
1870 // 8.5.3.1.5
fill_luma_motion_vector_predictors(base_context * ctx,const slice_segment_header * shdr,de265_image * img,int xC,int yC,int nCS,int xP,int yP,int nPbW,int nPbH,int l,int refIdx,int partIdx,MotionVector out_mvpList[2])1871 void fill_luma_motion_vector_predictors(base_context* ctx,
1872                                         const slice_segment_header* shdr,
1873                                         de265_image* img,
1874                                         int xC,int yC,int nCS,int xP,int yP,
1875                                         int nPbW,int nPbH, int l,
1876                                         int refIdx, int partIdx,
1877                                         MotionVector out_mvpList[2])
1878 {
1879   // 8.5.3.1.6: derive two spatial vector predictors A (0) and B (1)
1880 
1881   uint8_t availableFlagLXN[2];
1882   MotionVector mvLXN[2];
1883 
1884   derive_spatial_luma_vector_prediction(ctx, img, shdr, xC,yC, nCS, xP,yP,
1885                                         nPbW,nPbH, l, refIdx, partIdx,
1886                                         availableFlagLXN, mvLXN);
1887 
1888   // 8.5.3.1.7: if we only have one spatial vector or both spatial vectors are the same,
1889   // derive a temporal predictor
1890 
1891   uint8_t availableFlagLXCol;
1892   MotionVector mvLXCol;
1893 
1894 
1895   if (availableFlagLXN[0] &&
1896       availableFlagLXN[1] &&
1897       (mvLXN[0].x != mvLXN[1].x || mvLXN[0].y != mvLXN[1].y)) {
1898     availableFlagLXCol = 0;
1899   }
1900   else {
1901     derive_temporal_luma_vector_prediction(ctx, img, shdr,
1902                                            xP,yP, nPbW,nPbH, refIdx,l,
1903                                            &mvLXCol, &availableFlagLXCol);
1904   }
1905 
1906 
1907   // --- build candidate vector list with exactly two entries ---
1908 
1909   int numMVPCandLX=0;
1910 
1911   // spatial predictor A
1912 
1913   if (availableFlagLXN[0])
1914     {
1915       out_mvpList[numMVPCandLX++] = mvLXN[0];
1916     }
1917 
1918   // spatial predictor B (if not same as A)
1919 
1920   if (availableFlagLXN[1] &&
1921       (!availableFlagLXN[0] || // in case A in not available, but mvLXA initialized to same as mvLXB
1922        (mvLXN[0].x != mvLXN[1].x || mvLXN[0].y != mvLXN[1].y)))
1923     {
1924       out_mvpList[numMVPCandLX++] = mvLXN[1];
1925     }
1926 
1927   // temporal predictor
1928 
1929   if (availableFlagLXCol)
1930     {
1931       out_mvpList[numMVPCandLX++] = mvLXCol;
1932     }
1933 
1934   // fill with zero predictors
1935 
1936   while (numMVPCandLX<2) {
1937     out_mvpList[numMVPCandLX].x = 0;
1938     out_mvpList[numMVPCandLX].y = 0;
1939     numMVPCandLX++;
1940   }
1941 
1942 
1943   assert(numMVPCandLX==2);
1944 }
1945 
1946 
luma_motion_vector_prediction(base_context * ctx,const slice_segment_header * shdr,de265_image * img,const motion_spec & motion,int xC,int yC,int nCS,int xP,int yP,int nPbW,int nPbH,int l,int refIdx,int partIdx)1947 MotionVector luma_motion_vector_prediction(base_context* ctx,
1948                                            const slice_segment_header* shdr,
1949                                            de265_image* img,
1950                                            const motion_spec& motion,
1951                                            int xC,int yC,int nCS,int xP,int yP,
1952                                            int nPbW,int nPbH, int l,
1953                                            int refIdx, int partIdx)
1954 {
1955   MotionVector mvpList[2];
1956 
1957   fill_luma_motion_vector_predictors(ctx, shdr, img,
1958                                      xC,yC,nCS,xP,yP,
1959                                      nPbW, nPbH, l, refIdx, partIdx,
1960                                      mvpList);
1961 
1962   // select predictor according to mvp_lX_flag
1963 
1964   return mvpList[ l ? motion.mvp_l1_flag : motion.mvp_l0_flag ];
1965 }
1966 
1967 
#if DE265_LOG_TRACE
// Trace helper: logs, for the block at (x0,y0) with size nPbW x nPbH, the
// prediction flag, motion vector and reference index of both lists of 'mv',
// tagged with the string 'mode'.
// For a list whose predFlag is zero, vector and reference index are printed as 0.
void logMV(int x0,int y0,int nPbW,int nPbH, const char* mode,const MotionVectorSpec* mv)
{
  int used[2], vecX[2], vecY[2], ref[2];

  for (int l=0; l<2; l++) {
    used[l] = mv->predFlag[l];

    if (used[l]) {
      vecX[l] = mv->mv[l].x;
      vecY[l] = mv->mv[l].y;
      ref[l]  = mv->refIdx[l];
    }
    else {
      // do not read the motion data of an unused list
      vecX[l] = 0;
      vecY[l] = 0;
      ref[l]  = 0;
    }
  }

  logtrace(LogMotion,
           "*MV %d;%d [%d;%d] %s: (%d) %d;%d @%d   (%d) %d;%d @%d\n", x0,y0,nPbW,nPbH,mode,
           used[0], vecX[0],vecY[0], ref[0],
           used[1], vecX[1],vecY[1], ref[1]);
}
#else
// tracing disabled: logMV() calls expand to nothing
#define logMV(x0,y0,nPbW,nPbH,mode,mv)
#endif
1984 
1985 
1986 
1987 // 8.5.3.1
motion_vectors_and_ref_indices(base_context * ctx,const slice_segment_header * shdr,de265_image * img,const motion_spec & motion,int xC,int yC,int xB,int yB,int nCS,int nPbW,int nPbH,int partIdx,MotionVectorSpec * out_vi)1988 void motion_vectors_and_ref_indices(base_context* ctx,
1989                                     const slice_segment_header* shdr,
1990                                     de265_image* img,
1991                                     const motion_spec& motion,
1992                                     int xC,int yC, int xB,int yB, int nCS, int nPbW,int nPbH,
1993                                     int partIdx,
1994                                     MotionVectorSpec* out_vi)
1995 {
1996   //slice_segment_header* shdr = tctx->shdr;
1997 
1998   int xP = xC+xB;
1999   int yP = yC+yB;
2000 
2001   enum PredMode predMode = img->get_pred_mode(xC,yC);
2002 
2003   if (predMode == MODE_SKIP ||
2004       (predMode == MODE_INTER && motion.merge_flag))
2005     {
2006       derive_luma_motion_merge_mode(ctx,shdr,img,
2007                                     xC,yC, xP,yP, nCS,nPbW,nPbH, partIdx,
2008                                     motion.merge_idx, out_vi);
2009 
2010       logMV(xP,yP,nPbW,nPbH, "merge_mode", out_vi);
2011     }
2012   else {
2013     int mvdL[2][2];
2014     MotionVector mvpL[2];
2015 
2016     for (int l=0;l<2;l++) {
2017       // 1.
2018 
2019       enum InterPredIdc inter_pred_idc = (enum InterPredIdc)motion.inter_pred_idc;
2020 
2021       if (inter_pred_idc == PRED_BI ||
2022           (inter_pred_idc == PRED_L0 && l==0) ||
2023           (inter_pred_idc == PRED_L1 && l==1)) {
2024         out_vi->refIdx[l] = motion.refIdx[l];
2025         out_vi->predFlag[l] = 1;
2026       }
2027       else {
2028         out_vi->refIdx[l] = -1;
2029         out_vi->predFlag[l] = 0;
2030       }
2031 
2032       // 2.
2033 
2034       mvdL[l][0] = motion.mvd[l][0];
2035       mvdL[l][1] = motion.mvd[l][1];
2036 
2037 
2038       if (out_vi->predFlag[l]) {
2039         // 3.
2040 
2041         mvpL[l] = luma_motion_vector_prediction(ctx,shdr,img,motion,
2042                                                 xC,yC,nCS,xP,yP, nPbW,nPbH, l,
2043                                                 out_vi->refIdx[l], partIdx);
2044 
2045         // 4.
2046 
2047         int32_t x = (mvpL[l].x + mvdL[l][0] + 0x10000) & 0xFFFF;
2048         int32_t y = (mvpL[l].y + mvdL[l][1] + 0x10000) & 0xFFFF;
2049 
2050         out_vi->mv[l].x = (x>=0x8000) ? x-0x10000 : x;
2051         out_vi->mv[l].y = (y>=0x8000) ? y-0x10000 : y;
2052       }
2053     }
2054 
2055     logMV(xP,yP,nPbW,nPbH, "mvp", out_vi);
2056   }
2057 }
2058 
2059 
2060 // 8.5.3
decode_prediction_unit(base_context * ctx,const slice_segment_header * shdr,de265_image * img,const motion_spec & motion,int xC,int yC,int xB,int yB,int nCS,int nPbW,int nPbH,int partIdx)2061 void decode_prediction_unit(base_context* ctx,
2062                             const slice_segment_header* shdr,
2063                             de265_image* img,
2064                             const motion_spec& motion,
2065                             int xC,int yC, int xB,int yB, int nCS, int nPbW,int nPbH, int partIdx)
2066 {
2067   logtrace(LogMotion,"decode_prediction_unit POC=%d %d;%d %dx%d\n",
2068            img->PicOrderCntVal, xC+xB,yC+yB, nPbW,nPbH);
2069 
2070   //slice_segment_header* shdr = tctx->shdr;
2071 
2072   // 1.
2073 
2074   MotionVectorSpec vi;
2075   motion_vectors_and_ref_indices(ctx, shdr, img, motion,
2076                                  xC,yC, xB,yB, nCS, nPbW,nPbH, partIdx, &vi);
2077 
2078   // 2.
2079 
2080   generate_inter_prediction_samples(ctx,shdr, img, xC,yC, xB,yB, nCS, nPbW,nPbH, &vi);
2081 
2082 
2083   img->set_mv_info(xC+xB,yC+yB,nPbW,nPbH, vi);
2084 }
2085