1 /*
2  * H.265 video codec.
3  * Copyright (c) 2013-2014 struktur AG, Dirk Farin <farin@struktur.de>
4  *
5  * This file is part of libde265.
6  *
7  * libde265 is free software: you can redistribute it and/or modify
8  * it under the terms of the GNU Lesser General Public License as
9  * published by the Free Software Foundation, either version 3 of
10  * the License, or (at your option) any later version.
11  *
12  * libde265 is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public License
18  * along with libde265.  If not, see <http://www.gnu.org/licenses/>.
19  */
20 
21 #include "image.h"
22 #include "decctx.h"
23 #include "encoder/encoder-context.h"
24 
25 #include <stdlib.h>
26 #include <string.h>
27 #include <assert.h>
28 
29 #include <limits>
30 
31 
32 #ifdef HAVE_MALLOC_H
33 #include <malloc.h>
34 #endif
35 
36 #ifdef HAVE_SSE4_1
37 #define MEMORY_PADDING  8
38 #else
39 #define MEMORY_PADDING  0
40 #endif
41 
42 #define STANDARD_ALIGNMENT 16
43 
44 #ifdef HAVE___MINGW_ALIGNED_MALLOC
45 #define ALLOC_ALIGNED(alignment, size)         __mingw_aligned_malloc((size), (alignment))
46 #define FREE_ALIGNED(mem)                      __mingw_aligned_free((mem))
47 #elif _WIN32
48 #define ALLOC_ALIGNED(alignment, size)         _aligned_malloc((size), (alignment))
49 #define FREE_ALIGNED(mem)                      _aligned_free((mem))
50 #elif defined(HAVE_POSIX_MEMALIGN)
/*
 * Allocate `size` bytes whose address is a multiple of `alignment`, using
 * posix_memalign().
 *
 * Returns the new block, or NULL if posix_memalign() reports an error. The
 * result is returned explicitly as NULL on failure instead of relying on the
 * value posix_memalign() leaves in its output argument.
 * Memory obtained here is released with FREE_ALIGNED().
 */
static inline void *ALLOC_ALIGNED(size_t alignment, size_t size) {
    void *mem = NULL;
    if (posix_memalign(&mem, alignment, size) != 0) {
        return NULL;
    }
    return mem;
}
58 #define FREE_ALIGNED(mem)                      free((mem))
59 #else
60 #define ALLOC_ALIGNED(alignment, size)      memalign((alignment), (size))
61 #define FREE_ALIGNED(mem)                   free((mem))
62 #endif
63 
64 #define ALLOC_ALIGNED_16(size)              ALLOC_ALIGNED(16, size)
65 
// File-scope alignment constant. Note that de265_alloc_image_plane() declares a
// local variable of the same name, which shadows this one inside that function.
static const int alignment = 16;
67 
de265_alloc_image_plane(struct de265_image * img,int cIdx,void * inputdata,int inputstride,void * userdata)68 LIBDE265_API void* de265_alloc_image_plane(struct de265_image* img, int cIdx,
69                                            void* inputdata, int inputstride, void *userdata)
70 {
71   int alignment = STANDARD_ALIGNMENT;
72   int stride = (img->get_width(cIdx) + alignment-1) / alignment * alignment;
73   int height = img->get_height(cIdx);
74 
75   uint8_t* p = (uint8_t *)ALLOC_ALIGNED_16(stride * height + MEMORY_PADDING);
76 
77   if (p==NULL) { return NULL; }
78 
79   img->set_image_plane(cIdx, p, stride, userdata);
80 
81   // copy input data if provided
82 
83   if (inputdata != NULL) {
84     if (inputstride == stride) {
85       memcpy(p, inputdata, stride*height);
86     }
87     else {
88       for (int y=0;y<height;y++) {
89         memcpy(p+y*stride, ((char*)inputdata) + inputstride*y, inputstride);
90       }
91     }
92   }
93 
94   return p;
95 }
96 
97 
de265_free_image_plane(struct de265_image * img,int cIdx)98 LIBDE265_API void de265_free_image_plane(struct de265_image* img, int cIdx)
99 {
100   uint8_t* p = (uint8_t*)img->get_image_plane(cIdx);
101   assert(p);
102   FREE_ALIGNED(p);
103 }
104 
105 
de265_image_get_buffer(de265_decoder_context * ctx,de265_image_spec * spec,de265_image * img,void * userdata)106 static int  de265_image_get_buffer(de265_decoder_context* ctx,
107                                    de265_image_spec* spec, de265_image* img, void* userdata)
108 {
109   const int rawChromaWidth  = spec->width  / img->sps.SubWidthC;
110   const int rawChromaHeight = spec->height / img->sps.SubHeightC;
111 
112   int luma_stride   = (spec->width    + spec->alignment-1) / spec->alignment * spec->alignment;
113   int chroma_stride = (rawChromaWidth + spec->alignment-1) / spec->alignment * spec->alignment;
114 
115   assert(img->sps.BitDepth_Y >= 8 && img->sps.BitDepth_Y <= 16);
116   assert(img->sps.BitDepth_C >= 8 && img->sps.BitDepth_C <= 16);
117 
118   int luma_bpl   = luma_stride   * ((img->sps.BitDepth_Y+7)/8);
119   int chroma_bpl = chroma_stride * ((img->sps.BitDepth_C+7)/8);
120 
121   int luma_height   = spec->height;
122   int chroma_height = rawChromaHeight;
123 
124   bool alloc_failed = false;
125 
126   uint8_t* p[3] = { 0,0,0 };
127   p[0] = (uint8_t *)ALLOC_ALIGNED_16(luma_height   * luma_bpl   + MEMORY_PADDING);
128   if (p[0]==NULL) { alloc_failed=true; }
129 
130   if (img->get_chroma_format() != de265_chroma_mono) {
131     p[1] = (uint8_t *)ALLOC_ALIGNED_16(chroma_height * chroma_bpl + MEMORY_PADDING);
132     p[2] = (uint8_t *)ALLOC_ALIGNED_16(chroma_height * chroma_bpl + MEMORY_PADDING);
133 
134     if (p[1]==NULL || p[2]==NULL) { alloc_failed=true; }
135   }
136   else {
137     p[1] = NULL;
138     p[2] = NULL;
139     chroma_stride = 0;
140   }
141 
142   if (alloc_failed) {
143     for (int i=0;i<3;i++)
144       if (p[i]) {
145         FREE_ALIGNED(p[i]);
146       }
147 
148     return 0;
149   }
150 
151   img->set_image_plane(0, p[0], luma_stride, NULL);
152   img->set_image_plane(1, p[1], chroma_stride, NULL);
153   img->set_image_plane(2, p[2], chroma_stride, NULL);
154 
155   return 1;
156 }
157 
de265_image_release_buffer(de265_decoder_context * ctx,de265_image * img,void * userdata)158 static void de265_image_release_buffer(de265_decoder_context* ctx,
159                                        de265_image* img, void* userdata)
160 {
161   for (int i=0;i<3;i++) {
162     uint8_t* p = (uint8_t*)img->get_image_plane(i);
163     if (p) {
164       FREE_ALIGNED(p);
165     }
166   }
167 }
168 
169 
// Built-in allocator pair (get_buffer, release_buffer). alloc_image() selects it
// whenever no custom allocation functions are to be used.
de265_image_allocation de265_image::default_image_allocation = {
  de265_image_get_buffer,
  de265_image_release_buffer
};
174 
175 
set_image_plane(int cIdx,uint8_t * mem,int stride,void * userdata)176 void de265_image::set_image_plane(int cIdx, uint8_t* mem, int stride, void *userdata)
177 {
178   pixels[cIdx] = mem;
179   plane_user_data[cIdx] = userdata;
180 
181   if (cIdx==0) { this->stride        = stride; }
182   else         { this->chroma_stride = stride; }
183 }
184 
185 
// Running counter from which alloc_image() takes the ID of each image it allocates.
uint32_t de265_image::s_next_image_ID = 0;
187 
de265_image()188 de265_image::de265_image()
189 {
190   ID = -1;
191   removed_at_picture_id = 0; // picture not used, so we can assume it has been removed
192 
193   decctx = NULL;
194   encctx = NULL;
195 
196   encoder_image_release_func = NULL;
197 
198   //alloc_functions.get_buffer = NULL;
199   //alloc_functions.release_buffer = NULL;
200 
201   for (int c=0;c<3;c++) {
202     pixels[c] = NULL;
203     pixels_confwin[c] = NULL;
204     plane_user_data[c] = NULL;
205   }
206 
207   width=height=0;
208 
209   pts = 0;
210   user_data = NULL;
211 
212   ctb_progress = NULL;
213 
214   integrity = INTEGRITY_NOT_DECODED;
215 
216   picture_order_cnt_lsb = -1; // undefined
217   PicOrderCntVal = -1; // undefined
218   PicState = UnusedForReference;
219   PicOutputFlag = false;
220 
221   nThreadsQueued   = 0;
222   nThreadsRunning  = 0;
223   nThreadsBlocked  = 0;
224   nThreadsFinished = 0;
225   nThreadsTotal    = 0;
226 
227   de265_mutex_init(&mutex);
228   de265_cond_init(&finished_cond);
229 }
230 
231 
alloc_image(int w,int h,enum de265_chroma c,const seq_parameter_set * sps,bool allocMetadata,decoder_context * dctx,encoder_context * ectx,de265_PTS pts,void * user_data,bool useCustomAllocFunc)232 de265_error de265_image::alloc_image(int w,int h, enum de265_chroma c,
233                                      const seq_parameter_set* sps, bool allocMetadata,
234                                      decoder_context* dctx,
235                                      encoder_context* ectx,
236                                      de265_PTS pts, void* user_data,
237                                      bool useCustomAllocFunc)
238 {
239   //if (allocMetadata) { assert(sps); }
240   assert(sps);
241 
242   this->sps = *sps;
243 
244   release(); /* TODO: review code for efficient allocation when arrays are already
245                 allocated to the requested size. Without the release, the old image-data
246                 will not be freed. */
247 
248   ID = s_next_image_ID++;
249   removed_at_picture_id = std::numeric_limits<int32_t>::max();
250 
251   decctx = dctx;
252   encctx = ectx;
253 
254   // --- allocate image buffer ---
255 
256   chroma_format= c;
257 
258   width = w;
259   height = h;
260   chroma_width = w;
261   chroma_height= h;
262 
263   this->user_data = user_data;
264   this->pts = pts;
265 
266   de265_image_spec spec;
267 
268   int WinUnitX, WinUnitY;
269 
270   switch (chroma_format) {
271   case de265_chroma_mono: WinUnitX=1; WinUnitY=1; break;
272   case de265_chroma_420:  WinUnitX=2; WinUnitY=2; break;
273   case de265_chroma_422:  WinUnitX=2; WinUnitY=1; break;
274   case de265_chroma_444:  WinUnitX=1; WinUnitY=1; break;
275   default:
276     assert(0);
277   }
278 
279   switch (chroma_format) {
280   case de265_chroma_420:
281     spec.format = de265_image_format_YUV420P8;
282     chroma_width  = (chroma_width +1)/2;
283     chroma_height = (chroma_height+1)/2;
284     break;
285 
286   case de265_chroma_422:
287     spec.format = de265_image_format_YUV422P8;
288     chroma_width = (chroma_width+1)/2;
289     break;
290 
291   case de265_chroma_444:
292     spec.format = de265_image_format_YUV444P8;
293     break;
294 
295   case de265_chroma_mono:
296     spec.format = de265_image_format_mono8;
297     chroma_width = 0;
298     chroma_height= 0;
299     break;
300 
301   default:
302     assert(false);
303     break;
304   }
305 
306   spec.width  = w;
307   spec.height = h;
308   spec.alignment = STANDARD_ALIGNMENT;
309 
310 
311   // conformance window cropping
312 
313   int left   = sps ? sps->conf_win_left_offset : 0;
314   int right  = sps ? sps->conf_win_right_offset : 0;
315   int top    = sps ? sps->conf_win_top_offset : 0;
316   int bottom = sps ? sps->conf_win_bottom_offset : 0;
317 
318   width_confwin = width - (left+right)*WinUnitX;
319   height_confwin= height- (top+bottom)*WinUnitY;
320   chroma_width_confwin = chroma_width -left-right;
321   chroma_height_confwin= chroma_height-top-bottom;
322 
323   spec.crop_left  = left *WinUnitX;
324   spec.crop_right = right*WinUnitX;
325   spec.crop_top   = top   *WinUnitY;
326   spec.crop_bottom= bottom*WinUnitY;
327 
328   spec.visible_width = width_confwin;
329   spec.visible_height= height_confwin;
330 
331 
332   bpp_shift[0] = (sps->BitDepth_Y > 8) ? 1 : 0;
333   bpp_shift[1] = (sps->BitDepth_C > 8) ? 1 : 0;
334   bpp_shift[2] = bpp_shift[1];
335 
336 
337   // allocate memory and set conformance window pointers
338 
339   void* alloc_userdata = NULL;
340   if (decctx) alloc_userdata = decctx->param_image_allocation_userdata;
341   if (encctx) alloc_userdata = encctx->param_image_allocation_userdata; // actually not needed
342 
343   if (encctx && useCustomAllocFunc) {
344     encoder_image_release_func = encctx->release_func;
345 
346     // if we do not provide a release function, use our own
347 
348     if (encoder_image_release_func == NULL) {
349       image_allocation_functions = de265_image::default_image_allocation;
350     }
351     else {
352       image_allocation_functions.get_buffer     = NULL;
353       image_allocation_functions.release_buffer = NULL;
354     }
355   }
356   else if (decctx && useCustomAllocFunc) {
357     image_allocation_functions = decctx->param_image_allocation_functions;
358   }
359   else {
360     image_allocation_functions = de265_image::default_image_allocation;
361   }
362 
363   bool mem_alloc_success = true;
364 
365   if (image_allocation_functions.get_buffer != NULL) {
366     mem_alloc_success = image_allocation_functions.get_buffer(decctx, &spec, this,
367                                                               alloc_userdata);
368 
369     pixels_confwin[0] = pixels[0] + left*WinUnitX + top*WinUnitY*stride;
370     pixels_confwin[1] = pixels[1] + left + top*chroma_stride;
371     pixels_confwin[2] = pixels[2] + left + top*chroma_stride;
372 
373 
374     // check for memory shortage
375 
376     if (!mem_alloc_success)
377       {
378         return DE265_ERROR_OUT_OF_MEMORY;
379       }
380   }
381 
382   //alloc_functions = *allocfunc;
383   //alloc_userdata  = userdata;
384 
385   // --- allocate decoding info arrays ---
386 
387   if (allocMetadata) {
388     // intra pred mode
389 
390     mem_alloc_success &= intraPredMode.alloc(sps->PicWidthInMinPUs, sps->PicHeightInMinPUs,
391                                              sps->Log2MinPUSize);
392 
393     mem_alloc_success &= intraPredModeC.alloc(sps->PicWidthInMinPUs, sps->PicHeightInMinPUs,
394                                               sps->Log2MinPUSize);
395 
396     // cb info
397 
398     mem_alloc_success &= cb_info.alloc(sps->PicWidthInMinCbsY, sps->PicHeightInMinCbsY,
399                                        sps->Log2MinCbSizeY);
400 
401     // pb info
402 
403     int puWidth  = sps->PicWidthInMinCbsY  << (sps->Log2MinCbSizeY -2);
404     int puHeight = sps->PicHeightInMinCbsY << (sps->Log2MinCbSizeY -2);
405 
406     mem_alloc_success &= pb_info.alloc(puWidth,puHeight, 2);
407 
408 
409     // tu info
410 
411     mem_alloc_success &= tu_info.alloc(sps->PicWidthInTbsY, sps->PicHeightInTbsY,
412                                        sps->Log2MinTrafoSize);
413 
414     // deblk info
415 
416     int deblk_w = (sps->pic_width_in_luma_samples +3)/4;
417     int deblk_h = (sps->pic_height_in_luma_samples+3)/4;
418 
419     mem_alloc_success &= deblk_info.alloc(deblk_w, deblk_h, 2);
420 
421     // CTB info
422 
423     if (ctb_info.data_size != sps->PicSizeInCtbsY)
424       {
425         delete[] ctb_progress;
426 
427         mem_alloc_success &= ctb_info.alloc(sps->PicWidthInCtbsY, sps->PicHeightInCtbsY,
428                                             sps->Log2CtbSizeY);
429 
430         ctb_progress = new de265_progress_lock[ ctb_info.data_size ];
431       }
432 
433 
434     // check for memory shortage
435 
436     if (!mem_alloc_success)
437       {
438         return DE265_ERROR_OUT_OF_MEMORY;
439       }
440   }
441 
442   return DE265_OK;
443 }
444 
445 
~de265_image()446 de265_image::~de265_image()
447 {
448   release();
449 
450   // free progress locks
451 
452   if (ctb_progress) {
453     delete[] ctb_progress;
454   }
455 
456   de265_cond_destroy(&finished_cond);
457   de265_mutex_destroy(&mutex);
458 }
459 
460 
release()461 void de265_image::release()
462 {
463   // free image memory
464 
465   if (pixels[0])
466     {
467       if (encoder_image_release_func != NULL) {
468         encoder_image_release_func(encctx, this,
469                                    encctx->param_image_allocation_userdata);
470       }
471       else {
472         image_allocation_functions.release_buffer(decctx, this,
473                                                 decctx ?
474                                                   decctx->param_image_allocation_userdata :
475                                                   NULL);
476       }
477 
478       for (int i=0;i<3;i++)
479         {
480           pixels[i] = NULL;
481           pixels_confwin[i] = NULL;
482         }
483     }
484 
485   // free slices
486 
487   for (int i=0;i<slices.size();i++) {
488     delete slices[i];
489   }
490   slices.clear();
491 }
492 
493 
fill_image(int y,int cb,int cr)494 void de265_image::fill_image(int y,int cb,int cr)
495 {
496   if (y>=0) {
497     memset(pixels[0], y, stride * height);
498   }
499 
500   if (cb>=0) {
501     memset(pixels[1], cb, chroma_stride * chroma_height);
502   }
503 
504   if (cr>=0) {
505     memset(pixels[2], cr, chroma_stride * chroma_height);
506   }
507 }
508 
509 
copy_image(const de265_image * src)510 de265_error de265_image::copy_image(const de265_image* src)
511 {
512   /* TODO: actually, since we allocate the image only for internal purpose, we
513      do not have to call the external allocation routines for this. However, then
514      we have to track for each image how to release it again.
515      Another option would be to safe the copied data not in an de265_image at all.
516   */
517 
518   de265_error err = alloc_image(src->width, src->height, src->chroma_format, &src->sps, false,
519                                 src->decctx, src->encctx, src->pts, src->user_data, false);
520   if (err != DE265_OK) {
521     return err;
522   }
523 
524   copy_lines_from(src, 0, src->height);
525 
526   return err;
527 }
528 
529 
530 // end = last line + 1
copy_lines_from(const de265_image * src,int first,int end)531 void de265_image::copy_lines_from(const de265_image* src, int first, int end)
532 {
533   if (end > src->height) end=src->height;
534 
535   assert(first % 2 == 0);
536   assert(end   % 2 == 0);
537 
538   int luma_bpp   = (sps.BitDepth_Y+7)/8;
539   int chroma_bpp = (sps.BitDepth_C+7)/8;
540 
541   if (src->stride == stride) {
542     memcpy(pixels[0]      + first*stride * luma_bpp,
543            src->pixels[0] + first*src->stride * luma_bpp,
544            (end-first)*stride * luma_bpp);
545   }
546   else {
547     for (int yp=first;yp<end;yp++) {
548       memcpy(pixels[0]+yp*stride * luma_bpp,
549              src->pixels[0]+yp*src->stride * luma_bpp,
550              src->width * luma_bpp);
551     }
552   }
553 
554   int first_chroma = first / src->sps.SubHeightC;
555   int end_chroma   = end / src->sps.SubHeightC;
556 
557   if (src->chroma_format != de265_chroma_mono) {
558     if (src->chroma_stride == chroma_stride) {
559       memcpy(pixels[1]      + first_chroma*chroma_stride * chroma_bpp,
560              src->pixels[1] + first_chroma*chroma_stride * chroma_bpp,
561              (end_chroma-first_chroma) * chroma_stride * chroma_bpp);
562       memcpy(pixels[2]      + first_chroma*chroma_stride * chroma_bpp,
563              src->pixels[2] + first_chroma*chroma_stride * chroma_bpp,
564              (end_chroma-first_chroma) * chroma_stride * chroma_bpp);
565     }
566     else {
567       for (int y=first_chroma;y<end_chroma;y++) {
568         memcpy(pixels[1]+y*chroma_stride * chroma_bpp,
569                src->pixels[1]+y*src->chroma_stride * chroma_bpp,
570                src->chroma_width * chroma_bpp);
571         memcpy(pixels[2]+y*chroma_stride * chroma_bpp,
572                src->pixels[2]+y*src->chroma_stride * chroma_bpp,
573                src->chroma_width * chroma_bpp);
574       }
575     }
576   }
577 }
578 
579 
exchange_pixel_data_with(de265_image & b)580 void de265_image::exchange_pixel_data_with(de265_image& b)
581 {
582   for (int i=0;i<3;i++) {
583     std::swap(pixels[i], b.pixels[i]);
584     std::swap(pixels_confwin[i], b.pixels_confwin[i]);
585     std::swap(plane_user_data[i], b.plane_user_data[i]);
586   }
587 
588   std::swap(stride, b.stride);
589   std::swap(chroma_stride, b.chroma_stride);
590   std::swap(image_allocation_functions, b.image_allocation_functions);
591 }
592 
593 
thread_start(int nThreads)594 void de265_image::thread_start(int nThreads)
595 {
596   de265_mutex_lock(&mutex);
597 
598   //printf("nThreads before: %d %d\n",nThreadsQueued, nThreadsTotal);
599 
600   nThreadsQueued += nThreads;
601   nThreadsTotal += nThreads;
602 
603   //printf("nThreads after: %d %d\n",nThreadsQueued, nThreadsTotal);
604 
605   de265_mutex_unlock(&mutex);
606 }
607 
thread_run(const thread_task * task)608 void de265_image::thread_run(const thread_task* task)
609 {
610   //printf("run thread %s\n", task->name().c_str());
611 
612   de265_mutex_lock(&mutex);
613   nThreadsQueued--;
614   nThreadsRunning++;
615   de265_mutex_unlock(&mutex);
616 }
617 
thread_blocks()618 void de265_image::thread_blocks()
619 {
620   de265_mutex_lock(&mutex);
621   nThreadsRunning--;
622   nThreadsBlocked++;
623   de265_mutex_unlock(&mutex);
624 }
625 
thread_unblocks()626 void de265_image::thread_unblocks()
627 {
628   de265_mutex_lock(&mutex);
629   nThreadsBlocked--;
630   nThreadsRunning++;
631   de265_mutex_unlock(&mutex);
632 }
633 
thread_finishes(const thread_task * task)634 void de265_image::thread_finishes(const thread_task* task)
635 {
636   //printf("finish thread %s\n", task->name().c_str());
637 
638   de265_mutex_lock(&mutex);
639 
640   nThreadsRunning--;
641   nThreadsFinished++;
642   assert(nThreadsRunning >= 0);
643 
644   if (nThreadsFinished==nThreadsTotal) {
645     de265_cond_broadcast(&finished_cond, &mutex);
646   }
647 
648   de265_mutex_unlock(&mutex);
649 }
650 
wait_for_progress(thread_task * task,int ctbx,int ctby,int progress)651 void de265_image::wait_for_progress(thread_task* task, int ctbx,int ctby, int progress)
652 {
653   const int ctbW = sps.PicWidthInCtbsY;
654 
655   wait_for_progress(task, ctbx + ctbW*ctby, progress);
656 }
657 
wait_for_progress(thread_task * task,int ctbAddrRS,int progress)658 void de265_image::wait_for_progress(thread_task* task, int ctbAddrRS, int progress)
659 {
660   if (task==NULL) { return; }
661 
662   de265_progress_lock* progresslock = &ctb_progress[ctbAddrRS];
663   if (progresslock->get_progress() < progress) {
664     thread_blocks();
665 
666     assert(task!=NULL);
667     task->state = thread_task::Blocked;
668 
669     /* TODO: check whether we are the first blocked task in the list.
670        If we are, we have to conceal input errors.
671        Simplest concealment: do not block.
672     */
673 
674     progresslock->wait_for_progress(progress);
675     task->state = thread_task::Running;
676     thread_unblocks();
677   }
678 }
679 
680 
wait_for_completion()681 void de265_image::wait_for_completion()
682 {
683   de265_mutex_lock(&mutex);
684   while (nThreadsFinished!=nThreadsTotal) {
685     de265_cond_wait(&finished_cond, &mutex);
686   }
687   de265_mutex_unlock(&mutex);
688 }
689 
debug_is_completed() const690 bool de265_image::debug_is_completed() const
691 {
692   return nThreadsFinished==nThreadsTotal;
693 }
694 
695 
696 
clear_metadata()697 void de265_image::clear_metadata()
698 {
699   // TODO: maybe we could avoid the memset by ensuring that all data is written to
700   // during decoding (especially log2CbSize), but it is unlikely to be faster than the memset.
701 
702   cb_info.clear();
703   //tu_info.clear();  // done on the fly
704   ctb_info.clear();
705   deblk_info.clear();
706 
707   // --- reset CTB progresses ---
708 
709   for (int i=0;i<ctb_info.data_size;i++) {
710     ctb_progress[i].reset(CTB_PROGRESS_NONE);
711   }
712 }
713 
714 
set_mv_info(int x,int y,int nPbW,int nPbH,const MotionVectorSpec & mv)715 void de265_image::set_mv_info(int x,int y, int nPbW,int nPbH, const MotionVectorSpec& mv)
716 {
717   int log2PuSize = 2;
718 
719   int xPu = x >> log2PuSize;
720   int yPu = y >> log2PuSize;
721   int wPu = nPbW >> log2PuSize;
722   int hPu = nPbH >> log2PuSize;
723 
724   int stride = pb_info.width_in_units;
725 
726   for (int pby=0;pby<hPu;pby++)
727     for (int pbx=0;pbx<wPu;pbx++)
728       {
729         pb_info[ xPu+pbx + (yPu+pby)*stride ].mv = mv;
730       }
731 }
732 
733 
available_zscan(int xCurr,int yCurr,int xN,int yN) const734 bool de265_image::available_zscan(int xCurr,int yCurr, int xN,int yN) const
735 {
736   if (xN<0 || yN<0) return false;
737   if (xN>=sps.pic_width_in_luma_samples ||
738       yN>=sps.pic_height_in_luma_samples) return false;
739 
740   int minBlockAddrN = pps.MinTbAddrZS[ (xN>>sps.Log2MinTrafoSize) +
741                                        (yN>>sps.Log2MinTrafoSize) * sps.PicWidthInTbsY ];
742   int minBlockAddrCurr = pps.MinTbAddrZS[ (xCurr>>sps.Log2MinTrafoSize) +
743                                           (yCurr>>sps.Log2MinTrafoSize) * sps.PicWidthInTbsY ];
744 
745   if (minBlockAddrN > minBlockAddrCurr) return false;
746 
747   int xCurrCtb = xCurr >> sps.Log2CtbSizeY;
748   int yCurrCtb = yCurr >> sps.Log2CtbSizeY;
749   int xNCtb = xN >> sps.Log2CtbSizeY;
750   int yNCtb = yN >> sps.Log2CtbSizeY;
751 
752   if (get_SliceAddrRS(xCurrCtb,yCurrCtb) !=
753       get_SliceAddrRS(xNCtb,   yNCtb)) {
754     return false;
755   }
756 
757   if (pps.TileIdRS[xCurrCtb + yCurrCtb*sps.PicWidthInCtbsY] !=
758       pps.TileIdRS[xNCtb    + yNCtb   *sps.PicWidthInCtbsY]) {
759     return false;
760   }
761 
762   return true;
763 }
764 
765 
available_pred_blk(int xC,int yC,int nCbS,int xP,int yP,int nPbW,int nPbH,int partIdx,int xN,int yN) const766 bool de265_image::available_pred_blk(int xC,int yC, int nCbS, int xP, int yP,
767                                      int nPbW, int nPbH, int partIdx, int xN,int yN) const
768 {
769   logtrace(LogMotion,"C:%d;%d P:%d;%d N:%d;%d size=%d;%d\n",xC,yC,xP,yP,xN,yN,nPbW,nPbH);
770 
771   int sameCb = (xC <= xN && xN < xC+nCbS &&
772                 yC <= yN && yN < yC+nCbS);
773 
774   bool availableN;
775 
776   if (!sameCb) {
777     availableN = available_zscan(xP,yP,xN,yN);
778   }
779   else {
780     availableN = !(nPbW<<1 == nCbS && nPbH<<1 == nCbS &&  // NxN
781                    partIdx==1 &&
782                    yN >= yC+nPbH && xN < xC+nPbW);  // xN/yN inside partIdx 2
783   }
784 
785   if (availableN && get_pred_mode(xN,yN) == MODE_INTRA) {
786     availableN = false;
787   }
788 
789   return availableN;
790 }
791