1 /* -----------------------------------------------------------------------------
2 The copyright in this software is being made available under the BSD
3 License, included below. No patent rights, trademark rights and/or
4 other Intellectual Property Rights other than the copyrights concerning
5 the Software are granted under this license.
6 
7 For any license concerning other Intellectual Property rights than the software,
8 especially patent licenses, a separate Agreement needs to be closed.
9 For more information please contact:
10 
11 Fraunhofer Heinrich Hertz Institute
12 Einsteinufer 37
13 10587 Berlin, Germany
14 www.hhi.fraunhofer.de/vvc
15 vvc@hhi.fraunhofer.de
16 
17 Copyright (c) 2018-2021, Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V.
18 All rights reserved.
19 
20 Redistribution and use in source and binary forms, with or without
21 modification, are permitted provided that the following conditions are met:
22 
23  * Redistributions of source code must retain the above copyright notice,
24    this list of conditions and the following disclaimer.
25  * Redistributions in binary form must reproduce the above copyright notice,
26    this list of conditions and the following disclaimer in the documentation
27    and/or other materials provided with the distribution.
28  * Neither the name of Fraunhofer nor the names of its contributors may
29    be used to endorse or promote products derived from this software without
30    specific prior written permission.
31 
32 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
33 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
34 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
35 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
36 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
37 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
38 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
39 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
40 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
41 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
42 THE POSSIBILITY OF SUCH DAMAGE.
43 
44 
45 ------------------------------------------------------------------------------------------- */
46 
47 /** \file     Buffer.h
48  *  \brief    Low-overhead class describing 2D memory layout
49  */
50 
51 #pragma once
52 
53 #include "Common.h"
54 #include "CommonDef.h"
55 #include "ChromaFormat.h"
56 #include "MotionInfo.h"
57 
58 #include <string.h>
59 #include <type_traits>
60 #include <typeinfo>
61 #include <vector>
62 #include <utility>
63 
64 namespace vvdec
65 {
66 
#if ENABLE_SIMD_OPT_BUFFER
/**
 * Table of function pointers for block-level sample operations.
 *
 * The buffer classes in this header call through the global instance
 * g_pelBufOP (see AreaBuf<T>::copyFrom) instead of calling a fixed routine,
 * so the routine behind each pointer can be exchanged at run time.
 *
 * NOTE(review): which routine each pointer holds after construction, and
 * what initPelBufOpsX86() installs, is defined outside this header section.
 * The numeric suffixes (4 / 8 / 16) presumably name the block width a
 * variant is written for -- confirm against the implementations.
 */
struct PelBufferOps
{
  PelBufferOps();

#ifdef TARGET_SIMD_X86
  // x86 initialisation entry points; the template variant is parameterised
  // on the vector extension level.
  void initPelBufOpsX86();
  template<X86_VEXT vext>
  void _initPelBufOpsX86();

#endif
  // two sources -> one destination, with shift / offset and clipping range
  void ( *addAvg4 )       ( const Pel* src0, ptrdiff_t src0Stride, const Pel* src1, ptrdiff_t src1Stride, Pel *dst, ptrdiff_t dstStride, int width, int height,            int shift, int offset,      const ClpRng& clpRng );
  void ( *addAvg8 )       ( const Pel* src0, ptrdiff_t src0Stride, const Pel* src1, ptrdiff_t src1Stride, Pel *dst, ptrdiff_t dstStride, int width, int height,            int shift, int offset,      const ClpRng& clpRng );
  void ( *addAvg16 )      ( const Pel* src0, ptrdiff_t src0Stride, const Pel* src1, ptrdiff_t src1Stride, Pel *dst, ptrdiff_t dstStride, int width, int height,            int shift, int offset,      const ClpRng& clpRng );
  // two sources -> one destination, clipping range only
  void ( *reco4 )         ( const Pel* src0, ptrdiff_t src0Stride, const Pel* src1, ptrdiff_t src1Stride, Pel *dst, ptrdiff_t dstStride, int width, int height,                                        const ClpRng& clpRng );
  void ( *reco8 )         ( const Pel* src0, ptrdiff_t src0Stride, const Pel* src1, ptrdiff_t src1Stride, Pel *dst, ptrdiff_t dstStride, int width, int height,                                        const ClpRng& clpRng );
  // one source -> one destination, scale / shift / offset with optional clipping
  void ( *linTf4 )        ( const Pel* src0, ptrdiff_t src0Stride,                                        Pel *dst, ptrdiff_t dstStride, int width, int height, int scale, int shift, int offset,      const ClpRng& clpRng, bool bClip );
  void ( *linTf8 )        ( const Pel* src0, ptrdiff_t src0Stride,                                        Pel *dst, ptrdiff_t dstStride, int width, int height, int scale, int shift, int offset,      const ClpRng& clpRng, bool bClip );
  // two sources -> one destination, with per-source weights w0 / w1
  void ( *wghtAvg4 )      ( const Pel* src0, ptrdiff_t src0Stride, const Pel* src1, ptrdiff_t src1Stride, Pel *dst, ptrdiff_t dstStride, int width, int height, int shift, int offset, int w0, int w1, const ClpRng& clpRng );
  void ( *wghtAvg8 )      ( const Pel* src0, ptrdiff_t src0Stride, const Pel* src1, ptrdiff_t src1Stride, Pel *dst, ptrdiff_t dstStride, int width, int height, int shift, int offset, int w0, int w1, const ClpRng& clpRng );
  // type-agnostic 2D copy; AreaBuf<T>::copyFrom passes strides and width in bytes
  void ( *copyBuffer )    ( const char*src,  ptrdiff_t srcStride,        char* dst, ptrdiff_t  dstStride,                                int width, int height );
  // in-place operations on a single buffer
  void ( *padding1 )      (       Pel *dst,  ptrdiff_t stride,                                                                           int width, int height );
  void ( *padding2 )      (       Pel *dst,  ptrdiff_t stride,                                                                           int width, int height );
  void ( *transpose4x4 )  ( const Pel* src,  ptrdiff_t srcStride, Pel* dst, ptrdiff_t dstStride );
  void ( *transpose8x8 )  ( const Pel* src,  ptrdiff_t srcStride, Pel* dst, ptrdiff_t dstStride );
  void ( *applyLut )      (       Pel* ptr,  ptrdiff_t ptrStride, int width, int height, const Pel* lut );
  void ( *fillN_CU )      (       CodingUnit** ptr, ptrdiff_t ptrStride, int width, int height, CodingUnit* cuPtr );

  // resampling between an original and a scaled picture area
  void (*sampleRateConv)  ( const std::pair<int, int> scalingRatio, const std::pair<int, int> compScale,
                            const Pel* orgSrc, const int orgStride, const int orgWidth, const int orgHeight,
                            const int beforeScaleLeftOffset, const int beforeScaleTopOffset,
                            Pel* scaledSrc, const int scaledStride, const int scaledWidth, const int scaledHeight,
                            const int afterScaleLeftOffset, const int afterScaleTopOffset,
                            const int bitDepth, const bool useLumaFilter, const bool horCollocatedPositionFlag, const bool verCollocatedPositionFlag );
};

// The global table is declared inside the same guard as its type: with
// ENABLE_SIMD_OPT_BUFFER disabled PelBufferOps does not exist, and an
// unguarded declaration would make this header fail to compile.
extern PelBufferOps g_pelBufOP;
#endif
105 
// Pointer-stepping helpers for buffers addressed as ptr[x + y * stride].
//
//  - INCX / INCY / OFFSETX / OFFSETY / OFFSET modify `ptr` in place. They
//    expand to a braced block, so `ptr` must be a plain modifiable lvalue
//    and a trailing ';' after the invocation is an extra empty statement
//    (relevant when used as the unbraced body of an if/else).
//  - GET_OFFSETX / GET_OFFSETY / GET_OFFSET are expressions that yield the
//    offset pointer and leave `ptr` untouched.
//
// Every macro takes `stride` so that all of them share one signature; the
// X-only variants (INCX, OFFSETX, GET_OFFSETX) do not use it.
#define INCX( ptr, stride ) { ptr++; }
#define INCY( ptr, stride ) { ptr += ( stride ); }
#define OFFSETX( ptr, stride, x ) { ptr += ( x ); }
#define OFFSETY( ptr, stride, y ) { ptr += ( y ) * ( stride ); }
#define OFFSET( ptr, stride, x, y ) { ptr += ( x ) + ( y ) * ( stride ); }
#define GET_OFFSETX( ptr, stride, x ) ( ( ptr ) + ( x ) )
#define GET_OFFSETY( ptr, stride, y ) ( ( ptr ) + ( y ) * ( stride ) )
#define GET_OFFSET( ptr, stride, x, y ) ( ( ptr ) + ( x ) + ( y ) * ( stride ) )
114 
115 class Window;
116 struct BitDepths;
117 
118 template<typename T>
119 struct AreaBuf : public Size
120 {
121   T*        buf;
122   ptrdiff_t stride;
123 
AreaBufAreaBuf124   AreaBuf()                                                                                     : Size(),                  buf( NULL ), stride( 0 )          { }
AreaBufAreaBuf125   AreaBuf( T *_buf, const Size &size )                                                          : Size( size ),            buf( _buf ), stride( size.width ) { }
AreaBufAreaBuf126   AreaBuf( T *_buf, const ptrdiff_t &_stride, const Size &size )                                : Size( size ),            buf( _buf ), stride( _stride )    { }
AreaBufAreaBuf127   AreaBuf( T *_buf, const SizeType &_width, const SizeType &_height )                           : Size( _width, _height ), buf( _buf ), stride( _width )     { }
AreaBufAreaBuf128   AreaBuf( T *_buf, const ptrdiff_t &_stride, const SizeType &_width, const SizeType &_height ) : Size( _width, _height ), buf( _buf ), stride( _stride )    { }
AreaBufAreaBuf129   AreaBuf( const AreaBuf<typename std::remove_const<T>::type >& other )                         : Size( other ),           buf( other.buf ), stride( other.stride ) { }
130 
131   void fill                 ( const T &val );
132   void memset               ( const int val );
133 
134   void copyFrom             ( const AreaBuf<const T> &other ) const;
135 
136   void reconstruct          ( const AreaBuf<const T> &pred, const AreaBuf<const T> &resi, const ClpRng& clpRng);
137 
138   void subtract             ( const AreaBuf<const T> &other );
139   void extendBorderPel      ( unsigned margin );
140   void extendBorderPel      ( unsigned margin, bool left, bool right, bool top, bool bottom );
141   void addWeightedAvg       ( const AreaBuf<const T> &other1, const AreaBuf<const T> &other2, const ClpRng& clpRng, const int8_t bcwIdx);
142   void addAvg               ( const AreaBuf<const T> &other1, const AreaBuf<const T> &other2, const ClpRng& clpRng );
143   void padBorderPel         ( unsigned marginX, unsigned marginY, int dir );
144 
145   void linearTransform      ( const int scale, const int shift, const int offset, bool bClip, const ClpRng& clpRng );
146 
147   void transposedFrom       ( const AreaBuf<const T> &other );
148 
149   void rspSignal            ( const Pel *lut );
150   void scaleSignal          ( const int scale, const ClpRng& clpRng);
151 
152   void rescaleBuf           ( const AreaBuf<const T>& beforeScaling, const ComponentID compID, const std::pair<int, int> scalingRatio, const Window& confBefore, const Window& confAfter, const ChromaFormat chromaFormatIDC, const BitDepths& bitDepths, const bool horCollocatedChromaFlag = false, const bool verCollocatedChromaFlag = false );
153 
atAreaBuf154         T& at( const int &x, const int &y )          { return buf[y * stride + x]; }
atAreaBuf155   const T& at( const int &x, const int &y ) const    { return buf[y * stride + x]; }
156 
atAreaBuf157         T& at( const Position &pos )                 { return buf[pos.y * stride + pos.x]; }
atAreaBuf158   const T& at( const Position &pos ) const           { return buf[pos.y * stride + pos.x]; }
159 
160 
bufAtAreaBuf161         T* bufAt( const int &x, const int &y )       { return GET_OFFSET( buf, stride,     x,     y ); }
bufAtAreaBuf162   const T* bufAt( const int &x, const int &y ) const { return GET_OFFSET( buf, stride,     x,     y ); }
bufAtAreaBuf163         T* bufAt( const Position& pos )              { return GET_OFFSET( buf, stride, pos.x, pos.y ); }
bufAtAreaBuf164   const T* bufAt( const Position& pos ) const        { return GET_OFFSET( buf, stride, pos.x, pos.y ); }
165 
subBufAreaBuf166   AreaBuf<      T> subBuf( const Area &area )                                                         { return subBuf( area.pos(), area.size() ); }
subBufAreaBuf167   AreaBuf<const T> subBuf( const Area &area )                                                   const { return subBuf( area.pos(), area.size() ); }
subBufAreaBuf168   AreaBuf<      T> subBuf( const Position &pos, const Size &size )                                    { return AreaBuf<      T>( bufAt( pos  ), stride, size   ); }
subBufAreaBuf169   AreaBuf<const T> subBuf( const Position &pos, const Size &size )                              const { return AreaBuf<const T>( bufAt( pos  ), stride, size   ); }
subBufAreaBuf170   AreaBuf<      T> subBuf( const int &x, const int &y, const unsigned &_w, const unsigned &_h )       { return AreaBuf<      T>( bufAt( x, y ), stride, _w, _h ); }
subBufAreaBuf171   AreaBuf<const T> subBuf( const int &x, const int &y, const unsigned &_w, const unsigned &_h ) const { return AreaBuf<const T>( bufAt( x, y ), stride, _w, _h ); }
172 };
173 
174 typedef AreaBuf<      Pel>  PelBuf;
175 typedef AreaBuf<const Pel> CPelBuf;
176 
177 typedef AreaBuf<      TCoeff>  CoeffBuf;
178 typedef AreaBuf<const TCoeff> CCoeffBuf;
179 
180 typedef AreaBuf<TCoeffSig>        CoeffSigBuf;
181 typedef AreaBuf<const TCoeffSig> CCoeffSigBuf;
182 
183 typedef AreaBuf<      MotionInfo>  MotionBuf;
184 typedef AreaBuf<const MotionInfo> CMotionBuf;
185 
186 typedef AreaBuf<      LoopFilterParam>  LFPBuf;
187 typedef AreaBuf<const LoopFilterParam> CLFPBuf;
188 
189 
// SIZE_AWARE_PER_EL_OP( OP, INC )
//
// Runs OP( x ) for every column index x of a row and then INC, repeated for
// `height` rows. The expansion site must have `width` and `height` in scope;
// both names are used unqualified.
//
//   OP  - function-like macro (or callable) taking the column index
//   INC - statement(s) executed once after each row, typically advancing
//         the row pointers used by OP
//
// The four branches differ only in how far the inner loop is unrolled by
// hand: 8 when width is a multiple of 8, else 4, else 2, else 1. The order
// in which OP and INC run is the same in every branch.
//
// The macro expands to a bare if / else-if chain (no enclosing braces or
// do-while), so it must be used as a statement of its own. It is #undef'd
// further down in this header unless DONT_UNDEF_SIZE_AWARE_PER_EL_OP is set.
#define SIZE_AWARE_PER_EL_OP( OP, INC )                     \
if( ( width & 7 ) == 0 )                                    \
{                                                           \
  for( int y = 0; y < height; y++ )                         \
  {                                                         \
    for( int x = 0; x < width; x += 8 )                     \
    {                                                       \
      OP( x + 0 );                                          \
      OP( x + 1 );                                          \
      OP( x + 2 );                                          \
      OP( x + 3 );                                          \
      OP( x + 4 );                                          \
      OP( x + 5 );                                          \
      OP( x + 6 );                                          \
      OP( x + 7 );                                          \
    }                                                       \
                                                            \
    INC;                                                    \
  }                                                         \
}                                                           \
else if( ( width & 3 ) == 0 )                               \
{                                                           \
  for( int y = 0; y < height; y++ )                         \
  {                                                         \
    for( int x = 0; x < width; x += 4 )                     \
    {                                                       \
      OP( x + 0 );                                          \
      OP( x + 1 );                                          \
      OP( x + 2 );                                          \
      OP( x + 3 );                                          \
    }                                                       \
                                                            \
    INC;                                                    \
  }                                                         \
}                                                           \
else if( ( width & 1 ) == 0 )                               \
{                                                           \
  for( int y = 0; y < height; y++ )                         \
  {                                                         \
    for( int x = 0; x < width; x += 2 )                     \
    {                                                       \
      OP( x + 0 );                                          \
      OP( x + 1 );                                          \
    }                                                       \
                                                            \
    INC;                                                    \
  }                                                         \
}                                                           \
else                                                        \
{                                                           \
  for( int y = 0; y < height; y++ )                         \
  {                                                         \
    for( int x = 0; x < width; x++ )                        \
    {                                                       \
      OP( x );                                              \
    }                                                       \
                                                            \
    INC;                                                    \
  }                                                         \
}
250 
251 
/// Compile-time unroller: apply( op, x ) calls op( x ), op( x + 1 ), ...,
/// op( x + N - 1 ) in ascending order.
template<int N>
struct SizeAwarePelOpUnroll
{
  template<typename TOP>
  static inline void apply( TOP& op, const int x )
  {
    SizeAwarePelOpUnroll<N - 1>::apply( op, x );
    op( x + ( N - 1 ) );
  }
};

/// Recursion anchor: nothing left to call.
template<>
struct SizeAwarePelOpUnroll<0>
{
  template<typename TOP>
  static inline void apply( TOP&, const int )
  {
  }
};

/// Row driver for one unroll factor: STEP calls of op per inner iteration,
/// one call of inc after each row.
template<int STEP, typename TOP, typename TINC>
static inline void size_aware_pel_op_stepped( TOP& op, TINC& inc, const int width, const int height )
{
  for( int row = 0; row < height; ++row )
  {
    for( int col = 0; col < width; col += STEP )
    {
      SizeAwarePelOpUnroll<STEP>::apply( op, col );
    }

    inc();
  }
}

/**
 * Function counterpart of the SIZE_AWARE_PER_EL_OP macro.
 *
 * For each of \p height rows, calls op( x ) for x = 0 .. width - 1 in
 * ascending order and then inc(). The inner loop is unrolled by 8, 4, 2 or 1
 * depending on the largest of those factors that divides \p width; the
 * sequence of calls does not depend on the factor chosen.
 *
 * \param op      callable taking the column index (int)
 * \param inc     callable without arguments, run once after every row
 * \param width   number of op calls per row
 * \param height  number of rows
 */
template<typename TOP, typename TINC>
static inline void size_aware_pel_op( TOP op, TINC inc, int width, int height )
{
  const int lowBits = width & 7;

  if( lowBits == 0 )
  {
    size_aware_pel_op_stepped<8>( op, inc, width, height );
  }
  else if( ( lowBits & 3 ) == 0 )
  {
    size_aware_pel_op_stepped<4>( op, inc, width, height );
  }
  else if( ( lowBits & 1 ) == 0 )
  {
    size_aware_pel_op_stepped<2>( op, inc, width, height );
  }
  else
  {
    size_aware_pel_op_stepped<1>( op, inc, width, height );
  }
}
315 
316 template<>
317 void AreaBuf<MotionInfo>::fill( const MotionInfo& val );
318 
319 template<typename T>
fill(const T & val)320 void AreaBuf<T>::fill(const T &val)
321 {
322   if( T( 0 ) == val )
323   {
324     if( width == stride )
325     {
326       ::memset( buf, 0, width * height * sizeof( T ) );
327     }
328     else
329     {
330       T* dest = buf;
331       size_t line = width * sizeof( T );
332 
333       for( unsigned y = 0; y < height; y++ )
334       {
335         ::memset( dest, 0, line );
336 
337         dest += stride;
338       }
339     }
340   }
341   else
342   {
343     if( width == stride )
344     {
345       std::fill_n( buf, width * height, val );
346     }
347     else
348     {
349       T* dest = buf;
350 
351       for( int y = 0; y < height; y++, dest += stride )
352       {
353         std::fill_n( dest, width, val );
354       }
355     }
356   }
357 }
358 
359 template<typename T>
memset(const int val)360 void AreaBuf<T>::memset( const int val )
361 {
362   GCC_WARNING_DISABLE_class_memaccess
363   if( width == stride )
364   {
365     ::memset( buf, val, width * height * sizeof( T ) );
366   }
367   else
368   {
369     T* dest = buf;
370     size_t line = width * sizeof( T );
371 
372     for( int y = 0; y < height; y++ )
373     {
374       ::memset( dest, val, line );
375 
376       dest += stride;
377     }
378   }
379   GCC_WARNING_RESET
380 }
381 
382 #if ENABLE_SIMD_OPT_BUFFER
383 template<typename T>
copyFrom(const AreaBuf<const T> & other)384 void AreaBuf<T>::copyFrom( const AreaBuf<const T> &other ) const
385 {
386 #if !defined(__GNUC__) || __GNUC__ > 5
387   static_assert( std::is_trivially_copyable<T>::value, "Type T is not trivially_copyable" );
388 #endif
389 
390   g_pelBufOP.copyBuffer( (const char *) other.buf, sizeof( T ) * other.stride, (char *) buf, sizeof( T ) * stride, sizeof( T ) * width, height );
391 }
392 #else
393 template<typename T>
copyFrom(const AreaBuf<const T> & other)394 void AreaBuf<T>::copyFrom( const AreaBuf<const T> &other ) const
395 {
396 #if !defined(__GNUC__) || __GNUC__ > 5
397   static_assert( std::is_trivially_copyable<T>::value, "Type T is not trivially_copyable" );
398 #endif
399 
400   CHECK( width  != other.width,  "Incompatible size" );
401   CHECK( height != other.height, "Incompatible size" );
402 
403   if( buf == other.buf )
404   {
405     return;
406   }
407 
408   if( ptrdiff_t( width ) == stride && stride == other.stride )
409   {
410     memcpy( buf, other.buf, width * height * sizeof( T ) );
411   }
412   else
413   {
414           T* dst              =       buf;
415     const T* src              = other.buf;
416     const ptrdiff_t srcStride = other.stride;
417 
418     for( unsigned y = 0; y < height; y++ )
419     {
420       memcpy( dst, src, width * sizeof( T ) );
421 
422       dst += stride;
423       src += srcStride;
424     }
425   }
426 }
427 #endif
428 
429 
430 template<typename T>
subtract(const AreaBuf<const T> & other)431 void AreaBuf<T>::subtract( const AreaBuf<const T> &other )
432 {
433   CHECK( width  != other.width,  "Incompatible size" );
434   CHECK( height != other.height, "Incompatible size" );
435 
436         T* dest =       buf;
437   const T* subs = other.buf;
438 
439 #define SUBS_INC        \
440   dest +=       stride; \
441   subs += other.stride; \
442 
443 #define SUBS_OP( ADDR ) dest[ADDR] -= subs[ADDR]
444 
445   SIZE_AWARE_PER_EL_OP( SUBS_OP, SUBS_INC );
446 
447 #undef SUBS_OP
448 #undef SUBS_INC
449 }
450 
// Generic fallback for reconstruct(): there is no implementation for
// arbitrary element types, the body only invokes THROW (project macro,
// presumably raising an exception -- defined outside this header section).
// None of the parameters is used.
template<typename T>
void AreaBuf<T>::reconstruct( const AreaBuf<const T> &pred, const AreaBuf<const T> &resi, const ClpRng& clpRng )
{
  THROW( "Type not supported" );
}

// Specialization for Pel; declared here, defined outside this header section.
template<>
void AreaBuf<Pel>::reconstruct( const AreaBuf<const Pel> &pred, const AreaBuf<const Pel> &resi, const ClpRng& clpRng );
459 
460 
// Generic fallback for addAvg(): there is no implementation for arbitrary
// element types, the body only invokes THROW (project macro, presumably
// raising an exception -- defined outside this header section).
// None of the parameters is used.
template<typename T>
void AreaBuf<T>::addAvg( const AreaBuf<const T> &other1, const AreaBuf<const T> &other2, const ClpRng& clpRng )
{
  THROW( "Type not supported" );
}

// Specialization for Pel; declared here, defined outside this header section.
template<>
void AreaBuf<Pel>::addAvg( const AreaBuf<const Pel> &other1, const AreaBuf<const Pel> &other2, const ClpRng& clpRng );
469 
// Generic fallback for linearTransform(): there is no implementation for
// arbitrary element types, the body only invokes THROW (project macro,
// presumably raising an exception -- defined outside this header section).
// None of the parameters is used.
template<typename T>
void AreaBuf<T>::linearTransform( const int scale, const int shift, const int offset, bool bClip, const ClpRng& clpRng )
{
  THROW( "Type not supported" );
}

// Specialization for Pel; declared here, defined outside this header section.
template<>
void AreaBuf<Pel>::linearTransform( const int scale, const int shift, const int offset, bool bClip, const ClpRng& clpRng );
478 
479 
// Generic fallback for rescaleBuf(): there is no implementation for
// arbitrary element types, the body only invokes THROW (project macro,
// presumably raising an exception -- defined outside this header section).
// None of the parameters is used. The default arguments for the two
// collocated-chroma flags are given on the in-class declaration only.
template<typename T>
void AreaBuf<T>::rescaleBuf( const AreaBuf<const T>& beforeScaling, ComponentID compID, const std::pair<int, int> scalingRatio, const Window& confBefore, const Window& confAfter, const ChromaFormat chromaFormatIDC, const BitDepths& bitDepths, const bool horCollocatedChromaFlag, const bool verCollocatedChromaFlag )
{
  THROW( "Type not supported" );
}

// Specialization for Pel; declared here, defined outside this header section.
template<>
void AreaBuf<Pel>::rescaleBuf( const AreaBuf<const Pel>& beforeScaling, ComponentID compID, const std::pair<int, int> scalingRatio, const Window& confBefore, const Window& confAfter, const ChromaFormat chromaFormatIDC, const BitDepths& bitDepths, const bool horCollocatedChromaFlag, const bool verCollocatedChromaFlag );
488 
489 template<typename T>
extendBorderPel(unsigned margin)490 void AreaBuf<T>::extendBorderPel( unsigned margin )
491 {
492   T*        p = buf;
493   int       h = height;
494   int       w = width;
495   ptrdiff_t s = stride;
496 
497   CHECK( ( w + 2 * margin ) > s, "Size of buffer too small to extend" );
498   // do left and right margins
499   for( int y = 0; y < h; y++ )
500   {
501     for( int x = 0; x < margin; x++ )
502     {
503       *( p - margin + x ) = p[0];
504       p[w + x]            = p[w - 1];
505     }
506     p += s;
507   }
508 
509   // p is now the (0,height) (bottom left of image within bigger picture
510   p -= ( s + margin );
511   // p is now the (-margin, height-1)
512   for( int y = 0; y < margin; y++ )
513   {
514     ::memcpy( p + ( y + 1 ) * s, p, sizeof( T ) * ( w + ( margin << 1 ) ) );
515   }
516 
517   // pi is still (-marginX, height-1)
518   p -= ( ( h - 1 ) * s );
519   // pi is now (-marginX, 0)
520   for( int y = 0; y < margin; y++ )
521   {
522     ::memcpy( p - ( y + 1 ) * s, p, sizeof( T ) * ( w + ( margin << 1 ) ) );
523   }
524 }
525 
526 template<typename T>
extendBorderPel(unsigned margin,bool left,bool right,bool top,bool bottom)527 void AreaBuf<T>::extendBorderPel(unsigned margin, bool left, bool right, bool top, bool bottom)
528 {
529   CHECK( ( width + left*margin + right*margin) > stride, "Size of buffer too small to extend" );
530   // do left and right margins
531 
532   if( left && right )
533   {
534     T* p = buf;
535     for( int y = 0; y < height; y++ )
536     {
537       for( int x = 0; x < margin; x++ )
538       {
539         p[-(int)margin + x] = p[0];
540         p[width + x]   = p[width - 1];
541       }
542       p += stride;
543     }
544   }
545 
546   else if( left )
547   {
548     T* p = buf;
549     for( int y = 0; y < height; y++ )
550     {
551       for( int x = 0; x < margin; x++ )
552       {
553         p[-(int)margin + x] = p[0];
554       }
555       p += stride;
556     }
557   }
558 
559   else if( right )
560   {
561     T* p = buf;
562     for( int y = 0; y < height; y++ )
563     {
564       for( int x = 0; x < margin; x++ )
565       {
566         p[width + x] = p[width - 1];
567       }
568       p += stride;
569     }
570   }
571 
572   const int copylen = width + ( left ? margin : 0 ) + ( right ? margin : 0 );
573   if( bottom )
574   {
575     T* p = buf + stride * height;
576     if( left )
577       p -= margin;
578 
579     // p is now the (-margin, height)
580     for( int y = 0; y < margin; y++ )
581     {
582       ::memcpy( p + y * stride, p - stride, sizeof( T ) * copylen );
583     }
584   }
585 
586   if( top )
587   {
588     T* p = buf;
589     if( left )
590       p -= margin;
591 
592     // pi is now (-marginX, 0)
593     for( int y = -(int)margin; y < 0; y++ )
594     {
595       ::memcpy( p + y * stride, p, sizeof( T ) * copylen );
596     }
597   }
598 }
599 
600 template<typename T>
padBorderPel(unsigned marginX,unsigned marginY,int dir)601 void AreaBuf<T>::padBorderPel( unsigned marginX, unsigned marginY, int dir )
602 {
603   T*  p = buf;
604   int s = stride;
605   int h = height;
606   int w = width;
607 
608   CHECK( w  > s, "Size of buffer too small to extend" );
609 
610   // top-left margin
611   if ( dir == 1 )
612   {
613     for( int y = 0; y < marginY; y++ )
614     {
615       for( int x = 0; x < marginX; x++ )
616       {
617         p[x] = p[marginX];
618       }
619       p += s;
620     }
621   }
622 
623   // bottom-right margin
624   if ( dir == 2 )
625   {
626     p = buf + s * ( h - marginY ) + w - marginX;
627 
628     for( int y = 0; y < marginY; y++ )
629     {
630       for( int x = 0; x < marginX; x++ )
631       {
632         p[x] = p[-1];
633       }
634       p += s;
635     }
636   }
637 }
638 
639 #if ENABLE_SIMD_OPT_BUFFER && defined(TARGET_SIMD_X86)
640 template<> void AreaBuf<Pel>::transposedFrom( const AreaBuf<const Pel> &other );
641 #endif
642 
643 template<typename T>
transposedFrom(const AreaBuf<const T> & other)644 void AreaBuf<T>::transposedFrom( const AreaBuf<const T> &other )
645 {
646   CHECK( width * height != other.width * other.height, "Incompatible size" );
647 
648         T* dst  =       buf;
649   const T* src  = other.buf;
650   width         = other.height;
651   height        = other.width;
652   stride        = stride < width ? width : stride;
653 
654   for( unsigned y = 0; y < other.height; y++ )
655   {
656     for( unsigned x = 0; x < other.width; x++ )
657     {
658       dst[y + x*stride] = src[x + y * other.stride];
659     }
660   }
661 }
662 
// Keep SIZE_AWARE_PER_EL_OP local to this header: it is removed again here
// unless the including file defines DONT_UNDEF_SIZE_AWARE_PER_EL_OP because
// it wants to use the macro itself.
#ifndef DONT_UNDEF_SIZE_AWARE_PER_EL_OP
#undef SIZE_AWARE_PER_EL_OP
#endif // !DONT_UNDEF_SIZE_AWARE_PER_EL_OP
666 
667 // ---------------------------------------------------------------------------
668 // UnitBuf struct
669 // ---------------------------------------------------------------------------
670 
671 struct UnitArea;
672 
673 template<typename T>
674 struct UnitBuf
675 {
676   typedef static_vector<AreaBuf<T>,       MAX_NUM_COMPONENT> UnitBufBuffers;
677   typedef static_vector<AreaBuf<const T>, MAX_NUM_COMPONENT> ConstUnitBufBuffers;
678 
679   ChromaFormat chromaFormat;
680   UnitBufBuffers bufs;
681 
UnitBufUnitBuf682   UnitBuf() : chromaFormat( NUM_CHROMA_FORMAT ) { }
UnitBufUnitBuf683   UnitBuf( const ChromaFormat &_chromaFormat, const UnitBufBuffers&  _bufs ) : chromaFormat( _chromaFormat ), bufs( _bufs ) { }
UnitBufUnitBuf684   UnitBuf( const ChromaFormat &_chromaFormat,       UnitBufBuffers&& _bufs ) : chromaFormat( _chromaFormat ), bufs( std::forward<UnitBufBuffers>( _bufs ) ) { }
UnitBufUnitBuf685   UnitBuf( const ChromaFormat &_chromaFormat, const AreaBuf<T>  &blkY ) : chromaFormat( _chromaFormat ), bufs{ blkY } { }
UnitBufUnitBuf686   UnitBuf( const ChromaFormat &_chromaFormat,       AreaBuf<T> &&blkY ) : chromaFormat( _chromaFormat ), bufs{ std::forward<AreaBuf<T> >(blkY) } { }
UnitBufUnitBuf687   UnitBuf( const ChromaFormat &_chromaFormat, const AreaBuf<T>  &blkY, const AreaBuf<T>  &blkCb, const AreaBuf<T>  &blkCr ) : chromaFormat( _chromaFormat ), bufs{ blkY, blkCb, blkCr } { if( chromaFormat == CHROMA_400 ) bufs.resize( 1 ); }
UnitBufUnitBuf688   UnitBuf( const ChromaFormat &_chromaFormat,       AreaBuf<T> &&blkY,       AreaBuf<T> &&blkCb,       AreaBuf<T> &&blkCr ) : chromaFormat( _chromaFormat ), bufs{ std::forward<AreaBuf<T> >(blkY), std::forward<AreaBuf<T> >(blkCb), std::forward<AreaBuf<T> >(blkCr) } { if( chromaFormat == CHROMA_400 ) bufs.resize( 1 ); }
UnitBufUnitBuf689   UnitBuf( const UnitBuf<typename std::remove_const<T>::type>& other ) : chromaFormat( other.chromaFormat ), bufs{}
690   {
691     // TODO: delete to avoid unneccessary copying
692     for( auto &buf : other.bufs )
693     {
694       bufs.push_back( buf );
695     }
696   }
697 
getUnitBuf698         AreaBuf<T>& get( const ComponentID comp )        { return bufs[comp]; }
getUnitBuf699   const AreaBuf<T>& get( const ComponentID comp )  const { return bufs[comp]; }
700 
YUnitBuf701         AreaBuf<T>& Y()        { return bufs[0]; }
YUnitBuf702   const AreaBuf<T>& Y()  const { return bufs[0]; }
CbUnitBuf703         AreaBuf<T>& Cb()       { return bufs[1]; }
CbUnitBuf704   const AreaBuf<T>& Cb() const { return bufs[1]; }
CrUnitBuf705         AreaBuf<T>& Cr()       { return bufs[2]; }
CrUnitBuf706   const AreaBuf<T>& Cr() const { return bufs[2]; }
707 
708   void fill                 ( const T &val );
709   void copyFrom             ( const UnitBuf<const T> &other ) const;
710   void reconstruct          ( const UnitBuf<const T> &pred, const UnitBuf<const T> &resi, const ClpRngs& clpRngs );
711   void subtract             ( const UnitBuf<const T> &other );
712   void addWeightedAvg       ( const UnitBuf<      T> &other1, const UnitBuf<      T> &other2, const ClpRngs& clpRngs, const uint8_t bcwIdx = BCW_DEFAULT, const bool chromaOnly = false, const bool lumaOnly = false);
713   void addAvg               ( const UnitBuf<      T> &other1, const UnitBuf<      T> &other2, const ClpRngs& clpRngs, const bool chromaOnly = false, const bool lumaOnly = false);
714   void extendBorderPel      ( unsigned margin );
715   void extendBorderPel      ( unsigned margin, bool left, bool right, bool top, bool bottom );
716   void padBorderPel         ( unsigned margin, int dir );
717 
718   void rescaleBuf           ( const UnitBuf<const T>& beforeScaling, const std::pair<int, int> scalingRatio, const Window& confBefore, const Window& confAfter, const BitDepths& bitDepths, const bool horCollocatedChromaFlag = false, const bool verCollocatedChromaFlag = false );
719 
720         UnitBuf<      T> subBuf (const Area& subArea);
721   const UnitBuf<const T> subBuf (const Area& subArea) const;
722         UnitBuf<      T> subBuf (const UnitArea& subArea);
723   const UnitBuf<const T> subBuf (const UnitArea& subArea) const;
724   void colorSpaceConvert    ( const UnitBuf<T> &other, const ClpRng& clpRng );
725 
726   void writeToFile( std::string filename ) const;   // for debug purposes
727 };
728 
729 typedef UnitBuf<      Pel>  PelUnitBuf;
730 typedef UnitBuf<const Pel> CPelUnitBuf;
731 
732 typedef UnitBuf<      TCoeff>  CoeffUnitBuf;
733 typedef UnitBuf<const TCoeff> CCoeffUnitBuf;
734 
735 template<typename T>
fill(const T & val)736 void UnitBuf<T>::fill( const T &val )
737 {
738   for( unsigned i = 0; i < bufs.size(); i++ )
739   {
740     bufs[i].fill( val );
741   }
742 }
743 
744 template<typename T>
copyFrom(const UnitBuf<const T> & other)745 void UnitBuf<T>::copyFrom( const UnitBuf<const T> &other ) const
746 {
747   CHECK( chromaFormat != other.chromaFormat, "Incompatible formats" );
748 
749   for( unsigned i = 0; i < bufs.size(); i++ )
750   {
751     bufs[i].copyFrom( other.bufs[i] );
752   }
753 }
754 
755 
756 
757 template<typename T>
subtract(const UnitBuf<const T> & other)758 void UnitBuf<T>::subtract( const UnitBuf<const T> &other )
759 {
760   CHECK( chromaFormat != other.chromaFormat, "Incompatible formats" );
761 
762   for( unsigned i = 0; i < bufs.size(); i++ )
763   {
764     bufs[i].subtract( other.bufs[i] );
765   }
766 }
767 
768 template<typename T>
reconstruct(const UnitBuf<const T> & pred,const UnitBuf<const T> & resi,const ClpRngs & clpRngs)769 void UnitBuf<T>::reconstruct(const UnitBuf<const T> &pred, const UnitBuf<const T> &resi, const ClpRngs& clpRngs)
770 {
771   CHECK( chromaFormat != pred.chromaFormat, "Incompatible formats" );
772   CHECK( chromaFormat != resi.chromaFormat, "Incompatible formats" );
773 
774   for( unsigned i = 0; i < bufs.size(); i++ )
775   {
776     bufs[i].reconstruct( pred.bufs[i], resi.bufs[i], clpRngs );
777   }
778 }
779 
780 template<typename T>
addWeightedAvg(const UnitBuf<T> & other1,const UnitBuf<T> & other2,const ClpRngs & clpRngs,const uint8_t bcwIdx,const bool chromaOnly,const bool lumaOnly)781 void UnitBuf<T>::addWeightedAvg(const UnitBuf<T> &other1, const UnitBuf<T> &other2, const ClpRngs& clpRngs, const uint8_t bcwIdx /* = BCW_DEFAULT */, const bool chromaOnly /* = false */, const bool lumaOnly /* = false */)
782 {
783   const size_t istart = chromaOnly ? 1 : 0;
784   const size_t iend   = lumaOnly   ? 1 : bufs.size();
785 
786   CHECK(lumaOnly && chromaOnly, "should not happen");
787 
788   for(size_t i = istart; i < iend; i++)
789   {
790     bufs[i].addWeightedAvg(other1.bufs[i], other2.bufs[i], clpRngs, bcwIdx);
791   }
792 }
793 
794 template<typename T>
addAvg(const UnitBuf<T> & other1,const UnitBuf<T> & other2,const ClpRngs & clpRngs,const bool chromaOnly,const bool lumaOnly)795 void UnitBuf<T>::addAvg(const UnitBuf<T> &other1, const UnitBuf<T> &other2, const ClpRngs& clpRngs, const bool chromaOnly /* = false */, const bool lumaOnly /* = false */)
796 {
797   const size_t istart = chromaOnly ? 1 : 0;
798   const size_t iend   = lumaOnly   ? 1 : bufs.size();
799 
800   CHECK( lumaOnly && chromaOnly, "should not happen" );
801 
802   for( size_t i = istart; i < iend; i++)
803   {
804     bufs[i].addAvg( other1.bufs[i], other2.bufs[i], clpRngs );
805   }
806 }
807 
808 template<typename T>
colorSpaceConvert(const UnitBuf<T> & other,const ClpRng & clpRng)809 void UnitBuf<T>::colorSpaceConvert( const UnitBuf<T> &other, const ClpRng& clpRng )
810 {
811   THROW( "Type not supported" );
812 }
813 
814 template<>
815 void UnitBuf<Pel>::colorSpaceConvert( const UnitBuf<Pel> &other, const ClpRng& clpRng );
816 
817 template<typename T>
extendBorderPel(unsigned margin)818 void UnitBuf<T>::extendBorderPel( unsigned margin )
819 {
820   for( unsigned i = 0; i < bufs.size(); i++ )
821   {
822     bufs[i].extendBorderPel( margin );
823   }
824 }
825 
826 template<typename T>
extendBorderPel(unsigned margin,bool left,bool right,bool top,bool bottom)827 void UnitBuf<T>::extendBorderPel(unsigned margin, bool left, bool right, bool top, bool bottom)
828 {
829   for( unsigned i = 0; i < bufs.size(); i++ )
830   {
831     bufs[i].extendBorderPel( margin, left, right, top, bottom );
832   }
833 }
834 
835 template<typename T>
padBorderPel(unsigned margin,int dir)836 void UnitBuf<T>::padBorderPel( unsigned margin, int dir )
837 {
838   for( unsigned i = 0; i < bufs.size(); i++ )
839   {
840     bufs[i].padBorderPel( margin >> getComponentScaleX( ComponentID( i ), chromaFormat ), margin >> getComponentScaleY( ComponentID( i ), chromaFormat ), dir );
841   }
842 }
843 
844 template<typename T>
subBuf(const UnitArea & subArea)845 UnitBuf<T> UnitBuf<T>::subBuf( const UnitArea& subArea )
846 {
847   UnitBuf<T> subBuf;
848   subBuf.chromaFormat = chromaFormat;
849   unsigned blockIdx = 0;
850 
851   for( auto &subAreaBuf : bufs )
852   {
853     subBuf.bufs.push_back( subAreaBuf.subBuf( subArea.blocks[blockIdx].pos(), subArea.blocks[blockIdx].size() ) );
854     blockIdx++;
855   }
856 
857   return subBuf;
858 }
859 
860 
861 template<typename T>
subBuf(const UnitArea & subArea)862 const UnitBuf<const T> UnitBuf<T>::subBuf( const UnitArea& subArea ) const
863 {
864   UnitBuf<const T> subBuf;
865   subBuf.chromaFormat = chromaFormat;
866   unsigned blockIdx = 0;
867 
868   for( const auto &subAreaBuf : bufs )
869   {
870     subBuf.bufs.push_back( subAreaBuf.subBuf( subArea.blocks[blockIdx].pos(), subArea.blocks[blockIdx].size() ) );
871     blockIdx++;
872   }
873 
874   return subBuf;
875 }
876 
877 template<typename T>
subBuf(const Area & subArea)878 UnitBuf<T> UnitBuf<T>::subBuf( const Area & subArea )
879 {
880   UnitBuf<T> subBuf;
881   subBuf.chromaFormat = chromaFormat;
882   unsigned blockIdx = 0;
883 
884   for( auto &subAreaBuf : bufs )
885   {
886     const int scaleX = getComponentScaleX( ComponentID(blockIdx), chromaFormat);
887     const int scaleY = getComponentScaleY( ComponentID(blockIdx), chromaFormat);
888     const Area scaledArea( subArea.pos().x >> scaleX, subArea.pos().y >> scaleY, subArea.size().width >> scaleX, subArea.size().height >> scaleY );
889     subBuf.bufs.push_back( subAreaBuf.subBuf( scaledArea.pos(), scaledArea.size() ) );
890     blockIdx++;
891   }
892 
893   return subBuf;
894 }
895 
896 template<typename T>
subBuf(const Area & subArea)897 const UnitBuf<const T> UnitBuf<T>::subBuf( const Area & subArea ) const
898 {
899   UnitBuf<T> subBuf;
900   subBuf.chromaFormat = chromaFormat;
901   unsigned blockIdx = 0;
902 
903   for( auto &subAreaBuf : bufs )
904   {
905     const int scaleX = getComponentScaleX( ComponentID(blockIdx), chromaFormat);
906     const int scaleY = getComponentScaleY( ComponentID(blockIdx), chromaFormat);
907     const Area scaledArea( subArea.pos().x >> scaleX, subArea.pos().y >> scaleY, subArea.size().width >> scaleX, subArea.size().height >> scaleY );
908     subBuf.bufs.push_back( subAreaBuf.subBuf( scaledArea.pos(), scaledArea.size() ) );
909     blockIdx++;
910   }
911 
912   return subBuf;
913 }
914 
915 template<typename T>
rescaleBuf(const UnitBuf<const T> & beforeScaling,const std::pair<int,int> scalingRatio,const Window & confBefore,const Window & confAfter,const BitDepths & bitDepths,const bool horCollocatedChromaFlag,const bool verCollocatedChromaFlag)916 void UnitBuf<T>::rescaleBuf( const UnitBuf<const T>& beforeScaling, const std::pair<int, int> scalingRatio, const Window& confBefore, const Window& confAfter, const BitDepths& bitDepths, const bool horCollocatedChromaFlag, const bool verCollocatedChromaFlag )
917 {
918   for( unsigned i = 0; i < bufs.size(); i++ )
919   {
920     bufs[i].rescaleBuf( beforeScaling.bufs[i], ComponentID( i ), scalingRatio, confBefore, confAfter, chromaFormat, bitDepths, horCollocatedChromaFlag, verCollocatedChromaFlag );
921   }
922 }
923 
924 // ---------------------------------------------------------------------------
925 // PelStorage struct (PelUnitBuf which allocates its own memory)
926 // ---------------------------------------------------------------------------
927 
928 struct UnitArea;
929 struct CompArea;
930 
931 struct PelStorage : public PelUnitBuf
932 {
933   PelStorage();
934   ~PelStorage();
935 
936   void swap( PelStorage& other );
937   void createFromBuf( PelUnitBuf buf );
938   void create( const UnitArea &_unit );
939   void create( const ChromaFormat _chromaFormat, const Size& _size, const unsigned _maxCUSize = 0, const unsigned _margin = 0, const unsigned _alignment = 0, const bool _scaleChromaMargin = true );
940   void destroy();
941 
942          PelBuf getBuf( const CompArea &blk );
943   const CPelBuf getBuf( const CompArea &blk ) const;
944 
945          PelBuf getBuf( const ComponentID CompID );
946   const CPelBuf getBuf( const ComponentID CompID ) const;
947 
948          PelUnitBuf getBuf( const UnitArea &unit );
949   const CPelUnitBuf getBuf( const UnitArea &unit ) const;
getOriginPelStorage950   Pel *getOrigin( const int id ) const { return m_origin[id]; }
getOriginBufPelStorage951   PelBuf getOriginBuf( const int id ) { return PelBuf( m_origin[id], m_origSi[id] ); }
952 
953 private:
954 
955   Size m_origSi[MAX_NUM_COMPONENT];
956   Pel *m_origin[MAX_NUM_COMPONENT];
957 };
958 
959 }
960