1 /* -----------------------------------------------------------------------------
2 The copyright in this software is being made available under the BSD
3 License, included below. No patent rights, trademark rights and/or
4 other Intellectual Property Rights other than the copyrights concerning
5 the Software are granted under this license.
6
7 For any license concerning other Intellectual Property rights than the software,
8 especially patent licenses, a separate Agreement needs to be closed.
9 For more information please contact:
10
11 Fraunhofer Heinrich Hertz Institute
12 Einsteinufer 37
13 10587 Berlin, Germany
14 www.hhi.fraunhofer.de/vvc
15 vvc@hhi.fraunhofer.de
16
17 Copyright (c) 2018-2021, Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V.
18 All rights reserved.
19
20 Redistribution and use in source and binary forms, with or without
21 modification, are permitted provided that the following conditions are met:
22
23 * Redistributions of source code must retain the above copyright notice,
24 this list of conditions and the following disclaimer.
25 * Redistributions in binary form must reproduce the above copyright notice,
26 this list of conditions and the following disclaimer in the documentation
27 and/or other materials provided with the distribution.
28 * Neither the name of Fraunhofer nor the names of its contributors may
29 be used to endorse or promote products derived from this software without
30 specific prior written permission.
31
32 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
33 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
34 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
35 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
36 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
37 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
38 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
39 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
40 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
41 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
42 THE POSSIBILITY OF SUCH DAMAGE.
43
44
45 ------------------------------------------------------------------------------------------- */
46
47 /** \file Buffer.h
48 * \brief Low-overhead class describing 2D memory layout
49 */
50
51 #pragma once
52
53 #include "Common.h"
54 #include "CommonDef.h"
55 #include "ChromaFormat.h"
56 #include "MotionInfo.h"
57
58 #include <string.h>
59 #include <type_traits>
60 #include <typeinfo>
61 #include <vector>
62 #include <utility>
63
64 namespace vvdec
65 {
66
67 #if ENABLE_SIMD_OPT_BUFFER
68 struct PelBufferOps
69 {
70 PelBufferOps();
71
72 #ifdef TARGET_SIMD_X86
73 void initPelBufOpsX86();
74 template<X86_VEXT vext>
75 void _initPelBufOpsX86();
76
77 #endif
78 void ( *addAvg4 ) ( const Pel* src0, ptrdiff_t src0Stride, const Pel* src1, ptrdiff_t src1Stride, Pel *dst, ptrdiff_t dstStride, int width, int height, int shift, int offset, const ClpRng& clpRng );
79 void ( *addAvg8 ) ( const Pel* src0, ptrdiff_t src0Stride, const Pel* src1, ptrdiff_t src1Stride, Pel *dst, ptrdiff_t dstStride, int width, int height, int shift, int offset, const ClpRng& clpRng );
80 void ( *addAvg16 ) ( const Pel* src0, ptrdiff_t src0Stride, const Pel* src1, ptrdiff_t src1Stride, Pel *dst, ptrdiff_t dstStride, int width, int height, int shift, int offset, const ClpRng& clpRng );
81 void ( *reco4 ) ( const Pel* src0, ptrdiff_t src0Stride, const Pel* src1, ptrdiff_t src1Stride, Pel *dst, ptrdiff_t dstStride, int width, int height, const ClpRng& clpRng );
82 void ( *reco8 ) ( const Pel* src0, ptrdiff_t src0Stride, const Pel* src1, ptrdiff_t src1Stride, Pel *dst, ptrdiff_t dstStride, int width, int height, const ClpRng& clpRng );
83 void ( *linTf4 ) ( const Pel* src0, ptrdiff_t src0Stride, Pel *dst, ptrdiff_t dstStride, int width, int height, int scale, int shift, int offset, const ClpRng& clpRng, bool bClip );
84 void ( *linTf8 ) ( const Pel* src0, ptrdiff_t src0Stride, Pel *dst, ptrdiff_t dstStride, int width, int height, int scale, int shift, int offset, const ClpRng& clpRng, bool bClip );
85 void ( *wghtAvg4 ) ( const Pel* src0, ptrdiff_t src0Stride, const Pel* src1, ptrdiff_t src1Stride, Pel *dst, ptrdiff_t dstStride, int width, int height, int shift, int offset, int w0, int w1, const ClpRng& clpRng );
86 void ( *wghtAvg8 ) ( const Pel* src0, ptrdiff_t src0Stride, const Pel* src1, ptrdiff_t src1Stride, Pel *dst, ptrdiff_t dstStride, int width, int height, int shift, int offset, int w0, int w1, const ClpRng& clpRng );
87 void ( *copyBuffer ) ( const char*src, ptrdiff_t srcStride, char* dst, ptrdiff_t dstStride, int width, int height );
88 void ( *padding1 ) ( Pel *dst, ptrdiff_t stride, int width, int height );
89 void ( *padding2 ) ( Pel *dst, ptrdiff_t stride, int width, int height );
90 void ( *transpose4x4 ) ( const Pel* src, ptrdiff_t srcStride, Pel* dst, ptrdiff_t dstStride );
91 void ( *transpose8x8 ) ( const Pel* src, ptrdiff_t srcStride, Pel* dst, ptrdiff_t dstStride );
92 void ( *applyLut ) ( Pel* ptr, ptrdiff_t ptrStride, int width, int height, const Pel* lut );
93 void ( *fillN_CU ) ( CodingUnit** ptr, ptrdiff_t ptrStride, int width, int height, CodingUnit* cuPtr );
94
95 void (*sampleRateConv) ( const std::pair<int, int> scalingRatio, const std::pair<int, int> compScale,
96 const Pel* orgSrc, const int orgStride, const int orgWidth, const int orgHeight,
97 const int beforeScaleLeftOffset, const int beforeScaleTopOffset,
98 Pel* scaledSrc, const int scaledStride, const int scaledWidth, const int scaledHeight,
99 const int afterScaleLeftOffset, const int afterScaleTopOffset,
100 const int bitDepth, const bool useLumaFilter, const bool horCollocatedPositionFlag, const bool verCollocatedPositionFlag );
101 };
102 #endif
103
104 extern PelBufferOps g_pelBufOP;
105
// Pointer-stepping helpers for walking a 2D buffer through a pointer and a stride.
//
// INCX / INCY / OFFSETX / OFFSETY / OFFSET modify `ptr` in place and expand to a
// braced block, so they are statements, not expressions.
// GET_OFFSETX / GET_OFFSETY / GET_OFFSET are expressions that yield the offset
// pointer and leave `ptr` untouched.
// The X-only forms accept `stride` for a uniform argument list but do not use it.
#define INCX( ptr, stride ) { ptr++; }
#define INCY( ptr, stride ) { ptr += ( stride ); }
#define OFFSETX( ptr, stride, x ) { ptr += ( x ); }
#define OFFSETY( ptr, stride, y ) { ptr += ( y ) * ( stride ); }
#define OFFSET( ptr, stride, x, y ) { ptr += ( x ) + ( y ) * ( stride ); }
#define GET_OFFSETX( ptr, stride, x ) ( ( ptr ) + ( x ) )
#define GET_OFFSETY( ptr, stride, y ) ( ( ptr ) + ( y ) * ( stride ) )
#define GET_OFFSET( ptr, stride, x, y ) ( ( ptr ) + ( x ) + ( y ) * ( stride ) )
114
115 class Window;
116 struct BitDepths;
117
118 template<typename T>
119 struct AreaBuf : public Size
120 {
121 T* buf;
122 ptrdiff_t stride;
123
AreaBufAreaBuf124 AreaBuf() : Size(), buf( NULL ), stride( 0 ) { }
AreaBufAreaBuf125 AreaBuf( T *_buf, const Size &size ) : Size( size ), buf( _buf ), stride( size.width ) { }
AreaBufAreaBuf126 AreaBuf( T *_buf, const ptrdiff_t &_stride, const Size &size ) : Size( size ), buf( _buf ), stride( _stride ) { }
AreaBufAreaBuf127 AreaBuf( T *_buf, const SizeType &_width, const SizeType &_height ) : Size( _width, _height ), buf( _buf ), stride( _width ) { }
AreaBufAreaBuf128 AreaBuf( T *_buf, const ptrdiff_t &_stride, const SizeType &_width, const SizeType &_height ) : Size( _width, _height ), buf( _buf ), stride( _stride ) { }
AreaBufAreaBuf129 AreaBuf( const AreaBuf<typename std::remove_const<T>::type >& other ) : Size( other ), buf( other.buf ), stride( other.stride ) { }
130
131 void fill ( const T &val );
132 void memset ( const int val );
133
134 void copyFrom ( const AreaBuf<const T> &other ) const;
135
136 void reconstruct ( const AreaBuf<const T> &pred, const AreaBuf<const T> &resi, const ClpRng& clpRng);
137
138 void subtract ( const AreaBuf<const T> &other );
139 void extendBorderPel ( unsigned margin );
140 void extendBorderPel ( unsigned margin, bool left, bool right, bool top, bool bottom );
141 void addWeightedAvg ( const AreaBuf<const T> &other1, const AreaBuf<const T> &other2, const ClpRng& clpRng, const int8_t bcwIdx);
142 void addAvg ( const AreaBuf<const T> &other1, const AreaBuf<const T> &other2, const ClpRng& clpRng );
143 void padBorderPel ( unsigned marginX, unsigned marginY, int dir );
144
145 void linearTransform ( const int scale, const int shift, const int offset, bool bClip, const ClpRng& clpRng );
146
147 void transposedFrom ( const AreaBuf<const T> &other );
148
149 void rspSignal ( const Pel *lut );
150 void scaleSignal ( const int scale, const ClpRng& clpRng);
151
152 void rescaleBuf ( const AreaBuf<const T>& beforeScaling, const ComponentID compID, const std::pair<int, int> scalingRatio, const Window& confBefore, const Window& confAfter, const ChromaFormat chromaFormatIDC, const BitDepths& bitDepths, const bool horCollocatedChromaFlag = false, const bool verCollocatedChromaFlag = false );
153
atAreaBuf154 T& at( const int &x, const int &y ) { return buf[y * stride + x]; }
atAreaBuf155 const T& at( const int &x, const int &y ) const { return buf[y * stride + x]; }
156
atAreaBuf157 T& at( const Position &pos ) { return buf[pos.y * stride + pos.x]; }
atAreaBuf158 const T& at( const Position &pos ) const { return buf[pos.y * stride + pos.x]; }
159
160
bufAtAreaBuf161 T* bufAt( const int &x, const int &y ) { return GET_OFFSET( buf, stride, x, y ); }
bufAtAreaBuf162 const T* bufAt( const int &x, const int &y ) const { return GET_OFFSET( buf, stride, x, y ); }
bufAtAreaBuf163 T* bufAt( const Position& pos ) { return GET_OFFSET( buf, stride, pos.x, pos.y ); }
bufAtAreaBuf164 const T* bufAt( const Position& pos ) const { return GET_OFFSET( buf, stride, pos.x, pos.y ); }
165
subBufAreaBuf166 AreaBuf< T> subBuf( const Area &area ) { return subBuf( area.pos(), area.size() ); }
subBufAreaBuf167 AreaBuf<const T> subBuf( const Area &area ) const { return subBuf( area.pos(), area.size() ); }
subBufAreaBuf168 AreaBuf< T> subBuf( const Position &pos, const Size &size ) { return AreaBuf< T>( bufAt( pos ), stride, size ); }
subBufAreaBuf169 AreaBuf<const T> subBuf( const Position &pos, const Size &size ) const { return AreaBuf<const T>( bufAt( pos ), stride, size ); }
subBufAreaBuf170 AreaBuf< T> subBuf( const int &x, const int &y, const unsigned &_w, const unsigned &_h ) { return AreaBuf< T>( bufAt( x, y ), stride, _w, _h ); }
subBufAreaBuf171 AreaBuf<const T> subBuf( const int &x, const int &y, const unsigned &_w, const unsigned &_h ) const { return AreaBuf<const T>( bufAt( x, y ), stride, _w, _h ); }
172 };
173
174 typedef AreaBuf< Pel> PelBuf;
175 typedef AreaBuf<const Pel> CPelBuf;
176
177 typedef AreaBuf< TCoeff> CoeffBuf;
178 typedef AreaBuf<const TCoeff> CCoeffBuf;
179
180 typedef AreaBuf<TCoeffSig> CoeffSigBuf;
181 typedef AreaBuf<const TCoeffSig> CCoeffSigBuf;
182
183 typedef AreaBuf< MotionInfo> MotionBuf;
184 typedef AreaBuf<const MotionInfo> CMotionBuf;
185
186 typedef AreaBuf< LoopFilterParam> LFPBuf;
187 typedef AreaBuf<const LoopFilterParam> CLFPBuf;
188
189
// Applies OP to every column index of every row, manually unrolled by the
// largest of 8, 4, 2 or 1 that divides `width`.
//
//  OP  - function-like macro taking the column index; invoked as `OP( idx );`
//  INC - statement(s) executed once after each row (typically the row-pointer
//        advance); expanded as `INC;`
//
// The macro reads `width` and `height` from the scope in which it is expanded.
// It expands to a bare if / else-if chain (not wrapped in do { } while), so it
// must be used as a statement of its own.
#define SIZE_AWARE_PER_EL_OP( OP, INC ) \
if( ( width & 7 ) == 0 ) \
{ \
for( int y = 0; y < height; y++ ) \
{ \
for( int x = 0; x < width; x += 8 ) \
{ \
OP( x + 0 ); \
OP( x + 1 ); \
OP( x + 2 ); \
OP( x + 3 ); \
OP( x + 4 ); \
OP( x + 5 ); \
OP( x + 6 ); \
OP( x + 7 ); \
} \
\
INC; \
} \
} \
else if( ( width & 3 ) == 0 ) \
{ \
for( int y = 0; y < height; y++ ) \
{ \
for( int x = 0; x < width; x += 4 ) \
{ \
OP( x + 0 ); \
OP( x + 1 ); \
OP( x + 2 ); \
OP( x + 3 ); \
} \
\
INC; \
} \
} \
else if( ( width & 1 ) == 0 ) \
{ \
for( int y = 0; y < height; y++ ) \
{ \
for( int x = 0; x < width; x += 2 ) \
{ \
OP( x + 0 ); \
OP( x + 1 ); \
} \
\
INC; \
} \
} \
else \
{ \
for( int y = 0; y < height; y++ ) \
{ \
for( int x = 0; x < width; x++ ) \
{ \
OP( x ); \
} \
\
INC; \
} \
}
250
251
/// Row worker for size_aware_pel_op(): visits the columns in groups of UNROLL.
/// The innermost loop has a compile-time trip count so the compiler can expand
/// it into UNROLL consecutive calls.  The caller guarantees that UNROLL divides
/// `width`.
template<int UNROLL, typename TOP, typename TINC>
static inline void size_aware_pel_rows( TOP& op, TINC& inc, int width, int height )
{
  for( int row = 0; row < height; row++ )
  {
    for( int col = 0; col < width; col += UNROLL )
    {
      for( int lane = 0; lane < UNROLL; lane++ )
      {
        op( col + lane );
      }
    }

    inc();
  }
}

/// Calls op( x ) for x = 0 .. width-1 on each of `height` rows and inc() once
/// after every row.  The column loop is processed in groups of 8, 4, 2 or 1,
/// whichever is the largest of those that divides `width`.
///
/// \param op      callable taking the column index
/// \param inc     callable without arguments, invoked after each row
/// \param width   number of column indices per row
/// \param height  number of rows
template<typename TOP, typename TINC>
static inline void size_aware_pel_op( TOP op, TINC inc, int width, int height )
{
  if( ( width & 7 ) == 0 )
  {
    size_aware_pel_rows<8>( op, inc, width, height );
    return;
  }

  if( ( width & 3 ) == 0 )
  {
    size_aware_pel_rows<4>( op, inc, width, height );
    return;
  }

  if( ( width & 1 ) == 0 )
  {
    size_aware_pel_rows<2>( op, inc, width, height );
    return;
  }

  size_aware_pel_rows<1>( op, inc, width, height );
}
315
316 template<>
317 void AreaBuf<MotionInfo>::fill( const MotionInfo& val );
318
319 template<typename T>
fill(const T & val)320 void AreaBuf<T>::fill(const T &val)
321 {
322 if( T( 0 ) == val )
323 {
324 if( width == stride )
325 {
326 ::memset( buf, 0, width * height * sizeof( T ) );
327 }
328 else
329 {
330 T* dest = buf;
331 size_t line = width * sizeof( T );
332
333 for( unsigned y = 0; y < height; y++ )
334 {
335 ::memset( dest, 0, line );
336
337 dest += stride;
338 }
339 }
340 }
341 else
342 {
343 if( width == stride )
344 {
345 std::fill_n( buf, width * height, val );
346 }
347 else
348 {
349 T* dest = buf;
350
351 for( int y = 0; y < height; y++, dest += stride )
352 {
353 std::fill_n( dest, width, val );
354 }
355 }
356 }
357 }
358
359 template<typename T>
memset(const int val)360 void AreaBuf<T>::memset( const int val )
361 {
362 GCC_WARNING_DISABLE_class_memaccess
363 if( width == stride )
364 {
365 ::memset( buf, val, width * height * sizeof( T ) );
366 }
367 else
368 {
369 T* dest = buf;
370 size_t line = width * sizeof( T );
371
372 for( int y = 0; y < height; y++ )
373 {
374 ::memset( dest, val, line );
375
376 dest += stride;
377 }
378 }
379 GCC_WARNING_RESET
380 }
381
382 #if ENABLE_SIMD_OPT_BUFFER
383 template<typename T>
copyFrom(const AreaBuf<const T> & other)384 void AreaBuf<T>::copyFrom( const AreaBuf<const T> &other ) const
385 {
386 #if !defined(__GNUC__) || __GNUC__ > 5
387 static_assert( std::is_trivially_copyable<T>::value, "Type T is not trivially_copyable" );
388 #endif
389
390 g_pelBufOP.copyBuffer( (const char *) other.buf, sizeof( T ) * other.stride, (char *) buf, sizeof( T ) * stride, sizeof( T ) * width, height );
391 }
392 #else
393 template<typename T>
copyFrom(const AreaBuf<const T> & other)394 void AreaBuf<T>::copyFrom( const AreaBuf<const T> &other ) const
395 {
396 #if !defined(__GNUC__) || __GNUC__ > 5
397 static_assert( std::is_trivially_copyable<T>::value, "Type T is not trivially_copyable" );
398 #endif
399
400 CHECK( width != other.width, "Incompatible size" );
401 CHECK( height != other.height, "Incompatible size" );
402
403 if( buf == other.buf )
404 {
405 return;
406 }
407
408 if( ptrdiff_t( width ) == stride && stride == other.stride )
409 {
410 memcpy( buf, other.buf, width * height * sizeof( T ) );
411 }
412 else
413 {
414 T* dst = buf;
415 const T* src = other.buf;
416 const ptrdiff_t srcStride = other.stride;
417
418 for( unsigned y = 0; y < height; y++ )
419 {
420 memcpy( dst, src, width * sizeof( T ) );
421
422 dst += stride;
423 src += srcStride;
424 }
425 }
426 }
427 #endif
428
429
430 template<typename T>
subtract(const AreaBuf<const T> & other)431 void AreaBuf<T>::subtract( const AreaBuf<const T> &other )
432 {
433 CHECK( width != other.width, "Incompatible size" );
434 CHECK( height != other.height, "Incompatible size" );
435
436 T* dest = buf;
437 const T* subs = other.buf;
438
439 #define SUBS_INC \
440 dest += stride; \
441 subs += other.stride; \
442
443 #define SUBS_OP( ADDR ) dest[ADDR] -= subs[ADDR]
444
445 SIZE_AWARE_PER_EL_OP( SUBS_OP, SUBS_INC );
446
447 #undef SUBS_OP
448 #undef SUBS_INC
449 }
450
// Generic fallback for element types without a dedicated implementation:
// ignores its arguments and unconditionally invokes THROW.
template<typename T>
void AreaBuf<T>::reconstruct( const AreaBuf<const T> &pred, const AreaBuf<const T> &resi, const ClpRng& clpRng )
{
THROW( "Type not supported" );
}

// Pel buffers have an explicit specialization; it is only declared in this header.
template<>
void AreaBuf<Pel>::reconstruct( const AreaBuf<const Pel> &pred, const AreaBuf<const Pel> &resi, const ClpRng& clpRng );
459
460
// Generic fallback for element types without a dedicated implementation:
// ignores its arguments and unconditionally invokes THROW.
template<typename T>
void AreaBuf<T>::addAvg( const AreaBuf<const T> &other1, const AreaBuf<const T> &other2, const ClpRng& clpRng )
{
THROW( "Type not supported" );
}

// Pel buffers have an explicit specialization; it is only declared in this header.
template<>
void AreaBuf<Pel>::addAvg( const AreaBuf<const Pel> &other1, const AreaBuf<const Pel> &other2, const ClpRng& clpRng );
469
// Generic fallback for element types without a dedicated implementation:
// ignores its arguments and unconditionally invokes THROW.
template<typename T>
void AreaBuf<T>::linearTransform( const int scale, const int shift, const int offset, bool bClip, const ClpRng& clpRng )
{
THROW( "Type not supported" );
}

// Pel buffers have an explicit specialization; it is only declared in this header.
template<>
void AreaBuf<Pel>::linearTransform( const int scale, const int shift, const int offset, bool bClip, const ClpRng& clpRng );
478
479
// Generic fallback for element types without a dedicated implementation:
// ignores its arguments and unconditionally invokes THROW.
template<typename T>
void AreaBuf<T>::rescaleBuf( const AreaBuf<const T>& beforeScaling, ComponentID compID, const std::pair<int, int> scalingRatio, const Window& confBefore, const Window& confAfter, const ChromaFormat chromaFormatIDC, const BitDepths& bitDepths, const bool horCollocatedChromaFlag, const bool verCollocatedChromaFlag )
{
THROW( "Type not supported" );
}

// Pel buffers have an explicit specialization; it is only declared in this header.
template<>
void AreaBuf<Pel>::rescaleBuf( const AreaBuf<const Pel>& beforeScaling, ComponentID compID, const std::pair<int, int> scalingRatio, const Window& confBefore, const Window& confAfter, const ChromaFormat chromaFormatIDC, const BitDepths& bitDepths, const bool horCollocatedChromaFlag, const bool verCollocatedChromaFlag );
488
489 template<typename T>
extendBorderPel(unsigned margin)490 void AreaBuf<T>::extendBorderPel( unsigned margin )
491 {
492 T* p = buf;
493 int h = height;
494 int w = width;
495 ptrdiff_t s = stride;
496
497 CHECK( ( w + 2 * margin ) > s, "Size of buffer too small to extend" );
498 // do left and right margins
499 for( int y = 0; y < h; y++ )
500 {
501 for( int x = 0; x < margin; x++ )
502 {
503 *( p - margin + x ) = p[0];
504 p[w + x] = p[w - 1];
505 }
506 p += s;
507 }
508
509 // p is now the (0,height) (bottom left of image within bigger picture
510 p -= ( s + margin );
511 // p is now the (-margin, height-1)
512 for( int y = 0; y < margin; y++ )
513 {
514 ::memcpy( p + ( y + 1 ) * s, p, sizeof( T ) * ( w + ( margin << 1 ) ) );
515 }
516
517 // pi is still (-marginX, height-1)
518 p -= ( ( h - 1 ) * s );
519 // pi is now (-marginX, 0)
520 for( int y = 0; y < margin; y++ )
521 {
522 ::memcpy( p - ( y + 1 ) * s, p, sizeof( T ) * ( w + ( margin << 1 ) ) );
523 }
524 }
525
526 template<typename T>
extendBorderPel(unsigned margin,bool left,bool right,bool top,bool bottom)527 void AreaBuf<T>::extendBorderPel(unsigned margin, bool left, bool right, bool top, bool bottom)
528 {
529 CHECK( ( width + left*margin + right*margin) > stride, "Size of buffer too small to extend" );
530 // do left and right margins
531
532 if( left && right )
533 {
534 T* p = buf;
535 for( int y = 0; y < height; y++ )
536 {
537 for( int x = 0; x < margin; x++ )
538 {
539 p[-(int)margin + x] = p[0];
540 p[width + x] = p[width - 1];
541 }
542 p += stride;
543 }
544 }
545
546 else if( left )
547 {
548 T* p = buf;
549 for( int y = 0; y < height; y++ )
550 {
551 for( int x = 0; x < margin; x++ )
552 {
553 p[-(int)margin + x] = p[0];
554 }
555 p += stride;
556 }
557 }
558
559 else if( right )
560 {
561 T* p = buf;
562 for( int y = 0; y < height; y++ )
563 {
564 for( int x = 0; x < margin; x++ )
565 {
566 p[width + x] = p[width - 1];
567 }
568 p += stride;
569 }
570 }
571
572 const int copylen = width + ( left ? margin : 0 ) + ( right ? margin : 0 );
573 if( bottom )
574 {
575 T* p = buf + stride * height;
576 if( left )
577 p -= margin;
578
579 // p is now the (-margin, height)
580 for( int y = 0; y < margin; y++ )
581 {
582 ::memcpy( p + y * stride, p - stride, sizeof( T ) * copylen );
583 }
584 }
585
586 if( top )
587 {
588 T* p = buf;
589 if( left )
590 p -= margin;
591
592 // pi is now (-marginX, 0)
593 for( int y = -(int)margin; y < 0; y++ )
594 {
595 ::memcpy( p + y * stride, p, sizeof( T ) * copylen );
596 }
597 }
598 }
599
600 template<typename T>
padBorderPel(unsigned marginX,unsigned marginY,int dir)601 void AreaBuf<T>::padBorderPel( unsigned marginX, unsigned marginY, int dir )
602 {
603 T* p = buf;
604 int s = stride;
605 int h = height;
606 int w = width;
607
608 CHECK( w > s, "Size of buffer too small to extend" );
609
610 // top-left margin
611 if ( dir == 1 )
612 {
613 for( int y = 0; y < marginY; y++ )
614 {
615 for( int x = 0; x < marginX; x++ )
616 {
617 p[x] = p[marginX];
618 }
619 p += s;
620 }
621 }
622
623 // bottom-right margin
624 if ( dir == 2 )
625 {
626 p = buf + s * ( h - marginY ) + w - marginX;
627
628 for( int y = 0; y < marginY; y++ )
629 {
630 for( int x = 0; x < marginX; x++ )
631 {
632 p[x] = p[-1];
633 }
634 p += s;
635 }
636 }
637 }
638
639 #if ENABLE_SIMD_OPT_BUFFER && defined(TARGET_SIMD_X86)
640 template<> void AreaBuf<Pel>::transposedFrom( const AreaBuf<const Pel> &other );
641 #endif
642
643 template<typename T>
transposedFrom(const AreaBuf<const T> & other)644 void AreaBuf<T>::transposedFrom( const AreaBuf<const T> &other )
645 {
646 CHECK( width * height != other.width * other.height, "Incompatible size" );
647
648 T* dst = buf;
649 const T* src = other.buf;
650 width = other.height;
651 height = other.width;
652 stride = stride < width ? width : stride;
653
654 for( unsigned y = 0; y < other.height; y++ )
655 {
656 for( unsigned x = 0; x < other.width; x++ )
657 {
658 dst[y + x*stride] = src[x + y * other.stride];
659 }
660 }
661 }
662
663 #ifndef DONT_UNDEF_SIZE_AWARE_PER_EL_OP
664 #undef SIZE_AWARE_PER_EL_OP
665 #endif // !DONT_UNDEF_SIZE_AWARE_PER_EL_OP
666
667 // ---------------------------------------------------------------------------
668 // UnitBuf struct
669 // ---------------------------------------------------------------------------
670
671 struct UnitArea;
672
673 template<typename T>
674 struct UnitBuf
675 {
676 typedef static_vector<AreaBuf<T>, MAX_NUM_COMPONENT> UnitBufBuffers;
677 typedef static_vector<AreaBuf<const T>, MAX_NUM_COMPONENT> ConstUnitBufBuffers;
678
679 ChromaFormat chromaFormat;
680 UnitBufBuffers bufs;
681
UnitBufUnitBuf682 UnitBuf() : chromaFormat( NUM_CHROMA_FORMAT ) { }
UnitBufUnitBuf683 UnitBuf( const ChromaFormat &_chromaFormat, const UnitBufBuffers& _bufs ) : chromaFormat( _chromaFormat ), bufs( _bufs ) { }
UnitBufUnitBuf684 UnitBuf( const ChromaFormat &_chromaFormat, UnitBufBuffers&& _bufs ) : chromaFormat( _chromaFormat ), bufs( std::forward<UnitBufBuffers>( _bufs ) ) { }
UnitBufUnitBuf685 UnitBuf( const ChromaFormat &_chromaFormat, const AreaBuf<T> &blkY ) : chromaFormat( _chromaFormat ), bufs{ blkY } { }
UnitBufUnitBuf686 UnitBuf( const ChromaFormat &_chromaFormat, AreaBuf<T> &&blkY ) : chromaFormat( _chromaFormat ), bufs{ std::forward<AreaBuf<T> >(blkY) } { }
UnitBufUnitBuf687 UnitBuf( const ChromaFormat &_chromaFormat, const AreaBuf<T> &blkY, const AreaBuf<T> &blkCb, const AreaBuf<T> &blkCr ) : chromaFormat( _chromaFormat ), bufs{ blkY, blkCb, blkCr } { if( chromaFormat == CHROMA_400 ) bufs.resize( 1 ); }
UnitBufUnitBuf688 UnitBuf( const ChromaFormat &_chromaFormat, AreaBuf<T> &&blkY, AreaBuf<T> &&blkCb, AreaBuf<T> &&blkCr ) : chromaFormat( _chromaFormat ), bufs{ std::forward<AreaBuf<T> >(blkY), std::forward<AreaBuf<T> >(blkCb), std::forward<AreaBuf<T> >(blkCr) } { if( chromaFormat == CHROMA_400 ) bufs.resize( 1 ); }
UnitBufUnitBuf689 UnitBuf( const UnitBuf<typename std::remove_const<T>::type>& other ) : chromaFormat( other.chromaFormat ), bufs{}
690 {
691 // TODO: delete to avoid unneccessary copying
692 for( auto &buf : other.bufs )
693 {
694 bufs.push_back( buf );
695 }
696 }
697
getUnitBuf698 AreaBuf<T>& get( const ComponentID comp ) { return bufs[comp]; }
getUnitBuf699 const AreaBuf<T>& get( const ComponentID comp ) const { return bufs[comp]; }
700
YUnitBuf701 AreaBuf<T>& Y() { return bufs[0]; }
YUnitBuf702 const AreaBuf<T>& Y() const { return bufs[0]; }
CbUnitBuf703 AreaBuf<T>& Cb() { return bufs[1]; }
CbUnitBuf704 const AreaBuf<T>& Cb() const { return bufs[1]; }
CrUnitBuf705 AreaBuf<T>& Cr() { return bufs[2]; }
CrUnitBuf706 const AreaBuf<T>& Cr() const { return bufs[2]; }
707
708 void fill ( const T &val );
709 void copyFrom ( const UnitBuf<const T> &other ) const;
710 void reconstruct ( const UnitBuf<const T> &pred, const UnitBuf<const T> &resi, const ClpRngs& clpRngs );
711 void subtract ( const UnitBuf<const T> &other );
712 void addWeightedAvg ( const UnitBuf< T> &other1, const UnitBuf< T> &other2, const ClpRngs& clpRngs, const uint8_t bcwIdx = BCW_DEFAULT, const bool chromaOnly = false, const bool lumaOnly = false);
713 void addAvg ( const UnitBuf< T> &other1, const UnitBuf< T> &other2, const ClpRngs& clpRngs, const bool chromaOnly = false, const bool lumaOnly = false);
714 void extendBorderPel ( unsigned margin );
715 void extendBorderPel ( unsigned margin, bool left, bool right, bool top, bool bottom );
716 void padBorderPel ( unsigned margin, int dir );
717
718 void rescaleBuf ( const UnitBuf<const T>& beforeScaling, const std::pair<int, int> scalingRatio, const Window& confBefore, const Window& confAfter, const BitDepths& bitDepths, const bool horCollocatedChromaFlag = false, const bool verCollocatedChromaFlag = false );
719
720 UnitBuf< T> subBuf (const Area& subArea);
721 const UnitBuf<const T> subBuf (const Area& subArea) const;
722 UnitBuf< T> subBuf (const UnitArea& subArea);
723 const UnitBuf<const T> subBuf (const UnitArea& subArea) const;
724 void colorSpaceConvert ( const UnitBuf<T> &other, const ClpRng& clpRng );
725
726 void writeToFile( std::string filename ) const; // for debug purposes
727 };
728
729 typedef UnitBuf< Pel> PelUnitBuf;
730 typedef UnitBuf<const Pel> CPelUnitBuf;
731
732 typedef UnitBuf< TCoeff> CoeffUnitBuf;
733 typedef UnitBuf<const TCoeff> CCoeffUnitBuf;
734
735 template<typename T>
fill(const T & val)736 void UnitBuf<T>::fill( const T &val )
737 {
738 for( unsigned i = 0; i < bufs.size(); i++ )
739 {
740 bufs[i].fill( val );
741 }
742 }
743
744 template<typename T>
copyFrom(const UnitBuf<const T> & other)745 void UnitBuf<T>::copyFrom( const UnitBuf<const T> &other ) const
746 {
747 CHECK( chromaFormat != other.chromaFormat, "Incompatible formats" );
748
749 for( unsigned i = 0; i < bufs.size(); i++ )
750 {
751 bufs[i].copyFrom( other.bufs[i] );
752 }
753 }
754
755
756
757 template<typename T>
subtract(const UnitBuf<const T> & other)758 void UnitBuf<T>::subtract( const UnitBuf<const T> &other )
759 {
760 CHECK( chromaFormat != other.chromaFormat, "Incompatible formats" );
761
762 for( unsigned i = 0; i < bufs.size(); i++ )
763 {
764 bufs[i].subtract( other.bufs[i] );
765 }
766 }
767
768 template<typename T>
reconstruct(const UnitBuf<const T> & pred,const UnitBuf<const T> & resi,const ClpRngs & clpRngs)769 void UnitBuf<T>::reconstruct(const UnitBuf<const T> &pred, const UnitBuf<const T> &resi, const ClpRngs& clpRngs)
770 {
771 CHECK( chromaFormat != pred.chromaFormat, "Incompatible formats" );
772 CHECK( chromaFormat != resi.chromaFormat, "Incompatible formats" );
773
774 for( unsigned i = 0; i < bufs.size(); i++ )
775 {
776 bufs[i].reconstruct( pred.bufs[i], resi.bufs[i], clpRngs );
777 }
778 }
779
780 template<typename T>
addWeightedAvg(const UnitBuf<T> & other1,const UnitBuf<T> & other2,const ClpRngs & clpRngs,const uint8_t bcwIdx,const bool chromaOnly,const bool lumaOnly)781 void UnitBuf<T>::addWeightedAvg(const UnitBuf<T> &other1, const UnitBuf<T> &other2, const ClpRngs& clpRngs, const uint8_t bcwIdx /* = BCW_DEFAULT */, const bool chromaOnly /* = false */, const bool lumaOnly /* = false */)
782 {
783 const size_t istart = chromaOnly ? 1 : 0;
784 const size_t iend = lumaOnly ? 1 : bufs.size();
785
786 CHECK(lumaOnly && chromaOnly, "should not happen");
787
788 for(size_t i = istart; i < iend; i++)
789 {
790 bufs[i].addWeightedAvg(other1.bufs[i], other2.bufs[i], clpRngs, bcwIdx);
791 }
792 }
793
794 template<typename T>
addAvg(const UnitBuf<T> & other1,const UnitBuf<T> & other2,const ClpRngs & clpRngs,const bool chromaOnly,const bool lumaOnly)795 void UnitBuf<T>::addAvg(const UnitBuf<T> &other1, const UnitBuf<T> &other2, const ClpRngs& clpRngs, const bool chromaOnly /* = false */, const bool lumaOnly /* = false */)
796 {
797 const size_t istart = chromaOnly ? 1 : 0;
798 const size_t iend = lumaOnly ? 1 : bufs.size();
799
800 CHECK( lumaOnly && chromaOnly, "should not happen" );
801
802 for( size_t i = istart; i < iend; i++)
803 {
804 bufs[i].addAvg( other1.bufs[i], other2.bufs[i], clpRngs );
805 }
806 }
807
// Generic fallback for element types without a dedicated implementation:
// ignores its arguments and unconditionally invokes THROW.
template<typename T>
void UnitBuf<T>::colorSpaceConvert( const UnitBuf<T> &other, const ClpRng& clpRng )
{
THROW( "Type not supported" );
}

// Pel bundles have an explicit specialization; it is only declared in this header.
template<>
void UnitBuf<Pel>::colorSpaceConvert( const UnitBuf<Pel> &other, const ClpRng& clpRng );
816
817 template<typename T>
extendBorderPel(unsigned margin)818 void UnitBuf<T>::extendBorderPel( unsigned margin )
819 {
820 for( unsigned i = 0; i < bufs.size(); i++ )
821 {
822 bufs[i].extendBorderPel( margin );
823 }
824 }
825
826 template<typename T>
extendBorderPel(unsigned margin,bool left,bool right,bool top,bool bottom)827 void UnitBuf<T>::extendBorderPel(unsigned margin, bool left, bool right, bool top, bool bottom)
828 {
829 for( unsigned i = 0; i < bufs.size(); i++ )
830 {
831 bufs[i].extendBorderPel( margin, left, right, top, bottom );
832 }
833 }
834
835 template<typename T>
padBorderPel(unsigned margin,int dir)836 void UnitBuf<T>::padBorderPel( unsigned margin, int dir )
837 {
838 for( unsigned i = 0; i < bufs.size(); i++ )
839 {
840 bufs[i].padBorderPel( margin >> getComponentScaleX( ComponentID( i ), chromaFormat ), margin >> getComponentScaleY( ComponentID( i ), chromaFormat ), dir );
841 }
842 }
843
844 template<typename T>
subBuf(const UnitArea & subArea)845 UnitBuf<T> UnitBuf<T>::subBuf( const UnitArea& subArea )
846 {
847 UnitBuf<T> subBuf;
848 subBuf.chromaFormat = chromaFormat;
849 unsigned blockIdx = 0;
850
851 for( auto &subAreaBuf : bufs )
852 {
853 subBuf.bufs.push_back( subAreaBuf.subBuf( subArea.blocks[blockIdx].pos(), subArea.blocks[blockIdx].size() ) );
854 blockIdx++;
855 }
856
857 return subBuf;
858 }
859
860
861 template<typename T>
subBuf(const UnitArea & subArea)862 const UnitBuf<const T> UnitBuf<T>::subBuf( const UnitArea& subArea ) const
863 {
864 UnitBuf<const T> subBuf;
865 subBuf.chromaFormat = chromaFormat;
866 unsigned blockIdx = 0;
867
868 for( const auto &subAreaBuf : bufs )
869 {
870 subBuf.bufs.push_back( subAreaBuf.subBuf( subArea.blocks[blockIdx].pos(), subArea.blocks[blockIdx].size() ) );
871 blockIdx++;
872 }
873
874 return subBuf;
875 }
876
877 template<typename T>
subBuf(const Area & subArea)878 UnitBuf<T> UnitBuf<T>::subBuf( const Area & subArea )
879 {
880 UnitBuf<T> subBuf;
881 subBuf.chromaFormat = chromaFormat;
882 unsigned blockIdx = 0;
883
884 for( auto &subAreaBuf : bufs )
885 {
886 const int scaleX = getComponentScaleX( ComponentID(blockIdx), chromaFormat);
887 const int scaleY = getComponentScaleY( ComponentID(blockIdx), chromaFormat);
888 const Area scaledArea( subArea.pos().x >> scaleX, subArea.pos().y >> scaleY, subArea.size().width >> scaleX, subArea.size().height >> scaleY );
889 subBuf.bufs.push_back( subAreaBuf.subBuf( scaledArea.pos(), scaledArea.size() ) );
890 blockIdx++;
891 }
892
893 return subBuf;
894 }
895
896 template<typename T>
subBuf(const Area & subArea)897 const UnitBuf<const T> UnitBuf<T>::subBuf( const Area & subArea ) const
898 {
899 UnitBuf<T> subBuf;
900 subBuf.chromaFormat = chromaFormat;
901 unsigned blockIdx = 0;
902
903 for( auto &subAreaBuf : bufs )
904 {
905 const int scaleX = getComponentScaleX( ComponentID(blockIdx), chromaFormat);
906 const int scaleY = getComponentScaleY( ComponentID(blockIdx), chromaFormat);
907 const Area scaledArea( subArea.pos().x >> scaleX, subArea.pos().y >> scaleY, subArea.size().width >> scaleX, subArea.size().height >> scaleY );
908 subBuf.bufs.push_back( subAreaBuf.subBuf( scaledArea.pos(), scaledArea.size() ) );
909 blockIdx++;
910 }
911
912 return subBuf;
913 }
914
915 template<typename T>
rescaleBuf(const UnitBuf<const T> & beforeScaling,const std::pair<int,int> scalingRatio,const Window & confBefore,const Window & confAfter,const BitDepths & bitDepths,const bool horCollocatedChromaFlag,const bool verCollocatedChromaFlag)916 void UnitBuf<T>::rescaleBuf( const UnitBuf<const T>& beforeScaling, const std::pair<int, int> scalingRatio, const Window& confBefore, const Window& confAfter, const BitDepths& bitDepths, const bool horCollocatedChromaFlag, const bool verCollocatedChromaFlag )
917 {
918 for( unsigned i = 0; i < bufs.size(); i++ )
919 {
920 bufs[i].rescaleBuf( beforeScaling.bufs[i], ComponentID( i ), scalingRatio, confBefore, confAfter, chromaFormat, bitDepths, horCollocatedChromaFlag, verCollocatedChromaFlag );
921 }
922 }
923
924 // ---------------------------------------------------------------------------
925 // PelStorage struct (PelUnitBuf which allocates its own memory)
926 // ---------------------------------------------------------------------------
927
928 struct UnitArea;
929 struct CompArea;
930
// PelUnitBuf that is meant to manage its own sample memory (see the section
// comment above).  Only the two inline accessors are defined in this header;
// every other member is declared here and defined elsewhere.
//
// NOTE(review): a destructor is declared but no copy/move operations are
// declared or deleted - confirm how copies are supposed to behave.
struct PelStorage : public PelUnitBuf
{
PelStorage();
~PelStorage();

void swap( PelStorage& other );
void createFromBuf( PelUnitBuf buf );
void create( const UnitArea &_unit );
void create( const ChromaFormat _chromaFormat, const Size& _size, const unsigned _maxCUSize = 0, const unsigned _margin = 0, const unsigned _alignment = 0, const bool _scaleChromaMargin = true );
void destroy();

// Views onto a single component, addressed by block or by component id.
PelBuf getBuf( const CompArea &blk );
const CPelBuf getBuf( const CompArea &blk ) const;

PelBuf getBuf( const ComponentID CompID );
const CPelBuf getBuf( const ComponentID CompID ) const;

// Multi-component views for a unit area.
PelUnitBuf getBuf( const UnitArea &unit );
const CPelUnitBuf getBuf( const UnitArea &unit ) const;
// Raw origin pointer stored for entry `id` (no bounds check on id).
Pel *getOrigin( const int id ) const { return m_origin[id]; }
// View starting at the stored origin pointer with the stored size; built with
// the (pointer, Size) constructor, so its stride equals the stored width.
PelBuf getOriginBuf( const int id ) { return PelBuf( m_origin[id], m_origSi[id] ); }

private:

// Per-entry size and origin pointer backing getOrigin() / getOriginBuf().
Size m_origSi[MAX_NUM_COMPONENT];
Pel *m_origin[MAX_NUM_COMPONENT];
};
958
959 }
960