1 #ifndef GLOBAL_H_
2 #define GLOBAL_H_
3 /*****************************************************************************
4  * This file is part of Kvazaar HEVC encoder.
5  *
6  * Copyright (c) 2021, Tampere University, ITU/ISO/IEC, project contributors
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without modification,
10  * are permitted provided that the following conditions are met:
11  *
12  * * Redistributions of source code must retain the above copyright notice, this
13  *   list of conditions and the following disclaimer.
14  *
15  * * Redistributions in binary form must reproduce the above copyright notice, this
16  *   list of conditions and the following disclaimer in the documentation and/or
17  *   other materials provided with the distribution.
18  *
19  * * Neither the name of the Tampere University or ITU/ISO/IEC nor the names of its
20  *   contributors may be used to endorse or promote products derived from
21  *   this software without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
25  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
26  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
27  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
28  * INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION HOWEVER CAUSED AND ON
30  * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * INCLUDING NEGLIGENCE OR OTHERWISE ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  ****************************************************************************/
34 
35 /**
36  * \file
37  * Header that is included in every other header.
38  *
39  * This file contains global constants that can be referred to from any header
40  * or source file. It also contains some helper macros and includes stdint.h
41  * so that any file can refer to integer types with exact widths.
42  */
43 
44 #ifdef HAVE_CONFIG_H
45 // Include config.h generated by automake. This needs to be before any other
46 // includes in every file, which is why it's in global.
47 #include "config.h" // IWYU pragma: export
48 #endif
49 
50 // Include some basics in all files, like assert, primitives and NULL.
// If you add anything to this list with export pragma, think long and
// hard if it's actually a good idea to include it for every c-file.
53 #include <assert.h> // IWYU pragma: export
54 #include <stdbool.h> // IWYU pragma: export
55 #include <stdint.h> // IWYU pragma: export
56 #include <stddef.h> // IWYU pragma: export
57 
58 // The stdlib.h and string.h headers are needed because of MALLOC and FILL
59 // macros defined here, as IWYU will remove them from files that use only
60 // those macros.
61 #include <stdlib.h>
62 #include <string.h>
63 
64 /**
65  * \defgroup Bitstream
66  * HEVC bitstream coding
67  *
68  * \defgroup CABAC
69  * Context Adaptive Binary Arithmetic Encoder implementation
70  *
71  * \defgroup Compression
 * Prediction parameter decisions and rate-distortion optimization
73  *
74  * \defgroup Control
75  * Initialization and control flow of the encoder
76  *
77  * \defgroup DataStructures
78  * Containers for images, predictions parameters and such
79  *
80  * \defgroup Extras
81  * 3rd party modules not considered part of the encoder.
82  *
83  * \defgroup Optimization
 * Architecture dependent SIMD optimizations and dynamic dispatch mechanism
85  *
86  * \defgroup Reconstruction
87  * Stuff required for creating the resulting image after lossy compression
88  *
89  * \defgroup Threading
90  * Stuff related to multi-threading using pthreads
91  */
92 
// Pthreads-win32 attempts to define timespec even when it already exists.
// Visual Studio 2015 defines timespec in time.h, so HAVE_STRUCT_TIMESPEC is
// defined here for _MSC_VER >= 1900 unless something else already set it.
#if !defined(HAVE_STRUCT_TIMESPEC) && _MSC_VER >= 1900
#  define HAVE_STRUCT_TIMESPEC
#endif
99 
// X86_64 is defined when building for 64-bit x86 with either MSVC
// (_M_AMD64) or a GCC-compatible compiler (__x86_64__).
#if (defined(_MSC_VER) && defined(_M_AMD64)) || \
    (defined(__GNUC__) && defined(__x86_64__))
  #define X86_64
#endif
107 
//! Smallest pixel value.
#define PIXEL_MIN 0
//! Largest pixel value representable with KVZ_BIT_DEPTH bits.
#define PIXEL_MAX ((1 << KVZ_BIT_DEPTH) - 1)

//! Signed 16-bit storage type for coefficients.
typedef int16_t coeff_t;
112 
113 //#define VERBOSE 1
114 
115 /* CONFIG VARIABLES */
116 
117 //spec: references to variables defined in Rec. ITU-T H.265 (04/2013)
118 
//! Limits for prediction block sizes, expressed as depths:
//! 0 = 64x64, 4 = 4x4.
#define PU_DEPTH_INTER_MIN  0
#define PU_DEPTH_INTER_MAX  3
#define PU_DEPTH_INTRA_MIN  0
#define PU_DEPTH_INTRA_MAX  4

//! Maximum number of layers in GOP structure (for allocating structures)
#define MAX_GOP_LAYERS      6
127 
//! Maximum CU depth when descending from LCU level.
//! spec: log2_diff_max_min_luma_coding_block_size
#define MAX_DEPTH     3

//! Minimum log2 size of CUs.
//! spec: MinCbLog2SizeY
#define MIN_SIZE      3

//! Maximum PU depth.
//! Search is started at depth 0 and goes in Z-order to MAX_PU_DEPTH,
//! see search_cu().
#define MAX_PU_DEPTH  4
137 
//! spec: pcm_enabled_flag
//! Setting to 1 will enable using PCM blocks (current intra-search does not
//! consider PCM).
#define ENABLE_PCM 0

//! Skip residual coding when it's under _some_ threshold.
#define OPTIMIZATION_SKIP_RESIDUAL_ON_THRESHOLD 0
143 
144 /* END OF CONFIG VARIABLES */
145 
//! Width of the smallest CU in pixels: pow(2, MIN_SIZE).
#define CU_MIN_SIZE_PIXELS  (1 << MIN_SIZE)
//! spec: CtbSizeY
#define LCU_WIDTH           (1 << (MIN_SIZE + MAX_DEPTH))
//! spec: CtbWidthC and CtbHeightC
#define LCU_WIDTH_C         (LCU_WIDTH / 2)

//! spec: Log2MaxTrafoSize <= Min(CtbLog2SizeY, 5)
#define TR_MAX_LOG2_SIZE    5
//! Transform width matching TR_MAX_LOG2_SIZE.
#define TR_MAX_WIDTH        (1 << TR_MAX_LOG2_SIZE)
//! spec: Log2MinTrafoSize
#define TR_MIN_LOG2_SIZE    2
//! Transform width matching TR_MIN_LOG2_SIZE.
#define TR_MIN_WIDTH        (1 << TR_MIN_LOG2_SIZE)
159 
160 #if LCU_WIDTH != 64
161   #error "Kvazaar only support LCU_WIDTH == 64"
162 #endif
163 
//! Number of luma samples in one LCU.
#define LCU_LUMA_SIZE   (LCU_WIDTH * LCU_WIDTH)
//! A quarter of LCU_LUMA_SIZE.
#define LCU_CHROMA_SIZE ((LCU_WIDTH * LCU_WIDTH) >> 2)
166 
/**
 * \brief Number of pixels to delay deblocking.
 *
 * This many pixels at the bottom and right side of the LCU are left
 * un-deblocked until the neighboring LCU is filtered. The last four chroma
 * pixels of the horizontal edges within the LCU are deblocked together with
 * the LCU to the right, which is why DEBLOCK_DELAY_PX is 8 pixels.
 */
#define DEBLOCK_DELAY_PX 8

/**
 * \brief Number of pixels to delay SAO in horizontal and vertical
 * directions.
 *
 * This many pixels at the bottom and right side of the LCU are left
 * unfiltered by SAO until the neighboring LCU is filtered. SAO
 * reconstruction requires that a one-pixel border has been deblocked for
 * both luma and chroma, so SAO_DELAY_PX is DEBLOCK_DELAY_PX + 2.
 */
#define SAO_DELAY_PX (DEBLOCK_DELAY_PX + 2)
188 
//! Upper bound on the number of reference pictures.
#define MAX_REF_PIC_COUNT       16

//! Candidate-count constants for AMVP and merge (MRG).
#define AMVP_MAX_NUM_CANDS      2
#define AMVP_MAX_NUM_CANDS_MEM  3
#define MRG_MAX_NUM_CANDS       5
194 
/* Some tools */
// NOTE: These are plain macros, so arguments may be evaluated more than
// once. Do not pass expressions with side effects.

//! Absolute value of a.
#define ABS(a) ((a) >= 0 ? (a) : (-(a)))
//! Larger of a and b.
#define MAX(a,b) (((a)>(b))?(a):(b))
//! Smaller of a and b.
#define MIN(a,b) (((a)<(b))?(a):(b))
//! Clamp value to the range [low, high].
#define CLIP(low,high,value) MAX((low),MIN((high),(value)))
//! Clamp value to the range [0, PIXEL_MAX].
#define CLIP_TO_PIXEL(value) CLIP(0, PIXEL_MAX, (value))
//! Clamp value to the range [0, 51].
#define CLIP_TO_QP(value) CLIP(0, 51, (value))
//! Exchange the values of lvalues a and b through a temporary of type
//! swaptype. Expands to a brace-enclosed block.
#define SWAP(a,b,swaptype) { swaptype tempval; tempval = (a); (a) = (b); (b) = tempval; }
//! LCU_WIDTH shifted right by depth.
#define CU_WIDTH_FROM_DEPTH(depth) (LCU_WIDTH >> (depth))
//! True when min_val <= val <= max_val (both bounds inclusive).
#define WITHIN(val, min_val, max_val) ((min_val) <= (val) && (val) <= (max_val))
//! (x + y - 1) / y, i.e. x / y rounded up for non-negative x and positive y.
#define CEILDIV(x,y) (((x) + (y) - 1) / (y))
206 
//! Shift amount used by CU_TO_PIXEL for depth 0.
#define LOG2_LCU_WIDTH 6

// CU_TO_PIXEL = y * lcu_width * pic_width + x * lcu_width
// Both x and y are scaled by shifting left with (LOG2_LCU_WIDTH - depth);
// the scaled y is additionally multiplied by stride.
#define CU_TO_PIXEL(x, y, depth, stride) \
  (((y) << (LOG2_LCU_WIDTH - (depth))) * (stride) \
   + ((x) << (LOG2_LCU_WIDTH - (depth))))

//! Three-way sign of x: +1 when x > 0, -1 when x < 0, 0 otherwise.
#define SIGN3(x) (((x) > 0) - ((x) < 0))
213 
214 
// Two-step stringification: QUOTE turns its argument into a string literal
// as written, QUOTE_EXPAND macro-expands the argument first.
#define QUOTE(x) #x
#define QUOTE_EXPAND(x) QUOTE(x)

// NOTE: When making a release, check to see if incrementing libversion in
// configure.ac is necessary.
#if !defined(KVZ_VERSION)
#  define KVZ_VERSION 2.1.0
#endif
//! KVZ_VERSION as a string literal.
#define VERSION_STRING QUOTE_EXPAND(KVZ_VERSION)
224 
225 //#define VERBOSE 1
226 
//! (1 << (MIN(KVZ_BIT_DEPTH, 10) - 5)) - 1; the bit depth is capped at 10
//! for this computation.
#define SAO_ABS_OFFSET_MAX ((1 << (MIN(KVZ_BIT_DEPTH, 10) - 5)) - 1)

#define MAX_TILES_PER_DIM  48
#define MAX_SLICES         16
231 
/* Inlining functions */
#if defined(_MSC_VER)
  /* Visual Studio: force inlining and allow recursive inlining. */
  #pragma inline_recursion(on)
  #define INLINE __forceinline
#else
  /* Every other compiler: plain C inline. */
  #define INLINE inline
#endif
239 
// Return the next aligned address after p. The result is always greater
// than p and at most alignment bytes larger; when p is already aligned the
// result is p + alignment.
//
// The arithmetic is done on uintptr_t: with a signed integer type the
// remainder would be negative for addresses that have the top bit set,
// which would push the result more than alignment bytes past p.
#define ALIGNED_POINTER(p, alignment) \
  ((void*)((uintptr_t)(p) + (alignment) - ((uintptr_t)(p) % (alignment))))
// 32 bytes is enough for AVX2
#define SIMD_ALIGNMENT 32
244 
// Declaration attribute requesting the given alignment, spelled in the
// syntax of the compiler in use.
#if !defined(_MSC_VER)
  #define ALIGNED(alignment) __attribute__((aligned (alignment)))
#else
  #define ALIGNED(alignment) __declspec(align(alignment))
#endif
250 
// MALLOC allocates room for num objects of the given type.
// MALLOC_SIMD_PADDED does the same and adds padding extra bytes.
// Both forward to malloc, so the memory is uninitialized and the result
// is NULL on failure.
#if !defined(_MSC_VER)
  #define MALLOC(type, num) malloc(sizeof(type) * (num))
  #define MALLOC_SIMD_PADDED(type, num, padding) malloc(sizeof(type) * (num) + (padding))
#else
  // Buggy VS2010 throws intellisense warnings if void* is not casted.
  #define MALLOC(type, num) (type *)malloc(sizeof(type) * (num))
  #define MALLOC_SIMD_PADDED(type, num, padding) (type *)malloc(sizeof(type) * (num) + (padding))
#endif
259 
// Prefer FILL and FILL_ARRAY over raw memset when initializing whole
// structures or arrays. Calling memset directly is still fine for anything
// more complicated.

// Set every byte of var (a structure or a static array) to val.
#define FILL(var, val) memset(&(var), (val), sizeof(var))

// Set the first size elements of the array ar to val bytes.
// Expands to a brace-enclosed block.
#define FILL_ARRAY(ar, val, size) \
{ \
  void *temp_ptr = (void*)(ar); \
  memset((temp_ptr), (val), (size) * sizeof(*(ar))); \
}
272 
// Free the memory pointer refers to and reset pointer to NULL.
#define FREE_POINTER(pointer) \
  { free((void*)pointer); pointer = NULL; }

// Hand the pointer value over to dst_pointer and clear src_pointer.
#define MOVE_POINTER(dst_pointer,src_pointer) \
  { dst_pointer = src_pointer; src_pointer = NULL; }
275 
// Fallback numeric limits; each is defined only when not already provided.
#if !defined(MAX_INT)
#  define MAX_INT 0x7FFFFFFF
#endif

#if !defined(MAX_INT64)
#  define MAX_INT64 0x7FFFFFFFFFFFFFFFLL
#endif

#if !defined(MAX_DOUBLE)
#  define MAX_DOUBLE 1.7e+308
#endif
285 
// For transform.h and encoder.h

// Scaling list size identifiers, one per block size.
#define SCALING_LIST_4x4      0
#define SCALING_LIST_8x8      1
#define SCALING_LIST_16x16    2
#define SCALING_LIST_32x32    3
// Number of scaling list size identifiers above.
#define SCALING_LIST_SIZE_NUM 4

#define SCALING_LIST_NUM      6
#define MAX_MATRIX_COEF_NUM   64
#define SCALING_LIST_REM_NUM  6

#define MAX_TR_DYNAMIC_RANGE  15
297 
// Constants

//! Color component index: Y is 0, U is 1 and V is 2.
typedef enum {
  COLOR_Y = 0,
  COLOR_U = 1,
  COLOR_V = 2
} color_t;
300 
301 
// Hardware data (abstraction of defines). Extend for other compilers.
// COMPILE_INTEL is 1 when any of the x86 / x86-64 target macros below is
// defined and 0 otherwise.
#if defined(_M_IX86) || defined(_M_X64) || defined(_M_AMD64) || \
    defined(__i386__) || defined(__i486__) || defined(__i586__) || \
    defined(__i686__) || defined(__amd64__) || defined(__x86_64__)
#  define COMPILE_INTEL 1
#else
#  define COMPILE_INTEL 0
#endif

// Visual Studio note:
// Because these macros are only used to guard code that is guarded by CPUID
// at runtime, use /arch parameter to disable them, but enable all intrinsics
// supported by VisualStudio if SSE2 (highest) is enabled.
// AVX and AVX2 are handled by /arch directly and sse intrinsics will use VEX
// versions if they are defined.
#define MSC_X86_SIMD(level) (_M_X64 || (_M_IX86_FP >= (level)))

// Each COMPILE_INTEL_* macro is defined to 1 when the matching instruction
// set is enabled at compile time, and is left undefined otherwise.
#if COMPILE_INTEL
#  if MSC_X86_SIMD(1) || defined(__MMX__)
#    define COMPILE_INTEL_MMX 1
#  endif
#  if MSC_X86_SIMD(1) || defined(__SSE__)
#    define COMPILE_INTEL_SSE 1
#  endif
#  if MSC_X86_SIMD(2) || defined(__SSE2__)
#    define COMPILE_INTEL_SSE2 1
#  endif
#  ifdef __SSE3__
#    define COMPILE_INTEL_SSE3 1
#  endif
#  if MSC_X86_SIMD(2) || defined(__SSSE3__)
#    define COMPILE_INTEL_SSSE3 1
#  endif
#  if MSC_X86_SIMD(2) || defined(__SSE4_1__)
#    define COMPILE_INTEL_SSE41 1
#  endif
#  if MSC_X86_SIMD(2) || defined(__SSE4_2__)
#    define COMPILE_INTEL_SSE42 1
#  endif
#  ifdef __AVX__
#    define COMPILE_INTEL_AVX 1
#  endif
#  ifdef __AVX2__
#    define COMPILE_INTEL_AVX2 1
#  endif
#endif
346 
// COMPILE_POWERPC is 1 on PowerPC targets and 0 elsewhere.
// COMPILE_POWERPC_ALTIVEC is only defined on PowerPC targets, where it
// reflects whether __ALTIVEC__ is defined.
#if defined(__powerpc__) || defined(__powerpc64__) || defined(_M_PPC)
#  define COMPILE_POWERPC 1
#  if defined(__ALTIVEC__)
#    define COMPILE_POWERPC_ALTIVEC 1
#  else
#    define COMPILE_POWERPC_ALTIVEC 0
#  endif
#else
#  define COMPILE_POWERPC 0
#endif

// COMPILE_ARM is 1 when _M_ARM, __arm__ or __thumb__ is defined, 0 otherwise.
#if defined(__arm__) || defined(__thumb__) || defined(_M_ARM)
#  define COMPILE_ARM 1
#else
#  define COMPILE_ARM 0
#endif
363 
// Min & max delta QP limits based on bit depth.
// Both expansions are fully parenthesized so they can be used safely inside
// larger expressions (e.g. 2 * KVZ_QP_DELTA_MAX).
#define KVZ_QP_DELTA_MIN (-(26 + 3 * (KVZ_BIT_DEPTH - 8)))
#define KVZ_QP_DELTA_MAX (25 + 3 * (KVZ_BIT_DEPTH - 8))
367 
368 #endif
369