#ifndef GLOBAL_H_
#define GLOBAL_H_
/*****************************************************************************
 * This file is part of Kvazaar HEVC encoder.
 *
 * Copyright (c) 2021, Tampere University, ITU/ISO/IEC, project contributors
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 * * Redistributions of source code must retain the above copyright notice, this
 *   list of conditions and the following disclaimer.
 *
 * * Redistributions in binary form must reproduce the above copyright notice, this
 *   list of conditions and the following disclaimer in the documentation and/or
 *   other materials provided with the distribution.
 *
 * * Neither the name of the Tampere University or ITU/ISO/IEC nor the names of its
 *   contributors may be used to endorse or promote products derived from
 *   this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * INCLUDING NEGLIGENCE OR OTHERWISE ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 ****************************************************************************/

/**
 * \file
 * Header that is included in every other header.
 *
 * This file contains global constants that can be referred to from any header
 * or source file. It also contains some helper macros and includes stdint.h
 * so that any file can refer to integer types with exact widths.
 */

#ifdef HAVE_CONFIG_H
// Include config.h generated by automake. This needs to be before any other
// includes in every file, which is why it's in global.
#include "config.h" // IWYU pragma: export
#endif

// Include some basics in all files, like assert, primitives and NULL.
// If you add anything to this list with export pragma, think long and hard
// whether it's actually a good idea to include it in every C file.
#include <assert.h> // IWYU pragma: export
#include <stdbool.h> // IWYU pragma: export
#include <stdint.h> // IWYU pragma: export
#include <stddef.h> // IWYU pragma: export

// The stdlib.h and string.h headers are needed because of MALLOC and FILL
// macros defined here, as IWYU will remove them from files that use only
// those macros.
#include <stdlib.h>
#include <string.h>

/**
 * \defgroup Bitstream
 * HEVC bitstream coding
 *
 * \defgroup CABAC
 * Context Adaptive Binary Arithmetic Encoder implementation
 *
 * \defgroup Compression
 * Prediction parameter decisions and rate-distortion optimization
 *
 * \defgroup Control
 * Initialization and control flow of the encoder
 *
 * \defgroup DataStructures
 * Containers for images, predictions parameters and such
 *
 * \defgroup Extras
 * 3rd party modules not considered part of the encoder.
 *
 * \defgroup Optimization
 * Architecture dependent SIMD optimizations and dynamic dispatch mechanism
 *
 * \defgroup Reconstruction
 * Stuff required for creating the resulting image after lossy compression
 *
 * \defgroup Threading
 * Stuff related to multi-threading using pthreads
 */

// Pthreads-win32 tries to define timespec even if it has already been defined.
// In Visual Studio 2015 timespec is defined in time.h so we may need to define
// HAVE_STRUCT_TIMESPEC.
#if defined(_MSC_VER) && _MSC_VER >= 1900 && !defined(HAVE_STRUCT_TIMESPEC)
# define HAVE_STRUCT_TIMESPEC
#endif

#if defined(_MSC_VER) && defined(_M_AMD64)
#define X86_64
#endif

#if defined(__GNUC__) && defined(__x86_64__)
#define X86_64
#endif

#define PIXEL_MIN 0
#define PIXEL_MAX ((1 << KVZ_BIT_DEPTH) - 1)

typedef int16_t coeff_t;

//#define VERBOSE 1

/* CONFIG VARIABLES */

//spec: references to variables defined in Rec. ITU-T H.265 (04/2013)

//! Limits for prediction block sizes. 0 = 64x64, 4 = 4x4.
#define PU_DEPTH_INTER_MIN 0
#define PU_DEPTH_INTER_MAX 3
#define PU_DEPTH_INTRA_MIN 0
#define PU_DEPTH_INTRA_MAX 4

//! Maximum number of layers in GOP structure (for allocating structures)
#define MAX_GOP_LAYERS 6

//! Maximum CU depth when descending from LCU level.
//! spec: log2_diff_max_min_luma_coding_block_size
#define MAX_DEPTH 3
//! Minimum log2 size of CUs.
//! spec: MinCbLog2SizeY
#define MIN_SIZE 3
//! Maximum depth of PUs.
//! Search is started at depth 0 and goes in Z-order to MAX_PU_DEPTH, see search_cu()
#define MAX_PU_DEPTH 4

//! spec: pcm_enabled_flag, Setting to 1 will enable using PCM blocks (current intra-search does not consider PCM)
#define ENABLE_PCM 0

//! skip residual coding when it's under _some_ threshold
#define OPTIMIZATION_SKIP_RESIDUAL_ON_THRESHOLD 0

/* END OF CONFIG VARIABLES */

//! pow(2, MIN_SIZE)
#define CU_MIN_SIZE_PIXELS (1 << MIN_SIZE)
//! spec: CtbSizeY
#define LCU_WIDTH (1 << (MIN_SIZE + MAX_DEPTH))
//! spec: CtbWidthC and CtbHeightC
#define LCU_WIDTH_C (LCU_WIDTH / 2)

//! spec: Log2MaxTrafoSize <= Min(CtbLog2SizeY, 5)
#define TR_MAX_LOG2_SIZE 5
#define TR_MAX_WIDTH (1 << TR_MAX_LOG2_SIZE)
//! spec: Log2MinTrafoSize
#define TR_MIN_LOG2_SIZE 2
#define TR_MIN_WIDTH (1 << TR_MIN_LOG2_SIZE)

#if LCU_WIDTH != 64
#error "Kvazaar only support LCU_WIDTH == 64"
#endif

#define LCU_LUMA_SIZE (LCU_WIDTH * LCU_WIDTH)
#define LCU_CHROMA_SIZE (LCU_WIDTH * LCU_WIDTH >> 2)

/**
 * \brief Number of pixels to delay deblocking.
 *
 * Number of pixels at the bottom and right side of the LCU that are not
 * deblocked until when filtering the neighboring LCU. The last four chroma
 * pixels of the horizontal edges within the LCU are deblocked with the LCU
 * to the right. Therefore, DEBLOCK_DELAY_PX is set to 8 pixels.
 */
#define DEBLOCK_DELAY_PX 8

/**
 * \brief Number of pixels to delay SAO in horizontal and vertical
 *        directions.
 *
 * Number of pixels at the bottom and right side of the LCU that are not
 * filtered with SAO until when filtering the neighboring LCU. SAO
 * reconstruction requires that a one-pixel border has been deblocked for
 * both luma and chroma. Therefore, SAO_DELAY_PX is set to
 * DEBLOCK_DELAY_PX + 2.
 */
#define SAO_DELAY_PX (DEBLOCK_DELAY_PX + 2)

#define MAX_REF_PIC_COUNT 16

#define AMVP_MAX_NUM_CANDS 2
#define AMVP_MAX_NUM_CANDS_MEM 3
#define MRG_MAX_NUM_CANDS 5

/* Some tools */
// NOTE: These are function-like macros. Arguments may be evaluated more than
// once, so do not pass expressions with side effects (e.g. i++).

// Absolute value of a. The argument is parenthesized before negation so that
// compound expressions such as ABS(x - y) are negated as a whole.
#define ABS(a) ((a) >= 0 ? (a) : (-(a)))
#define MAX(a,b) (((a)>(b))?(a):(b))
#define MIN(a,b) (((a)<(b))?(a):(b))
// Clamp value to the closed range [low, high].
#define CLIP(low,high,value) MAX((low),MIN((high),(value)))
#define CLIP_TO_PIXEL(value) CLIP(0, PIXEL_MAX, (value))
#define CLIP_TO_QP(value) CLIP(0, 51, (value))
// Exchange the values of lvalues a and b using a temporary of type swaptype.
// Expands to a brace block: do not use it as the unbraced body of an if
// statement that has an else branch.
#define SWAP(a,b,swaptype) { swaptype tempval; tempval = (a); (a) = (b); (b) = tempval; }
// Width of a CU in pixels at the given depth (depth 0 is LCU_WIDTH).
#define CU_WIDTH_FROM_DEPTH(depth) (LCU_WIDTH >> (depth))
#define WITHIN(val, min_val, max_val) ((min_val) <= (val) && (val) <= (max_val))
// Integer division of x by y, rounded up (for positive operands).
#define CEILDIV(x,y) (((x) + (y) - 1) / (y))

#define LOG2_LCU_WIDTH 6
// CU_TO_PIXEL = y * lcu_width * pic_width + x * lcu_width
#define CU_TO_PIXEL(x, y, depth, stride) (((y) << (LOG2_LCU_WIDTH - (depth))) * (stride) \
                                        + ((x) << (LOG2_LCU_WIDTH - (depth))))
//#define SIGN3(x) ((x) > 0) ? +1 : ((x) == 0 ? 0 : -1)
// Sign of x as -1, 0 or +1.
#define SIGN3(x) (((x) > 0) - ((x) < 0))


#define QUOTE(x) #x
#define QUOTE_EXPAND(x) QUOTE(x)

// NOTE: When making a release, check to see if incrementing libversion in
// configure.ac is necessary.
#ifndef KVZ_VERSION
#define KVZ_VERSION 2.1.0
#endif
#define VERSION_STRING QUOTE_EXPAND(KVZ_VERSION)

//#define VERBOSE 1

#define SAO_ABS_OFFSET_MAX ((1 << (MIN(KVZ_BIT_DEPTH, 10) - 5)) - 1)

#define MAX_TILES_PER_DIM 48
#define MAX_SLICES 16

/* Inlining functions */
#ifdef _MSC_VER /* Visual studio */
  #define INLINE __forceinline
  #pragma inline_recursion(on)
#else /* others */
  #define INLINE inline
#endif

// Return the next aligned address for *p. Result is at most alignment larger than p.
// An already aligned p is still advanced by one full alignment step.
// The arithmetic is done in uintptr_t: with a signed type, an address that has
// its top bit set would give a negative remainder and push the result further
// than alignment bytes past p.
#define ALIGNED_POINTER(p, alignment) ((void*)((uintptr_t)(p) + (alignment) - ((uintptr_t)(p) % (alignment))))
// 32 bytes is enough for AVX2
#define SIMD_ALIGNMENT 32

#ifdef _MSC_VER
  #define ALIGNED(alignment) __declspec(align(alignment))
#else
  #define ALIGNED(alignment) __attribute__((aligned (alignment)))
#endif

// Allocate uninitialized storage for num objects of the given type
// (MALLOC_SIMD_PADDED adds padding extra bytes). The result of malloc is
// returned as is, so callers must check it for NULL.
#ifdef _MSC_VER
// Buggy VS2010 throws intellisense warnings if void* is not casted.
  #define MALLOC(type, num) (type *)malloc(sizeof(type) * (num))
  #define MALLOC_SIMD_PADDED(type, num, padding) (type *)malloc(sizeof(type) * (num) + (padding))
#else
  #define MALLOC(type, num) malloc(sizeof(type) * (num))
  #define MALLOC_SIMD_PADDED(type, num, padding) malloc(sizeof(type) * (num) + (padding))
#endif

// Use memset through FILL and FILL_ARRAY when appropriate, such as when
// initializing whole structures or arrays. It's still ok to use memset
// directly when doing something more complicated.

// Fill a structure or a static array with val bytes.
#define FILL(var, val) memset(&(var), (val), sizeof(var))
// Fill a number of elements in an array with val bytes.
#define FILL_ARRAY(ar, val, size) \
{\
  void *temp_ptr = (void*)(ar);\
  memset((temp_ptr), (val), (size) * sizeof(*(ar)));\
}

// Free the memory behind an lvalue pointer and reset the pointer to NULL.
#define FREE_POINTER(pointer) { free((void*)(pointer)); (pointer) = NULL; }
// Copy src_pointer to dst_pointer and reset src_pointer to NULL.
#define MOVE_POINTER(dst_pointer,src_pointer) { (dst_pointer) = (src_pointer); (src_pointer) = NULL; }

#ifndef MAX_INT
#define MAX_INT 0x7FFFFFFF
#endif
#ifndef MAX_INT64
#define MAX_INT64 0x7FFFFFFFFFFFFFFFLL
#endif
#ifndef MAX_DOUBLE
#define MAX_DOUBLE 1.7e+308
#endif

//For transform.h and encoder.h
#define SCALING_LIST_4x4 0
#define SCALING_LIST_8x8 1
#define SCALING_LIST_16x16 2
#define SCALING_LIST_32x32 3
#define SCALING_LIST_SIZE_NUM 4
#define SCALING_LIST_NUM 6
#define MAX_MATRIX_COEF_NUM 64
#define SCALING_LIST_REM_NUM 6

#define MAX_TR_DYNAMIC_RANGE 15

//Constants
typedef enum { COLOR_Y = 0, COLOR_U, COLOR_V } color_t;


// Hardware data (abstraction of defines). Extend for other compilers
#if defined(_M_IX86) || defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) || defined(_M_X64) || defined(_M_AMD64) || defined(__amd64__) || defined(__x86_64__)
#  define COMPILE_INTEL 1
#else
#  define COMPILE_INTEL 0
#endif

// Visual Studio note:
// Because these macros are only used to guard code that is guarded by CPUID
// at runtime, use /arch parameter to disable them, but enable all intrinsics
// supported by VisualStudio if SSE2 (highest) is enabled.
// AVX and AVX2 are handled by /arch directly and sse intrinsics will use VEX
// versions if they are defined.
#define MSC_X86_SIMD(level) (_M_X64 || (_M_IX86_FP >= (level)))

#if COMPILE_INTEL
#  if defined(__MMX__) || MSC_X86_SIMD(1)
#    define COMPILE_INTEL_MMX 1
#  endif
#  if defined(__SSE__) || MSC_X86_SIMD(1)
#    define COMPILE_INTEL_SSE 1
#  endif
#  if defined(__SSE2__) || MSC_X86_SIMD(2)
#    define COMPILE_INTEL_SSE2 1
#  endif
#  if defined(__SSE3__)
#    define COMPILE_INTEL_SSE3 1
#  endif
#  if defined(__SSSE3__) || MSC_X86_SIMD(2)
#    define COMPILE_INTEL_SSSE3 1
#  endif
#  if defined(__SSE4_1__) || MSC_X86_SIMD(2)
#    define COMPILE_INTEL_SSE41 1
#  endif
#  if defined(__SSE4_2__) || MSC_X86_SIMD(2)
#    define COMPILE_INTEL_SSE42 1
#  endif
#  if defined(__AVX__)
#    define COMPILE_INTEL_AVX 1
#  endif
#  if defined(__AVX2__)
#    define COMPILE_INTEL_AVX2 1
#  endif
#endif

#if defined (_M_PPC) || defined(__powerpc64__) || defined(__powerpc__)
#  define COMPILE_POWERPC 1
#  ifdef __ALTIVEC__
#    define COMPILE_POWERPC_ALTIVEC 1
#  else
#    define COMPILE_POWERPC_ALTIVEC 0
#  endif
#else
#  define COMPILE_POWERPC 0
#endif

#if defined (_M_ARM) || defined(__arm__) || defined(__thumb__)
#  define COMPILE_ARM 1
#else
#  define COMPILE_ARM 0
#endif

// Min & max delta QP limits based on bit depth.
// Both expansions are fully parenthesized so that they can be used safely
// inside larger expressions.
#define KVZ_QP_DELTA_MIN (-(26 + 3 * (KVZ_BIT_DEPTH - 8)))
#define KVZ_QP_DELTA_MAX (25 + 3 * (KVZ_BIT_DEPTH - 8))

#endif