1 /*****************************************************************************
2  * Copyright (C) 2013-2020 MulticoreWare, Inc
3  *
4  * Authors: Deepthi Nandakumar <deepthi@multicorewareinc.com>
5  *          Min Chen <chenm003@163.com>
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
20  *
21  * This program is also available under a commercial proprietary license.
22  * For more information, contact us at license @ x265.com.
23  *****************************************************************************/
24 
25 #ifndef X265_COMMON_H
26 #define X265_COMMON_H
27 
28 #include <algorithm>
29 #include <climits>
30 #include <cmath>
31 #include <cstdarg>
32 #include <cstddef>
33 #include <cstdio>
34 #include <cstdlib>
35 #include <cstring>
36 #include <cctype>
37 #include <ctime>
38 
39 #include <stdint.h>
40 #include <memory.h>
41 #include <assert.h>
42 
43 #include "x265.h"
44 
/* Profiling hooks.  At most one profiler back-end (PPA or VTune) may be
 * compiled in; when neither is enabled every hook expands to nothing. */
#if ENABLE_PPA && ENABLE_VTUNE
#error "PPA and VTUNE cannot both be enabled. Disable one of them."
#endif

#if ENABLE_PPA
/* PPA back-end: only initialisation and scope events are forwarded */
#include "profile/PPA/ppa.h"
#define PROFILE_INIT()       PPA_INIT()
#define PROFILE_PAUSE()
#define PROFILE_RESUME()
#define THREAD_NAME(n,i)
#define ProfileScopeEvent(x) PPAScopeEvent(x)
#elif ENABLE_VTUNE
/* VTune back-end: every hook is forwarded */
#include "profile/vtune/vtune.h"
#define PROFILE_INIT()       vtuneInit()
#define PROFILE_PAUSE()      __itt_pause()
#define PROFILE_RESUME()     __itt_resume()
#define THREAD_NAME(n,i)     vtuneSetThreadName(n, i)
#define ProfileScopeEvent(x) VTuneScopeEvent _vtuneTask(x)
#else
/* No profiler: all hooks compile away */
#define PROFILE_INIT()
#define PROFILE_PAUSE()
#define PROFILE_RESUME()
#define THREAD_NAME(n,i)
#define ProfileScopeEvent(x)
#endif
69 
/* Fixed sizes shared by the whole encoder */
#define NUM_INTRA_MODE 35 // value of the intra mode count
#define FENC_STRIDE    64 // value of the FENC stride
72 
/* Compiler-specific helpers.
 * ALIGN_VAR_<n>(T, var) declares `T var` with an alignment of <n> bytes.
 * On MinGW and MSVC, fseeko/ftello are redirected to the toolchain's
 * fseeko64/ftello64 and _fseeki64/_ftelli64 functions respectively. */
#if defined(__GNUC__)

#define ALIGN_VAR_64(T, var) T var __attribute__((aligned(64)))
#define ALIGN_VAR_32(T, var) T var __attribute__((aligned(32)))
#define ALIGN_VAR_16(T, var) T var __attribute__((aligned(16)))
#define ALIGN_VAR_8(T, var)  T var __attribute__((aligned(8)))
#define ALIGN_VAR_4(T, var)  T var __attribute__((aligned(4)))

#if defined(__MINGW32__)
#define ftello ftello64
#define fseeko fseeko64
#endif

#elif defined(_MSC_VER)

#define ALIGN_VAR_64(T, var) __declspec(align(64)) T var
#define ALIGN_VAR_32(T, var) __declspec(align(32)) T var
#define ALIGN_VAR_16(T, var) __declspec(align(16)) T var
#define ALIGN_VAR_8(T, var)  __declspec(align(8)) T var
#define ALIGN_VAR_4(T, var)  __declspec(align(4)) T var

#define ftello _ftelli64
#define fseeko _fseeki64

#endif // if defined(__GNUC__)
/* X265_LL is the printf conversion specification used for 64-bit integers.
 * With <inttypes.h> available it is built from PRIu64; otherwise it falls
 * back to "%lld".
 * NOTE(review): the two variants differ in signedness (PRIu64 is unsigned,
 * "%lld" is signed) - confirm callers only print non-negative values. */
#if HAVE_INT_TYPES_H
/* Guarded so that a build which already passes -D__STDC_FORMAT_MACROS does
 * not hit a macro-redefinition diagnostic here. */
#ifndef __STDC_FORMAT_MACROS
#define __STDC_FORMAT_MACROS
#endif
#include <inttypes.h>
#define X265_LL "%" PRIu64
#else
#define X265_LL "%lld"
#endif
100 
/* DEBUG_BREAK() halts execution at the point of failure:
 *   - MSVC debug builds use the __debugbreak() intrinsic,
 *   - Apple compilers use __builtin_trap(),
 *   - everything else calls abort(). */
#if defined(_MSC_VER) && _DEBUG
#define DEBUG_BREAK() __debugbreak()
#elif __APPLE_CC__
#define DEBUG_BREAK() __builtin_trap()
#else
#define DEBUG_BREAK() abort()
#endif
108 
/* X265_CHECK(expr, fmt, ...): run-time sanity check.
 *
 * In CHECKED_BUILD or _DEBUG builds a failing expr is
 *   1. logged through x265_log at X265_LOG_ERROR,
 *   2. appended (as "file:line" followed by the formatted message) to
 *      "x265_check_failures.txt",
 *   3. counted in g_checkFailures, and
 *   4. followed by DEBUG_BREAK().
 * In all other builds the macro expands to nothing, so expr is not evaluated.
 *
 * The checked variant is wrapped in do/while (0) so that it is a single
 * statement and can sit in an unbraced if/else; the file handle uses a
 * macro-private name so it cannot shadow a caller variable referenced in the
 * format arguments.  Call sites must end with a semicolon. */
#if CHECKED_BUILD || _DEBUG
namespace X265_NS { extern int g_checkFailures; }
#define X265_CHECK(expr, ...) do { if (!(expr)) { \
    x265_log(NULL, X265_LOG_ERROR, __VA_ARGS__); \
    FILE *x265CheckFp_ = fopen("x265_check_failures.txt", "a"); \
    if (x265CheckFp_) { fprintf(x265CheckFp_, "%s:%d\n", __FILE__, __LINE__); fprintf(x265CheckFp_, __VA_ARGS__); fclose(x265CheckFp_); } \
    g_checkFailures++; DEBUG_BREAK(); \
} } while (0)
#if _MSC_VER
#pragma warning(disable: 4127) // some checks have constant conditions
#endif
#else
#define X265_CHECK(expr, ...)
#endif
125 
/* Sample and accumulator types.  HIGH_BIT_DEPTH builds store a sample in 16
 * bits and double the width of every accumulator; otherwise samples are 8
 * bits wide.  pixel4 is four times the width of pixel in both cases. */
#if HIGH_BIT_DEPTH
#define HISTOGRAM_BINS    1024
#define SHIFT_TO_BITPLANE 9
typedef uint16_t pixel;
typedef uint64_t pixel4;
typedef uint32_t sum_t;
typedef uint64_t sum2_t;
typedef int64_t  ssum2_t; // signed sum
#else
#define HISTOGRAM_BINS    256
#define SHIFT_TO_BITPLANE 7
typedef uint8_t  pixel;
typedef uint32_t pixel4;
typedef uint16_t sum_t;
typedef uint32_t sum2_t;
typedef int32_t  ssum2_t; // signed sum
#endif // if HIGH_BIT_DEPTH

/* sse_t is 64 bits wide once X265_DEPTH reaches 10, 32 bits below that */
#if X265_DEPTH >= 10
typedef uint64_t sse_t;
#else
typedef uint32_t sse_t;
#endif
149 
/* Fallback for translation units that have not seen a NULL definition yet */
#ifndef NULL
#define NULL 0
#endif

/* Numeric limits */
#define MAX_INT         2147483647            // largest signed 32-bit integer
#define MAX_UINT        0xFFFFFFFFU           // largest unsigned 32-bit integer
#define MAX_INT64       0x7FFFFFFFFFFFFFFFLL  // largest signed 64-bit integer
#define MAX_DOUBLE      1.7e+308              // large double used as a maximum (just under DBL_MAX)

/* Quantization parameter limits */
#define QP_MIN          0
#define QP_MAX_SPEC     51 /* highest QP that may be signaled in HEVC */
#define QP_MAX_MAX      69 /* highest QP rate control may output */

/* Bounds on the QP scale */
#define MIN_QPSCALE     0.21249999999999999
#define MAX_MAX_QPSCALE 615.46574234477100
165 
166 
/* Returns a when it compares less than b, otherwise b */
template<typename T>
inline T x265_min(T a, T b)
{
    if (a < b)
        return a;
    return b;
}

/* Returns a when it compares greater than b, otherwise b */
template<typename T>
inline T x265_max(T a, T b)
{
    if (a > b)
        return a;
    return b;
}

/* Clamps a to [minVal, maxVal]; the lower bound is applied first and the
 * upper bound second, so maxVal wins if the bounds are inverted */
template<typename T>
inline T x265_clip3(T minVal, T maxVal, T a)
{
    const T raised = x265_max(minVal, a);
    return x265_min(raised, maxVal);
}
175 
176 template<typename T> /* clip to pixel range, 0..255 or 0..1023 */
x265_clip(T x)177 inline pixel x265_clip(T x) { return (pixel)x265_min<T>(T((1 << X265_DEPTH) - 1), x265_max<T>(T(0), x)); }
178 
typedef int16_t  coeff_t;      // transform coefficient

/* Macro min/max.  Arguments are evaluated more than once, so do not pass
 * expressions with side effects. */
#define X265_MIN(a, b) ((a) < (b) ? (a) : (b))
#define X265_MAX(a, b) ((a) > (b) ? (a) : (b))

/* COPYn_IF_LT(x, y, ...): when y < x, assign y to x and each following
 * source to the destination that precedes it (a = b, c = d, e = f).
 * Every variant is wrapped in do/while (0) so that it forms exactly one
 * statement and can be used in an unbraced if/else; call sites must end
 * with a semicolon. */
#define COPY1_IF_LT(x, y) \
    do \
    { \
        if ((y) < (x)) \
            (x) = (y); \
    } while (0)
#define COPY2_IF_LT(x, y, a, b) \
    do \
    { \
        if ((y) < (x)) \
        { \
            (x) = (y); \
            (a) = (b); \
        } \
    } while (0)
#define COPY3_IF_LT(x, y, a, b, c, d) \
    do \
    { \
        if ((y) < (x)) \
        { \
            (x) = (y); \
            (a) = (b); \
            (c) = (d); \
        } \
    } while (0)
#define COPY4_IF_LT(x, y, a, b, c, d, e, f) \
    do \
    { \
        if ((y) < (x)) \
        { \
            (x) = (y); \
            (a) = (b); \
            (c) = (d); \
            (e) = (f); \
        } \
    } while (0)

/* Three- and four-way min/max built from the two-way macros */
#define X265_MIN3(a, b, c) X265_MIN((a), X265_MIN((b), (c)))
#define X265_MAX3(a, b, c) X265_MAX((a), X265_MAX((b), (c)))
#define X265_MIN4(a, b, c, d) X265_MIN((a), X265_MIN3((b), (c), (d)))
#define X265_MAX4(a, b, c, d) X265_MAX((a), X265_MAX3((b), (c), (d)))
/* QP offset added for bit depths above 8: six QP steps per extra bit */
#define QP_BD_OFFSET             (6 * (X265_DEPTH - 8))
#define MAX_CHROMA_LAMBDA_OFFSET 36

/* Lookahead QP.  The base value is arbitrary, but kept low because SATD
 * scores are 1/4 of normal. */
#define X265_LOOKAHEAD_QP        (12 + QP_BD_OFFSET)

/* Lowres CU size, same as x264.  Larger blocks seem to give artificially
 * high cost estimates (both intra and inter suffer). */
#define X265_LOWRES_CU_BITS      3
#define X265_LOWRES_CU_SIZE      8
219 
/* Allocation helpers built on x265_malloc / x265_free.
 *
 * X265_MALLOC(type, count)      allocates room for count objects of type.
 * X265_FREE(ptr)                releases ptr.
 * X265_FREE_ZERO(ptr)           releases ptr and resets it to NULL.
 * CHECKED_MALLOC(var, t, n)     allocates into var; on failure logs the
 *                               requested byte count and jumps to `fail`.
 * CHECKED_MALLOC_ZERO(var,t,n)  same, and zero-fills the memory on success.
 *
 * The CHECKED_ variants require a `fail:` label in the enclosing function. */
#define X265_MALLOC(type, count)    (type*)x265_malloc(sizeof(type) * (count))
#define X265_FREE(ptr)              x265_free(ptr)
/* do/while (0) keeps the free and the reset together as one statement, so an
 * unbraced `if (cond) X265_FREE_ZERO(p);` no longer clears p unconditionally */
#define X265_FREE_ZERO(ptr)         do { x265_free(ptr); (ptr) = NULL; } while (0)
/* The size is a size_t; it is cast to unsigned long long and printed with
 * %llu because passing size_t for %d is undefined on 64-bit targets */
#define CHECKED_MALLOC(var, type, count) \
    { \
        var = (type*)x265_malloc(sizeof(type) * (count)); \
        if (!var) \
        { \
            x265_log(NULL, X265_LOG_ERROR, "malloc of size %llu failed\n", (unsigned long long)(sizeof(type) * (count))); \
            goto fail; \
        } \
    }
#define CHECKED_MALLOC_ZERO(var, type, count) \
    { \
        var = (type*)x265_malloc(sizeof(type) * (count)); \
        if (var) \
            memset((void*)var, 0, sizeof(type) * (count)); \
        else \
        { \
            x265_log(NULL, X265_LOG_ERROR, "malloc of size %llu failed\n", (unsigned long long)(sizeof(type) * (count))); \
            goto fail; \
        } \
    }
243 
/* Base-2 logarithms in double (X265_LOG2) and float (X265_LOG2F) precision */
#if !defined(_MSC_VER)
#define X265_LOG2(x)  log2(x)
#define X265_LOG2F(x) log2f(x)
#else
/* MSVC builds scale the natural logarithm by a constant instead of calling
 * log2/log2f: log2(x) = ln(x) * log2(e) */
#define X265_LOG2(x) (log((double)(x)) * 1.4426950408889640513713538072172)
#define X265_LOG2F(x) (logf((float)(x)) * 1.44269504088896405f)
#endif
251 
/* Coding unit geometry */
#define NUM_CU_DEPTH            4                           // maximum number of CU depths
#define NUM_FULL_DEPTH          5                           // maximum number of full depths
#define MIN_LOG2_CU_SIZE        3                           // log2 of the smallest CU size
#define MAX_LOG2_CU_SIZE        6                           // log2 of the largest CU size
#define MIN_CU_SIZE             (1 << MIN_LOG2_CU_SIZE)     // smallest allowable CU size (8)
#define MAX_CU_SIZE             (1 << MAX_LOG2_CU_SIZE)     // largest allowable CU size (64)

/* Partition unit inside a CU */
#define LOG2_UNIT_SIZE          2                           // log2 of the unit size
#define UNIT_SIZE               (1 << LOG2_UNIT_SIZE)       // unit size of a CU partition (4)

/* Raster of units covering the largest CU */
#define LOG2_RASTER_SIZE        (MAX_LOG2_CU_SIZE - LOG2_UNIT_SIZE)
#define RASTER_SIZE             (1 << LOG2_RASTER_SIZE)     // units per row (16)
#define MAX_NUM_PARTITIONS      (RASTER_SIZE * RASTER_SIZE) // units per largest CU (256)

/* Prediction and transform unit limits */
#define MIN_PU_SIZE             4
#define MIN_TU_SIZE             4
#define MAX_NUM_SPU_W           (MAX_CU_SIZE / MIN_PU_SIZE) // maximum number of SPU in a horizontal line (16)

#define MAX_LOG2_TR_SIZE        5
#define MAX_LOG2_TS_SIZE        2 // TODO: RExt
#define MAX_TR_SIZE             (1 << MAX_LOG2_TR_SIZE)     // 32
#define MAX_TS_SIZE             (1 << MAX_LOG2_TS_SIZE)     // 4
274 
/* Recursion-skip selectors */
#define RDCOST_BASED_RSKIP          1
#define EDGE_BASED_RSKIP            2

/* Coefficient coding */
#define COEF_REMAIN_BIN_REDUCTION   3  // level at which the VLC switches from
                                       // Golomb-Rice to TU+EG(k)
#define SBH_THRESHOLD               4  // fixed threshold controlling sign bit hiding
#define C1FLAG_NUMBER               8  // max largerThan1 flags coded in one chunk (16 in HM5)
#define C2FLAG_NUMBER               1  // max largerThan2 flags coded in one chunk (16 in HM5)

/* SAO encoding rates */
#define SAO_ENCODING_RATE           0.75
#define SAO_ENCODING_RATE_CHROMA    0.5

/* Coefficient groups */
#define MLS_GRP_NUM                 64 // max number of coefficient groups, max(16, 64)
#define MLS_CG_SIZE                 4  // a coefficient group is 4x4
#define MLS_CG_BLK_SIZE             (MLS_CG_SIZE * MLS_CG_SIZE)
#define MLS_CG_LOG2_SIZE            2

/* Quantization and transform precision */
#define QUANT_IQUANT_SHIFT          20 // Q(QP%6) * IQ(QP%6) = 2^20
#define QUANT_SHIFT                 14 // Q(4) = 2^14
#define SCALE_BITS                  15 // inherited from TMuC, presumably for fractional bit estimates in RDOQ
#define MAX_TR_DYNAMIC_RANGE        15 // maximum transform dynamic range, sign bit excluded

#define SHIFT_INV_1ST               7  // shift applied after the first inverse transform stage
#define SHIFT_INV_2ND               12 // shift applied after the second inverse transform stage

#define AMVP_DECIMATION_FACTOR      4

#define SCAN_SET_SIZE               16
#define LOG2_SCAN_SET_SIZE          4

/* Intra mode indices */
#define ALL_IDX                     -1
#define PLANAR_IDX                  0
#define DC_IDX                      1  // intra DC mode
#define HOR_IDX                     10 // intra HORIZONTAL mode
#define VER_IDX                     26 // intra VERTICAL mode
#define NUM_CHROMA_MODE             5  // total number of chroma modes
#define DM_CHROMA_IDX               36 // chroma mode index meaning "derived from the luma intra mode"

/* Mode-dependent coefficient scan */
#define MDCS_ANGLE_LIMIT            4  // distance from the true angle within which horizontal or vertical scan is allowed
#define MDCS_LOG2_MAX_SIZE          3  // TUs whose log2 size exceeds this can only use diagonal scan

/* Reference picture limits */
#define MAX_NUM_REF_PICS            16 // max number of pictures used for reference
#define MAX_NUM_REF                 16 // max number of entries in a picture reference list
#define MAX_NUM_SHORT_TERM_RPS      64 // max number of short term reference picture sets in the SPS

#define REF_NOT_VALID               -1

/* Motion vector candidates */
#define AMVP_NUM_CANDS              2  // number of AMVP candidates
#define MRG_MAX_NUM_CANDS           5  // max number of final merge candidates
326 
/* Chroma subsampling for a color space id x: CHROMA_H_SHIFT is 1 for
 * X265_CSP_I420 and X265_CSP_I422, CHROMA_V_SHIFT is 1 for X265_CSP_I420 only,
 * and both are 0 otherwise.  The argument is parenthesized so that compound
 * expressions such as `a & b` are compared as a whole. */
#define CHROMA_H_SHIFT(x) ((x) == X265_CSP_I420 || (x) == X265_CSP_I422)
#define CHROMA_V_SHIFT(x) ((x) == X265_CSP_I420)

/* Product-valued constants are parenthesized so they stay a single operand
 * when used next to /, % or other operators. */
#define X265_MAX_PRED_MODE_PER_CTU  (85 * 2 * 8)

#define MAX_NUM_TR_COEFFS           (MAX_TR_SIZE * MAX_TR_SIZE) // Maximum number of transform coefficients, for a 32x32 transform
#define MAX_NUM_TR_CATEGORIES       16                          // 32, 16, 8, 4 transform categories each for luma and chroma
333 
/* Largest sample value representable at X265_DEPTH bits */
#define PIXEL_MAX                   ((1 << X265_DEPTH) - 1)

/* One integral plane each for 32x32, 32x24, 32x8, 24x32, 16x16, 16x12, 16x4,
 * 12x16, 8x32, 8x8, 4x16 and 4x4 */
#define INTEGRAL_PLANE_NUM          12

/* Byte overheads; FILLER_OVERHEAD is the NAL type and start code overheads plus one */
#define START_CODE_OVERHEAD         3
#define NAL_TYPE_OVERHEAD           2
#define FILLER_OVERHEAD             (NAL_TYPE_OVERHEAD + START_CODE_OVERHEAD + 1)

/* One entry per CU depth and inter refinement level */
#define MAX_NUM_DYN_REFINE          (NUM_CU_DEPTH * X265_REFINE_INTER_LEVELS)
343 
344 namespace X265_NS {
345 
enum { SAO_NUM_OFFSET = 4 }; // length of SaoCtuParam::offset

// Merge choice stored in SaoCtuParam::mergeMode
enum SaoMergeMode
{
    SAO_MERGE_NONE,
    SAO_MERGE_LEFT,
    SAO_MERGE_UP
};

// SAO parameters of a single CTU
struct SaoCtuParam
{
    SaoMergeMode mergeMode;
    int          typeIdx;
    uint32_t     bandPos;                 // BO band position
    int          offset[SAO_NUM_OFFSET];

    // Restores the defaults: all offsets 0, band position 0, typeIdx -1, no merge
    void reset()
    {
        for (int i = 0; i < SAO_NUM_OFFSET; i++)
            offset[i] = 0;

        bandPos = 0;
        typeIdx = -1;
        mergeMode = SAO_MERGE_NONE;
    }
};
373 
374 struct SAOParam
375 {
376     SaoCtuParam* ctuParam[3];
377     bool         bSaoFlag[2];
378     int          numCuInWidth;
379 
SAOParamSAOParam380     SAOParam()
381     {
382         for (int i = 0; i < 3; i++)
383             ctuParam[i] = NULL;
384     }
385 
~SAOParamSAOParam386     ~SAOParam()
387     {
388         delete[] ctuParam[0];
389         delete[] ctuParam[1];
390         delete[] ctuParam[2];
391     }
392 };
// picture component identifiers
enum TextType
{
    TEXT_LUMA         = 0,  // luma
    TEXT_CHROMA_U     = 1,  // chroma U
    TEXT_CHROMA_V     = 2,  // chroma V
    MAX_NUM_COMPONENT = 3   // number of components above
};

// coefficient scanning type used in ACS
enum ScanType
{
    SCAN_DIAG     = 0,  // up-right diagonal scan
    SCAN_HOR      = 1,  // horizontal first scan
    SCAN_VER      = 2,  // vertical first scan
    NUM_SCAN_TYPE = 3   // number of scan types above
};

// context type selectors for the significance map
enum SignificanceMapContextType
{
    CONTEXT_TYPE_4x4        = 0,
    CONTEXT_TYPE_8x8        = 1,
    CONTEXT_TYPE_NxN        = 2,
    CONTEXT_NUMBER_OF_TYPES = 3  // number of context types above
};
417 
418 /* located in pixel.cpp */
419 void extendPicBorder(pixel* recon, intptr_t stride, int width, int height, int marginX, int marginY);
420 
421 /* located in common.cpp */
422 int64_t  x265_mdate(void);
423 #define  x265_log(param, ...) general_log(param, "x265", __VA_ARGS__)
424 #define  x265_log_file(param, ...) general_log_file(param, "x265", __VA_ARGS__)
425 void     general_log(const x265_param* param, const char* caller, int level, const char* fmt, ...);
/* File-related wrappers.  On Windows they are real functions (declared here,
 * defined elsewhere); on every other platform they are thin macros over
 * general_log and the C library calls of the same name. */
#if _WIN32
void     general_log_file(const x265_param* param, const char* caller, int level, const char* fmt, ...);
FILE*    x265_fopen(const char* fileName, const char* mode);
int      x265_unlink(const char* fileName);
int      x265_rename(const char* oldName, const char* newName);
#else
/* The format string travels inside __VA_ARGS__ so that a call with no
 * arguments after the format still expands to valid code (a separate `fmt`
 * parameter would leave a trailing comma) */
#define  general_log_file(param, caller, level, ...) general_log(param, caller, level, __VA_ARGS__)
#define  x265_fopen(fileName, mode) fopen(fileName, mode)
#define  x265_unlink(fileName) unlink(fileName)
#define  x265_rename(oldName, newName) rename(oldName, newName)
#endif
437 int      x265_exp2fix8(double x);
438 
439 double   x265_ssim2dB(double ssim);
440 double   x265_qScale2qp(double qScale);
441 double   x265_qp2qScale(double qp);
442 uint32_t x265_picturePlaneSize(int csp, int width, int height, int plane);
443 
444 void*    x265_malloc(size_t size);
445 void     x265_free(void *ptr);
446 char*    x265_slurp_file(const char *filename);
447 
448 /* located in primitives.cpp */
449 void     x265_setup_primitives(x265_param* param);
450 void     x265_report_simd(x265_param* param);
451 }
452 
453 #include "constants.h"
454 
455 #endif // ifndef X265_COMMON_H
456