/*****************************************************************************
 * Copyright (C) 2013-2020 MulticoreWare, Inc
 *
 * Authors: Steve Borho <steve@borho.org>
 *          Min Chen <chenm003@163.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
 *
 * This program is also available under a commercial proprietary license.
 * For more information, contact us at license @ x265.com.
 *****************************************************************************/
24 
25 #ifndef X265_SLICETYPE_H
26 #define X265_SLICETYPE_H
27 
28 #include "common.h"
29 #include "slice.h"
30 #include "motion.h"
31 #include "piclist.h"
32 #include "threadpool.h"
33 
34 namespace X265_NS {
35 // private namespace
36 
/* Forward declarations: this header only uses these types through pointers
 * and references until their full definitions are seen. */
struct Lowres;
class Frame;
class Lookahead;
40 
/* Mask selecting the low 14 bits of a value, and the matching shift that
 * exposes the bits above them.
 * NOTE(review): presumably used to pack a lowres cost together with extra
 * flag bits in one integer - confirm against the users of these macros. */
#define LOWRES_COST_MASK  ((1 << 14) - 1)
#define LOWRES_COST_SHIFT 14

/* Tuning constants for the lookahead analysis.
 * NOTE(review): units and exact usage are not visible in this header
 * (EDGE_INCLINATION looks like degrees) - confirm in the implementation. */
#define AQ_EDGE_BIAS 0.5
#define EDGE_INCLINATION 45
#define TEMPORAL_SCENECUT_THRESHOLD 50

/* Default "white" value handed to computeEdge(): 1023 (2^10 - 1) when
 * HIGH_BIT_DEPTH is enabled, 255 (2^8 - 1) otherwise. */
#if HIGH_BIT_DEPTH
#define EDGE_THRESHOLD 1023.0
#else
#define EDGE_THRESHOLD 255.0
#endif

/* Pi truncated to eight decimal places. */
#define PI 3.14159265
53 
54 /* Thread local data for lookahead tasks */
55 struct LookaheadTLD
56 {
57     MotionEstimate  me;
58     pixel*          wbuffer[4];
59     int             widthInCU;
60     int             heightInCU;
61     int             ncu;
62     int             paddedLines;
63 
64 #if DETAILED_CU_STATS
65     int64_t         batchElapsedTime;
66     int64_t         coopSliceElapsedTime;
67     uint64_t        countBatches;
68     uint64_t        countCoopSlices;
69 #endif
70 
LookaheadTLDLookaheadTLD71     LookaheadTLD()
72     {
73         me.init(X265_CSP_I400);
74         me.setQP(X265_LOOKAHEAD_QP);
75         for (int i = 0; i < 4; i++)
76             wbuffer[i] = NULL;
77         widthInCU = heightInCU = ncu = paddedLines = 0;
78 
79 #if DETAILED_CU_STATS
80         batchElapsedTime = 0;
81         coopSliceElapsedTime = 0;
82         countBatches = 0;
83         countCoopSlices = 0;
84 #endif
85     }
86 
initLookaheadTLD87     void init(int w, int h, int n)
88     {
89         widthInCU = w;
90         heightInCU = h;
91         ncu = n;
92     }
93 
~LookaheadTLDLookaheadTLD94     ~LookaheadTLD() { X265_FREE(wbuffer[0]); }
95 
96     void calcAdaptiveQuantFrame(Frame *curFrame, x265_param* param);
97     void lowresIntraEstimate(Lowres& fenc, uint32_t qgSize);
98 
99     void weightsAnalyse(Lowres& fenc, Lowres& ref);
100     void xPreanalyze(Frame* curFrame);
101     void xPreanalyzeQp(Frame* curFrame);
102 protected:
103 
104     uint32_t acEnergyCu(Frame* curFrame, uint32_t blockX, uint32_t blockY, int csp, uint32_t qgSize);
105     uint32_t edgeDensityCu(Frame* curFrame, uint32_t &avgAngle, uint32_t blockX, uint32_t blockY, uint32_t qgSize);
106     uint32_t lumaSumCu(Frame* curFrame, uint32_t blockX, uint32_t blockY, uint32_t qgSize);
107     uint32_t weightCostLuma(Lowres& fenc, Lowres& ref, WeightParam& wp);
108     bool     allocWeightedRef(Lowres& fenc);
109 };
110 
111 class Lookahead : public JobProvider
112 {
113 public:
114 
115     PicList       m_inputQueue;      // input pictures in order received
116     PicList       m_outputQueue;     // pictures to be encoded, in encode order
117     Lock          m_inputLock;
118     Lock          m_outputLock;
119     Event         m_outputSignal;
120     LookaheadTLD* m_tld;
121     x265_param*   m_param;
122     Lowres*       m_lastNonB;
123     int*          m_scratch;         // temp buffer for cutree propagate
124 
125     /* pre-lookahead */
126     int           m_fullQueueSize;
127     int           m_histogram[X265_BFRAME_MAX + 1];
128     int           m_lastKeyframe;
129     int           m_8x8Width;
130     int           m_8x8Height;
131     int           m_8x8Blocks;
132     int           m_cuCount;
133     int           m_numCoopSlices;
134     int           m_numRowsPerSlice;
135     int           m_inputCount;
136     double        m_cuTreeStrength;
137 
138     /* HME */
139     int           m_4x4Width;
140     int           m_4x4Height;
141 
142     bool          m_isActive;
143     bool          m_sliceTypeBusy;
144     bool          m_bAdaptiveQuant;
145     bool          m_outputSignalRequired;
146     bool          m_bBatchMotionSearch;
147     bool          m_bBatchFrameCosts;
148     bool          m_filled;
149     bool          m_isSceneTransition;
150     int           m_numPools;
151     bool          m_extendGopBoundary;
152     double        m_frameVariance[X265_BFRAME_MAX + 4];
153     bool          m_isFadeIn;
154     uint64_t      m_fadeCount;
155     int           m_fadeStart;
156     Lookahead(x265_param *param, ThreadPool *pool);
157 #if DETAILED_CU_STATS
158     int64_t       m_slicetypeDecideElapsedTime;
159     int64_t       m_preLookaheadElapsedTime;
160     uint64_t      m_countSlicetypeDecide;
161     uint64_t      m_countPreLookahead;
162     void          getWorkerStats(int64_t& batchElapsedTime, uint64_t& batchCount, int64_t& coopSliceElapsedTime, uint64_t& coopSliceCount);
163 #endif
164 
165     bool    create();
166     void    destroy();
167     void    stopJobs();
168 
169     void    addPicture(Frame&, int sliceType);
170     void    addPicture(Frame& curFrame);
171     void    checkLookaheadQueue(int &frameCnt);
172     void    flush();
173     Frame*  getDecidedPicture();
174 
175     void    getEstimatedPictureCost(Frame *pic);
176     void    setLookaheadQueue();
177 
178 protected:
179 
180     void    findJob(int workerThreadID);
181     void    slicetypeDecide();
182     void    slicetypeAnalyse(Lowres **frames, bool bKeyframe);
183 
184     /* called by slicetypeAnalyse() to make slice decisions */
185     bool    scenecut(Lowres **frames, int p0, int p1, bool bRealScenecut, int numFrames);
186     bool    scenecutInternal(Lowres **frames, int p0, int p1, bool bRealScenecut);
187     void    slicetypePath(Lowres **frames, int length, char(*best_paths)[X265_LOOKAHEAD_MAX + 1]);
188     int64_t slicetypePathCost(Lowres **frames, char *path, int64_t threshold);
189     int64_t vbvFrameCost(Lowres **frames, int p0, int p1, int b);
190     void    vbvLookahead(Lowres **frames, int numFrames, int keyframes);
191     void    aqMotion(Lowres **frames, bool bintra);
192     void    calcMotionAdaptiveQuantFrame(Lowres **frames, int p0, int p1, int b);
193     /* called by slicetypeAnalyse() to effect cuTree adjustments to adaptive
194      * quant offsets */
195     void    cuTree(Lowres **frames, int numframes, bool bintra);
196     void    estimateCUPropagate(Lowres **frames, double average_duration, int p0, int p1, int b, int referenced);
197     void    cuTreeFinish(Lowres *frame, double averageDuration, int ref0Distance);
198     void    computeCUTreeQpOffset(Lowres *frame, double averageDuration, int ref0Distance);
199 
200     /* called by getEstimatedPictureCost() to finalize cuTree costs */
201     int64_t frameCostRecalculate(Lowres **frames, int p0, int p1, int b);
202 };
203 
204 class PreLookaheadGroup : public BondedTaskGroup
205 {
206 public:
207 
208     Frame* m_preframes[X265_LOOKAHEAD_MAX];
209     Lookahead& m_lookahead;
210 
PreLookaheadGroup(Lookahead & l)211     PreLookaheadGroup(Lookahead& l) : m_lookahead(l) {}
212 
213     void processTasks(int workerThreadID);
214 
215 protected:
216 
217     PreLookaheadGroup& operator=(const PreLookaheadGroup&);
218 };
219 
220 class CostEstimateGroup : public BondedTaskGroup
221 {
222 public:
223 
224     Lookahead& m_lookahead;
225     Lowres**   m_frames;
226     bool       m_batchMode;
227 
CostEstimateGroup(Lookahead & l,Lowres ** f)228     CostEstimateGroup(Lookahead& l, Lowres** f) : m_lookahead(l), m_frames(f), m_batchMode(false) {}
229 
230     /* Cooperative cost estimate using multiple slices of downscaled frame */
231     struct Coop
232     {
233         int  p0, b, p1;
234         bool bDoSearch[2];
235     } m_coop;
236 
237     enum { MAX_COOP_SLICES = 32 };
238     struct Slice
239     {
240         int  costEst;
241         int  costEstAq;
242         int  intraMbs;
243     } m_slice[MAX_COOP_SLICES];
244 
245     int64_t singleCost(int p0, int p1, int b, bool intraPenalty = false);
246 
247     /* Batch cost estimates, using one worker thread per estimateFrameCost() call */
248     enum { MAX_BATCH_SIZE = 512 };
249     struct Estimate
250     {
251         int  p0, b, p1;
252     } m_estimates[MAX_BATCH_SIZE];
253 
254     void add(int p0, int p1, int b);
255     void finishBatch();
256 
257 protected:
258 
259     static const int s_merange = 16;
260 
261     void    processTasks(int workerThreadID);
262 
263     int64_t estimateFrameCost(LookaheadTLD& tld, int p0, int p1, int b, bool intraPenalty);
264     void    estimateCUCost(LookaheadTLD& tld, int cux, int cuy, int p0, int p1, int b, bool bDoSearch[2], bool lastRow, int slice, bool hme);
265 
266     CostEstimateGroup& operator=(const CostEstimateGroup&);
267 };
268 
269 bool computeEdge(pixel* edgePic, pixel* refPic, pixel* edgeTheta, intptr_t stride, int height, int width, bool bcalcTheta, pixel whitePixel = EDGE_THRESHOLD);
270 }
271 #endif // ifndef X265_SLICETYPE_H
272