1 /*****************************************************************************
2  * Copyright (C) 2013-2020 MulticoreWare, Inc
3  *
4  * Authors: Shin Yee <shinyee@multicorewareinc.com>
5  *          Min Chen <chenm003@163.com>
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
20  *
21  * This program is also available under a commercial proprietary license.
22  * For more information, contact us at license @ x265.com.
23  *****************************************************************************/
24 
25 #ifndef X265_FRAMEENCODER_H
26 #define X265_FRAMEENCODER_H
27 
28 #include "common.h"
29 #include "wavefront.h"
30 #include "bitstream.h"
31 #include "frame.h"
32 #include "picyuv.h"
33 #include "md5.h"
34 
35 #include "analysis.h"
36 #include "sao.h"
37 
38 #include "entropy.h"
39 #include "framefilter.h"
40 #include "ratecontrol.h"
41 #include "reference.h"
42 #include "nal.h"
43 
44 namespace X265_NS {
45 // private x265 namespace
46 
/* Forward declarations.  Encoder is only used through pointers in this
 * header, so its full definition is not required here. */
class Encoder;
class ThreadPool;

/* NOTE(review): the consumers of these two identifiers are not visible in
 * this header -- confirm their meaning at the use sites before changing. */
#define ANGULAR_MODE_ID 2
#define AMP_ID 3
52 
53 struct StatisticLog
54 {
55     uint64_t cntInter[4];
56     uint64_t cntIntra[4];
57     uint64_t cuInterDistribution[4][INTER_MODES];
58     uint64_t cuIntraDistribution[4][INTRA_MODES];
59     uint64_t cntIntraNxN;
60     uint64_t cntSkipCu[4];
61     uint64_t cntTotalCu[4];
62     uint64_t totalCu;
63 
StatisticLogStatisticLog64     StatisticLog()
65     {
66         memset(this, 0, sizeof(StatisticLog));
67     }
68 };
69 
70 /* manages the state of encoding one row of CTU blocks.  When
71  * WPP is active, several rows will be simultaneously encoded. */
72 struct CTURow
73 {
74     Entropy           bufferedEntropy;  /* store CTU2 context for next row CTU0 */
75     Entropy           rowGoOnCoder;     /* store context between CTUs, code bitstream if !SAO */
76     unsigned int      sliceId;          /* store current row slice id */
77 
78     FrameStats        rowStats;
79 
80     /* Threading variables */
81 
82     /* This lock must be acquired when reading or writing m_active or m_busy */
83     Lock              lock;
84 
85     /* row is ready to run, has no neighbor dependencies. The row may have
86      * external dependencies (reference frame pixels) that prevent it from being
87      * processed, so it may stay with m_active=true for some time before it is
88      * encoded by a worker thread. */
89     volatile bool     active;
90 
91     /* row is being processed by a worker thread.  This flag is only true when a
92      * worker thread is within the context of FrameEncoder::processRow(). This
93      * flag is used to detect multiple possible wavefront problems. */
94     volatile bool     busy;
95 
96     /* count of completed CUs in this row */
97     volatile uint32_t completed;
98     volatile uint32_t avgQPComputed;
99 
100     volatile int      reEncode;
101 
102     /* called at the start of each frame to initialize state */
initCTURow103     void init(Entropy& initContext, unsigned int sid)
104     {
105         active = false;
106         busy = false;
107         completed = 0;
108         avgQPComputed = 0;
109         sliceId = sid;
110         reEncode = 0;
111         memset(&rowStats, 0, sizeof(rowStats));
112         rowGoOnCoder.load(initContext);
113     }
114 };
115 
116 // Manages the wave-front processing of a single encoding frame
117 class FrameEncoder : public WaveFront, public Thread
118 {
119 public:
120 
121     FrameEncoder();
122 
~FrameEncoder()123     virtual ~FrameEncoder() {}
124 
125     virtual bool init(Encoder *top, int numRows, int numCols);
126 
127     void destroy();
128 
129     /* triggers encode of a new frame by the worker thread */
130     bool startCompressFrame(Frame* curFrame);
131 
132     /* blocks until worker thread is done, returns access unit */
133     Frame *getEncodedPicture(NALList& list);
134 
135     void initDecodedPictureHashSEI(int row, int cuAddr, int height);
136 
137     Event                    m_enable;
138     Event                    m_done;
139     Event                    m_completionEvent;
140     int                      m_localTldIdx;
141     bool                     m_reconfigure; /* reconfigure in progress */
142     volatile bool            m_threadActive;
143     volatile bool            m_bAllRowsStop;
144     volatile int             m_completionCount;
145     volatile int             m_vbvResetTriggerRow;
146     volatile int             m_sliceCnt;
147 
148     uint32_t                 m_numRows;
149     uint32_t                 m_numCols;
150     uint32_t                 m_filterRowDelay;
151     uint32_t                 m_filterRowDelayCus;
152     uint32_t                 m_refLagRows;
153     bool                     m_bUseSao;
154 
155     CTURow*                  m_rows;
156     uint16_t                 m_sliceAddrBits;
157     uint32_t                 m_sliceGroupSize;
158     uint32_t*                m_sliceBaseRow;
159     uint32_t*                m_sliceMaxBlockRow;
160     int64_t                  m_rowSliceTotalBits[2];
161     RateControlEntry         m_rce;
162     SEIDecodedPictureHash    m_seiReconPictureDigest;
163 
164     uint64_t                 m_SSDY;
165     uint64_t                 m_SSDU;
166     uint64_t                 m_SSDV;
167     double                   m_ssim;
168     uint64_t                 m_accessUnitBits;
169     uint32_t                 m_ssimCnt;
170 
171     volatile int             m_activeWorkerCount;        // count of workers currently encoding or filtering CTUs
172     volatile int             m_totalActiveWorkerCount;   // sum of m_activeWorkerCount sampled at end of each CTU
173     volatile int             m_activeWorkerCountSamples; // count of times m_activeWorkerCount was sampled (think vbv restarts)
174     volatile int             m_countRowBlocks;           // count of workers forced to abandon a row because of top dependency
175     int64_t                  m_startCompressTime;        // timestamp when frame encoder is given a frame
176     int64_t                  m_row0WaitTime;             // timestamp when row 0 is allowed to start
177     int64_t                  m_allRowsAvailableTime;     // timestamp when all reference dependencies are resolved
178     int64_t                  m_endCompressTime;          // timestamp after all CTUs are compressed
179     int64_t                  m_endFrameTime;             // timestamp after RCEnd, NR updates, etc
180     int64_t                  m_stallStartTime;           // timestamp when worker count becomes 0
181     int64_t                  m_prevOutputTime;           // timestamp when prev frame was retrieved by API thread
182     int64_t                  m_slicetypeWaitTime;        // total elapsed time waiting for decided frame
183     int64_t                  m_totalWorkerElapsedTime;   // total elapsed time spent by worker threads processing CTUs
184     int64_t                  m_totalNoWorkerTime;        // total elapsed time without any active worker threads
185 #if DETAILED_CU_STATS
186     CUStats                  m_cuStats;
187 #endif
188 
189     Encoder*                 m_top;
190     x265_param*              m_param;
191     Frame*                   m_frame;
192     NoiseReduction*          m_nr;
193     ThreadLocalData*         m_tld; /* for --no-wpp */
194     Bitstream*               m_outStreams;
195     Bitstream*               m_backupStreams;
196     uint32_t*                m_substreamSizes;
197 
198     CUGeom*                  m_cuGeoms;
199     uint32_t*                m_ctuGeomMap;
200 
201     Bitstream                m_bs;
202     MotionReference          m_mref[2][MAX_NUM_REF + 1];
203     Entropy                  m_entropyCoder;
204     Entropy                  m_initSliceContext;
205     FrameFilter              m_frameFilter;
206     NALList                  m_nalList;
207 
208     class WeightAnalysis : public BondedTaskGroup
209     {
210     public:
211 
212         FrameEncoder& master;
213 
WeightAnalysis(FrameEncoder & fe)214         WeightAnalysis(FrameEncoder& fe) : master(fe) {}
215 
216         void processTasks(int workerThreadId);
217 
218     protected:
219 
220         WeightAnalysis operator=(const WeightAnalysis&);
221     };
222 
223 protected:
224 
225     bool initializeGeoms();
226 
227     /* analyze / compress frame, can be run in parallel within reference constraints */
228     void compressFrame();
229 
230     /* called by compressFrame to generate final per-row bitstreams */
231     void encodeSlice(uint32_t sliceAddr);
232 
233     void threadMain();
234     int  collectCTUStatistics(const CUData& ctu, FrameStats* frameLog);
235     void noiseReductionUpdate();
236     void writeTrailingSEIMessages();
237     bool writeToneMapInfo(x265_sei_payload *payload);
238 
239     /* Called by WaveFront::findJob() */
240     virtual void processRow(int row, int threadId);
241     virtual void processRowEncoder(int row, ThreadLocalData& tld);
242 
enqueueRowEncoder(int row)243     void enqueueRowEncoder(int row) { WaveFront::enqueueRow(row * 2 + 0); }
enqueueRowFilter(int row)244     void enqueueRowFilter(int row)  { WaveFront::enqueueRow(row * 2 + 1); }
enableRowEncoder(int row)245     void enableRowEncoder(int row)  { WaveFront::enableRow(row * 2 + 0); }
enableRowFilter(int row)246     void enableRowFilter(int row)   { WaveFront::enableRow(row * 2 + 1); }
247 #if ENABLE_LIBVMAF
248     void vmafFrameLevelScore();
249 #endif
250     void collectDynDataFrame();
251     void computeAvgTrainingData();
252     void collectDynDataRow(CUData& ctu, FrameStats* rowStats);
253 };
254 }
255 
256 #endif // ifndef X265_FRAMEENCODER_H
257