1 /***************************************************************************** 2 * Copyright (C) 2013-2020 MulticoreWare, Inc 3 * 4 * Authors: Steve Borho <steve@borho.org> 5 * Min Chen <chenm003@163.com> 6 * 7 * This program is free software; you can redistribute it and/or modify 8 * it under the terms of the GNU General Public License as published by 9 * the Free Software Foundation; either version 2 of the License, or 10 * (at your option) any later version. 11 * 12 * This program is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 * GNU General Public License for more details. 16 * 17 * You should have received a copy of the GNU General Public License 18 * along with this program; if not, write to the Free Software 19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. 20 * 21 * This program is also available under a commercial proprietary license. 22 * For more information, contact us at license @ x265.com. 23 *****************************************************************************/ 24 25 #ifndef X265_SLICETYPE_H 26 #define X265_SLICETYPE_H 27 28 #include "common.h" 29 #include "slice.h" 30 #include "motion.h" 31 #include "piclist.h" 32 #include "threadpool.h" 33 34 namespace X265_NS { 35 // private namespace 36 37 struct Lowres; 38 class Frame; 39 class Lookahead; 40 41 #define LOWRES_COST_MASK ((1 << 14) - 1) 42 #define LOWRES_COST_SHIFT 14 43 #define AQ_EDGE_BIAS 0.5 44 #define EDGE_INCLINATION 45 45 #define TEMPORAL_SCENECUT_THRESHOLD 50 46 47 #if HIGH_BIT_DEPTH 48 #define EDGE_THRESHOLD 1023.0 49 #else 50 #define EDGE_THRESHOLD 255.0 51 #endif 52 #define PI 3.14159265 53 54 /* Thread local data for lookahead tasks */ 55 struct LookaheadTLD 56 { 57 MotionEstimate me; 58 pixel* wbuffer[4]; 59 int widthInCU; 60 int heightInCU; 61 int ncu; 62 int paddedLines; 63 64 #if DETAILED_CU_STATS 65 int64_t batchElapsedTime; 66 int64_t coopSliceElapsedTime; 67 uint64_t countBatches; 68 uint64_t countCoopSlices; 69 #endif 70 LookaheadTLDLookaheadTLD71 LookaheadTLD() 72 { 73 me.init(X265_CSP_I400); 74 me.setQP(X265_LOOKAHEAD_QP); 75 for (int i = 0; i < 4; i++) 76 wbuffer[i] = NULL; 77 widthInCU = heightInCU = ncu = paddedLines = 0; 78 79 #if DETAILED_CU_STATS 80 batchElapsedTime = 0; 81 coopSliceElapsedTime = 0; 82 countBatches = 0; 83 countCoopSlices = 0; 84 #endif 85 } 86 initLookaheadTLD87 void init(int w, int h, int n) 88 { 89 widthInCU = w; 90 heightInCU = h; 91 ncu = n; 92 } 93 ~LookaheadTLDLookaheadTLD94 ~LookaheadTLD() { X265_FREE(wbuffer[0]); } 95 96 void calcAdaptiveQuantFrame(Frame *curFrame, x265_param* param); 97 void lowresIntraEstimate(Lowres& fenc, uint32_t qgSize); 98 99 void weightsAnalyse(Lowres& fenc, Lowres& ref); 100 void xPreanalyze(Frame* curFrame); 101 void xPreanalyzeQp(Frame* curFrame); 102 protected: 103 104 uint32_t acEnergyCu(Frame* curFrame, uint32_t blockX, uint32_t blockY, int csp, uint32_t qgSize); 105 uint32_t edgeDensityCu(Frame* curFrame, uint32_t &avgAngle, uint32_t blockX, uint32_t blockY, uint32_t qgSize); 106 uint32_t lumaSumCu(Frame* curFrame, uint32_t blockX, uint32_t blockY, uint32_t qgSize); 107 uint32_t weightCostLuma(Lowres& fenc, Lowres& ref, WeightParam& wp); 108 bool allocWeightedRef(Lowres& fenc); 109 }; 110 111 class Lookahead : public JobProvider 112 { 113 public: 114 115 PicList m_inputQueue; // input pictures in order received 116 PicList m_outputQueue; // pictures to be encoded, in encode order 117 Lock m_inputLock; 118 Lock m_outputLock; 119 Event m_outputSignal; 120 LookaheadTLD* m_tld; 121 x265_param* m_param; 122 Lowres* m_lastNonB; 123 int* m_scratch; // temp buffer for cutree propagate 124 125 /* pre-lookahead */ 126 int m_fullQueueSize; 127 int m_histogram[X265_BFRAME_MAX + 1]; 128 int m_lastKeyframe; 129 int m_8x8Width; 130 int m_8x8Height; 131 int m_8x8Blocks; 132 int m_cuCount; 133 int m_numCoopSlices; 134 int m_numRowsPerSlice; 135 int m_inputCount; 136 double m_cuTreeStrength; 137 138 /* HME */ 139 int m_4x4Width; 140 int m_4x4Height; 141 142 bool m_isActive; 143 bool m_sliceTypeBusy; 144 bool m_bAdaptiveQuant; 145 bool m_outputSignalRequired; 146 bool m_bBatchMotionSearch; 147 bool m_bBatchFrameCosts; 148 bool m_filled; 149 bool m_isSceneTransition; 150 int m_numPools; 151 bool m_extendGopBoundary; 152 double m_frameVariance[X265_BFRAME_MAX + 4]; 153 bool m_isFadeIn; 154 uint64_t m_fadeCount; 155 int m_fadeStart; 156 Lookahead(x265_param *param, ThreadPool *pool); 157 #if DETAILED_CU_STATS 158 int64_t m_slicetypeDecideElapsedTime; 159 int64_t m_preLookaheadElapsedTime; 160 uint64_t m_countSlicetypeDecide; 161 uint64_t m_countPreLookahead; 162 void getWorkerStats(int64_t& batchElapsedTime, uint64_t& batchCount, int64_t& coopSliceElapsedTime, uint64_t& coopSliceCount); 163 #endif 164 165 bool create(); 166 void destroy(); 167 void stopJobs(); 168 169 void addPicture(Frame&, int sliceType); 170 void addPicture(Frame& curFrame); 171 void checkLookaheadQueue(int &frameCnt); 172 void flush(); 173 Frame* getDecidedPicture(); 174 175 void getEstimatedPictureCost(Frame *pic); 176 void setLookaheadQueue(); 177 178 protected: 179 180 void findJob(int workerThreadID); 181 void slicetypeDecide(); 182 void slicetypeAnalyse(Lowres **frames, bool bKeyframe); 183 184 /* called by slicetypeAnalyse() to make slice decisions */ 185 bool scenecut(Lowres **frames, int p0, int p1, bool bRealScenecut, int numFrames); 186 bool scenecutInternal(Lowres **frames, int p0, int p1, bool bRealScenecut); 187 void slicetypePath(Lowres **frames, int length, char(*best_paths)[X265_LOOKAHEAD_MAX + 1]); 188 int64_t slicetypePathCost(Lowres **frames, char *path, int64_t threshold); 189 int64_t vbvFrameCost(Lowres **frames, int p0, int p1, int b); 190 void vbvLookahead(Lowres **frames, int numFrames, int keyframes); 191 void aqMotion(Lowres **frames, bool bintra); 192 void calcMotionAdaptiveQuantFrame(Lowres **frames, int p0, int p1, int b); 193 /* called by slicetypeAnalyse() to effect cuTree adjustments to adaptive 194 * quant offsets */ 195 void cuTree(Lowres **frames, int numframes, bool bintra); 196 void estimateCUPropagate(Lowres **frames, double average_duration, int p0, int p1, int b, int referenced); 197 void cuTreeFinish(Lowres *frame, double averageDuration, int ref0Distance); 198 void computeCUTreeQpOffset(Lowres *frame, double averageDuration, int ref0Distance); 199 200 /* called by getEstimatedPictureCost() to finalize cuTree costs */ 201 int64_t frameCostRecalculate(Lowres **frames, int p0, int p1, int b); 202 }; 203 204 class PreLookaheadGroup : public BondedTaskGroup 205 { 206 public: 207 208 Frame* m_preframes[X265_LOOKAHEAD_MAX]; 209 Lookahead& m_lookahead; 210 PreLookaheadGroup(Lookahead & l)211 PreLookaheadGroup(Lookahead& l) : m_lookahead(l) {} 212 213 void processTasks(int workerThreadID); 214 215 protected: 216 217 PreLookaheadGroup& operator=(const PreLookaheadGroup&); 218 }; 219 220 class CostEstimateGroup : public BondedTaskGroup 221 { 222 public: 223 224 Lookahead& m_lookahead; 225 Lowres** m_frames; 226 bool m_batchMode; 227 CostEstimateGroup(Lookahead & l,Lowres ** f)228 CostEstimateGroup(Lookahead& l, Lowres** f) : m_lookahead(l), m_frames(f), m_batchMode(false) {} 229 230 /* Cooperative cost estimate using multiple slices of downscaled frame */ 231 struct Coop 232 { 233 int p0, b, p1; 234 bool bDoSearch[2]; 235 } m_coop; 236 237 enum { MAX_COOP_SLICES = 32 }; 238 struct Slice 239 { 240 int costEst; 241 int costEstAq; 242 int intraMbs; 243 } m_slice[MAX_COOP_SLICES]; 244 245 int64_t singleCost(int p0, int p1, int b, bool intraPenalty = false); 246 247 /* Batch cost estimates, using one worker thread per estimateFrameCost() call */ 248 enum { MAX_BATCH_SIZE = 512 }; 249 struct Estimate 250 { 251 int p0, b, p1; 252 } m_estimates[MAX_BATCH_SIZE]; 253 254 void add(int p0, int p1, int b); 255 void finishBatch(); 256 257 protected: 258 259 static const int s_merange = 16; 260 261 void processTasks(int workerThreadID); 262 263 int64_t estimateFrameCost(LookaheadTLD& tld, int p0, int p1, int b, bool intraPenalty); 264 void estimateCUCost(LookaheadTLD& tld, int cux, int cuy, int p0, int p1, int b, bool bDoSearch[2], bool lastRow, int slice, bool hme); 265 266 CostEstimateGroup& operator=(const CostEstimateGroup&); 267 }; 268 269 bool computeEdge(pixel* edgePic, pixel* refPic, pixel* edgeTheta, intptr_t stride, int height, int width, bool bcalcTheta, pixel whitePixel = EDGE_THRESHOLD); 270 } 271 #endif // ifndef X265_SLICETYPE_H 272