1 /*****************************************************************************
2 * Copyright (C) 2013-2020 MulticoreWare, Inc
3 *
4 * Authors: Pooja Venkatesan <pooja@multicorewareinc.com>
5 *          Aruna Matheswaran <aruna@multicorewareinc.com>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
20 *
21 * This program is also available under a commercial proprietary license.
22 * For more information, contact us at license @ x265.com.
23 *****************************************************************************/
24 
25 #include "abrEncApp.h"
26 #include "mv.h"
27 #include "slice.h"
28 #include "param.h"
29 
30 #include <signal.h>
31 #include <errno.h>
32 
33 #include <queue>
34 
35 using namespace X265_NS;
36 
/* Interrupt flag. Being a static it starts out as zero; the SIGINT handler
 * below raises it and the main encode loop in this file polls it. */
static volatile sig_atomic_t b_ctrl_c;

/* SIGINT (Ctrl-C) handler: does nothing except record that an interrupt was
 * requested. The signal number is not needed, so the parameter is unnamed. */
static void sigint_handler(int /* signum */)
{
    b_ctrl_c = 1;
}
43 
44 namespace X265_NS {
45     // private namespace
46 #define X265_INPUT_QUEUE_SIZE 250
47 
AbrEncoder(CLIOptions cliopt[],uint8_t numEncodes,int & ret)48     AbrEncoder::AbrEncoder(CLIOptions cliopt[], uint8_t numEncodes, int &ret)
49     {
50         m_numEncodes = numEncodes;
51         m_numActiveEncodes.set(numEncodes);
52         m_queueSize = (numEncodes > 1) ? X265_INPUT_QUEUE_SIZE : 1;
53         m_passEnc = X265_MALLOC(PassEncoder*, m_numEncodes);
54 
55         for (uint8_t i = 0; i < m_numEncodes; i++)
56         {
57             m_passEnc[i] = new PassEncoder(i, cliopt[i], this);
58             if (!m_passEnc[i])
59             {
60                 x265_log(NULL, X265_LOG_ERROR, "Unable to allocate memory for passEncoder\n");
61                 ret = 4;
62             }
63             m_passEnc[i]->init(ret);
64         }
65 
66         if (!allocBuffers())
67         {
68             x265_log(NULL, X265_LOG_ERROR, "Unable to allocate memory for buffers\n");
69             ret = 4;
70         }
71 
72         /* start passEncoder worker threads */
73         for (uint8_t pass = 0; pass < m_numEncodes; pass++)
74             m_passEnc[pass]->startThreads();
75     }
76 
allocBuffers()77     bool AbrEncoder::allocBuffers()
78     {
79         m_inputPicBuffer = X265_MALLOC(x265_picture**, m_numEncodes);
80         m_analysisBuffer = X265_MALLOC(x265_analysis_data*, m_numEncodes);
81 
82         m_picWriteCnt = new ThreadSafeInteger[m_numEncodes];
83         m_picReadCnt = new ThreadSafeInteger[m_numEncodes];
84         m_analysisWriteCnt = new ThreadSafeInteger[m_numEncodes];
85         m_analysisReadCnt = new ThreadSafeInteger[m_numEncodes];
86 
87         m_picIdxReadCnt = X265_MALLOC(ThreadSafeInteger*, m_numEncodes);
88         m_analysisWrite = X265_MALLOC(ThreadSafeInteger*, m_numEncodes);
89         m_analysisRead = X265_MALLOC(ThreadSafeInteger*, m_numEncodes);
90         m_readFlag = X265_MALLOC(int*, m_numEncodes);
91 
92         for (uint8_t pass = 0; pass < m_numEncodes; pass++)
93         {
94             m_inputPicBuffer[pass] = X265_MALLOC(x265_picture*, m_queueSize);
95             for (uint32_t idx = 0; idx < m_queueSize; idx++)
96             {
97                 m_inputPicBuffer[pass][idx] = x265_picture_alloc();
98                 x265_picture_init(m_passEnc[pass]->m_param, m_inputPicBuffer[pass][idx]);
99             }
100 
101             m_analysisBuffer[pass] = X265_MALLOC(x265_analysis_data, m_queueSize);
102             m_picIdxReadCnt[pass] = new ThreadSafeInteger[m_queueSize];
103             m_analysisWrite[pass] = new ThreadSafeInteger[m_queueSize];
104             m_analysisRead[pass] = new ThreadSafeInteger[m_queueSize];
105             m_readFlag[pass] = X265_MALLOC(int, m_queueSize);
106         }
107         return true;
108     }
109 
destroy()110     void AbrEncoder::destroy()
111     {
112         x265_cleanup(); /* Free library singletons */
113         for (uint8_t pass = 0; pass < m_numEncodes; pass++)
114         {
115             for (uint32_t index = 0; index < m_queueSize; index++)
116             {
117                 X265_FREE(m_inputPicBuffer[pass][index]->planes[0]);
118                 x265_picture_free(m_inputPicBuffer[pass][index]);
119             }
120 
121             X265_FREE(m_inputPicBuffer[pass]);
122             X265_FREE(m_analysisBuffer[pass]);
123             X265_FREE(m_readFlag[pass]);
124             delete[] m_picIdxReadCnt[pass];
125             delete[] m_analysisWrite[pass];
126             delete[] m_analysisRead[pass];
127             m_passEnc[pass]->destroy();
128             delete m_passEnc[pass];
129         }
130         X265_FREE(m_inputPicBuffer);
131         X265_FREE(m_analysisBuffer);
132         X265_FREE(m_readFlag);
133 
134         delete[] m_picWriteCnt;
135         delete[] m_picReadCnt;
136         delete[] m_analysisWriteCnt;
137         delete[] m_analysisReadCnt;
138 
139         X265_FREE(m_picIdxReadCnt);
140         X265_FREE(m_analysisWrite);
141         X265_FREE(m_analysisRead);
142 
143         X265_FREE(m_passEnc);
144     }
145 
PassEncoder(uint32_t id,CLIOptions cliopt,AbrEncoder * parent)146     PassEncoder::PassEncoder(uint32_t id, CLIOptions cliopt, AbrEncoder *parent)
147     {
148         m_id = id;
149         m_cliopt = cliopt;
150         m_parent = parent;
151         if(!(m_cliopt.enableScaler && m_id))
152             m_input = m_cliopt.input;
153         m_param = cliopt.param;
154         m_inputOver = false;
155         m_lastIdx = -1;
156         m_encoder = NULL;
157         m_scaler = NULL;
158         m_reader = NULL;
159         m_ret = 0;
160     }
161 
init(int & result)162     int PassEncoder::init(int &result)
163     {
164         if (m_parent->m_numEncodes > 1)
165             setReuseLevel();
166 
167         if (!(m_cliopt.enableScaler && m_id))
168             m_reader = new Reader(m_id, this);
169         else
170         {
171             VideoDesc *src = NULL, *dst = NULL;
172             dst = new VideoDesc(m_param->sourceWidth, m_param->sourceHeight, m_param->internalCsp, m_param->internalBitDepth);
173             int dstW = m_parent->m_passEnc[m_id - 1]->m_param->sourceWidth;
174             int dstH = m_parent->m_passEnc[m_id - 1]->m_param->sourceHeight;
175             src = new VideoDesc(dstW, dstH, m_param->internalCsp, m_param->internalBitDepth);
176             if (src != NULL && dst != NULL)
177             {
178                 m_scaler = new Scaler(0, 1, m_id, src, dst, this);
179                 if (!m_scaler)
180                 {
181                     x265_log(m_param, X265_LOG_ERROR, "\n MALLOC failure in Scaler");
182                     result = 4;
183                 }
184             }
185         }
186 
187         /* note: we could try to acquire a different libx265 API here based on
188         * the profile found during option parsing, but it must be done before
189         * opening an encoder */
190 
191         if (m_param)
192             m_encoder = m_cliopt.api->encoder_open(m_param);
193         if (!m_encoder)
194         {
195             x265_log(NULL, X265_LOG_ERROR, "x265_encoder_open() failed for Enc, \n");
196             m_ret = 2;
197             return -1;
198         }
199 
200         /* get the encoder parameters post-initialization */
201         m_cliopt.api->encoder_parameters(m_encoder, m_param);
202 
203         return 1;
204     }
205 
setReuseLevel()206     void PassEncoder::setReuseLevel()
207     {
208         uint32_t r, padh = 0, padw = 0;
209 
210         m_param->confWinBottomOffset = m_param->confWinRightOffset = 0;
211 
212         m_param->analysisLoadReuseLevel = m_cliopt.loadLevel;
213         m_param->analysisSaveReuseLevel = m_cliopt.saveLevel;
214         m_param->analysisSave = m_cliopt.saveLevel ? "save.dat" : NULL;
215         m_param->analysisLoad = m_cliopt.loadLevel ? "load.dat" : NULL;
216         m_param->bUseAnalysisFile = 0;
217 
218         if (m_cliopt.loadLevel)
219         {
220             x265_param *refParam = m_parent->m_passEnc[m_cliopt.refId]->m_param;
221 
222             if (m_param->sourceHeight == (refParam->sourceHeight - refParam->confWinBottomOffset) &&
223                 m_param->sourceWidth == (refParam->sourceWidth - refParam->confWinRightOffset))
224             {
225                 m_parent->m_passEnc[m_id]->m_param->confWinBottomOffset = refParam->confWinBottomOffset;
226                 m_parent->m_passEnc[m_id]->m_param->confWinRightOffset = refParam->confWinRightOffset;
227             }
228             else
229             {
230                 int srcH = refParam->sourceHeight - refParam->confWinBottomOffset;
231                 int srcW = refParam->sourceWidth - refParam->confWinRightOffset;
232 
233                 double scaleFactorH = double(m_param->sourceHeight / srcH);
234                 double scaleFactorW = double(m_param->sourceWidth / srcW);
235 
236                 int absScaleFactorH = (int)(10 * scaleFactorH + 0.5);
237                 int absScaleFactorW = (int)(10 * scaleFactorW + 0.5);
238 
239                 if (absScaleFactorH == 20 && absScaleFactorW == 20)
240                 {
241                     m_param->scaleFactor = 2;
242 
243                     m_parent->m_passEnc[m_id]->m_param->confWinBottomOffset = refParam->confWinBottomOffset * 2;
244                     m_parent->m_passEnc[m_id]->m_param->confWinRightOffset = refParam->confWinRightOffset * 2;
245 
246                 }
247             }
248         }
249 
250         int h = m_param->sourceHeight + m_param->confWinBottomOffset;
251         int w = m_param->sourceWidth + m_param->confWinRightOffset;
252         if (h & (m_param->minCUSize - 1))
253         {
254             r = h & (m_param->minCUSize - 1);
255             padh = m_param->minCUSize - r;
256             m_param->confWinBottomOffset += padh;
257 
258         }
259 
260         if (w & (m_param->minCUSize - 1))
261         {
262             r = w & (m_param->minCUSize - 1);
263             padw = m_param->minCUSize - r;
264             m_param->confWinRightOffset += padw;
265         }
266     }
267 
startThreads()268     void PassEncoder::startThreads()
269     {
270         /* Start slave worker threads */
271         m_threadActive = true;
272         start();
273         /* Start reader threads*/
274         if (m_reader != NULL)
275         {
276             m_reader->m_threadActive = true;
277             m_reader->start();
278         }
279         /* Start scaling worker threads */
280         if (m_scaler != NULL)
281         {
282             m_scaler->m_threadActive = true;
283             m_scaler->start();
284         }
285     }
286 
copyInfo(x265_analysis_data * src)287     void PassEncoder::copyInfo(x265_analysis_data * src)
288     {
289 
290         uint32_t written = m_parent->m_analysisWriteCnt[m_id].get();
291 
292         int index = written % m_parent->m_queueSize;
293         //If all streams have read analysis data, reuse that position in Queue
294 
295         int read = m_parent->m_analysisRead[m_id][index].get();
296         int write = m_parent->m_analysisWrite[m_id][index].get();
297 
298         int overwrite = written / m_parent->m_queueSize;
299         bool emptyIdxFound = 0;
300         while (!emptyIdxFound && overwrite)
301         {
302             for (uint32_t i = 0; i < m_parent->m_queueSize; i++)
303             {
304                 read = m_parent->m_analysisRead[m_id][i].get();
305                 write = m_parent->m_analysisWrite[m_id][i].get();
306                 write *= m_cliopt.numRefs;
307 
308                 if (read == write)
309                 {
310                     index = i;
311                     emptyIdxFound = 1;
312                 }
313             }
314         }
315 
316         x265_analysis_data *m_analysisInfo = &m_parent->m_analysisBuffer[m_id][index];
317 
318         x265_free_analysis_data(m_param, m_analysisInfo);
319         memcpy(m_analysisInfo, src, sizeof(x265_analysis_data));
320         x265_alloc_analysis_data(m_param, m_analysisInfo);
321 
322         bool isVbv = m_param->rc.vbvBufferSize && m_param->rc.vbvMaxBitrate;
323         if (m_param->bDisableLookahead && isVbv)
324         {
325             memcpy(m_analysisInfo->lookahead.intraSatdForVbv, src->lookahead.intraSatdForVbv, src->numCuInHeight * sizeof(uint32_t));
326             memcpy(m_analysisInfo->lookahead.satdForVbv, src->lookahead.satdForVbv, src->numCuInHeight * sizeof(uint32_t));
327             memcpy(m_analysisInfo->lookahead.intraVbvCost, src->lookahead.intraVbvCost, src->numCUsInFrame * sizeof(uint32_t));
328             memcpy(m_analysisInfo->lookahead.vbvCost, src->lookahead.vbvCost, src->numCUsInFrame * sizeof(uint32_t));
329         }
330 
331         if (src->sliceType == X265_TYPE_IDR || src->sliceType == X265_TYPE_I)
332         {
333             if (m_param->analysisSaveReuseLevel < 2)
334                 goto ret;
335             x265_analysis_intra_data *intraDst, *intraSrc;
336             intraDst = (x265_analysis_intra_data*)m_analysisInfo->intraData;
337             intraSrc = (x265_analysis_intra_data*)src->intraData;
338             memcpy(intraDst->depth, intraSrc->depth, sizeof(uint8_t) * src->depthBytes);
339             memcpy(intraDst->modes, intraSrc->modes, sizeof(uint8_t) * src->numCUsInFrame * src->numPartitions);
340             memcpy(intraDst->partSizes, intraSrc->partSizes, sizeof(char) * src->depthBytes);
341             memcpy(intraDst->chromaModes, intraSrc->chromaModes, sizeof(uint8_t) * src->depthBytes);
342             if (m_param->rc.cuTree)
343                 memcpy(intraDst->cuQPOff, intraSrc->cuQPOff, sizeof(int8_t) * src->depthBytes);
344         }
345         else
346         {
347             bool bIntraInInter = (src->sliceType == X265_TYPE_P || m_param->bIntraInBFrames);
348             int numDir = src->sliceType == X265_TYPE_P ? 1 : 2;
349             memcpy(m_analysisInfo->wt, src->wt, sizeof(WeightParam) * 3 * numDir);
350             if (m_param->analysisSaveReuseLevel < 2)
351                 goto ret;
352             x265_analysis_inter_data *interDst, *interSrc;
353             interDst = (x265_analysis_inter_data*)m_analysisInfo->interData;
354             interSrc = (x265_analysis_inter_data*)src->interData;
355             memcpy(interDst->depth, interSrc->depth, sizeof(uint8_t) * src->depthBytes);
356             memcpy(interDst->modes, interSrc->modes, sizeof(uint8_t) * src->depthBytes);
357             if (m_param->rc.cuTree)
358                 memcpy(interDst->cuQPOff, interSrc->cuQPOff, sizeof(int8_t) * src->depthBytes);
359             if (m_param->analysisSaveReuseLevel > 4)
360             {
361                 memcpy(interDst->partSize, interSrc->partSize, sizeof(uint8_t) * src->depthBytes);
362                 memcpy(interDst->mergeFlag, interSrc->mergeFlag, sizeof(uint8_t) * src->depthBytes);
363                 if (m_param->analysisSaveReuseLevel == 10)
364                 {
365                     memcpy(interDst->interDir, interSrc->interDir, sizeof(uint8_t) * src->depthBytes);
366                     for (int dir = 0; dir < numDir; dir++)
367                     {
368                         memcpy(interDst->mvpIdx[dir], interSrc->mvpIdx[dir], sizeof(uint8_t) * src->depthBytes);
369                         memcpy(interDst->refIdx[dir], interSrc->refIdx[dir], sizeof(int8_t) * src->depthBytes);
370                         memcpy(interDst->mv[dir], interSrc->mv[dir], sizeof(MV) * src->depthBytes);
371                     }
372                     if (bIntraInInter)
373                     {
374                         x265_analysis_intra_data *intraDst = (x265_analysis_intra_data*)m_analysisInfo->intraData;
375                         x265_analysis_intra_data *intraSrc = (x265_analysis_intra_data*)src->intraData;
376                         memcpy(intraDst->modes, intraSrc->modes, sizeof(uint8_t) * src->numPartitions * src->numCUsInFrame);
377                         memcpy(intraDst->chromaModes, intraSrc->chromaModes, sizeof(uint8_t) * src->depthBytes);
378                     }
379                }
380             }
381             if (m_param->analysisSaveReuseLevel != 10)
382                 memcpy(interDst->ref, interSrc->ref, sizeof(int32_t) * src->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * numDir);
383         }
384 
385 ret:
386         //increment analysis Write counter
387         m_parent->m_analysisWriteCnt[m_id].incr();
388         m_parent->m_analysisWrite[m_id][index].incr();
389         return;
390     }
391 
392 
readPicture(x265_picture * dstPic)393     bool PassEncoder::readPicture(x265_picture *dstPic)
394     {
395         /*Check and wait if there any input frames to read*/
396         int ipread = m_parent->m_picReadCnt[m_id].get();
397         int ipwrite = m_parent->m_picWriteCnt[m_id].get();
398 
399         bool isAbrLoad = m_cliopt.loadLevel && (m_parent->m_numEncodes > 1);
400         while (!m_inputOver && (ipread == ipwrite))
401         {
402             ipwrite = m_parent->m_picWriteCnt[m_id].waitForChange(ipwrite);
403         }
404 
405         if (m_threadActive && ipread < ipwrite)
406         {
407             /*Get input index to read from inputQueue. If doesn't need analysis info, it need not wait to fetch poc from analysisQueue*/
408             int readPos = ipread % m_parent->m_queueSize;
409             x265_analysis_data* analysisData = 0;
410 
411             if (isAbrLoad)
412             {
413                 /*If stream is master of each slave pass, then fetch analysis data from prev pass*/
414                 int analysisQId = m_cliopt.refId;
415                 /*Check and wait if there any analysis Data to read*/
416                 int analysisWrite = m_parent->m_analysisWriteCnt[analysisQId].get();
417                 int written = analysisWrite * m_parent->m_passEnc[analysisQId]->m_cliopt.numRefs;
418                 int analysisRead = m_parent->m_analysisReadCnt[analysisQId].get();
419 
420                 while (m_threadActive && written == analysisRead)
421                 {
422                     analysisWrite = m_parent->m_analysisWriteCnt[analysisQId].waitForChange(analysisWrite);
423                     written = analysisWrite * m_parent->m_passEnc[analysisQId]->m_cliopt.numRefs;
424                 }
425 
426                 if (analysisRead < written)
427                 {
428                     int analysisIdx = 0;
429                     if (!m_param->bDisableLookahead)
430                     {
431                         bool analysisdRead = false;
432                         while ((analysisRead < written) && !analysisdRead)
433                         {
434                             while (analysisWrite < ipread)
435                             {
436                                 analysisWrite = m_parent->m_analysisWriteCnt[analysisQId].waitForChange(analysisWrite);
437                                 written = analysisWrite * m_parent->m_passEnc[analysisQId]->m_cliopt.numRefs;
438                             }
439                             for (uint32_t i = 0; i < m_parent->m_queueSize; i++)
440                             {
441                                 analysisData = &m_parent->m_analysisBuffer[analysisQId][i];
442                                 int read = m_parent->m_analysisRead[analysisQId][i].get();
443                                 int write = m_parent->m_analysisWrite[analysisQId][i].get() * m_parent->m_passEnc[analysisQId]->m_cliopt.numRefs;
444                                 if ((analysisData->poc == (uint32_t)(ipread)) && (read < write))
445                                 {
446                                     analysisIdx = i;
447                                     analysisdRead = true;
448                                     break;
449                                 }
450                             }
451                         }
452                     }
453                     else
454                     {
455                         analysisIdx = analysisRead % m_parent->m_queueSize;
456                         analysisData = &m_parent->m_analysisBuffer[analysisQId][analysisIdx];
457                         readPos = analysisData->poc % m_parent->m_queueSize;
458                         while ((ipwrite < readPos) || ((ipwrite - 1) < (int)analysisData->poc))
459                         {
460                             ipwrite = m_parent->m_picWriteCnt[m_id].waitForChange(ipwrite);
461                         }
462                     }
463 
464                     m_lastIdx = analysisIdx;
465                 }
466                 else
467                     return false;
468             }
469 
470 
471             x265_picture *srcPic = (x265_picture*)(m_parent->m_inputPicBuffer[m_id][readPos]);
472 
473             x265_picture *pic = (x265_picture*)(dstPic);
474             pic->colorSpace = srcPic->colorSpace;
475             pic->bitDepth = srcPic->bitDepth;
476             pic->framesize = srcPic->framesize;
477             pic->height = srcPic->height;
478             pic->pts = srcPic->pts;
479             pic->dts = srcPic->dts;
480             pic->reorderedPts = srcPic->reorderedPts;
481             pic->width = srcPic->width;
482             pic->analysisData = srcPic->analysisData;
483             pic->userSEI = srcPic->userSEI;
484             pic->stride[0] = srcPic->stride[0];
485             pic->stride[1] = srcPic->stride[1];
486             pic->stride[2] = srcPic->stride[2];
487             pic->planes[0] = srcPic->planes[0];
488             pic->planes[1] = srcPic->planes[1];
489             pic->planes[2] = srcPic->planes[2];
490             if (isAbrLoad)
491                 pic->analysisData = *analysisData;
492             return true;
493         }
494         else
495             return false;
496     }
497 
threadMain()498     void PassEncoder::threadMain()
499     {
500         THREAD_NAME("PassEncoder", m_id);
501 
502         while (m_threadActive)
503         {
504 
505 #if ENABLE_LIBVMAF
506             x265_vmaf_data* vmafdata = m_cliopt.vmafData;
507 #endif
508             /* This allows muxers to modify bitstream format */
509             m_cliopt.output->setParam(m_param);
510             const x265_api* api = m_cliopt.api;
511             ReconPlay* reconPlay = NULL;
512             if (m_cliopt.reconPlayCmd)
513                 reconPlay = new ReconPlay(m_cliopt.reconPlayCmd, *m_param);
514             char* profileName = m_cliopt.encName ? m_cliopt.encName : (char *)"x265";
515 
516             if (m_cliopt.zoneFile)
517             {
518                 if (!m_cliopt.parseZoneFile())
519                 {
520                     x265_log(NULL, X265_LOG_ERROR, "Unable to parse zonefile in %s\n", profileName);
521                     fclose(m_cliopt.zoneFile);
522                     m_cliopt.zoneFile = NULL;
523                 }
524             }
525 
526             if (signal(SIGINT, sigint_handler) == SIG_ERR)
527                 x265_log(m_param, X265_LOG_ERROR, "Unable to register CTRL+C handler: %s in %s\n",
528                     strerror(errno), profileName);
529 
530             x265_picture pic_orig, pic_out;
531             x265_picture *pic_in = &pic_orig;
532             /* Allocate recon picture if analysis save/load is enabled */
533             std::priority_queue<int64_t>* pts_queue = m_cliopt.output->needPTS() ? new std::priority_queue<int64_t>() : NULL;
534             x265_picture *pic_recon = (m_cliopt.recon || m_param->analysisSave || m_param->analysisLoad || pts_queue || reconPlay || m_param->csvLogLevel) ? &pic_out : NULL;
535             uint32_t inFrameCount = 0;
536             uint32_t outFrameCount = 0;
537             x265_nal *p_nal;
538             x265_stats stats;
539             uint32_t nal;
540             int16_t *errorBuf = NULL;
541             bool bDolbyVisionRPU = false;
542             uint8_t *rpuPayload = NULL;
543             int inputPicNum = 1;
544             x265_picture picField1, picField2;
545             x265_analysis_data* analysisInfo = (x265_analysis_data*)(&pic_out.analysisData);
546             bool isAbrSave = m_cliopt.saveLevel && (m_parent->m_numEncodes > 1);
547 
548             if (!m_param->bRepeatHeaders && !m_param->bEnableSvtHevc)
549             {
550                 if (api->encoder_headers(m_encoder, &p_nal, &nal) < 0)
551                 {
552                     x265_log(m_param, X265_LOG_ERROR, "Failure generating stream headers in %s\n", profileName);
553                     m_ret = 3;
554                     goto fail;
555                 }
556                 else
557                     m_cliopt.totalbytes += m_cliopt.output->writeHeaders(p_nal, nal);
558             }
559 
560             if (m_param->bField && m_param->interlaceMode)
561             {
562                 api->picture_init(m_param, &picField1);
563                 api->picture_init(m_param, &picField2);
564                 // return back the original height of input
565                 m_param->sourceHeight *= 2;
566                 api->picture_init(m_param, &pic_orig);
567             }
568             else
569                 api->picture_init(m_param, &pic_orig);
570 
571             if (m_param->dolbyProfile && m_cliopt.dolbyVisionRpu)
572             {
573                 rpuPayload = X265_MALLOC(uint8_t, 1024);
574                 pic_in->rpu.payload = rpuPayload;
575                 if (pic_in->rpu.payload)
576                     bDolbyVisionRPU = true;
577             }
578 
579             if (m_cliopt.bDither)
580             {
581                 errorBuf = X265_MALLOC(int16_t, m_param->sourceWidth + 1);
582                 if (errorBuf)
583                     memset(errorBuf, 0, (m_param->sourceWidth + 1) * sizeof(int16_t));
584                 else
585                     m_cliopt.bDither = false;
586             }
587 
588             // main encoder loop
589             while (pic_in && !b_ctrl_c)
590             {
591                 pic_orig.poc = (m_param->bField && m_param->interlaceMode) ? inFrameCount * 2 : inFrameCount;
592                 if (m_cliopt.qpfile)
593                 {
594                     if (!m_cliopt.parseQPFile(pic_orig))
595                     {
596                         x265_log(NULL, X265_LOG_ERROR, "can't parse qpfile for frame %d in %s\n",
597                             pic_in->poc, profileName);
598                         fclose(m_cliopt.qpfile);
599                         m_cliopt.qpfile = NULL;
600                     }
601                 }
602 
603                 if (m_cliopt.framesToBeEncoded && inFrameCount >= m_cliopt.framesToBeEncoded)
604                     pic_in = NULL;
605                 else if (readPicture(pic_in))
606                     inFrameCount++;
607                 else
608                     pic_in = NULL;
609 
610                 if (pic_in)
611                 {
612                     if (pic_in->bitDepth > m_param->internalBitDepth && m_cliopt.bDither)
613                     {
614                         x265_dither_image(pic_in, m_cliopt.input->getWidth(), m_cliopt.input->getHeight(), errorBuf, m_param->internalBitDepth);
615                         pic_in->bitDepth = m_param->internalBitDepth;
616                     }
617                     /* Overwrite PTS */
618                     pic_in->pts = pic_in->poc;
619 
620                     // convert to field
621                     if (m_param->bField && m_param->interlaceMode)
622                     {
623                         int height = pic_in->height >> 1;
624 
625                         int static bCreated = 0;
626                         if (bCreated == 0)
627                         {
628                             bCreated = 1;
629                             inputPicNum = 2;
630                             picField1.fieldNum = 1;
631                             picField2.fieldNum = 2;
632 
633                             picField1.bitDepth = picField2.bitDepth = pic_in->bitDepth;
634                             picField1.colorSpace = picField2.colorSpace = pic_in->colorSpace;
635                             picField1.height = picField2.height = pic_in->height >> 1;
636                             picField1.framesize = picField2.framesize = pic_in->framesize >> 1;
637 
638                             size_t fieldFrameSize = (size_t)pic_in->framesize >> 1;
639                             char* field1Buf = X265_MALLOC(char, fieldFrameSize);
640                             char* field2Buf = X265_MALLOC(char, fieldFrameSize);
641 
642                             int stride = picField1.stride[0] = picField2.stride[0] = pic_in->stride[0];
643                             uint64_t framesize = stride * (height >> x265_cli_csps[pic_in->colorSpace].height[0]);
644                             picField1.planes[0] = field1Buf;
645                             picField2.planes[0] = field2Buf;
646                             for (int i = 1; i < x265_cli_csps[pic_in->colorSpace].planes; i++)
647                             {
648                                 picField1.planes[i] = field1Buf + framesize;
649                                 picField2.planes[i] = field2Buf + framesize;
650 
651                                 stride = picField1.stride[i] = picField2.stride[i] = pic_in->stride[i];
652                                 framesize += (stride * (height >> x265_cli_csps[pic_in->colorSpace].height[i]));
653                             }
654                             assert(framesize == picField1.framesize);
655                         }
656 
657                         picField1.pts = picField1.poc = pic_in->poc;
658                         picField2.pts = picField2.poc = pic_in->poc + 1;
659 
660                         picField1.userSEI = picField2.userSEI = pic_in->userSEI;
661 
662                         //if (pic_in->userData)
663                         //{
664                         //    // Have to handle userData here
665                         //}
666 
667                         if (pic_in->framesize)
668                         {
669                             for (int i = 0; i < x265_cli_csps[pic_in->colorSpace].planes; i++)
670                             {
671                                 char* srcP1 = (char*)pic_in->planes[i];
672                                 char* srcP2 = (char*)pic_in->planes[i] + pic_in->stride[i];
673                                 char* p1 = (char*)picField1.planes[i];
674                                 char* p2 = (char*)picField2.planes[i];
675 
676                                 int stride = picField1.stride[i];
677 
678                                 for (int y = 0; y < (height >> x265_cli_csps[pic_in->colorSpace].height[i]); y++)
679                                 {
680                                     memcpy(p1, srcP1, stride);
681                                     memcpy(p2, srcP2, stride);
682                                     srcP1 += 2 * stride;
683                                     srcP2 += 2 * stride;
684                                     p1 += stride;
685                                     p2 += stride;
686                                 }
687                             }
688                         }
689                     }
690 
691                     if (bDolbyVisionRPU)
692                     {
693                         if (m_param->bField && m_param->interlaceMode)
694                         {
695                             if (m_cliopt.rpuParser(&picField1) > 0)
696                                 goto fail;
697                             if (m_cliopt.rpuParser(&picField2) > 0)
698                                 goto fail;
699                         }
700                         else
701                         {
702                             if (m_cliopt.rpuParser(pic_in) > 0)
703                                 goto fail;
704                         }
705                     }
706                 }
707 
708                 for (int inputNum = 0; inputNum < inputPicNum; inputNum++)
709                 {
710                     x265_picture *picInput = NULL;
711                     if (inputPicNum == 2)
712                         picInput = pic_in ? (inputNum ? &picField2 : &picField1) : NULL;
713                     else
714                         picInput = pic_in;
715 
716                     int numEncoded = api->encoder_encode(m_encoder, &p_nal, &nal, picInput, pic_recon);
717 
718                     int idx = (inFrameCount - 1) % m_parent->m_queueSize;
719                     m_parent->m_picIdxReadCnt[m_id][idx].incr();
720                     m_parent->m_picReadCnt[m_id].incr();
721                     if (m_cliopt.loadLevel && picInput)
722                     {
723                         m_parent->m_analysisReadCnt[m_cliopt.refId].incr();
724                         m_parent->m_analysisRead[m_cliopt.refId][m_lastIdx].incr();
725                     }
726 
727                     if (numEncoded < 0)
728                     {
729                         b_ctrl_c = 1;
730                         m_ret = 4;
731                         break;
732                     }
733 
734                     if (reconPlay && numEncoded)
735                         reconPlay->writePicture(*pic_recon);
736 
737                     outFrameCount += numEncoded;
738 
739                     if (isAbrSave && numEncoded)
740                     {
741                         copyInfo(analysisInfo);
742                     }
743 
744                     if (numEncoded && pic_recon && m_cliopt.recon)
745                         m_cliopt.recon->writePicture(pic_out);
746                     if (nal)
747                     {
748                         m_cliopt.totalbytes += m_cliopt.output->writeFrame(p_nal, nal, pic_out);
749                         if (pts_queue)
750                         {
751                             pts_queue->push(-pic_out.pts);
752                             if (pts_queue->size() > 2)
753                                 pts_queue->pop();
754                         }
755                     }
756                     m_cliopt.printStatus(outFrameCount);
757                 }
758             }
759 
760             /* Flush the encoder */
761             while (!b_ctrl_c)
762             {
763                 int numEncoded = api->encoder_encode(m_encoder, &p_nal, &nal, NULL, pic_recon);
764                 if (numEncoded < 0)
765                 {
766                     m_ret = 4;
767                     break;
768                 }
769 
770                 if (reconPlay && numEncoded)
771                     reconPlay->writePicture(*pic_recon);
772 
773                 outFrameCount += numEncoded;
774                 if (isAbrSave && numEncoded)
775                 {
776                     copyInfo(analysisInfo);
777                 }
778 
779                 if (numEncoded && pic_recon && m_cliopt.recon)
780                     m_cliopt.recon->writePicture(pic_out);
781                 if (nal)
782                 {
783                     m_cliopt.totalbytes += m_cliopt.output->writeFrame(p_nal, nal, pic_out);
784                     if (pts_queue)
785                     {
786                         pts_queue->push(-pic_out.pts);
787                         if (pts_queue->size() > 2)
788                             pts_queue->pop();
789                     }
790                 }
791 
792                 m_cliopt.printStatus(outFrameCount);
793 
794                 if (!numEncoded)
795                     break;
796             }
797 
798             if (bDolbyVisionRPU)
799             {
800                 if (fgetc(m_cliopt.dolbyVisionRpu) != EOF)
801                     x265_log(NULL, X265_LOG_WARNING, "Dolby Vision RPU count is greater than frame count in %s\n",
802                         profileName);
803                 x265_log(NULL, X265_LOG_INFO, "VES muxing with Dolby Vision RPU file successful in %s\n",
804                     profileName);
805             }
806 
807             /* clear progress report */
808             if (m_cliopt.bProgress)
809                 fprintf(stderr, "%*s\r", 80, " ");
810 
811         fail:
812 
813             delete reconPlay;
814 
815             api->encoder_get_stats(m_encoder, &stats, sizeof(stats));
816             if (m_param->csvfn && !b_ctrl_c)
817 #if ENABLE_LIBVMAF
818                 api->vmaf_encoder_log(m_encoder, m_cliopt.argCnt, m_cliopt.argString, m_cliopt.param, vmafdata);
819 #else
820                 api->encoder_log(m_encoder, m_cliopt.argCnt, m_cliopt.argString);
821 #endif
822             api->encoder_close(m_encoder);
823 
824             int64_t second_largest_pts = 0;
825             int64_t largest_pts = 0;
826             if (pts_queue && pts_queue->size() >= 2)
827             {
828                 second_largest_pts = -pts_queue->top();
829                 pts_queue->pop();
830                 largest_pts = -pts_queue->top();
831                 pts_queue->pop();
832                 delete pts_queue;
833                 pts_queue = NULL;
834             }
835             m_cliopt.output->closeFile(largest_pts, second_largest_pts);
836 
837             if (b_ctrl_c)
838                 general_log(m_param, NULL, X265_LOG_INFO, "aborted at input frame %d, output frame %d in %s\n",
839                     m_cliopt.seek + inFrameCount, stats.encodedPictureCount, profileName);
840 
841             api->param_free(m_param);
842 
843             X265_FREE(errorBuf);
844             X265_FREE(rpuPayload);
845 
846             m_threadActive = false;
847             m_parent->m_numActiveEncodes.decr();
848         }
849     }
850 
destroy()851     void PassEncoder::destroy()
852     {
853         stop();
854         if (m_reader)
855         {
856             m_reader->stop();
857             delete m_reader;
858         }
859         else
860         {
861             m_scaler->stop();
862             m_scaler->destroy();
863             delete m_scaler;
864         }
865     }
866 
Scaler(int threadId,int threadNum,int id,VideoDesc * src,VideoDesc * dst,PassEncoder * parentEnc)867     Scaler::Scaler(int threadId, int threadNum, int id, VideoDesc *src, VideoDesc *dst, PassEncoder *parentEnc)
868     {
869         m_parentEnc = parentEnc;
870         m_id = id;
871         m_srcFormat = src;
872         m_dstFormat = dst;
873         m_threadActive = false;
874         m_scaleFrameSize = 0;
875         m_filterManager = NULL;
876         m_threadId = threadId;
877         m_threadTotal = threadNum;
878 
879         int csp = dst->m_csp;
880         uint32_t pixelbytes = dst->m_inputDepth > 8 ? 2 : 1;
881         for (int i = 0; i < x265_cli_csps[csp].planes; i++)
882         {
883             int w = dst->m_width >> x265_cli_csps[csp].width[i];
884             int h = dst->m_height >> x265_cli_csps[csp].height[i];
885             m_scalePlanes[i] = w * h * pixelbytes;
886             m_scaleFrameSize += m_scalePlanes[i];
887         }
888 
889         if (src->m_height != dst->m_height || src->m_width != dst->m_width)
890         {
891             m_filterManager = new ScalerFilterManager;
892             m_filterManager->init(4, m_srcFormat, m_dstFormat);
893         }
894     }
895 
scalePic(x265_picture * destination,x265_picture * source)896     bool Scaler::scalePic(x265_picture * destination, x265_picture * source)
897     {
898         if (!destination || !source)
899             return false;
900         x265_param* param = m_parentEnc->m_param;
901         int pixelBytes = m_dstFormat->m_inputDepth > 8 ? 2 : 1;
902         if (m_srcFormat->m_height != m_dstFormat->m_height || m_srcFormat->m_width != m_dstFormat->m_width)
903         {
904             void **srcPlane = NULL, **dstPlane = NULL;
905             int srcStride[3], dstStride[3];
906             destination->bitDepth = source->bitDepth;
907             destination->colorSpace = source->colorSpace;
908             destination->pts = source->pts;
909             destination->dts = source->dts;
910             destination->reorderedPts = source->reorderedPts;
911             destination->poc = source->poc;
912             destination->userSEI = source->userSEI;
913             srcPlane = source->planes;
914             dstPlane = destination->planes;
915             srcStride[0] = source->stride[0];
916             destination->stride[0] = m_dstFormat->m_width * pixelBytes;
917             dstStride[0] = destination->stride[0];
918             if (param->internalCsp != X265_CSP_I400)
919             {
920                 srcStride[1] = source->stride[1];
921                 srcStride[2] = source->stride[2];
922                 destination->stride[1] = destination->stride[0] >> x265_cli_csps[param->internalCsp].width[1];
923                 destination->stride[2] = destination->stride[0] >> x265_cli_csps[param->internalCsp].width[2];
924                 dstStride[1] = destination->stride[1];
925                 dstStride[2] = destination->stride[2];
926             }
927             if (m_scaleFrameSize)
928             {
929                 m_filterManager->scale_pic(srcPlane, dstPlane, srcStride, dstStride);
930                 return true;
931             }
932             else
933                 x265_log(param, X265_LOG_INFO, "Empty frame received\n");
934         }
935         return false;
936     }
937 
threadMain()938     void Scaler::threadMain()
939     {
940         THREAD_NAME("Scaler", m_id);
941 
942         /* unscaled picture is stored in the last index */
943         uint32_t srcId = m_id - 1;
944         int QDepth = m_parentEnc->m_parent->m_queueSize;
945         while (!m_parentEnc->m_inputOver)
946         {
947 
948             uint32_t scaledWritten = m_parentEnc->m_parent->m_picWriteCnt[m_id].get();
949 
950             if (m_parentEnc->m_cliopt.framesToBeEncoded && scaledWritten >= m_parentEnc->m_cliopt.framesToBeEncoded)
951                 break;
952 
953             if (m_threadTotal > 1 && (m_threadId != scaledWritten % m_threadTotal))
954             {
955                 continue;
956             }
957             uint32_t written = m_parentEnc->m_parent->m_picWriteCnt[srcId].get();
958 
959             /*If all the input pictures are scaled by the current scale worker thread wait for input pictures*/
960             while (m_threadActive && (scaledWritten == written)) {
961                 written = m_parentEnc->m_parent->m_picWriteCnt[srcId].waitForChange(written);
962             }
963 
964             if (m_threadActive && scaledWritten < written)
965             {
966 
967                 int scaledWriteIdx = scaledWritten % QDepth;
968                 int overWritePicBuffer = scaledWritten / QDepth;
969                 int read = m_parentEnc->m_parent->m_picIdxReadCnt[m_id][scaledWriteIdx].get();
970 
971                 while (overWritePicBuffer && read < overWritePicBuffer)
972                 {
973                     read = m_parentEnc->m_parent->m_picIdxReadCnt[m_id][scaledWriteIdx].waitForChange(read);
974                 }
975 
976                 if (!m_parentEnc->m_parent->m_inputPicBuffer[m_id][scaledWriteIdx])
977                 {
978                     int framesize = 0;
979                     int planesize[3];
980                     int csp = m_dstFormat->m_csp;
981                     int stride[3];
982                     stride[0] = m_dstFormat->m_width;
983                     stride[1] = stride[0] >> x265_cli_csps[csp].width[1];
984                     stride[2] = stride[0] >> x265_cli_csps[csp].width[2];
985                     for (int i = 0; i < x265_cli_csps[csp].planes; i++)
986                     {
987                         uint32_t h = m_dstFormat->m_height >> x265_cli_csps[csp].height[i];
988                         planesize[i] = h * stride[i];
989                         framesize += planesize[i];
990                     }
991 
992                     m_parentEnc->m_parent->m_inputPicBuffer[m_id][scaledWriteIdx] = x265_picture_alloc();
993                     x265_picture_init(m_parentEnc->m_param, m_parentEnc->m_parent->m_inputPicBuffer[m_id][scaledWriteIdx]);
994 
995                     ((x265_picture*)m_parentEnc->m_parent->m_inputPicBuffer[m_id][scaledWritten % QDepth])->framesize = framesize;
996                     for (int32_t j = 0; j < x265_cli_csps[csp].planes; j++)
997                     {
998                         m_parentEnc->m_parent->m_inputPicBuffer[m_id][scaledWritten % QDepth]->planes[j] = X265_MALLOC(char, planesize[j]);
999                     }
1000                 }
1001 
1002                 x265_picture *srcPic = m_parentEnc->m_parent->m_inputPicBuffer[srcId][scaledWritten % QDepth];
1003                 x265_picture* destPic = m_parentEnc->m_parent->m_inputPicBuffer[m_id][scaledWriteIdx];
1004 
1005                 // Enqueue this picture up with the current encoder so that it will asynchronously encode
1006                 if (!scalePic(destPic, srcPic))
1007                     x265_log(NULL, X265_LOG_ERROR, "Unable to copy scaled input picture to input queue \n");
1008                 else
1009                     m_parentEnc->m_parent->m_picWriteCnt[m_id].incr();
1010                 m_scaledWriteCnt.incr();
1011                 m_parentEnc->m_parent->m_picIdxReadCnt[srcId][scaledWriteIdx].incr();
1012             }
1013             if (m_threadTotal > 1)
1014             {
1015                 written = m_parentEnc->m_parent->m_picWriteCnt[srcId].get();
1016                 int totalWrite = written / m_threadTotal;
1017                 if (written % m_threadTotal > m_threadId)
1018                     totalWrite++;
1019                 if (totalWrite == m_scaledWriteCnt.get())
1020                 {
1021                     m_parentEnc->m_parent->m_picWriteCnt[srcId].poke();
1022                     m_parentEnc->m_parent->m_picWriteCnt[m_id].poke();
1023                     break;
1024                 }
1025             }
1026             else
1027             {
1028                 /* Once end of video is reached and all frames are scaled, release wait on picwritecount */
1029                 scaledWritten = m_parentEnc->m_parent->m_picWriteCnt[m_id].get();
1030                 written = m_parentEnc->m_parent->m_picWriteCnt[srcId].get();
1031                 if (written == scaledWritten)
1032                 {
1033                     m_parentEnc->m_parent->m_picWriteCnt[srcId].poke();
1034                     m_parentEnc->m_parent->m_picWriteCnt[m_id].poke();
1035                     break;
1036                 }
1037             }
1038 
1039         }
1040         m_threadActive = false;
1041         destroy();
1042     }
1043 
Reader(int id,PassEncoder * parentEnc)1044     Reader::Reader(int id, PassEncoder *parentEnc)
1045     {
1046         m_parentEnc = parentEnc;
1047         m_id = id;
1048         m_input = parentEnc->m_input;
1049     }
1050 
threadMain()1051     void Reader::threadMain()
1052     {
1053         THREAD_NAME("Reader", m_id);
1054 
1055         int QDepth = m_parentEnc->m_parent->m_queueSize;
1056         x265_picture* src = x265_picture_alloc();
1057         x265_picture_init(m_parentEnc->m_param, src);
1058 
1059         while (m_threadActive)
1060         {
1061             uint32_t written = m_parentEnc->m_parent->m_picWriteCnt[m_id].get();
1062             uint32_t writeIdx = written % QDepth;
1063             uint32_t read = m_parentEnc->m_parent->m_picIdxReadCnt[m_id][writeIdx].get();
1064             uint32_t overWritePicBuffer = written / QDepth;
1065 
1066             if (m_parentEnc->m_cliopt.framesToBeEncoded && written >= m_parentEnc->m_cliopt.framesToBeEncoded)
1067                 break;
1068 
1069             while (overWritePicBuffer && read < overWritePicBuffer)
1070             {
1071                 read = m_parentEnc->m_parent->m_picIdxReadCnt[m_id][writeIdx].waitForChange(read);
1072             }
1073 
1074             x265_picture* dest = m_parentEnc->m_parent->m_inputPicBuffer[m_id][writeIdx];
1075             if (m_input->readPicture(*src))
1076             {
1077                 dest->poc = src->poc;
1078                 dest->pts = src->pts;
1079                 dest->userSEI = src->userSEI;
1080                 dest->bitDepth = src->bitDepth;
1081                 dest->framesize = src->framesize;
1082                 dest->height = src->height;
1083                 dest->width = src->width;
1084                 dest->colorSpace = src->colorSpace;
1085                 dest->userSEI = src->userSEI;
1086                 dest->rpu.payload = src->rpu.payload;
1087                 dest->picStruct = src->picStruct;
1088                 dest->stride[0] = src->stride[0];
1089                 dest->stride[1] = src->stride[1];
1090                 dest->stride[2] = src->stride[2];
1091 
1092                 if (!dest->planes[0])
1093                     dest->planes[0] = X265_MALLOC(char, dest->framesize);
1094 
1095                 memcpy(dest->planes[0], src->planes[0], src->framesize * sizeof(char));
1096                 dest->planes[1] = (char*)dest->planes[0] + src->stride[0] * src->height;
1097                 dest->planes[2] = (char*)dest->planes[1] + src->stride[1] * (src->height >> x265_cli_csps[src->colorSpace].height[1]);
1098                 m_parentEnc->m_parent->m_picWriteCnt[m_id].incr();
1099             }
1100             else
1101             {
1102                 m_threadActive = false;
1103                 m_parentEnc->m_inputOver = true;
1104                 m_parentEnc->m_parent->m_picWriteCnt[m_id].poke();
1105             }
1106         }
1107         x265_picture_free(src);
1108     }
1109 }
1110