1 /*****************************************************************************
2 * Copyright (C) 2013-2020 MulticoreWare, Inc
3 *
4 * Authors: Pooja Venkatesan <pooja@multicorewareinc.com>
5 *          Aruna Matheswaran <aruna@multicorewareinc.com>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
20 *
21 * This program is also available under a commercial proprietary license.
22 * For more information, contact us at license @ x265.com.
23 *****************************************************************************/
24 
25 #include "abrEncApp.h"
26 #include "mv.h"
27 #include "slice.h"
28 #include "param.h"
29 
30 #include <signal.h>
31 #include <errno.h>
32 
33 #include <queue>
34 
35 using namespace X265_NS;
36 
/* Interrupt flag: raised by the SIGINT handler below and polled by the main
 * encode loop. It has static storage duration, so it starts out as 0. */
static volatile sig_atomic_t b_ctrl_c;

/* SIGINT handler: only records that an interrupt was requested. */
static void sigint_handler(int /* signum */)
{
    b_ctrl_c = 1;
}
43 
44 namespace X265_NS {
45     // private namespace
/* Number of slots in each per-encode queue (input pictures and analysis data)
 * when more than one encode runs; a single encode uses a one-slot queue. */
#define X265_INPUT_QUEUE_SIZE 250
47 
AbrEncoder(CLIOptions cliopt[],uint8_t numEncodes,int & ret)48     AbrEncoder::AbrEncoder(CLIOptions cliopt[], uint8_t numEncodes, int &ret)
49     {
50         m_numEncodes = numEncodes;
51         m_numActiveEncodes.set(numEncodes);
52         m_queueSize = (numEncodes > 1) ? X265_INPUT_QUEUE_SIZE : 1;
53         m_passEnc = X265_MALLOC(PassEncoder*, m_numEncodes);
54 
55         for (uint8_t i = 0; i < m_numEncodes; i++)
56         {
57             m_passEnc[i] = new PassEncoder(i, cliopt[i], this);
58             if (!m_passEnc[i])
59             {
60                 x265_log(NULL, X265_LOG_ERROR, "Unable to allocate memory for passEncoder\n");
61                 ret = 4;
62             }
63             m_passEnc[i]->init(ret);
64         }
65 
66         if (!allocBuffers())
67         {
68             x265_log(NULL, X265_LOG_ERROR, "Unable to allocate memory for buffers\n");
69             ret = 4;
70         }
71 
72         /* start passEncoder worker threads */
73         for (uint8_t pass = 0; pass < m_numEncodes; pass++)
74             m_passEnc[pass]->startThreads();
75     }
76 
allocBuffers()77     bool AbrEncoder::allocBuffers()
78     {
79         m_inputPicBuffer = X265_MALLOC(x265_picture**, m_numEncodes);
80         m_analysisBuffer = X265_MALLOC(x265_analysis_data*, m_numEncodes);
81 
82         m_picWriteCnt = new ThreadSafeInteger[m_numEncodes];
83         m_picReadCnt = new ThreadSafeInteger[m_numEncodes];
84         m_analysisWriteCnt = new ThreadSafeInteger[m_numEncodes];
85         m_analysisReadCnt = new ThreadSafeInteger[m_numEncodes];
86 
87         m_picIdxReadCnt = X265_MALLOC(ThreadSafeInteger*, m_numEncodes);
88         m_analysisWrite = X265_MALLOC(ThreadSafeInteger*, m_numEncodes);
89         m_analysisRead = X265_MALLOC(ThreadSafeInteger*, m_numEncodes);
90         m_readFlag = X265_MALLOC(int*, m_numEncodes);
91 
92         for (uint8_t pass = 0; pass < m_numEncodes; pass++)
93         {
94             m_inputPicBuffer[pass] = X265_MALLOC(x265_picture*, m_queueSize);
95             for (uint32_t idx = 0; idx < m_queueSize; idx++)
96             {
97                 m_inputPicBuffer[pass][idx] = x265_picture_alloc();
98                 x265_picture_init(m_passEnc[pass]->m_param, m_inputPicBuffer[pass][idx]);
99             }
100 
101             CHECKED_MALLOC_ZERO(m_analysisBuffer[pass], x265_analysis_data, m_queueSize);
102             m_picIdxReadCnt[pass] = new ThreadSafeInteger[m_queueSize];
103             m_analysisWrite[pass] = new ThreadSafeInteger[m_queueSize];
104             m_analysisRead[pass] = new ThreadSafeInteger[m_queueSize];
105             m_readFlag[pass] = X265_MALLOC(int, m_queueSize);
106         }
107         return true;
108     fail:
109         return false;
110     }
111 
destroy()112     void AbrEncoder::destroy()
113     {
114         x265_cleanup(); /* Free library singletons */
115         for (uint8_t pass = 0; pass < m_numEncodes; pass++)
116         {
117             for (uint32_t index = 0; index < m_queueSize; index++)
118             {
119                 X265_FREE(m_inputPicBuffer[pass][index]->planes[0]);
120                 x265_picture_free(m_inputPicBuffer[pass][index]);
121             }
122 
123             X265_FREE(m_inputPicBuffer[pass]);
124             X265_FREE(m_analysisBuffer[pass]);
125             X265_FREE(m_readFlag[pass]);
126             delete[] m_picIdxReadCnt[pass];
127             delete[] m_analysisWrite[pass];
128             delete[] m_analysisRead[pass];
129             m_passEnc[pass]->destroy();
130             delete m_passEnc[pass];
131         }
132         X265_FREE(m_inputPicBuffer);
133         X265_FREE(m_analysisBuffer);
134         X265_FREE(m_readFlag);
135 
136         delete[] m_picWriteCnt;
137         delete[] m_picReadCnt;
138         delete[] m_analysisWriteCnt;
139         delete[] m_analysisReadCnt;
140 
141         X265_FREE(m_picIdxReadCnt);
142         X265_FREE(m_analysisWrite);
143         X265_FREE(m_analysisRead);
144 
145         X265_FREE(m_passEnc);
146     }
147 
PassEncoder(uint32_t id,CLIOptions cliopt,AbrEncoder * parent)148     PassEncoder::PassEncoder(uint32_t id, CLIOptions cliopt, AbrEncoder *parent)
149     {
150         m_id = id;
151         m_cliopt = cliopt;
152         m_parent = parent;
153         if(!(m_cliopt.enableScaler && m_id))
154             m_input = m_cliopt.input;
155         m_param = cliopt.param;
156         m_inputOver = false;
157         m_lastIdx = -1;
158         m_encoder = NULL;
159         m_scaler = NULL;
160         m_reader = NULL;
161         m_ret = 0;
162     }
163 
init(int & result)164     int PassEncoder::init(int &result)
165     {
166         if (m_parent->m_numEncodes > 1)
167             setReuseLevel();
168 
169         if (!(m_cliopt.enableScaler && m_id))
170             m_reader = new Reader(m_id, this);
171         else
172         {
173             VideoDesc *src = NULL, *dst = NULL;
174             dst = new VideoDesc(m_param->sourceWidth, m_param->sourceHeight, m_param->internalCsp, m_param->internalBitDepth);
175             int dstW = m_parent->m_passEnc[m_id - 1]->m_param->sourceWidth;
176             int dstH = m_parent->m_passEnc[m_id - 1]->m_param->sourceHeight;
177             src = new VideoDesc(dstW, dstH, m_param->internalCsp, m_param->internalBitDepth);
178             if (src != NULL && dst != NULL)
179             {
180                 m_scaler = new Scaler(0, 1, m_id, src, dst, this);
181                 if (!m_scaler)
182                 {
183                     x265_log(m_param, X265_LOG_ERROR, "\n MALLOC failure in Scaler");
184                     result = 4;
185                 }
186             }
187         }
188 
189         /* note: we could try to acquire a different libx265 API here based on
190         * the profile found during option parsing, but it must be done before
191         * opening an encoder */
192 
193         if (m_param)
194             m_encoder = m_cliopt.api->encoder_open(m_param);
195         if (!m_encoder)
196         {
197             x265_log(NULL, X265_LOG_ERROR, "x265_encoder_open() failed for Enc, \n");
198             m_ret = 2;
199             return -1;
200         }
201 
202         /* get the encoder parameters post-initialization */
203         m_cliopt.api->encoder_parameters(m_encoder, m_param);
204 
205         return 1;
206     }
207 
setReuseLevel()208     void PassEncoder::setReuseLevel()
209     {
210         uint32_t r, padh = 0, padw = 0;
211 
212         m_param->confWinBottomOffset = m_param->confWinRightOffset = 0;
213 
214         m_param->analysisLoadReuseLevel = m_cliopt.loadLevel;
215         m_param->analysisSaveReuseLevel = m_cliopt.saveLevel;
216         m_param->analysisSave = m_cliopt.saveLevel ? "save.dat" : NULL;
217         m_param->analysisLoad = m_cliopt.loadLevel ? "load.dat" : NULL;
218         m_param->bUseAnalysisFile = 0;
219 
220         if (m_cliopt.loadLevel)
221         {
222             x265_param *refParam = m_parent->m_passEnc[m_cliopt.refId]->m_param;
223 
224             if (m_param->sourceHeight == (refParam->sourceHeight - refParam->confWinBottomOffset) &&
225                 m_param->sourceWidth == (refParam->sourceWidth - refParam->confWinRightOffset))
226             {
227                 m_parent->m_passEnc[m_id]->m_param->confWinBottomOffset = refParam->confWinBottomOffset;
228                 m_parent->m_passEnc[m_id]->m_param->confWinRightOffset = refParam->confWinRightOffset;
229             }
230             else
231             {
232                 int srcH = refParam->sourceHeight - refParam->confWinBottomOffset;
233                 int srcW = refParam->sourceWidth - refParam->confWinRightOffset;
234 
235                 double scaleFactorH = double(m_param->sourceHeight / srcH);
236                 double scaleFactorW = double(m_param->sourceWidth / srcW);
237 
238                 int absScaleFactorH = (int)(10 * scaleFactorH + 0.5);
239                 int absScaleFactorW = (int)(10 * scaleFactorW + 0.5);
240 
241                 if (absScaleFactorH == 20 && absScaleFactorW == 20)
242                 {
243                     m_param->scaleFactor = 2;
244 
245                     m_parent->m_passEnc[m_id]->m_param->confWinBottomOffset = refParam->confWinBottomOffset * 2;
246                     m_parent->m_passEnc[m_id]->m_param->confWinRightOffset = refParam->confWinRightOffset * 2;
247 
248                 }
249             }
250         }
251 
252         int h = m_param->sourceHeight + m_param->confWinBottomOffset;
253         int w = m_param->sourceWidth + m_param->confWinRightOffset;
254         if (h & (m_param->minCUSize - 1))
255         {
256             r = h & (m_param->minCUSize - 1);
257             padh = m_param->minCUSize - r;
258             m_param->confWinBottomOffset += padh;
259 
260         }
261 
262         if (w & (m_param->minCUSize - 1))
263         {
264             r = w & (m_param->minCUSize - 1);
265             padw = m_param->minCUSize - r;
266             m_param->confWinRightOffset += padw;
267         }
268     }
269 
startThreads()270     void PassEncoder::startThreads()
271     {
272         /* Start slave worker threads */
273         m_threadActive = true;
274         start();
275         /* Start reader threads*/
276         if (m_reader != NULL)
277         {
278             m_reader->m_threadActive = true;
279             m_reader->start();
280         }
281         /* Start scaling worker threads */
282         if (m_scaler != NULL)
283         {
284             m_scaler->m_threadActive = true;
285             m_scaler->start();
286         }
287     }
288 
    /* Publish one frame's analysis data into this encode's analysis queue.
     *
     * src - analysis data to copy; its buffers are only read here.
     *
     * Picks a slot in m_parent->m_analysisBuffer[m_id], rebuilds that slot,
     * copies the fields selected by the VBV/lookahead settings, the slice type
     * and analysisSaveReuseLevel, then increments the write counters. */
    void PassEncoder::copyInfo(x265_analysis_data * src)
    {

        /* Number of entries this encode has written so far */
        uint32_t written = m_parent->m_analysisWriteCnt[m_id].get();

        /* Default slot: the next position in ring order */
        int index = written % m_parent->m_queueSize;
        //If all streams have read analysis data, reuse that position in Queue

        /* These initial values are only consumed inside the scan below, which
         * reassigns both before comparing them */
        int read = m_parent->m_analysisRead[m_id][index].get();
        int write = m_parent->m_analysisWrite[m_id][index].get();

        /* Non-zero once at least m_queueSize entries have been written, i.e.
         * when slots have to be reused */
        int overwrite = written / m_parent->m_queueSize;
        bool emptyIdxFound = 0;
        /* Repeats the scan, without sleeping, until some slot's read count
         * equals its write count times numRefs. There is no break, so the
         * last matching slot of a scan is the one chosen. */
        while (!emptyIdxFound && overwrite)
        {
            for (uint32_t i = 0; i < m_parent->m_queueSize; i++)
            {
                read = m_parent->m_analysisRead[m_id][i].get();
                write = m_parent->m_analysisWrite[m_id][i].get();
                write *= m_cliopt.numRefs;

                if (read == write)
                {
                    index = i;
                    emptyIdxFound = 1;
                }
            }
        }

        x265_analysis_data *m_analysisInfo = &m_parent->m_analysisBuffer[m_id][index];

        /* Release what the slot held, take a shallow copy of src (pointer
         * members included), then call x265_alloc_analysis_data() on the slot.
         * Presumably that replaces the copied pointers with buffers owned by
         * the slot; confirm against its implementation. */
        x265_free_analysis_data(m_param, m_analysisInfo);
        memcpy(m_analysisInfo, src, sizeof(x265_analysis_data));
        x265_alloc_analysis_data(m_param, m_analysisInfo);

        /* Lookahead VBV costs are copied only when lookahead is disabled and
         * both VBV settings are non-zero */
        bool isVbv = m_param->rc.vbvBufferSize && m_param->rc.vbvMaxBitrate;
        if (m_param->bDisableLookahead && isVbv)
        {
            memcpy(m_analysisInfo->lookahead.intraSatdForVbv, src->lookahead.intraSatdForVbv, src->numCuInHeight * sizeof(uint32_t));
            memcpy(m_analysisInfo->lookahead.satdForVbv, src->lookahead.satdForVbv, src->numCuInHeight * sizeof(uint32_t));
            memcpy(m_analysisInfo->lookahead.intraVbvCost, src->lookahead.intraVbvCost, src->numCUsInFrame * sizeof(uint32_t));
            memcpy(m_analysisInfo->lookahead.vbvCost, src->lookahead.vbvCost, src->numCUsInFrame * sizeof(uint32_t));
        }

        if (src->sliceType == X265_TYPE_IDR || src->sliceType == X265_TYPE_I)
        {
            /* Intra slices: below save reuse level 2 nothing more is copied */
            if (m_param->analysisSaveReuseLevel < 2)
                goto ret;
            x265_analysis_intra_data *intraDst, *intraSrc;
            intraDst = (x265_analysis_intra_data*)m_analysisInfo->intraData;
            intraSrc = (x265_analysis_intra_data*)src->intraData;
            memcpy(intraDst->depth, intraSrc->depth, sizeof(uint8_t) * src->depthBytes);
            memcpy(intraDst->modes, intraSrc->modes, sizeof(uint8_t) * src->numCUsInFrame * src->numPartitions);
            memcpy(intraDst->partSizes, intraSrc->partSizes, sizeof(char) * src->depthBytes);
            memcpy(intraDst->chromaModes, intraSrc->chromaModes, sizeof(uint8_t) * src->depthBytes);
            /* QP offsets are copied only when cuTree is enabled */
            if (m_param->rc.cuTree)
                memcpy(intraDst->cuQPOff, intraSrc->cuQPOff, sizeof(int8_t) * src->depthBytes);
        }
        else
        {
            /* Inter slices. Intra data inside them is copied for P slices, or
             * for any slice when bIntraInBFrames is set. */
            bool bIntraInInter = (src->sliceType == X265_TYPE_P || m_param->bIntraInBFrames);
            /* One prediction direction for P slices, two otherwise */
            int numDir = src->sliceType == X265_TYPE_P ? 1 : 2;
            /* Weight parameters are copied at every reuse level: three
             * WeightParam entries per direction */
            memcpy(m_analysisInfo->wt, src->wt, sizeof(WeightParam) * 3 * numDir);
            if (m_param->analysisSaveReuseLevel < 2)
                goto ret;
            x265_analysis_inter_data *interDst, *interSrc;
            interDst = (x265_analysis_inter_data*)m_analysisInfo->interData;
            interSrc = (x265_analysis_inter_data*)src->interData;
            memcpy(interDst->depth, interSrc->depth, sizeof(uint8_t) * src->depthBytes);
            memcpy(interDst->modes, interSrc->modes, sizeof(uint8_t) * src->depthBytes);
            if (m_param->rc.cuTree)
                memcpy(interDst->cuQPOff, interSrc->cuQPOff, sizeof(int8_t) * src->depthBytes);
            /* Levels above 4 add partition sizes and merge flags */
            if (m_param->analysisSaveReuseLevel > 4)
            {
                memcpy(interDst->partSize, interSrc->partSize, sizeof(uint8_t) * src->depthBytes);
                memcpy(interDst->mergeFlag, interSrc->mergeFlag, sizeof(uint8_t) * src->depthBytes);
                /* Level 10 adds inter directions and per-direction motion data */
                if (m_param->analysisSaveReuseLevel == 10)
                {
                    memcpy(interDst->interDir, interSrc->interDir, sizeof(uint8_t) * src->depthBytes);
                    for (int dir = 0; dir < numDir; dir++)
                    {
                        memcpy(interDst->mvpIdx[dir], interSrc->mvpIdx[dir], sizeof(uint8_t) * src->depthBytes);
                        memcpy(interDst->refIdx[dir], interSrc->refIdx[dir], sizeof(int8_t) * src->depthBytes);
                        memcpy(interDst->mv[dir], interSrc->mv[dir], sizeof(MV) * src->depthBytes);
                    }
                    if (bIntraInInter)
                    {
                        x265_analysis_intra_data *intraDst = (x265_analysis_intra_data*)m_analysisInfo->intraData;
                        x265_analysis_intra_data *intraSrc = (x265_analysis_intra_data*)src->intraData;
                        memcpy(intraDst->modes, intraSrc->modes, sizeof(uint8_t) * src->numPartitions * src->numCUsInFrame);
                        memcpy(intraDst->chromaModes, intraSrc->chromaModes, sizeof(uint8_t) * src->depthBytes);
                    }
               }
            }
            /* Every level from 2 up, except 10, copies the ref array instead */
            if (m_param->analysisSaveReuseLevel != 10)
                memcpy(interDst->ref, interSrc->ref, sizeof(int32_t) * src->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * numDir);
        }

ret:
        //increment analysis Write counter
        /* Both the encode-wide write count and the chosen slot's own write
         * count are advanced */
        m_parent->m_analysisWriteCnt[m_id].incr();
        m_parent->m_analysisWrite[m_id][index].incr();
        return;
    }
393 
394 
    /* Hand the next queued input picture to the caller and, for passes that
     * load analysis, the matching analysis data from the reference pass.
     *
     * dstPic - receives a shallow copy of the queued picture: the plane
     *          pointers alias the queue entry and no pixel data is copied.
     *
     * Returns true when dstPic was filled. Returns false when this thread is
     * no longer active, when no unread picture is queued, or when no analysis
     * entry is available for a loading pass.
     *
     * The read counters are not advanced here; threadMain() increments them
     * after the encode call. */
    bool PassEncoder::readPicture(x265_picture *dstPic)
    {
        /* Wait until there is an input frame to read, unless input has ended */
        int ipread = m_parent->m_picReadCnt[m_id].get();
        int ipwrite = m_parent->m_picWriteCnt[m_id].get();

        /* Analysis is loaded only when a load level is set and more than one
         * encode is running */
        bool isAbrLoad = m_cliopt.loadLevel && (m_parent->m_numEncodes > 1);
        while (!m_inputOver && (ipread == ipwrite))
        {
            ipwrite = m_parent->m_picWriteCnt[m_id].waitForChange(ipwrite);
        }

        if (m_threadActive && ipread < ipwrite)
        {
            /* Input slot to read from the input queue. A pass that needs no
             * analysis info does not wait on the analysis queue. */
            int readPos = ipread % m_parent->m_queueSize;
            x265_analysis_data* analysisData = 0;

            if (isAbrLoad)
            {
                /* Analysis data comes from the queue of the pass named by refId */
                int analysisQId = m_cliopt.refId;
                /* Wait until that queue holds analysis data not yet read */
                int analysisWrite = m_parent->m_analysisWriteCnt[analysisQId].get();
                /* Written entries scaled by the reference pass's numRefs, for
                 * comparison with the read count (apparently each entry is
                 * read once per referencing pass; confirm) */
                int written = analysisWrite * m_parent->m_passEnc[analysisQId]->m_cliopt.numRefs;
                int analysisRead = m_parent->m_analysisReadCnt[analysisQId].get();

                while (m_threadActive && written == analysisRead)
                {
                    analysisWrite = m_parent->m_analysisWriteCnt[analysisQId].waitForChange(analysisWrite);
                    written = analysisWrite * m_parent->m_passEnc[analysisQId]->m_cliopt.numRefs;
                }

                if (analysisRead < written)
                {
                    int analysisIdx = 0;
                    if (!m_param->bDisableLookahead)
                    {
                        /* Lookahead enabled: find the slot whose poc equals
                         * the input read count and which still has reads
                         * outstanding. The scan repeats until one is found. */
                        bool analysisdRead = false;
                        while ((analysisRead < written) && !analysisdRead)
                        {
                            /* Wait until the reference pass's write count has
                             * reached the input read count */
                            while (analysisWrite < ipread)
                            {
                                analysisWrite = m_parent->m_analysisWriteCnt[analysisQId].waitForChange(analysisWrite);
                                written = analysisWrite * m_parent->m_passEnc[analysisQId]->m_cliopt.numRefs;
                            }
                            for (uint32_t i = 0; i < m_parent->m_queueSize; i++)
                            {
                                analysisData = &m_parent->m_analysisBuffer[analysisQId][i];
                                int read = m_parent->m_analysisRead[analysisQId][i].get();
                                int write = m_parent->m_analysisWrite[analysisQId][i].get() * m_parent->m_passEnc[analysisQId]->m_cliopt.numRefs;
                                if ((analysisData->poc == (uint32_t)(ipread)) && (read < write))
                                {
                                    analysisIdx = i;
                                    analysisdRead = true;
                                    break;
                                }
                            }
                        }
                    }
                    else
                    {
                        /* Lookahead disabled: take analysis entries in ring
                         * order and derive the input slot from the entry's
                         * poc, waiting until that picture has been queued */
                        analysisIdx = analysisRead % m_parent->m_queueSize;
                        analysisData = &m_parent->m_analysisBuffer[analysisQId][analysisIdx];
                        readPos = analysisData->poc % m_parent->m_queueSize;
                        while ((ipwrite < readPos) || ((ipwrite - 1) < (int)analysisData->poc))
                        {
                            ipwrite = m_parent->m_picWriteCnt[m_id].waitForChange(ipwrite);
                        }
                    }

                    /* Remembered so threadMain() can bump this slot's read counter */
                    m_lastIdx = analysisIdx;
                }
                else
                    return false;
            }


            x265_picture *srcPic = (x265_picture*)(m_parent->m_inputPicBuffer[m_id][readPos]);

            /* Field-by-field shallow copy of the queued picture */
            x265_picture *pic = (x265_picture*)(dstPic);
            pic->colorSpace = srcPic->colorSpace;
            pic->bitDepth = srcPic->bitDepth;
            pic->framesize = srcPic->framesize;
            pic->height = srcPic->height;
            pic->pts = srcPic->pts;
            pic->dts = srcPic->dts;
            pic->reorderedPts = srcPic->reorderedPts;
            pic->width = srcPic->width;
            pic->analysisData = srcPic->analysisData;
            pic->userSEI = srcPic->userSEI;
            pic->stride[0] = srcPic->stride[0];
            pic->stride[1] = srcPic->stride[1];
            pic->stride[2] = srcPic->stride[2];
            pic->planes[0] = srcPic->planes[0];
            pic->planes[1] = srcPic->planes[1];
            pic->planes[2] = srcPic->planes[2];
            /* For loading passes the reference pass's analysis data replaces
             * the copy taken from the queued picture above */
            if (isAbrLoad)
                pic->analysisData = *analysisData;
            return true;
        }
        else
            return false;
    }
499 
threadMain()500     void PassEncoder::threadMain()
501     {
502         THREAD_NAME("PassEncoder", m_id);
503 
504         while (m_threadActive)
505         {
506 
507 #if ENABLE_LIBVMAF
508             x265_vmaf_data* vmafdata = m_cliopt.vmafData;
509 #endif
510             /* This allows muxers to modify bitstream format */
511             m_cliopt.output->setParam(m_param);
512             const x265_api* api = m_cliopt.api;
513             ReconPlay* reconPlay = NULL;
514             if (m_cliopt.reconPlayCmd)
515                 reconPlay = new ReconPlay(m_cliopt.reconPlayCmd, *m_param);
516             char* profileName = m_cliopt.encName ? m_cliopt.encName : (char *)"x265";
517 
518             if (m_cliopt.zoneFile)
519             {
520                 if (!m_cliopt.parseZoneFile())
521                 {
522                     x265_log(NULL, X265_LOG_ERROR, "Unable to parse zonefile in %s\n", profileName);
523                     fclose(m_cliopt.zoneFile);
524                     m_cliopt.zoneFile = NULL;
525                 }
526             }
527 
528             if (signal(SIGINT, sigint_handler) == SIG_ERR)
529                 x265_log(m_param, X265_LOG_ERROR, "Unable to register CTRL+C handler: %s in %s\n",
530                     strerror(errno), profileName);
531 
532             x265_picture pic_orig, pic_out;
533             x265_picture *pic_in = &pic_orig;
534             /* Allocate recon picture if analysis save/load is enabled */
535             std::priority_queue<int64_t>* pts_queue = m_cliopt.output->needPTS() ? new std::priority_queue<int64_t>() : NULL;
536             x265_picture *pic_recon = (m_cliopt.recon || m_param->analysisSave || m_param->analysisLoad || pts_queue || reconPlay || m_param->csvLogLevel) ? &pic_out : NULL;
537             uint32_t inFrameCount = 0;
538             uint32_t outFrameCount = 0;
539             x265_nal *p_nal;
540             x265_stats stats;
541             uint32_t nal;
542             int16_t *errorBuf = NULL;
543             bool bDolbyVisionRPU = false;
544             uint8_t *rpuPayload = NULL;
545             int inputPicNum = 1;
546             x265_picture picField1, picField2;
547             x265_analysis_data* analysisInfo = (x265_analysis_data*)(&pic_out.analysisData);
548             bool isAbrSave = m_cliopt.saveLevel && (m_parent->m_numEncodes > 1);
549 
550             if (!m_param->bRepeatHeaders && !m_param->bEnableSvtHevc)
551             {
552                 if (api->encoder_headers(m_encoder, &p_nal, &nal) < 0)
553                 {
554                     x265_log(m_param, X265_LOG_ERROR, "Failure generating stream headers in %s\n", profileName);
555                     m_ret = 3;
556                     goto fail;
557                 }
558                 else
559                     m_cliopt.totalbytes += m_cliopt.output->writeHeaders(p_nal, nal);
560             }
561 
562             if (m_param->bField && m_param->interlaceMode)
563             {
564                 api->picture_init(m_param, &picField1);
565                 api->picture_init(m_param, &picField2);
566                 // return back the original height of input
567                 m_param->sourceHeight *= 2;
568                 api->picture_init(m_param, &pic_orig);
569             }
570             else
571                 api->picture_init(m_param, &pic_orig);
572 
573             if (m_param->dolbyProfile && m_cliopt.dolbyVisionRpu)
574             {
575                 rpuPayload = X265_MALLOC(uint8_t, 1024);
576                 pic_in->rpu.payload = rpuPayload;
577                 if (pic_in->rpu.payload)
578                     bDolbyVisionRPU = true;
579             }
580 
581             if (m_cliopt.bDither)
582             {
583                 errorBuf = X265_MALLOC(int16_t, m_param->sourceWidth + 1);
584                 if (errorBuf)
585                     memset(errorBuf, 0, (m_param->sourceWidth + 1) * sizeof(int16_t));
586                 else
587                     m_cliopt.bDither = false;
588             }
589 
590             // main encoder loop
591             while (pic_in && !b_ctrl_c)
592             {
593                 pic_orig.poc = (m_param->bField && m_param->interlaceMode) ? inFrameCount * 2 : inFrameCount;
594                 if (m_cliopt.qpfile)
595                 {
596                     if (!m_cliopt.parseQPFile(pic_orig))
597                     {
598                         x265_log(NULL, X265_LOG_ERROR, "can't parse qpfile for frame %d in %s\n",
599                             pic_in->poc, profileName);
600                         fclose(m_cliopt.qpfile);
601                         m_cliopt.qpfile = NULL;
602                     }
603                 }
604 
605                 if (m_cliopt.framesToBeEncoded && inFrameCount >= m_cliopt.framesToBeEncoded)
606                     pic_in = NULL;
607                 else if (readPicture(pic_in))
608                     inFrameCount++;
609                 else
610                     pic_in = NULL;
611 
612                 if (pic_in)
613                 {
614                     if (pic_in->bitDepth > m_param->internalBitDepth && m_cliopt.bDither)
615                     {
616                         x265_dither_image(pic_in, m_cliopt.input->getWidth(), m_cliopt.input->getHeight(), errorBuf, m_param->internalBitDepth);
617                         pic_in->bitDepth = m_param->internalBitDepth;
618                     }
619                     /* Overwrite PTS */
620                     pic_in->pts = pic_in->poc;
621 
622                     // convert to field
623                     if (m_param->bField && m_param->interlaceMode)
624                     {
625                         int height = pic_in->height >> 1;
626 
627                         int static bCreated = 0;
628                         if (bCreated == 0)
629                         {
630                             bCreated = 1;
631                             inputPicNum = 2;
632                             picField1.fieldNum = 1;
633                             picField2.fieldNum = 2;
634 
635                             picField1.bitDepth = picField2.bitDepth = pic_in->bitDepth;
636                             picField1.colorSpace = picField2.colorSpace = pic_in->colorSpace;
637                             picField1.height = picField2.height = pic_in->height >> 1;
638                             picField1.framesize = picField2.framesize = pic_in->framesize >> 1;
639 
640                             size_t fieldFrameSize = (size_t)pic_in->framesize >> 1;
641                             char* field1Buf = X265_MALLOC(char, fieldFrameSize);
642                             char* field2Buf = X265_MALLOC(char, fieldFrameSize);
643 
644                             int stride = picField1.stride[0] = picField2.stride[0] = pic_in->stride[0];
645                             uint64_t framesize = stride * (height >> x265_cli_csps[pic_in->colorSpace].height[0]);
646                             picField1.planes[0] = field1Buf;
647                             picField2.planes[0] = field2Buf;
648                             for (int i = 1; i < x265_cli_csps[pic_in->colorSpace].planes; i++)
649                             {
650                                 picField1.planes[i] = field1Buf + framesize;
651                                 picField2.planes[i] = field2Buf + framesize;
652 
653                                 stride = picField1.stride[i] = picField2.stride[i] = pic_in->stride[i];
654                                 framesize += (stride * (height >> x265_cli_csps[pic_in->colorSpace].height[i]));
655                             }
656                             assert(framesize == picField1.framesize);
657                         }
658 
659                         picField1.pts = picField1.poc = pic_in->poc;
660                         picField2.pts = picField2.poc = pic_in->poc + 1;
661 
662                         picField1.userSEI = picField2.userSEI = pic_in->userSEI;
663 
664                         //if (pic_in->userData)
665                         //{
666                         //    // Have to handle userData here
667                         //}
668 
669                         if (pic_in->framesize)
670                         {
671                             for (int i = 0; i < x265_cli_csps[pic_in->colorSpace].planes; i++)
672                             {
673                                 char* srcP1 = (char*)pic_in->planes[i];
674                                 char* srcP2 = (char*)pic_in->planes[i] + pic_in->stride[i];
675                                 char* p1 = (char*)picField1.planes[i];
676                                 char* p2 = (char*)picField2.planes[i];
677 
678                                 int stride = picField1.stride[i];
679 
680                                 for (int y = 0; y < (height >> x265_cli_csps[pic_in->colorSpace].height[i]); y++)
681                                 {
682                                     memcpy(p1, srcP1, stride);
683                                     memcpy(p2, srcP2, stride);
684                                     srcP1 += 2 * stride;
685                                     srcP2 += 2 * stride;
686                                     p1 += stride;
687                                     p2 += stride;
688                                 }
689                             }
690                         }
691                     }
692 
693                     if (bDolbyVisionRPU)
694                     {
695                         if (m_param->bField && m_param->interlaceMode)
696                         {
697                             if (m_cliopt.rpuParser(&picField1) > 0)
698                                 goto fail;
699                             if (m_cliopt.rpuParser(&picField2) > 0)
700                                 goto fail;
701                         }
702                         else
703                         {
704                             if (m_cliopt.rpuParser(pic_in) > 0)
705                                 goto fail;
706                         }
707                     }
708                 }
709 
710                 for (int inputNum = 0; inputNum < inputPicNum; inputNum++)
711                 {
712                     x265_picture *picInput = NULL;
713                     if (inputPicNum == 2)
714                         picInput = pic_in ? (inputNum ? &picField2 : &picField1) : NULL;
715                     else
716                         picInput = pic_in;
717 
718                     int numEncoded = api->encoder_encode(m_encoder, &p_nal, &nal, picInput, pic_recon);
719 
720                     int idx = (inFrameCount - 1) % m_parent->m_queueSize;
721                     m_parent->m_picIdxReadCnt[m_id][idx].incr();
722                     m_parent->m_picReadCnt[m_id].incr();
723                     if (m_cliopt.loadLevel && picInput)
724                     {
725                         m_parent->m_analysisReadCnt[m_cliopt.refId].incr();
726                         m_parent->m_analysisRead[m_cliopt.refId][m_lastIdx].incr();
727                     }
728 
729                     if (numEncoded < 0)
730                     {
731                         b_ctrl_c = 1;
732                         m_ret = 4;
733                         break;
734                     }
735 
736                     if (reconPlay && numEncoded)
737                         reconPlay->writePicture(*pic_recon);
738 
739                     outFrameCount += numEncoded;
740 
741                     if (isAbrSave && numEncoded)
742                     {
743                         copyInfo(analysisInfo);
744                     }
745 
746                     if (numEncoded && pic_recon && m_cliopt.recon)
747                         m_cliopt.recon->writePicture(pic_out);
748                     if (nal)
749                     {
750                         m_cliopt.totalbytes += m_cliopt.output->writeFrame(p_nal, nal, pic_out);
751                         if (pts_queue)
752                         {
753                             pts_queue->push(-pic_out.pts);
754                             if (pts_queue->size() > 2)
755                                 pts_queue->pop();
756                         }
757                     }
758                     m_cliopt.printStatus(outFrameCount);
759                 }
760             }
761 
762             /* Flush the encoder */
763             while (!b_ctrl_c)
764             {
765                 int numEncoded = api->encoder_encode(m_encoder, &p_nal, &nal, NULL, pic_recon);
766                 if (numEncoded < 0)
767                 {
768                     m_ret = 4;
769                     break;
770                 }
771 
772                 if (reconPlay && numEncoded)
773                     reconPlay->writePicture(*pic_recon);
774 
775                 outFrameCount += numEncoded;
776                 if (isAbrSave && numEncoded)
777                 {
778                     copyInfo(analysisInfo);
779                 }
780 
781                 if (numEncoded && pic_recon && m_cliopt.recon)
782                     m_cliopt.recon->writePicture(pic_out);
783                 if (nal)
784                 {
785                     m_cliopt.totalbytes += m_cliopt.output->writeFrame(p_nal, nal, pic_out);
786                     if (pts_queue)
787                     {
788                         pts_queue->push(-pic_out.pts);
789                         if (pts_queue->size() > 2)
790                             pts_queue->pop();
791                     }
792                 }
793 
794                 m_cliopt.printStatus(outFrameCount);
795 
796                 if (!numEncoded)
797                     break;
798             }
799 
800             if (bDolbyVisionRPU)
801             {
802                 if (fgetc(m_cliopt.dolbyVisionRpu) != EOF)
803                     x265_log(NULL, X265_LOG_WARNING, "Dolby Vision RPU count is greater than frame count in %s\n",
804                         profileName);
805                 x265_log(NULL, X265_LOG_INFO, "VES muxing with Dolby Vision RPU file successful in %s\n",
806                     profileName);
807             }
808 
809             /* clear progress report */
810             if (m_cliopt.bProgress)
811                 fprintf(stderr, "%*s\r", 80, " ");
812 
813         fail:
814 
815             delete reconPlay;
816 
817             api->encoder_get_stats(m_encoder, &stats, sizeof(stats));
818             if (m_param->csvfn && !b_ctrl_c)
819 #if ENABLE_LIBVMAF
820                 api->vmaf_encoder_log(m_encoder, m_cliopt.argCnt, m_cliopt.argString, m_cliopt.param, vmafdata);
821 #else
822                 api->encoder_log(m_encoder, m_cliopt.argCnt, m_cliopt.argString);
823 #endif
824             api->encoder_close(m_encoder);
825 
826             int64_t second_largest_pts = 0;
827             int64_t largest_pts = 0;
828             if (pts_queue && pts_queue->size() >= 2)
829             {
830                 second_largest_pts = -pts_queue->top();
831                 pts_queue->pop();
832                 largest_pts = -pts_queue->top();
833                 pts_queue->pop();
834                 delete pts_queue;
835                 pts_queue = NULL;
836             }
837             m_cliopt.output->closeFile(largest_pts, second_largest_pts);
838 
839             if (b_ctrl_c)
840                 general_log(m_param, NULL, X265_LOG_INFO, "aborted at input frame %d, output frame %d in %s\n",
841                     m_cliopt.seek + inFrameCount, stats.encodedPictureCount, profileName);
842 
843             api->param_free(m_param);
844 
845             X265_FREE(errorBuf);
846             X265_FREE(rpuPayload);
847 
848             m_threadActive = false;
849             m_parent->m_numActiveEncodes.decr();
850         }
851     }
852 
destroy()853     void PassEncoder::destroy()
854     {
855         stop();
856         if (m_reader)
857         {
858             m_reader->stop();
859             delete m_reader;
860         }
861         else
862         {
863             m_scaler->stop();
864             m_scaler->destroy();
865             delete m_scaler;
866         }
867     }
868 
Scaler(int threadId,int threadNum,int id,VideoDesc * src,VideoDesc * dst,PassEncoder * parentEnc)869     Scaler::Scaler(int threadId, int threadNum, int id, VideoDesc *src, VideoDesc *dst, PassEncoder *parentEnc)
870     {
871         m_parentEnc = parentEnc;
872         m_id = id;
873         m_srcFormat = src;
874         m_dstFormat = dst;
875         m_threadActive = false;
876         m_scaleFrameSize = 0;
877         m_filterManager = NULL;
878         m_threadId = threadId;
879         m_threadTotal = threadNum;
880 
881         int csp = dst->m_csp;
882         uint32_t pixelbytes = dst->m_inputDepth > 8 ? 2 : 1;
883         for (int i = 0; i < x265_cli_csps[csp].planes; i++)
884         {
885             int w = dst->m_width >> x265_cli_csps[csp].width[i];
886             int h = dst->m_height >> x265_cli_csps[csp].height[i];
887             m_scalePlanes[i] = w * h * pixelbytes;
888             m_scaleFrameSize += m_scalePlanes[i];
889         }
890 
891         if (src->m_height != dst->m_height || src->m_width != dst->m_width)
892         {
893             m_filterManager = new ScalerFilterManager;
894             m_filterManager->init(4, m_srcFormat, m_dstFormat);
895         }
896     }
897 
scalePic(x265_picture * destination,x265_picture * source)898     bool Scaler::scalePic(x265_picture * destination, x265_picture * source)
899     {
900         if (!destination || !source)
901             return false;
902         x265_param* param = m_parentEnc->m_param;
903         int pixelBytes = m_dstFormat->m_inputDepth > 8 ? 2 : 1;
904         if (m_srcFormat->m_height != m_dstFormat->m_height || m_srcFormat->m_width != m_dstFormat->m_width)
905         {
906             void **srcPlane = NULL, **dstPlane = NULL;
907             int srcStride[3], dstStride[3];
908             destination->bitDepth = source->bitDepth;
909             destination->colorSpace = source->colorSpace;
910             destination->pts = source->pts;
911             destination->dts = source->dts;
912             destination->reorderedPts = source->reorderedPts;
913             destination->poc = source->poc;
914             destination->userSEI = source->userSEI;
915             srcPlane = source->planes;
916             dstPlane = destination->planes;
917             srcStride[0] = source->stride[0];
918             destination->stride[0] = m_dstFormat->m_width * pixelBytes;
919             dstStride[0] = destination->stride[0];
920             if (param->internalCsp != X265_CSP_I400)
921             {
922                 srcStride[1] = source->stride[1];
923                 srcStride[2] = source->stride[2];
924                 destination->stride[1] = destination->stride[0] >> x265_cli_csps[param->internalCsp].width[1];
925                 destination->stride[2] = destination->stride[0] >> x265_cli_csps[param->internalCsp].width[2];
926                 dstStride[1] = destination->stride[1];
927                 dstStride[2] = destination->stride[2];
928             }
929             if (m_scaleFrameSize)
930             {
931                 m_filterManager->scale_pic(srcPlane, dstPlane, srcStride, dstStride);
932                 return true;
933             }
934             else
935                 x265_log(param, X265_LOG_INFO, "Empty frame received\n");
936         }
937         return false;
938     }
939 
threadMain()940     void Scaler::threadMain()
941     {
942         THREAD_NAME("Scaler", m_id);
943 
944         /* unscaled picture is stored in the last index */
945         uint32_t srcId = m_id - 1;
946         int QDepth = m_parentEnc->m_parent->m_queueSize;
947         while (!m_parentEnc->m_inputOver)
948         {
949 
950             uint32_t scaledWritten = m_parentEnc->m_parent->m_picWriteCnt[m_id].get();
951 
952             if (m_parentEnc->m_cliopt.framesToBeEncoded && scaledWritten >= m_parentEnc->m_cliopt.framesToBeEncoded)
953                 break;
954 
955             if (m_threadTotal > 1 && (m_threadId != scaledWritten % m_threadTotal))
956             {
957                 continue;
958             }
959             uint32_t written = m_parentEnc->m_parent->m_picWriteCnt[srcId].get();
960 
961             /*If all the input pictures are scaled by the current scale worker thread wait for input pictures*/
962             while (m_threadActive && (scaledWritten == written)) {
963                 written = m_parentEnc->m_parent->m_picWriteCnt[srcId].waitForChange(written);
964             }
965 
966             if (m_threadActive && scaledWritten < written)
967             {
968 
969                 int scaledWriteIdx = scaledWritten % QDepth;
970                 int overWritePicBuffer = scaledWritten / QDepth;
971                 int read = m_parentEnc->m_parent->m_picIdxReadCnt[m_id][scaledWriteIdx].get();
972 
973                 while (overWritePicBuffer && read < overWritePicBuffer)
974                 {
975                     read = m_parentEnc->m_parent->m_picIdxReadCnt[m_id][scaledWriteIdx].waitForChange(read);
976                 }
977 
978                 if (!m_parentEnc->m_parent->m_inputPicBuffer[m_id][scaledWriteIdx])
979                 {
980                     int framesize = 0;
981                     int planesize[3];
982                     int csp = m_dstFormat->m_csp;
983                     int stride[3];
984                     stride[0] = m_dstFormat->m_width;
985                     stride[1] = stride[0] >> x265_cli_csps[csp].width[1];
986                     stride[2] = stride[0] >> x265_cli_csps[csp].width[2];
987                     for (int i = 0; i < x265_cli_csps[csp].planes; i++)
988                     {
989                         uint32_t h = m_dstFormat->m_height >> x265_cli_csps[csp].height[i];
990                         planesize[i] = h * stride[i];
991                         framesize += planesize[i];
992                     }
993 
994                     m_parentEnc->m_parent->m_inputPicBuffer[m_id][scaledWriteIdx] = x265_picture_alloc();
995                     x265_picture_init(m_parentEnc->m_param, m_parentEnc->m_parent->m_inputPicBuffer[m_id][scaledWriteIdx]);
996 
997                     ((x265_picture*)m_parentEnc->m_parent->m_inputPicBuffer[m_id][scaledWritten % QDepth])->framesize = framesize;
998                     for (int32_t j = 0; j < x265_cli_csps[csp].planes; j++)
999                     {
1000                         m_parentEnc->m_parent->m_inputPicBuffer[m_id][scaledWritten % QDepth]->planes[j] = X265_MALLOC(char, planesize[j]);
1001                     }
1002                 }
1003 
1004                 x265_picture *srcPic = m_parentEnc->m_parent->m_inputPicBuffer[srcId][scaledWritten % QDepth];
1005                 x265_picture* destPic = m_parentEnc->m_parent->m_inputPicBuffer[m_id][scaledWriteIdx];
1006 
1007                 // Enqueue this picture up with the current encoder so that it will asynchronously encode
1008                 if (!scalePic(destPic, srcPic))
1009                     x265_log(NULL, X265_LOG_ERROR, "Unable to copy scaled input picture to input queue \n");
1010                 else
1011                     m_parentEnc->m_parent->m_picWriteCnt[m_id].incr();
1012                 m_scaledWriteCnt.incr();
1013                 m_parentEnc->m_parent->m_picIdxReadCnt[srcId][scaledWriteIdx].incr();
1014             }
1015             if (m_threadTotal > 1)
1016             {
1017                 written = m_parentEnc->m_parent->m_picWriteCnt[srcId].get();
1018                 int totalWrite = written / m_threadTotal;
1019                 if (written % m_threadTotal > m_threadId)
1020                     totalWrite++;
1021                 if (totalWrite == m_scaledWriteCnt.get())
1022                 {
1023                     m_parentEnc->m_parent->m_picWriteCnt[srcId].poke();
1024                     m_parentEnc->m_parent->m_picWriteCnt[m_id].poke();
1025                     break;
1026                 }
1027             }
1028             else
1029             {
1030                 /* Once end of video is reached and all frames are scaled, release wait on picwritecount */
1031                 scaledWritten = m_parentEnc->m_parent->m_picWriteCnt[m_id].get();
1032                 written = m_parentEnc->m_parent->m_picWriteCnt[srcId].get();
1033                 if (written == scaledWritten)
1034                 {
1035                     m_parentEnc->m_parent->m_picWriteCnt[srcId].poke();
1036                     m_parentEnc->m_parent->m_picWriteCnt[m_id].poke();
1037                     break;
1038                 }
1039             }
1040 
1041         }
1042         m_threadActive = false;
1043         destroy();
1044     }
1045 
Reader(int id,PassEncoder * parentEnc)1046     Reader::Reader(int id, PassEncoder *parentEnc)
1047     {
1048         m_parentEnc = parentEnc;
1049         m_id = id;
1050         m_input = parentEnc->m_input;
1051     }
1052 
threadMain()1053     void Reader::threadMain()
1054     {
1055         THREAD_NAME("Reader", m_id);
1056 
1057         int QDepth = m_parentEnc->m_parent->m_queueSize;
1058         x265_picture* src = x265_picture_alloc();
1059         x265_picture_init(m_parentEnc->m_param, src);
1060 
1061         while (m_threadActive)
1062         {
1063             uint32_t written = m_parentEnc->m_parent->m_picWriteCnt[m_id].get();
1064             uint32_t writeIdx = written % QDepth;
1065             uint32_t read = m_parentEnc->m_parent->m_picIdxReadCnt[m_id][writeIdx].get();
1066             uint32_t overWritePicBuffer = written / QDepth;
1067 
1068             if (m_parentEnc->m_cliopt.framesToBeEncoded && written >= m_parentEnc->m_cliopt.framesToBeEncoded)
1069                 break;
1070 
1071             while (overWritePicBuffer && read < overWritePicBuffer)
1072             {
1073                 read = m_parentEnc->m_parent->m_picIdxReadCnt[m_id][writeIdx].waitForChange(read);
1074             }
1075 
1076             x265_picture* dest = m_parentEnc->m_parent->m_inputPicBuffer[m_id][writeIdx];
1077             if (m_input->readPicture(*src))
1078             {
1079                 dest->poc = src->poc;
1080                 dest->pts = src->pts;
1081                 dest->userSEI = src->userSEI;
1082                 dest->bitDepth = src->bitDepth;
1083                 dest->framesize = src->framesize;
1084                 dest->height = src->height;
1085                 dest->width = src->width;
1086                 dest->colorSpace = src->colorSpace;
1087                 dest->userSEI = src->userSEI;
1088                 dest->rpu.payload = src->rpu.payload;
1089                 dest->picStruct = src->picStruct;
1090                 dest->stride[0] = src->stride[0];
1091                 dest->stride[1] = src->stride[1];
1092                 dest->stride[2] = src->stride[2];
1093 
1094                 if (!dest->planes[0])
1095                     dest->planes[0] = X265_MALLOC(char, dest->framesize);
1096 
1097                 memcpy(dest->planes[0], src->planes[0], src->framesize * sizeof(char));
1098                 dest->planes[1] = (char*)dest->planes[0] + src->stride[0] * src->height;
1099                 dest->planes[2] = (char*)dest->planes[1] + src->stride[1] * (src->height >> x265_cli_csps[src->colorSpace].height[1]);
1100                 m_parentEnc->m_parent->m_picWriteCnt[m_id].incr();
1101             }
1102             else
1103             {
1104                 m_threadActive = false;
1105                 m_parentEnc->m_inputOver = true;
1106                 m_parentEnc->m_parent->m_picWriteCnt[m_id].poke();
1107             }
1108         }
1109         x265_picture_free(src);
1110     }
1111 }
1112