1 /*****************************************************************************
2 * Copyright (C) 2013-2020 MulticoreWare, Inc
3 *
4 * Authors: Pooja Venkatesan <pooja@multicorewareinc.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
19 *
20 * This program is also available under a commercial proprietary license.
21 * For more information, contact us at license @ x265.com.
22 *****************************************************************************/
23 
24 #include "scaler.h"
25 
26 #if _MSC_VER
27 #pragma warning(disable: 4706) // assignment within conditional
28 #pragma warning(disable: 4244) // '=' : possible loss of data
29 #endif
30 
// Bounds of a signed 16-bit value (-32768 .. 32767); used as the clip range
// for the horizontally scaled intermediate samples.
#define SHORT_MIN (-(1 << 15))
#define SHORT_MAX ((1 << 15) - 1)
// Largest 10-bit value (1023); upper clip bound applied by yuv2PlaneX_c_h.
#define SHORT_MAX_10 ((1 << 10) - 1)
34 
35 namespace X265_NS{
36 
ScalerFilterManager()37 ScalerFilterManager::ScalerFilterManager() :
38     m_bitDepth(0),
39     m_algorithmFlags(0),
40     m_srcW(0),
41     m_srcH(0),
42     m_dstW(0),
43     m_dstH(0),
44     m_crSrcW(0),
45     m_crSrcH(0),
46     m_crDstW(0),
47     m_crDstH(0),
48     m_crSrcHSubSample(0),
49     m_crSrcVSubSample(0),
50     m_crDstHSubSample(0),
51     m_crDstVSubSample(0)
52 {
53     for (int i = 0; i < m_numSlice; i++)
54         m_slices[i] = NULL;
55     for (int i = 0; i < m_numFilter; i++)
56         m_ScalerFilters[i] = NULL;
57 }
58 
/* Copy the first `size` 64-bit coefficients from `filter` to `filter2`,
 * front to back. Nothing is copied when `size` is zero or negative. */
inline static void filter_copy_c(int64_t* filter, int64_t* filter2, int size)
{
    const int64_t* from = filter;
    int64_t* to = filter2;

    for (int remaining = size; remaining > 0; --remaining)
        *to++ = *from++;
}
64 
65 #if X265_DEPTH == 8
doScaling_c(int16_t * dst,int dstW,const uint8_t * src,const int16_t * filter,const int32_t * filterPos,int filterSize)66 static void doScaling_c(int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, const int32_t *filterPos, int filterSize)
67 {
68     for (int i = 0; i < dstW; i++)
69     {
70         int val = 0;
71         int sourcePos = filterPos[i];
72         for (int j = 0; j < filterSize; j++)
73             val += ((int)src[sourcePos + j]) * filter[filterSize * i + j];
74         // the cubic equation does overflow ...
75         dst[i] = x265_clip3(SHORT_MIN, SHORT_MAX, val >> 7);
76     }
77 }
/* Clamp a signed integer to the unsigned 8-bit range.
 *
 * Returns 0 for negative input, 255 for input above 255, and the value
 * itself otherwise. Written as plain comparisons so the result does not
 * depend on how the compiler shifts negative values and so that the most
 * negative int is handled without negating it. */
static uint8_t clipUint8(int a)
{
    if (a < 0)
        return 0;
    if (a > 0xFF)
        return 0xFF;
    return (uint8_t)a;
}
85 
yuv2PlaneX_c(const int16_t * filter,int filterSize,const int16_t ** src,uint8_t * dest,int dstW)86 static void yuv2PlaneX_c(const int16_t *filter, int filterSize, const int16_t **src, uint8_t *dest, int dstW)
87 {
88     for (int i = 0; i < dstW; i++)
89     {
90         int val = 64 << 12;
91         for (int j = 0; j < filterSize; j++)
92             val += src[j][i] * filter[j];
93         dest[i] = clipUint8(val >> 19);
94     }
95 }
96 #else
yuv2PlaneX_c_h(const int16_t * filter,int filterSize,const int16_t ** src,uint8_t * dest,int dstW)97 static void yuv2PlaneX_c_h(const int16_t *filter, int filterSize, const int16_t **src, uint8_t *dest, int dstW)
98 {
99     for (int i = 0; i < dstW; i++)
100     {
101         int val = 1 << 16;
102         uint16_t* dst16bit = (uint16_t *)dest;
103         for (int j = 0; j < filterSize; j++)
104             val += src[j][i] * filter[j];
105         uint16_t d = x265_clip3(0, SHORT_MAX_10, val >> 17);
106         ((uint8_t*)(&dst16bit[i]))[0] = (d);
107         ((uint8_t*)(&dst16bit[i]))[1] = (d) >> 8;
108     }
109 }
doScaling_c_h(int16_t * dst,int dstW,const uint8_t * src,const int16_t * filter,const int32_t * filterPos,int filterSize)110 static void doScaling_c_h(int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, const int32_t *filterPos, int filterSize)
111 {
112     const uint16_t *srcLocal = (const uint16_t *)src;
113     for (int i = 0; i < dstW; i++)
114     {
115         int val = 0;
116         int sourcePos = filterPos[i];
117         for (int j = 0; j < filterSize; j++)
118             val += ((int)srcLocal[sourcePos + j]) * filter[filterSize * i + j];
119         // the cubic equation does overflow
120         dst[i] = x265_clip3(SHORT_MIN, SHORT_MAX, val >> 9);
121     }
122 }
123 #endif
124 
ScalerFilter()125 ScalerFilter::ScalerFilter() :
126     m_filtLen(0),
127     m_filtPos(NULL),
128     m_filt(NULL),
129     m_sourceSlice(NULL),
130     m_destSlice(NULL)
131 {
132 }
133 
~ScalerFilter()134 ScalerFilter::~ScalerFilter()
135 {
136     if (m_filtPos) {
137         delete[] m_filtPos; m_filtPos = NULL;
138     }
139     if (m_filt) {
140         delete[] m_filt; m_filt = NULL;
141     }
142 }
143 
process(int sliceVer,int sliceHor)144 void ScalerHLumFilter::process(int sliceVer, int sliceHor)
145 {
146     uint8_t ** src = m_sourceSlice->m_plane[0].lineBuf;
147     uint8_t ** dst = m_destSlice->m_plane[0].lineBuf;
148     int sourcePos = sliceVer - m_sourceSlice->m_plane[0].sliceVer;
149     int destPos = sliceVer - m_destSlice->m_plane[0].sliceVer;
150     int dstW = m_destSlice->m_width;
151     for (int i = 0; i < sliceHor; ++i)
152     {
153         m_hFilterScaler->doScaling((int16_t*)dst[destPos + i], dstW, (const uint8_t *)src[sourcePos + i], m_filt, m_filtPos, m_filtLen);
154         m_destSlice->m_plane[0].sliceHor += 1;
155     }
156 }
157 
process(int sliceVer,int sliceHor)158 void ScalerHCrFilter::process(int sliceVer, int sliceHor)
159 {
160     uint8_t ** src1 = m_sourceSlice->m_plane[1].lineBuf;
161     uint8_t ** dst1 = m_destSlice->m_plane[1].lineBuf;
162     uint8_t ** src2 = m_sourceSlice->m_plane[2].lineBuf;
163     uint8_t ** dst2 = m_destSlice->m_plane[2].lineBuf;
164 
165     int sourcePos1 = sliceVer - m_sourceSlice->m_plane[1].sliceVer;
166     int destPos1 = sliceVer - m_destSlice->m_plane[1].sliceVer;
167     int sourcePos2 = sliceVer - m_sourceSlice->m_plane[2].sliceVer;
168     int destPos2 = sliceVer - m_destSlice->m_plane[2].sliceVer;
169 
170     int dstW = m_destSlice->m_width >> m_destSlice->m_hCrSubSample;
171 
172     for (int i = 0; i < sliceHor; ++i)
173     {
174         m_hFilterScaler->doScaling((int16_t*)dst1[destPos1 + i], dstW, src1[sourcePos1 + i], m_filt, m_filtPos, m_filtLen);
175         m_hFilterScaler->doScaling((int16_t*)dst2[destPos2 + i], dstW, src2[sourcePos2 + i], m_filt, m_filtPos, m_filtLen);
176         m_destSlice->m_plane[1].sliceHor += 1;
177         m_destSlice->m_plane[2].sliceHor += 1;
178     }
179 }
180 
yuv2PlaneX(const int16_t * filter,int filterSize,const int16_t ** src,uint8_t * dest,int dstW)181 void VFilterScaler8Bit::yuv2PlaneX(const int16_t *filter, int filterSize, const int16_t **src, uint8_t *dest, int dstW)
182 {
183     int IdxW = FACTOR_4;
184     int IdxF = FIL_DEF;
185 
186     (dstW % 4 == 0) && (filterSize == 6) && (IdxF = FIL_6) && (IdxW = FACTOR_4);
187     (dstW % 4 == 0) && (filterSize == 8) && (IdxF = FIL_8) && (IdxW = FACTOR_4);
188 
189 #if X265_DEPTH == 8
190     yuv2PlaneX_c(filter, filterSize, src, dest, dstW);
191 #else
192     yuv2PlaneX_c_h(filter, filterSize, src, dest, dstW);
193 #endif
194 }
195 
yuv2PlaneX(const int16_t * filter,int filterSize,const int16_t ** src,uint8_t * dest,int dstW)196 void VFilterScaler10Bit::yuv2PlaneX(const int16_t *filter, int filterSize, const int16_t **src, uint8_t *dest, int dstW)
197 {
198     int IdxW = FACTOR_4;
199     int IdxF = FIL_DEF;
200 
201     (dstW % 4 == 0) && (filterSize == 6) && (IdxF = FIL_6) && (IdxW = FACTOR_4);
202     (dstW % 4 == 0) && (filterSize == 8) && (IdxF = FIL_8) && (IdxW = FACTOR_4);
203 
204 #if X265_DEPTH == 8
205     yuv2PlaneX_c(filter, filterSize, src, dest, dstW);
206 #else
207     yuv2PlaneX_c_h(filter, filterSize, src, dest, dstW);
208 #endif
209 }
210 
process(int sliceVer,int sliceHor)211 void ScalerVLumFilter::process(int sliceVer, int sliceHor)
212 {
213     (void)sliceHor;
214     int first = X265_MAX(1 - m_filtLen, m_filtPos[sliceVer]);
215     int sp = first - m_sourceSlice->m_plane[0].sliceVer;
216     int dp = sliceVer - m_destSlice->m_plane[0].sliceVer;
217     uint8_t **src = m_sourceSlice->m_plane[0].lineBuf + sp;
218     uint8_t **dst = m_destSlice->m_plane[0].lineBuf + dp;
219     int16_t *filter = m_filt + (sliceVer * m_filtLen);
220     int dstW = m_destSlice->m_width;
221     m_vFilterScaler->yuv2PlaneX(filter, m_filtLen, (const int16_t**)src, dst[0], dstW);
222 }
223 
process(int sliceVer,int sliceHor)224 void ScalerVCrFilter::process(int sliceVer, int sliceHor)
225 {
226     (void)sliceHor;
227 
228     const int crSkipMask = (1 << m_destSlice->m_vCrSubSample) - 1;
229     if (sliceVer & crSkipMask)
230         return;
231     else
232     {
233         int dstW = m_destSlice->m_width >> m_destSlice->m_hCrSubSample;
234         int crSliceVer = sliceVer >> m_destSlice->m_vCrSubSample;
235         int first = X265_MAX(1 - m_filtLen, m_filtPos[crSliceVer]);
236         int sp1 = first - m_sourceSlice->m_plane[1].sliceVer;
237         int sp2 = first - m_sourceSlice->m_plane[2].sliceVer;
238         int dp1 = crSliceVer - m_destSlice->m_plane[1].sliceVer;
239         int dp2 = crSliceVer - m_destSlice->m_plane[2].sliceVer;
240         uint8_t **src1 = m_sourceSlice->m_plane[1].lineBuf + sp1;
241         uint8_t **src2 = m_sourceSlice->m_plane[2].lineBuf + sp2;
242         uint8_t **dst1 = m_destSlice->m_plane[1].lineBuf + dp1;
243         uint8_t **dst2 = m_destSlice->m_plane[2].lineBuf + dp2;
244         int16_t *filter = m_filt + (crSliceVer * m_filtLen);
245 
246         m_vFilterScaler->yuv2PlaneX((int16_t*)filter, m_filtLen, (const int16_t**)src1, dst1[0], dstW);
247         m_vFilterScaler->yuv2PlaneX((int16_t*)filter, m_filtLen, (const int16_t**)src2, dst2[0], dstW);
248     }
249 }
250 
initCoeff(int flag,int inc,int srcW,int dstW,int filtAlign,int one,int sourcePos,int destPos)251 int ScalerFilter::initCoeff(int flag, int inc, int srcW, int dstW, int filtAlign, int one, int sourcePos, int destPos)
252 {
253     int filterSize;
254     int filter2Size;
255     int minFilterSize;
256     int64_t *filter = NULL;
257     int64_t *filter2 = NULL;
258     const int64_t fone = 1LL << (54 - x265_min((int)X265_LOG2(srcW / dstW), 8));
259     int *outFilterSize = &m_filtLen;
260     int64_t xDstInSrc;
261     int sizeFactor = flag;
262 
263     // Init filter pos, the +3 is for the MMX(+1) / SSE(+3) scaler which reads over the end
264     m_filtPos = new int32_t[dstW + 3];
265     int32_t **filterPos = &m_filtPos;
266 
267     if (inc <= 1 << 16)
268         filterSize = 1 + sizeFactor; // upscale
269     else
270         filterSize = 1 + (sizeFactor * srcW + dstW - 1) / dstW;
271 
272     filterSize = x265_min(filterSize, srcW - 2);
273     filterSize = x265_max(filterSize, 1);
274     filter = new int64_t[dstW * sizeof(*filter) * filterSize];
275 
276     xDstInSrc = ((destPos*(int64_t)inc) >> 7) - ((sourcePos * 0x10000LL) >> 7);
277     for (int i = 0; i < dstW; i++)
278     {
279         int xx = (xDstInSrc - (filterSize - 2) * (1LL << 16)) / (1 << 17);
280         (*filterPos)[i] = xx;
281         for (int j = 0; j < filterSize; j++)
282         {
283             int64_t d = (X265_ABS(((int64_t)xx * (1 << 17)) - xDstInSrc)) << 13;
284             int64_t coeff = 0;
285 
286             if (inc > 1 << 16)
287                 d = d * dstW / srcW;
288 
289             if (flag == 4) // BiCUBIC
290             {
291                 int64_t B = (0) * (1 << 24);
292                 int64_t C = (0.6) * (1 << 24);
293 
294                 if (d >= 1LL << 31)
295                     coeff = 0.0;
296                 else
297                 {
298                     int64_t dd = (d  * d) >> 30;
299                     int64_t ddd = (dd * d) >> 30;
300 
301                     if (d < 1LL << 30)
302                         coeff = (12 * (1 << 24) - 9 * B - 6 * C) * ddd + (-18 * (1 << 24) + 12 * B + 6 * C) * dd + (6 * (1 << 24) - 2 * B) * (1 << 30);
303                     else
304                         coeff = (-B - 6 * C) * ddd + (6 * B + 30 * C) * dd + (-12 * B - 48 * C) * d + (8 * B + 24 * C) * (1 << 30);
305                 }
306                 coeff /= (1LL << 54) / fone;
307             }
308             else if (flag == 1) // BILINEAR
309             {
310                 coeff = (1 << 30) - d;
311                 if (coeff < 0)
312                     coeff = 0;
313                 coeff *= fone >> 30;
314             }
315             else
316                 assert(0);
317 
318             filter[i * filterSize + j] = coeff;
319             xx++;
320         }
321         xDstInSrc += 2 * inc;
322     }
323 
324     //apply src & dst Filter to filter -> filter2
325     X265_CHECK(filterSize > 0, "invalid filterSize value.\n");
326     filter2Size = filterSize;
327     filter2 = new int64_t[dstW * sizeof(*filter2) * filter2Size];
328 
329     /* This is hard to read code, but much faster. Speed is crucial here */
330     int index = RES_FACTOR_DEF;
331     int size = dstW * filterSize;
332 
333     (size % 4 == 0) && (index = RES_FACTOR_4);
334     (size % 8 == 0) && (index = RES_FACTOR_8);
335     (size % 16 == 0) && (index = RES_FACTOR_16);
336     (size % 32 == 0) && (index = RES_FACTOR_32);
337     (size % 64 == 0) && (index = RES_FACTOR_64);
338 
339     filter_copy_c(filter, filter2, size);
340 
341     delete[](filter);
342 
343     // try to reduce the filter-size (step1 find size and shift left)
344     // Assume it is near normalized (*0.5 or *2.0 is OK but * 0.001 is not).
345     minFilterSize = 0;
346     for (int i = dstW - 1; i >= 0; i--)
347     {
348         int min = filter2Size;
349         int64_t cutOff = 0.0;
350 
351         // get rid of near zero elements on the left by shifting left
352         for (int j = 0; j < filter2Size; j++)
353         {
354             int k;
355             cutOff += X265_ABS(filter2[i * filter2Size]);
356 
357             if (cutOff > SCALER_MAX_REDUCE_CUTOFF * fone)
358                 break;
359             // preserve monotonicity because the core can't handle the filter otherwise
360             if (i < dstW - 1 && (*filterPos)[i] >= (*filterPos)[i + 1])
361                 break;
362 
363             // move filter coefficients left
364             for (k = 1; k < filter2Size; k++)
365                 filter2[i * filter2Size + k - 1] = filter2[i * filter2Size + k];
366             filter2[i * filter2Size + k - 1] = 0;
367             (*filterPos)[i]++;
368         }
369 
370         cutOff = 0;
371         // count near zeros on the right
372         for (int j = filter2Size - 1; j > 0; j--)
373         {
374             cutOff += X265_ABS(filter2[i * filter2Size + j]);
375 
376             if (cutOff > SCALER_MAX_REDUCE_CUTOFF * fone)
377                 break;
378             min--;
379         }
380 
381         if (min > minFilterSize)
382             minFilterSize = min;
383     }
384 
385     X265_CHECK(minFilterSize > 0, "invalid minFilterSize value.\n");
386     filterSize = (minFilterSize + (filtAlign - 1)) & (~(filtAlign - 1));
387     X265_CHECK(filterSize > 0, "invalid filterSize value.\n");
388     filter = new int64_t[dstW*filterSize * sizeof(*filter)];
389 
390     *outFilterSize = filterSize;
391 
392     // try to reduce the filter-size (step2 reduce it)
393     for (int i = 0; i < dstW; i++)
394     {
395         for (int j = 0; j < filterSize; j++)
396         {
397             if (j >= filter2Size)
398                 filter[i * filterSize + j] = 0;
399             else
400                 filter[i * filterSize + j] = filter2[i * filter2Size + j];
401             if ((flag & SCALER_BITEXACT) && j >= minFilterSize)
402                 filter[i * filterSize + j] = 0;
403         }
404     }
405 
406     // fix borders
407     for (int i = 0; i < dstW; i++)
408     {
409         int j;
410         if ((*filterPos)[i] < 0)
411         {
412             // move filter coefficients left to compensate for filterPos
413             for (j = 1; j < filterSize; j++)
414             {
415                 int left = x265_max(j + (*filterPos)[i], 0);
416                 filter[i * filterSize + left] += filter[i * filterSize + j];
417                 filter[i * filterSize + j] = 0;
418             }
419             (*filterPos)[i] = 0;
420         }
421 
422         if ((*filterPos)[i] + filterSize > srcW)
423         {
424             int shift = (*filterPos)[i] + x265_min(filterSize - srcW, 0);
425             int64_t acc = 0;
426 
427             for (j = filterSize - 1; j >= 0; j--)
428             {
429                 if ((*filterPos)[i] + j >= srcW)
430                 {
431                     acc += filter[i * filterSize + j];
432                     filter[i * filterSize + j] = 0;
433                 }
434             }
435             for (j = filterSize - 1; j >= 0; j--)
436             {
437                 if (j < shift)
438                     filter[i * filterSize + j] = 0;
439                 else
440                     filter[i * filterSize + j] = filter[i * filterSize + j - shift];
441             }
442 
443             (*filterPos)[i] -= shift;
444             filter[i * filterSize + srcW - 1 - (*filterPos)[i]] += acc;
445         }
446 
447         X265_CHECK((*filterPos)[i] >= 0, "invalid: Value of (*filterPos)[%d] < 0.\n", i);
448         X265_CHECK((*filterPos)[i] < srcW, "invalid: Value of (*filterPos)[%d] > %d .\n", i, srcW);
449         if ((*filterPos)[i] + filterSize > srcW)
450         {
451             for (j = 0; j < filterSize; j++)
452             {
453                 X265_CHECK(!filter[i * filterSize + j], "invalid: Value of filter[%d * filterSize + %d] != 0.\n", i, j);
454                 X265_CHECK((*filterPos)[i] + j < srcW, "invalid: (*filterPos)[%d] + %d > %d .\n", i, i, srcW);
455             }
456         }
457     }
458 
459     // init filter
460     m_filt = new int16_t[(dstW + 3)*(*outFilterSize)];
461     int16_t **outFilter = &m_filt;
462 
463     // normalize & store in outFilter
464     for (int i = 0; i < dstW; i++)
465     {
466         int64_t error = 0;
467         int64_t sum = 0;
468 
469         for (int j = 0; j < filterSize; j++)
470             sum += filter[i * filterSize + j];
471         sum = (sum + one / 2) / one;
472         if (!sum)
473         {
474             x265_log(NULL, X265_LOG_WARNING, "Scaler: zero vector in scaling\n");
475             sum = 1;
476         }
477         for (int j = 0; j < *outFilterSize; j++)
478         {
479             int64_t v = filter[i * filterSize + j] + error;
480             int intV = ROUNDED_DIVISION(v, sum);
481             (*outFilter)[i * (*outFilterSize) + j] = intV;
482             error = v - intV * sum;
483         }
484     }
485 
486     (*filterPos)[dstW + 0] =
487         (*filterPos)[dstW + 1] =
488         (*filterPos)[dstW + 2] = (*filterPos)[dstW - 1];
489     for (int i = 0; i < *outFilterSize; i++)
490     {
491         int k = (dstW - 1) * (*outFilterSize) + i;
492         (*outFilter)[k + 1 * (*outFilterSize)] =
493             (*outFilter)[k + 2 * (*outFilterSize)] =
494             (*outFilter)[k + 3 * (*outFilterSize)] = (*outFilter)[k];
495     }
496 
497     delete[](filter);
498     delete[](filter2);
499     return 0;
500 }
501 
init(int algorithmFlags,VideoDesc * srcVideoDesc,VideoDesc * dstVideoDesc)502 int ScalerFilterManager::init(int algorithmFlags, VideoDesc *srcVideoDesc, VideoDesc *dstVideoDesc)
503 {
504     int srcW = m_srcW = srcVideoDesc->m_width;
505     int srcH = m_srcH = srcVideoDesc->m_height;
506     int dstW = m_dstW = dstVideoDesc->m_width;
507     int dstH = m_dstH = dstVideoDesc->m_height;
508     int lumXInc, crXInc;
509     int lumYInc, crYInc;
510     int  srcHCrPos;
511     int  dstHCrPos;
512     int  srcVCrPos;
513     int  dstVCrPos;
514     int dst_stride = SCALER_ALIGN(dstW * sizeof(int16_t) + 66, 16);
515     m_bitDepth = dstVideoDesc->m_inputDepth;
516     if (m_bitDepth == 16)
517         dst_stride <<= 1;
518 
519     m_algorithmFlags = algorithmFlags;
520     lumXInc = (((int64_t)srcW << 16) + (dstW >> 1)) / dstW;
521     lumYInc = (((int64_t)srcH << 16) + (dstH >> 1)) / dstH;
522 
523     srcHCrPos = -513;
524     dstHCrPos = -513;
525     srcVCrPos = -513;
526     dstVCrPos = -513;
527 
528     int srcCsp = srcVideoDesc->m_csp;
529     if (x265_cli_csps[srcCsp].planes > 1)
530     {
531         m_crSrcHSubSample = x265_cli_csps[srcCsp].width[1];
532         m_crSrcVSubSample = x265_cli_csps[srcCsp].height[1];
533         m_crSrcW = srcVideoDesc->m_width >> m_crSrcHSubSample;
534         m_crSrcH = srcVideoDesc->m_height >> m_crSrcVSubSample;
535         if (srcCsp == 1)// i420
536             srcVCrPos = 128;
537     }
538     else
539     {
540         m_crSrcW = 0;
541         m_crSrcH = 0;
542         m_crSrcHSubSample = 0;
543         m_crSrcVSubSample = 0;
544     }
545     int dstCsp = dstVideoDesc->m_csp;
546     if (x265_cli_csps[dstCsp].planes > 1)
547     {
548         m_crDstHSubSample = x265_cli_csps[dstCsp].width[1];
549         m_crDstVSubSample = x265_cli_csps[dstCsp].height[1];
550         m_crDstW = dstVideoDesc->m_width >> m_crDstHSubSample;
551         m_crDstH = dstVideoDesc->m_height >> m_crDstVSubSample;
552         if (dstCsp == 1)// i420
553             dstVCrPos = 128;
554     }
555     else
556     {
557         m_crDstW = 0;
558         m_crDstH = 0;
559         m_crDstHSubSample = 0;
560         m_crDstVSubSample = 0;
561     }
562     // Only srcCsp == dstCsp is supported at present
563     if (srcCsp != dstCsp)
564     {
565         x265_log(NULL, X265_LOG_ERROR, "wrong, source csp != destination csp \n");
566         return false;
567     }
568 
569     lumXInc = (((int64_t)srcW << 16) + (dstW >> 1)) / dstW;
570     lumYInc = (((int64_t)srcH << 16) + (dstH >> 1)) / dstH;
571     crXInc = (((int64_t)m_crSrcW << 16) + (m_crDstW >> 1)) / m_crDstW;
572     crYInc = (((int64_t)m_crSrcH << 16) + (m_crDstH >> 1)) / m_crDstH;
573 
574     const int filterAlign = 1;
575 
576     // init horizontal Luma Scaler filter
577     m_ScalerFilters[0] = new ScalerHLumFilter(m_bitDepth);
578     m_ScalerFilters[0]->initCoeff(m_algorithmFlags, lumXInc, srcW, dstW, filterAlign, 1 << 14, getLocalPos(0, 0), getLocalPos(0, 0));
579 
580     // init horizontal cr Scaler filter
581     m_ScalerFilters[1] = new ScalerHCrFilter(m_bitDepth);
582     m_ScalerFilters[1]->initCoeff(m_algorithmFlags, crXInc, m_crSrcW, m_crDstW, filterAlign, 1 << 14,
583         getLocalPos(m_crSrcHSubSample, srcHCrPos), getLocalPos(m_crDstHSubSample, dstHCrPos));
584 
585     // init vertical Luma scaler filter
586     m_ScalerFilters[2] = new ScalerVLumFilter(m_bitDepth);
587     m_ScalerFilters[2]->initCoeff(m_algorithmFlags, lumYInc, srcH, dstH, filterAlign, 1 << 12, getLocalPos(0, 0), getLocalPos(0, 0));
588 
589     // init vertical cr scaler filter
590     m_ScalerFilters[3] = new ScalerVCrFilter(m_bitDepth);
591     m_ScalerFilters[3]->initCoeff(m_algorithmFlags, crYInc, m_crSrcH, m_crDstH, filterAlign, 1 << 12,
592         getLocalPos(m_crSrcVSubSample, srcVCrPos), getLocalPos(m_crDstVSubSample, dstVCrPos));
593 
594     // init slice, must after filter initialization
595     initScalerSlice();
596 
597     // set slice
598     m_ScalerFilters[0]->setSlice(m_slices[0], m_slices[1]);
599     m_ScalerFilters[1]->setSlice(m_slices[0], m_slices[1]);
600 
601     m_ScalerFilters[2]->setSlice(m_slices[1], m_slices[2]);
602     m_ScalerFilters[3]->setSlice(m_slices[1], m_slices[2]);
603 
604     return 0;
605 }
606 
doScaling(int16_t * dst,int dstW,const uint8_t * src,const int16_t * filter,const int32_t * filterPos,int filterSize)607 void HFilterScaler8Bit::doScaling(int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, const int32_t *filterPos, int filterSize)
608 {
609     int IdxW = FACTOR_4;
610     int IdxF = FIL_DEF;
611 
612     /* This is hard to read code, but much faster. Speed is crucial here */
613     (dstW % 8 == 0) && (filterSize == 6) && (IdxF = FIL_6) && (IdxW = FACTOR_8);
614     (dstW % 8 == 0) && (filterSize == 8) && (IdxF = FIL_8) && (IdxW = FACTOR_8);
615     (dstW % 8 == 0) && (filterSize == 16) && (IdxF = FIL_16) && (IdxW = FACTOR_8);
616     (dstW % 8 == 0) && (filterSize == 11) && (IdxF = FIL_11) && (IdxW = FACTOR_8);
617     (dstW % 8 == 0) && (filterSize == 10) && (IdxF = FIL_10) && (IdxW = FACTOR_8);
618     (dstW % 8 == 0) && (filterSize == 9) && (IdxF = FIL_9) && (IdxW = FACTOR_8);
619     (dstW % 8 == 0) && (filterSize == 15) && (IdxF = FIL_15) && (IdxW = FACTOR_8);
620     (dstW % 8 == 0) && (filterSize == 13) && (IdxF = FIL_13) && (IdxW = FACTOR_8);
621 
622     /* Do not check multiple of width 4, if width is already multiple of 8 */
623     !(dstW % 8 == 0) && (dstW % 4 == 0) && (filterSize == 6) && (IdxF = FIL_6) && (IdxW = FACTOR_4);
624     !(dstW % 8 == 0) && (dstW % 4 == 0) && (filterSize == 8) && (IdxF = FIL_8) && (IdxW = FACTOR_4);
625     !(dstW % 8 == 0) && (dstW % 4 == 0) && (filterSize == 16) && (IdxF = FIL_16) && (IdxW = FACTOR_4);
626 
627     (dstW % 4 == 0) && (filterSize == 24) && (IdxF = FIL_24) && (IdxW = FACTOR_4);
628     (dstW % 4 == 0) && (filterSize == 22) && (IdxF = FIL_22) && (IdxW = FACTOR_4);
629     (dstW % 4 == 0) && (filterSize == 19) && (IdxF = FIL_19) && (IdxW = FACTOR_4);
630     (dstW % 4 == 0) && (filterSize == 17) && (IdxF = FIL_17) && (IdxW = FACTOR_4);
631 
632 #if X265_DEPTH == 8
633     doScaling_c(dst, dstW, src, filter, filterPos, filterSize);
634 #else
635     doScaling_c_h(dst, dstW, src, filter, filterPos, filterSize);
636 #endif
637 }
638 
doScaling(int16_t * dst,int dstW,const uint8_t * src,const int16_t * filter,const int32_t * filterPos,int filterSize)639 void HFilterScaler10Bit::doScaling(int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, const int32_t *filterPos, int filterSize)
640 {
641     int IdxW = FACTOR_4;
642     int IdxF = FIL_DEF;
643 
644     /* This is hard to read code, but much faster. Speed is crucial here */
645     (dstW % 8 == 0) && (filterSize == 6) && (IdxF = FIL_6) && (IdxW = FACTOR_8);
646     (dstW % 8 == 0) && (filterSize == 8) && (IdxF = FIL_8) && (IdxW = FACTOR_8);
647     (dstW % 8 == 0) && (filterSize == 16) && (IdxF = FIL_16) && (IdxW = FACTOR_8);
648     (dstW % 8 == 0) && (filterSize == 11) && (IdxF = FIL_11) && (IdxW = FACTOR_8);
649     (dstW % 8 == 0) && (filterSize == 10) && (IdxF = FIL_10) && (IdxW = FACTOR_8);
650     (dstW % 8 == 0) && (filterSize == 9) && (IdxF = FIL_9) && (IdxW = FACTOR_8);
651     (dstW % 8 == 0) && (filterSize == 15) && (IdxF = FIL_15) && (IdxW = FACTOR_8);
652     (dstW % 8 == 0) && (filterSize == 13) && (IdxF = FIL_13) && (IdxW = FACTOR_8);
653 
654     /* Do not check multiple of width 4, if width is already multiple of 8 */
655     !(dstW % 8 == 0) && (dstW % 4 == 0) && (filterSize == 6) && (IdxF = FIL_6) && (IdxW = FACTOR_4);
656     !(dstW % 8 == 0) && (dstW % 4 == 0) && (filterSize == 8) && (IdxF = FIL_8) && (IdxW = FACTOR_4);
657     !(dstW % 8 == 0) && (dstW % 4 == 0) && (filterSize == 16) && (IdxF = FIL_16) && (IdxW = FACTOR_4);
658 
659     (dstW % 4 == 0) && (filterSize == 24) && (IdxF = FIL_24) && (IdxW = FACTOR_4);
660     (dstW % 4 == 0) && (filterSize == 22) && (IdxF = FIL_22) && (IdxW = FACTOR_4);
661     (dstW % 4 == 0) && (filterSize == 19) && (IdxF = FIL_19) && (IdxW = FACTOR_4);
662     (dstW % 4 == 0) && (filterSize == 17) && (IdxF = FIL_17) && (IdxW = FACTOR_4);
663 
664 #if X265_DEPTH == 8
665     doScaling_c(dst, dstW, src, filter, filterPos, filterSize);
666 #else
667     doScaling_c_h(dst, dstW, src, filter, filterPos, filterSize);
668 #endif
669 }
670 
scale_pic(void ** src,void ** dst,int * srcStride,int * dstStride)671 int ScalerFilterManager::scale_pic(void ** src, void ** dst, int * srcStride, int * dstStride)
672 {
673     uint8_t** src_8bit, **dst_8bit;
674     src_8bit = (uint8_t**)src;
675     dst_8bit = (uint8_t**)dst;
676     if (!src_8bit || !dst_8bit)
677         return -1;
678 
679     const int srcsliceHor = m_srcH;
680     const int dstW = m_dstW;
681     const int dstH = m_dstH;
682     int32_t *vLumFilterPos = m_ScalerFilters[2]->m_filtPos;
683     int32_t *vCrFilterPos = m_ScalerFilters[3]->m_filtPos;
684     const int vLumFilterSize = m_ScalerFilters[2]->m_filtLen;
685     const int vCrFilterSize = m_ScalerFilters[3]->m_filtLen;
686     const int crSrcsliceHor = UH_CEIL_SHIFTR(srcsliceHor, m_crSrcVSubSample);
687 
688     // vars which will change and which we need to store back in the context
689     int lumBufIndex = -1;
690     int crBufIndex = -1;
691     int lastInLumBuf = -1;
692     int lastInCrBuf = -1;
693 
694     int hasLumHoles = 1;
695     int hasCrHoles = 1;
696 
697     ScalerSlice *src_slice = m_slices[0];
698     ScalerSlice *hout_slice = m_slices[1];
699     ScalerSlice *vout_slice = m_slices[2];
700     src_slice->initFromSrc((uint8_t**)src, srcStride, m_srcW, 0, srcsliceHor, 0, crSrcsliceHor, 1);
701     vout_slice->initFromSrc((uint8_t**)dst, dstStride, m_dstW, 0, dstH, 0, UH_CEIL_SHIFTR(dstH, m_crDstVSubSample), 0);
702 
703     hout_slice->m_plane[0].sliceVer = 0;
704     hout_slice->m_plane[1].sliceVer = 0;
705     hout_slice->m_plane[2].sliceVer = 0;
706     hout_slice->m_plane[3].sliceVer = 0;
707     hout_slice->m_plane[0].sliceHor = 0;
708     hout_slice->m_plane[1].sliceHor = 0;
709     hout_slice->m_plane[2].sliceHor = 0;
710     hout_slice->m_plane[3].sliceHor = 0;
711     hout_slice->m_width = dstW;
712 
713     for (int dstY = 0; dstY < dstH; dstY++)
714     {
715         const int crDstY = dstY >> m_crDstVSubSample;
716         const int firstLumSrcY = x265_max(1 - vLumFilterSize, vLumFilterPos[dstY]);
717         const int firstLumSrcY2 = x265_max(1 - vLumFilterSize, vLumFilterPos[x265_min(dstY | ((1 << m_crDstVSubSample) - 1), dstH - 1)]);
718         const int firstCrSrcY = x265_max(1 - vCrFilterSize, vCrFilterPos[crDstY]);
719 
720         int lastLumSrcY = x265_min(m_srcH, firstLumSrcY + vLumFilterSize) - 1;
721         int lastLumSrcY2 = x265_min(m_srcH, firstLumSrcY2 + vLumFilterSize) - 1;
722         int lastCrSrcY = x265_min(m_crSrcH, firstCrSrcY + vCrFilterSize) - 1;
723 
724         // handle holes
725         if (firstLumSrcY > lastInLumBuf)
726         {
727             hasLumHoles = lastInLumBuf != firstLumSrcY - 1;
728             if (hasLumHoles)
729             {
730                 hout_slice->m_plane[0].sliceVer = firstLumSrcY;
731                 hout_slice->m_plane[3].sliceVer = firstLumSrcY;
732                 hout_slice->m_plane[0].sliceHor =
733                     hout_slice->m_plane[3].sliceHor = 0;
734             }
735 
736             lastInLumBuf = firstLumSrcY - 1;
737         }
738         if (firstCrSrcY > lastInCrBuf)
739         {
740             hasCrHoles = lastInCrBuf != firstCrSrcY - 1;
741             if (hasCrHoles)
742             {
743                 hout_slice->m_plane[1].sliceVer = firstCrSrcY;
744                 hout_slice->m_plane[2].sliceVer = firstCrSrcY;
745                 hout_slice->m_plane[1].sliceHor =
746                     hout_slice->m_plane[2].sliceHor = 0;
747             }
748 
749             lastInCrBuf = firstCrSrcY - 1;
750         }
751 
752         // Do we have enough lines in this slice to output the dstY line
753         int enoughLines = lastLumSrcY2 < 0 + srcsliceHor && lastCrSrcY < UH_CEIL_SHIFTR(0 + srcsliceHor, m_crSrcVSubSample);
754         if (!enoughLines)
755         {
756             lastLumSrcY = 0 + srcsliceHor - 1;
757             lastCrSrcY = 0 + crSrcsliceHor - 1;
758             x265_log(NULL, X265_LOG_INFO, "buffering slice: lastLumSrcY %d lastCrSrcY %d\n", lastLumSrcY, lastCrSrcY);
759         }
760 
761         X265_CHECK(((lastLumSrcY - firstLumSrcY + 1) <= hout_slice->m_plane[0].availLines), "invalid value %d", lastLumSrcY - firstLumSrcY + 1);
762         X265_CHECK((lastCrSrcY - firstCrSrcY + 1) <= hout_slice->m_plane[1].availLines, "invalid value %d", lastCrSrcY - firstCrSrcY + 1);
763 
764         int firstPosY, lastPosY, firstCPosY, lastCPosY;
765         int posY = hout_slice->m_plane[0].sliceVer + hout_slice->m_plane[0].sliceHor;
766         if (posY <= lastLumSrcY && !hasLumHoles)
767         {
768             firstPosY = x265_max(firstLumSrcY, posY);
769             lastPosY = x265_min(firstLumSrcY + hout_slice->m_plane[0].availLines - 1, 0 + srcsliceHor - 1);
770         }
771         else
772         {
773             firstPosY = posY;
774             lastPosY = lastLumSrcY;
775         }
776 
777         int cPosY = hout_slice->m_plane[1].sliceVer + hout_slice->m_plane[1].sliceHor;
778         if (cPosY <= lastCrSrcY && !hasCrHoles)
779         {
780             firstCPosY = x265_max(firstCrSrcY, cPosY);
781             lastCPosY = x265_min(firstCrSrcY + hout_slice->m_plane[1].availLines - 1, UH_CEIL_SHIFTR(0 + srcsliceHor, m_crSrcVSubSample) - 1);
782         }
783         else
784         {
785             firstCPosY = cPosY;
786             lastCPosY = lastCrSrcY;
787         }
788 
789         hout_slice->rotate(lastPosY, lastCPosY);
790         // horizontal luma scale
791         if (posY < lastLumSrcY + 1)
792             m_ScalerFilters[0]->process(firstPosY, lastPosY - firstPosY + 1);
793 
794         lumBufIndex += lastLumSrcY - lastInLumBuf;
795         lastInLumBuf = lastLumSrcY;
796         // horizontal chroma Scale
797         if (cPosY < lastCrSrcY + 1)
798             m_ScalerFilters[1]->process(firstCPosY, lastCPosY - firstCPosY + 1);
799 
800         crBufIndex += lastCrSrcY - lastInCrBuf;
801         lastInCrBuf = lastCrSrcY;
802 
803         // wrap buf index around to stay inside the ring buffer
804         if (lumBufIndex >= vLumFilterSize)
805             lumBufIndex -= vLumFilterSize;
806         if (crBufIndex >= vCrFilterSize)
807             crBufIndex -= vCrFilterSize;
808         if (!enoughLines)
809             break;  // we can't output a dstY line so let's try with the next slice
810 
811         // vertical scale(output converter)
812         for (int i = 2; i < m_numFilter; ++i)
813             m_ScalerFilters[i]->process(dstY, 1);
814     }
815     return 0;
816 }
817 
getMinBufferSize(int * out_lum_size,int * out_cr_size)818 void ScalerFilterManager::getMinBufferSize(int *out_lum_size, int *out_cr_size)
819 {
820     int lumY;
821     int dstH = m_dstH;
822     int crDstH = m_crDstH;
823     int *lumFilterPos = m_ScalerFilters[2]->m_filtPos;
824     int *crFilterPos = m_ScalerFilters[3]->m_filtPos;
825     int lumFilterSize = m_ScalerFilters[2]->m_filtLen;
826     int crFilterSize = m_ScalerFilters[3]->m_filtLen;
827     int crSubSample = m_crSrcVSubSample;
828 
829     *out_lum_size = lumFilterSize;
830     *out_cr_size = crFilterSize;
831 
832     for (lumY = 0; lumY < dstH; lumY++)
833     {
834         int crY = (int64_t)lumY * crDstH / dstH;
835         int nextSlice = x265_max(lumFilterPos[lumY] + lumFilterSize - 1, ((crFilterPos[crY] + crFilterSize - 1) << crSubSample));
836 
837         nextSlice >>= crSubSample;
838         nextSlice <<= crSubSample;
839         (*out_lum_size) = x265_max((*out_lum_size), nextSlice - lumFilterPos[lumY]);
840         (*out_cr_size) = x265_max((*out_cr_size), (nextSlice >> crSubSample) - crFilterPos[crY]);
841     }
842 }
843 
initScalerSlice()844 int ScalerFilterManager::initScalerSlice()
845 {
846     int ret = 0;
847     int dst_stride = SCALER_ALIGN(m_dstW * sizeof(int16_t) + 66, 16);
848     if (m_bitDepth == 16)
849         dst_stride <<= 1;
850 
851     int lumBufSize;
852     int crBufSize;
853     int vLumFilterSize = m_ScalerFilters[2]->m_filtLen; // Vertical filter size for luma pixels.
854     int vCrFilterSize = m_ScalerFilters[3]->m_filtLen;  // Vertical filter size for chroma pixels.
855     getMinBufferSize(&lumBufSize, &crBufSize);
856     lumBufSize = X265_MAX(lumBufSize, vLumFilterSize + MAX_NUM_LINES_AHEAD);
857     crBufSize = X265_MAX(crBufSize, vCrFilterSize + MAX_NUM_LINES_AHEAD);
858 
859     for (int i = 0; i < m_numSlice; i++)
860         m_slices[i] = new ScalerSlice;
861     ret = m_slices[0]->create(m_srcH, m_crSrcH, m_crSrcHSubSample, m_crSrcVSubSample, 0);
862     if (ret < 0)
863     {
864         x265_log(NULL, X265_LOG_ERROR, "alloc_slice m_slice[0] failed\n");
865         return -1;
866     }
867 
868     // horizontal scaler output
869     ret = m_slices[1]->create(lumBufSize, crBufSize, m_crDstHSubSample, m_crDstVSubSample, 1);
870     if (ret < 0)
871     {
872         x265_log(NULL, X265_LOG_ERROR, "m_slice[1].create failed\n");
873         return -1;
874     }
875     ret = m_slices[1]->createLines(dst_stride, m_dstW);
876     if (ret < 0)
877     {
878         x265_log(NULL, X265_LOG_ERROR, "m_slice[1].createLines failed\n");
879         return -1;
880     }
881 
882     m_slices[1]->fillOnes(dst_stride >> 1, m_bitDepth == 16);
883 
884     // vertical scaler output
885     ret = m_slices[2]->create(m_dstH, m_crDstH, m_crDstHSubSample, m_crDstVSubSample, 0);
886     if (ret < 0)
887     {
888         x265_log(NULL, X265_LOG_ERROR, "m_slice[2].create failed\n");
889         return -1;
890     }
891 
892     return 0;
893 }
894 
getLocalPos(int crSubSample,int pos)895 int ScalerFilterManager::getLocalPos(int crSubSample, int pos)
896 {
897     if (pos == -1 || pos <= -513)
898         pos = (128 << crSubSample) - 128;
899     pos += 128; // relative to ideal left edge
900     return pos >> crSubSample;
901 }
902 
ScalerSlice()903 ScalerSlice::ScalerSlice() :
904     m_width(0),
905     m_hCrSubSample(0),
906     m_vCrSubSample(0),
907     m_isRing(0),
908     m_destroyLines(0)
909 {
910     for (int i = 0; i < m_numSlicePlane; i++)
911     {
912         m_plane[i].availLines = 0;
913         m_plane[i].sliceVer = 0;
914         m_plane[i].sliceHor = 0;
915         m_plane[i].lineBuf = NULL;
916     }
917 }
918 
destroy()919 void ScalerSlice::destroy()
920 {
921     if (m_destroyLines)
922         destroyLines();
923     for (int i = 0; i < m_numSlicePlane; i++)
924     {
925         if (m_plane[i].lineBuf)
926             X265_FREE(m_plane[i].lineBuf);
927     }
928 }
929 
create(int lumLines,int crLines,int h_sub_sample,int v_sub_sample,int ring)930 int ScalerSlice::create(int lumLines, int crLines, int h_sub_sample, int v_sub_sample, int ring)
931 {
932     int i;
933     int size[4] = { lumLines, crLines, crLines, lumLines };
934 
935     m_hCrSubSample = h_sub_sample;
936     m_vCrSubSample = v_sub_sample;
937     m_isRing = ring;
938     m_destroyLines = 0;
939 
940     for (i = 0; i < m_numSlicePlane; ++i)
941     {
942         int n = size[i] * (ring == 0 ? 1 : 3);
943         m_plane[i].lineBuf = X265_MALLOC(uint8_t*, n);
944         if (!m_plane[i].lineBuf)
945             return -1;
946 
947         m_plane[i].availLines = size[i];
948         m_plane[i].sliceVer = 0;
949         m_plane[i].sliceHor = 0;
950     }
951     return 0;
952 }
953 
/*
 * Slice lines contain extra bytes for vectorized code, so @size is the
 * allocated memory size of a line while @width is its number of pixels.
 */
createLines(int size,int width)958 int ScalerSlice::createLines(int size, int width)
959 {
960     int i;
961     int idx[2] = { 3, 2 };
962 
963     m_destroyLines = 1;
964     m_width = width;
965 
966     for (i = 0; i < 2; ++i) {
967         int n = m_plane[i].availLines;
968         int j;
969         int ii = idx[i];
970         assert(n == m_plane[ii].availLines);
971         for (j = 0; j < n; ++j)
972         {
973             // chroma plane line U and V are expected to be contiguous in memory
974             m_plane[i].lineBuf[j] = (uint8_t*)X265_MALLOC(uint8_t, size * 2 + 32);
975             if (!m_plane[i].lineBuf[j])
976             {
977                 destroyLines();
978                 return -1;
979             }
980             m_plane[ii].lineBuf[j] = m_plane[i].lineBuf[j] + size + 16;
981             if (m_isRing)
982             {
983                 m_plane[i].lineBuf[j + n] = m_plane[i].lineBuf[j];
984                 m_plane[ii].lineBuf[j + n] = m_plane[ii].lineBuf[j];
985             }
986         }
987     }
988 
989     return 0;
990 }
991 
destroyLines()992 void ScalerSlice::destroyLines()
993 {
994     int i;
995     for (i = 0; i < 2; ++i)
996     {
997         int n = m_plane[i].availLines;
998         int j;
999         for (j = 0; j < n; ++j)
1000         {
1001             X265_FREE(m_plane[i].lineBuf[j]);
1002             m_plane[i].lineBuf[j] = NULL;
1003             if (m_isRing)
1004                 m_plane[i].lineBuf[j + n] = NULL;
1005         }
1006     }
1007 
1008     for (i = 0; i < m_numSlicePlane; ++i)
1009         memset(m_plane[i].lineBuf, 0, sizeof(uint8_t*) * m_plane[i].availLines * (m_isRing ? 3 : 1));
1010     m_destroyLines = 0;
1011 }
1012 
fillOnes(int n,int is16bit)1013 void ScalerSlice::fillOnes(int n, int is16bit)
1014 {
1015     int i;
1016     for (i = 0; i < m_numSlicePlane; ++i)
1017     {
1018         int j;
1019         int size = m_plane[i].availLines;
1020         for (j = 0; j < size; ++j)
1021         {
1022             int k;
1023             int end = is16bit ? n >> 1 : n;
1024             // fill also one extra element
1025             end += 1;
1026             if (is16bit)
1027                 for (k = 0; k < end; ++k)
1028                     ((int32_t*)(m_plane[i].lineBuf[j]))[k] = 1 << 18;
1029             else
1030                 for (k = 0; k < end; ++k)
1031                     ((int16_t*)(m_plane[i].lineBuf[j]))[k] = 1 << 14;
1032         }
1033     }
1034 }
1035 
rotate(int lum,int cr)1036 int ScalerSlice::rotate(int lum, int cr)
1037 {
1038     int i;
1039     if (lum)
1040     {
1041         for (i = 0; i < m_numSlicePlane; i += 3)
1042         {
1043             int n = m_plane[i].availLines;
1044             int l = lum - m_plane[i].sliceVer;
1045 
1046             if (l >= n * 2)
1047             {
1048                 m_plane[i].sliceVer += n;
1049                 m_plane[i].sliceHor -= n;
1050             }
1051         }
1052     }
1053     if (cr)
1054     {
1055         for (i = 1; i < 3; ++i)
1056         {
1057             int n = m_plane[i].availLines;
1058             int l = cr - m_plane[i].sliceVer;
1059 
1060             if (l >= n * 2)
1061             {
1062                 m_plane[i].sliceVer += n;
1063                 m_plane[i].sliceHor -= n;
1064             }
1065         }
1066     }
1067     return 0;
1068 }
1069 
initFromSrc(uint8_t * src[4],const int stride[4],int srcW,int lumY,int lumH,int crY,int crH,int relative)1070 int ScalerSlice::initFromSrc(uint8_t *src[4], const int stride[4], int srcW, int lumY, int lumH, int crY, int crH, int relative)
1071 {
1072     int i = 0;
1073 
1074     const int start[m_numSlicePlane] = { lumY, crY, crY, lumY };
1075 
1076     const int end[m_numSlicePlane] = { lumY + lumH, crY + crH, crY + crH, lumY + lumH };
1077 
1078     uint8_t *const src_[m_numSlicePlane] = { src[0] + (relative ? 0 : start[0]) * stride[0],
1079         src[1] + (relative ? 0 : start[1]) * stride[1],
1080         src[2] + (relative ? 0 : start[2]) * stride[2],
1081         src[3] + (relative ? 0 : start[3]) * stride[3] };
1082 
1083     m_width = srcW;
1084 
1085     for (i = 0; i < m_numSlicePlane; ++i)
1086     {
1087         int j;
1088         int first = m_plane[i].sliceVer;
1089         int n = m_plane[i].availLines;
1090         int lines = end[i] - start[i];
1091         int tot_lines = end[i] - first;
1092 
1093         if (start[i] >= first && n >= tot_lines)
1094         {
1095             m_plane[i].sliceHor = x265_max(tot_lines, m_plane[i].sliceHor);
1096             for (j = 0; j < lines; j += 1)
1097                 m_plane[i].lineBuf[start[i] - first + j] = src_[i] + j * stride[i];
1098         }
1099         else
1100         {
1101             m_plane[i].sliceVer = start[i];
1102             lines = lines > n ? n : lines;
1103             m_plane[i].sliceHor = lines;
1104             for (j = 0; j < lines; j += 1)
1105                 m_plane[i].lineBuf[j] = src_[i] + j * stride[i];
1106         }
1107     }
1108     return 0;
1109 }
1110 }
1111