1 /*****************************************************************************
2 * Copyright (C) 2013-2020 MulticoreWare, Inc
3 *
4 * Authors: Pooja Venkatesan <pooja@multicorewareinc.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
19 *
20 * This program is also available under a commercial proprietary license.
21 * For more information, contact us at license @ x265.com.
22 *****************************************************************************/
23
24 #include "scaler.h"
25
26 #if _MSC_VER
27 #pragma warning(disable: 4706) // assignment within conditional
28 #pragma warning(disable: 4244) // '=' : possible loss of data
29 #endif
30
31 #define SHORT_MIN (-(1 << 15))
32 #define SHORT_MAX ((1 << 15) - 1)
33 #define SHORT_MAX_10 ((1 << 10) - 1)
34
35 namespace X265_NS{
36
ScalerFilterManager()37 ScalerFilterManager::ScalerFilterManager() :
38 m_bitDepth(0),
39 m_algorithmFlags(0),
40 m_srcW(0),
41 m_srcH(0),
42 m_dstW(0),
43 m_dstH(0),
44 m_crSrcW(0),
45 m_crSrcH(0),
46 m_crDstW(0),
47 m_crDstH(0),
48 m_crSrcHSubSample(0),
49 m_crSrcVSubSample(0),
50 m_crDstHSubSample(0),
51 m_crDstVSubSample(0)
52 {
53 for (int i = 0; i < m_numSlice; i++)
54 m_slices[i] = NULL;
55 for (int i = 0; i < m_numFilter; i++)
56 m_ScalerFilters[i] = NULL;
57 }
58
/* Copies the first `size` coefficients from `filter` into `filter2`.
 * Nothing is copied when `size` is zero or negative. */
inline static void filter_copy_c(int64_t* filter, int64_t* filter2, int size)
{
    int remaining = size;
    const int64_t* from = filter;
    int64_t* to = filter2;

    while (remaining > 0)
    {
        *to++ = *from++;
        remaining--;
    }
}
64
65 #if X265_DEPTH == 8
doScaling_c(int16_t * dst,int dstW,const uint8_t * src,const int16_t * filter,const int32_t * filterPos,int filterSize)66 static void doScaling_c(int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, const int32_t *filterPos, int filterSize)
67 {
68 for (int i = 0; i < dstW; i++)
69 {
70 int val = 0;
71 int sourcePos = filterPos[i];
72 for (int j = 0; j < filterSize; j++)
73 val += ((int)src[sourcePos + j]) * filter[filterSize * i + j];
74 // the cubic equation does overflow ...
75 dst[i] = x265_clip3(SHORT_MIN, SHORT_MAX, val >> 7);
76 }
77 }
/* Clamps `a` to the range of an unsigned 8-bit sample.
 *
 * Returns 0 for negative inputs, 255 for inputs above 255 and `a` itself
 * otherwise. Plain comparisons are used instead of the former
 * `(-a) >> 31` trick, which overflowed for INT_MIN and relied on how a
 * negative value is shifted right. */
static uint8_t clipUint8(int a)
{
    if (a < 0)
        return 0;
    if (a > 0xFF)
        return 0xFF;
    return (uint8_t)a;
}
85
yuv2PlaneX_c(const int16_t * filter,int filterSize,const int16_t ** src,uint8_t * dest,int dstW)86 static void yuv2PlaneX_c(const int16_t *filter, int filterSize, const int16_t **src, uint8_t *dest, int dstW)
87 {
88 for (int i = 0; i < dstW; i++)
89 {
90 int val = 64 << 12;
91 for (int j = 0; j < filterSize; j++)
92 val += src[j][i] * filter[j];
93 dest[i] = clipUint8(val >> 19);
94 }
95 }
96 #else
yuv2PlaneX_c_h(const int16_t * filter,int filterSize,const int16_t ** src,uint8_t * dest,int dstW)97 static void yuv2PlaneX_c_h(const int16_t *filter, int filterSize, const int16_t **src, uint8_t *dest, int dstW)
98 {
99 for (int i = 0; i < dstW; i++)
100 {
101 int val = 1 << 16;
102 uint16_t* dst16bit = (uint16_t *)dest;
103 for (int j = 0; j < filterSize; j++)
104 val += src[j][i] * filter[j];
105 uint16_t d = x265_clip3(0, SHORT_MAX_10, val >> 17);
106 ((uint8_t*)(&dst16bit[i]))[0] = (d);
107 ((uint8_t*)(&dst16bit[i]))[1] = (d) >> 8;
108 }
109 }
doScaling_c_h(int16_t * dst,int dstW,const uint8_t * src,const int16_t * filter,const int32_t * filterPos,int filterSize)110 static void doScaling_c_h(int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, const int32_t *filterPos, int filterSize)
111 {
112 const uint16_t *srcLocal = (const uint16_t *)src;
113 for (int i = 0; i < dstW; i++)
114 {
115 int val = 0;
116 int sourcePos = filterPos[i];
117 for (int j = 0; j < filterSize; j++)
118 val += ((int)srcLocal[sourcePos + j]) * filter[filterSize * i + j];
119 // the cubic equation does overflow
120 dst[i] = x265_clip3(SHORT_MIN, SHORT_MAX, val >> 9);
121 }
122 }
123 #endif
124
ScalerFilter()125 ScalerFilter::ScalerFilter() :
126 m_filtLen(0),
127 m_filtPos(NULL),
128 m_filt(NULL),
129 m_sourceSlice(NULL),
130 m_destSlice(NULL)
131 {
132 }
133
~ScalerFilter()134 ScalerFilter::~ScalerFilter()
135 {
136 if (m_filtPos) {
137 delete[] m_filtPos; m_filtPos = NULL;
138 }
139 if (m_filt) {
140 delete[] m_filt; m_filt = NULL;
141 }
142 }
143
process(int sliceVer,int sliceHor)144 void ScalerHLumFilter::process(int sliceVer, int sliceHor)
145 {
146 uint8_t ** src = m_sourceSlice->m_plane[0].lineBuf;
147 uint8_t ** dst = m_destSlice->m_plane[0].lineBuf;
148 int sourcePos = sliceVer - m_sourceSlice->m_plane[0].sliceVer;
149 int destPos = sliceVer - m_destSlice->m_plane[0].sliceVer;
150 int dstW = m_destSlice->m_width;
151 for (int i = 0; i < sliceHor; ++i)
152 {
153 m_hFilterScaler->doScaling((int16_t*)dst[destPos + i], dstW, (const uint8_t *)src[sourcePos + i], m_filt, m_filtPos, m_filtLen);
154 m_destSlice->m_plane[0].sliceHor += 1;
155 }
156 }
157
process(int sliceVer,int sliceHor)158 void ScalerHCrFilter::process(int sliceVer, int sliceHor)
159 {
160 uint8_t ** src1 = m_sourceSlice->m_plane[1].lineBuf;
161 uint8_t ** dst1 = m_destSlice->m_plane[1].lineBuf;
162 uint8_t ** src2 = m_sourceSlice->m_plane[2].lineBuf;
163 uint8_t ** dst2 = m_destSlice->m_plane[2].lineBuf;
164
165 int sourcePos1 = sliceVer - m_sourceSlice->m_plane[1].sliceVer;
166 int destPos1 = sliceVer - m_destSlice->m_plane[1].sliceVer;
167 int sourcePos2 = sliceVer - m_sourceSlice->m_plane[2].sliceVer;
168 int destPos2 = sliceVer - m_destSlice->m_plane[2].sliceVer;
169
170 int dstW = m_destSlice->m_width >> m_destSlice->m_hCrSubSample;
171
172 for (int i = 0; i < sliceHor; ++i)
173 {
174 m_hFilterScaler->doScaling((int16_t*)dst1[destPos1 + i], dstW, src1[sourcePos1 + i], m_filt, m_filtPos, m_filtLen);
175 m_hFilterScaler->doScaling((int16_t*)dst2[destPos2 + i], dstW, src2[sourcePos2 + i], m_filt, m_filtPos, m_filtLen);
176 m_destSlice->m_plane[1].sliceHor += 1;
177 m_destSlice->m_plane[2].sliceHor += 1;
178 }
179 }
180
yuv2PlaneX(const int16_t * filter,int filterSize,const int16_t ** src,uint8_t * dest,int dstW)181 void VFilterScaler8Bit::yuv2PlaneX(const int16_t *filter, int filterSize, const int16_t **src, uint8_t *dest, int dstW)
182 {
183 int IdxW = FACTOR_4;
184 int IdxF = FIL_DEF;
185
186 (dstW % 4 == 0) && (filterSize == 6) && (IdxF = FIL_6) && (IdxW = FACTOR_4);
187 (dstW % 4 == 0) && (filterSize == 8) && (IdxF = FIL_8) && (IdxW = FACTOR_4);
188
189 #if X265_DEPTH == 8
190 yuv2PlaneX_c(filter, filterSize, src, dest, dstW);
191 #else
192 yuv2PlaneX_c_h(filter, filterSize, src, dest, dstW);
193 #endif
194 }
195
yuv2PlaneX(const int16_t * filter,int filterSize,const int16_t ** src,uint8_t * dest,int dstW)196 void VFilterScaler10Bit::yuv2PlaneX(const int16_t *filter, int filterSize, const int16_t **src, uint8_t *dest, int dstW)
197 {
198 int IdxW = FACTOR_4;
199 int IdxF = FIL_DEF;
200
201 (dstW % 4 == 0) && (filterSize == 6) && (IdxF = FIL_6) && (IdxW = FACTOR_4);
202 (dstW % 4 == 0) && (filterSize == 8) && (IdxF = FIL_8) && (IdxW = FACTOR_4);
203
204 #if X265_DEPTH == 8
205 yuv2PlaneX_c(filter, filterSize, src, dest, dstW);
206 #else
207 yuv2PlaneX_c_h(filter, filterSize, src, dest, dstW);
208 #endif
209 }
210
process(int sliceVer,int sliceHor)211 void ScalerVLumFilter::process(int sliceVer, int sliceHor)
212 {
213 (void)sliceHor;
214 int first = X265_MAX(1 - m_filtLen, m_filtPos[sliceVer]);
215 int sp = first - m_sourceSlice->m_plane[0].sliceVer;
216 int dp = sliceVer - m_destSlice->m_plane[0].sliceVer;
217 uint8_t **src = m_sourceSlice->m_plane[0].lineBuf + sp;
218 uint8_t **dst = m_destSlice->m_plane[0].lineBuf + dp;
219 int16_t *filter = m_filt + (sliceVer * m_filtLen);
220 int dstW = m_destSlice->m_width;
221 m_vFilterScaler->yuv2PlaneX(filter, m_filtLen, (const int16_t**)src, dst[0], dstW);
222 }
223
process(int sliceVer,int sliceHor)224 void ScalerVCrFilter::process(int sliceVer, int sliceHor)
225 {
226 (void)sliceHor;
227
228 const int crSkipMask = (1 << m_destSlice->m_vCrSubSample) - 1;
229 if (sliceVer & crSkipMask)
230 return;
231 else
232 {
233 int dstW = m_destSlice->m_width >> m_destSlice->m_hCrSubSample;
234 int crSliceVer = sliceVer >> m_destSlice->m_vCrSubSample;
235 int first = X265_MAX(1 - m_filtLen, m_filtPos[crSliceVer]);
236 int sp1 = first - m_sourceSlice->m_plane[1].sliceVer;
237 int sp2 = first - m_sourceSlice->m_plane[2].sliceVer;
238 int dp1 = crSliceVer - m_destSlice->m_plane[1].sliceVer;
239 int dp2 = crSliceVer - m_destSlice->m_plane[2].sliceVer;
240 uint8_t **src1 = m_sourceSlice->m_plane[1].lineBuf + sp1;
241 uint8_t **src2 = m_sourceSlice->m_plane[2].lineBuf + sp2;
242 uint8_t **dst1 = m_destSlice->m_plane[1].lineBuf + dp1;
243 uint8_t **dst2 = m_destSlice->m_plane[2].lineBuf + dp2;
244 int16_t *filter = m_filt + (crSliceVer * m_filtLen);
245
246 m_vFilterScaler->yuv2PlaneX((int16_t*)filter, m_filtLen, (const int16_t**)src1, dst1[0], dstW);
247 m_vFilterScaler->yuv2PlaneX((int16_t*)filter, m_filtLen, (const int16_t**)src2, dst2[0], dstW);
248 }
249 }
250
initCoeff(int flag,int inc,int srcW,int dstW,int filtAlign,int one,int sourcePos,int destPos)251 int ScalerFilter::initCoeff(int flag, int inc, int srcW, int dstW, int filtAlign, int one, int sourcePos, int destPos)
252 {
253 int filterSize;
254 int filter2Size;
255 int minFilterSize;
256 int64_t *filter = NULL;
257 int64_t *filter2 = NULL;
258 const int64_t fone = 1LL << (54 - x265_min((int)X265_LOG2(srcW / dstW), 8));
259 int *outFilterSize = &m_filtLen;
260 int64_t xDstInSrc;
261 int sizeFactor = flag;
262
263 // Init filter pos, the +3 is for the MMX(+1) / SSE(+3) scaler which reads over the end
264 m_filtPos = new int32_t[dstW + 3];
265 int32_t **filterPos = &m_filtPos;
266
267 if (inc <= 1 << 16)
268 filterSize = 1 + sizeFactor; // upscale
269 else
270 filterSize = 1 + (sizeFactor * srcW + dstW - 1) / dstW;
271
272 filterSize = x265_min(filterSize, srcW - 2);
273 filterSize = x265_max(filterSize, 1);
274 filter = new int64_t[dstW * sizeof(*filter) * filterSize];
275
276 xDstInSrc = ((destPos*(int64_t)inc) >> 7) - ((sourcePos * 0x10000LL) >> 7);
277 for (int i = 0; i < dstW; i++)
278 {
279 int xx = (xDstInSrc - (filterSize - 2) * (1LL << 16)) / (1 << 17);
280 (*filterPos)[i] = xx;
281 for (int j = 0; j < filterSize; j++)
282 {
283 int64_t d = (X265_ABS(((int64_t)xx * (1 << 17)) - xDstInSrc)) << 13;
284 int64_t coeff = 0;
285
286 if (inc > 1 << 16)
287 d = d * dstW / srcW;
288
289 if (flag == 4) // BiCUBIC
290 {
291 int64_t B = (0) * (1 << 24);
292 int64_t C = (0.6) * (1 << 24);
293
294 if (d >= 1LL << 31)
295 coeff = 0.0;
296 else
297 {
298 int64_t dd = (d * d) >> 30;
299 int64_t ddd = (dd * d) >> 30;
300
301 if (d < 1LL << 30)
302 coeff = (12 * (1 << 24) - 9 * B - 6 * C) * ddd + (-18 * (1 << 24) + 12 * B + 6 * C) * dd + (6 * (1 << 24) - 2 * B) * (1 << 30);
303 else
304 coeff = (-B - 6 * C) * ddd + (6 * B + 30 * C) * dd + (-12 * B - 48 * C) * d + (8 * B + 24 * C) * (1 << 30);
305 }
306 coeff /= (1LL << 54) / fone;
307 }
308 else if (flag == 1) // BILINEAR
309 {
310 coeff = (1 << 30) - d;
311 if (coeff < 0)
312 coeff = 0;
313 coeff *= fone >> 30;
314 }
315 else
316 assert(0);
317
318 filter[i * filterSize + j] = coeff;
319 xx++;
320 }
321 xDstInSrc += 2 * inc;
322 }
323
324 //apply src & dst Filter to filter -> filter2
325 X265_CHECK(filterSize > 0, "invalid filterSize value.\n");
326 filter2Size = filterSize;
327 filter2 = new int64_t[dstW * sizeof(*filter2) * filter2Size];
328
329 /* This is hard to read code, but much faster. Speed is crucial here */
330 int index = RES_FACTOR_DEF;
331 int size = dstW * filterSize;
332
333 (size % 4 == 0) && (index = RES_FACTOR_4);
334 (size % 8 == 0) && (index = RES_FACTOR_8);
335 (size % 16 == 0) && (index = RES_FACTOR_16);
336 (size % 32 == 0) && (index = RES_FACTOR_32);
337 (size % 64 == 0) && (index = RES_FACTOR_64);
338
339 filter_copy_c(filter, filter2, size);
340
341 delete[](filter);
342
343 // try to reduce the filter-size (step1 find size and shift left)
344 // Assume it is near normalized (*0.5 or *2.0 is OK but * 0.001 is not).
345 minFilterSize = 0;
346 for (int i = dstW - 1; i >= 0; i--)
347 {
348 int min = filter2Size;
349 int64_t cutOff = 0.0;
350
351 // get rid of near zero elements on the left by shifting left
352 for (int j = 0; j < filter2Size; j++)
353 {
354 int k;
355 cutOff += X265_ABS(filter2[i * filter2Size]);
356
357 if (cutOff > SCALER_MAX_REDUCE_CUTOFF * fone)
358 break;
359 // preserve monotonicity because the core can't handle the filter otherwise
360 if (i < dstW - 1 && (*filterPos)[i] >= (*filterPos)[i + 1])
361 break;
362
363 // move filter coefficients left
364 for (k = 1; k < filter2Size; k++)
365 filter2[i * filter2Size + k - 1] = filter2[i * filter2Size + k];
366 filter2[i * filter2Size + k - 1] = 0;
367 (*filterPos)[i]++;
368 }
369
370 cutOff = 0;
371 // count near zeros on the right
372 for (int j = filter2Size - 1; j > 0; j--)
373 {
374 cutOff += X265_ABS(filter2[i * filter2Size + j]);
375
376 if (cutOff > SCALER_MAX_REDUCE_CUTOFF * fone)
377 break;
378 min--;
379 }
380
381 if (min > minFilterSize)
382 minFilterSize = min;
383 }
384
385 X265_CHECK(minFilterSize > 0, "invalid minFilterSize value.\n");
386 filterSize = (minFilterSize + (filtAlign - 1)) & (~(filtAlign - 1));
387 X265_CHECK(filterSize > 0, "invalid filterSize value.\n");
388 filter = new int64_t[dstW*filterSize * sizeof(*filter)];
389
390 *outFilterSize = filterSize;
391
392 // try to reduce the filter-size (step2 reduce it)
393 for (int i = 0; i < dstW; i++)
394 {
395 for (int j = 0; j < filterSize; j++)
396 {
397 if (j >= filter2Size)
398 filter[i * filterSize + j] = 0;
399 else
400 filter[i * filterSize + j] = filter2[i * filter2Size + j];
401 if ((flag & SCALER_BITEXACT) && j >= minFilterSize)
402 filter[i * filterSize + j] = 0;
403 }
404 }
405
406 // fix borders
407 for (int i = 0; i < dstW; i++)
408 {
409 int j;
410 if ((*filterPos)[i] < 0)
411 {
412 // move filter coefficients left to compensate for filterPos
413 for (j = 1; j < filterSize; j++)
414 {
415 int left = x265_max(j + (*filterPos)[i], 0);
416 filter[i * filterSize + left] += filter[i * filterSize + j];
417 filter[i * filterSize + j] = 0;
418 }
419 (*filterPos)[i] = 0;
420 }
421
422 if ((*filterPos)[i] + filterSize > srcW)
423 {
424 int shift = (*filterPos)[i] + x265_min(filterSize - srcW, 0);
425 int64_t acc = 0;
426
427 for (j = filterSize - 1; j >= 0; j--)
428 {
429 if ((*filterPos)[i] + j >= srcW)
430 {
431 acc += filter[i * filterSize + j];
432 filter[i * filterSize + j] = 0;
433 }
434 }
435 for (j = filterSize - 1; j >= 0; j--)
436 {
437 if (j < shift)
438 filter[i * filterSize + j] = 0;
439 else
440 filter[i * filterSize + j] = filter[i * filterSize + j - shift];
441 }
442
443 (*filterPos)[i] -= shift;
444 filter[i * filterSize + srcW - 1 - (*filterPos)[i]] += acc;
445 }
446
447 X265_CHECK((*filterPos)[i] >= 0, "invalid: Value of (*filterPos)[%d] < 0.\n", i);
448 X265_CHECK((*filterPos)[i] < srcW, "invalid: Value of (*filterPos)[%d] > %d .\n", i, srcW);
449 if ((*filterPos)[i] + filterSize > srcW)
450 {
451 for (j = 0; j < filterSize; j++)
452 {
453 X265_CHECK(!filter[i * filterSize + j], "invalid: Value of filter[%d * filterSize + %d] != 0.\n", i, j);
454 X265_CHECK((*filterPos)[i] + j < srcW, "invalid: (*filterPos)[%d] + %d > %d .\n", i, i, srcW);
455 }
456 }
457 }
458
459 // init filter
460 m_filt = new int16_t[(dstW + 3)*(*outFilterSize)];
461 int16_t **outFilter = &m_filt;
462
463 // normalize & store in outFilter
464 for (int i = 0; i < dstW; i++)
465 {
466 int64_t error = 0;
467 int64_t sum = 0;
468
469 for (int j = 0; j < filterSize; j++)
470 sum += filter[i * filterSize + j];
471 sum = (sum + one / 2) / one;
472 if (!sum)
473 {
474 x265_log(NULL, X265_LOG_WARNING, "Scaler: zero vector in scaling\n");
475 sum = 1;
476 }
477 for (int j = 0; j < *outFilterSize; j++)
478 {
479 int64_t v = filter[i * filterSize + j] + error;
480 int intV = ROUNDED_DIVISION(v, sum);
481 (*outFilter)[i * (*outFilterSize) + j] = intV;
482 error = v - intV * sum;
483 }
484 }
485
486 (*filterPos)[dstW + 0] =
487 (*filterPos)[dstW + 1] =
488 (*filterPos)[dstW + 2] = (*filterPos)[dstW - 1];
489 for (int i = 0; i < *outFilterSize; i++)
490 {
491 int k = (dstW - 1) * (*outFilterSize) + i;
492 (*outFilter)[k + 1 * (*outFilterSize)] =
493 (*outFilter)[k + 2 * (*outFilterSize)] =
494 (*outFilter)[k + 3 * (*outFilterSize)] = (*outFilter)[k];
495 }
496
497 delete[](filter);
498 delete[](filter2);
499 return 0;
500 }
501
init(int algorithmFlags,VideoDesc * srcVideoDesc,VideoDesc * dstVideoDesc)502 int ScalerFilterManager::init(int algorithmFlags, VideoDesc *srcVideoDesc, VideoDesc *dstVideoDesc)
503 {
504 int srcW = m_srcW = srcVideoDesc->m_width;
505 int srcH = m_srcH = srcVideoDesc->m_height;
506 int dstW = m_dstW = dstVideoDesc->m_width;
507 int dstH = m_dstH = dstVideoDesc->m_height;
508 int lumXInc, crXInc;
509 int lumYInc, crYInc;
510 int srcHCrPos;
511 int dstHCrPos;
512 int srcVCrPos;
513 int dstVCrPos;
514 int dst_stride = SCALER_ALIGN(dstW * sizeof(int16_t) + 66, 16);
515 m_bitDepth = dstVideoDesc->m_inputDepth;
516 if (m_bitDepth == 16)
517 dst_stride <<= 1;
518
519 m_algorithmFlags = algorithmFlags;
520 lumXInc = (((int64_t)srcW << 16) + (dstW >> 1)) / dstW;
521 lumYInc = (((int64_t)srcH << 16) + (dstH >> 1)) / dstH;
522
523 srcHCrPos = -513;
524 dstHCrPos = -513;
525 srcVCrPos = -513;
526 dstVCrPos = -513;
527
528 int srcCsp = srcVideoDesc->m_csp;
529 if (x265_cli_csps[srcCsp].planes > 1)
530 {
531 m_crSrcHSubSample = x265_cli_csps[srcCsp].width[1];
532 m_crSrcVSubSample = x265_cli_csps[srcCsp].height[1];
533 m_crSrcW = srcVideoDesc->m_width >> m_crSrcHSubSample;
534 m_crSrcH = srcVideoDesc->m_height >> m_crSrcVSubSample;
535 if (srcCsp == 1)// i420
536 srcVCrPos = 128;
537 }
538 else
539 {
540 m_crSrcW = 0;
541 m_crSrcH = 0;
542 m_crSrcHSubSample = 0;
543 m_crSrcVSubSample = 0;
544 }
545 int dstCsp = dstVideoDesc->m_csp;
546 if (x265_cli_csps[dstCsp].planes > 1)
547 {
548 m_crDstHSubSample = x265_cli_csps[dstCsp].width[1];
549 m_crDstVSubSample = x265_cli_csps[dstCsp].height[1];
550 m_crDstW = dstVideoDesc->m_width >> m_crDstHSubSample;
551 m_crDstH = dstVideoDesc->m_height >> m_crDstVSubSample;
552 if (dstCsp == 1)// i420
553 dstVCrPos = 128;
554 }
555 else
556 {
557 m_crDstW = 0;
558 m_crDstH = 0;
559 m_crDstHSubSample = 0;
560 m_crDstVSubSample = 0;
561 }
562 // Only srcCsp == dstCsp is supported at present
563 if (srcCsp != dstCsp)
564 {
565 x265_log(NULL, X265_LOG_ERROR, "wrong, source csp != destination csp \n");
566 return false;
567 }
568
569 lumXInc = (((int64_t)srcW << 16) + (dstW >> 1)) / dstW;
570 lumYInc = (((int64_t)srcH << 16) + (dstH >> 1)) / dstH;
571 crXInc = (((int64_t)m_crSrcW << 16) + (m_crDstW >> 1)) / m_crDstW;
572 crYInc = (((int64_t)m_crSrcH << 16) + (m_crDstH >> 1)) / m_crDstH;
573
574 const int filterAlign = 1;
575
576 // init horizontal Luma Scaler filter
577 m_ScalerFilters[0] = new ScalerHLumFilter(m_bitDepth);
578 m_ScalerFilters[0]->initCoeff(m_algorithmFlags, lumXInc, srcW, dstW, filterAlign, 1 << 14, getLocalPos(0, 0), getLocalPos(0, 0));
579
580 // init horizontal cr Scaler filter
581 m_ScalerFilters[1] = new ScalerHCrFilter(m_bitDepth);
582 m_ScalerFilters[1]->initCoeff(m_algorithmFlags, crXInc, m_crSrcW, m_crDstW, filterAlign, 1 << 14,
583 getLocalPos(m_crSrcHSubSample, srcHCrPos), getLocalPos(m_crDstHSubSample, dstHCrPos));
584
585 // init vertical Luma scaler filter
586 m_ScalerFilters[2] = new ScalerVLumFilter(m_bitDepth);
587 m_ScalerFilters[2]->initCoeff(m_algorithmFlags, lumYInc, srcH, dstH, filterAlign, 1 << 12, getLocalPos(0, 0), getLocalPos(0, 0));
588
589 // init vertical cr scaler filter
590 m_ScalerFilters[3] = new ScalerVCrFilter(m_bitDepth);
591 m_ScalerFilters[3]->initCoeff(m_algorithmFlags, crYInc, m_crSrcH, m_crDstH, filterAlign, 1 << 12,
592 getLocalPos(m_crSrcVSubSample, srcVCrPos), getLocalPos(m_crDstVSubSample, dstVCrPos));
593
594 // init slice, must after filter initialization
595 initScalerSlice();
596
597 // set slice
598 m_ScalerFilters[0]->setSlice(m_slices[0], m_slices[1]);
599 m_ScalerFilters[1]->setSlice(m_slices[0], m_slices[1]);
600
601 m_ScalerFilters[2]->setSlice(m_slices[1], m_slices[2]);
602 m_ScalerFilters[3]->setSlice(m_slices[1], m_slices[2]);
603
604 return 0;
605 }
606
doScaling(int16_t * dst,int dstW,const uint8_t * src,const int16_t * filter,const int32_t * filterPos,int filterSize)607 void HFilterScaler8Bit::doScaling(int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, const int32_t *filterPos, int filterSize)
608 {
609 int IdxW = FACTOR_4;
610 int IdxF = FIL_DEF;
611
612 /* This is hard to read code, but much faster. Speed is crucial here */
613 (dstW % 8 == 0) && (filterSize == 6) && (IdxF = FIL_6) && (IdxW = FACTOR_8);
614 (dstW % 8 == 0) && (filterSize == 8) && (IdxF = FIL_8) && (IdxW = FACTOR_8);
615 (dstW % 8 == 0) && (filterSize == 16) && (IdxF = FIL_16) && (IdxW = FACTOR_8);
616 (dstW % 8 == 0) && (filterSize == 11) && (IdxF = FIL_11) && (IdxW = FACTOR_8);
617 (dstW % 8 == 0) && (filterSize == 10) && (IdxF = FIL_10) && (IdxW = FACTOR_8);
618 (dstW % 8 == 0) && (filterSize == 9) && (IdxF = FIL_9) && (IdxW = FACTOR_8);
619 (dstW % 8 == 0) && (filterSize == 15) && (IdxF = FIL_15) && (IdxW = FACTOR_8);
620 (dstW % 8 == 0) && (filterSize == 13) && (IdxF = FIL_13) && (IdxW = FACTOR_8);
621
622 /* Do not check multiple of width 4, if width is already multiple of 8 */
623 !(dstW % 8 == 0) && (dstW % 4 == 0) && (filterSize == 6) && (IdxF = FIL_6) && (IdxW = FACTOR_4);
624 !(dstW % 8 == 0) && (dstW % 4 == 0) && (filterSize == 8) && (IdxF = FIL_8) && (IdxW = FACTOR_4);
625 !(dstW % 8 == 0) && (dstW % 4 == 0) && (filterSize == 16) && (IdxF = FIL_16) && (IdxW = FACTOR_4);
626
627 (dstW % 4 == 0) && (filterSize == 24) && (IdxF = FIL_24) && (IdxW = FACTOR_4);
628 (dstW % 4 == 0) && (filterSize == 22) && (IdxF = FIL_22) && (IdxW = FACTOR_4);
629 (dstW % 4 == 0) && (filterSize == 19) && (IdxF = FIL_19) && (IdxW = FACTOR_4);
630 (dstW % 4 == 0) && (filterSize == 17) && (IdxF = FIL_17) && (IdxW = FACTOR_4);
631
632 #if X265_DEPTH == 8
633 doScaling_c(dst, dstW, src, filter, filterPos, filterSize);
634 #else
635 doScaling_c_h(dst, dstW, src, filter, filterPos, filterSize);
636 #endif
637 }
638
doScaling(int16_t * dst,int dstW,const uint8_t * src,const int16_t * filter,const int32_t * filterPos,int filterSize)639 void HFilterScaler10Bit::doScaling(int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, const int32_t *filterPos, int filterSize)
640 {
641 int IdxW = FACTOR_4;
642 int IdxF = FIL_DEF;
643
644 /* This is hard to read code, but much faster. Speed is crucial here */
645 (dstW % 8 == 0) && (filterSize == 6) && (IdxF = FIL_6) && (IdxW = FACTOR_8);
646 (dstW % 8 == 0) && (filterSize == 8) && (IdxF = FIL_8) && (IdxW = FACTOR_8);
647 (dstW % 8 == 0) && (filterSize == 16) && (IdxF = FIL_16) && (IdxW = FACTOR_8);
648 (dstW % 8 == 0) && (filterSize == 11) && (IdxF = FIL_11) && (IdxW = FACTOR_8);
649 (dstW % 8 == 0) && (filterSize == 10) && (IdxF = FIL_10) && (IdxW = FACTOR_8);
650 (dstW % 8 == 0) && (filterSize == 9) && (IdxF = FIL_9) && (IdxW = FACTOR_8);
651 (dstW % 8 == 0) && (filterSize == 15) && (IdxF = FIL_15) && (IdxW = FACTOR_8);
652 (dstW % 8 == 0) && (filterSize == 13) && (IdxF = FIL_13) && (IdxW = FACTOR_8);
653
654 /* Do not check multiple of width 4, if width is already multiple of 8 */
655 !(dstW % 8 == 0) && (dstW % 4 == 0) && (filterSize == 6) && (IdxF = FIL_6) && (IdxW = FACTOR_4);
656 !(dstW % 8 == 0) && (dstW % 4 == 0) && (filterSize == 8) && (IdxF = FIL_8) && (IdxW = FACTOR_4);
657 !(dstW % 8 == 0) && (dstW % 4 == 0) && (filterSize == 16) && (IdxF = FIL_16) && (IdxW = FACTOR_4);
658
659 (dstW % 4 == 0) && (filterSize == 24) && (IdxF = FIL_24) && (IdxW = FACTOR_4);
660 (dstW % 4 == 0) && (filterSize == 22) && (IdxF = FIL_22) && (IdxW = FACTOR_4);
661 (dstW % 4 == 0) && (filterSize == 19) && (IdxF = FIL_19) && (IdxW = FACTOR_4);
662 (dstW % 4 == 0) && (filterSize == 17) && (IdxF = FIL_17) && (IdxW = FACTOR_4);
663
664 #if X265_DEPTH == 8
665 doScaling_c(dst, dstW, src, filter, filterPos, filterSize);
666 #else
667 doScaling_c_h(dst, dstW, src, filter, filterPos, filterSize);
668 #endif
669 }
670
scale_pic(void ** src,void ** dst,int * srcStride,int * dstStride)671 int ScalerFilterManager::scale_pic(void ** src, void ** dst, int * srcStride, int * dstStride)
672 {
673 uint8_t** src_8bit, **dst_8bit;
674 src_8bit = (uint8_t**)src;
675 dst_8bit = (uint8_t**)dst;
676 if (!src_8bit || !dst_8bit)
677 return -1;
678
679 const int srcsliceHor = m_srcH;
680 const int dstW = m_dstW;
681 const int dstH = m_dstH;
682 int32_t *vLumFilterPos = m_ScalerFilters[2]->m_filtPos;
683 int32_t *vCrFilterPos = m_ScalerFilters[3]->m_filtPos;
684 const int vLumFilterSize = m_ScalerFilters[2]->m_filtLen;
685 const int vCrFilterSize = m_ScalerFilters[3]->m_filtLen;
686 const int crSrcsliceHor = UH_CEIL_SHIFTR(srcsliceHor, m_crSrcVSubSample);
687
688 // vars which will change and which we need to store back in the context
689 int lumBufIndex = -1;
690 int crBufIndex = -1;
691 int lastInLumBuf = -1;
692 int lastInCrBuf = -1;
693
694 int hasLumHoles = 1;
695 int hasCrHoles = 1;
696
697 ScalerSlice *src_slice = m_slices[0];
698 ScalerSlice *hout_slice = m_slices[1];
699 ScalerSlice *vout_slice = m_slices[2];
700 src_slice->initFromSrc((uint8_t**)src, srcStride, m_srcW, 0, srcsliceHor, 0, crSrcsliceHor, 1);
701 vout_slice->initFromSrc((uint8_t**)dst, dstStride, m_dstW, 0, dstH, 0, UH_CEIL_SHIFTR(dstH, m_crDstVSubSample), 0);
702
703 hout_slice->m_plane[0].sliceVer = 0;
704 hout_slice->m_plane[1].sliceVer = 0;
705 hout_slice->m_plane[2].sliceVer = 0;
706 hout_slice->m_plane[3].sliceVer = 0;
707 hout_slice->m_plane[0].sliceHor = 0;
708 hout_slice->m_plane[1].sliceHor = 0;
709 hout_slice->m_plane[2].sliceHor = 0;
710 hout_slice->m_plane[3].sliceHor = 0;
711 hout_slice->m_width = dstW;
712
713 for (int dstY = 0; dstY < dstH; dstY++)
714 {
715 const int crDstY = dstY >> m_crDstVSubSample;
716 const int firstLumSrcY = x265_max(1 - vLumFilterSize, vLumFilterPos[dstY]);
717 const int firstLumSrcY2 = x265_max(1 - vLumFilterSize, vLumFilterPos[x265_min(dstY | ((1 << m_crDstVSubSample) - 1), dstH - 1)]);
718 const int firstCrSrcY = x265_max(1 - vCrFilterSize, vCrFilterPos[crDstY]);
719
720 int lastLumSrcY = x265_min(m_srcH, firstLumSrcY + vLumFilterSize) - 1;
721 int lastLumSrcY2 = x265_min(m_srcH, firstLumSrcY2 + vLumFilterSize) - 1;
722 int lastCrSrcY = x265_min(m_crSrcH, firstCrSrcY + vCrFilterSize) - 1;
723
724 // handle holes
725 if (firstLumSrcY > lastInLumBuf)
726 {
727 hasLumHoles = lastInLumBuf != firstLumSrcY - 1;
728 if (hasLumHoles)
729 {
730 hout_slice->m_plane[0].sliceVer = firstLumSrcY;
731 hout_slice->m_plane[3].sliceVer = firstLumSrcY;
732 hout_slice->m_plane[0].sliceHor =
733 hout_slice->m_plane[3].sliceHor = 0;
734 }
735
736 lastInLumBuf = firstLumSrcY - 1;
737 }
738 if (firstCrSrcY > lastInCrBuf)
739 {
740 hasCrHoles = lastInCrBuf != firstCrSrcY - 1;
741 if (hasCrHoles)
742 {
743 hout_slice->m_plane[1].sliceVer = firstCrSrcY;
744 hout_slice->m_plane[2].sliceVer = firstCrSrcY;
745 hout_slice->m_plane[1].sliceHor =
746 hout_slice->m_plane[2].sliceHor = 0;
747 }
748
749 lastInCrBuf = firstCrSrcY - 1;
750 }
751
752 // Do we have enough lines in this slice to output the dstY line
753 int enoughLines = lastLumSrcY2 < 0 + srcsliceHor && lastCrSrcY < UH_CEIL_SHIFTR(0 + srcsliceHor, m_crSrcVSubSample);
754 if (!enoughLines)
755 {
756 lastLumSrcY = 0 + srcsliceHor - 1;
757 lastCrSrcY = 0 + crSrcsliceHor - 1;
758 x265_log(NULL, X265_LOG_INFO, "buffering slice: lastLumSrcY %d lastCrSrcY %d\n", lastLumSrcY, lastCrSrcY);
759 }
760
761 X265_CHECK(((lastLumSrcY - firstLumSrcY + 1) <= hout_slice->m_plane[0].availLines), "invalid value %d", lastLumSrcY - firstLumSrcY + 1);
762 X265_CHECK((lastCrSrcY - firstCrSrcY + 1) <= hout_slice->m_plane[1].availLines, "invalid value %d", lastCrSrcY - firstCrSrcY + 1);
763
764 int firstPosY, lastPosY, firstCPosY, lastCPosY;
765 int posY = hout_slice->m_plane[0].sliceVer + hout_slice->m_plane[0].sliceHor;
766 if (posY <= lastLumSrcY && !hasLumHoles)
767 {
768 firstPosY = x265_max(firstLumSrcY, posY);
769 lastPosY = x265_min(firstLumSrcY + hout_slice->m_plane[0].availLines - 1, 0 + srcsliceHor - 1);
770 }
771 else
772 {
773 firstPosY = posY;
774 lastPosY = lastLumSrcY;
775 }
776
777 int cPosY = hout_slice->m_plane[1].sliceVer + hout_slice->m_plane[1].sliceHor;
778 if (cPosY <= lastCrSrcY && !hasCrHoles)
779 {
780 firstCPosY = x265_max(firstCrSrcY, cPosY);
781 lastCPosY = x265_min(firstCrSrcY + hout_slice->m_plane[1].availLines - 1, UH_CEIL_SHIFTR(0 + srcsliceHor, m_crSrcVSubSample) - 1);
782 }
783 else
784 {
785 firstCPosY = cPosY;
786 lastCPosY = lastCrSrcY;
787 }
788
789 hout_slice->rotate(lastPosY, lastCPosY);
790 // horizontal luma scale
791 if (posY < lastLumSrcY + 1)
792 m_ScalerFilters[0]->process(firstPosY, lastPosY - firstPosY + 1);
793
794 lumBufIndex += lastLumSrcY - lastInLumBuf;
795 lastInLumBuf = lastLumSrcY;
796 // horizontal chroma Scale
797 if (cPosY < lastCrSrcY + 1)
798 m_ScalerFilters[1]->process(firstCPosY, lastCPosY - firstCPosY + 1);
799
800 crBufIndex += lastCrSrcY - lastInCrBuf;
801 lastInCrBuf = lastCrSrcY;
802
803 // wrap buf index around to stay inside the ring buffer
804 if (lumBufIndex >= vLumFilterSize)
805 lumBufIndex -= vLumFilterSize;
806 if (crBufIndex >= vCrFilterSize)
807 crBufIndex -= vCrFilterSize;
808 if (!enoughLines)
809 break; // we can't output a dstY line so let's try with the next slice
810
811 // vertical scale(output converter)
812 for (int i = 2; i < m_numFilter; ++i)
813 m_ScalerFilters[i]->process(dstY, 1);
814 }
815 return 0;
816 }
817
getMinBufferSize(int * out_lum_size,int * out_cr_size)818 void ScalerFilterManager::getMinBufferSize(int *out_lum_size, int *out_cr_size)
819 {
820 int lumY;
821 int dstH = m_dstH;
822 int crDstH = m_crDstH;
823 int *lumFilterPos = m_ScalerFilters[2]->m_filtPos;
824 int *crFilterPos = m_ScalerFilters[3]->m_filtPos;
825 int lumFilterSize = m_ScalerFilters[2]->m_filtLen;
826 int crFilterSize = m_ScalerFilters[3]->m_filtLen;
827 int crSubSample = m_crSrcVSubSample;
828
829 *out_lum_size = lumFilterSize;
830 *out_cr_size = crFilterSize;
831
832 for (lumY = 0; lumY < dstH; lumY++)
833 {
834 int crY = (int64_t)lumY * crDstH / dstH;
835 int nextSlice = x265_max(lumFilterPos[lumY] + lumFilterSize - 1, ((crFilterPos[crY] + crFilterSize - 1) << crSubSample));
836
837 nextSlice >>= crSubSample;
838 nextSlice <<= crSubSample;
839 (*out_lum_size) = x265_max((*out_lum_size), nextSlice - lumFilterPos[lumY]);
840 (*out_cr_size) = x265_max((*out_cr_size), (nextSlice >> crSubSample) - crFilterPos[crY]);
841 }
842 }
843
initScalerSlice()844 int ScalerFilterManager::initScalerSlice()
845 {
846 int ret = 0;
847 int dst_stride = SCALER_ALIGN(m_dstW * sizeof(int16_t) + 66, 16);
848 if (m_bitDepth == 16)
849 dst_stride <<= 1;
850
851 int lumBufSize;
852 int crBufSize;
853 int vLumFilterSize = m_ScalerFilters[2]->m_filtLen; // Vertical filter size for luma pixels.
854 int vCrFilterSize = m_ScalerFilters[3]->m_filtLen; // Vertical filter size for chroma pixels.
855 getMinBufferSize(&lumBufSize, &crBufSize);
856 lumBufSize = X265_MAX(lumBufSize, vLumFilterSize + MAX_NUM_LINES_AHEAD);
857 crBufSize = X265_MAX(crBufSize, vCrFilterSize + MAX_NUM_LINES_AHEAD);
858
859 for (int i = 0; i < m_numSlice; i++)
860 m_slices[i] = new ScalerSlice;
861 ret = m_slices[0]->create(m_srcH, m_crSrcH, m_crSrcHSubSample, m_crSrcVSubSample, 0);
862 if (ret < 0)
863 {
864 x265_log(NULL, X265_LOG_ERROR, "alloc_slice m_slice[0] failed\n");
865 return -1;
866 }
867
868 // horizontal scaler output
869 ret = m_slices[1]->create(lumBufSize, crBufSize, m_crDstHSubSample, m_crDstVSubSample, 1);
870 if (ret < 0)
871 {
872 x265_log(NULL, X265_LOG_ERROR, "m_slice[1].create failed\n");
873 return -1;
874 }
875 ret = m_slices[1]->createLines(dst_stride, m_dstW);
876 if (ret < 0)
877 {
878 x265_log(NULL, X265_LOG_ERROR, "m_slice[1].createLines failed\n");
879 return -1;
880 }
881
882 m_slices[1]->fillOnes(dst_stride >> 1, m_bitDepth == 16);
883
884 // vertical scaler output
885 ret = m_slices[2]->create(m_dstH, m_crDstH, m_crDstHSubSample, m_crDstVSubSample, 0);
886 if (ret < 0)
887 {
888 x265_log(NULL, X265_LOG_ERROR, "m_slice[2].create failed\n");
889 return -1;
890 }
891
892 return 0;
893 }
894
getLocalPos(int crSubSample,int pos)895 int ScalerFilterManager::getLocalPos(int crSubSample, int pos)
896 {
897 if (pos == -1 || pos <= -513)
898 pos = (128 << crSubSample) - 128;
899 pos += 128; // relative to ideal left edge
900 return pos >> crSubSample;
901 }
902
ScalerSlice()903 ScalerSlice::ScalerSlice() :
904 m_width(0),
905 m_hCrSubSample(0),
906 m_vCrSubSample(0),
907 m_isRing(0),
908 m_destroyLines(0)
909 {
910 for (int i = 0; i < m_numSlicePlane; i++)
911 {
912 m_plane[i].availLines = 0;
913 m_plane[i].sliceVer = 0;
914 m_plane[i].sliceHor = 0;
915 m_plane[i].lineBuf = NULL;
916 }
917 }
918
destroy()919 void ScalerSlice::destroy()
920 {
921 if (m_destroyLines)
922 destroyLines();
923 for (int i = 0; i < m_numSlicePlane; i++)
924 {
925 if (m_plane[i].lineBuf)
926 X265_FREE(m_plane[i].lineBuf);
927 }
928 }
929
create(int lumLines,int crLines,int h_sub_sample,int v_sub_sample,int ring)930 int ScalerSlice::create(int lumLines, int crLines, int h_sub_sample, int v_sub_sample, int ring)
931 {
932 int i;
933 int size[4] = { lumLines, crLines, crLines, lumLines };
934
935 m_hCrSubSample = h_sub_sample;
936 m_vCrSubSample = v_sub_sample;
937 m_isRing = ring;
938 m_destroyLines = 0;
939
940 for (i = 0; i < m_numSlicePlane; ++i)
941 {
942 int n = size[i] * (ring == 0 ? 1 : 3);
943 m_plane[i].lineBuf = X265_MALLOC(uint8_t*, n);
944 if (!m_plane[i].lineBuf)
945 return -1;
946
947 m_plane[i].availLines = size[i];
948 m_plane[i].sliceVer = 0;
949 m_plane[i].sliceHor = 0;
950 }
951 return 0;
952 }
953
954 /*
955 slice lines contains extra bytes for vectorial code thus @size
956 is the allocated memory size and @width is the number of pixels
957 */
createLines(int size,int width)958 int ScalerSlice::createLines(int size, int width)
959 {
960 int i;
961 int idx[2] = { 3, 2 };
962
963 m_destroyLines = 1;
964 m_width = width;
965
966 for (i = 0; i < 2; ++i) {
967 int n = m_plane[i].availLines;
968 int j;
969 int ii = idx[i];
970 assert(n == m_plane[ii].availLines);
971 for (j = 0; j < n; ++j)
972 {
973 // chroma plane line U and V are expected to be contiguous in memory
974 m_plane[i].lineBuf[j] = (uint8_t*)X265_MALLOC(uint8_t, size * 2 + 32);
975 if (!m_plane[i].lineBuf[j])
976 {
977 destroyLines();
978 return -1;
979 }
980 m_plane[ii].lineBuf[j] = m_plane[i].lineBuf[j] + size + 16;
981 if (m_isRing)
982 {
983 m_plane[i].lineBuf[j + n] = m_plane[i].lineBuf[j];
984 m_plane[ii].lineBuf[j + n] = m_plane[ii].lineBuf[j];
985 }
986 }
987 }
988
989 return 0;
990 }
991
destroyLines()992 void ScalerSlice::destroyLines()
993 {
994 int i;
995 for (i = 0; i < 2; ++i)
996 {
997 int n = m_plane[i].availLines;
998 int j;
999 for (j = 0; j < n; ++j)
1000 {
1001 X265_FREE(m_plane[i].lineBuf[j]);
1002 m_plane[i].lineBuf[j] = NULL;
1003 if (m_isRing)
1004 m_plane[i].lineBuf[j + n] = NULL;
1005 }
1006 }
1007
1008 for (i = 0; i < m_numSlicePlane; ++i)
1009 memset(m_plane[i].lineBuf, 0, sizeof(uint8_t*) * m_plane[i].availLines * (m_isRing ? 3 : 1));
1010 m_destroyLines = 0;
1011 }
1012
fillOnes(int n,int is16bit)1013 void ScalerSlice::fillOnes(int n, int is16bit)
1014 {
1015 int i;
1016 for (i = 0; i < m_numSlicePlane; ++i)
1017 {
1018 int j;
1019 int size = m_plane[i].availLines;
1020 for (j = 0; j < size; ++j)
1021 {
1022 int k;
1023 int end = is16bit ? n >> 1 : n;
1024 // fill also one extra element
1025 end += 1;
1026 if (is16bit)
1027 for (k = 0; k < end; ++k)
1028 ((int32_t*)(m_plane[i].lineBuf[j]))[k] = 1 << 18;
1029 else
1030 for (k = 0; k < end; ++k)
1031 ((int16_t*)(m_plane[i].lineBuf[j]))[k] = 1 << 14;
1032 }
1033 }
1034 }
1035
rotate(int lum,int cr)1036 int ScalerSlice::rotate(int lum, int cr)
1037 {
1038 int i;
1039 if (lum)
1040 {
1041 for (i = 0; i < m_numSlicePlane; i += 3)
1042 {
1043 int n = m_plane[i].availLines;
1044 int l = lum - m_plane[i].sliceVer;
1045
1046 if (l >= n * 2)
1047 {
1048 m_plane[i].sliceVer += n;
1049 m_plane[i].sliceHor -= n;
1050 }
1051 }
1052 }
1053 if (cr)
1054 {
1055 for (i = 1; i < 3; ++i)
1056 {
1057 int n = m_plane[i].availLines;
1058 int l = cr - m_plane[i].sliceVer;
1059
1060 if (l >= n * 2)
1061 {
1062 m_plane[i].sliceVer += n;
1063 m_plane[i].sliceHor -= n;
1064 }
1065 }
1066 }
1067 return 0;
1068 }
1069
initFromSrc(uint8_t * src[4],const int stride[4],int srcW,int lumY,int lumH,int crY,int crH,int relative)1070 int ScalerSlice::initFromSrc(uint8_t *src[4], const int stride[4], int srcW, int lumY, int lumH, int crY, int crH, int relative)
1071 {
1072 int i = 0;
1073
1074 const int start[m_numSlicePlane] = { lumY, crY, crY, lumY };
1075
1076 const int end[m_numSlicePlane] = { lumY + lumH, crY + crH, crY + crH, lumY + lumH };
1077
1078 uint8_t *const src_[m_numSlicePlane] = { src[0] + (relative ? 0 : start[0]) * stride[0],
1079 src[1] + (relative ? 0 : start[1]) * stride[1],
1080 src[2] + (relative ? 0 : start[2]) * stride[2],
1081 src[3] + (relative ? 0 : start[3]) * stride[3] };
1082
1083 m_width = srcW;
1084
1085 for (i = 0; i < m_numSlicePlane; ++i)
1086 {
1087 int j;
1088 int first = m_plane[i].sliceVer;
1089 int n = m_plane[i].availLines;
1090 int lines = end[i] - start[i];
1091 int tot_lines = end[i] - first;
1092
1093 if (start[i] >= first && n >= tot_lines)
1094 {
1095 m_plane[i].sliceHor = x265_max(tot_lines, m_plane[i].sliceHor);
1096 for (j = 0; j < lines; j += 1)
1097 m_plane[i].lineBuf[start[i] - first + j] = src_[i] + j * stride[i];
1098 }
1099 else
1100 {
1101 m_plane[i].sliceVer = start[i];
1102 lines = lines > n ? n : lines;
1103 m_plane[i].sliceHor = lines;
1104 for (j = 0; j < lines; j += 1)
1105 m_plane[i].lineBuf[j] = src_[i] + j * stride[i];
1106 }
1107 }
1108 return 0;
1109 }
1110 }
1111