1 /* 2 * Copyright 2015 The LibYuv Project Authors. All rights reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include <string.h> // For memset/memcpy 12 13 #include "libyuv/scale.h" 14 #include "libyuv/scale_row.h" 15 16 #include "libyuv/basic_types.h" 17 18 #ifdef __cplusplus 19 namespace libyuv { 20 extern "C" { 21 #endif 22 23 // Fixed scale down. 24 // Mask may be non-power of 2, so use MOD 25 #define SDANY(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, FACTOR, BPP, MASK) \ 26 void NAMEANY(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, \ 27 int dst_width) { \ 28 int r = (int)((unsigned int)dst_width % (MASK + 1)); /* NOLINT */ \ 29 int n = dst_width - r; \ 30 if (n > 0) { \ 31 SCALEROWDOWN_SIMD(src_ptr, src_stride, dst_ptr, n); \ 32 } \ 33 SCALEROWDOWN_C(src_ptr + (n * FACTOR) * BPP, src_stride, \ 34 dst_ptr + n * BPP, r); \ 35 } 36 37 // Fixed scale down for odd source width. Used by I420Blend subsampling. 38 // Since dst_width is (width + 1) / 2, this function scales one less pixel 39 // and copies the last pixel. 40 #define SDODD(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, FACTOR, BPP, MASK) \ 41 void NAMEANY(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, \ 42 int dst_width) { \ 43 int r = (int)((unsigned int)(dst_width - 1) % (MASK + 1)); /* NOLINT */ \ 44 int n = (dst_width - 1) - r; \ 45 if (n > 0) { \ 46 SCALEROWDOWN_SIMD(src_ptr, src_stride, dst_ptr, n); \ 47 } \ 48 SCALEROWDOWN_C(src_ptr + (n * FACTOR) * BPP, src_stride, \ 49 dst_ptr + n * BPP, r + 1); \ 50 } 51 52 #ifdef HAS_SCALEROWDOWN2_SSSE3 53 SDANY(ScaleRowDown2_Any_SSSE3, ScaleRowDown2_SSSE3, ScaleRowDown2_C, 2, 1, 15) 54 SDANY(ScaleRowDown2Linear_Any_SSSE3, 55 ScaleRowDown2Linear_SSSE3, 56 ScaleRowDown2Linear_C, 57 2, 58 1, 59 15) 60 SDANY(ScaleRowDown2Box_Any_SSSE3, 61 ScaleRowDown2Box_SSSE3, 62 ScaleRowDown2Box_C, 63 2, 64 1, 65 15) 66 SDODD(ScaleRowDown2Box_Odd_SSSE3, 67 ScaleRowDown2Box_SSSE3, 68 ScaleRowDown2Box_Odd_C, 69 2, 70 1, 71 15) 72 #endif 73 #ifdef HAS_SCALEUVROWDOWN2BOX_SSSE3 74 SDANY(ScaleUVRowDown2Box_Any_SSSE3, 75 ScaleUVRowDown2Box_SSSE3, 76 ScaleUVRowDown2Box_C, 77 2, 78 2, 79 4) 80 #endif 81 #ifdef HAS_SCALEUVROWDOWN2BOX_AVX2 82 SDANY(ScaleUVRowDown2Box_Any_AVX2, 83 ScaleUVRowDown2Box_AVX2, 84 ScaleUVRowDown2Box_C, 85 2, 86 2, 87 8) 88 #endif 89 #ifdef HAS_SCALEROWDOWN2_AVX2 90 SDANY(ScaleRowDown2_Any_AVX2, ScaleRowDown2_AVX2, ScaleRowDown2_C, 2, 1, 31) 91 SDANY(ScaleRowDown2Linear_Any_AVX2, 92 ScaleRowDown2Linear_AVX2, 93 ScaleRowDown2Linear_C, 94 2, 95 1, 96 31) 97 SDANY(ScaleRowDown2Box_Any_AVX2, 98 ScaleRowDown2Box_AVX2, 99 ScaleRowDown2Box_C, 100 2, 101 1, 102 31) 103 SDODD(ScaleRowDown2Box_Odd_AVX2, 104 ScaleRowDown2Box_AVX2, 105 ScaleRowDown2Box_Odd_C, 106 2, 107 1, 108 31) 109 #endif 110 #ifdef HAS_SCALEROWDOWN2_NEON 111 SDANY(ScaleRowDown2_Any_NEON, ScaleRowDown2_NEON, ScaleRowDown2_C, 2, 1, 15) 112 SDANY(ScaleRowDown2Linear_Any_NEON, 113 ScaleRowDown2Linear_NEON, 114 ScaleRowDown2Linear_C, 115 2, 116 1, 117 15) 118 SDANY(ScaleRowDown2Box_Any_NEON, 119 ScaleRowDown2Box_NEON, 120 ScaleRowDown2Box_C, 121 2, 122 1, 123 15) 124 SDODD(ScaleRowDown2Box_Odd_NEON, 125 ScaleRowDown2Box_NEON, 126 ScaleRowDown2Box_Odd_C, 127 2, 128 1, 129 15) 130 #endif 131 #ifdef HAS_SCALEUVROWDOWN2BOX_NEON 132 SDANY(ScaleUVRowDown2Box_Any_NEON, 133 ScaleUVRowDown2Box_NEON, 134 ScaleUVRowDown2Box_C, 135 2, 136 2, 137 8) 138 #endif 139 140 #ifdef HAS_SCALEROWDOWN2_MSA 141 SDANY(ScaleRowDown2_Any_MSA, ScaleRowDown2_MSA, ScaleRowDown2_C, 2, 1, 31) 142 SDANY(ScaleRowDown2Linear_Any_MSA, 143 ScaleRowDown2Linear_MSA, 144 ScaleRowDown2Linear_C, 145 2, 146 1, 147 31) 148 SDANY(ScaleRowDown2Box_Any_MSA, 149 ScaleRowDown2Box_MSA, 150 ScaleRowDown2Box_C, 151 2, 152 1, 153 31) 154 #endif 155 #ifdef HAS_SCALEROWDOWN2_MMI 156 SDANY(ScaleRowDown2_Any_MMI, ScaleRowDown2_MMI, ScaleRowDown2_C, 2, 1, 7) 157 SDANY(ScaleRowDown2Linear_Any_MMI, 158 ScaleRowDown2Linear_MMI, 159 ScaleRowDown2Linear_C, 160 2, 161 1, 162 7) 163 SDANY(ScaleRowDown2Box_Any_MMI, 164 ScaleRowDown2Box_MMI, 165 ScaleRowDown2Box_C, 166 2, 167 1, 168 7) 169 SDODD(ScaleRowDown2Box_Odd_MMI, 170 ScaleRowDown2Box_MMI, 171 ScaleRowDown2Box_Odd_C, 172 2, 173 1, 174 7) 175 #endif 176 #ifdef HAS_SCALEROWDOWN4_SSSE3 177 SDANY(ScaleRowDown4_Any_SSSE3, ScaleRowDown4_SSSE3, ScaleRowDown4_C, 4, 1, 7) 178 SDANY(ScaleRowDown4Box_Any_SSSE3, 179 ScaleRowDown4Box_SSSE3, 180 ScaleRowDown4Box_C, 181 4, 182 1, 183 7) 184 #endif 185 #ifdef HAS_SCALEROWDOWN4_AVX2 186 SDANY(ScaleRowDown4_Any_AVX2, ScaleRowDown4_AVX2, ScaleRowDown4_C, 4, 1, 15) 187 SDANY(ScaleRowDown4Box_Any_AVX2, 188 ScaleRowDown4Box_AVX2, 189 ScaleRowDown4Box_C, 190 4, 191 1, 192 15) 193 #endif 194 #ifdef HAS_SCALEROWDOWN4_NEON 195 SDANY(ScaleRowDown4_Any_NEON, ScaleRowDown4_NEON, ScaleRowDown4_C, 4, 1, 7) 196 SDANY(ScaleRowDown4Box_Any_NEON, 197 ScaleRowDown4Box_NEON, 198 ScaleRowDown4Box_C, 199 4, 200 1, 201 7) 202 #endif 203 #ifdef HAS_SCALEROWDOWN4_MSA 204 SDANY(ScaleRowDown4_Any_MSA, ScaleRowDown4_MSA, ScaleRowDown4_C, 4, 1, 15) 205 SDANY(ScaleRowDown4Box_Any_MSA, 206 ScaleRowDown4Box_MSA, 207 ScaleRowDown4Box_C, 208 4, 209 1, 210 15) 211 #endif 212 #ifdef HAS_SCALEROWDOWN4_MMI 213 SDANY(ScaleRowDown4_Any_MMI, ScaleRowDown4_MMI, ScaleRowDown4_C, 4, 1, 7) 214 SDANY(ScaleRowDown4Box_Any_MMI, 215 ScaleRowDown4Box_MMI, 216 ScaleRowDown4Box_C, 217 4, 218 1, 219 7) 220 #endif 221 #ifdef HAS_SCALEROWDOWN34_SSSE3 222 SDANY(ScaleRowDown34_Any_SSSE3, 223 ScaleRowDown34_SSSE3, 224 ScaleRowDown34_C, 225 4 / 3, 226 1, 227 23) 228 SDANY(ScaleRowDown34_0_Box_Any_SSSE3, 229 ScaleRowDown34_0_Box_SSSE3, 230 ScaleRowDown34_0_Box_C, 231 4 / 3, 232 1, 233 23) 234 SDANY(ScaleRowDown34_1_Box_Any_SSSE3, 235 ScaleRowDown34_1_Box_SSSE3, 236 ScaleRowDown34_1_Box_C, 237 4 / 3, 238 1, 239 23) 240 #endif 241 #ifdef HAS_SCALEROWDOWN34_NEON 242 SDANY(ScaleRowDown34_Any_NEON, 243 ScaleRowDown34_NEON, 244 ScaleRowDown34_C, 245 4 / 3, 246 1, 247 23) 248 SDANY(ScaleRowDown34_0_Box_Any_NEON, 249 ScaleRowDown34_0_Box_NEON, 250 ScaleRowDown34_0_Box_C, 251 4 / 3, 252 1, 253 23) 254 SDANY(ScaleRowDown34_1_Box_Any_NEON, 255 ScaleRowDown34_1_Box_NEON, 256 ScaleRowDown34_1_Box_C, 257 4 / 3, 258 1, 259 23) 260 #endif 261 #ifdef HAS_SCALEROWDOWN34_MSA 262 SDANY(ScaleRowDown34_Any_MSA, 263 ScaleRowDown34_MSA, 264 ScaleRowDown34_C, 265 4 / 3, 266 1, 267 47) 268 SDANY(ScaleRowDown34_0_Box_Any_MSA, 269 ScaleRowDown34_0_Box_MSA, 270 ScaleRowDown34_0_Box_C, 271 4 / 3, 272 1, 273 47) 274 SDANY(ScaleRowDown34_1_Box_Any_MSA, 275 ScaleRowDown34_1_Box_MSA, 276 ScaleRowDown34_1_Box_C, 277 4 / 3, 278 1, 279 47) 280 #endif 281 #ifdef HAS_SCALEROWDOWN34_MMI 282 SDANY(ScaleRowDown34_Any_MMI, 283 ScaleRowDown34_MMI, 284 ScaleRowDown34_C, 285 4 / 3, 286 1, 287 23) 288 #endif 289 #ifdef HAS_SCALEROWDOWN38_SSSE3 290 SDANY(ScaleRowDown38_Any_SSSE3, 291 ScaleRowDown38_SSSE3, 292 ScaleRowDown38_C, 293 8 / 3, 294 1, 295 11) 296 SDANY(ScaleRowDown38_3_Box_Any_SSSE3, 297 ScaleRowDown38_3_Box_SSSE3, 298 ScaleRowDown38_3_Box_C, 299 8 / 3, 300 1, 301 5) 302 SDANY(ScaleRowDown38_2_Box_Any_SSSE3, 303 ScaleRowDown38_2_Box_SSSE3, 304 ScaleRowDown38_2_Box_C, 305 8 / 3, 306 1, 307 5) 308 #endif 309 #ifdef HAS_SCALEROWDOWN38_NEON 310 SDANY(ScaleRowDown38_Any_NEON, 311 ScaleRowDown38_NEON, 312 ScaleRowDown38_C, 313 8 / 3, 314 1, 315 11) 316 SDANY(ScaleRowDown38_3_Box_Any_NEON, 317 ScaleRowDown38_3_Box_NEON, 318 ScaleRowDown38_3_Box_C, 319 8 / 3, 320 1, 321 11) 322 SDANY(ScaleRowDown38_2_Box_Any_NEON, 323 ScaleRowDown38_2_Box_NEON, 324 ScaleRowDown38_2_Box_C, 325 8 / 3, 326 1, 327 11) 328 #endif 329 #ifdef HAS_SCALEROWDOWN38_MSA 330 SDANY(ScaleRowDown38_Any_MSA, 331 ScaleRowDown38_MSA, 332 ScaleRowDown38_C, 333 8 / 3, 334 1, 335 11) 336 SDANY(ScaleRowDown38_3_Box_Any_MSA, 337 ScaleRowDown38_3_Box_MSA, 338 ScaleRowDown38_3_Box_C, 339 8 / 3, 340 1, 341 11) 342 SDANY(ScaleRowDown38_2_Box_Any_MSA, 343 ScaleRowDown38_2_Box_MSA, 344 ScaleRowDown38_2_Box_C, 345 8 / 3, 346 1, 347 11) 348 #endif 349 350 #ifdef HAS_SCALEARGBROWDOWN2_SSE2 351 SDANY(ScaleARGBRowDown2_Any_SSE2, 352 ScaleARGBRowDown2_SSE2, 353 ScaleARGBRowDown2_C, 354 2, 355 4, 356 3) 357 SDANY(ScaleARGBRowDown2Linear_Any_SSE2, 358 ScaleARGBRowDown2Linear_SSE2, 359 ScaleARGBRowDown2Linear_C, 360 2, 361 4, 362 3) 363 SDANY(ScaleARGBRowDown2Box_Any_SSE2, 364 ScaleARGBRowDown2Box_SSE2, 365 ScaleARGBRowDown2Box_C, 366 2, 367 4, 368 3) 369 #endif 370 #ifdef HAS_SCALEARGBROWDOWN2_NEON 371 SDANY(ScaleARGBRowDown2_Any_NEON, 372 ScaleARGBRowDown2_NEON, 373 ScaleARGBRowDown2_C, 374 2, 375 4, 376 7) 377 SDANY(ScaleARGBRowDown2Linear_Any_NEON, 378 ScaleARGBRowDown2Linear_NEON, 379 ScaleARGBRowDown2Linear_C, 380 2, 381 4, 382 7) 383 SDANY(ScaleARGBRowDown2Box_Any_NEON, 384 ScaleARGBRowDown2Box_NEON, 385 ScaleARGBRowDown2Box_C, 386 2, 387 4, 388 7) 389 #endif 390 #ifdef HAS_SCALEARGBROWDOWN2_MSA 391 SDANY(ScaleARGBRowDown2_Any_MSA, 392 ScaleARGBRowDown2_MSA, 393 ScaleARGBRowDown2_C, 394 2, 395 4, 396 3) 397 SDANY(ScaleARGBRowDown2Linear_Any_MSA, 398 ScaleARGBRowDown2Linear_MSA, 399 ScaleARGBRowDown2Linear_C, 400 2, 401 4, 402 3) 403 SDANY(ScaleARGBRowDown2Box_Any_MSA, 404 ScaleARGBRowDown2Box_MSA, 405 ScaleARGBRowDown2Box_C, 406 2, 407 4, 408 3) 409 #endif 410 #ifdef HAS_SCALEARGBROWDOWN2_MMI 411 SDANY(ScaleARGBRowDown2_Any_MMI, 412 ScaleARGBRowDown2_MMI, 413 ScaleARGBRowDown2_C, 414 2, 415 4, 416 1) 417 SDANY(ScaleARGBRowDown2Linear_Any_MMI, 418 ScaleARGBRowDown2Linear_MMI, 419 ScaleARGBRowDown2Linear_C, 420 2, 421 4, 422 1) 423 SDANY(ScaleARGBRowDown2Box_Any_MMI, 424 ScaleARGBRowDown2Box_MMI, 425 ScaleARGBRowDown2Box_C, 426 2, 427 4, 428 1) 429 #endif 430 #undef SDANY 431 432 // Scale down by even scale factor. 433 #define SDAANY(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, BPP, MASK) \ 434 void NAMEANY(const uint8_t* src_ptr, ptrdiff_t src_stride, int src_stepx, \ 435 uint8_t* dst_ptr, int dst_width) { \ 436 int r = dst_width & MASK; \ 437 int n = dst_width & ~MASK; \ 438 if (n > 0) { \ 439 SCALEROWDOWN_SIMD(src_ptr, src_stride, src_stepx, dst_ptr, n); \ 440 } \ 441 SCALEROWDOWN_C(src_ptr + (n * src_stepx) * BPP, src_stride, src_stepx, \ 442 dst_ptr + n * BPP, r); \ 443 } 444 445 #ifdef HAS_SCALEARGBROWDOWNEVEN_SSE2 446 SDAANY(ScaleARGBRowDownEven_Any_SSE2, 447 ScaleARGBRowDownEven_SSE2, 448 ScaleARGBRowDownEven_C, 449 4, 450 3) 451 SDAANY(ScaleARGBRowDownEvenBox_Any_SSE2, 452 ScaleARGBRowDownEvenBox_SSE2, 453 ScaleARGBRowDownEvenBox_C, 454 4, 455 3) 456 #endif 457 #ifdef HAS_SCALEARGBROWDOWNEVEN_NEON 458 SDAANY(ScaleARGBRowDownEven_Any_NEON, 459 ScaleARGBRowDownEven_NEON, 460 ScaleARGBRowDownEven_C, 461 4, 462 3) 463 SDAANY(ScaleARGBRowDownEvenBox_Any_NEON, 464 ScaleARGBRowDownEvenBox_NEON, 465 ScaleARGBRowDownEvenBox_C, 466 4, 467 3) 468 #endif 469 #ifdef HAS_SCALEARGBROWDOWNEVEN_MSA 470 SDAANY(ScaleARGBRowDownEven_Any_MSA, 471 ScaleARGBRowDownEven_MSA, 472 ScaleARGBRowDownEven_C, 473 4, 474 3) 475 SDAANY(ScaleARGBRowDownEvenBox_Any_MSA, 476 ScaleARGBRowDownEvenBox_MSA, 477 ScaleARGBRowDownEvenBox_C, 478 4, 479 3) 480 #endif 481 #ifdef HAS_SCALEARGBROWDOWNEVEN_MMI 482 SDAANY(ScaleARGBRowDownEven_Any_MMI, 483 ScaleARGBRowDownEven_MMI, 484 ScaleARGBRowDownEven_C, 485 4, 486 1) 487 SDAANY(ScaleARGBRowDownEvenBox_Any_MMI, 488 ScaleARGBRowDownEvenBox_MMI, 489 ScaleARGBRowDownEvenBox_C, 490 4, 491 1) 492 #endif 493 #ifdef HAS_SCALEUVROWDOWNEVEN_NEON 494 SDAANY(ScaleUVRowDownEven_Any_NEON, 495 ScaleUVRowDownEven_NEON, 496 ScaleUVRowDownEven_C, 497 2, 498 3) 499 #endif 500 501 #ifdef SASIMDONLY 502 // This also works and uses memcpy and SIMD instead of C, but is slower on ARM 503 504 // Add rows box filter scale down. Using macro from row_any 505 #define SAROW(NAMEANY, ANY_SIMD, SBPP, BPP, MASK) \ 506 void NAMEANY(const uint8_t* src_ptr, uint16_t* dst_ptr, int width) { \ 507 SIMD_ALIGNED(uint16_t dst_temp[32]); \ 508 SIMD_ALIGNED(uint8_t src_temp[32]); \ 509 memset(dst_temp, 0, 32 * 2); /* for msan */ \ 510 int r = width & MASK; \ 511 int n = width & ~MASK; \ 512 if (n > 0) { \ 513 ANY_SIMD(src_ptr, dst_ptr, n); \ 514 } \ 515 memcpy(src_temp, src_ptr + n * SBPP, r * SBPP); \ 516 memcpy(dst_temp, dst_ptr + n * BPP, r * BPP); \ 517 ANY_SIMD(src_temp, dst_temp, MASK + 1); \ 518 memcpy(dst_ptr + n * BPP, dst_temp, r * BPP); \ 519 } 520 521 #ifdef HAS_SCALEADDROW_SSE2 522 SAROW(ScaleAddRow_Any_SSE2, ScaleAddRow_SSE2, 1, 2, 15) 523 #endif 524 #ifdef HAS_SCALEADDROW_AVX2 525 SAROW(ScaleAddRow_Any_AVX2, ScaleAddRow_AVX2, 1, 2, 31) 526 #endif 527 #ifdef HAS_SCALEADDROW_NEON 528 SAROW(ScaleAddRow_Any_NEON, ScaleAddRow_NEON, 1, 2, 15) 529 #endif 530 #ifdef HAS_SCALEADDROW_MSA 531 SAROW(ScaleAddRow_Any_MSA, ScaleAddRow_MSA, 1, 2, 15) 532 #endif 533 #ifdef HAS_SCALEADDROW_MMI 534 SAROW(ScaleAddRow_Any_MMI, ScaleAddRow_MMI, 1, 2, 7) 535 #endif 536 #undef SAANY 537 538 #else 539 540 // Add rows box filter scale down. 541 #define SAANY(NAMEANY, SCALEADDROW_SIMD, SCALEADDROW_C, MASK) \ 542 void NAMEANY(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width) { \ 543 int n = src_width & ~MASK; \ 544 if (n > 0) { \ 545 SCALEADDROW_SIMD(src_ptr, dst_ptr, n); \ 546 } \ 547 SCALEADDROW_C(src_ptr + n, dst_ptr + n, src_width & MASK); \ 548 } 549 550 #ifdef HAS_SCALEADDROW_SSE2 551 SAANY(ScaleAddRow_Any_SSE2, ScaleAddRow_SSE2, ScaleAddRow_C, 15) 552 #endif 553 #ifdef HAS_SCALEADDROW_AVX2 554 SAANY(ScaleAddRow_Any_AVX2, ScaleAddRow_AVX2, ScaleAddRow_C, 31) 555 #endif 556 #ifdef HAS_SCALEADDROW_NEON 557 SAANY(ScaleAddRow_Any_NEON, ScaleAddRow_NEON, ScaleAddRow_C, 15) 558 #endif 559 #ifdef HAS_SCALEADDROW_MSA 560 SAANY(ScaleAddRow_Any_MSA, ScaleAddRow_MSA, ScaleAddRow_C, 15) 561 #endif 562 #ifdef HAS_SCALEADDROW_MMI 563 SAANY(ScaleAddRow_Any_MMI, ScaleAddRow_MMI, ScaleAddRow_C, 7) 564 #endif 565 #undef SAANY 566 567 #endif // SASIMDONLY 568 569 // Definition for ScaleFilterCols, ScaleARGBCols and ScaleARGBFilterCols 570 #define CANY(NAMEANY, TERP_SIMD, TERP_C, BPP, MASK) \ 571 void NAMEANY(uint8_t* dst_ptr, const uint8_t* src_ptr, int dst_width, int x, \ 572 int dx) { \ 573 int r = dst_width & MASK; \ 574 int n = dst_width & ~MASK; \ 575 if (n > 0) { \ 576 TERP_SIMD(dst_ptr, src_ptr, n, x, dx); \ 577 } \ 578 TERP_C(dst_ptr + n * BPP, src_ptr, r, x + n * dx, dx); \ 579 } 580 581 #ifdef HAS_SCALEFILTERCOLS_NEON 582 CANY(ScaleFilterCols_Any_NEON, ScaleFilterCols_NEON, ScaleFilterCols_C, 1, 7) 583 #endif 584 #ifdef HAS_SCALEFILTERCOLS_MSA 585 CANY(ScaleFilterCols_Any_MSA, ScaleFilterCols_MSA, ScaleFilterCols_C, 1, 15) 586 #endif 587 #ifdef HAS_SCALEARGBCOLS_NEON 588 CANY(ScaleARGBCols_Any_NEON, ScaleARGBCols_NEON, ScaleARGBCols_C, 4, 7) 589 #endif 590 #ifdef HAS_SCALEARGBCOLS_MSA 591 CANY(ScaleARGBCols_Any_MSA, ScaleARGBCols_MSA, ScaleARGBCols_C, 4, 3) 592 #endif 593 #ifdef HAS_SCALEARGBCOLS_MMI 594 CANY(ScaleARGBCols_Any_MMI, ScaleARGBCols_MMI, ScaleARGBCols_C, 4, 0) 595 #endif 596 #ifdef HAS_SCALEARGBFILTERCOLS_NEON 597 CANY(ScaleARGBFilterCols_Any_NEON, 598 ScaleARGBFilterCols_NEON, 599 ScaleARGBFilterCols_C, 600 4, 601 3) 602 #endif 603 #ifdef HAS_SCALEARGBFILTERCOLS_MSA 604 CANY(ScaleARGBFilterCols_Any_MSA, 605 ScaleARGBFilterCols_MSA, 606 ScaleARGBFilterCols_C, 607 4, 608 7) 609 #endif 610 #undef CANY 611 612 // Scale up horizontally 2 times using linear filter. 613 #define SUH2LANY(NAME, SIMD, C, MASK, PTYPE) \ 614 void NAME(const PTYPE* src_ptr, PTYPE* dst_ptr, int dst_width) { \ 615 int work_width = (dst_width - 1) & ~1; \ 616 int r = work_width & MASK; \ 617 int n = work_width & ~MASK; \ 618 dst_ptr[0] = src_ptr[0]; \ 619 if (work_width > 0) { \ 620 if (n != 0) { \ 621 SIMD(src_ptr, dst_ptr + 1, n); \ 622 } \ 623 C(src_ptr + (n / 2), dst_ptr + n + 1, r); \ 624 } \ 625 dst_ptr[dst_width - 1] = src_ptr[(dst_width / 2) - 1]; \ 626 } 627 628 // Even the C versions need to be wrapped, because boundary pixels have to 629 // be handled differently 630 631 SUH2LANY(ScaleRowUp2_Linear_Any_C, 632 ScaleRowUp2_Linear_C, 633 ScaleRowUp2_Linear_C, 634 0, 635 uint8_t) 636 637 SUH2LANY(ScaleRowUp2_Linear_16_Any_C, 638 ScaleRowUp2_Linear_16_C, 639 ScaleRowUp2_Linear_16_C, 640 0, 641 uint16_t) 642 643 #ifdef HAS_SCALEROWUP2LINEAR_SSE2 644 SUH2LANY(ScaleRowUp2_Linear_Any_SSE2, 645 ScaleRowUp2_Linear_SSE2, 646 ScaleRowUp2_Linear_C, 647 15, 648 uint8_t) 649 #endif 650 651 #ifdef HAS_SCALEROWUP2LINEAR_SSSE3 652 SUH2LANY(ScaleRowUp2_Linear_Any_SSSE3, 653 ScaleRowUp2_Linear_SSSE3, 654 ScaleRowUp2_Linear_C, 655 15, 656 uint8_t) 657 #endif 658 659 #ifdef HAS_SCALEROWUP2LINEAR_12_SSSE3 660 SUH2LANY(ScaleRowUp2_Linear_12_Any_SSSE3, 661 ScaleRowUp2_Linear_12_SSSE3, 662 ScaleRowUp2_Linear_16_C, 663 15, 664 uint16_t) 665 #endif 666 667 #ifdef HAS_SCALEROWUP2LINEAR_16_SSE2 668 SUH2LANY(ScaleRowUp2_Linear_16_Any_SSE2, 669 ScaleRowUp2_Linear_16_SSE2, 670 ScaleRowUp2_Linear_16_C, 671 7, 672 uint16_t) 673 #endif 674 675 #ifdef HAS_SCALEROWUP2LINEAR_AVX2 676 SUH2LANY(ScaleRowUp2_Linear_Any_AVX2, 677 ScaleRowUp2_Linear_AVX2, 678 ScaleRowUp2_Linear_C, 679 31, 680 uint8_t) 681 #endif 682 683 #ifdef HAS_SCALEROWUP2LINEAR_12_AVX2 684 SUH2LANY(ScaleRowUp2_Linear_12_Any_AVX2, 685 ScaleRowUp2_Linear_12_AVX2, 686 ScaleRowUp2_Linear_16_C, 687 31, 688 uint16_t) 689 #endif 690 691 #ifdef HAS_SCALEROWUP2LINEAR_16_AVX2 692 SUH2LANY(ScaleRowUp2_Linear_16_Any_AVX2, 693 ScaleRowUp2_Linear_16_AVX2, 694 ScaleRowUp2_Linear_16_C, 695 15, 696 uint16_t) 697 #endif 698 699 #ifdef HAS_SCALEROWUP2LINEAR_NEON 700 SUH2LANY(ScaleRowUp2_Linear_Any_NEON, 701 ScaleRowUp2_Linear_NEON, 702 ScaleRowUp2_Linear_C, 703 15, 704 uint8_t) 705 #endif 706 707 #ifdef HAS_SCALEROWUP2LINEAR_12_NEON 708 SUH2LANY(ScaleRowUp2_Linear_12_Any_NEON, 709 ScaleRowUp2_Linear_12_NEON, 710 ScaleRowUp2_Linear_16_C, 711 15, 712 uint16_t) 713 #endif 714 715 #ifdef HAS_SCALEROWUP2LINEAR_16_NEON 716 SUH2LANY(ScaleRowUp2_Linear_16_Any_NEON, 717 ScaleRowUp2_Linear_16_NEON, 718 ScaleRowUp2_Linear_16_C, 719 15, 720 uint16_t) 721 #endif 722 723 #undef SUH2LANY 724 725 // Scale up 2 times using bilinear filter. 726 // This function produces 2 rows at a time. 727 #define SU2BLANY(NAME, SIMD, C, MASK, PTYPE) \ 728 void NAME(const PTYPE* src_ptr, ptrdiff_t src_stride, PTYPE* dst_ptr, \ 729 ptrdiff_t dst_stride, int dst_width) { \ 730 int work_width = (dst_width - 1) & ~1; \ 731 int r = work_width & MASK; \ 732 int n = work_width & ~MASK; \ 733 const PTYPE* sa = src_ptr; \ 734 const PTYPE* sb = src_ptr + src_stride; \ 735 PTYPE* da = dst_ptr; \ 736 PTYPE* db = dst_ptr + dst_stride; \ 737 da[0] = (3 * sa[0] + sb[0] + 2) >> 2; \ 738 db[0] = (sa[0] + 3 * sb[0] + 2) >> 2; \ 739 if (work_width > 0) { \ 740 if (n != 0) { \ 741 SIMD(sa, sb - sa, da + 1, db - da, n); \ 742 } \ 743 C(sa + (n / 2), sb - sa, da + n + 1, db - da, r); \ 744 } \ 745 da[dst_width - 1] = \ 746 (3 * sa[(dst_width - 1) / 2] + sb[(dst_width - 1) / 2] + 2) >> 2; \ 747 db[dst_width - 1] = \ 748 (sa[(dst_width - 1) / 2] + 3 * sb[(dst_width - 1) / 2] + 2) >> 2; \ 749 } 750 751 SU2BLANY(ScaleRowUp2_Bilinear_Any_C, 752 ScaleRowUp2_Bilinear_C, 753 ScaleRowUp2_Bilinear_C, 754 0, 755 uint8_t) 756 757 SU2BLANY(ScaleRowUp2_Bilinear_16_Any_C, 758 ScaleRowUp2_Bilinear_16_C, 759 ScaleRowUp2_Bilinear_16_C, 760 0, 761 uint16_t) 762 763 #ifdef HAS_SCALEROWUP2BILINEAR_SSE2 764 SU2BLANY(ScaleRowUp2_Bilinear_Any_SSE2, 765 ScaleRowUp2_Bilinear_SSE2, 766 ScaleRowUp2_Bilinear_C, 767 15, 768 uint8_t) 769 #endif 770 771 #ifdef HAS_SCALEROWUP2BILINEAR_12_SSSE3 772 SU2BLANY(ScaleRowUp2_Bilinear_12_Any_SSSE3, 773 ScaleRowUp2_Bilinear_12_SSSE3, 774 ScaleRowUp2_Bilinear_16_C, 775 15, 776 uint16_t) 777 #endif 778 779 #ifdef HAS_SCALEROWUP2BILINEAR_16_SSE2 780 SU2BLANY(ScaleRowUp2_Bilinear_16_Any_SSSE3, 781 ScaleRowUp2_Bilinear_16_SSE2, 782 ScaleRowUp2_Bilinear_16_C, 783 7, 784 uint16_t) 785 #endif 786 787 #ifdef HAS_SCALEROWUP2BILINEAR_SSSE3 788 SU2BLANY(ScaleRowUp2_Bilinear_Any_SSSE3, 789 ScaleRowUp2_Bilinear_SSSE3, 790 ScaleRowUp2_Bilinear_C, 791 15, 792 uint8_t) 793 #endif 794 795 #ifdef HAS_SCALEROWUP2BILINEAR_AVX2 796 SU2BLANY(ScaleRowUp2_Bilinear_Any_AVX2, 797 ScaleRowUp2_Bilinear_AVX2, 798 ScaleRowUp2_Bilinear_C, 799 31, 800 uint8_t) 801 #endif 802 803 #ifdef HAS_SCALEROWUP2BILINEAR_12_AVX2 804 SU2BLANY(ScaleRowUp2_Bilinear_12_Any_AVX2, 805 ScaleRowUp2_Bilinear_12_AVX2, 806 ScaleRowUp2_Bilinear_16_C, 807 15, 808 uint16_t) 809 #endif 810 811 #ifdef HAS_SCALEROWUP2BILINEAR_16_AVX2 812 SU2BLANY(ScaleRowUp2_Bilinear_16_Any_AVX2, 813 ScaleRowUp2_Bilinear_16_AVX2, 814 ScaleRowUp2_Bilinear_16_C, 815 15, 816 uint16_t) 817 #endif 818 819 #ifdef HAS_SCALEROWUP2BILINEAR_NEON 820 SU2BLANY(ScaleRowUp2_Bilinear_Any_NEON, 821 ScaleRowUp2_Bilinear_NEON, 822 ScaleRowUp2_Bilinear_C, 823 15, 824 uint8_t) 825 #endif 826 827 #ifdef HAS_SCALEROWUP2BILINEAR_12_NEON 828 SU2BLANY(ScaleRowUp2_Bilinear_12_Any_NEON, 829 ScaleRowUp2_Bilinear_12_NEON, 830 ScaleRowUp2_Bilinear_16_C, 831 15, 832 uint16_t) 833 #endif 834 835 #ifdef HAS_SCALEROWUP2BILINEAR_16_NEON 836 SU2BLANY(ScaleRowUp2_Bilinear_16_Any_NEON, 837 ScaleRowUp2_Bilinear_16_NEON, 838 ScaleRowUp2_Bilinear_16_C, 839 7, 840 uint16_t) 841 #endif 842 843 #undef SU2BLANY 844 845 // Scale bi-planar plane up horizontally 2 times using linear filter. 846 #define SBUH2LANY(NAME, SIMD, C, MASK, PTYPE) \ 847 void NAME(const PTYPE* src_ptr, PTYPE* dst_ptr, int dst_width) { \ 848 int work_width = (dst_width - 1) & ~1; \ 849 int r = work_width & MASK; \ 850 int n = work_width & ~MASK; \ 851 dst_ptr[0] = src_ptr[0]; \ 852 dst_ptr[1] = src_ptr[1]; \ 853 if (work_width > 0) { \ 854 if (n != 0) { \ 855 SIMD(src_ptr, dst_ptr + 2, n); \ 856 } \ 857 C(src_ptr + n, dst_ptr + 2 * n + 2, r); \ 858 } \ 859 dst_ptr[2 * dst_width - 2] = src_ptr[((dst_width + 1) & ~1) - 2]; \ 860 dst_ptr[2 * dst_width - 1] = src_ptr[((dst_width + 1) & ~1) - 1]; \ 861 } 862 863 SBUH2LANY(ScaleUVRowUp2_Linear_Any_C, 864 ScaleUVRowUp2_Linear_C, 865 ScaleUVRowUp2_Linear_C, 866 0, 867 uint8_t) 868 869 SBUH2LANY(ScaleUVRowUp2_Linear_16_Any_C, 870 ScaleUVRowUp2_Linear_16_C, 871 ScaleUVRowUp2_Linear_16_C, 872 0, 873 uint16_t) 874 875 #ifdef HAS_SCALEUVROWUP2LINEAR_SSSE3 876 SBUH2LANY(ScaleUVRowUp2_Linear_Any_SSSE3, 877 ScaleUVRowUp2_Linear_SSSE3, 878 ScaleUVRowUp2_Linear_C, 879 7, 880 uint8_t) 881 #endif 882 883 #ifdef HAS_SCALEUVROWUP2LINEAR_AVX2 884 SBUH2LANY(ScaleUVRowUp2_Linear_Any_AVX2, 885 ScaleUVRowUp2_Linear_AVX2, 886 ScaleUVRowUp2_Linear_C, 887 15, 888 uint8_t) 889 #endif 890 891 #ifdef HAS_SCALEUVROWUP2LINEAR_16_SSE2 892 SBUH2LANY(ScaleUVRowUp2_Linear_16_Any_SSE2, 893 ScaleUVRowUp2_Linear_16_SSE2, 894 ScaleUVRowUp2_Linear_16_C, 895 3, 896 uint16_t) 897 #endif 898 899 #ifdef HAS_SCALEUVROWUP2LINEAR_16_AVX2 900 SBUH2LANY(ScaleUVRowUp2_Linear_16_Any_AVX2, 901 ScaleUVRowUp2_Linear_16_AVX2, 902 ScaleUVRowUp2_Linear_16_C, 903 7, 904 uint16_t) 905 #endif 906 907 #ifdef HAS_SCALEUVROWUP2LINEAR_NEON 908 SBUH2LANY(ScaleUVRowUp2_Linear_Any_NEON, 909 ScaleUVRowUp2_Linear_NEON, 910 ScaleUVRowUp2_Linear_C, 911 15, 912 uint8_t) 913 #endif 914 915 #ifdef HAS_SCALEUVROWUP2LINEAR_16_NEON 916 SBUH2LANY(ScaleUVRowUp2_Linear_16_Any_NEON, 917 ScaleUVRowUp2_Linear_16_NEON, 918 ScaleUVRowUp2_Linear_16_C, 919 15, 920 uint16_t) 921 #endif 922 923 #undef SBUH2LANY 924 925 // Scale bi-planar plane up 2 times using bilinear filter. 926 // This function produces 2 rows at a time. 927 #define SBU2BLANY(NAME, SIMD, C, MASK, PTYPE) \ 928 void NAME(const PTYPE* src_ptr, ptrdiff_t src_stride, PTYPE* dst_ptr, \ 929 ptrdiff_t dst_stride, int dst_width) { \ 930 int work_width = (dst_width - 1) & ~1; \ 931 int r = work_width & MASK; \ 932 int n = work_width & ~MASK; \ 933 const PTYPE* sa = src_ptr; \ 934 const PTYPE* sb = src_ptr + src_stride; \ 935 PTYPE* da = dst_ptr; \ 936 PTYPE* db = dst_ptr + dst_stride; \ 937 da[0] = (3 * sa[0] + sb[0] + 2) >> 2; \ 938 db[0] = (sa[0] + 3 * sb[0] + 2) >> 2; \ 939 da[1] = (3 * sa[1] + sb[1] + 2) >> 2; \ 940 db[1] = (sa[1] + 3 * sb[1] + 2) >> 2; \ 941 if (work_width > 0) { \ 942 if (n != 0) { \ 943 SIMD(sa, sb - sa, da + 2, db - da, n); \ 944 } \ 945 C(sa + n, sb - sa, da + 2 * n + 2, db - da, r); \ 946 } \ 947 da[2 * dst_width - 2] = (3 * sa[((dst_width + 1) & ~1) - 2] + \ 948 sb[((dst_width + 1) & ~1) - 2] + 2) >> \ 949 2; \ 950 db[2 * dst_width - 2] = (sa[((dst_width + 1) & ~1) - 2] + \ 951 3 * sb[((dst_width + 1) & ~1) - 2] + 2) >> \ 952 2; \ 953 da[2 * dst_width - 1] = (3 * sa[((dst_width + 1) & ~1) - 1] + \ 954 sb[((dst_width + 1) & ~1) - 1] + 2) >> \ 955 2; \ 956 db[2 * dst_width - 1] = (sa[((dst_width + 1) & ~1) - 1] + \ 957 3 * sb[((dst_width + 1) & ~1) - 1] + 2) >> \ 958 2; \ 959 } 960 961 SBU2BLANY(ScaleUVRowUp2_Bilinear_Any_C, 962 ScaleUVRowUp2_Bilinear_C, 963 ScaleUVRowUp2_Bilinear_C, 964 0, 965 uint8_t) 966 967 SBU2BLANY(ScaleUVRowUp2_Bilinear_16_Any_C, 968 ScaleUVRowUp2_Bilinear_16_C, 969 ScaleUVRowUp2_Bilinear_16_C, 970 0, 971 uint16_t) 972 973 #ifdef HAS_SCALEUVROWUP2BILINEAR_SSSE3 974 SBU2BLANY(ScaleUVRowUp2_Bilinear_Any_SSSE3, 975 ScaleUVRowUp2_Bilinear_SSSE3, 976 ScaleUVRowUp2_Bilinear_C, 977 7, 978 uint8_t) 979 #endif 980 981 #ifdef HAS_SCALEUVROWUP2BILINEAR_AVX2 982 SBU2BLANY(ScaleUVRowUp2_Bilinear_Any_AVX2, 983 ScaleUVRowUp2_Bilinear_AVX2, 984 ScaleUVRowUp2_Bilinear_C, 985 15, 986 uint8_t) 987 #endif 988 989 #ifdef HAS_SCALEUVROWUP2BILINEAR_16_SSE2 990 SBU2BLANY(ScaleUVRowUp2_Bilinear_16_Any_SSE2, 991 ScaleUVRowUp2_Bilinear_16_SSE2, 992 ScaleUVRowUp2_Bilinear_16_C, 993 7, 994 uint16_t) 995 #endif 996 997 #ifdef HAS_SCALEUVROWUP2BILINEAR_16_AVX2 998 SBU2BLANY(ScaleUVRowUp2_Bilinear_16_Any_AVX2, 999 ScaleUVRowUp2_Bilinear_16_AVX2, 1000 ScaleUVRowUp2_Bilinear_16_C, 1001 7, 1002 uint16_t) 1003 #endif 1004 1005 #ifdef HAS_SCALEUVROWUP2BILINEAR_NEON 1006 SBU2BLANY(ScaleUVRowUp2_Bilinear_Any_NEON, 1007 ScaleUVRowUp2_Bilinear_NEON, 1008 ScaleUVRowUp2_Bilinear_C, 1009 7, 1010 uint8_t) 1011 #endif 1012 1013 #ifdef HAS_SCALEUVROWUP2BILINEAR_16_NEON 1014 SBU2BLANY(ScaleUVRowUp2_Bilinear_16_Any_NEON, 1015 ScaleUVRowUp2_Bilinear_16_NEON, 1016 ScaleUVRowUp2_Bilinear_16_C, 1017 7, 1018 uint16_t) 1019 #endif 1020 1021 #undef SBU2BLANY 1022 1023 #ifdef __cplusplus 1024 } // extern "C" 1025 } // namespace libyuv 1026 #endif 1027