Lines Matching refs:_mm256_adds_epu16

88                         ss3 = _mm256_adds_epu16(ss3, _mm256_mpsadbw_epu8(ss0, ss2, 0));  in eb_vp9_sad_loop_kernel_avx512_hme_l0_intrin()
89 ss5 = _mm256_adds_epu16(ss5, _mm256_mpsadbw_epu8(ss1, ss2, 18)); // 010 010 in eb_vp9_sad_loop_kernel_avx512_hme_l0_intrin()
93 ss3 = _mm256_adds_epu16(ss3, ss5); in eb_vp9_sad_loop_kernel_avx512_hme_l0_intrin()
149 ss3 = _mm256_adds_epu16(ss3, _mm256_mpsadbw_epu8(ss0, ss2, 0)); in eb_vp9_sad_loop_kernel_avx512_hme_l0_intrin()
150 ss4 = _mm256_adds_epu16(ss4, _mm256_mpsadbw_epu8(ss0, ss2, 45)); // 101 101 in eb_vp9_sad_loop_kernel_avx512_hme_l0_intrin()
151 ss5 = _mm256_adds_epu16(ss5, _mm256_mpsadbw_epu8(ss1, ss2, 18)); // 010 010 in eb_vp9_sad_loop_kernel_avx512_hme_l0_intrin()
152 ss6 = _mm256_adds_epu16(ss6, _mm256_mpsadbw_epu8(ss1, ss2, 63)); // 111 111 in eb_vp9_sad_loop_kernel_avx512_hme_l0_intrin()
156 … ss3 = _mm256_adds_epu16(_mm256_adds_epu16(ss3, ss4), _mm256_adds_epu16(ss5, ss6)); in eb_vp9_sad_loop_kernel_avx512_hme_l0_intrin()
295 ss3 = _mm256_adds_epu16(ss3, _mm256_mpsadbw_epu8(ss0, ss2, 0)); in eb_vp9_sad_loop_kernel_avx512_hme_l0_intrin()
296 … ss4 = _mm256_adds_epu16(ss4, _mm256_mpsadbw_epu8(ss0, ss2, 45)); // 101 101 in eb_vp9_sad_loop_kernel_avx512_hme_l0_intrin()
297 … ss5 = _mm256_adds_epu16(ss5, _mm256_mpsadbw_epu8(ss1, ss2, 18)); // 010 010 in eb_vp9_sad_loop_kernel_avx512_hme_l0_intrin()
298 … ss6 = _mm256_adds_epu16(ss6, _mm256_mpsadbw_epu8(ss1, ss2, 63)); // 111 111 in eb_vp9_sad_loop_kernel_avx512_hme_l0_intrin()
302 … ss3 = _mm256_adds_epu16(_mm256_adds_epu16(ss3, ss4), _mm256_adds_epu16(ss5, ss6)); in eb_vp9_sad_loop_kernel_avx512_hme_l0_intrin()
339 ss3 = _mm256_adds_epu16(ss3, _mm256_mpsadbw_epu8(ss0, ss2, 0)); in eb_vp9_sad_loop_kernel_avx512_hme_l0_intrin()
340 … ss4 = _mm256_adds_epu16(ss4, _mm256_mpsadbw_epu8(ss0, ss2, 45)); // 101 101 in eb_vp9_sad_loop_kernel_avx512_hme_l0_intrin()
341 … ss5 = _mm256_adds_epu16(ss5, _mm256_mpsadbw_epu8(ss1, ss2, 18)); // 010 010 in eb_vp9_sad_loop_kernel_avx512_hme_l0_intrin()
342 … ss6 = _mm256_adds_epu16(ss6, _mm256_mpsadbw_epu8(ss1, ss2, 63)); // 111 111 in eb_vp9_sad_loop_kernel_avx512_hme_l0_intrin()
346 ss3 = _mm256_adds_epu16(ss3, ss4); in eb_vp9_sad_loop_kernel_avx512_hme_l0_intrin()
347 ss5 = _mm256_adds_epu16(ss5, ss6); in eb_vp9_sad_loop_kernel_avx512_hme_l0_intrin()
348 ss0 = _mm256_adds_epu16(ss3, ss5); in eb_vp9_sad_loop_kernel_avx512_hme_l0_intrin()
394 ss3 = _mm256_adds_epu16(ss3, _mm256_mpsadbw_epu8(ss0, ss2, 0)); in eb_vp9_sad_loop_kernel_avx512_hme_l0_intrin()
395 ss4 = _mm256_adds_epu16(ss4, _mm256_mpsadbw_epu8(ss0, ss2, 45)); // 101 101 in eb_vp9_sad_loop_kernel_avx512_hme_l0_intrin()
396 ss5 = _mm256_adds_epu16(ss5, _mm256_mpsadbw_epu8(ss1, ss2, 18)); // 010 010 in eb_vp9_sad_loop_kernel_avx512_hme_l0_intrin()
397 ss6 = _mm256_adds_epu16(ss6, _mm256_mpsadbw_epu8(ss1, ss2, 63)); // 111 111 in eb_vp9_sad_loop_kernel_avx512_hme_l0_intrin()
401 ss3 = _mm256_adds_epu16(ss3, ss4); in eb_vp9_sad_loop_kernel_avx512_hme_l0_intrin()
402 ss5 = _mm256_adds_epu16(ss5, ss6); in eb_vp9_sad_loop_kernel_avx512_hme_l0_intrin()
439 ss3 = _mm256_adds_epu16(ss3, _mm256_mpsadbw_epu8(ss0, ss2, 0)); in eb_vp9_sad_loop_kernel_avx512_hme_l0_intrin()
440 ss4 = _mm256_adds_epu16(ss4, _mm256_mpsadbw_epu8(ss0, ss2, 45)); // 101 101 in eb_vp9_sad_loop_kernel_avx512_hme_l0_intrin()
441 ss5 = _mm256_adds_epu16(ss5, _mm256_mpsadbw_epu8(ss1, ss2, 18)); // 010 010 in eb_vp9_sad_loop_kernel_avx512_hme_l0_intrin()
442 ss6 = _mm256_adds_epu16(ss6, _mm256_mpsadbw_epu8(ss1, ss2, 63)); // 111 111 in eb_vp9_sad_loop_kernel_avx512_hme_l0_intrin()
446 ss3 = _mm256_adds_epu16(ss3, ss4); in eb_vp9_sad_loop_kernel_avx512_hme_l0_intrin()
447 ss5 = _mm256_adds_epu16(ss5, ss6); in eb_vp9_sad_loop_kernel_avx512_hme_l0_intrin()
496 ss3 = _mm256_adds_epu16(ss3, _mm256_mpsadbw_epu8(ss0, ss2, 0)); in eb_vp9_sad_loop_kernel_avx512_hme_l0_intrin()
497 ss4 = _mm256_adds_epu16(ss4, _mm256_mpsadbw_epu8(ss0, ss2, 45)); // 101 101 in eb_vp9_sad_loop_kernel_avx512_hme_l0_intrin()
498 ss5 = _mm256_adds_epu16(ss5, _mm256_mpsadbw_epu8(ss1, ss2, 18)); // 010 010 in eb_vp9_sad_loop_kernel_avx512_hme_l0_intrin()
499 ss6 = _mm256_adds_epu16(ss6, _mm256_mpsadbw_epu8(ss1, ss2, 63)); // 111 111 in eb_vp9_sad_loop_kernel_avx512_hme_l0_intrin()
503 … ss3 = _mm256_adds_epu16(_mm256_adds_epu16(ss3, ss4), _mm256_adds_epu16(ss5, ss6)); in eb_vp9_sad_loop_kernel_avx512_hme_l0_intrin()
540 ss3 = _mm256_adds_epu16(ss3, _mm256_mpsadbw_epu8(ss0, ss2, 0)); in eb_vp9_sad_loop_kernel_avx512_hme_l0_intrin()
541 ss4 = _mm256_adds_epu16(ss4, _mm256_mpsadbw_epu8(ss0, ss2, 45)); // 101 101 in eb_vp9_sad_loop_kernel_avx512_hme_l0_intrin()
542 ss5 = _mm256_adds_epu16(ss5, _mm256_mpsadbw_epu8(ss1, ss2, 18)); // 010 010 in eb_vp9_sad_loop_kernel_avx512_hme_l0_intrin()
543 ss6 = _mm256_adds_epu16(ss6, _mm256_mpsadbw_epu8(ss1, ss2, 63)); // 111 111 in eb_vp9_sad_loop_kernel_avx512_hme_l0_intrin()
547 ss3 = _mm256_adds_epu16(ss3, ss4); in eb_vp9_sad_loop_kernel_avx512_hme_l0_intrin()
548 ss5 = _mm256_adds_epu16(ss5, ss6); in eb_vp9_sad_loop_kernel_avx512_hme_l0_intrin()
549 ss6 = _mm256_adds_epu16(ss3, ss5); in eb_vp9_sad_loop_kernel_avx512_hme_l0_intrin()
594 ss3 = _mm256_adds_epu16(ss3, _mm256_mpsadbw_epu8(ss0, ss2, 0)); in eb_vp9_sad_loop_kernel_avx512_hme_l0_intrin()
595 ss4 = _mm256_adds_epu16(ss4, _mm256_mpsadbw_epu8(ss0, ss2, 45)); // 101 101 in eb_vp9_sad_loop_kernel_avx512_hme_l0_intrin()
596 ss5 = _mm256_adds_epu16(ss5, _mm256_mpsadbw_epu8(ss1, ss2, 18)); // 010 010 in eb_vp9_sad_loop_kernel_avx512_hme_l0_intrin()
597 ss6 = _mm256_adds_epu16(ss6, _mm256_mpsadbw_epu8(ss1, ss2, 63)); // 111 111 in eb_vp9_sad_loop_kernel_avx512_hme_l0_intrin()
601 … ss7 = _mm256_adds_epu16(_mm256_adds_epu16(ss3, ss4), _mm256_adds_epu16(ss5, ss6)); in eb_vp9_sad_loop_kernel_avx512_hme_l0_intrin()
654 ss3 = _mm256_adds_epu16(ss3, _mm256_mpsadbw_epu8(ss0, ss2, 0)); in eb_vp9_sad_loop_kernel_avx512_hme_l0_intrin()
655 ss4 = _mm256_adds_epu16(ss4, _mm256_mpsadbw_epu8(ss0, ss2, 45)); // 101 101 in eb_vp9_sad_loop_kernel_avx512_hme_l0_intrin()
656 ss5 = _mm256_adds_epu16(ss5, _mm256_mpsadbw_epu8(ss1, ss2, 18)); // 010 010 in eb_vp9_sad_loop_kernel_avx512_hme_l0_intrin()
657 ss6 = _mm256_adds_epu16(ss6, _mm256_mpsadbw_epu8(ss1, ss2, 63)); // 111 111 in eb_vp9_sad_loop_kernel_avx512_hme_l0_intrin()
671 ss3 = _mm256_adds_epu16(ss3, ss4); in eb_vp9_sad_loop_kernel_avx512_hme_l0_intrin()
672 ss5 = _mm256_adds_epu16(ss5, ss6); in eb_vp9_sad_loop_kernel_avx512_hme_l0_intrin()
673 ss6 = _mm256_adds_epu16(ss3, ss5); in eb_vp9_sad_loop_kernel_avx512_hme_l0_intrin()
721 ss3 = _mm256_adds_epu16(ss3, _mm256_mpsadbw_epu8(ss0, ss2, 0)); in eb_vp9_sad_loop_kernel_avx512_hme_l0_intrin()
722 ss4 = _mm256_adds_epu16(ss4, _mm256_mpsadbw_epu8(ss0, ss2, 45)); // 101 101 in eb_vp9_sad_loop_kernel_avx512_hme_l0_intrin()
723 ss5 = _mm256_adds_epu16(ss5, _mm256_mpsadbw_epu8(ss1, ss2, 18)); // 010 010 in eb_vp9_sad_loop_kernel_avx512_hme_l0_intrin()
724 ss6 = _mm256_adds_epu16(ss6, _mm256_mpsadbw_epu8(ss1, ss2, 63)); // 111 111 in eb_vp9_sad_loop_kernel_avx512_hme_l0_intrin()
736 ss7 = _mm256_adds_epu16(ss3, ss4); in eb_vp9_sad_loop_kernel_avx512_hme_l0_intrin()
737 ss8 = _mm256_adds_epu16(ss5, ss6); in eb_vp9_sad_loop_kernel_avx512_hme_l0_intrin()
738 ss7 = _mm256_adds_epu16(ss7, ss8); in eb_vp9_sad_loop_kernel_avx512_hme_l0_intrin()
796 ss3 = _mm256_adds_epu16(ss3, _mm256_mpsadbw_epu8(ss0, ss2, 0)); in eb_vp9_sad_loop_kernel_avx512_hme_l0_intrin()
797 ss4 = _mm256_adds_epu16(ss4, _mm256_mpsadbw_epu8(ss0, ss2, 45)); // 101 101 in eb_vp9_sad_loop_kernel_avx512_hme_l0_intrin()
798 ss5 = _mm256_adds_epu16(ss5, _mm256_mpsadbw_epu8(ss1, ss2, 18)); // 010 010 in eb_vp9_sad_loop_kernel_avx512_hme_l0_intrin()
799 ss6 = _mm256_adds_epu16(ss6, _mm256_mpsadbw_epu8(ss1, ss2, 63)); // 111 111 in eb_vp9_sad_loop_kernel_avx512_hme_l0_intrin()
803 ss3 = _mm256_adds_epu16(ss3, _mm256_mpsadbw_epu8(ss0, ss2, 0)); in eb_vp9_sad_loop_kernel_avx512_hme_l0_intrin()
804 ss4 = _mm256_adds_epu16(ss4, _mm256_mpsadbw_epu8(ss0, ss2, 45)); // 101 101 in eb_vp9_sad_loop_kernel_avx512_hme_l0_intrin()
805 ss5 = _mm256_adds_epu16(ss5, _mm256_mpsadbw_epu8(ss1, ss2, 18)); // 010 010 in eb_vp9_sad_loop_kernel_avx512_hme_l0_intrin()
806 ss6 = _mm256_adds_epu16(ss6, _mm256_mpsadbw_epu8(ss1, ss2, 63)); // 111 111 in eb_vp9_sad_loop_kernel_avx512_hme_l0_intrin()
810 … ss7 = _mm256_adds_epu16(_mm256_adds_epu16(ss3, ss4), _mm256_adds_epu16(ss5, ss6)); in eb_vp9_sad_loop_kernel_avx512_hme_l0_intrin()
860 ss3 = _mm256_adds_epu16(ss3, _mm256_mpsadbw_epu8(ss0, ss2, 0)); in eb_vp9_sad_loop_kernel_avx512_hme_l0_intrin()
861 ss4 = _mm256_adds_epu16(ss4, _mm256_mpsadbw_epu8(ss0, ss2, 45)); // 101 101 in eb_vp9_sad_loop_kernel_avx512_hme_l0_intrin()
862 ss5 = _mm256_adds_epu16(ss5, _mm256_mpsadbw_epu8(ss1, ss2, 18)); // 010 010 in eb_vp9_sad_loop_kernel_avx512_hme_l0_intrin()
863 ss6 = _mm256_adds_epu16(ss6, _mm256_mpsadbw_epu8(ss1, ss2, 63)); // 111 111 in eb_vp9_sad_loop_kernel_avx512_hme_l0_intrin()
867 ss7 = _mm256_adds_epu16(ss7, _mm256_mpsadbw_epu8(ss0, ss2, 0)); in eb_vp9_sad_loop_kernel_avx512_hme_l0_intrin()
868 ss8 = _mm256_adds_epu16(ss8, _mm256_mpsadbw_epu8(ss0, ss2, 45)); // 101 101 in eb_vp9_sad_loop_kernel_avx512_hme_l0_intrin()
869 ss9 = _mm256_adds_epu16(ss9, _mm256_mpsadbw_epu8(ss1, ss2, 18)); // 010 010 in eb_vp9_sad_loop_kernel_avx512_hme_l0_intrin()
870 … ss10 = _mm256_adds_epu16(ss10, _mm256_mpsadbw_epu8(ss1, ss2, 63)); // 111 111 in eb_vp9_sad_loop_kernel_avx512_hme_l0_intrin()
874 … ss0 = _mm256_adds_epu16(_mm256_adds_epu16(ss3, ss4), _mm256_adds_epu16(ss5, ss6)); in eb_vp9_sad_loop_kernel_avx512_hme_l0_intrin()
875 …ss0 = _mm256_adds_epu16(ss0, _mm256_adds_epu16(_mm256_adds_epu16(ss7, ss8), _mm256_adds_epu16(ss9,… in eb_vp9_sad_loop_kernel_avx512_hme_l0_intrin()
957 ss3 = _mm256_adds_epu16(ss3, _mm256_mpsadbw_epu8(ss0, ss2, 0));
958 ss5 = _mm256_adds_epu16(ss5, _mm256_mpsadbw_epu8(ss1, ss2, 18)); // 010 010
962 ss3 = _mm256_adds_epu16(ss3, ss5);
1018 ss3 = _mm256_adds_epu16(ss3, _mm256_mpsadbw_epu8(ss0, ss2, 0));
1019 ss4 = _mm256_adds_epu16(ss4, _mm256_mpsadbw_epu8(ss0, ss2, 45)); // 101 101
1020 ss5 = _mm256_adds_epu16(ss5, _mm256_mpsadbw_epu8(ss1, ss2, 18)); // 010 010
1021 ss6 = _mm256_adds_epu16(ss6, _mm256_mpsadbw_epu8(ss1, ss2, 63)); // 111 111
1025 … ss3 = _mm256_adds_epu16(_mm256_adds_epu16(ss3, ss4), _mm256_adds_epu16(ss5, ss6));
1082 ss3 = _mm256_adds_epu16(ss3, _mm256_mpsadbw_epu8(ss0, ss2, 0));
1083 ss4 = _mm256_adds_epu16(ss4, _mm256_mpsadbw_epu8(ss0, ss2, 45)); // 101 101
1084 ss5 = _mm256_adds_epu16(ss5, _mm256_mpsadbw_epu8(ss1, ss2, 18)); // 010 010
1085 ss6 = _mm256_adds_epu16(ss6, _mm256_mpsadbw_epu8(ss1, ss2, 63)); // 111 111
1088 ss7 = _mm256_adds_epu16(ss7, _mm256_mpsadbw_epu8(ss1, ss2, 0));
1089 ss11 = _mm256_adds_epu16(ss11, _mm256_mpsadbw_epu8(ss1, ss2, 45));
1090 ss9 = _mm256_adds_epu16(ss9, _mm256_mpsadbw_epu8(ss0, ss2, 18));
1091 ss10 = _mm256_adds_epu16(ss10, _mm256_mpsadbw_epu8(ss0, ss2, 63));
1096 … ss3 = _mm256_adds_epu16(_mm256_adds_epu16(ss3, ss4), _mm256_adds_epu16(ss5, ss6));
1106 … ss7 = _mm256_adds_epu16(_mm256_adds_epu16(ss7, ss11), _mm256_adds_epu16(ss9, ss10));
1129 ss3 = _mm256_adds_epu16(ss3, _mm256_mpsadbw_epu8(ss0, ss2, 0));
1130 ss4 = _mm256_adds_epu16(ss4, _mm256_mpsadbw_epu8(ss0, ss2, 45)); // 101 101
1131 ss5 = _mm256_adds_epu16(ss5, _mm256_mpsadbw_epu8(ss1, ss2, 18)); // 010 010
1132 ss6 = _mm256_adds_epu16(ss6, _mm256_mpsadbw_epu8(ss1, ss2, 63)); // 111 111
1136 … ss3 = _mm256_adds_epu16(_mm256_adds_epu16(ss3, ss4), _mm256_adds_epu16(ss5, ss6));
1173 ss3 = _mm256_adds_epu16(ss3, _mm256_mpsadbw_epu8(ss0, ss2, 0));
1174 ss4 = _mm256_adds_epu16(ss4, _mm256_mpsadbw_epu8(ss0, ss2, 45)); // 101 101
1175 ss5 = _mm256_adds_epu16(ss5, _mm256_mpsadbw_epu8(ss1, ss2, 18)); // 010 010
1176 ss6 = _mm256_adds_epu16(ss6, _mm256_mpsadbw_epu8(ss1, ss2, 63)); // 111 111
1180 ss3 = _mm256_adds_epu16(ss3, ss4);
1181 ss5 = _mm256_adds_epu16(ss5, ss6);
1182 ss0 = _mm256_adds_epu16(ss3, ss5);
1228 ss3 = _mm256_adds_epu16(ss3, _mm256_mpsadbw_epu8(ss0, ss2, 0));
1229 ss4 = _mm256_adds_epu16(ss4, _mm256_mpsadbw_epu8(ss0, ss2, 45)); // 101 101
1230 ss5 = _mm256_adds_epu16(ss5, _mm256_mpsadbw_epu8(ss1, ss2, 18)); // 010 010
1231 ss6 = _mm256_adds_epu16(ss6, _mm256_mpsadbw_epu8(ss1, ss2, 63)); // 111 111
1235 ss3 = _mm256_adds_epu16(ss3, ss4);
1236 ss5 = _mm256_adds_epu16(ss5, ss6);
1273 ss3 = _mm256_adds_epu16(ss3, _mm256_mpsadbw_epu8(ss0, ss2, 0));
1274 ss4 = _mm256_adds_epu16(ss4, _mm256_mpsadbw_epu8(ss0, ss2, 45)); // 101 101
1275 ss5 = _mm256_adds_epu16(ss5, _mm256_mpsadbw_epu8(ss1, ss2, 18)); // 010 010
1276 ss6 = _mm256_adds_epu16(ss6, _mm256_mpsadbw_epu8(ss1, ss2, 63)); // 111 111
1280 ss3 = _mm256_adds_epu16(ss3, ss4);
1281 ss5 = _mm256_adds_epu16(ss5, ss6);
1330 ss3 = _mm256_adds_epu16(ss3, _mm256_mpsadbw_epu8(ss0, ss2, 0));
1331 ss4 = _mm256_adds_epu16(ss4, _mm256_mpsadbw_epu8(ss0, ss2, 45)); // 101 101
1332 ss5 = _mm256_adds_epu16(ss5, _mm256_mpsadbw_epu8(ss1, ss2, 18)); // 010 010
1333 ss6 = _mm256_adds_epu16(ss6, _mm256_mpsadbw_epu8(ss1, ss2, 63)); // 111 111
1337 … ss3 = _mm256_adds_epu16(_mm256_adds_epu16(ss3, ss4), _mm256_adds_epu16(ss5, ss6));
1374 ss3 = _mm256_adds_epu16(ss3, _mm256_mpsadbw_epu8(ss0, ss2, 0));
1375 ss4 = _mm256_adds_epu16(ss4, _mm256_mpsadbw_epu8(ss0, ss2, 45)); // 101 101
1376 ss5 = _mm256_adds_epu16(ss5, _mm256_mpsadbw_epu8(ss1, ss2, 18)); // 010 010
1377 ss6 = _mm256_adds_epu16(ss6, _mm256_mpsadbw_epu8(ss1, ss2, 63)); // 111 111
1381 ss3 = _mm256_adds_epu16(ss3, ss4);
1382 ss5 = _mm256_adds_epu16(ss5, ss6);
1383 ss6 = _mm256_adds_epu16(ss3, ss5);
1428 ss3 = _mm256_adds_epu16(ss3, _mm256_mpsadbw_epu8(ss0, ss2, 0));
1429 ss4 = _mm256_adds_epu16(ss4, _mm256_mpsadbw_epu8(ss0, ss2, 45)); // 101 101
1430 ss5 = _mm256_adds_epu16(ss5, _mm256_mpsadbw_epu8(ss1, ss2, 18)); // 010 010
1431 ss6 = _mm256_adds_epu16(ss6, _mm256_mpsadbw_epu8(ss1, ss2, 63)); // 111 111
1435 … ss7 = _mm256_adds_epu16(_mm256_adds_epu16(ss3, ss4), _mm256_adds_epu16(ss5, ss6));
1488 ss3 = _mm256_adds_epu16(ss3, _mm256_mpsadbw_epu8(ss0, ss2, 0));
1489 ss4 = _mm256_adds_epu16(ss4, _mm256_mpsadbw_epu8(ss0, ss2, 45)); // 101 101
1490 ss5 = _mm256_adds_epu16(ss5, _mm256_mpsadbw_epu8(ss1, ss2, 18)); // 010 010
1491 ss6 = _mm256_adds_epu16(ss6, _mm256_mpsadbw_epu8(ss1, ss2, 63)); // 111 111
1505 ss3 = _mm256_adds_epu16(ss3, ss4);
1506 ss5 = _mm256_adds_epu16(ss5, ss6);
1507 ss6 = _mm256_adds_epu16(ss3, ss5);
1555 ss3 = _mm256_adds_epu16(ss3, _mm256_mpsadbw_epu8(ss0, ss2, 0));
1556 ss4 = _mm256_adds_epu16(ss4, _mm256_mpsadbw_epu8(ss0, ss2, 45)); // 101 101
1557 ss5 = _mm256_adds_epu16(ss5, _mm256_mpsadbw_epu8(ss1, ss2, 18)); // 010 010
1558 ss6 = _mm256_adds_epu16(ss6, _mm256_mpsadbw_epu8(ss1, ss2, 63)); // 111 111
1570 ss7 = _mm256_adds_epu16(ss3, ss4);
1571 ss8 = _mm256_adds_epu16(ss5, ss6);
1572 ss7 = _mm256_adds_epu16(ss7, ss8);
1630 ss3 = _mm256_adds_epu16(ss3, _mm256_mpsadbw_epu8(ss0, ss2, 0));
1631 ss4 = _mm256_adds_epu16(ss4, _mm256_mpsadbw_epu8(ss0, ss2, 45)); // 101 101
1632 ss5 = _mm256_adds_epu16(ss5, _mm256_mpsadbw_epu8(ss1, ss2, 18)); // 010 010
1633 ss6 = _mm256_adds_epu16(ss6, _mm256_mpsadbw_epu8(ss1, ss2, 63)); // 111 111
1637 ss3 = _mm256_adds_epu16(ss3, _mm256_mpsadbw_epu8(ss0, ss2, 0));
1638 ss4 = _mm256_adds_epu16(ss4, _mm256_mpsadbw_epu8(ss0, ss2, 45)); // 101 101
1639 ss5 = _mm256_adds_epu16(ss5, _mm256_mpsadbw_epu8(ss1, ss2, 18)); // 010 010
1640 ss6 = _mm256_adds_epu16(ss6, _mm256_mpsadbw_epu8(ss1, ss2, 63)); // 111 111
1644 … ss7 = _mm256_adds_epu16(_mm256_adds_epu16(ss3, ss4), _mm256_adds_epu16(ss5, ss6));
1694 ss3 = _mm256_adds_epu16(ss3, _mm256_mpsadbw_epu8(ss0, ss2, 0));
1695 ss4 = _mm256_adds_epu16(ss4, _mm256_mpsadbw_epu8(ss0, ss2, 45)); // 101 101
1696 ss5 = _mm256_adds_epu16(ss5, _mm256_mpsadbw_epu8(ss1, ss2, 18)); // 010 010
1697 ss6 = _mm256_adds_epu16(ss6, _mm256_mpsadbw_epu8(ss1, ss2, 63)); // 111 111
1701 ss7 = _mm256_adds_epu16(ss7, _mm256_mpsadbw_epu8(ss0, ss2, 0));
1702 ss8 = _mm256_adds_epu16(ss8, _mm256_mpsadbw_epu8(ss0, ss2, 45)); // 101 101
1703 ss9 = _mm256_adds_epu16(ss9, _mm256_mpsadbw_epu8(ss1, ss2, 18)); // 010 010
1704 … ss10 = _mm256_adds_epu16(ss10, _mm256_mpsadbw_epu8(ss1, ss2, 63)); // 111 111
1708 … ss0 = _mm256_adds_epu16(_mm256_adds_epu16(ss3, ss4), _mm256_adds_epu16(ss5, ss6));
1709 …ss0 = _mm256_adds_epu16(ss0, _mm256_adds_epu16(_mm256_adds_epu16(ss7, ss8), _mm256_adds_epu16(ss9,…
1855 result1 = _mm256_adds_epu16(result1, _mm256_dbsad_epu8(ss4temp, ref2temp, 0x94)); in GetEightHorizontalSearchPointResults_8x8_16x16_PU_AVX512_INTRIN()
1856 result2 = _mm256_adds_epu16(result2, _mm256_dbsad_epu8(ss5temp, ref3temp, 0x94)); in GetEightHorizontalSearchPointResults_8x8_16x16_PU_AVX512_INTRIN()
1857 result3 = _mm256_adds_epu16(result3, _mm256_dbsad_epu8(ss6temp, ref2temp, 0xE9)); in GetEightHorizontalSearchPointResults_8x8_16x16_PU_AVX512_INTRIN()
1858 result4 = _mm256_adds_epu16(result4, _mm256_dbsad_epu8(ss7temp, ref3temp, 0xE9)); in GetEightHorizontalSearchPointResults_8x8_16x16_PU_AVX512_INTRIN()
1874 result1 = _mm256_adds_epu16(result1, _mm256_dbsad_epu8(ss0temp, ref0temp, 0x94)); in GetEightHorizontalSearchPointResults_8x8_16x16_PU_AVX512_INTRIN()
1875 result2 = _mm256_adds_epu16(result2, _mm256_dbsad_epu8(ss1temp, ref1temp, 0x94)); in GetEightHorizontalSearchPointResults_8x8_16x16_PU_AVX512_INTRIN()
1876 result3 = _mm256_adds_epu16(result3, _mm256_dbsad_epu8(ss2temp, ref0temp, 0xE9)); in GetEightHorizontalSearchPointResults_8x8_16x16_PU_AVX512_INTRIN()
1877 result4 = _mm256_adds_epu16(result4, _mm256_dbsad_epu8(ss3temp, ref1temp, 0xE9)); in GetEightHorizontalSearchPointResults_8x8_16x16_PU_AVX512_INTRIN()
1893 result1 = _mm256_adds_epu16(result1, _mm256_dbsad_epu8(ss4temp, ref2temp, 0x94)); in GetEightHorizontalSearchPointResults_8x8_16x16_PU_AVX512_INTRIN()
1894 result2 = _mm256_adds_epu16(result2, _mm256_dbsad_epu8(ss5temp, ref3temp, 0x94)); in GetEightHorizontalSearchPointResults_8x8_16x16_PU_AVX512_INTRIN()
1895 result3 = _mm256_adds_epu16(result3, _mm256_dbsad_epu8(ss6temp, ref2temp, 0xE9)); in GetEightHorizontalSearchPointResults_8x8_16x16_PU_AVX512_INTRIN()
1896 result4 = _mm256_adds_epu16(result4, _mm256_dbsad_epu8(ss7temp, ref3temp, 0xE9)); in GetEightHorizontalSearchPointResults_8x8_16x16_PU_AVX512_INTRIN()
1898 result1 = _mm256_adds_epu16(result1, result3); in GetEightHorizontalSearchPointResults_8x8_16x16_PU_AVX512_INTRIN()
1899 result2 = _mm256_adds_epu16(result2, result4); in GetEightHorizontalSearchPointResults_8x8_16x16_PU_AVX512_INTRIN()