;************************************************************************
;* SIMD-optimized HuffYUV encoding functions
;* Copyright (c) 2000, 2001 Fabrice Bellard
;* Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
;*
;* MMX optimization by Nick Kurshev <nickols_k@mail.ru>
;* Conversion to NASM format by Tiancheng "Timothy" Gu <timothygu99@gmail.com>
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************

%include "libavutil/x86/x86util.asm"

SECTION .text

%include "libavcodec/x86/huffyuvdsp_template.asm"

;------------------------------------------------------------------------------
; void ff_diff_int16(uint16_t *dst, const uint16_t *src1, const uint16_t *src2,
;                    unsigned mask, int w);
;------------------------------------------------------------------------------

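; For reference, the operation these SIMD loops perform is roughly the
; following scalar C (an illustrative sketch; the helper name is made up here
; and the canonical fallback lives in huffyuvencdsp.c):
;
;     static void diff_int16_scalar(uint16_t *dst, const uint16_t *src1,
;                                   const uint16_t *src2, unsigned mask, int w)
;     {
;         // per-sample difference, wrapped into the bit depth given by mask
;         for (int i = 0; i < w; i++)
;             dst[i] = (src1[i] - src2[i]) & mask;
;     }
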
%macro DIFF_INT16 0
cglobal diff_int16, 5,5,5, dst, src1, src2, mask, w, tmp
%if mmsize > 8
    ; fall back to the unaligned loop if any of the three pointers is not
    ; aligned to the vector width
    test src1q, mmsize-1
    jnz .unaligned
    test src2q, mmsize-1
    jnz .unaligned
    test dstq, mmsize-1
    jnz .unaligned
%endif
    INT16_LOOP a, sub
%if mmsize > 8
.unaligned:
    INT16_LOOP u, sub
%endif
%endmacro

%if ARCH_X86_32
INIT_MMX mmx
DIFF_INT16
%endif

INIT_XMM sse2
DIFF_INT16

%if HAVE_AVX2_EXTERNAL
INIT_YMM avx2
DIFF_INT16
%endif

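;------------------------------------------------------------------------------
; void ff_sub_hfyu_median_pred_int16(uint16_t *dst, const uint16_t *src1,
;                                    const uint16_t *src2, unsigned mask,
;                                    int w, int *left, int *left_top);
;------------------------------------------------------------------------------

; A rough scalar sketch of what the MMXEXT loop below computes (helper names
; are illustrative; see the C fallback in huffyuvencdsp.c for the canonical
; version). src1 is the previous row, src2 the current row:
;
;     static int median3(int a, int b, int c)
;     {
;         int mx = a > b ? a : b;
;         int mn = a > b ? b : a;
;         return mx < c ? mx : (mn > c ? mn : c);
;     }
;
;     static void sub_median_scalar(uint16_t *dst, const uint16_t *src1,
;                                   const uint16_t *src2, unsigned mask,
;                                   int w, int *left, int *left_top)
;     {
;         int l = *left, lt = *left_top;
;         for (int i = 0; i < w; i++) {
;             int pred = median3(l, src1[i], (l + src1[i] - lt) & mask);
;             lt = src1[i];                  // top sample becomes next top-left
;             l  = src2[i];                  // current sample becomes next left
;             dst[i] = (src2[i] - pred) & mask;
;         }
;         *left     = l;
;         *left_top = lt;
;     }
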
INIT_MMX mmxext
cglobal sub_hfyu_median_pred_int16, 7,7,0, dst, src1, src2, mask, w, left, left_top
    add     wd, wd                      ; width in bytes (16-bit samples)
    movd    mm7, maskd
    SPLATW  mm7, mm7                    ; mm7 = mask broadcast to every word lane
    movq    mm0, [src1q]
    movq    mm2, [src2q]
    psllq   mm0, 16                     ; make room in the low word for the seeds
    psllq   mm2, 16
    movd    mm6, [left_topq]
    por     mm0, mm6                    ; mm0 = top-left neighbours (src1 shifted by one)
    movd    mm6, [leftq]
    por     mm2, mm6                    ; mm2 = left neighbours (src2 shifted by one)
    xor     maskq, maskq
.loop:
    movq    mm1, [src1q + maskq]        ; top samples
    movq    mm3, [src2q + maskq]        ; current samples
    movq    mm4, mm2
    psubw   mm2, mm0
    paddw   mm2, mm1
    pand    mm2, mm7                    ; gradient: (left + top - top-left) & mask
    movq    mm5, mm4
    pmaxsw  mm4, mm1
    pminsw  mm1, mm5
    pminsw  mm4, mm2
    pmaxsw  mm4, mm1                    ; median of left, top and gradient
    psubw   mm3, mm4
    pand    mm3, mm7                    ; residual = (current - prediction) & mask
    movq    [dstq + maskq], mm3
    add     maskq, 8
    movq    mm0, [src1q + maskq - 2]    ; reload neighbours offset by one sample
    movq    mm2, [src2q + maskq - 2]
    cmp     maskq, wq
    jb .loop
    movzx maskd, word [src1q + wq - 2]
    mov [left_topq], maskd              ; *left_top = src1[w-1]
    movzx maskd, word [src2q + wq - 2]
    mov [leftq], maskd                  ; *left = src2[w-1]
    RET
