/*
 * Lossless video DSP utils
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "config.h"
#include "libavutil/x86/asm.h"
#include "../lossless_videodsp.h"
#include "libavutil/x86/cpu.h"

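/* Prototypes for the implementations in lossless_videodsp.asm.  The
 * add_left_pred* helpers return the running left value so the caller
 * can carry it into the next call. */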
void ff_add_bytes_mmx(uint8_t *dst, uint8_t *src, ptrdiff_t w);
void ff_add_bytes_sse2(uint8_t *dst, uint8_t *src, ptrdiff_t w);
void ff_add_bytes_avx2(uint8_t *dst, uint8_t *src, ptrdiff_t w);

void ff_add_median_pred_mmxext(uint8_t *dst, const uint8_t *top,
                               const uint8_t *diff, ptrdiff_t w,
                               int *left, int *left_top);
void ff_add_median_pred_sse2(uint8_t *dst, const uint8_t *top,
                             const uint8_t *diff, ptrdiff_t w,
                             int *left, int *left_top);

int  ff_add_left_pred_ssse3(uint8_t *dst, const uint8_t *src,
                            ptrdiff_t w, int left);
int  ff_add_left_pred_unaligned_ssse3(uint8_t *dst, const uint8_t *src,
                                      ptrdiff_t w, int left);
int  ff_add_left_pred_unaligned_avx2(uint8_t *dst, const uint8_t *src,
                                     ptrdiff_t w, int left);

int  ff_add_left_pred_int16_ssse3(uint16_t *dst, const uint16_t *src,
                                  unsigned mask, ptrdiff_t w, unsigned acc);
int  ff_add_left_pred_int16_unaligned_ssse3(uint16_t *dst, const uint16_t *src,
                                            unsigned mask, ptrdiff_t w,
                                            unsigned acc);

void ff_add_gradient_pred_ssse3(uint8_t *src, const ptrdiff_t stride,
                                const ptrdiff_t width);
void ff_add_gradient_pred_avx2(uint8_t *src, const ptrdiff_t stride,
                               const ptrdiff_t width);

#if HAVE_INLINE_ASM && HAVE_7REGS && ARCH_X86_32
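/* cmov-based scalar fallback, built only for 32-bit x86 where SSE2 is
 * not guaranteed; it needs 7 GP registers, hence the HAVE_7REGS guard.
 * For each byte it computes
 *     dst[i] = diff[i] + median(l, t, l + t - tl)
 * with l the reconstructed pixel to the left, t the pixel above and tl
 * the pixel above-left. */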
static void add_median_pred_cmov(uint8_t *dst, const uint8_t *top,
                                 const uint8_t *diff, ptrdiff_t w,
                                 int *left, int *left_top)
{
    x86_reg w2 = -w;
    x86_reg x;
    int l  = *left     & 0xff;
    int tl = *left_top & 0xff;
    int t;
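    /* The index register runs from -w up to 0 so that (ptr + w, index)
     * addressing walks the buffers forward.  The cmov chain first sorts
     * l and t, then clamps the gradient l + t - tl to the range
     * [min(l,t), max(l,t)], which is the median of the three values;
     * the diff byte is then added and the result stored and carried as
     * the new l. */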
    __asm__ volatile (
        "mov          %7, %3            \n"
        "1:                             \n"
        "movzbl (%3, %4), %2            \n"
        "mov          %2, %k3           \n"
        "sub         %b1, %b3           \n"
        "add         %b0, %b3           \n"
        "mov          %2, %1            \n"
        "cmp          %0, %2            \n"
        "cmovg        %0, %2            \n"
        "cmovg        %1, %0            \n"
        "cmp         %k3, %0            \n"
        "cmovg       %k3, %0            \n"
        "mov          %7, %3            \n"
        "cmp          %2, %0            \n"
        "cmovl        %2, %0            \n"
        "add    (%6, %4), %b0           \n"
        "mov         %b0, (%5, %4)      \n"
        "inc          %4                \n"
        "jl           1b                \n"
        : "+&q"(l), "+&q"(tl), "=&r"(t), "=&q"(x), "+&r"(w2)
        : "r"(dst + w), "r"(diff + w), "rm"(top + w)
    );
    *left     = l;
    *left_top = tl;
}
#endif

void ff_llviddsp_init_x86(LLVidDSPContext *c)
{
    int cpu_flags = av_get_cpu_flags();

#if HAVE_INLINE_ASM && HAVE_7REGS && ARCH_X86_32
    if (cpu_flags & AV_CPU_FLAG_CMOV)
        c->add_median_pred = add_median_pred_cmov;
#endif

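    /* From here on, the checks run from least to most capable ISA; each
     * match overrides pointers installed by an earlier one, so the best
     * available implementation ends up in the context. */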
    if (ARCH_X86_32 && EXTERNAL_MMX(cpu_flags)) {
        c->add_bytes = ff_add_bytes_mmx;
    }

    if (ARCH_X86_32 && EXTERNAL_MMXEXT(cpu_flags)) {
        /* slower than cmov version on AMD */
        if (!(cpu_flags & AV_CPU_FLAG_3DNOW))
            c->add_median_pred = ff_add_median_pred_mmxext;
    }

    if (EXTERNAL_SSE2(cpu_flags)) {
        c->add_bytes       = ff_add_bytes_sse2;
        c->add_median_pred = ff_add_median_pred_sse2;
    }

    if (EXTERNAL_SSSE3(cpu_flags)) {
        c->add_left_pred       = ff_add_left_pred_ssse3;
        c->add_left_pred_int16 = ff_add_left_pred_int16_ssse3;
        c->add_gradient_pred   = ff_add_gradient_pred_ssse3;
    }

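    /* The *_FAST variants of these checks also require the matching
     * *SLOW CPU flag to be unset (AV_CPU_FLAG_SSSE3SLOW here,
     * AV_CPU_FLAG_AVXSLOW for AVX2 below), skipping CPUs such as the
     * first-generation Atom on which these kernels would likely lose. */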
    if (EXTERNAL_SSSE3_FAST(cpu_flags)) {
        c->add_left_pred       = ff_add_left_pred_unaligned_ssse3;
        c->add_left_pred_int16 = ff_add_left_pred_int16_unaligned_ssse3;
    }

    if (EXTERNAL_AVX2_FAST(cpu_flags)) {
        c->add_bytes         = ff_add_bytes_avx2;
        c->add_left_pred     = ff_add_left_pred_unaligned_avx2;
        c->add_gradient_pred = ff_add_gradient_pred_avx2;
    }
}