/*
 *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10 #ifndef VPX_VPX_DSP_X86_CONVOLVE_H_
11 #define VPX_VPX_DSP_X86_CONVOLVE_H_
12 
13 #include <assert.h>
14 #include "stdint.h"
15 
16 //#include "./vpx_config.h"
17 //#include "vpx/vpx_integer.h"
18 #include "mem.h"
19 
// Function type for the 8-bit one-dimensional filter helpers.  The parameter
// list lines up with the call FUN_CONV_1D makes to its vpx_filter_block1d*
// helpers: (source pointer, source stride, destination pointer, destination
// stride, height, kernel taps).
typedef void filter8_1dfunction(const uint8_t *src_ptr, ptrdiff_t src_pitch,
                                uint8_t *output_ptr, ptrdiff_t out_pitch,
                                uint32_t output_height, const int16_t *filter);
23 
// Generates eb_vp9_convolve8_<name>_<opt>(): a one-dimensional convolution
// entry point that cuts the block into 16-, 8- or 4-element-wide column strips
// and hands each strip to a width-specific helper.
//
// Macro arguments:
//   name      - pasted into the generated function name.
//   offset    - index into filter_kernel selecting the taps; evaluated inside
//               the generated function, so it may name one of its parameters.
//   step_q4   - step expression, likewise evaluated inside the function; it is
//               only checked by assert() to equal 16.
//   dir       - direction tag pasted into the helper names.
//   src_start - source-pointer expression passed to the "8_" helpers; it is
//               re-evaluated for every strip, after src has advanced.
//   avg       - tag pasted directly before opt in the helper names (presumably
//               empty or an "avg_"-style prefix -- confirm at the call site).
//   opt       - suffix pasted into the generated and the helper names.
//
// Helper selection: if any of filter[0..2] is non-zero the
// vpx_filter_block1d{16,8,4}_<dir>8_<avg><opt> helpers are used with src_start;
// otherwise the vpx_filter_block1d{16,8,4}_<dir>2_<avg><opt> helpers are used
// with src itself.  All six helpers must be declared before expansion.
//
// Width handling: strips of 16 are consumed while w >= 16; a remainder of
// exactly 8 or exactly 4 gets one more helper call.  Any other remainder is
// left unprocessed.  The filter[3] != 128 and step checks are assert()s only.
#define FUN_CONV_1D(name, offset, step_q4, dir, src_start, avg, opt)         \
  void eb_vp9_convolve8_##name##_##opt(                                      \
      const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,                \
      ptrdiff_t dst_stride, const InterpKernel *filter_kernel, int x0_q4,    \
      int x_step_q4, int y0_q4, int y_step_q4, int w, int h) {               \
    const int16_t *filter = filter_kernel[offset];                           \
    /* Which of these are read depends on the macro arguments. */            \
    (void)x0_q4;                                                             \
    (void)x_step_q4;                                                         \
    (void)y0_q4;                                                             \
    (void)y_step_q4;                                                         \
    assert(filter[3] != 128);                                                \
    assert((step_q4) == 16);                                                 \
    if (filter[0] | filter[1] | filter[2]) {                                 \
      while (w >= 16) {                                                      \
        vpx_filter_block1d16_##dir##8_##avg##opt(src_start, src_stride, dst, \
                                                 dst_stride, h, filter);     \
        src += 16;                                                           \
        dst += 16;                                                           \
        w -= 16;                                                             \
      }                                                                      \
      if (w == 8) {                                                          \
        vpx_filter_block1d8_##dir##8_##avg##opt(src_start, src_stride, dst,  \
                                                dst_stride, h, filter);      \
      } else if (w == 4) {                                                   \
        vpx_filter_block1d4_##dir##8_##avg##opt(src_start, src_stride, dst,  \
                                                dst_stride, h, filter);      \
      }                                                                      \
    } else {                                                                 \
      while (w >= 16) {                                                      \
        vpx_filter_block1d16_##dir##2_##avg##opt(src, src_stride, dst,       \
                                                 dst_stride, h, filter);     \
        src += 16;                                                           \
        dst += 16;                                                           \
        w -= 16;                                                             \
      }                                                                      \
      if (w == 8) {                                                          \
        vpx_filter_block1d8_##dir##2_##avg##opt(src, src_stride, dst,        \
                                                dst_stride, h, filter);      \
      } else if (w == 4) {                                                   \
        vpx_filter_block1d4_##dir##2_##avg##opt(src, src_stride, dst,        \
                                                dst_stride, h, filter);      \
      }                                                                      \
    }                                                                        \
  }
68 
// Generates eb_vp9_convolve8_<avg><opt>(): a two-pass convolution that runs
// eb_vp9_convolve8_horiz_<opt>() into a local scratch buffer (stride 64,
// declared with DECLARE_ALIGNED(16, ...)) and then runs
// eb_vp9_convolve8_<avg>vert_<opt>() from that buffer into dst.  Both of those
// functions must be declared before the macro is expanded.
//
// Macro arguments:
//   avg - tag pasted into the generated name and in front of "vert_" in the
//         second-pass function name.
//   opt - suffix pasted into all three function names.
//
// Two layouts are used, selected by the taps of filter[x0_q4]:
//   * any of filter_x[0..2] non-zero: the first pass starts 3 rows above src
//     and produces h + 7 rows; the second pass starts at row 3 of the scratch
//     buffer.
//   * otherwise: the first pass starts at src and produces h + 1 rows; the
//     second pass starts at row 0 of the scratch buffer.
//
// The kernel, size (w, h <= 64) and step (== 16) preconditions are checked by
// assert() only, so they are not enforced when NDEBUG is defined.
#define FUN_CONV_2D(avg, opt)                                                  \
  void eb_vp9_convolve8_##avg##opt(                                            \
      const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,                  \
      ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4,             \
      int x_step_q4, int y0_q4, int y_step_q4, int w, int h) {                 \
    const int16_t *filter_x = filter[x0_q4];                                   \
    const int16_t *filter_y = filter[y0_q4];                                   \
    (void)filter_y; /* read only by the assert() below */                      \
    assert(filter_x[3] != 128);                                                \
    assert(filter_y[3] != 128);                                                \
    assert(w <= 64);                                                           \
    assert(h <= 64);                                                           \
    assert(x_step_q4 == 16);                                                   \
    assert(y_step_q4 == 16);                                                   \
    if (filter_x[0] | filter_x[1] | filter_x[2]) {                             \
      /* h + 7 rows at stride 64; fits since h <= 64 is asserted. */           \
      DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 71]);                           \
      eb_vp9_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, fdata2,   \
                                   64, filter, x0_q4, x_step_q4, y0_q4,        \
                                   y_step_q4, w, h + 7);                       \
      eb_vp9_convolve8_##avg##vert_##opt(fdata2 + 3 * 64, 64, dst,             \
                                         dst_stride, filter, x0_q4,            \
                                         x_step_q4, y0_q4, y_step_q4, w, h);   \
    } else {                                                                   \
      /* h + 1 rows at stride 64; fits since h <= 64 is asserted. */           \
      DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 65]);                           \
      eb_vp9_convolve8_horiz_##opt(src, src_stride, fdata2, 64, filter,        \
                                   x0_q4, x_step_q4, y0_q4, y_step_q4, w,      \
                                   h + 1);                                     \
      eb_vp9_convolve8_##avg##vert_##opt(fdata2, 64, dst, dst_stride,          \
                                         filter, x0_q4, x_step_q4, y0_q4,      \
                                         y_step_q4, w, h);                     \
    }                                                                          \
  }
100 
101 #if CONFIG_VP9_HIGHBITDEPTH
102 
// Function type for the high-bit-depth (uint16_t sample) one-dimensional
// filter helpers.  The parameter list lines up with the call HIGH_FUN_CONV_1D
// makes to its vpx_highbd_filter_block1d* helpers: (source pointer, source
// stride, destination pointer, destination stride, height, kernel taps, bd).
// src_pitch is passed by value, so it carries no top-level const here; this
// matches filter8_1dfunction and leaves the function type unchanged.
typedef void highbd_filter8_1dfunction(const uint16_t *src_ptr,
                                       ptrdiff_t src_pitch,
                                       uint16_t *output_ptr,
                                       ptrdiff_t out_pitch,
                                       unsigned int output_height,
                                       const int16_t *filter, int bd);
109 
// Generates vpx_highbd_convolve8_<name>_<opt>(): the high-bit-depth
// one-dimensional convolution entry point.
//
// Macro arguments:
//   name      - pasted into the generated name and into the name of the
//               vpx_highbd_convolve8_<name>_c() fallback.
//   offset    - index into filter_kernel selecting the taps; evaluated inside
//               the generated function.
//   step_q4   - step expression, evaluated inside the generated function.
//   dir       - direction tag pasted into the helper names.
//   src_start - source-pointer expression passed to the "8_" helpers; it is
//               re-evaluated for every strip, after src has advanced.
//   avg       - tag pasted directly before opt in the helper names.
//   opt       - suffix pasted into the generated and the helper names.
//
// When step_q4 == 16 and filter[3] != 128, columns are consumed in strips of
// 16, then 8, then 4 by the vpx_highbd_filter_block1d{16,8,4}_<dir>{8,2}_
// <avg><opt> helpers ("8_" if any of filter[0..2] is non-zero, else "2_" with
// src itself).  Whatever width is still left afterwards -- the whole block if
// that condition did not hold -- is passed to vpx_highbd_convolve8_<name>_c()
// together with the already advanced src and dst.
#define HIGH_FUN_CONV_1D(name, offset, step_q4, dir, src_start, avg, opt)     \
  void vpx_highbd_convolve8_##name##_##opt(                                   \
      const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst,               \
      ptrdiff_t dst_stride, const InterpKernel *filter_kernel, int x0_q4,     \
      int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bd) {        \
    const int16_t *filter = filter_kernel[offset];                            \
    if ((step_q4) == 16 && filter[3] != 128) {                                \
      if (filter[0] | filter[1] | filter[2]) {                                \
        while (w >= 16) {                                                     \
          vpx_highbd_filter_block1d16_##dir##8_##avg##opt(                    \
              src_start, src_stride, dst, dst_stride, h, filter, bd);         \
          src += 16;                                                          \
          dst += 16;                                                          \
          w -= 16;                                                            \
        }                                                                     \
        while (w >= 8) {                                                      \
          vpx_highbd_filter_block1d8_##dir##8_##avg##opt(                     \
              src_start, src_stride, dst, dst_stride, h, filter, bd);         \
          src += 8;                                                           \
          dst += 8;                                                           \
          w -= 8;                                                             \
        }                                                                     \
        while (w >= 4) {                                                      \
          vpx_highbd_filter_block1d4_##dir##8_##avg##opt(                     \
              src_start, src_stride, dst, dst_stride, h, filter, bd);         \
          src += 4;                                                           \
          dst += 4;                                                           \
          w -= 4;                                                             \
        }                                                                     \
      } else {                                                                \
        while (w >= 16) {                                                     \
          vpx_highbd_filter_block1d16_##dir##2_##avg##opt(                    \
              src, src_stride, dst, dst_stride, h, filter, bd);               \
          src += 16;                                                          \
          dst += 16;                                                          \
          w -= 16;                                                            \
        }                                                                     \
        while (w >= 8) {                                                      \
          vpx_highbd_filter_block1d8_##dir##2_##avg##opt(                     \
              src, src_stride, dst, dst_stride, h, filter, bd);               \
          src += 8;                                                           \
          dst += 8;                                                           \
          w -= 8;                                                             \
        }                                                                     \
        while (w >= 4) {                                                      \
          vpx_highbd_filter_block1d4_##dir##2_##avg##opt(                     \
              src, src_stride, dst, dst_stride, h, filter, bd);               \
          src += 4;                                                           \
          dst += 4;                                                           \
          w -= 4;                                                             \
        }                                                                     \
      }                                                                       \
    }                                                                         \
    /* Leftover columns (possibly all of them) go to the _c function. */      \
    if (w) {                                                                  \
      vpx_highbd_convolve8_##name##_c(src, src_stride, dst, dst_stride,       \
                                      filter_kernel, x0_q4, x_step_q4, y0_q4, \
                                      y_step_q4, w, h, bd);                   \
    }                                                                         \
  }
169 
// Generates vpx_highbd_convolve8_<avg><opt>(): the high-bit-depth two-pass
// convolution.  It relies on vpx_highbd_convolve8_horiz_<opt>(),
// vpx_highbd_convolve8_<avg>vert_<opt>() and vpx_highbd_convolve8_<avg>c()
// being declared before the macro is expanded.
//
// Macro arguments:
//   avg - tag pasted into the generated name, in front of "vert_" in the
//         second-pass function name, and in front of "c" in the fallback name.
//   opt - suffix pasted into the generated, first-pass and second-pass names.
//
// Behaviour:
//   * x_step_q4 == 16 and y_step_q4 == 16: a first pass writes into a local
//     scratch buffer (stride 64, declared with DECLARE_ALIGNED(16, ...)) and a
//     second pass writes from it into dst.
//       - any of filter_x[0..2] non-zero, or filter_x[3] == 128: the first
//         pass starts 3 rows above src and produces h + 7 rows; the second
//         pass starts at row 3 of the scratch buffer (offset 3 * 64).
//       - otherwise: the first pass starts at src and produces h + 1 rows; the
//         second pass starts at row 0.
//   * any other step: everything is forwarded to vpx_highbd_convolve8_<avg>c().
//
// w <= 64 and h <= 64 are checked by assert() only.
#define HIGH_FUN_CONV_2D(avg, opt)                                             \
  void vpx_highbd_convolve8_##avg##opt(                                        \
      const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst,                \
      ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4,             \
      int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bd) {         \
    const int16_t *filter_x = filter[x0_q4];                                   \
    assert(w <= 64);                                                           \
    assert(h <= 64);                                                           \
    if (x_step_q4 == 16 && y_step_q4 == 16) {                                  \
      if ((filter_x[0] | filter_x[1] | filter_x[2]) || filter_x[3] == 128) {   \
        /* h + 7 rows at stride 64; fits since h <= 64 is asserted. */         \
        DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 71]);                        \
        vpx_highbd_convolve8_horiz_##opt(src - 3 * src_stride, src_stride,     \
                                         fdata2, 64, filter, x0_q4, x_step_q4, \
                                         y0_q4, y_step_q4, w, h + 7, bd);      \
        vpx_highbd_convolve8_##avg##vert_##opt(                                \
            fdata2 + 3 * 64, 64, dst, dst_stride, filter, x0_q4, x_step_q4,    \
            y0_q4, y_step_q4, w, h, bd);                                       \
      } else {                                                                 \
        /* h + 1 rows at stride 64; fits since h <= 64 is asserted. */         \
        DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 65]);                        \
        vpx_highbd_convolve8_horiz_##opt(src, src_stride, fdata2, 64, filter,  \
                                         x0_q4, x_step_q4, y0_q4, y_step_q4,   \
                                         w, h + 1, bd);                        \
        vpx_highbd_convolve8_##avg##vert_##opt(fdata2, 64, dst, dst_stride,    \
                                               filter, x0_q4, x_step_q4,       \
                                               y0_q4, y_step_q4, w, h, bd);    \
      }                                                                        \
    } else {                                                                   \
      vpx_highbd_convolve8_##avg##c(src, src_stride, dst, dst_stride, filter,  \
                                    x0_q4, x_step_q4, y0_q4, y_step_q4, w, h,  \
                                    bd);                                       \
    }                                                                          \
  }
202 #endif  // CONFIG_VP9_HIGHBITDEPTH
203 
204 #endif  // VPX_VPX_DSP_X86_CONVOLVE_H_
205