1 /* 2 * Copyright (c) 2015 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 #ifndef VPX_VPX_DSP_X86_CONVOLVE_H_ 11 #define VPX_VPX_DSP_X86_CONVOLVE_H_ 12 13 #include <assert.h> 14 #include "stdint.h" 15 16 //#include "./vpx_config.h" 17 //#include "vpx/vpx_integer.h" 18 #include "mem.h" 19 20 typedef void filter8_1dfunction(const uint8_t *src_ptr, ptrdiff_t src_pitch, 21 uint8_t *output_ptr, ptrdiff_t out_pitch, 22 uint32_t output_height, const int16_t *filter); 23 24 #define FUN_CONV_1D(name, offset, step_q4, dir, src_start, avg, opt) \ 25 void eb_vp9_convolve8_##name##_##opt( \ 26 const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, \ 27 ptrdiff_t dst_stride, const InterpKernel *filter_kernel, int x0_q4, \ 28 int x_step_q4, int y0_q4, int y_step_q4, int w, int h) { \ 29 const int16_t *filter = filter_kernel[offset]; \ 30 (void)x0_q4; \ 31 (void)x_step_q4; \ 32 (void)y0_q4; \ 33 (void)y_step_q4; \ 34 assert(filter[3] != 128); \ 35 assert(step_q4 == 16); \ 36 if (filter[0] | filter[1] | filter[2]) { \ 37 while (w >= 16) { \ 38 vpx_filter_block1d16_##dir##8_##avg##opt(src_start, src_stride, dst, \ 39 dst_stride, h, filter); \ 40 src += 16; \ 41 dst += 16; \ 42 w -= 16; \ 43 } \ 44 if (w == 8) { \ 45 vpx_filter_block1d8_##dir##8_##avg##opt(src_start, src_stride, dst, \ 46 dst_stride, h, filter); \ 47 } else if (w == 4) { \ 48 vpx_filter_block1d4_##dir##8_##avg##opt(src_start, src_stride, dst, \ 49 dst_stride, h, filter); \ 50 } \ 51 } else { \ 52 while (w >= 16) { \ 53 vpx_filter_block1d16_##dir##2_##avg##opt(src, src_stride, dst, \ 54 dst_stride, h, filter); \ 55 src += 16; \ 56 dst += 16; \ 57 w -= 16; \ 58 } \ 59 if (w == 8) { \ 60 vpx_filter_block1d8_##dir##2_##avg##opt(src, src_stride, dst, \ 61 dst_stride, h, filter); \ 62 } else if (w == 4) { \ 63 vpx_filter_block1d4_##dir##2_##avg##opt(src, src_stride, dst, \ 64 dst_stride, h, filter); \ 65 } \ 66 } \ 67 } 68 69 #define FUN_CONV_2D(avg, opt) \ 70 void eb_vp9_convolve8_##avg##opt( \ 71 const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, \ 72 ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, \ 73 int x_step_q4, int y0_q4, int y_step_q4, int w, int h) { \ 74 const int16_t *filter_x = filter[x0_q4]; \ 75 const int16_t *filter_y = filter[y0_q4]; \ 76 (void)filter_y; \ 77 assert(filter_x[3] != 128); \ 78 assert(filter_y[3] != 128); \ 79 assert(w <= 64); \ 80 assert(h <= 64); \ 81 assert(x_step_q4 == 16); \ 82 assert(y_step_q4 == 16); \ 83 if (filter_x[0] | filter_x[1] | filter_x[2]) { \ 84 DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 71]); \ 85 eb_vp9_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, fdata2, 64, \ 86 filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, \ 87 h + 7); \ 88 eb_vp9_convolve8_##avg##vert_##opt(fdata2 + 3 * 64, 64, dst, dst_stride, \ 89 filter, x0_q4, x_step_q4, y0_q4, \ 90 y_step_q4, w, h); \ 91 } else { \ 92 DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 65]); \ 93 eb_vp9_convolve8_horiz_##opt(src, src_stride, fdata2, 64, filter, x0_q4, \ 94 x_step_q4, y0_q4, y_step_q4, w, h + 1); \ 95 eb_vp9_convolve8_##avg##vert_##opt(fdata2, 64, dst, dst_stride, filter, \ 96 x0_q4, x_step_q4, y0_q4, y_step_q4, w, \ 97 h); \ 98 } \ 99 } 100 101 #if CONFIG_VP9_HIGHBITDEPTH 102 103 typedef void highbd_filter8_1dfunction(const uint16_t *src_ptr, 104 const ptrdiff_t src_pitch, 105 uint16_t *output_ptr, 106 ptrdiff_t out_pitch, 107 unsigned int output_height, 108 const int16_t *filter, int bd); 109 110 #define HIGH_FUN_CONV_1D(name, offset, step_q4, dir, src_start, avg, opt) \ 111 void vpx_highbd_convolve8_##name##_##opt( \ 112 const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, \ 113 ptrdiff_t dst_stride, const InterpKernel *filter_kernel, int x0_q4, \ 114 int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bd) { \ 115 const int16_t *filter = filter_kernel[offset]; \ 116 if (step_q4 == 16 && filter[3] != 128) { \ 117 if (filter[0] | filter[1] | filter[2]) { \ 118 while (w >= 16) { \ 119 vpx_highbd_filter_block1d16_##dir##8_##avg##opt( \ 120 src_start, src_stride, dst, dst_stride, h, filter, bd); \ 121 src += 16; \ 122 dst += 16; \ 123 w -= 16; \ 124 } \ 125 while (w >= 8) { \ 126 vpx_highbd_filter_block1d8_##dir##8_##avg##opt( \ 127 src_start, src_stride, dst, dst_stride, h, filter, bd); \ 128 src += 8; \ 129 dst += 8; \ 130 w -= 8; \ 131 } \ 132 while (w >= 4) { \ 133 vpx_highbd_filter_block1d4_##dir##8_##avg##opt( \ 134 src_start, src_stride, dst, dst_stride, h, filter, bd); \ 135 src += 4; \ 136 dst += 4; \ 137 w -= 4; \ 138 } \ 139 } else { \ 140 while (w >= 16) { \ 141 vpx_highbd_filter_block1d16_##dir##2_##avg##opt( \ 142 src, src_stride, dst, dst_stride, h, filter, bd); \ 143 src += 16; \ 144 dst += 16; \ 145 w -= 16; \ 146 } \ 147 while (w >= 8) { \ 148 vpx_highbd_filter_block1d8_##dir##2_##avg##opt( \ 149 src, src_stride, dst, dst_stride, h, filter, bd); \ 150 src += 8; \ 151 dst += 8; \ 152 w -= 8; \ 153 } \ 154 while (w >= 4) { \ 155 vpx_highbd_filter_block1d4_##dir##2_##avg##opt( \ 156 src, src_stride, dst, dst_stride, h, filter, bd); \ 157 src += 4; \ 158 dst += 4; \ 159 w -= 4; \ 160 } \ 161 } \ 162 } \ 163 if (w) { \ 164 vpx_highbd_convolve8_##name##_c(src, src_stride, dst, dst_stride, \ 165 filter_kernel, x0_q4, x_step_q4, y0_q4, \ 166 y_step_q4, w, h, bd); \ 167 } \ 168 } 169 170 #define HIGH_FUN_CONV_2D(avg, opt) \ 171 void vpx_highbd_convolve8_##avg##opt( \ 172 const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, \ 173 ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, \ 174 int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bd) { \ 175 const int16_t *filter_x = filter[x0_q4]; \ 176 assert(w <= 64); \ 177 assert(h <= 64); \ 178 if (x_step_q4 == 16 && y_step_q4 == 16) { \ 179 if ((filter_x[0] | filter_x[1] | filter_x[2]) || filter_x[3] == 128) { \ 180 DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 71]); \ 181 vpx_highbd_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, \ 182 fdata2, 64, filter, x0_q4, x_step_q4, \ 183 y0_q4, y_step_q4, w, h + 7, bd); \ 184 vpx_highbd_convolve8_##avg##vert_##opt( \ 185 fdata2 + 192, 64, dst, dst_stride, filter, x0_q4, x_step_q4, \ 186 y0_q4, y_step_q4, w, h, bd); \ 187 } else { \ 188 DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 65]); \ 189 vpx_highbd_convolve8_horiz_##opt(src, src_stride, fdata2, 64, filter, \ 190 x0_q4, x_step_q4, y0_q4, y_step_q4, \ 191 w, h + 1, bd); \ 192 vpx_highbd_convolve8_##avg##vert_##opt(fdata2, 64, dst, dst_stride, \ 193 filter, x0_q4, x_step_q4, \ 194 y0_q4, y_step_q4, w, h, bd); \ 195 } \ 196 } else { \ 197 vpx_highbd_convolve8_##avg##c(src, src_stride, dst, dst_stride, filter, \ 198 x0_q4, x_step_q4, y0_q4, y_step_q4, w, h, \ 199 bd); \ 200 } \ 201 } 202 #endif // CONFIG_VP9_HIGHBITDEPTH 203 204 #endif // VPX_VPX_DSP_X86_CONVOLVE_H_ 205