1 /* 2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved 3 * 4 * This source code is subject to the terms of the BSD 2 Clause License and 5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License 6 * was not distributed with this source code in the LICENSE file, you can 7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open 8 * Media Patent License 1.0 was not distributed with this source code in the 9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 10 */ 11 #ifndef AOM_AOM_DSP_X86_CONVOLVE_H_ 12 #define AOM_AOM_DSP_X86_CONVOLVE_H_ 13 14 #include <assert.h> 15 16 #include "config/aom_config.h" 17 18 #include "aom/aom_integer.h" 19 #include "aom_ports/mem.h" 20 21 typedef void filter8_1dfunction(const uint8_t *src_ptr, ptrdiff_t src_pitch, 22 uint8_t *output_ptr, ptrdiff_t out_pitch, 23 uint32_t output_height, const int16_t *filter); 24 25 #define FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt) \ 26 void aom_convolve8_##name##_##opt( \ 27 const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, \ 28 ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, \ 29 const int16_t *filter_y, int y_step_q4, int w, int h) { \ 30 (void)filter_x; \ 31 (void)x_step_q4; \ 32 (void)filter_y; \ 33 (void)y_step_q4; \ 34 assert((-128 <= filter[3]) && (filter[3] <= 127)); \ 35 assert(step_q4 == 16); \ 36 if (((filter[0] | filter[1] | filter[6] | filter[7]) == 0) && \ 37 (filter[2] | filter[5])) { \ 38 while (w >= 16) { \ 39 aom_filter_block1d16_##dir##4_##avg##opt(src_start, src_stride, dst, \ 40 dst_stride, h, filter); \ 41 src += 16; \ 42 dst += 16; \ 43 w -= 16; \ 44 } \ 45 while (w >= 8) { \ 46 aom_filter_block1d8_##dir##4_##avg##opt(src_start, src_stride, dst, \ 47 dst_stride, h, filter); \ 48 src += 8; \ 49 dst += 8; \ 50 w -= 8; \ 51 } \ 52 while (w >= 4) { \ 53 aom_filter_block1d4_##dir##4_##avg##opt(src_start, src_stride, dst, \ 54 dst_stride, h, filter); \ 55 src += 4; \ 56 dst += 4; \ 57 w -= 4; \ 58 } \ 59 } else if (filter[0] | filter[1] | filter[2]) { \ 60 while (w >= 16) { \ 61 aom_filter_block1d16_##dir##8_##avg##opt(src_start, src_stride, dst, \ 62 dst_stride, h, filter); \ 63 src += 16; \ 64 dst += 16; \ 65 w -= 16; \ 66 } \ 67 while (w >= 8) { \ 68 aom_filter_block1d8_##dir##8_##avg##opt(src_start, src_stride, dst, \ 69 dst_stride, h, filter); \ 70 src += 8; \ 71 dst += 8; \ 72 w -= 8; \ 73 } \ 74 while (w >= 4) { \ 75 aom_filter_block1d4_##dir##8_##avg##opt(src_start, src_stride, dst, \ 76 dst_stride, h, filter); \ 77 src += 4; \ 78 dst += 4; \ 79 w -= 4; \ 80 } \ 81 } else { \ 82 while (w >= 16) { \ 83 aom_filter_block1d16_##dir##2_##avg##opt(src, src_stride, dst, \ 84 dst_stride, h, filter); \ 85 src += 16; \ 86 dst += 16; \ 87 w -= 16; \ 88 } \ 89 while (w >= 8) { \ 90 aom_filter_block1d8_##dir##2_##avg##opt(src, src_stride, dst, \ 91 dst_stride, h, filter); \ 92 src += 8; \ 93 dst += 8; \ 94 w -= 8; \ 95 } \ 96 while (w >= 4) { \ 97 aom_filter_block1d4_##dir##2_##avg##opt(src, src_stride, dst, \ 98 dst_stride, h, filter); \ 99 src += 4; \ 100 dst += 4; \ 101 w -= 4; \ 102 } \ 103 } \ 104 if (w) { \ 105 aom_convolve8_##name##_c(src, src_stride, dst, dst_stride, filter_x, \ 106 x_step_q4, filter_y, y_step_q4, w, h); \ 107 } \ 108 } 109 110 typedef void highbd_filter8_1dfunction(const uint16_t *src_ptr, 111 const ptrdiff_t src_pitch, 112 uint16_t *output_ptr, 113 ptrdiff_t out_pitch, 114 unsigned int output_height, 115 const int16_t *filter, int bd); 116 117 #define HIGH_FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt) \ 118 void aom_highbd_convolve8_##name##_##opt( \ 119 const uint8_t *src8, ptrdiff_t src_stride, uint8_t *dst8, \ 120 ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, \ 121 const int16_t *filter_y, int y_step_q4, int w, int h, int bd) { \ 122 uint16_t *src = CONVERT_TO_SHORTPTR(src8); \ 123 uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); \ 124 if (step_q4 == 16 && filter[3] != 128) { \ 125 if (filter[0] | filter[1] | filter[2]) { \ 126 while (w >= 16) { \ 127 aom_highbd_filter_block1d16_##dir##8_##avg##opt( \ 128 src_start, src_stride, dst, dst_stride, h, filter, bd); \ 129 src += 16; \ 130 dst += 16; \ 131 w -= 16; \ 132 } \ 133 while (w >= 8) { \ 134 aom_highbd_filter_block1d8_##dir##8_##avg##opt( \ 135 src_start, src_stride, dst, dst_stride, h, filter, bd); \ 136 src += 8; \ 137 dst += 8; \ 138 w -= 8; \ 139 } \ 140 while (w >= 4) { \ 141 aom_highbd_filter_block1d4_##dir##8_##avg##opt( \ 142 src_start, src_stride, dst, dst_stride, h, filter, bd); \ 143 src += 4; \ 144 dst += 4; \ 145 w -= 4; \ 146 } \ 147 } else { \ 148 while (w >= 16) { \ 149 aom_highbd_filter_block1d16_##dir##2_##avg##opt( \ 150 src, src_stride, dst, dst_stride, h, filter, bd); \ 151 src += 16; \ 152 dst += 16; \ 153 w -= 16; \ 154 } \ 155 while (w >= 8) { \ 156 aom_highbd_filter_block1d8_##dir##2_##avg##opt( \ 157 src, src_stride, dst, dst_stride, h, filter, bd); \ 158 src += 8; \ 159 dst += 8; \ 160 w -= 8; \ 161 } \ 162 while (w >= 4) { \ 163 aom_highbd_filter_block1d4_##dir##2_##avg##opt( \ 164 src, src_stride, dst, dst_stride, h, filter, bd); \ 165 src += 4; \ 166 dst += 4; \ 167 w -= 4; \ 168 } \ 169 } \ 170 } \ 171 if (w) { \ 172 aom_highbd_convolve8_##name##_c( \ 173 CONVERT_TO_BYTEPTR(src), src_stride, CONVERT_TO_BYTEPTR(dst), \ 174 dst_stride, filter_x, x_step_q4, filter_y, y_step_q4, w, h, bd); \ 175 } \ 176 } 177 178 #endif // AOM_AOM_DSP_X86_CONVOLVE_H_ 179