/*
 * quarterpel DSP function templates
 * Copyright (c) 2000, 2001 Fabrice Bellard
 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * quarterpel DSP function templates
 */

#define PIXOP2(OPNAME, OP) \
static inline void OPNAME ## _no_rnd_pixels8_l2_8(uint8_t *dst, \
                                                   const uint8_t *src1, \
                                                   const uint8_t *src2, \
                                                   int dst_stride, \
                                                   int src_stride1, \
                                                   int src_stride2, \
                                                   int h) \
{ \
    int i; \
 \
    for (i = 0; i < h; i++) { \
        uint32_t a, b; \
        a = AV_RN32(&src1[i * src_stride1]); \
        b = AV_RN32(&src2[i * src_stride2]); \
        OP(*((uint32_t *) &dst[i * dst_stride]), \
           no_rnd_avg32(a, b)); \
        a = AV_RN32(&src1[i * src_stride1 + 4]); \
        b = AV_RN32(&src2[i * src_stride2 + 4]); \
        OP(*((uint32_t *) &dst[i * dst_stride + 4]), \
           no_rnd_avg32(a, b)); \
    } \
} \
 \
static inline void OPNAME ## _no_rnd_pixels16_l2_8(uint8_t *dst, \
                                                    const uint8_t *src1, \
                                                    const uint8_t *src2, \
                                                    int dst_stride, \
                                                    int src_stride1, \
                                                    int src_stride2, \
                                                    int h) \
{ \
    OPNAME ## _no_rnd_pixels8_l2_8(dst, src1, src2, dst_stride, \
                                   src_stride1, src_stride2, h); \
    OPNAME ## _no_rnd_pixels8_l2_8(dst + 8, \
                                   src1 + 8, \
                                   src2 + 8, \
                                   dst_stride, src_stride1, \
                                   src_stride2, h); \
} \
 \
static inline void OPNAME ## _pixels8_l4_8(uint8_t *dst, \
                                            const uint8_t *src1, \
                                            const uint8_t *src2, \
                                            const uint8_t *src3, \
                                            const uint8_t *src4, \
                                            int dst_stride, \
                                            int src_stride1, \
                                            int src_stride2, \
                                            int src_stride3, \
                                            int src_stride4, \
                                            int h) \
{ \
    /* FIXME HIGH BIT DEPTH */ \
    int i; \
 \
    for (i = 0; i < h; i++) { \
        uint32_t a, b, c, d, l0, l1, h0, h1; \
        a = AV_RN32(&src1[i * src_stride1]); \
        b = AV_RN32(&src2[i * src_stride2]); \
        c = AV_RN32(&src3[i * src_stride3]); \
        d = AV_RN32(&src4[i * src_stride4]); \
        l0 = (a & 0x03030303UL) + \
             (b & 0x03030303UL) + \
             0x02020202UL; \
        h0 = ((a & 0xFCFCFCFCUL) >> 2) + \
             ((b & 0xFCFCFCFCUL) >> 2); \
        l1 = (c & 0x03030303UL) + \
             (d & 0x03030303UL); \
        h1 = ((c & 0xFCFCFCFCUL) >> 2) + \
             ((d & 0xFCFCFCFCUL) >> 2); \
        OP(*((uint32_t *) &dst[i * dst_stride]), \
           h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL)); \
        a = AV_RN32(&src1[i * src_stride1 + 4]); \
        b = AV_RN32(&src2[i * src_stride2 + 4]); \
        c = AV_RN32(&src3[i * src_stride3 + 4]); \
        d = AV_RN32(&src4[i * src_stride4 + 4]); \
        l0 = (a & 0x03030303UL) + \
             (b & 0x03030303UL) + \
             0x02020202UL; \
        h0 = ((a & 0xFCFCFCFCUL) >> 2) + \
             ((b & 0xFCFCFCFCUL) >> 2); \
        l1 = (c & 0x03030303UL) + \
             (d & 0x03030303UL); \
        h1 = ((c & 0xFCFCFCFCUL) >> 2) + \
             ((d & 0xFCFCFCFCUL) >> 2); \
        OP(*((uint32_t *) &dst[i * dst_stride + 4]), \
           h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL)); \
    } \
} \
 \
static inline void OPNAME ## _no_rnd_pixels8_l4_8(uint8_t *dst, \
                                                   const uint8_t *src1, \
                                                   const uint8_t *src2, \
                                                   const uint8_t *src3, \
                                                   const uint8_t *src4, \
                                                   int dst_stride, \
                                                   int src_stride1, \
                                                   int src_stride2, \
                                                   int src_stride3, \
                                                   int src_stride4, \
                                                   int h) \
{ \
    /* FIXME HIGH BIT DEPTH */ \
    int i; \
 \
    for (i = 0; i < h; i++) { \
        uint32_t a, b, c, d, l0, l1, h0, h1; \
        a = AV_RN32(&src1[i * src_stride1]); \
        b = AV_RN32(&src2[i * src_stride2]); \
        c = AV_RN32(&src3[i * src_stride3]); \
        d = AV_RN32(&src4[i * src_stride4]); \
        l0 = (a & 0x03030303UL) + \
             (b & 0x03030303UL) + \
             0x01010101UL; \
        h0 = ((a & 0xFCFCFCFCUL) >> 2) + \
             ((b & 0xFCFCFCFCUL) >> 2); \
        l1 = (c & 0x03030303UL) + \
             (d & 0x03030303UL); \
        h1 = ((c & 0xFCFCFCFCUL) >> 2) + \
             ((d & 0xFCFCFCFCUL) >> 2); \
        OP(*((uint32_t *) &dst[i * dst_stride]), \
           h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL)); \
        a = AV_RN32(&src1[i * src_stride1 + 4]); \
        b = AV_RN32(&src2[i * src_stride2 + 4]); \
        c = AV_RN32(&src3[i * src_stride3 + 4]); \
        d = AV_RN32(&src4[i * src_stride4 + 4]); \
        l0 = (a & 0x03030303UL) + \
             (b & 0x03030303UL) + \
             0x01010101UL; \
        h0 = ((a & 0xFCFCFCFCUL) >> 2) + \
             ((b & 0xFCFCFCFCUL) >> 2); \
        l1 = (c & 0x03030303UL) + \
             (d & 0x03030303UL); \
        h1 = ((c & 0xFCFCFCFCUL) >> 2) + \
             ((d & 0xFCFCFCFCUL) >> 2); \
        OP(*((uint32_t *) &dst[i * dst_stride + 4]), \
           h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL)); \
    } \
} \
 \
static inline void OPNAME ## _pixels16_l4_8(uint8_t *dst, \
                                             const uint8_t *src1, \
                                             const uint8_t *src2, \
                                             const uint8_t *src3, \
                                             const uint8_t *src4, \
                                             int dst_stride, \
                                             int src_stride1, \
                                             int src_stride2, \
                                             int src_stride3, \
                                             int src_stride4, \
                                             int h) \
{ \
    OPNAME ## _pixels8_l4_8(dst, src1, src2, src3, src4, dst_stride, \
                            src_stride1, src_stride2, src_stride3, \
                            src_stride4, h); \
    OPNAME ## _pixels8_l4_8(dst + 8, \
                            src1 + 8, src2 + 8, \
                            src3 + 8, src4 + 8, \
                            dst_stride, src_stride1, src_stride2, \
                            src_stride3, src_stride4, h); \
} \
 \
static inline void OPNAME ## _no_rnd_pixels16_l4_8(uint8_t *dst, \
                                                    const uint8_t *src1, \
                                                    const uint8_t *src2, \
                                                    const uint8_t *src3, \
                                                    const uint8_t *src4, \
                                                    int dst_stride, \
                                                    int src_stride1, \
                                                    int src_stride2, \
                                                    int src_stride3, \
                                                    int src_stride4, \
                                                    int h) \
{ \
    OPNAME ## _no_rnd_pixels8_l4_8(dst, src1, src2, src3, src4, \
                                   dst_stride, src_stride1, \
                                   src_stride2, src_stride3, \
                                   src_stride4, h); \
    OPNAME ## _no_rnd_pixels8_l4_8(dst + 8, \
                                   src1 + 8, src2 + 8, \
                                   src3 + 8, src4 + 8, \
                                   dst_stride, src_stride1, \
                                   src_stride2, src_stride3, \
                                   src_stride4, h); \
}

#define op_avg(a, b) a = rnd_avg32(a, b)
#define op_put(a, b) a = b
#define put_no_rnd_pixels8_8_c put_pixels8_8_c
PIXOP2(avg, op_avg)
PIXOP2(put, op_put)
#undef op_avg
#undef op_put
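
/*
 * The *_l4_8 templates above average four lines at once while keeping four
 * 8-bit pixels packed in each uint32_t: every byte is split into its low
 * 2 bits (mask 0x03030303) and its high 6 bits (mask 0xFCFCFCFC, pre-shifted
 * right by 2); the low parts plus the per-byte rounding constant
 * (0x02020202 for the rounding variant, 0x01010101 for no_rnd) are summed,
 * shifted right by 2 and masked with 0x0F0F0F0F so carries cannot leak into
 * the neighbouring byte, and the result is added to the summed high parts.
 * Per byte this equals (a + b + c + d + 2) >> 2 (or + 1 for no_rnd).
 *
 * A minimal standalone sketch of the same idea for a single 32-bit word;
 * avg4_packed is a hypothetical helper shown here for documentation only
 * and is not part of the template:
 *
 *     #include <stdint.h>
 *
 *     static uint32_t avg4_packed(uint32_t a, uint32_t b,
 *                                 uint32_t c, uint32_t d)
 *     {
 *         // low 2 bits of each byte, plus the rounding constant
 *         uint32_t l = (a & 0x03030303U) + (b & 0x03030303U) +
 *                      (c & 0x03030303U) + (d & 0x03030303U) + 0x02020202U;
 *         // high 6 bits of each byte, already divided by 4
 *         uint32_t h = ((a & 0xFCFCFCFCU) >> 2) + ((b & 0xFCFCFCFCU) >> 2) +
 *                      ((c & 0xFCFCFCFCU) >> 2) + ((d & 0xFCFCFCFCU) >> 2);
 *         // the mask keeps each shifted low sum inside its own byte lane
 *         return h + ((l >> 2) & 0x0F0F0F0FU);
 *     }
 *
 * The two-line l2 variants instead use the packed-byte averaging helpers
 * rnd_avg32()/no_rnd_avg32(), which are defined elsewhere in FFmpeg.
 */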