;******************************************************************************
;* SIMD optimized SBC encoder DSP functions
;*
;* Copyright (C) 2017  Aurelien Jacobs <aurel@gnuage.org>
;* Copyright (C) 2008-2010  Nokia Corporation
;* Copyright (C) 2004-2010  Marcel Holtmann <marcel@holtmann.org>
;* Copyright (C) 2004-2005  Henryk Ploetz <henryk@ploetzli.ch>
;* Copyright (C) 2005-2006  Brad Midgley <bmidgley@xmission.com>
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************

%include "libavutil/x86/x86util.asm"

SECTION_RODATA

; Two dwords of 0x8000.  Used as the rounding term seeded into the analysis
; accumulators and as the initial OR value in sbc_calc_scalefactors.
scale_mask: times 2 dd 0x8000    ; 1 << (SBC_PROTO_FIXED_SCALE - 1)

SECTION .text

; NIDN insn, dst, src
; Emit "insn dst, src" unless dst and src are textually identical operands.
; Lets the callers below alias a temporary with its source or destination
; without generating a redundant move/add.
%macro NIDN 3
%ifnidn %2, %3
    %1            %2, %3
%endif
%endmacro

; One multiply-accumulate step on two MMX registers.
;   tmp1 = in1, tmp2 = in2              (each skipped when already the same)
;   tmp1 = pmaddwd(tmp1, consts[offset    .. offset+7 ])
;   tmp2 = pmaddwd(tmp2, consts[offset+8  .. offset+15])
;   out1 += add1, out2 += add2          (each skipped when out == add)
; The statement order matters: both copies are made before either pmaddwd,
; because callers pass the same register as in1/in2 and as tmp1.
%macro ANALYZE_MAC 9 ; out1, out2, in1, in2, tmp1, tmp2, add1, add2, offset
    NIDN movq,    %5, %3
    NIDN movq,    %6, %4
    pmaddwd       %5, [constsq+%9]
    pmaddwd       %6, [constsq+%9+8]
    NIDN paddd,   %1, %7
    NIDN paddd,   %2, %8
%endmacro

; ANALYZE_MAC with both inputs read from memory: 16 bytes at inq+offset are
; multiplied with the 16 bytes of constants at the same offset.
%macro ANALYZE_MAC_IN 7 ; out1, out2, tmp1, tmp2, add1, add2, offset
    ANALYZE_MAC   %1, %2, [inq+%7], [inq+%7+8], %3, %4, %5, %6, %7
%endmacro

; ANALYZE_MAC with a single register feeding both halves; the products in
; tmp1/tmp2 are added into out1/out2 (unless out and tmp are the same reg).
; When the last argument is the literal token "pack", "in" is first shifted
; right arithmetically by 16 and packed with signed saturation so that both
; halves of the register hold the same two 16-bit values.  Any other token
; (this file uses "no") skips that step and uses "in" as it is.
%macro ANALYZE_MAC_REG 7 ; out1, out2, in, tmp1, tmp2, offset, pack
%ifidn %7, pack
    psrad         %3, 16    ; SBC_PROTO_FIXED_SCALE
    packssdw      %3, %3
%endif
    ANALYZE_MAC   %1, %2, %3, %3, %4, %5, %4, %5, %6
%endmacro

;*******************************************************************
;void ff_sbc_analyze_4(const int16_t *in, int32_t *out, const int16_t *consts);
;*******************************************************************
; Reads 80 bytes from in and 112 bytes from consts, writes 16 bytes
; (four int32) to out.
; Stage 1: five 16-byte slices of in are multiplied with consts[0..79] and
;          summed into m0/m1, starting from the scale_mask rounding term.
; Stage 2: m0 and m1 are scaled down and packed to 16 bit, then multiplied
;          with consts[80..111] and summed into m0/m2.
; NOTE(review): no emms is executed before RET here, unlike
; sbc_calc_scalefactors below - presumably the caller clears the MMX
; state; confirm.
INIT_MMX mmx
cglobal sbc_analyze_4, 3, 3, 4, in, out, consts
    ANALYZE_MAC_IN   m0, m1, m0, m1, [scale_mask], [scale_mask], 0
    ANALYZE_MAC_IN   m0, m1, m2, m3, m2, m3, 16
    ANALYZE_MAC_IN   m0, m1, m2, m3, m2, m3, 32
    ANALYZE_MAC_IN   m0, m1, m2, m3, m2, m3, 48
    ANALYZE_MAC_IN   m0, m1, m2, m3, m2, m3, 64

    ; out == tmp in the first call, so it initialises m0/m2 instead of adding
    ANALYZE_MAC_REG  m0, m2, m0, m0, m2, 80, pack
    ANALYZE_MAC_REG  m0, m2, m1, m1, m3, 96, pack

    movq          [outq  ], m0
    movq          [outq+8], m2

    RET


;*******************************************************************
;void ff_sbc_analyze_8(const int16_t *in, int32_t *out, const int16_t *consts);
;*******************************************************************
; Reads 160 bytes from in and 288 bytes from consts, writes 32 bytes
; (eight int32) to out.
; Stage 1: ten 16-byte slices of in are multiplied with consts[0..159] and
;          summed alternately into m0/m1 and m2/m3, each pair starting from
;          the scale_mask rounding term.
; Stage 2: m0..m3 are scaled down and packed to 16 bit once, then used
;          twice: with the constants at 160/192/224/256 for out[0..3] and
;          with the constants at 176/208/240/272 for out[4..7].
; NOTE(review): uses m0-m7 and executes no emms before RET - presumably the
; caller clears the MMX state; confirm.
INIT_MMX mmx
cglobal sbc_analyze_8, 3, 3, 4, in, out, consts
    ANALYZE_MAC_IN   m0, m1, m0, m1, [scale_mask], [scale_mask], 0
    ANALYZE_MAC_IN   m2, m3, m2, m3, [scale_mask], [scale_mask], 16
    ANALYZE_MAC_IN   m0, m1, m4, m5, m4, m5, 32
    ANALYZE_MAC_IN   m2, m3, m6, m7, m6, m7, 48
    ANALYZE_MAC_IN   m0, m1, m4, m5, m4, m5, 64
    ANALYZE_MAC_IN   m2, m3, m6, m7, m6, m7, 80
    ANALYZE_MAC_IN   m0, m1, m4, m5, m4, m5, 96
    ANALYZE_MAC_IN   m2, m3, m6, m7, m6, m7, 112
    ANALYZE_MAC_IN   m0, m1, m4, m5, m4, m5, 128
    ANALYZE_MAC_IN   m2, m3, m6, m7, m6, m7, 144

    ; first half of the output; m0..m3 are packed here and stay packed
    ANALYZE_MAC_REG  m4, m5, m0, m4, m5, 160, pack
    ANALYZE_MAC_REG  m4, m5, m1, m6, m7, 192, pack
    ANALYZE_MAC_REG  m4, m5, m2, m6, m7, 224, pack
    ANALYZE_MAC_REG  m4, m5, m3, m6, m7, 256, pack

    movq          [outq  ], m4
    movq          [outq+8], m5

    ; second half: m0..m3 are already packed ("no") and are consumed in
    ; place, each one serving as its own first temporary
    ANALYZE_MAC_REG  m0, m5, m0, m0, m5, 176, no
    ANALYZE_MAC_REG  m0, m5, m1, m1, m7, 208, no
    ANALYZE_MAC_REG  m0, m5, m2, m2, m7, 240, no
    ANALYZE_MAC_REG  m0, m5, m3, m3, m7, 272, no

    movq          [outq+16], m0
    movq          [outq+24], m5

    RET


;*******************************************************************
;void ff_sbc_calc_scalefactors(int32_t sb_sample_f[16][2][8],
;                              uint32_t scale_factor[2][8],
;                              int blocks, int channels, int subbands)
;*******************************************************************
; Processes two adjacent int32 columns (8 bytes) per outer iteration,
; walking the byte offset in "subbands" from the end down to 0.  For each
; pair, every block is visited (64 bytes apart, i.e. one [2][8] int32 row)
; and a value derived from each sample (x - 1 for x > 0, ~x for x < 0,
; 0 for x == 0) is ORed into an accumulator seeded with 0x8000.  The scale
; factor written for each column is the index of the accumulator's highest
; set bit minus 15.
; The subbands and blocks registers are overwritten/consumed; ptr and blk
; are scratch.
INIT_MMX mmx
cglobal sbc_calc_scalefactors, 5, 7, 4, sb_sample_f, scale_factor, blocks, channels, subbands, ptr, blk
    ; subbands = 4 * subbands * channels
    ; (the doubling is applied once whenever channels >= 2)
    movq          m3, [scale_mask]
    shl           subbandsd, 2
    cmp           channelsd, 2
    jl            .loop_1
    shl           subbandsd, 1

.loop_1:
    ; step back to the previous pair of columns
    sub           subbandsq, 8
    lea           ptrq, [sb_sample_fq + subbandsq]

    ; blk = (blocks - 1) * 64;
    lea           blkq, [blocksq - 1]
    shl           blkd, 6

    ; seed with 0x8000 so the accumulator is never zero when bsr runs and
    ; the result after subtracting 15 cannot go negative
    movq          m0, m3
.loop_2:
    movq          m1, [ptrq+blkq]
    pxor          m2, m2
    pcmpgtd       m1, m2            ; m1 = (x > 0) ? -1 : 0
    paddd         m1, [ptrq+blkq]   ; m1 = x - (x > 0)
    pcmpgtd       m2, m1            ; m2 = (m1 < 0) ? -1 : 0
    pxor          m1, m2            ; invert the lanes that are negative

    por           m0, m1

    sub           blkq, 64
    jns           .loop_2           ; continue while the block offset is >= 0

    ; low dword -> first column of the pair
    movd          blkd, m0
    psrlq         m0,   32
    bsr           blkd, blkd
    sub           blkd, 15    ; SCALE_OUT_BITS
    mov           [scale_factorq + subbandsq], blkd

    ; high dword -> second column of the pair
    movd          blkd, m0
    bsr           blkd, blkd
    sub           blkd, 15    ; SCALE_OUT_BITS
    mov           [scale_factorq + subbandsq + 4], blkd

    cmp           subbandsq, 0
    jg            .loop_1

    emms
    RET