;******************************************************************************
;* FLAC DSP SIMD optimizations
;*
;* Copyright (C) 2014 Loren Merritt
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************

%include "libavutil/x86/x86util.asm"

SECTION .text

;------------------------------------------------------------------------------
; LPC_32 <cpu suffix>
;
; Emits flac_lpc_32 for the given instruction set (%1 is passed to INIT_XMM).
;
; Arguments (cglobal: 5 args, 6 GPRs, 5 XMM registers):
;   decoded    - pointer to 32-bit samples; the first pred_order entries are
;                read as history, every following entry is read and then
;                overwritten in place with entry + (prediction >> qlevel)
;   coeffs     - pointer to pred_order 32-bit coefficients; coeffs[0] is
;                multiplied with the oldest sample of the history window
;   pred_order - number of coefficients, consumed as a 32-bit value
;   qlevel     - right-shift applied to the 64-bit prediction sum
;   len        - total sample count including the pred_order history
;                samples; nothing is done when len <= pred_order
;   j          - scratch GPR, inner-loop index (not an argument)
;
; NOTE(review): the matching C prototype is presumably
;   void ff_flac_lpc_32_<cpu>(int32_t *decoded, const int *coeffs,
;                             int pred_order, int qlevel, int len);
; confirm against the C-side declaration.
;
; NOTE(review): PMACSDQL is defined outside this file. It is assumed to
; compute dst = src3 + (signed 32x32->64 product of the low dwords of src1
; and src2) and to be allowed to clobber its last operand as scratch, which
; is why m0/m1 are reloaded right after each use - confirm in x86util.asm.
;
; Register roles:
;   m0 - current history sample      m1 - current coefficient
;   m2 - 64-bit sum for sample n     m3 - 64-bit sum for sample n+1
;   m4 - shift count (qlevel)
;
; Two output samples are produced per .loop_sample iteration: the sum for
; sample n+1 (m3) is accumulated alongside the sum for sample n (m2), and its
; final term is added once sample n itself is known.
;------------------------------------------------------------------------------
%macro LPC_32 1
INIT_XMM %1
cglobal flac_lpc_32, 5,6,5, decoded, coeffs, pred_order, qlevel, len, j
    sub         lend, pred_orderd               ; len = number of samples to predict
    jle .ret                                    ; no samples beyond the history
    ; pred_order is a 32-bit quantity but is used as a full-width register in
    ; the address arithmetic below; on 64-bit targets the upper half of the
    ; register is not guaranteed to be defined, so sign-extend it first.
    ; Expands to nothing when pred_orderq and pred_orderd are the same register.
    movsxdifnidn pred_orderq, pred_orderd
    lea         decodedq, [decodedq+pred_orderq*4-8] ; biased by -8: loop adds 8 before use
    lea         coeffsq, [coeffsq+pred_orderq*4]     ; one past the last coefficient
    neg         pred_orderq                     ; negative index counting up towards 0
    movd        m4, qlevelm                     ; shift count for psrlq
ALIGN 16
.loop_sample:
    movd        m0, [decodedq+pred_orderq*4+8]  ; oldest sample of the history window
    add         decodedq, 8                     ; decodedq -> sample n (advance by 2 samples)
    movd        m1, [coeffsq+pred_orderq*4]     ; coeffs[0]
    pxor        m2, m2                          ; sum for sample n
    pxor        m3, m3                          ; sum for sample n+1
    lea         jq, [pred_orderq+1]
    test        jq, jq
    jz .end_order                               ; pred_order == 1: only the final term remains
.loop_order:
    PMACSDQL    m2, m0, m1, m2, m0              ; sum(n)   += sample * coeff
    movd        m0, [decodedq+jq*4]             ; next history sample
    PMACSDQL    m3, m1, m0, m3, m1              ; sum(n+1) += coeff * next sample
    movd        m1, [coeffsq+jq*4]              ; next coefficient
    inc         jq
    jl .loop_order                              ; until j reaches 0
.end_order:
    PMACSDQL    m2, m0, m1, m2, m0              ; final term for sample n
    ; Logical shift of the 64-bit sum; only the low dword is consumed below,
    ; which matches an arithmetic shift for shift counts up to 32.
    psrlq       m2, m4
    movd        m0, [decodedq]                  ; stored value for sample n
    paddd       m0, m2                          ; + shifted prediction
    movd        [decodedq], m0                  ; sample n written back in place
    sub         lend, 2                         ; flags from here also drive the jg below
    jl .ret                                     ; odd count: sample n was the last one
    PMACSDQL    m3, m1, m0, m3, m1              ; final term for n+1 uses the new sample n
    psrlq       m3, m4
    movd        m1, [decodedq+4]                ; stored value for sample n+1
    paddd       m1, m3
    movd        [decodedq+4], m1                ; sample n+1 written back in place
    jg .loop_sample                             ; no instruction since "sub lend, 2" alters flags
.ret:
    REP_RET
%endmacro

; The XOP variant is only emitted when the build configuration sets
; HAVE_XOP_EXTERNAL to a non-zero value; the SSE4 variant is always emitted.
%if HAVE_XOP_EXTERNAL
LPC_32 xop
%endif
LPC_32 sse4