;******************************************************************************
;* SIMD-optimized IDCT-related routines
;* Copyright (c) 2008 Loren Merritt
;* Copyright (c) 2003-2013 Michael Niedermayer
;* Copyright (c) 2013 Daniel Kang
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************

%include "libavutil/x86/x86util.asm"

SECTION_RODATA

cextern pb_80

SECTION_TEXT

;--------------------------------------------------------------------------
;void ff_put_signed_pixels_clamped(const int16_t *block, uint8_t *pixels,
;                                  int line_size)
;--------------------------------------------------------------------------
; Reads 64 int16_t coefficients from block (2 x 64 bytes), saturates each
; one to a signed byte, adds the byte constant held in pb_80 and stores the
; result as 8 rows of 8 bytes, consecutive rows being line_size bytes apart.
; NOTE(review): pb_80 is defined elsewhere; it is presumably 0x80 in every
; byte, which would re-bias the signed result into the unsigned 0..255
; range -- confirm against its definition.

; PUT_SIGNED_PIXELS_CLAMPED_HALF <byte offset into block>
;
; Converts 32 coefficients (64 bytes starting at blockq+%1) into 4 output
; rows of 8 bytes written at pixelsq + {0, 1, 2, 3} * lsizeq.
;
; Expects: m0      = byte bias to add to every packed value
;          lsize3q = lsizeq * 3
; Clobbers: m1, m2 (and m3, m4 when mmsize == 8)
;
; The loads use mova and packsswb takes a memory operand, so block is
; presumably required to be mmsize-aligned -- confirm with the caller.
%macro PUT_SIGNED_PIXELS_CLAMPED_HALF 1
    ; Even-numbered mmsize chunks are loaded into registers ...
    mova     m1, [blockq+mmsize*0+%1]
    mova     m2, [blockq+mmsize*2+%1]
%if mmsize == 8
    mova     m3, [blockq+mmsize*4+%1]
    mova     m4, [blockq+mmsize*6+%1]
%endif
    ; ... and the odd-numbered chunks are packed in straight from memory
    ; (signed word -> signed byte with saturation).
    packsswb m1, [blockq+mmsize*1+%1]
    packsswb m2, [blockq+mmsize*3+%1]
%if mmsize == 8
    packsswb m3, [blockq+mmsize*5+%1]
    packsswb m4, [blockq+mmsize*7+%1]
%endif
    paddb    m1, m0
    paddb    m2, m0
%if mmsize == 8
    paddb    m3, m0
    paddb    m4, m0
    ; 8-byte registers: each register holds exactly one output row.
    movq     [pixelsq+lsizeq*0], m1
    movq     [pixelsq+lsizeq*1], m2
    movq     [pixelsq+lsizeq*2], m3
    movq     [pixelsq+lsize3q ], m4
%else
    ; 16-byte registers: each register holds two output rows, the low
    ; 8 bytes being the first row and the high 8 bytes the second.
    movq     [pixelsq+lsizeq*0], m1
    movhps   [pixelsq+lsizeq*1], m1
    movq     [pixelsq+lsizeq*2], m2
    movhps   [pixelsq+lsize3q ], m2
%endif
%endmacro

; PUT_SIGNED_PIXELS_CLAMPED <number of XMM registers passed to cglobal>
;
; Emits put_signed_pixels_clamped for the instruction set selected by the
; preceding INIT_* directive.
%macro PUT_SIGNED_PIXELS_CLAMPED 1
cglobal put_signed_pixels_clamped, 3, 4, %1, block, pixels, lsize, lsize3
    ; line_size is a 32-bit int in the C prototype. On x86-64 the upper
    ; half of the register that carries it is not guaranteed to be
    ; sign-extended, so widen it before it takes part in 64-bit address
    ; arithmetic. This expands to nothing when lsizeq and lsized name the
    ; same register (x86-32).
    movsxdifnidn lsizeq, lsized
    mova     m0, [pb_80]
    lea      lsize3q, [lsizeq*3]
    PUT_SIGNED_PIXELS_CLAMPED_HALF 0            ; rows 0-3
    lea      pixelsq, [pixelsq+lsizeq*4]        ; advance to row 4
    PUT_SIGNED_PIXELS_CLAMPED_HALF 64           ; rows 4-7
    RET
%endmacro

INIT_MMX mmx
PUT_SIGNED_PIXELS_CLAMPED 0
INIT_XMM sse2
PUT_SIGNED_PIXELS_CLAMPED 3