/*
 * Loongson SIMD optimized idctdsp
 *
 * Copyright (c) 2015 Loongson Technology Corporation Limited
 * Copyright (c) 2015 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "idctdsp_mips.h"
#include "constants.h"
#include "libavutil/mips/mmiutils.h"

/**
 * Store an 8x8 block of 16-bit coefficients as 8-bit pixels, clamping each
 * value to the unsigned byte range.
 *
 * The block is read as 8 rows of 16 bytes (offsets 0x00-0x7f). For every row
 * the two 64-bit halves are narrowed to eight bytes with packushb (pack
 * halfwords to bytes with unsigned saturation) and written to the current
 * destination row.
 *
 * @param block     source coefficients, 64 int16_t values
 * @param pixels    destination; eight bytes are written per row, eight rows
 * @param line_size byte offset added to pixels to step to the next row
 */
void ff_put_pixels_clamped_mmi(const int16_t *block,
        uint8_t *av_restrict pixels, ptrdiff_t line_size)
{
    double ftmp[8];

    __asm__ volatile (
        /* Rows 0-3: load the low and high half of each coefficient row. */
        MMI_LDC1(%[ftmp0], %[block], 0x00)
        MMI_LDC1(%[ftmp1], %[block], 0x08)
        MMI_LDC1(%[ftmp2], %[block], 0x10)
        MMI_LDC1(%[ftmp3], %[block], 0x18)
        MMI_LDC1(%[ftmp4], %[block], 0x20)
        MMI_LDC1(%[ftmp5], %[block], 0x28)
        MMI_LDC1(%[ftmp6], %[block], 0x30)
        MMI_LDC1(%[ftmp7], %[block], 0x38)
        /* Narrow each pair of halves to one row of eight saturated bytes. */
        "packushb   %[ftmp0],   %[ftmp0],       %[ftmp1]                \n\t"
        "packushb   %[ftmp2],   %[ftmp2],       %[ftmp3]                \n\t"
        "packushb   %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
        "packushb   %[ftmp6],   %[ftmp6],       %[ftmp7]                \n\t"
        /* Write the four rows, stepping pixels by line_size after each. */
        MMI_SDC1(%[ftmp0], %[pixels], 0x00)
        PTR_ADDU   "%[pixels],  %[pixels],      %[line_size]            \n\t"
        MMI_SDC1(%[ftmp2], %[pixels], 0x00)
        PTR_ADDU   "%[pixels],  %[pixels],      %[line_size]            \n\t"
        MMI_SDC1(%[ftmp4], %[pixels], 0x00)
        PTR_ADDU   "%[pixels],  %[pixels],      %[line_size]            \n\t"
        MMI_SDC1(%[ftmp6], %[pixels], 0x00)
        PTR_ADDU   "%[pixels],  %[pixels],      %[line_size]            \n\t"

        /* Rows 4-7: same sequence on the second half of the block. */
        MMI_LDC1(%[ftmp0], %[block], 0x40)
        MMI_LDC1(%[ftmp1], %[block], 0x48)
        MMI_LDC1(%[ftmp2], %[block], 0x50)
        MMI_LDC1(%[ftmp3], %[block], 0x58)
        MMI_LDC1(%[ftmp4], %[block], 0x60)
        MMI_LDC1(%[ftmp5], %[block], 0x68)
        MMI_LDC1(%[ftmp6], %[block], 0x70)
        MMI_LDC1(%[ftmp7], %[block], 0x78)
        "packushb   %[ftmp0],   %[ftmp0],       %[ftmp1]                \n\t"
        "packushb   %[ftmp2],   %[ftmp2],       %[ftmp3]                \n\t"
        "packushb   %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
        "packushb   %[ftmp6],   %[ftmp6],       %[ftmp7]                \n\t"
        MMI_SDC1(%[ftmp0], %[pixels], 0x00)
        PTR_ADDU   "%[pixels],  %[pixels],      %[line_size]            \n\t"
        MMI_SDC1(%[ftmp2], %[pixels], 0x00)
        PTR_ADDU   "%[pixels],  %[pixels],      %[line_size]            \n\t"
        MMI_SDC1(%[ftmp4], %[pixels], 0x00)
        PTR_ADDU   "%[pixels],  %[pixels],      %[line_size]            \n\t"
        /* Last row: no pointer advance is needed afterwards. */
        MMI_SDC1(%[ftmp6], %[pixels], 0x00)
        : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
          [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
          [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
          [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
          /* pixels is advanced inside the asm, hence read-write. */
          [pixels]"+&r"(pixels)
        : [line_size]"r"((mips_reg)line_size),
          [block]"r"(block)
        : "memory"
    );
}

/**
 * Store an 8x8 block of signed 16-bit coefficients as 8-bit pixels.
 *
 * Each row of coefficients is narrowed to eight bytes with packsshb (pack
 * halfwords to bytes with signed saturation), then ff_pb_80 is added to every
 * byte with paddb before the row is written.
 * NOTE(review): ff_pb_80 comes from constants.h; by its name it is presumably
 * 0x80 replicated in all eight bytes, which would map the saturated signed
 * range onto 0..255 -- confirm against the header.
 *
 * @param block     source coefficients, 64 int16_t values (offsets 0x00-0x7f)
 * @param pixels    destination; eight bytes are written per row, eight rows
 * @param line_size byte offset added to pixels to step to the next row
 */
void ff_put_signed_pixels_clamped_mmi(const int16_t *block,
    uint8_t *av_restrict pixels, ptrdiff_t line_size)
{
    double ftmp[5];

    __asm__ volatile (
        /*
         * Rows 0-3. ftmp0 is a scratch register for the high half of each
         * row; ftmp1-ftmp4 end up holding one packed row each.
         */
        MMI_LDC1(%[ftmp1], %[block], 0x00)
        MMI_LDC1(%[ftmp0], %[block], 0x08)
        "packsshb   %[ftmp1],       %[ftmp1],       %[ftmp0]            \n\t"
        MMI_LDC1(%[ftmp2], %[block], 0x10)
        MMI_LDC1(%[ftmp0], %[block], 0x18)
        "packsshb   %[ftmp2],       %[ftmp2],       %[ftmp0]            \n\t"
        MMI_LDC1(%[ftmp3], %[block], 0x20)
        MMI_LDC1(%[ftmp0], %[block], 0x28)
        "packsshb   %[ftmp3],       %[ftmp3],       %[ftmp0]            \n\t"
        MMI_LDC1(%[ftmp4], %[block], 0x30)
        MMI_LDC1(%[ftmp0], %[block], 0x38)
        "packsshb   %[ftmp4],       %[ftmp4],       %[ftmp0]            \n\t"
        /* Add the per-byte bias to every packed row. */
        "paddb      %[ftmp1],       %[ftmp1],       %[ff_pb_80]         \n\t"
        "paddb      %[ftmp2],       %[ftmp2],       %[ff_pb_80]         \n\t"
        "paddb      %[ftmp3],       %[ftmp3],       %[ff_pb_80]         \n\t"
        "paddb      %[ftmp4],       %[ftmp4],       %[ff_pb_80]         \n\t"
        /* Write the four rows, stepping pixels by line_size after each. */
        MMI_SDC1(%[ftmp1], %[pixels], 0x00)
        PTR_ADDU   "%[pixels],      %[pixels],      %[line_size]        \n\t"
        MMI_SDC1(%[ftmp2], %[pixels], 0x00)
        PTR_ADDU   "%[pixels],      %[pixels],      %[line_size]        \n\t"
        MMI_SDC1(%[ftmp3], %[pixels], 0x00)
        PTR_ADDU   "%[pixels],      %[pixels],      %[line_size]        \n\t"
        MMI_SDC1(%[ftmp4], %[pixels], 0x00)
        PTR_ADDU   "%[pixels],      %[pixels],      %[line_size]        \n\t"

        /* Rows 4-7: same sequence on the second half of the block. */
        MMI_LDC1(%[ftmp1], %[block], 0x40)
        MMI_LDC1(%[ftmp0], %[block], 0x48)
        "packsshb   %[ftmp1],       %[ftmp1],       %[ftmp0]            \n\t"
        MMI_LDC1(%[ftmp2], %[block], 0x50)
        MMI_LDC1(%[ftmp0], %[block], 0x58)
        "packsshb   %[ftmp2],       %[ftmp2],       %[ftmp0]            \n\t"
        MMI_LDC1(%[ftmp3], %[block], 0x60)
        MMI_LDC1(%[ftmp0], %[block], 0x68)
        "packsshb   %[ftmp3],       %[ftmp3],       %[ftmp0]            \n\t"
        MMI_LDC1(%[ftmp4], %[block], 0x70)
        MMI_LDC1(%[ftmp0], %[block], 0x78)
        "packsshb   %[ftmp4],       %[ftmp4],       %[ftmp0]            \n\t"
        "paddb      %[ftmp1],       %[ftmp1],       %[ff_pb_80]         \n\t"
        "paddb      %[ftmp2],       %[ftmp2],       %[ff_pb_80]         \n\t"
        "paddb      %[ftmp3],       %[ftmp3],       %[ff_pb_80]         \n\t"
        "paddb      %[ftmp4],       %[ftmp4],       %[ff_pb_80]         \n\t"
        MMI_SDC1(%[ftmp1], %[pixels], 0x00)
        PTR_ADDU   "%[pixels],      %[pixels],      %[line_size]        \n\t"
        MMI_SDC1(%[ftmp2], %[pixels], 0x00)
        PTR_ADDU   "%[pixels],      %[pixels],      %[line_size]        \n\t"
        MMI_SDC1(%[ftmp3], %[pixels], 0x00)
        PTR_ADDU   "%[pixels],      %[pixels],      %[line_size]        \n\t"
        /* Last row: no pointer advance is needed afterwards. */
        MMI_SDC1(%[ftmp4], %[pixels], 0x00)
        : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
          [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
          [ftmp4]"=&f"(ftmp[4]),
          /* pixels is advanced inside the asm, hence read-write. */
          [pixels]"+&r"(pixels)
        : [block]"r"(block),
          [line_size]"r"((mips_reg)line_size),
          [ff_pb_80]"f"(ff_pb_80)
        : "memory"
    );
}

/**
 * Add an 8x8 block of 16-bit coefficients to existing 8-bit pixels and store
 * the sums back, clamped to the unsigned byte range.
 *
 * The loop runs four times and handles two destination rows per pass: the
 * pixel bytes are widened to 16 bits by interleaving with a zero register,
 * the matching 16 coefficients are added with paddh, and the sums are
 * narrowed back to bytes with packushb (unsigned saturation).
 *
 * @param block     source coefficients, 64 int16_t values; 0x20 bytes are
 *                  consumed per loop pass
 * @param pixels    pixels to update in place; eight bytes per row, eight rows
 * @param line_size byte offset added to pixels to step to the next row
 */
void ff_add_pixels_clamped_mmi(const int16_t *block,
        uint8_t *av_restrict pixels, ptrdiff_t line_size)
{
    double ftmp[9];
    uint64_t tmp[1];
    __asm__ volatile (
        /* tmp0 counts the four two-row passes; ftmp0 is cleared to zero. */
        "li         %[tmp0],    0x04                           \n\t"
        "xor        %[ftmp0],   %[ftmp0],   %[ftmp0]           \n\t"
        "1:                                                    \n\t"
        /* Load two consecutive pixel rows, then step back to the first. */
        MMI_LDC1(%[ftmp5], %[pixels], 0x00)
        PTR_ADDU   "%[pixels],  %[pixels],  %[line_size]       \n\t"
        MMI_LDC1(%[ftmp6], %[pixels], 0x00)
        PTR_SUBU   "%[pixels],  %[pixels],  %[line_size]       \n\t"
        /* Load the 16 coefficients for these two rows and advance block. */
        MMI_LDC1(%[ftmp1], %[block], 0x00)
        MMI_LDC1(%[ftmp2], %[block], 0x08)
        MMI_LDC1(%[ftmp3], %[block], 0x10)
        MMI_LDC1(%[ftmp4], %[block], 0x18)
        PTR_ADDIU  "%[block],   %[block],   0x20               \n\t"
        /* Widen pixel bytes to halfwords by interleaving with zero. */
        "punpckhbh  %[ftmp7],   %[ftmp5],   %[ftmp0]           \n\t"
        "punpcklbh  %[ftmp5],   %[ftmp5],   %[ftmp0]           \n\t"
        "punpckhbh  %[ftmp8],   %[ftmp6],   %[ftmp0]           \n\t"
        "punpcklbh  %[ftmp6],   %[ftmp6],   %[ftmp0]           \n\t"
        /* coefficient + pixel, per 16-bit lane. */
        "paddh      %[ftmp1],   %[ftmp1],   %[ftmp5]           \n\t"
        "paddh      %[ftmp2],   %[ftmp2],   %[ftmp7]           \n\t"
        "paddh      %[ftmp3],   %[ftmp3],   %[ftmp6]           \n\t"
        "paddh      %[ftmp4],   %[ftmp4],   %[ftmp8]           \n\t"
        /* Narrow the sums back to one row of eight saturated bytes each. */
        "packushb   %[ftmp1],   %[ftmp1],   %[ftmp2]           \n\t"
        "packushb   %[ftmp3],   %[ftmp3],   %[ftmp4]           \n\t"
        /* Store both rows, leaving pixels on the next row pair. */
        MMI_SDC1(%[ftmp1], %[pixels], 0x00)
        PTR_ADDU   "%[pixels],  %[pixels],  %[line_size]       \n\t"
        MMI_SDC1(%[ftmp3], %[pixels], 0x00)
        "addi       %[tmp0],    %[tmp0],    -0x01              \n\t"
        PTR_ADDU   "%[pixels],  %[pixels],  %[line_size]       \n\t"
        "bnez       %[tmp0],    1b                             \n\t"
        : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
          [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
          [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
          [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
          [ftmp8]"=&f"(ftmp[8]),            [tmp0]"=&r"(tmp[0]),
          /* Both pointers are advanced inside the asm, hence read-write. */
          [pixels]"+&r"(pixels),            [block]"+&r"(block)
        : [line_size]"r"((mips_reg)line_size)
        : "memory"
    );
}
