/*
 * Loongson SIMD optimized blockdsp
 *
 * Copyright (c) 2015 Loongson Technology Corporation Limited
 * Copyright (c) 2015 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
23 
24 #include "blockdsp_mips.h"
25 #include "libavutil/mips/mmiutils.h"
26 
ff_fill_block16_mmi(uint8_t * block,uint8_t value,ptrdiff_t line_size,int h)27 void ff_fill_block16_mmi(uint8_t *block, uint8_t value, ptrdiff_t line_size, int h)
28 {
29     double ftmp[1];
30     DECLARE_VAR_ALL64;
31 
32     __asm__ volatile (
33         "mtc1       %[value],   %[ftmp0]                                \n\t"
34         "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
35         "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
36         "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
37         "1:                                                             \n\t"
38         MMI_SDC1(%[ftmp0], %[block], 0x00)
39         PTR_ADDI   "%[h],       %[h],           -0x01                   \n\t"
40         MMI_SDC1(%[ftmp0], %[block], 0x08)
41         PTR_ADDU   "%[block],   %[block],       %[line_size]            \n\t"
42         "bnez       %[h],       1b                                      \n\t"
43         : [ftmp0]"=&f"(ftmp[0]),
44           RESTRICT_ASM_ALL64
45           [block]"+&r"(block),              [h]"+&r"(h)
46         : [value]"r"(value),                [line_size]"r"((mips_reg)line_size)
47         : "memory"
48     );
49 }
50 
ff_fill_block8_mmi(uint8_t * block,uint8_t value,ptrdiff_t line_size,int h)51 void ff_fill_block8_mmi(uint8_t *block, uint8_t value, ptrdiff_t line_size, int h)
52 {
53     double ftmp0;
54     DECLARE_VAR_ALL64;
55 
56     __asm__ volatile (
57         "mtc1       %[value],   %[ftmp0]                                \n\t"
58         "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
59         "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
60         "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
61         "1:                                                             \n\t"
62         MMI_SDC1(%[ftmp0], %[block], 0x00)
63         PTR_ADDI   "%[h],       %[h],           -0x01                   \n\t"
64         PTR_ADDU   "%[block],   %[block],       %[line_size]            \n\t"
65         "bnez       %[h],       1b                                      \n\t"
66         : [ftmp0]"=&f"(ftmp0),
67           RESTRICT_ASM_ALL64
68           [block]"+&r"(block),              [h]"+&r"(h)
69         : [value]"r"(value),                [line_size]"r"((mips_reg)line_size)
70         : "memory"
71     );
72 }
73 
ff_clear_block_mmi(int16_t * block)74 void ff_clear_block_mmi(int16_t *block)
75 {
76     double ftmp[2];
77 
78     __asm__ volatile (
79         "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
80         "xor        %[ftmp1],   %[ftmp1],       %[ftmp1]                \n\t"
81         MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x00)
82         MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x10)
83         MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x20)
84         MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x30)
85         MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x40)
86         MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x50)
87         MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x60)
88         MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x70)
89         : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1])
90         : [block]"r"(block)
91         : "memory"
92     );
93 }
94 
ff_clear_blocks_mmi(int16_t * block)95 void ff_clear_blocks_mmi(int16_t *block)
96 {
97     double ftmp[2];
98 
99     __asm__ volatile (
100         "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
101         "xor        %[ftmp1],   %[ftmp1],       %[ftmp1]                \n\t"
102         MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x00)
103         MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x10)
104         MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x20)
105         MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x30)
106         MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x40)
107         MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x50)
108         MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x60)
109         MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x70)
110 
111         MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x80)
112         MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x90)
113         MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0xa0)
114         MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0xb0)
115         MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0xc0)
116         MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0xd0)
117         MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0xe0)
118         MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0xf0)
119 
120         MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x100)
121         MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x110)
122         MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x120)
123         MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x130)
124         MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x140)
125         MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x150)
126         MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x160)
127         MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x170)
128 
129         MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x180)
130         MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x190)
131         MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x1a0)
132         MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x1b0)
133         MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x1c0)
134         MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x1d0)
135         MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x1e0)
136         MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x1f0)
137 
138         MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x200)
139         MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x210)
140         MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x220)
141         MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x230)
142         MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x240)
143         MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x250)
144         MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x260)
145         MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x270)
146 
147         MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x280)
148         MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x290)
149         MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x2a0)
150         MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x2b0)
151         MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x2c0)
152         MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x2d0)
153         MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x2e0)
154         MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x2f0)
155         : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1])
156         : [block]"r"((uint64_t *)block)
157         : "memory"
158     );
159 }
160