/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */
11 
12 #ifndef AOM_AOM_DSP_MIPS_LOOPFILTER_MACROS_DSPR2_H_
13 #define AOM_AOM_DSP_MIPS_LOOPFILTER_MACROS_DSPR2_H_
14 
15 #include <stdlib.h>
16 
17 #include "config/aom_dsp_rtcd.h"
18 
19 #include "aom/aom_integer.h"
20 #include "aom_mem/aom_mem.h"
21 
22 #ifdef __cplusplus
23 extern "C" {
24 #endif
25 
26 #if HAVE_DSPR2
/*
 * STORE_F0() - write the four byte lanes of four filtered values to memory.
 *
 * The macro takes no arguments; it uses these names from the invoking scope:
 *   p1_f0, p0_f0, q0_f0, q1_f0  integer values held in registers
 *   s1, s2, s3, s4              destination byte pointers
 *
 * For each destination the bytes are laid out as
 *   p1 at offset -2, p0 at -1, q0 at 0, q1 at +1
 * and the byte lane stored to each destination is
 *   bits  7..0  -> s4
 *   bits 15..8  -> s3
 *   bits 23..16 -> s2
 *   bits 31..24 -> s1
 *
 * Side effect: p1_f0, p0_f0, q0_f0 and q1_f0 are shifted right (logically) by
 * 8 bits three times, so they do not hold their original values afterwards.
 *
 * Every store group carries a "memory" clobber because the bytes are written
 * through plain register operands, which the compiler cannot otherwise see as
 * memory writes.
 *
 * NOTE(review): the expansion is a bare brace block, not do { } while (0).
 * It is kept that way because call sites are not visible here and may invoke
 * the macro without a trailing semicolon.
 */
#define STORE_F0()                                                       \
  {                                                                      \
    /* First byte lane -> s4. */                                         \
    __asm__ __volatile__(                                                \
        "sb     %[q1_f0],    1(%[s4])           \n\t"                    \
        "sb     %[q0_f0],    0(%[s4])           \n\t"                    \
        "sb     %[p0_f0],   -1(%[s4])           \n\t"                    \
        "sb     %[p1_f0],   -2(%[s4])           \n\t"                    \
                                                                         \
        :                                                                \
        : [q1_f0] "r"(q1_f0), [q0_f0] "r"(q0_f0), [p0_f0] "r"(p0_f0),    \
          [p1_f0] "r"(p1_f0), [s4] "r"(s4)                               \
        : "memory");                                                     \
                                                                         \
    /* Move the next byte lane down to bits 7..0. */                     \
    __asm__ __volatile__(                                                \
        "srl    %[q1_f0],   %[q1_f0],   8       \n\t"                    \
        "srl    %[q0_f0],   %[q0_f0],   8       \n\t"                    \
        "srl    %[p0_f0],   %[p0_f0],   8       \n\t"                    \
        "srl    %[p1_f0],   %[p1_f0],   8       \n\t"                    \
                                                                         \
        : [q1_f0] "+r"(q1_f0), [q0_f0] "+r"(q0_f0), [p0_f0] "+r"(p0_f0), \
          [p1_f0] "+r"(p1_f0)                                            \
        :);                                                              \
                                                                         \
    /* Second byte lane -> s3. */                                        \
    __asm__ __volatile__(                                                \
        "sb     %[q1_f0],    1(%[s3])           \n\t"                    \
        "sb     %[q0_f0],    0(%[s3])           \n\t"                    \
        "sb     %[p0_f0],   -1(%[s3])           \n\t"                    \
        "sb     %[p1_f0],   -2(%[s3])           \n\t"                    \
                                                                         \
        :                                                                \
        : [q1_f0] "r"(q1_f0), [q0_f0] "r"(q0_f0), [p0_f0] "r"(p0_f0),    \
          [p1_f0] "r"(p1_f0), [s3] "r"(s3)                               \
        : "memory");                                                     \
                                                                         \
    /* Move the next byte lane down to bits 7..0. */                     \
    __asm__ __volatile__(                                                \
        "srl    %[q1_f0],   %[q1_f0],   8       \n\t"                    \
        "srl    %[q0_f0],   %[q0_f0],   8       \n\t"                    \
        "srl    %[p0_f0],   %[p0_f0],   8       \n\t"                    \
        "srl    %[p1_f0],   %[p1_f0],   8       \n\t"                    \
                                                                         \
        : [q1_f0] "+r"(q1_f0), [q0_f0] "+r"(q0_f0), [p0_f0] "+r"(p0_f0), \
          [p1_f0] "+r"(p1_f0)                                            \
        :);                                                              \
                                                                         \
    /* Third byte lane -> s2. */                                         \
    __asm__ __volatile__(                                                \
        "sb     %[q1_f0],    1(%[s2])           \n\t"                    \
        "sb     %[q0_f0],    0(%[s2])           \n\t"                    \
        "sb     %[p0_f0],   -1(%[s2])           \n\t"                    \
        "sb     %[p1_f0],   -2(%[s2])           \n\t"                    \
                                                                         \
        :                                                                \
        : [q1_f0] "r"(q1_f0), [q0_f0] "r"(q0_f0), [p0_f0] "r"(p0_f0),    \
          [p1_f0] "r"(p1_f0), [s2] "r"(s2)                               \
        : "memory");                                                     \
                                                                         \
    /* Move the last byte lane down to bits 7..0. */                     \
    __asm__ __volatile__(                                                \
        "srl    %[q1_f0],   %[q1_f0],   8       \n\t"                    \
        "srl    %[q0_f0],   %[q0_f0],   8       \n\t"                    \
        "srl    %[p0_f0],   %[p0_f0],   8       \n\t"                    \
        "srl    %[p1_f0],   %[p1_f0],   8       \n\t"                    \
                                                                         \
        : [q1_f0] "+r"(q1_f0), [q0_f0] "+r"(q0_f0), [p0_f0] "+r"(p0_f0), \
          [p1_f0] "+r"(p1_f0)                                            \
        :);                                                              \
                                                                         \
    /* Fourth byte lane -> s1. */                                        \
    __asm__ __volatile__(                                                \
        "sb     %[q1_f0],    1(%[s1])           \n\t"                    \
        "sb     %[q0_f0],    0(%[s1])           \n\t"                    \
        "sb     %[p0_f0],   -1(%[s1])           \n\t"                    \
        "sb     %[p1_f0],   -2(%[s1])           \n\t"                    \
                                                                         \
        :                                                                \
        : [q1_f0] "r"(q1_f0), [q0_f0] "r"(q0_f0), [p0_f0] "r"(p0_f0),    \
          [p1_f0] "r"(p1_f0), [s1] "r"(s1)                               \
        : "memory");                                                     \
  }
99 
/*
 * STORE_F1() - write six filtered values per destination to four
 * destinations.
 *
 * The macro takes no arguments; it uses these names from the invoking scope:
 *   p2_r, p1_r, p0_r, q0_r, q1_r, q2_r  values whose bits 7..0 and 23..16
 *                                       are stored to s4 and s3 respectively
 *   p2_l, p1_l, p0_l, q0_l, q1_l, q2_l  values whose bits 7..0 and 23..16
 *                                       are stored to s2 and s1 respectively
 *   s1, s2, s3, s4                      destination byte pointers
 *
 * For each destination the bytes are laid out as
 *   p2 at offset -3, p1 at -2, p0 at -1, q0 at 0, q1 at +1, q2 at +2.
 *
 * Side effect: every *_r and *_l variable listed above is shifted right
 * (logically) by 16 bits, so it does not hold its original value afterwards.
 *
 * Every store group carries a "memory" clobber because the bytes are written
 * through plain register operands, which the compiler cannot otherwise see as
 * memory writes.
 *
 * NOTE(review): the expansion is a bare brace block, not do { } while (0).
 * It is kept that way because call sites are not visible here and may invoke
 * the macro without a trailing semicolon.
 */
#define STORE_F1()                                                             \
  {                                                                            \
    /* Bits 7..0 of each *_r value -> s4. */                                   \
    __asm__ __volatile__(                                                      \
        "sb     %[q2_r],     2(%[s4])           \n\t"                          \
        "sb     %[q1_r],     1(%[s4])           \n\t"                          \
        "sb     %[q0_r],     0(%[s4])           \n\t"                          \
        "sb     %[p0_r],    -1(%[s4])           \n\t"                          \
        "sb     %[p1_r],    -2(%[s4])           \n\t"                          \
        "sb     %[p2_r],    -3(%[s4])           \n\t"                          \
                                                                               \
        :                                                                      \
        : [q2_r] "r"(q2_r), [q1_r] "r"(q1_r), [q0_r] "r"(q0_r),                \
          [p0_r] "r"(p0_r), [p1_r] "r"(p1_r), [p2_r] "r"(p2_r), [s4] "r"(s4)   \
        : "memory");                                                           \
                                                                               \
    /* Bring bits 23..16 of each *_r value down to bits 7..0. */               \
    __asm__ __volatile__(                                                      \
        "srl    %[q2_r],    %[q2_r],    16      \n\t"                          \
        "srl    %[q1_r],    %[q1_r],    16      \n\t"                          \
        "srl    %[q0_r],    %[q0_r],    16      \n\t"                          \
        "srl    %[p0_r],    %[p0_r],    16      \n\t"                          \
        "srl    %[p1_r],    %[p1_r],    16      \n\t"                          \
        "srl    %[p2_r],    %[p2_r],    16      \n\t"                          \
                                                                               \
        : [q2_r] "+r"(q2_r), [q1_r] "+r"(q1_r), [q0_r] "+r"(q0_r),             \
          [p0_r] "+r"(p0_r), [p1_r] "+r"(p1_r), [p2_r] "+r"(p2_r)              \
        :);                                                                    \
                                                                               \
    /* Former bits 23..16 of each *_r value -> s3. */                          \
    __asm__ __volatile__(                                                      \
        "sb     %[q2_r],     2(%[s3])           \n\t"                          \
        "sb     %[q1_r],     1(%[s3])           \n\t"                          \
        "sb     %[q0_r],     0(%[s3])           \n\t"                          \
        "sb     %[p0_r],    -1(%[s3])           \n\t"                          \
        "sb     %[p1_r],    -2(%[s3])           \n\t"                          \
        "sb     %[p2_r],    -3(%[s3])           \n\t"                          \
                                                                               \
        :                                                                      \
        : [q2_r] "r"(q2_r), [q1_r] "r"(q1_r), [q0_r] "r"(q0_r),                \
          [p0_r] "r"(p0_r), [p1_r] "r"(p1_r), [p2_r] "r"(p2_r), [s3] "r"(s3)   \
        : "memory");                                                           \
                                                                               \
    /* Bits 7..0 of each *_l value -> s2. */                                   \
    __asm__ __volatile__(                                                      \
        "sb     %[q2_l],     2(%[s2])           \n\t"                          \
        "sb     %[q1_l],     1(%[s2])           \n\t"                          \
        "sb     %[q0_l],     0(%[s2])           \n\t"                          \
        "sb     %[p0_l],    -1(%[s2])           \n\t"                          \
        "sb     %[p1_l],    -2(%[s2])           \n\t"                          \
        "sb     %[p2_l],    -3(%[s2])           \n\t"                          \
                                                                               \
        :                                                                      \
        : [q2_l] "r"(q2_l), [q1_l] "r"(q1_l), [q0_l] "r"(q0_l),                \
          [p0_l] "r"(p0_l), [p1_l] "r"(p1_l), [p2_l] "r"(p2_l), [s2] "r"(s2)   \
        : "memory");                                                           \
                                                                               \
    /* Bring bits 23..16 of each *_l value down to bits 7..0. */               \
    __asm__ __volatile__(                                                      \
        "srl    %[q2_l],    %[q2_l],    16      \n\t"                          \
        "srl    %[q1_l],    %[q1_l],    16      \n\t"                          \
        "srl    %[q0_l],    %[q0_l],    16      \n\t"                          \
        "srl    %[p0_l],    %[p0_l],    16      \n\t"                          \
        "srl    %[p1_l],    %[p1_l],    16      \n\t"                          \
        "srl    %[p2_l],    %[p2_l],    16      \n\t"                          \
                                                                               \
        : [q2_l] "+r"(q2_l), [q1_l] "+r"(q1_l), [q0_l] "+r"(q0_l),             \
          [p0_l] "+r"(p0_l), [p1_l] "+r"(p1_l), [p2_l] "+r"(p2_l)              \
        :);                                                                    \
                                                                               \
    /* Former bits 23..16 of each *_l value -> s1. */                          \
    __asm__ __volatile__(                                                      \
        "sb     %[q2_l],     2(%[s1])           \n\t"                          \
        "sb     %[q1_l],     1(%[s1])           \n\t"                          \
        "sb     %[q0_l],     0(%[s1])           \n\t"                          \
        "sb     %[p0_l],    -1(%[s1])           \n\t"                          \
        "sb     %[p1_l],    -2(%[s1])           \n\t"                          \
        "sb     %[p2_l],    -3(%[s1])           \n\t"                          \
                                                                               \
        :                                                                      \
        : [q2_l] "r"(q2_l), [q1_l] "r"(q1_l), [q0_l] "r"(q0_l),                \
          [p0_l] "r"(p0_l), [p1_l] "r"(p1_l), [p2_l] "r"(p2_l), [s1] "r"(s1)   \
        : "memory");                                                           \
  }
174 
/*
 * STORE_F2() - write fourteen filtered values per destination to four
 * destinations.
 *
 * The macro takes no arguments; it uses these names from the invoking scope:
 *   p6_r..p0_r, q0_r..q6_r  values whose bits 7..0 and 23..16 are stored to
 *                           s4 and s3 respectively
 *   p6_l..p0_l, q0_l..q6_l  values whose bits 7..0 and 23..16 are stored to
 *                           s2 and s1 respectively
 *   s1, s2, s3, s4          destination byte pointers
 *
 * For each destination the bytes are laid out as
 *   p6 at offset -7, p5 at -6, ..., p0 at -1, q0 at 0, q1 at +1, ..., q6 at +6.
 *
 * Side effect: every *_r and *_l variable listed above is shifted right
 * (logically) by 16 bits, so it does not hold its original value afterwards.
 *
 * Every store group carries a "memory" clobber because the bytes are written
 * through plain register operands, which the compiler cannot otherwise see as
 * memory writes.
 *
 * NOTE(review): the expansion is a bare brace block, not do { } while (0).
 * It is kept that way because call sites are not visible here and may invoke
 * the macro without a trailing semicolon.
 */
#define STORE_F2()                                                 \
  {                                                                \
    /* Bits 7..0 of each *_r value -> s4. */                       \
    __asm__ __volatile__(                                          \
        "sb     %[q6_r],     6(%[s4])           \n\t"              \
        "sb     %[q5_r],     5(%[s4])           \n\t"              \
        "sb     %[q4_r],     4(%[s4])           \n\t"              \
        "sb     %[q3_r],     3(%[s4])           \n\t"              \
        "sb     %[q2_r],     2(%[s4])           \n\t"              \
        "sb     %[q1_r],     1(%[s4])           \n\t"              \
        "sb     %[q0_r],     0(%[s4])           \n\t"              \
        "sb     %[p0_r],    -1(%[s4])           \n\t"              \
        "sb     %[p1_r],    -2(%[s4])           \n\t"              \
        "sb     %[p2_r],    -3(%[s4])           \n\t"              \
        "sb     %[p3_r],    -4(%[s4])           \n\t"              \
        "sb     %[p4_r],    -5(%[s4])           \n\t"              \
        "sb     %[p5_r],    -6(%[s4])           \n\t"              \
        "sb     %[p6_r],    -7(%[s4])           \n\t"              \
                                                                   \
        :                                                          \
        : [q6_r] "r"(q6_r), [q5_r] "r"(q5_r), [q4_r] "r"(q4_r),    \
          [q3_r] "r"(q3_r), [q2_r] "r"(q2_r), [q1_r] "r"(q1_r),    \
          [q0_r] "r"(q0_r), [p0_r] "r"(p0_r), [p1_r] "r"(p1_r),    \
          [p2_r] "r"(p2_r), [p3_r] "r"(p3_r), [p4_r] "r"(p4_r),    \
          [p5_r] "r"(p5_r), [p6_r] "r"(p6_r), [s4] "r"(s4)         \
        : "memory");                                               \
                                                                   \
    /* Bring bits 23..16 of each *_r value down to bits 7..0. */   \
    __asm__ __volatile__(                                          \
        "srl    %[q6_r],    %[q6_r],    16      \n\t"              \
        "srl    %[q5_r],    %[q5_r],    16      \n\t"              \
        "srl    %[q4_r],    %[q4_r],    16      \n\t"              \
        "srl    %[q3_r],    %[q3_r],    16      \n\t"              \
        "srl    %[q2_r],    %[q2_r],    16      \n\t"              \
        "srl    %[q1_r],    %[q1_r],    16      \n\t"              \
        "srl    %[q0_r],    %[q0_r],    16      \n\t"              \
        "srl    %[p0_r],    %[p0_r],    16      \n\t"              \
        "srl    %[p1_r],    %[p1_r],    16      \n\t"              \
        "srl    %[p2_r],    %[p2_r],    16      \n\t"              \
        "srl    %[p3_r],    %[p3_r],    16      \n\t"              \
        "srl    %[p4_r],    %[p4_r],    16      \n\t"              \
        "srl    %[p5_r],    %[p5_r],    16      \n\t"              \
        "srl    %[p6_r],    %[p6_r],    16      \n\t"              \
                                                                   \
        : [q6_r] "+r"(q6_r), [q5_r] "+r"(q5_r), [q4_r] "+r"(q4_r), \
          [q3_r] "+r"(q3_r), [q2_r] "+r"(q2_r), [q1_r] "+r"(q1_r), \
          [q0_r] "+r"(q0_r), [p0_r] "+r"(p0_r), [p1_r] "+r"(p1_r), \
          [p2_r] "+r"(p2_r), [p3_r] "+r"(p3_r), [p4_r] "+r"(p4_r), \
          [p5_r] "+r"(p5_r), [p6_r] "+r"(p6_r)                     \
        :);                                                        \
                                                                   \
    /* Former bits 23..16 of each *_r value -> s3. */              \
    __asm__ __volatile__(                                          \
        "sb     %[q6_r],     6(%[s3])           \n\t"              \
        "sb     %[q5_r],     5(%[s3])           \n\t"              \
        "sb     %[q4_r],     4(%[s3])           \n\t"              \
        "sb     %[q3_r],     3(%[s3])           \n\t"              \
        "sb     %[q2_r],     2(%[s3])           \n\t"              \
        "sb     %[q1_r],     1(%[s3])           \n\t"              \
        "sb     %[q0_r],     0(%[s3])           \n\t"              \
        "sb     %[p0_r],    -1(%[s3])           \n\t"              \
        "sb     %[p1_r],    -2(%[s3])           \n\t"              \
        "sb     %[p2_r],    -3(%[s3])           \n\t"              \
        "sb     %[p3_r],    -4(%[s3])           \n\t"              \
        "sb     %[p4_r],    -5(%[s3])           \n\t"              \
        "sb     %[p5_r],    -6(%[s3])           \n\t"              \
        "sb     %[p6_r],    -7(%[s3])           \n\t"              \
                                                                   \
        :                                                          \
        : [q6_r] "r"(q6_r), [q5_r] "r"(q5_r), [q4_r] "r"(q4_r),    \
          [q3_r] "r"(q3_r), [q2_r] "r"(q2_r), [q1_r] "r"(q1_r),    \
          [q0_r] "r"(q0_r), [p0_r] "r"(p0_r), [p1_r] "r"(p1_r),    \
          [p2_r] "r"(p2_r), [p3_r] "r"(p3_r), [p4_r] "r"(p4_r),    \
          [p5_r] "r"(p5_r), [p6_r] "r"(p6_r), [s3] "r"(s3)         \
        : "memory");                                               \
                                                                   \
    /* Bits 7..0 of each *_l value -> s2. */                       \
    __asm__ __volatile__(                                          \
        "sb     %[q6_l],     6(%[s2])           \n\t"              \
        "sb     %[q5_l],     5(%[s2])           \n\t"              \
        "sb     %[q4_l],     4(%[s2])           \n\t"              \
        "sb     %[q3_l],     3(%[s2])           \n\t"              \
        "sb     %[q2_l],     2(%[s2])           \n\t"              \
        "sb     %[q1_l],     1(%[s2])           \n\t"              \
        "sb     %[q0_l],     0(%[s2])           \n\t"              \
        "sb     %[p0_l],    -1(%[s2])           \n\t"              \
        "sb     %[p1_l],    -2(%[s2])           \n\t"              \
        "sb     %[p2_l],    -3(%[s2])           \n\t"              \
        "sb     %[p3_l],    -4(%[s2])           \n\t"              \
        "sb     %[p4_l],    -5(%[s2])           \n\t"              \
        "sb     %[p5_l],    -6(%[s2])           \n\t"              \
        "sb     %[p6_l],    -7(%[s2])           \n\t"              \
                                                                   \
        :                                                          \
        : [q6_l] "r"(q6_l), [q5_l] "r"(q5_l), [q4_l] "r"(q4_l),    \
          [q3_l] "r"(q3_l), [q2_l] "r"(q2_l), [q1_l] "r"(q1_l),    \
          [q0_l] "r"(q0_l), [p0_l] "r"(p0_l), [p1_l] "r"(p1_l),    \
          [p2_l] "r"(p2_l), [p3_l] "r"(p3_l), [p4_l] "r"(p4_l),    \
          [p5_l] "r"(p5_l), [p6_l] "r"(p6_l), [s2] "r"(s2)         \
        : "memory");                                               \
                                                                   \
    /* Bring bits 23..16 of each *_l value down to bits 7..0. */   \
    __asm__ __volatile__(                                          \
        "srl    %[q6_l],    %[q6_l],    16     \n\t"               \
        "srl    %[q5_l],    %[q5_l],    16     \n\t"               \
        "srl    %[q4_l],    %[q4_l],    16     \n\t"               \
        "srl    %[q3_l],    %[q3_l],    16     \n\t"               \
        "srl    %[q2_l],    %[q2_l],    16     \n\t"               \
        "srl    %[q1_l],    %[q1_l],    16     \n\t"               \
        "srl    %[q0_l],    %[q0_l],    16     \n\t"               \
        "srl    %[p0_l],    %[p0_l],    16     \n\t"               \
        "srl    %[p1_l],    %[p1_l],    16     \n\t"               \
        "srl    %[p2_l],    %[p2_l],    16     \n\t"               \
        "srl    %[p3_l],    %[p3_l],    16     \n\t"               \
        "srl    %[p4_l],    %[p4_l],    16     \n\t"               \
        "srl    %[p5_l],    %[p5_l],    16     \n\t"               \
        "srl    %[p6_l],    %[p6_l],    16     \n\t"               \
                                                                   \
        : [q6_l] "+r"(q6_l), [q5_l] "+r"(q5_l), [q4_l] "+r"(q4_l), \
          [q3_l] "+r"(q3_l), [q2_l] "+r"(q2_l), [q1_l] "+r"(q1_l), \
          [q0_l] "+r"(q0_l), [p0_l] "+r"(p0_l), [p1_l] "+r"(p1_l), \
          [p2_l] "+r"(p2_l), [p3_l] "+r"(p3_l), [p4_l] "+r"(p4_l), \
          [p5_l] "+r"(p5_l), [p6_l] "+r"(p6_l)                     \
        :);                                                        \
                                                                   \
    /* Former bits 23..16 of each *_l value -> s1. */              \
    __asm__ __volatile__(                                          \
        "sb     %[q6_l],     6(%[s1])           \n\t"              \
        "sb     %[q5_l],     5(%[s1])           \n\t"              \
        "sb     %[q4_l],     4(%[s1])           \n\t"              \
        "sb     %[q3_l],     3(%[s1])           \n\t"              \
        "sb     %[q2_l],     2(%[s1])           \n\t"              \
        "sb     %[q1_l],     1(%[s1])           \n\t"              \
        "sb     %[q0_l],     0(%[s1])           \n\t"              \
        "sb     %[p0_l],    -1(%[s1])           \n\t"              \
        "sb     %[p1_l],    -2(%[s1])           \n\t"              \
        "sb     %[p2_l],    -3(%[s1])           \n\t"              \
        "sb     %[p3_l],    -4(%[s1])           \n\t"              \
        "sb     %[p4_l],    -5(%[s1])           \n\t"              \
        "sb     %[p5_l],    -6(%[s1])           \n\t"              \
        "sb     %[p6_l],    -7(%[s1])           \n\t"              \
                                                                   \
        :                                                          \
        : [q6_l] "r"(q6_l), [q5_l] "r"(q5_l), [q4_l] "r"(q4_l),    \
          [q3_l] "r"(q3_l), [q2_l] "r"(q2_l), [q1_l] "r"(q1_l),    \
          [q0_l] "r"(q0_l), [p0_l] "r"(p0_l), [p1_l] "r"(p1_l),    \
          [p2_l] "r"(p2_l), [p3_l] "r"(p3_l), [p4_l] "r"(p4_l),    \
          [p5_l] "r"(p5_l), [p6_l] "r"(p6_l), [s1] "r"(s1)         \
        : "memory");                                               \
  }
315 
/*
 * PACK_LEFT_0TO3() - widen the two most-significant bytes of p3..p0 and
 * q0..q3 into halfword pairs.
 *
 * The macro takes no arguments; it uses these names from the invoking scope:
 *   inputs   p3, p2, p1, p0, q0, q1, q2, q3
 *   outputs  p3_l, p2_l, p1_l, p0_l, q0_l, q1_l, q2_l, q3_l
 *
 * Each output is produced by preceu.ph.qbl, which zero-extends the two
 * left-most bytes of its source into two unsigned 16-bit halfwords. The
 * inputs are not modified.
 *
 * All outputs are early-clobber ("=&r"): the whole instruction sequence lives
 * in one asm statement, so an output must not share a register with an input
 * that a later instruction still reads.
 *
 * NOTE(review): the expansion is a bare brace block, not do { } while (0).
 * It is kept that way because call sites are not visible here and may invoke
 * the macro without a trailing semicolon.
 */
#define PACK_LEFT_0TO3()                                              \
  {                                                                   \
    __asm__ __volatile__(                                             \
        "preceu.ph.qbl   %[p3_l],   %[p3]   \n\t"                     \
        "preceu.ph.qbl   %[p2_l],   %[p2]   \n\t"                     \
        "preceu.ph.qbl   %[p1_l],   %[p1]   \n\t"                     \
        "preceu.ph.qbl   %[p0_l],   %[p0]   \n\t"                     \
        "preceu.ph.qbl   %[q0_l],   %[q0]   \n\t"                     \
        "preceu.ph.qbl   %[q1_l],   %[q1]   \n\t"                     \
        "preceu.ph.qbl   %[q2_l],   %[q2]   \n\t"                     \
        "preceu.ph.qbl   %[q3_l],   %[q3]   \n\t"                     \
                                                                      \
        : [p3_l] "=&r"(p3_l), [p2_l] "=&r"(p2_l), [p1_l] "=&r"(p1_l), \
          [p0_l] "=&r"(p0_l), [q0_l] "=&r"(q0_l), [q1_l] "=&r"(q1_l), \
          [q2_l] "=&r"(q2_l), [q3_l] "=&r"(q3_l)                      \
        : [p3] "r"(p3), [p2] "r"(p2), [p1] "r"(p1), [p0] "r"(p0),     \
          [q0] "r"(q0), [q1] "r"(q1), [q2] "r"(q2), [q3] "r"(q3));    \
  }
334 
/*
 * PACK_LEFT_4TO7() - widen the two most-significant bytes of p7..p4 and
 * q4..q7 into halfword pairs.
 *
 * The macro takes no arguments; it uses these names from the invoking scope:
 *   inputs   p7, p6, p5, p4, q4, q5, q6, q7
 *   outputs  p7_l, p6_l, p5_l, p4_l, q4_l, q5_l, q6_l, q7_l
 *
 * Each output is produced by preceu.ph.qbl, which zero-extends the two
 * left-most bytes of its source into two unsigned 16-bit halfwords. The
 * inputs are not modified.
 *
 * All outputs are early-clobber ("=&r"): the whole instruction sequence lives
 * in one asm statement, so an output must not share a register with an input
 * that a later instruction still reads.
 *
 * NOTE(review): the expansion is a bare brace block, not do { } while (0).
 * It is kept that way because call sites are not visible here and may invoke
 * the macro without a trailing semicolon.
 */
#define PACK_LEFT_4TO7()                                              \
  {                                                                   \
    __asm__ __volatile__(                                             \
        "preceu.ph.qbl   %[p7_l],   %[p7]   \n\t"                     \
        "preceu.ph.qbl   %[p6_l],   %[p6]   \n\t"                     \
        "preceu.ph.qbl   %[p5_l],   %[p5]   \n\t"                     \
        "preceu.ph.qbl   %[p4_l],   %[p4]   \n\t"                     \
        "preceu.ph.qbl   %[q4_l],   %[q4]   \n\t"                     \
        "preceu.ph.qbl   %[q5_l],   %[q5]   \n\t"                     \
        "preceu.ph.qbl   %[q6_l],   %[q6]   \n\t"                     \
        "preceu.ph.qbl   %[q7_l],   %[q7]   \n\t"                     \
                                                                      \
        : [p7_l] "=&r"(p7_l), [p6_l] "=&r"(p6_l), [p5_l] "=&r"(p5_l), \
          [p4_l] "=&r"(p4_l), [q4_l] "=&r"(q4_l), [q5_l] "=&r"(q5_l), \
          [q6_l] "=&r"(q6_l), [q7_l] "=&r"(q7_l)                      \
        : [p7] "r"(p7), [p6] "r"(p6), [p5] "r"(p5), [p4] "r"(p4),     \
          [q4] "r"(q4), [q5] "r"(q5), [q6] "r"(q6), [q7] "r"(q7));    \
  }
353 
/*
 * PACK_RIGHT_0TO3() - widen the two least-significant bytes of p3..p0 and
 * q0..q3 into halfword pairs.
 *
 * The macro takes no arguments; it uses these names from the invoking scope:
 *   inputs   p3, p2, p1, p0, q0, q1, q2, q3
 *   outputs  p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r
 *
 * Each output is produced by preceu.ph.qbr, which zero-extends the two
 * right-most bytes of its source into two unsigned 16-bit halfwords. The
 * inputs are not modified.
 *
 * All outputs are early-clobber ("=&r"): the whole instruction sequence lives
 * in one asm statement, so an output must not share a register with an input
 * that a later instruction still reads.
 *
 * NOTE(review): the expansion is a bare brace block, not do { } while (0).
 * It is kept that way because call sites are not visible here and may invoke
 * the macro without a trailing semicolon.
 */
#define PACK_RIGHT_0TO3()                                             \
  {                                                                   \
    __asm__ __volatile__(                                             \
        "preceu.ph.qbr   %[p3_r],   %[p3]  \n\t"                      \
        "preceu.ph.qbr   %[p2_r],   %[p2]   \n\t"                     \
        "preceu.ph.qbr   %[p1_r],   %[p1]   \n\t"                     \
        "preceu.ph.qbr   %[p0_r],   %[p0]   \n\t"                     \
        "preceu.ph.qbr   %[q0_r],   %[q0]   \n\t"                     \
        "preceu.ph.qbr   %[q1_r],   %[q1]   \n\t"                     \
        "preceu.ph.qbr   %[q2_r],   %[q2]   \n\t"                     \
        "preceu.ph.qbr   %[q3_r],   %[q3]   \n\t"                     \
                                                                      \
        : [p3_r] "=&r"(p3_r), [p2_r] "=&r"(p2_r), [p1_r] "=&r"(p1_r), \
          [p0_r] "=&r"(p0_r), [q0_r] "=&r"(q0_r), [q1_r] "=&r"(q1_r), \
          [q2_r] "=&r"(q2_r), [q3_r] "=&r"(q3_r)                      \
        : [p3] "r"(p3), [p2] "r"(p2), [p1] "r"(p1), [p0] "r"(p0),     \
          [q0] "r"(q0), [q1] "r"(q1), [q2] "r"(q2), [q3] "r"(q3));    \
  }
372 
/*
 * PACK_RIGHT_4TO7() - widen the two least-significant bytes of p7..p4 and
 * q4..q7 into halfword pairs.
 *
 * The macro takes no arguments; it uses these names from the invoking scope:
 *   inputs   p7, p6, p5, p4, q4, q5, q6, q7
 *   outputs  p7_r, p6_r, p5_r, p4_r, q4_r, q5_r, q6_r, q7_r
 *
 * Each output is produced by preceu.ph.qbr, which zero-extends the two
 * right-most bytes of its source into two unsigned 16-bit halfwords. The
 * inputs are not modified.
 *
 * All outputs are early-clobber ("=&r"): the whole instruction sequence lives
 * in one asm statement, so an output must not share a register with an input
 * that a later instruction still reads.
 *
 * NOTE(review): the expansion is a bare brace block, not do { } while (0).
 * It is kept that way because call sites are not visible here and may invoke
 * the macro without a trailing semicolon.
 */
#define PACK_RIGHT_4TO7()                                             \
  {                                                                   \
    __asm__ __volatile__(                                             \
        "preceu.ph.qbr   %[p7_r],   %[p7]   \n\t"                     \
        "preceu.ph.qbr   %[p6_r],   %[p6]   \n\t"                     \
        "preceu.ph.qbr   %[p5_r],   %[p5]   \n\t"                     \
        "preceu.ph.qbr   %[p4_r],   %[p4]   \n\t"                     \
        "preceu.ph.qbr   %[q4_r],   %[q4]   \n\t"                     \
        "preceu.ph.qbr   %[q5_r],   %[q5]   \n\t"                     \
        "preceu.ph.qbr   %[q6_r],   %[q6]   \n\t"                     \
        "preceu.ph.qbr   %[q7_r],   %[q7]   \n\t"                     \
                                                                      \
        : [p7_r] "=&r"(p7_r), [p6_r] "=&r"(p6_r), [p5_r] "=&r"(p5_r), \
          [p4_r] "=&r"(p4_r), [q4_r] "=&r"(q4_r), [q5_r] "=&r"(q5_r), \
          [q6_r] "=&r"(q6_r), [q7_r] "=&r"(q7_r)                      \
        : [p7] "r"(p7), [p6] "r"(p6), [p5] "r"(p5), [p4] "r"(p4),     \
          [q4] "r"(q4), [q5] "r"(q5), [q6] "r"(q6), [q7] "r"(q7));    \
  }
391 
/*
 * COMBINE_LEFT_RIGHT_0TO2() - narrow the *_l / *_r halfword pairs of p2..p0
 * and q0..q2 back into four packed bytes each.
 *
 * The macro takes no arguments; it uses these names from the invoking scope:
 *   inputs   p2_l, p2_r, p1_l, p1_r, p0_l, p0_r,
 *            q0_l, q0_r, q1_l, q1_r, q2_l, q2_r
 *   outputs  p2, p1, p0, q0, q1, q2 (overwritten)
 *
 * Each output X is produced by "precr.qb.ph X, X_l, X_r", which keeps the low
 * 8 bits of every halfword: the two bytes taken from X_l form the upper half
 * of X and the two bytes taken from X_r form the lower half.
 *
 * All outputs are early-clobber ("=&r"): the whole instruction sequence lives
 * in one asm statement, so an output must not share a register with an input
 * that a later instruction still reads.
 *
 * NOTE(review): the expansion is a bare brace block, not do { } while (0).
 * It is kept that way because call sites are not visible here and may invoke
 * the macro without a trailing semicolon.
 */
#define COMBINE_LEFT_RIGHT_0TO2()                                         \
  {                                                                       \
    __asm__ __volatile__(                                                 \
        "precr.qb.ph    %[p2],  %[p2_l],    %[p2_r]    \n\t"              \
        "precr.qb.ph    %[p1],  %[p1_l],    %[p1_r]    \n\t"              \
        "precr.qb.ph    %[p0],  %[p0_l],    %[p0_r]    \n\t"              \
        "precr.qb.ph    %[q0],  %[q0_l],    %[q0_r]    \n\t"              \
        "precr.qb.ph    %[q1],  %[q1_l],    %[q1_r]    \n\t"              \
        "precr.qb.ph    %[q2],  %[q2_l],    %[q2_r]    \n\t"              \
                                                                          \
        : [p2] "=&r"(p2), [p1] "=&r"(p1), [p0] "=&r"(p0), [q0] "=&r"(q0), \
          [q1] "=&r"(q1), [q2] "=&r"(q2)                                  \
        : [p2_l] "r"(p2_l), [p2_r] "r"(p2_r), [p1_l] "r"(p1_l),           \
          [p1_r] "r"(p1_r), [p0_l] "r"(p0_l), [p0_r] "r"(p0_r),           \
          [q0_l] "r"(q0_l), [q0_r] "r"(q0_r), [q1_l] "r"(q1_l),           \
          [q1_r] "r"(q1_r), [q2_l] "r"(q2_l), [q2_r] "r"(q2_r));          \
  }
409 
/*
 * COMBINE_LEFT_RIGHT_3TO6() - narrow the *_l / *_r halfword pairs of p6..p3
 * and q3..q6 back into four packed bytes each.
 *
 * The macro takes no arguments; it uses these names from the invoking scope:
 *   inputs   p6_l, p6_r, p5_l, p5_r, p4_l, p4_r, p3_l, p3_r,
 *            q3_l, q3_r, q4_l, q4_r, q5_l, q5_r, q6_l, q6_r
 *   outputs  p6, p5, p4, p3, q3, q4, q5, q6 (overwritten)
 *
 * Each output X is produced by "precr.qb.ph X, X_l, X_r", which keeps the low
 * 8 bits of every halfword: the two bytes taken from X_l form the upper half
 * of X and the two bytes taken from X_r form the lower half.
 *
 * All outputs are early-clobber ("=&r"): the whole instruction sequence lives
 * in one asm statement, so an output must not share a register with an input
 * that a later instruction still reads.
 *
 * NOTE(review): the expansion is a bare brace block, not do { } while (0).
 * It is kept that way because call sites are not visible here and may invoke
 * the macro without a trailing semicolon.
 */
#define COMBINE_LEFT_RIGHT_3TO6()                                         \
  {                                                                       \
    __asm__ __volatile__(                                                 \
        "precr.qb.ph    %[p6],  %[p6_l],    %[p6_r]    \n\t"              \
        "precr.qb.ph    %[p5],  %[p5_l],    %[p5_r]    \n\t"              \
        "precr.qb.ph    %[p4],  %[p4_l],    %[p4_r]    \n\t"              \
        "precr.qb.ph    %[p3],  %[p3_l],    %[p3_r]    \n\t"              \
        "precr.qb.ph    %[q3],  %[q3_l],    %[q3_r]    \n\t"              \
        "precr.qb.ph    %[q4],  %[q4_l],    %[q4_r]    \n\t"              \
        "precr.qb.ph    %[q5],  %[q5_l],    %[q5_r]    \n\t"              \
        "precr.qb.ph    %[q6],  %[q6_l],    %[q6_r]    \n\t"              \
                                                                          \
        : [p6] "=&r"(p6), [p5] "=&r"(p5), [p4] "=&r"(p4), [p3] "=&r"(p3), \
          [q3] "=&r"(q3), [q4] "=&r"(q4), [q5] "=&r"(q5), [q6] "=&r"(q6)  \
        : [p6_l] "r"(p6_l), [p5_l] "r"(p5_l), [p4_l] "r"(p4_l),           \
          [p3_l] "r"(p3_l), [p6_r] "r"(p6_r), [p5_r] "r"(p5_r),           \
          [p4_r] "r"(p4_r), [p3_r] "r"(p3_r), [q3_l] "r"(q3_l),           \
          [q4_l] "r"(q4_l), [q5_l] "r"(q5_l), [q6_l] "r"(q6_l),           \
          [q3_r] "r"(q3_r), [q4_r] "r"(q4_r), [q5_r] "r"(q5_r),           \
          [q6_r] "r"(q6_r));                                              \
  }
431 
432 #endif  // #if HAVE_DSPR2
433 #ifdef __cplusplus
434 }  // extern "C"
435 #endif
436 
437 #endif  // AOM_AOM_DSP_MIPS_LOOPFILTER_MACROS_DSPR2_H_
438