1 /*
2  *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #ifndef VPX_VPX_DSP_MIPS_LOOPFILTER_MACROS_DSPR2_H_
12 #define VPX_VPX_DSP_MIPS_LOOPFILTER_MACROS_DSPR2_H_
13 
14 #include <stdlib.h>
15 
16 #include "./vpx_dsp_rtcd.h"
17 #include "vpx/vpx_integer.h"
18 #include "vpx_mem/vpx_mem.h"
19 
20 #ifdef __cplusplus
21 extern "C" {
22 #endif
23 
24 #if HAVE_DSPR2
/*
 * STORE_F0 - write the four bytes of each packed *_f0 word to four addresses.
 *
 * Takes no arguments.  Uses p1_f0, p0_f0, q0_f0, q1_f0 and the addresses
 * s1, s2, s3, s4 from the enclosing scope and expands to a braced block.
 *
 * The least significant byte of p1_f0, p0_f0, q0_f0 and q1_f0 is stored at
 * byte offsets -2, -1, 0 and +1 from s4.  Every word is then shifted right
 * by 8 bits and the same four stores are repeated for s3, then s2, then s1.
 *
 * Side effect: the four *_f0 variables are modified; on exit each has been
 * shifted right by 24 bits in total.
 *
 * The store statements carry a "memory" clobber because they write through
 * s1..s4, which are only passed as register inputs, so the compiler cannot
 * otherwise see the writes.  p1_f0 is a plain input in every store statement;
 * the stores only read it.
 */
25 #define STORE_F0()                                                       \
26   {                                                                      \
27     __asm__ __volatile__(                                                \
28         "sb     %[q1_f0],    1(%[s4])           \n\t"                    \
29         "sb     %[q0_f0],    0(%[s4])           \n\t"                    \
30         "sb     %[p0_f0],   -1(%[s4])           \n\t"                    \
31         "sb     %[p1_f0],   -2(%[s4])           \n\t"                    \
32         : /* no outputs */                                               \
33         : [q1_f0] "r"(q1_f0), [q0_f0] "r"(q0_f0), [p0_f0] "r"(p0_f0),    \
34           [p1_f0] "r"(p1_f0), [s4] "r"(s4)                               \
35         : "memory");                                                     \
36                                                                          \
37     __asm__ __volatile__(                                                \
38         "srl    %[q1_f0],   %[q1_f0],   8       \n\t"                    \
39         "srl    %[q0_f0],   %[q0_f0],   8       \n\t"                    \
40         "srl    %[p0_f0],   %[p0_f0],   8       \n\t"                    \
41         "srl    %[p1_f0],   %[p1_f0],   8       \n\t"                    \
42                                                                          \
43         : [q1_f0] "+r"(q1_f0), [q0_f0] "+r"(q0_f0), [p0_f0] "+r"(p0_f0), \
44           [p1_f0] "+r"(p1_f0)                                            \
45         :);                                                              \
46                                                                          \
47     __asm__ __volatile__(                                                \
48         "sb     %[q1_f0],    1(%[s3])           \n\t"                    \
49         "sb     %[q0_f0],    0(%[s3])           \n\t"                    \
50         "sb     %[p0_f0],   -1(%[s3])           \n\t"                    \
51         "sb     %[p1_f0],   -2(%[s3])           \n\t"                    \
52         : /* no outputs */                                               \
53         : [q1_f0] "r"(q1_f0), [q0_f0] "r"(q0_f0), [p0_f0] "r"(p0_f0),    \
54           [p1_f0] "r"(p1_f0), [s3] "r"(s3)                               \
55         : "memory");                                                     \
56                                                                          \
57     __asm__ __volatile__(                                                \
58         "srl    %[q1_f0],   %[q1_f0],   8       \n\t"                    \
59         "srl    %[q0_f0],   %[q0_f0],   8       \n\t"                    \
60         "srl    %[p0_f0],   %[p0_f0],   8       \n\t"                    \
61         "srl    %[p1_f0],   %[p1_f0],   8       \n\t"                    \
62                                                                          \
63         : [q1_f0] "+r"(q1_f0), [q0_f0] "+r"(q0_f0), [p0_f0] "+r"(p0_f0), \
64           [p1_f0] "+r"(p1_f0)                                            \
65         :);                                                              \
66                                                                          \
67     __asm__ __volatile__(                                                \
68         "sb     %[q1_f0],    1(%[s2])           \n\t"                    \
69         "sb     %[q0_f0],    0(%[s2])           \n\t"                    \
70         "sb     %[p0_f0],   -1(%[s2])           \n\t"                    \
71         "sb     %[p1_f0],   -2(%[s2])           \n\t"                    \
72         : /* no outputs */                                               \
73         : [q1_f0] "r"(q1_f0), [q0_f0] "r"(q0_f0), [p0_f0] "r"(p0_f0),    \
74           [p1_f0] "r"(p1_f0), [s2] "r"(s2)                               \
75         : "memory");                                                     \
76                                                                          \
77     __asm__ __volatile__(                                                \
78         "srl    %[q1_f0],   %[q1_f0],   8       \n\t"                    \
79         "srl    %[q0_f0],   %[q0_f0],   8       \n\t"                    \
80         "srl    %[p0_f0],   %[p0_f0],   8       \n\t"                    \
81         "srl    %[p1_f0],   %[p1_f0],   8       \n\t"                    \
82                                                                          \
83         : [q1_f0] "+r"(q1_f0), [q0_f0] "+r"(q0_f0), [p0_f0] "+r"(p0_f0), \
84           [p1_f0] "+r"(p1_f0)                                            \
85         :);                                                              \
86                                                                          \
87     __asm__ __volatile__(                                                \
88         "sb     %[q1_f0],    1(%[s1])           \n\t"                    \
89         "sb     %[q0_f0],    0(%[s1])           \n\t"                    \
90         "sb     %[p0_f0],   -1(%[s1])           \n\t"                    \
91         "sb     %[p1_f0],   -2(%[s1])           \n\t"                    \
92         : /* no outputs */                                               \
93         : [q1_f0] "r"(q1_f0), [q0_f0] "r"(q0_f0), [p0_f0] "r"(p0_f0),    \
94           [p1_f0] "r"(p1_f0), [s1] "r"(s1)                               \
95         : "memory");                                                     \
96   }
97 
/*
 * STORE_F1 - write six filtered values to each of four addresses.
 *
 * Takes no arguments.  Uses p2_r..q2_r, p2_l..q2_l and the addresses
 * s1, s2, s3, s4 from the enclosing scope and expands to a braced block.
 *
 * The least significant byte of q2_r, q1_r, q0_r, p0_r, p1_r and p2_r is
 * stored at byte offsets +2, +1, 0, -1, -2 and -3 from s4.  The *_r words
 * are then shifted right by 16 bits and stored the same way at s3.  The
 * *_l words go through the same two steps for s2 and then s1.
 *
 * Side effect: the *_r and *_l variables are modified; on exit each has
 * been shifted right by 16 bits.
 *
 * The store statements carry a "memory" clobber because they write through
 * s1..s4, which are only passed as register inputs, so the compiler cannot
 * otherwise see the writes.
 */
98 #define STORE_F1()                                                             \
99   {                                                                            \
100     __asm__ __volatile__(                                                      \
101         "sb     %[q2_r],     2(%[s4])           \n\t"                          \
102         "sb     %[q1_r],     1(%[s4])           \n\t"                          \
103         "sb     %[q0_r],     0(%[s4])           \n\t"                          \
104         "sb     %[p0_r],    -1(%[s4])           \n\t"                          \
105         "sb     %[p1_r],    -2(%[s4])           \n\t"                          \
106         "sb     %[p2_r],    -3(%[s4])           \n\t"                          \
107         : /* no outputs */                                                     \
108         : [q2_r] "r"(q2_r), [q1_r] "r"(q1_r), [q0_r] "r"(q0_r),                \
109           [p0_r] "r"(p0_r), [p1_r] "r"(p1_r), [p2_r] "r"(p2_r), [s4] "r"(s4)   \
110         : "memory");                                                           \
111                                                                                \
112     __asm__ __volatile__(                                                      \
113         "srl    %[q2_r],    %[q2_r],    16      \n\t"                          \
114         "srl    %[q1_r],    %[q1_r],    16      \n\t"                          \
115         "srl    %[q0_r],    %[q0_r],    16      \n\t"                          \
116         "srl    %[p0_r],    %[p0_r],    16      \n\t"                          \
117         "srl    %[p1_r],    %[p1_r],    16      \n\t"                          \
118         "srl    %[p2_r],    %[p2_r],    16      \n\t"                          \
119                                                                                \
120         : [q2_r] "+r"(q2_r), [q1_r] "+r"(q1_r), [q0_r] "+r"(q0_r),             \
121           [p0_r] "+r"(p0_r), [p1_r] "+r"(p1_r), [p2_r] "+r"(p2_r)              \
122         :);                                                                    \
123                                                                                \
124     __asm__ __volatile__(                                                      \
125         "sb     %[q2_r],     2(%[s3])           \n\t"                          \
126         "sb     %[q1_r],     1(%[s3])           \n\t"                          \
127         "sb     %[q0_r],     0(%[s3])           \n\t"                          \
128         "sb     %[p0_r],    -1(%[s3])           \n\t"                          \
129         "sb     %[p1_r],    -2(%[s3])           \n\t"                          \
130         "sb     %[p2_r],    -3(%[s3])           \n\t"                          \
131         : /* no outputs */                                                     \
132         : [q2_r] "r"(q2_r), [q1_r] "r"(q1_r), [q0_r] "r"(q0_r),                \
133           [p0_r] "r"(p0_r), [p1_r] "r"(p1_r), [p2_r] "r"(p2_r), [s3] "r"(s3)   \
134         : "memory");                                                           \
135                                                                                \
136     __asm__ __volatile__(                                                      \
137         "sb     %[q2_l],     2(%[s2])           \n\t"                          \
138         "sb     %[q1_l],     1(%[s2])           \n\t"                          \
139         "sb     %[q0_l],     0(%[s2])           \n\t"                          \
140         "sb     %[p0_l],    -1(%[s2])           \n\t"                          \
141         "sb     %[p1_l],    -2(%[s2])           \n\t"                          \
142         "sb     %[p2_l],    -3(%[s2])           \n\t"                          \
143         : /* no outputs */                                                     \
144         : [q2_l] "r"(q2_l), [q1_l] "r"(q1_l), [q0_l] "r"(q0_l),                \
145           [p0_l] "r"(p0_l), [p1_l] "r"(p1_l), [p2_l] "r"(p2_l), [s2] "r"(s2)   \
146         : "memory");                                                           \
147                                                                                \
148     __asm__ __volatile__(                                                      \
149         "srl    %[q2_l],    %[q2_l],    16      \n\t"                          \
150         "srl    %[q1_l],    %[q1_l],    16      \n\t"                          \
151         "srl    %[q0_l],    %[q0_l],    16      \n\t"                          \
152         "srl    %[p0_l],    %[p0_l],    16      \n\t"                          \
153         "srl    %[p1_l],    %[p1_l],    16      \n\t"                          \
154         "srl    %[p2_l],    %[p2_l],    16      \n\t"                          \
155                                                                                \
156         : [q2_l] "+r"(q2_l), [q1_l] "+r"(q1_l), [q0_l] "+r"(q0_l),             \
157           [p0_l] "+r"(p0_l), [p1_l] "+r"(p1_l), [p2_l] "+r"(p2_l)              \
158         :);                                                                    \
159                                                                                \
160     __asm__ __volatile__(                                                      \
161         "sb     %[q2_l],     2(%[s1])           \n\t"                          \
162         "sb     %[q1_l],     1(%[s1])           \n\t"                          \
163         "sb     %[q0_l],     0(%[s1])           \n\t"                          \
164         "sb     %[p0_l],    -1(%[s1])           \n\t"                          \
165         "sb     %[p1_l],    -2(%[s1])           \n\t"                          \
166         "sb     %[p2_l],    -3(%[s1])           \n\t"                          \
167         : /* no outputs */                                                     \
168         : [q2_l] "r"(q2_l), [q1_l] "r"(q1_l), [q0_l] "r"(q0_l),                \
169           [p0_l] "r"(p0_l), [p1_l] "r"(p1_l), [p2_l] "r"(p2_l), [s1] "r"(s1)   \
170         : "memory");                                                           \
171   }
172 
/*
 * STORE_F2 - write fourteen filtered values to each of four addresses.
 *
 * Takes no arguments.  Uses p6_r..q6_r, p6_l..q6_l and the addresses
 * s1, s2, s3, s4 from the enclosing scope and expands to a braced block.
 *
 * The least significant byte of q6_r down to q0_r is stored at byte
 * offsets +6 down to 0 from s4, and that of p0_r up to p6_r at offsets
 * -1 down to -7.  The *_r words are then shifted right by 16 bits and
 * stored the same way at s3.  The *_l words go through the same two steps
 * for s2 and then s1.
 *
 * Side effect: the *_r and *_l variables are modified; on exit each has
 * been shifted right by 16 bits.
 *
 * The store statements carry a "memory" clobber because they write through
 * s1..s4, which are only passed as register inputs, so the compiler cannot
 * otherwise see the writes.
 */
173 #define STORE_F2()                                                 \
174   {                                                                \
175     __asm__ __volatile__(                                          \
176         "sb     %[q6_r],     6(%[s4])           \n\t"              \
177         "sb     %[q5_r],     5(%[s4])           \n\t"              \
178         "sb     %[q4_r],     4(%[s4])           \n\t"              \
179         "sb     %[q3_r],     3(%[s4])           \n\t"              \
180         "sb     %[q2_r],     2(%[s4])           \n\t"              \
181         "sb     %[q1_r],     1(%[s4])           \n\t"              \
182         "sb     %[q0_r],     0(%[s4])           \n\t"              \
183         "sb     %[p0_r],    -1(%[s4])           \n\t"              \
184         "sb     %[p1_r],    -2(%[s4])           \n\t"              \
185         "sb     %[p2_r],    -3(%[s4])           \n\t"              \
186         "sb     %[p3_r],    -4(%[s4])           \n\t"              \
187         "sb     %[p4_r],    -5(%[s4])           \n\t"              \
188         "sb     %[p5_r],    -6(%[s4])           \n\t"              \
189         "sb     %[p6_r],    -7(%[s4])           \n\t"              \
190         : /* no outputs */                                         \
191         : [q6_r] "r"(q6_r), [q5_r] "r"(q5_r), [q4_r] "r"(q4_r),    \
192           [q3_r] "r"(q3_r), [q2_r] "r"(q2_r), [q1_r] "r"(q1_r),    \
193           [q0_r] "r"(q0_r), [p0_r] "r"(p0_r), [p1_r] "r"(p1_r),    \
194           [p2_r] "r"(p2_r), [p3_r] "r"(p3_r), [p4_r] "r"(p4_r),    \
195           [p5_r] "r"(p5_r), [p6_r] "r"(p6_r), [s4] "r"(s4)         \
196         : "memory");                                               \
197                                                                    \
198     __asm__ __volatile__(                                          \
199         "srl    %[q6_r],    %[q6_r],    16      \n\t"              \
200         "srl    %[q5_r],    %[q5_r],    16      \n\t"              \
201         "srl    %[q4_r],    %[q4_r],    16      \n\t"              \
202         "srl    %[q3_r],    %[q3_r],    16      \n\t"              \
203         "srl    %[q2_r],    %[q2_r],    16      \n\t"              \
204         "srl    %[q1_r],    %[q1_r],    16      \n\t"              \
205         "srl    %[q0_r],    %[q0_r],    16      \n\t"              \
206         "srl    %[p0_r],    %[p0_r],    16      \n\t"              \
207         "srl    %[p1_r],    %[p1_r],    16      \n\t"              \
208         "srl    %[p2_r],    %[p2_r],    16      \n\t"              \
209         "srl    %[p3_r],    %[p3_r],    16      \n\t"              \
210         "srl    %[p4_r],    %[p4_r],    16      \n\t"              \
211         "srl    %[p5_r],    %[p5_r],    16      \n\t"              \
212         "srl    %[p6_r],    %[p6_r],    16      \n\t"              \
213                                                                    \
214         : [q6_r] "+r"(q6_r), [q5_r] "+r"(q5_r), [q4_r] "+r"(q4_r), \
215           [q3_r] "+r"(q3_r), [q2_r] "+r"(q2_r), [q1_r] "+r"(q1_r), \
216           [q0_r] "+r"(q0_r), [p0_r] "+r"(p0_r), [p1_r] "+r"(p1_r), \
217           [p2_r] "+r"(p2_r), [p3_r] "+r"(p3_r), [p4_r] "+r"(p4_r), \
218           [p5_r] "+r"(p5_r), [p6_r] "+r"(p6_r)                     \
219         :);                                                        \
220                                                                    \
221     __asm__ __volatile__(                                          \
222         "sb     %[q6_r],     6(%[s3])           \n\t"              \
223         "sb     %[q5_r],     5(%[s3])           \n\t"              \
224         "sb     %[q4_r],     4(%[s3])           \n\t"              \
225         "sb     %[q3_r],     3(%[s3])           \n\t"              \
226         "sb     %[q2_r],     2(%[s3])           \n\t"              \
227         "sb     %[q1_r],     1(%[s3])           \n\t"              \
228         "sb     %[q0_r],     0(%[s3])           \n\t"              \
229         "sb     %[p0_r],    -1(%[s3])           \n\t"              \
230         "sb     %[p1_r],    -2(%[s3])           \n\t"              \
231         "sb     %[p2_r],    -3(%[s3])           \n\t"              \
232         "sb     %[p3_r],    -4(%[s3])           \n\t"              \
233         "sb     %[p4_r],    -5(%[s3])           \n\t"              \
234         "sb     %[p5_r],    -6(%[s3])           \n\t"              \
235         "sb     %[p6_r],    -7(%[s3])           \n\t"              \
236         : /* no outputs */                                         \
237         : [q6_r] "r"(q6_r), [q5_r] "r"(q5_r), [q4_r] "r"(q4_r),    \
238           [q3_r] "r"(q3_r), [q2_r] "r"(q2_r), [q1_r] "r"(q1_r),    \
239           [q0_r] "r"(q0_r), [p0_r] "r"(p0_r), [p1_r] "r"(p1_r),    \
240           [p2_r] "r"(p2_r), [p3_r] "r"(p3_r), [p4_r] "r"(p4_r),    \
241           [p5_r] "r"(p5_r), [p6_r] "r"(p6_r), [s3] "r"(s3)         \
242         : "memory");                                               \
243                                                                    \
244     __asm__ __volatile__(                                          \
245         "sb     %[q6_l],     6(%[s2])           \n\t"              \
246         "sb     %[q5_l],     5(%[s2])           \n\t"              \
247         "sb     %[q4_l],     4(%[s2])           \n\t"              \
248         "sb     %[q3_l],     3(%[s2])           \n\t"              \
249         "sb     %[q2_l],     2(%[s2])           \n\t"              \
250         "sb     %[q1_l],     1(%[s2])           \n\t"              \
251         "sb     %[q0_l],     0(%[s2])           \n\t"              \
252         "sb     %[p0_l],    -1(%[s2])           \n\t"              \
253         "sb     %[p1_l],    -2(%[s2])           \n\t"              \
254         "sb     %[p2_l],    -3(%[s2])           \n\t"              \
255         "sb     %[p3_l],    -4(%[s2])           \n\t"              \
256         "sb     %[p4_l],    -5(%[s2])           \n\t"              \
257         "sb     %[p5_l],    -6(%[s2])           \n\t"              \
258         "sb     %[p6_l],    -7(%[s2])           \n\t"              \
259         : /* no outputs */                                         \
260         : [q6_l] "r"(q6_l), [q5_l] "r"(q5_l), [q4_l] "r"(q4_l),    \
261           [q3_l] "r"(q3_l), [q2_l] "r"(q2_l), [q1_l] "r"(q1_l),    \
262           [q0_l] "r"(q0_l), [p0_l] "r"(p0_l), [p1_l] "r"(p1_l),    \
263           [p2_l] "r"(p2_l), [p3_l] "r"(p3_l), [p4_l] "r"(p4_l),    \
264           [p5_l] "r"(p5_l), [p6_l] "r"(p6_l), [s2] "r"(s2)         \
265         : "memory");                                               \
266                                                                    \
267     __asm__ __volatile__(                                          \
268         "srl    %[q6_l],    %[q6_l],    16     \n\t"               \
269         "srl    %[q5_l],    %[q5_l],    16     \n\t"               \
270         "srl    %[q4_l],    %[q4_l],    16     \n\t"               \
271         "srl    %[q3_l],    %[q3_l],    16     \n\t"               \
272         "srl    %[q2_l],    %[q2_l],    16     \n\t"               \
273         "srl    %[q1_l],    %[q1_l],    16     \n\t"               \
274         "srl    %[q0_l],    %[q0_l],    16     \n\t"               \
275         "srl    %[p0_l],    %[p0_l],    16     \n\t"               \
276         "srl    %[p1_l],    %[p1_l],    16     \n\t"               \
277         "srl    %[p2_l],    %[p2_l],    16     \n\t"               \
278         "srl    %[p3_l],    %[p3_l],    16     \n\t"               \
279         "srl    %[p4_l],    %[p4_l],    16     \n\t"               \
280         "srl    %[p5_l],    %[p5_l],    16     \n\t"               \
281         "srl    %[p6_l],    %[p6_l],    16     \n\t"               \
282                                                                    \
283         : [q6_l] "+r"(q6_l), [q5_l] "+r"(q5_l), [q4_l] "+r"(q4_l), \
284           [q3_l] "+r"(q3_l), [q2_l] "+r"(q2_l), [q1_l] "+r"(q1_l), \
285           [q0_l] "+r"(q0_l), [p0_l] "+r"(p0_l), [p1_l] "+r"(p1_l), \
286           [p2_l] "+r"(p2_l), [p3_l] "+r"(p3_l), [p4_l] "+r"(p4_l), \
287           [p5_l] "+r"(p5_l), [p6_l] "+r"(p6_l)                     \
288         :);                                                        \
289                                                                    \
290     __asm__ __volatile__(                                          \
291         "sb     %[q6_l],     6(%[s1])           \n\t"              \
292         "sb     %[q5_l],     5(%[s1])           \n\t"              \
293         "sb     %[q4_l],     4(%[s1])           \n\t"              \
294         "sb     %[q3_l],     3(%[s1])           \n\t"              \
295         "sb     %[q2_l],     2(%[s1])           \n\t"              \
296         "sb     %[q1_l],     1(%[s1])           \n\t"              \
297         "sb     %[q0_l],     0(%[s1])           \n\t"              \
298         "sb     %[p0_l],    -1(%[s1])           \n\t"              \
299         "sb     %[p1_l],    -2(%[s1])           \n\t"              \
300         "sb     %[p2_l],    -3(%[s1])           \n\t"              \
301         "sb     %[p3_l],    -4(%[s1])           \n\t"              \
302         "sb     %[p4_l],    -5(%[s1])           \n\t"              \
303         "sb     %[p5_l],    -6(%[s1])           \n\t"              \
304         "sb     %[p6_l],    -7(%[s1])           \n\t"              \
305         : /* no outputs */                                         \
306         : [q6_l] "r"(q6_l), [q5_l] "r"(q5_l), [q4_l] "r"(q4_l),    \
307           [q3_l] "r"(q3_l), [q2_l] "r"(q2_l), [q1_l] "r"(q1_l),    \
308           [q0_l] "r"(q0_l), [p0_l] "r"(p0_l), [p1_l] "r"(p1_l),    \
309           [p2_l] "r"(p2_l), [p3_l] "r"(p3_l), [p4_l] "r"(p4_l),    \
310           [p5_l] "r"(p5_l), [p6_l] "r"(p6_l), [s1] "r"(s1)         \
311         : "memory");                                               \
312   }
313 
/*
 * PACK_LEFT_0TO3 - expand the left byte pair of p3..p0 and q0..q3.
 *
 * Takes no arguments.  Reads p3, p2, p1, p0, q0, q1, q2, q3 from the
 * enclosing scope and overwrites p3_l, p2_l, p1_l, p0_l, q0_l, q1_l, q2_l,
 * q3_l.  Expands to a braced block holding one asm statement.
 *
 * Each preceu.ph.qbl (MIPS DSP ASE) zero-extends the two left-most, i.e.
 * most significant, bytes of its source word into the two halfwords of the
 * destination.  The outputs are early-clobber ("=&r"), so no destination
 * is allocated to the same register as any source.
 */
314 #define PACK_LEFT_0TO3()                                              \
315   {                                                                   \
316     __asm__ __volatile__(                                             \
317         "preceu.ph.qbl   %[p3_l],   %[p3]   \n\t"                     \
318         "preceu.ph.qbl   %[p2_l],   %[p2]   \n\t"                     \
319         "preceu.ph.qbl   %[p1_l],   %[p1]   \n\t"                     \
320         "preceu.ph.qbl   %[p0_l],   %[p0]   \n\t"                     \
321         "preceu.ph.qbl   %[q0_l],   %[q0]   \n\t"                     \
322         "preceu.ph.qbl   %[q1_l],   %[q1]   \n\t"                     \
323         "preceu.ph.qbl   %[q2_l],   %[q2]   \n\t"                     \
324         "preceu.ph.qbl   %[q3_l],   %[q3]   \n\t"                     \
325                                                                       \
326         : [p3_l] "=&r"(p3_l), [p2_l] "=&r"(p2_l), [p1_l] "=&r"(p1_l), \
327           [p0_l] "=&r"(p0_l), [q0_l] "=&r"(q0_l), [q1_l] "=&r"(q1_l), \
328           [q2_l] "=&r"(q2_l), [q3_l] "=&r"(q3_l)                      \
329         : [p3] "r"(p3), [p2] "r"(p2), [p1] "r"(p1), [p0] "r"(p0),     \
330           [q0] "r"(q0), [q1] "r"(q1), [q2] "r"(q2), [q3] "r"(q3));    \
331   }
332 
/*
 * PACK_LEFT_4TO7 - expand the left byte pair of p7..p4 and q4..q7.
 *
 * Takes no arguments.  Reads p7, p6, p5, p4, q4, q5, q6, q7 from the
 * enclosing scope and overwrites p7_l, p6_l, p5_l, p4_l, q4_l, q5_l, q6_l,
 * q7_l.  Expands to a braced block holding one asm statement.
 *
 * Each preceu.ph.qbl (MIPS DSP ASE) zero-extends the two left-most, i.e.
 * most significant, bytes of its source word into the two halfwords of the
 * destination.  The outputs are early-clobber ("=&r"), so no destination
 * is allocated to the same register as any source.
 */
333 #define PACK_LEFT_4TO7()                                              \
334   {                                                                   \
335     __asm__ __volatile__(                                             \
336         "preceu.ph.qbl   %[p7_l],   %[p7]   \n\t"                     \
337         "preceu.ph.qbl   %[p6_l],   %[p6]   \n\t"                     \
338         "preceu.ph.qbl   %[p5_l],   %[p5]   \n\t"                     \
339         "preceu.ph.qbl   %[p4_l],   %[p4]   \n\t"                     \
340         "preceu.ph.qbl   %[q4_l],   %[q4]   \n\t"                     \
341         "preceu.ph.qbl   %[q5_l],   %[q5]   \n\t"                     \
342         "preceu.ph.qbl   %[q6_l],   %[q6]   \n\t"                     \
343         "preceu.ph.qbl   %[q7_l],   %[q7]   \n\t"                     \
344                                                                       \
345         : [p7_l] "=&r"(p7_l), [p6_l] "=&r"(p6_l), [p5_l] "=&r"(p5_l), \
346           [p4_l] "=&r"(p4_l), [q4_l] "=&r"(q4_l), [q5_l] "=&r"(q5_l), \
347           [q6_l] "=&r"(q6_l), [q7_l] "=&r"(q7_l)                      \
348         : [p7] "r"(p7), [p6] "r"(p6), [p5] "r"(p5), [p4] "r"(p4),     \
349           [q4] "r"(q4), [q5] "r"(q5), [q6] "r"(q6), [q7] "r"(q7));    \
350   }
351 
/*
 * PACK_RIGHT_0TO3 - expand the right byte pair of p3..p0 and q0..q3.
 *
 * Takes no arguments.  Reads p3, p2, p1, p0, q0, q1, q2, q3 from the
 * enclosing scope and overwrites p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r,
 * q3_r.  Expands to a braced block holding one asm statement.
 *
 * Each preceu.ph.qbr (MIPS DSP ASE) zero-extends the two right-most, i.e.
 * least significant, bytes of its source word into the two halfwords of
 * the destination.  The outputs are early-clobber ("=&r"), so no
 * destination is allocated to the same register as any source.
 */
352 #define PACK_RIGHT_0TO3()                                             \
353   {                                                                   \
354     __asm__ __volatile__(                                             \
355         "preceu.ph.qbr   %[p3_r],   %[p3]  \n\t"                      \
356         "preceu.ph.qbr   %[p2_r],   %[p2]   \n\t"                     \
357         "preceu.ph.qbr   %[p1_r],   %[p1]   \n\t"                     \
358         "preceu.ph.qbr   %[p0_r],   %[p0]   \n\t"                     \
359         "preceu.ph.qbr   %[q0_r],   %[q0]   \n\t"                     \
360         "preceu.ph.qbr   %[q1_r],   %[q1]   \n\t"                     \
361         "preceu.ph.qbr   %[q2_r],   %[q2]   \n\t"                     \
362         "preceu.ph.qbr   %[q3_r],   %[q3]   \n\t"                     \
363                                                                       \
364         : [p3_r] "=&r"(p3_r), [p2_r] "=&r"(p2_r), [p1_r] "=&r"(p1_r), \
365           [p0_r] "=&r"(p0_r), [q0_r] "=&r"(q0_r), [q1_r] "=&r"(q1_r), \
366           [q2_r] "=&r"(q2_r), [q3_r] "=&r"(q3_r)                      \
367         : [p3] "r"(p3), [p2] "r"(p2), [p1] "r"(p1), [p0] "r"(p0),     \
368           [q0] "r"(q0), [q1] "r"(q1), [q2] "r"(q2), [q3] "r"(q3));    \
369   }
370 
/*
 * PACK_RIGHT_4TO7 - expand the right byte pair of p7..p4 and q4..q7.
 *
 * Takes no arguments.  Reads p7, p6, p5, p4, q4, q5, q6, q7 from the
 * enclosing scope and overwrites p7_r, p6_r, p5_r, p4_r, q4_r, q5_r, q6_r,
 * q7_r.  Expands to a braced block holding one asm statement.
 *
 * Each preceu.ph.qbr (MIPS DSP ASE) zero-extends the two right-most, i.e.
 * least significant, bytes of its source word into the two halfwords of
 * the destination.  The outputs are early-clobber ("=&r"), so no
 * destination is allocated to the same register as any source.
 */
371 #define PACK_RIGHT_4TO7()                                             \
372   {                                                                   \
373     __asm__ __volatile__(                                             \
374         "preceu.ph.qbr   %[p7_r],   %[p7]   \n\t"                     \
375         "preceu.ph.qbr   %[p6_r],   %[p6]   \n\t"                     \
376         "preceu.ph.qbr   %[p5_r],   %[p5]   \n\t"                     \
377         "preceu.ph.qbr   %[p4_r],   %[p4]   \n\t"                     \
378         "preceu.ph.qbr   %[q4_r],   %[q4]   \n\t"                     \
379         "preceu.ph.qbr   %[q5_r],   %[q5]   \n\t"                     \
380         "preceu.ph.qbr   %[q6_r],   %[q6]   \n\t"                     \
381         "preceu.ph.qbr   %[q7_r],   %[q7]   \n\t"                     \
382                                                                       \
383         : [p7_r] "=&r"(p7_r), [p6_r] "=&r"(p6_r), [p5_r] "=&r"(p5_r), \
384           [p4_r] "=&r"(p4_r), [q4_r] "=&r"(q4_r), [q5_r] "=&r"(q5_r), \
385           [q6_r] "=&r"(q6_r), [q7_r] "=&r"(q7_r)                      \
386         : [p7] "r"(p7), [p6] "r"(p6), [p5] "r"(p5), [p4] "r"(p4),     \
387           [q4] "r"(q4), [q5] "r"(q5), [q6] "r"(q6), [q7] "r"(q7));    \
388   }
389 
/*
 * COMBINE_LEFT_RIGHT_0TO2 - repack the *_l / *_r halfword pairs of
 * p2..p0 and q0..q2 into byte-packed words.
 *
 * Takes no arguments.  Reads p2_l..q2_l and p2_r..q2_r from the enclosing
 * scope and overwrites p2, p1, p0, q0, q1, q2.  Expands to a braced block
 * holding one asm statement.
 *
 * Each precr.qb.ph (MIPS DSP ASE) keeps the low byte of every halfword of
 * its two sources: the first source (*_l) supplies the two upper bytes of
 * the result and the second source (*_r) the two lower bytes.  The outputs
 * are early-clobber ("=&r"), so no destination is allocated to the same
 * register as any source.
 */
390 #define COMBINE_LEFT_RIGHT_0TO2()                                         \
391   {                                                                       \
392     __asm__ __volatile__(                                                 \
393         "precr.qb.ph    %[p2],  %[p2_l],    %[p2_r]    \n\t"              \
394         "precr.qb.ph    %[p1],  %[p1_l],    %[p1_r]    \n\t"              \
395         "precr.qb.ph    %[p0],  %[p0_l],    %[p0_r]    \n\t"              \
396         "precr.qb.ph    %[q0],  %[q0_l],    %[q0_r]    \n\t"              \
397         "precr.qb.ph    %[q1],  %[q1_l],    %[q1_r]    \n\t"              \
398         "precr.qb.ph    %[q2],  %[q2_l],    %[q2_r]    \n\t"              \
399                                                                           \
400         : [p2] "=&r"(p2), [p1] "=&r"(p1), [p0] "=&r"(p0), [q0] "=&r"(q0), \
401           [q1] "=&r"(q1), [q2] "=&r"(q2)                                  \
402         : [p2_l] "r"(p2_l), [p2_r] "r"(p2_r), [p1_l] "r"(p1_l),           \
403           [p1_r] "r"(p1_r), [p0_l] "r"(p0_l), [p0_r] "r"(p0_r),           \
404           [q0_l] "r"(q0_l), [q0_r] "r"(q0_r), [q1_l] "r"(q1_l),           \
405           [q1_r] "r"(q1_r), [q2_l] "r"(q2_l), [q2_r] "r"(q2_r));          \
406   }
407 
/*
 * COMBINE_LEFT_RIGHT_3TO6 - repack the *_l / *_r halfword pairs of
 * p6..p3 and q3..q6 into byte-packed words.
 *
 * Takes no arguments.  Reads p6_l..p3_l, q3_l..q6_l and the matching *_r
 * variables from the enclosing scope and overwrites p6, p5, p4, p3, q3,
 * q4, q5, q6.  Expands to a braced block holding one asm statement.
 *
 * Each precr.qb.ph (MIPS DSP ASE) keeps the low byte of every halfword of
 * its two sources: the first source (*_l) supplies the two upper bytes of
 * the result and the second source (*_r) the two lower bytes.  The outputs
 * are early-clobber ("=&r"), so no destination is allocated to the same
 * register as any source.
 */
408 #define COMBINE_LEFT_RIGHT_3TO6()                                         \
409   {                                                                       \
410     __asm__ __volatile__(                                                 \
411         "precr.qb.ph    %[p6],  %[p6_l],    %[p6_r]    \n\t"              \
412         "precr.qb.ph    %[p5],  %[p5_l],    %[p5_r]    \n\t"              \
413         "precr.qb.ph    %[p4],  %[p4_l],    %[p4_r]    \n\t"              \
414         "precr.qb.ph    %[p3],  %[p3_l],    %[p3_r]    \n\t"              \
415         "precr.qb.ph    %[q3],  %[q3_l],    %[q3_r]    \n\t"              \
416         "precr.qb.ph    %[q4],  %[q4_l],    %[q4_r]    \n\t"              \
417         "precr.qb.ph    %[q5],  %[q5_l],    %[q5_r]    \n\t"              \
418         "precr.qb.ph    %[q6],  %[q6_l],    %[q6_r]    \n\t"              \
419                                                                           \
420         : [p6] "=&r"(p6), [p5] "=&r"(p5), [p4] "=&r"(p4), [p3] "=&r"(p3), \
421           [q3] "=&r"(q3), [q4] "=&r"(q4), [q5] "=&r"(q5), [q6] "=&r"(q6)  \
422         : [p6_l] "r"(p6_l), [p5_l] "r"(p5_l), [p4_l] "r"(p4_l),           \
423           [p3_l] "r"(p3_l), [p6_r] "r"(p6_r), [p5_r] "r"(p5_r),           \
424           [p4_r] "r"(p4_r), [p3_r] "r"(p3_r), [q3_l] "r"(q3_l),           \
425           [q4_l] "r"(q4_l), [q5_l] "r"(q5_l), [q6_l] "r"(q6_l),           \
426           [q3_r] "r"(q3_r), [q4_r] "r"(q4_r), [q5_r] "r"(q5_r),           \
427           [q6_r] "r"(q6_r));                                              \
428   }
429 
430 #endif  // #if HAVE_DSPR2
431 #ifdef __cplusplus
432 }  // extern "C"
433 #endif
434 
435 #endif  // VPX_VPX_DSP_MIPS_LOOPFILTER_MACROS_DSPR2_H_
436