/*
 *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10
11 #ifndef VP9_COMMON_MIPS_DSPR2_VP9_LOOPFILTER_MASKS_DSPR2_H_
12 #define VP9_COMMON_MIPS_DSPR2_VP9_LOOPFILTER_MASKS_DSPR2_H_
13
14 #include <stdlib.h>
15
16 #include "./vpx_dsp_rtcd.h"
17 #include "vpx/vpx_integer.h"
18 #include "vpx_mem/vpx_mem.h"
19
20 #ifdef __cplusplus
21 extern "C" {
22 #endif
23
24 #if HAVE_DSPR2
25 /* processing 4 pixels at the same time
26 * compute hev and mask in the same function */
filter_hev_mask_dspr2(uint32_t limit,uint32_t flimit,uint32_t p1,uint32_t p0,uint32_t p3,uint32_t p2,uint32_t q0,uint32_t q1,uint32_t q2,uint32_t q3,uint32_t thresh,uint32_t * hev,uint32_t * mask)27 static INLINE void filter_hev_mask_dspr2(uint32_t limit, uint32_t flimit,
28 uint32_t p1, uint32_t p0, uint32_t p3,
29 uint32_t p2, uint32_t q0, uint32_t q1,
30 uint32_t q2, uint32_t q3,
31 uint32_t thresh, uint32_t *hev,
32 uint32_t *mask) {
33 uint32_t c, r, r3, r_k;
34 uint32_t s1, s2, s3;
35 uint32_t ones = 0xFFFFFFFF;
36 uint32_t hev1;
37
38 __asm__ __volatile__(
39 /* mask |= (abs(p3 - p2) > limit) */
40 "subu_s.qb %[c], %[p3], %[p2] \n\t"
41 "subu_s.qb %[r_k], %[p2], %[p3] \n\t"
42 "or %[r_k], %[r_k], %[c] \n\t"
43 "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t"
44 "or %[r], $0, %[c] \n\t"
45
46 /* mask |= (abs(p2 - p1) > limit) */
47 "subu_s.qb %[c], %[p2], %[p1] \n\t"
48 "subu_s.qb %[r_k], %[p1], %[p2] \n\t"
49 "or %[r_k], %[r_k], %[c] \n\t"
50 "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t"
51 "or %[r], %[r], %[c] \n\t"
52
53 /* mask |= (abs(p1 - p0) > limit)
54 * hev |= (abs(p1 - p0) > thresh)
55 */
56 "subu_s.qb %[c], %[p1], %[p0] \n\t"
57 "subu_s.qb %[r_k], %[p0], %[p1] \n\t"
58 "or %[r_k], %[r_k], %[c] \n\t"
59 "cmpgu.lt.qb %[c], %[thresh], %[r_k] \n\t"
60 "or %[r3], $0, %[c] \n\t"
61 "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t"
62 "or %[r], %[r], %[c] \n\t"
63
64 /* mask |= (abs(q1 - q0) > limit)
65 * hev |= (abs(q1 - q0) > thresh)
66 */
67 "subu_s.qb %[c], %[q1], %[q0] \n\t"
68 "subu_s.qb %[r_k], %[q0], %[q1] \n\t"
69 "or %[r_k], %[r_k], %[c] \n\t"
70 "cmpgu.lt.qb %[c], %[thresh], %[r_k] \n\t"
71 "or %[r3], %[r3], %[c] \n\t"
72 "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t"
73 "or %[r], %[r], %[c] \n\t"
74
75 /* mask |= (abs(q2 - q1) > limit) */
76 "subu_s.qb %[c], %[q2], %[q1] \n\t"
77 "subu_s.qb %[r_k], %[q1], %[q2] \n\t"
78 "or %[r_k], %[r_k], %[c] \n\t"
79 "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t"
80 "or %[r], %[r], %[c] \n\t"
81 "sll %[r3], %[r3], 24 \n\t"
82
83 /* mask |= (abs(q3 - q2) > limit) */
84 "subu_s.qb %[c], %[q3], %[q2] \n\t"
85 "subu_s.qb %[r_k], %[q2], %[q3] \n\t"
86 "or %[r_k], %[r_k], %[c] \n\t"
87 "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t"
88 "or %[r], %[r], %[c] \n\t"
89
90 : [c] "=&r"(c), [r_k] "=&r"(r_k), [r] "=&r"(r), [r3] "=&r"(r3)
91 : [limit] "r"(limit), [p3] "r"(p3), [p2] "r"(p2), [p1] "r"(p1),
92 [p0] "r"(p0), [q1] "r"(q1), [q0] "r"(q0), [q2] "r"(q2), [q3] "r"(q3),
93 [thresh] "r"(thresh));
94
95 __asm__ __volatile__(
96 /* abs(p0 - q0) */
97 "subu_s.qb %[c], %[p0], %[q0] \n\t"
98 "subu_s.qb %[r_k], %[q0], %[p0] \n\t"
99 "wrdsp %[r3] \n\t"
100 "or %[s1], %[r_k], %[c] \n\t"
101
102 /* abs(p1 - q1) */
103 "subu_s.qb %[c], %[p1], %[q1] \n\t"
104 "addu_s.qb %[s3], %[s1], %[s1] \n\t"
105 "pick.qb %[hev1], %[ones], $0 \n\t"
106 "subu_s.qb %[r_k], %[q1], %[p1] \n\t"
107 "or %[s2], %[r_k], %[c] \n\t"
108
109 /* abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > flimit * 2 + limit */
110 "shrl.qb %[s2], %[s2], 1 \n\t"
111 "addu_s.qb %[s1], %[s2], %[s3] \n\t"
112 "cmpgu.lt.qb %[c], %[flimit], %[s1] \n\t"
113 "or %[r], %[r], %[c] \n\t"
114 "sll %[r], %[r], 24 \n\t"
115
116 "wrdsp %[r] \n\t"
117 "pick.qb %[s2], $0, %[ones] \n\t"
118
119 : [c] "=&r"(c), [r_k] "=&r"(r_k), [s1] "=&r"(s1), [hev1] "=&r"(hev1),
120 [s2] "=&r"(s2), [r] "+r"(r), [s3] "=&r"(s3)
121 : [p0] "r"(p0), [q0] "r"(q0), [p1] "r"(p1), [r3] "r"(r3), [q1] "r"(q1),
122 [ones] "r"(ones), [flimit] "r"(flimit));
123
124 *hev = hev1;
125 *mask = s2;
126 }
127
filter_hev_mask_flatmask4_dspr2(uint32_t limit,uint32_t flimit,uint32_t thresh,uint32_t p1,uint32_t p0,uint32_t p3,uint32_t p2,uint32_t q0,uint32_t q1,uint32_t q2,uint32_t q3,uint32_t * hev,uint32_t * mask,uint32_t * flat)128 static INLINE void filter_hev_mask_flatmask4_dspr2(
129 uint32_t limit, uint32_t flimit, uint32_t thresh, uint32_t p1, uint32_t p0,
130 uint32_t p3, uint32_t p2, uint32_t q0, uint32_t q1, uint32_t q2,
131 uint32_t q3, uint32_t *hev, uint32_t *mask, uint32_t *flat) {
132 uint32_t c, r, r3, r_k, r_flat;
133 uint32_t s1, s2, s3;
134 uint32_t ones = 0xFFFFFFFF;
135 uint32_t flat_thresh = 0x01010101;
136 uint32_t hev1;
137 uint32_t flat1;
138
139 __asm__ __volatile__(
140 /* mask |= (abs(p3 - p2) > limit) */
141 "subu_s.qb %[c], %[p3], %[p2] \n\t"
142 "subu_s.qb %[r_k], %[p2], %[p3] \n\t"
143 "or %[r_k], %[r_k], %[c] \n\t"
144 "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t"
145 "or %[r], $0, %[c] \n\t"
146
147 /* mask |= (abs(p2 - p1) > limit) */
148 "subu_s.qb %[c], %[p2], %[p1] \n\t"
149 "subu_s.qb %[r_k], %[p1], %[p2] \n\t"
150 "or %[r_k], %[r_k], %[c] \n\t"
151 "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t"
152 "or %[r], %[r], %[c] \n\t"
153
154 /* mask |= (abs(p1 - p0) > limit)
155 * hev |= (abs(p1 - p0) > thresh)
156 * flat |= (abs(p1 - p0) > thresh)
157 */
158 "subu_s.qb %[c], %[p1], %[p0] \n\t"
159 "subu_s.qb %[r_k], %[p0], %[p1] \n\t"
160 "or %[r_k], %[r_k], %[c] \n\t"
161 "cmpgu.lt.qb %[c], %[thresh], %[r_k] \n\t"
162 "or %[r3], $0, %[c] \n\t"
163 "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t"
164 "or %[r], %[r], %[c] \n\t"
165 "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t"
166 "or %[r_flat], $0, %[c] \n\t"
167
168 /* mask |= (abs(q1 - q0) > limit)
169 * hev |= (abs(q1 - q0) > thresh)
170 * flat |= (abs(q1 - q0) > thresh)
171 */
172 "subu_s.qb %[c], %[q1], %[q0] \n\t"
173 "subu_s.qb %[r_k], %[q0], %[q1] \n\t"
174 "or %[r_k], %[r_k], %[c] \n\t"
175 "cmpgu.lt.qb %[c], %[thresh], %[r_k] \n\t"
176 "or %[r3], %[r3], %[c] \n\t"
177 "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t"
178 "or %[r], %[r], %[c] \n\t"
179 "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t"
180 "or %[r_flat], %[r_flat], %[c] \n\t"
181
182 /* flat |= (abs(p0 - p2) > thresh) */
183 "subu_s.qb %[c], %[p0], %[p2] \n\t"
184 "subu_s.qb %[r_k], %[p2], %[p0] \n\t"
185 "or %[r_k], %[r_k], %[c] \n\t"
186 "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t"
187 "or %[r_flat], %[r_flat], %[c] \n\t"
188
189 /* flat |= (abs(q0 - q2) > thresh) */
190 "subu_s.qb %[c], %[q0], %[q2] \n\t"
191 "subu_s.qb %[r_k], %[q2], %[q0] \n\t"
192 "or %[r_k], %[r_k], %[c] \n\t"
193 "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t"
194 "or %[r_flat], %[r_flat], %[c] \n\t"
195
196 /* flat |= (abs(p3 - p0) > thresh) */
197 "subu_s.qb %[c], %[p3], %[p0] \n\t"
198 "subu_s.qb %[r_k], %[p0], %[p3] \n\t"
199 "or %[r_k], %[r_k], %[c] \n\t"
200 "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t"
201 "or %[r_flat], %[r_flat], %[c] \n\t"
202
203 /* flat |= (abs(q3 - q0) > thresh) */
204 "subu_s.qb %[c], %[q3], %[q0] \n\t"
205 "subu_s.qb %[r_k], %[q0], %[q3] \n\t"
206 "or %[r_k], %[r_k], %[c] \n\t"
207 "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t"
208 "or %[r_flat], %[r_flat], %[c] \n\t"
209 "sll %[r_flat], %[r_flat], 24 \n\t"
210 /* look at stall here */
211 "wrdsp %[r_flat] \n\t"
212 "pick.qb %[flat1], $0, %[ones] \n\t"
213
214 /* mask |= (abs(q2 - q1) > limit) */
215 "subu_s.qb %[c], %[q2], %[q1] \n\t"
216 "subu_s.qb %[r_k], %[q1], %[q2] \n\t"
217 "or %[r_k], %[r_k], %[c] \n\t"
218 "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t"
219 "or %[r], %[r], %[c] \n\t"
220 "sll %[r3], %[r3], 24 \n\t"
221
222 /* mask |= (abs(q3 - q2) > limit) */
223 "subu_s.qb %[c], %[q3], %[q2] \n\t"
224 "subu_s.qb %[r_k], %[q2], %[q3] \n\t"
225 "or %[r_k], %[r_k], %[c] \n\t"
226 "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t"
227 "or %[r], %[r], %[c] \n\t"
228
229 : [c] "=&r"(c), [r_k] "=&r"(r_k), [r] "=&r"(r), [r3] "=&r"(r3),
230 [r_flat] "=&r"(r_flat), [flat1] "=&r"(flat1)
231 : [limit] "r"(limit), [p3] "r"(p3), [p2] "r"(p2), [p1] "r"(p1),
232 [p0] "r"(p0), [q1] "r"(q1), [q0] "r"(q0), [q2] "r"(q2), [q3] "r"(q3),
233 [thresh] "r"(thresh), [flat_thresh] "r"(flat_thresh), [ones] "r"(ones));
234
235 __asm__ __volatile__(
236 /* abs(p0 - q0) */
237 "subu_s.qb %[c], %[p0], %[q0] \n\t"
238 "subu_s.qb %[r_k], %[q0], %[p0] \n\t"
239 "wrdsp %[r3] \n\t"
240 "or %[s1], %[r_k], %[c] \n\t"
241
242 /* abs(p1 - q1) */
243 "subu_s.qb %[c], %[p1], %[q1] \n\t"
244 "addu_s.qb %[s3], %[s1], %[s1] \n\t"
245 "pick.qb %[hev1], %[ones], $0 \n\t"
246 "subu_s.qb %[r_k], %[q1], %[p1] \n\t"
247 "or %[s2], %[r_k], %[c] \n\t"
248
249 /* abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > flimit * 2 + limit */
250 "shrl.qb %[s2], %[s2], 1 \n\t"
251 "addu_s.qb %[s1], %[s2], %[s3] \n\t"
252 "cmpgu.lt.qb %[c], %[flimit], %[s1] \n\t"
253 "or %[r], %[r], %[c] \n\t"
254 "sll %[r], %[r], 24 \n\t"
255
256 "wrdsp %[r] \n\t"
257 "pick.qb %[s2], $0, %[ones] \n\t"
258
259 : [c] "=&r"(c), [r_k] "=&r"(r_k), [s1] "=&r"(s1), [hev1] "=&r"(hev1),
260 [s2] "=&r"(s2), [r] "+r"(r), [s3] "=&r"(s3)
261 : [p0] "r"(p0), [q0] "r"(q0), [p1] "r"(p1), [r3] "r"(r3), [q1] "r"(q1),
262 [ones] "r"(ones), [flimit] "r"(flimit));
263
264 *hev = hev1;
265 *mask = s2;
266 *flat = flat1;
267 }
268
flatmask5(uint32_t p4,uint32_t p3,uint32_t p2,uint32_t p1,uint32_t p0,uint32_t q0,uint32_t q1,uint32_t q2,uint32_t q3,uint32_t q4,uint32_t * flat2)269 static INLINE void flatmask5(uint32_t p4, uint32_t p3, uint32_t p2, uint32_t p1,
270 uint32_t p0, uint32_t q0, uint32_t q1, uint32_t q2,
271 uint32_t q3, uint32_t q4, uint32_t *flat2) {
272 uint32_t c, r, r_k, r_flat;
273 uint32_t ones = 0xFFFFFFFF;
274 uint32_t flat_thresh = 0x01010101;
275 uint32_t flat1, flat3;
276
277 __asm__ __volatile__(
278 /* flat |= (abs(p4 - p0) > thresh) */
279 "subu_s.qb %[c], %[p4], %[p0] \n\t"
280 "subu_s.qb %[r_k], %[p0], %[p4] \n\t"
281 "or %[r_k], %[r_k], %[c] \n\t"
282 "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t"
283 "or %[r], $0, %[c] \n\t"
284
285 /* flat |= (abs(q4 - q0) > thresh) */
286 "subu_s.qb %[c], %[q4], %[q0] \n\t"
287 "subu_s.qb %[r_k], %[q0], %[q4] \n\t"
288 "or %[r_k], %[r_k], %[c] \n\t"
289 "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t"
290 "or %[r], %[r], %[c] \n\t"
291 "sll %[r], %[r], 24 \n\t"
292 "wrdsp %[r] \n\t"
293 "pick.qb %[flat3], $0, %[ones] \n\t"
294
295 /* flat |= (abs(p1 - p0) > thresh) */
296 "subu_s.qb %[c], %[p1], %[p0] \n\t"
297 "subu_s.qb %[r_k], %[p0], %[p1] \n\t"
298 "or %[r_k], %[r_k], %[c] \n\t"
299 "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t"
300 "or %[r_flat], $0, %[c] \n\t"
301
302 /* flat |= (abs(q1 - q0) > thresh) */
303 "subu_s.qb %[c], %[q1], %[q0] \n\t"
304 "subu_s.qb %[r_k], %[q0], %[q1] \n\t"
305 "or %[r_k], %[r_k], %[c] \n\t"
306 "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t"
307 "or %[r_flat], %[r_flat], %[c] \n\t"
308
309 /* flat |= (abs(p0 - p2) > thresh) */
310 "subu_s.qb %[c], %[p0], %[p2] \n\t"
311 "subu_s.qb %[r_k], %[p2], %[p0] \n\t"
312 "or %[r_k], %[r_k], %[c] \n\t"
313 "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t"
314 "or %[r_flat], %[r_flat], %[c] \n\t"
315
316 /* flat |= (abs(q0 - q2) > thresh) */
317 "subu_s.qb %[c], %[q0], %[q2] \n\t"
318 "subu_s.qb %[r_k], %[q2], %[q0] \n\t"
319 "or %[r_k], %[r_k], %[c] \n\t"
320 "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t"
321 "or %[r_flat], %[r_flat], %[c] \n\t"
322
323 /* flat |= (abs(p3 - p0) > thresh) */
324 "subu_s.qb %[c], %[p3], %[p0] \n\t"
325 "subu_s.qb %[r_k], %[p0], %[p3] \n\t"
326 "or %[r_k], %[r_k], %[c] \n\t"
327 "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t"
328 "or %[r_flat], %[r_flat], %[c] \n\t"
329
330 /* flat |= (abs(q3 - q0) > thresh) */
331 "subu_s.qb %[c], %[q3], %[q0] \n\t"
332 "subu_s.qb %[r_k], %[q0], %[q3] \n\t"
333 "or %[r_k], %[r_k], %[c] \n\t"
334 "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t"
335 "or %[r_flat], %[r_flat], %[c] \n\t"
336 "sll %[r_flat], %[r_flat], 24 \n\t"
337 "wrdsp %[r_flat] \n\t"
338 "pick.qb %[flat1], $0, %[ones] \n\t"
339 /* flat & flatmask4(thresh, p3, p2, p1, p0, q0, q1, q2, q3) */
340 "and %[flat1], %[flat3], %[flat1] \n\t"
341
342 : [c] "=&r"(c), [r_k] "=&r"(r_k), [r] "=&r"(r), [r_flat] "=&r"(r_flat),
343 [flat1] "=&r"(flat1), [flat3] "=&r"(flat3)
344 : [p4] "r"(p4), [p3] "r"(p3), [p2] "r"(p2), [p1] "r"(p1), [p0] "r"(p0),
345 [q0] "r"(q0), [q1] "r"(q1), [q2] "r"(q2), [q3] "r"(q3), [q4] "r"(q4),
346 [flat_thresh] "r"(flat_thresh), [ones] "r"(ones));
347
348 *flat2 = flat1;
349 }
350 #endif // #if HAVE_DSPR2
351 #ifdef __cplusplus
352 } // extern "C"
353 #endif
354
355 #endif // VP9_COMMON_MIPS_DSPR2_VP9_LOOPFILTER_MASKS_DSPR2_H_
356