1 /*
2  *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include <assert.h>
12 #include <stdio.h>
13 
14 #include "./vpx_dsp_rtcd.h"
15 #include "vpx_dsp/mips/convolve_common_dspr2.h"
16 #include "vpx_dsp/vpx_convolve.h"
17 #include "vpx_dsp/vpx_dsp_common.h"
18 #include "vpx_ports/mem.h"
19 
20 #if HAVE_DSPR2
convolve_bi_avg_horiz_4_dspr2(const uint8_t * src,int32_t src_stride,uint8_t * dst,int32_t dst_stride,const int16_t * filter_x0,int32_t h)21 static void convolve_bi_avg_horiz_4_dspr2(const uint8_t *src,
22                                           int32_t src_stride, uint8_t *dst,
23                                           int32_t dst_stride,
24                                           const int16_t *filter_x0, int32_t h) {
25   int32_t y;
26   uint8_t *cm = vpx_ff_cropTbl;
27   int32_t Temp1, Temp2, Temp3, Temp4;
28   uint32_t vector4a = 64;
29   uint32_t tp1, tp2;
30   uint32_t p1, p2, p3;
31   uint32_t tn1, tn2;
32   const int16_t *filter = &filter_x0[3];
33   uint32_t filter45;
34 
35   filter45 = ((const int32_t *)filter)[0];
36 
37   for (y = h; y--;) {
38     /* prefetch data to cache memory */
39     prefetch_load(src + src_stride);
40     prefetch_load(src + src_stride + 32);
41     prefetch_store(dst + dst_stride);
42 
43     __asm__ __volatile__(
44         "ulw              %[tp1],         0(%[src])                      \n\t"
45         "ulw              %[tp2],         4(%[src])                      \n\t"
46 
47         /* even 1. pixel */
48         "mtlo             %[vector4a],    $ac3                           \n\t"
49         "mthi             $zero,          $ac3                           \n\t"
50         "preceu.ph.qbr    %[p1],          %[tp1]                         \n\t"
51         "preceu.ph.qbl    %[p2],          %[tp1]                         \n\t"
52         "dpa.w.ph         $ac3,           %[p1],          %[filter45]    \n\t"
53         "extp             %[Temp1],       $ac3,           31             \n\t"
54 
55         /* even 2. pixel */
56         "mtlo             %[vector4a],    $ac2                           \n\t"
57         "mthi             $zero,          $ac2                           \n\t"
58         "balign           %[tp2],         %[tp1],         3              \n\t"
59         "dpa.w.ph         $ac2,           %[p2],          %[filter45]    \n\t"
60         "extp             %[Temp3],       $ac2,           31             \n\t"
61 
62         "lbu              %[p2],          3(%[dst])                      \n\t" /* load odd 2 */
63 
64         /* odd 1. pixel */
65         "lbux             %[tp1],         %[Temp1](%[cm])                \n\t" /* even 1 */
66         "mtlo             %[vector4a],    $ac3                           \n\t"
67         "mthi             $zero,          $ac3                           \n\t"
68         "lbu              %[Temp1],       1(%[dst])                      \n\t" /* load odd 1 */
69         "preceu.ph.qbr    %[p1],          %[tp2]                         \n\t"
70         "preceu.ph.qbl    %[p3],          %[tp2]                         \n\t"
71         "dpa.w.ph         $ac3,           %[p1],          %[filter45]    \n\t"
72         "extp             %[Temp2],       $ac3,           31             \n\t"
73 
74         "lbu              %[tn2],         0(%[dst])                      \n\t" /* load even 1 */
75 
76         /* odd 2. pixel */
77         "lbux             %[tp2],         %[Temp3](%[cm])                \n\t" /* even 2 */
78         "mtlo             %[vector4a],    $ac2                           \n\t"
79         "mthi             $zero,          $ac2                           \n\t"
80         "lbux             %[tn1],         %[Temp2](%[cm])                \n\t" /* odd 1 */
81         "addqh_r.w        %[tn2],         %[tn2],         %[tp1]         \n\t" /* average even 1 */
82         "dpa.w.ph         $ac2,           %[p3],          %[filter45]    \n\t"
83         "extp             %[Temp4],       $ac2,           31             \n\t"
84 
85         "lbu              %[tp1],         2(%[dst])                      \n\t" /* load even 2 */
86         "sb               %[tn2],         0(%[dst])                      \n\t" /* store even 1 */
87 
88         /* clamp */
89         "addqh_r.w        %[Temp1],       %[Temp1],       %[tn1]         \n\t" /* average odd 1 */
90         "lbux             %[p3],          %[Temp4](%[cm])                \n\t" /* odd 2 */
91         "sb               %[Temp1],       1(%[dst])                      \n\t" /* store odd 1 */
92 
93         "addqh_r.w        %[tp1],         %[tp1],         %[tp2]         \n\t" /* average even 2 */
94         "sb               %[tp1],         2(%[dst])                      \n\t" /* store even 2 */
95 
96         "addqh_r.w        %[p2],          %[p2],          %[p3]          \n\t" /* average odd 2 */
97         "sb               %[p2],          3(%[dst])                      \n\t" /* store odd 2 */
98 
99         : [tp1] "=&r"(tp1), [tp2] "=&r"(tp2), [tn1] "=&r"(tn1),
100           [tn2] "=&r"(tn2), [p1] "=&r"(p1), [p2] "=&r"(p2), [p3] "=&r"(p3),
101           [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2), [Temp3] "=&r"(Temp3),
102           [Temp4] "=&r"(Temp4)
103         : [filter45] "r"(filter45), [vector4a] "r"(vector4a), [cm] "r"(cm),
104           [dst] "r"(dst), [src] "r"(src));
105 
106     /* Next row... */
107     src += src_stride;
108     dst += dst_stride;
109   }
110 }
111 
convolve_bi_avg_horiz_8_dspr2(const uint8_t * src,int32_t src_stride,uint8_t * dst,int32_t dst_stride,const int16_t * filter_x0,int32_t h)112 static void convolve_bi_avg_horiz_8_dspr2(const uint8_t *src,
113                                           int32_t src_stride, uint8_t *dst,
114                                           int32_t dst_stride,
115                                           const int16_t *filter_x0, int32_t h) {
116   int32_t y;
117   uint8_t *cm = vpx_ff_cropTbl;
118   uint32_t vector4a = 64;
119   int32_t Temp1, Temp2, Temp3;
120   uint32_t tp1, tp2, tp3, tp4;
121   uint32_t p1, p2, p3, p4, n1;
122   uint32_t st0, st1;
123   const int16_t *filter = &filter_x0[3];
124   uint32_t filter45;
125 
126   filter45 = ((const int32_t *)filter)[0];
127 
128   for (y = h; y--;) {
129     /* prefetch data to cache memory */
130     prefetch_load(src + src_stride);
131     prefetch_load(src + src_stride + 32);
132     prefetch_store(dst + dst_stride);
133 
134     __asm__ __volatile__(
135         "ulw              %[tp1],         0(%[src])                      \n\t"
136         "ulw              %[tp2],         4(%[src])                      \n\t"
137 
138         /* even 1. pixel */
139         "mtlo             %[vector4a],    $ac3                           \n\t"
140         "mthi             $zero,          $ac3                           \n\t"
141         "mtlo             %[vector4a],    $ac2                           \n\t"
142         "mthi             $zero,          $ac2                           \n\t"
143         "preceu.ph.qbr    %[p1],          %[tp1]                         \n\t"
144         "preceu.ph.qbl    %[p2],          %[tp1]                         \n\t"
145         "preceu.ph.qbr    %[p3],          %[tp2]                         \n\t"
146         "preceu.ph.qbl    %[p4],          %[tp2]                         \n\t"
147         "ulw              %[tp3],         8(%[src])                      \n\t"
148         "dpa.w.ph         $ac3,           %[p1],          %[filter45]    \n\t"
149         "extp             %[Temp1],       $ac3,           31             \n\t"
150         "lbu              %[Temp2],       0(%[dst])                      \n\t"
151         "lbu              %[tp4],         2(%[dst])                      \n\t"
152 
153         /* even 2. pixel */
154         "dpa.w.ph         $ac2,           %[p2],          %[filter45]    \n\t"
155         "extp             %[Temp3],       $ac2,           31             \n\t"
156 
157         /* even 3. pixel */
158         "lbux             %[st0],         %[Temp1](%[cm])                \n\t"
159         "mtlo             %[vector4a],    $ac1                           \n\t"
160         "mthi             $zero,          $ac1                           \n\t"
161         "lbux             %[st1],         %[Temp3](%[cm])                \n\t"
162         "dpa.w.ph         $ac1,           %[p3],          %[filter45]    \n\t"
163         "extp             %[Temp1],       $ac1,           31             \n\t"
164 
165         "addqh_r.w        %[Temp2],       %[Temp2],       %[st0]         \n\t"
166         "addqh_r.w        %[tp4],         %[tp4],         %[st1]         \n\t"
167         "sb               %[Temp2],       0(%[dst])                      \n\t"
168         "sb               %[tp4],         2(%[dst])                      \n\t"
169 
170         /* even 4. pixel */
171         "mtlo             %[vector4a],    $ac2                           \n\t"
172         "mthi             $zero,          $ac2                           \n\t"
173         "mtlo             %[vector4a],    $ac3                           \n\t"
174         "mthi             $zero,          $ac3                           \n\t"
175 
176         "balign           %[tp3],         %[tp2],         3              \n\t"
177         "balign           %[tp2],         %[tp1],         3              \n\t"
178 
179         "lbux             %[st0],         %[Temp1](%[cm])                \n\t"
180         "lbu              %[Temp2],       4(%[dst])                      \n\t"
181         "addqh_r.w        %[Temp2],       %[Temp2],       %[st0]         \n\t"
182 
183         "dpa.w.ph         $ac2,           %[p4],          %[filter45]    \n\t"
184         "extp             %[Temp3],       $ac2,           31             \n\t"
185 
186         /* odd 1. pixel */
187         "mtlo             %[vector4a],    $ac1                           \n\t"
188         "mthi             $zero,          $ac1                           \n\t"
189         "sb               %[Temp2],       4(%[dst])                      \n\t"
190         "preceu.ph.qbr    %[p1],          %[tp2]                         \n\t"
191         "preceu.ph.qbl    %[p2],          %[tp2]                         \n\t"
192         "preceu.ph.qbr    %[p3],          %[tp3]                         \n\t"
193         "preceu.ph.qbl    %[p4],          %[tp3]                         \n\t"
194         "dpa.w.ph         $ac3,           %[p1],          %[filter45]    \n\t"
195         "extp             %[Temp2],       $ac3,           31             \n\t"
196 
197         "lbu              %[tp1],         6(%[dst])                      \n\t"
198 
199         /* odd 2. pixel */
200         "mtlo             %[vector4a],    $ac3                           \n\t"
201         "mthi             $zero,          $ac3                           \n\t"
202         "mtlo             %[vector4a],    $ac2                           \n\t"
203         "mthi             $zero,          $ac2                           \n\t"
204         "lbux             %[st0],         %[Temp3](%[cm])                \n\t"
205         "dpa.w.ph         $ac1,           %[p2],          %[filter45]    \n\t"
206         "extp             %[Temp3],       $ac1,           31             \n\t"
207 
208         "lbu              %[tp2],         1(%[dst])                      \n\t"
209         "lbu              %[tp3],         3(%[dst])                      \n\t"
210         "addqh_r.w        %[tp1],         %[tp1],         %[st0]         \n\t"
211 
212         /* odd 3. pixel */
213         "lbux             %[st1],         %[Temp2](%[cm])                \n\t"
214         "dpa.w.ph         $ac3,           %[p3],          %[filter45]    \n\t"
215         "addqh_r.w        %[tp2],         %[tp2],         %[st1]         \n\t"
216         "extp             %[Temp2],       $ac3,           31             \n\t"
217 
218         "lbu              %[tp4],         5(%[dst])                      \n\t"
219 
220         /* odd 4. pixel */
221         "sb               %[tp2],         1(%[dst])                      \n\t"
222         "sb               %[tp1],         6(%[dst])                      \n\t"
223         "dpa.w.ph         $ac2,           %[p4],          %[filter45]    \n\t"
224         "extp             %[Temp1],       $ac2,           31             \n\t"
225 
226         "lbu              %[tp1],         7(%[dst])                      \n\t"
227 
228         /* clamp */
229         "lbux             %[p4],          %[Temp3](%[cm])                \n\t"
230         "addqh_r.w        %[tp3],         %[tp3],         %[p4]          \n\t"
231 
232         "lbux             %[p2],          %[Temp2](%[cm])                \n\t"
233         "addqh_r.w        %[tp4],         %[tp4],         %[p2]          \n\t"
234 
235         "lbux             %[p1],          %[Temp1](%[cm])                \n\t"
236         "addqh_r.w        %[tp1],         %[tp1],         %[p1]          \n\t"
237 
238         /* store bytes */
239         "sb               %[tp3],         3(%[dst])                      \n\t"
240         "sb               %[tp4],         5(%[dst])                      \n\t"
241         "sb               %[tp1],         7(%[dst])                      \n\t"
242 
243         : [tp1] "=&r"(tp1), [tp2] "=&r"(tp2), [tp3] "=&r"(tp3),
244           [tp4] "=&r"(tp4), [st0] "=&r"(st0), [st1] "=&r"(st1), [p1] "=&r"(p1),
245           [p2] "=&r"(p2), [p3] "=&r"(p3), [p4] "=&r"(p4), [n1] "=&r"(n1),
246           [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2), [Temp3] "=&r"(Temp3)
247         : [filter45] "r"(filter45), [vector4a] "r"(vector4a), [cm] "r"(cm),
248           [dst] "r"(dst), [src] "r"(src));
249 
250     /* Next row... */
251     src += src_stride;
252     dst += dst_stride;
253   }
254 }
255 
convolve_bi_avg_horiz_16_dspr2(const uint8_t * src_ptr,int32_t src_stride,uint8_t * dst_ptr,int32_t dst_stride,const int16_t * filter_x0,int32_t h,int32_t count)256 static void convolve_bi_avg_horiz_16_dspr2(const uint8_t *src_ptr,
257                                            int32_t src_stride, uint8_t *dst_ptr,
258                                            int32_t dst_stride,
259                                            const int16_t *filter_x0, int32_t h,
260                                            int32_t count) {
261   int32_t y, c;
262   const uint8_t *src;
263   uint8_t *dst;
264   uint8_t *cm = vpx_ff_cropTbl;
265   uint32_t vector_64 = 64;
266   int32_t Temp1, Temp2, Temp3;
267   uint32_t qload1, qload2, qload3;
268   uint32_t p1, p2, p3, p4, p5;
269   uint32_t st1, st2, st3;
270   const int16_t *filter = &filter_x0[3];
271   uint32_t filter45;
272 
273   filter45 = ((const int32_t *)filter)[0];
274 
275   for (y = h; y--;) {
276     src = src_ptr;
277     dst = dst_ptr;
278 
279     /* prefetch data to cache memory */
280     prefetch_load(src_ptr + src_stride);
281     prefetch_load(src_ptr + src_stride + 32);
282     prefetch_store(dst_ptr + dst_stride);
283 
284     for (c = 0; c < count; c++) {
285       __asm__ __volatile__(
286           "ulw              %[qload1],    0(%[src])                    \n\t"
287           "ulw              %[qload2],    4(%[src])                    \n\t"
288 
289           /* even 1. pixel */
290           "mtlo             %[vector_64], $ac1                         \n\t" /* even 1 */
291           "mthi             $zero,        $ac1                         \n\t"
292           "mtlo             %[vector_64], $ac2                         \n\t" /* even 2 */
293           "mthi             $zero,        $ac2                         \n\t"
294           "preceu.ph.qbr    %[p1],        %[qload1]                    \n\t"
295           "preceu.ph.qbl    %[p2],        %[qload1]                    \n\t"
296           "preceu.ph.qbr    %[p3],        %[qload2]                    \n\t"
297           "preceu.ph.qbl    %[p4],        %[qload2]                    \n\t"
298           "ulw              %[qload3],    8(%[src])                    \n\t"
299           "dpa.w.ph         $ac1,         %[p1],          %[filter45]  \n\t" /* even 1 */
300           "extp             %[Temp1],     $ac1,           31           \n\t" /* even 1 */
301           "lbu              %[st2],       0(%[dst])                    \n\t" /* load even 1 from dst */
302 
303           /* even 2. pixel */
304           "mtlo             %[vector_64], $ac3                         \n\t" /* even 3 */
305           "mthi             $zero,        $ac3                         \n\t"
306           "preceu.ph.qbr    %[p1],        %[qload3]                    \n\t"
307           "preceu.ph.qbl    %[p5],        %[qload3]                    \n\t"
308           "ulw              %[qload1],    12(%[src])                   \n\t"
309           "dpa.w.ph         $ac2,         %[p2],          %[filter45]  \n\t" /* even 1 */
310           "extp             %[Temp2],     $ac2,           31           \n\t" /* even 1 */
311           "lbux             %[st1],       %[Temp1](%[cm])              \n\t" /* even 1 */
312 
313           "lbu              %[qload3],    2(%[dst])                    \n\t" /* load even 2 from dst */
314 
315           /* even 3. pixel */
316           "mtlo             %[vector_64], $ac1                         \n\t" /* even 4 */
317           "mthi             $zero,        $ac1                         \n\t"
318           "addqh_r.w        %[st2],       %[st2],         %[st1]       \n\t" /* average even 1 */
319           "preceu.ph.qbr    %[p2],        %[qload1]                    \n\t"
320           "sb               %[st2],       0(%[dst])                    \n\t" /* store even 1 to dst */
321           "dpa.w.ph         $ac3,         %[p3],          %[filter45]  \n\t" /* even 3 */
322           "extp             %[Temp3],     $ac3,           31           \n\t" /* even 3 */
323           "lbux             %[st2],       %[Temp2](%[cm])              \n\t" /* even 1 */
324 
325           /* even 4. pixel */
326           "mtlo             %[vector_64], $ac2                         \n\t" /* even 5 */
327           "mthi             $zero,        $ac2                         \n\t"
328           "addqh_r.w        %[qload3],    %[qload3],      %[st2]       \n\t" /* average even 2 */
329           "preceu.ph.qbl    %[p3],        %[qload1]                    \n\t"
330           "sb               %[qload3],    2(%[dst])                    \n\t" /* store even 2 to dst */
331           "lbu              %[qload3],    4(%[dst])                    \n\t" /* load even 3 from dst */
332           "lbu              %[qload1],    6(%[dst])                    \n\t" /* load even 4 from dst */
333           "dpa.w.ph         $ac1,         %[p4],          %[filter45]  \n\t" /* even 4 */
334           "extp             %[Temp1],     $ac1,           31           \n\t" /* even 4 */
335           "lbux             %[st3],       %[Temp3](%[cm])              \n\t" /* even 3 */
336 
337           /* even 5. pixel */
338           "mtlo             %[vector_64], $ac3                         \n\t" /* even 6 */
339           "mthi             $zero,        $ac3                         \n\t"
340           "addqh_r.w        %[qload3],    %[qload3],      %[st3]       \n\t" /* average even 3 */
341           "sb               %[qload3],    4(%[dst])                    \n\t" /* store even 3 to dst */
342           "dpa.w.ph         $ac2,         %[p1],          %[filter45]  \n\t" /* even 5 */
343           "extp             %[Temp2],     $ac2,           31           \n\t" /* even 5 */
344           "lbux             %[st1],       %[Temp1](%[cm])              \n\t" /* even 4 */
345 
346           /* even 6. pixel */
347           "mtlo             %[vector_64], $ac1                         \n\t" /* even 7 */
348           "mthi             $zero,        $ac1                         \n\t"
349           "addqh_r.w        %[qload1],    %[qload1],      %[st1]       \n\t" /* average even 4 */
350           "sb               %[qload1],    6(%[dst])                    \n\t" /* store even 4 to dst */
351           "dpa.w.ph         $ac3,         %[p5],          %[filter45]  \n\t" /* even 6 */
352           "lbu              %[qload2],    8(%[dst])                    \n\t" /* load even 5 from dst */
353           "extp             %[Temp3],     $ac3,           31           \n\t" /* even 6 */
354           "lbux             %[st2],       %[Temp2](%[cm])              \n\t" /* even 5 */
355 
356           /* even 7. pixel */
357           "mtlo             %[vector_64], $ac2                         \n\t" /* even 8 */
358           "mthi             $zero,        $ac2                         \n\t"
359           "addqh_r.w        %[qload2],    %[qload2],      %[st2]       \n\t" /* average even 5 */
360           "sb               %[qload2],    8(%[dst])                    \n\t" /* store even 5 to dst */
361           "dpa.w.ph         $ac1,         %[p2],          %[filter45]  \n\t" /* even 7 */
362           "lbu              %[qload3],    10(%[dst])                   \n\t" /* load even 6 from dst */
363           "extp             %[Temp1],     $ac1,           31           \n\t" /* even 7 */
364           "lbux             %[st3],       %[Temp3](%[cm])              \n\t" /* even 6 */
365 
366           "lbu              %[st2],       12(%[dst])                   \n\t" /* load even 7 from dst */
367 
368           /* even 8. pixel */
369           "mtlo             %[vector_64], $ac3                         \n\t" /* odd 1 */
370           "mthi             $zero,        $ac3                         \n\t"
371           "addqh_r.w        %[qload3],    %[qload3],      %[st3]       \n\t" /* average even 6 */
372           "dpa.w.ph         $ac2,         %[p3],          %[filter45]  \n\t" /* even 8 */
373           "sb               %[qload3],    10(%[dst])                   \n\t" /* store even 6 to dst */
374           "extp             %[Temp2],     $ac2,           31           \n\t" /* even 8 */
375           "lbux             %[st1],       %[Temp1](%[cm])              \n\t" /* even 7 */
376 
377           /* ODD pixels */
378           "ulw              %[qload1],    1(%[src])                   \n\t"
379           "ulw              %[qload2],    5(%[src])                    \n\t"
380 
381           "addqh_r.w        %[st2],       %[st2],         %[st1]       \n\t" /* average even 7 */
382 
383           /* odd 1. pixel */
384           "mtlo             %[vector_64], $ac1                         \n\t" /* odd 2 */
385           "mthi             $zero,        $ac1                         \n\t"
386           "preceu.ph.qbr    %[p1],        %[qload1]                    \n\t"
387           "preceu.ph.qbl    %[p2],        %[qload1]                    \n\t"
388           "preceu.ph.qbr    %[p3],        %[qload2]                    \n\t"
389           "preceu.ph.qbl    %[p4],        %[qload2]                    \n\t"
390           "sb               %[st2],       12(%[dst])                   \n\t" /* store even 7 to dst */
391           "ulw              %[qload3],    9(%[src])                    \n\t"
392           "dpa.w.ph         $ac3,         %[p1],          %[filter45]  \n\t" /* odd 1 */
393           "lbu              %[qload2],    14(%[dst])                   \n\t" /* load even 8 from dst */
394           "extp             %[Temp3],     $ac3,           31           \n\t" /* odd 1 */
395           "lbux             %[st2],       %[Temp2](%[cm])              \n\t" /* even 8 */
396 
397           "lbu              %[st1],       1(%[dst])                    \n\t" /* load odd 1 from dst */
398 
399           /* odd 2. pixel */
400           "mtlo             %[vector_64], $ac2                         \n\t" /* odd 3 */
401           "mthi             $zero,        $ac2                         \n\t"
402           "addqh_r.w        %[qload2],    %[qload2],      %[st2]       \n\t" /* average even 8 */
403           "preceu.ph.qbr    %[p1],        %[qload3]                    \n\t"
404           "preceu.ph.qbl    %[p5],        %[qload3]                    \n\t"
405           "sb               %[qload2],    14(%[dst])                   \n\t" /* store even 8 to dst */
406           "ulw              %[qload1],    13(%[src])                   \n\t"
407           "dpa.w.ph         $ac1,         %[p2],          %[filter45]  \n\t" /* odd 2 */
408           "lbu              %[qload3],    3(%[dst])                    \n\t" /* load odd 2 from dst */
409           "extp             %[Temp1],     $ac1,           31           \n\t" /* odd 2 */
410           "lbux             %[st3],       %[Temp3](%[cm])              \n\t" /* odd 1 */
411 
412           /* odd 3. pixel */
413           "mtlo             %[vector_64], $ac3                         \n\t" /* odd 4 */
414           "mthi             $zero,        $ac3                         \n\t"
415           "addqh_r.w        %[st3],       %[st3],         %[st1]       \n\t" /* average odd 1 */
416           "preceu.ph.qbr    %[p2],        %[qload1]                    \n\t"
417           "dpa.w.ph         $ac2,         %[p3],          %[filter45]  \n\t" /* odd 3 */
418           "sb               %[st3],       1(%[dst])                    \n\t" /* store odd 1 to dst */
419           "extp             %[Temp2],     $ac2,           31           \n\t" /* odd 3 */
420           "lbux             %[st1],       %[Temp1](%[cm])              \n\t" /* odd 2 */
421 
422           /* odd 4. pixel */
423           "mtlo             %[vector_64], $ac1                         \n\t" /* odd 5 */
424           "mthi             $zero,        $ac1                         \n\t"
425           "addqh_r.w        %[qload3],    %[qload3],      %[st1]       \n\t" /* average odd 2 */
426           "preceu.ph.qbl    %[p3],        %[qload1]                    \n\t"
427           "sb               %[qload3],    3(%[dst])                    \n\t" /* store odd 2 to dst */
428           "lbu              %[qload1],    5(%[dst])                    \n\t" /* load odd 3 from dst */
429           "dpa.w.ph         $ac3,         %[p4],          %[filter45]  \n\t" /* odd 4 */
430           "extp             %[Temp3],     $ac3,           31           \n\t" /* odd 4 */
431           "lbux             %[st2],       %[Temp2](%[cm])              \n\t" /* odd 3 */
432 
433           "lbu              %[st1],       7(%[dst])                    \n\t" /* load odd 4 from dst */
434 
435           /* odd 5. pixel */
436           "mtlo             %[vector_64], $ac2                         \n\t" /* odd 6 */
437           "mthi             $zero,        $ac2                         \n\t"
438           "addqh_r.w        %[qload1],    %[qload1],      %[st2]       \n\t" /* average odd 3 */
439           "sb               %[qload1],    5(%[dst])                    \n\t" /* store odd 3 to dst */
440           "dpa.w.ph         $ac1,         %[p1],          %[filter45]  \n\t" /* odd 5 */
441           "extp             %[Temp1],     $ac1,           31           \n\t" /* odd 5 */
442           "lbux             %[st3],       %[Temp3](%[cm])              \n\t" /* odd 4 */
443 
444           "lbu              %[qload1],    9(%[dst])                    \n\t" /* load odd 5 from dst */
445 
446           /* odd 6. pixel */
447           "mtlo             %[vector_64], $ac3                         \n\t" /* odd 7 */
448           "mthi             $zero,        $ac3                         \n\t"
449           "addqh_r.w        %[st1],       %[st1],         %[st3]       \n\t" /* average odd 4 */
450           "sb               %[st1],       7(%[dst])                    \n\t" /* store odd 4 to dst */
451           "dpa.w.ph         $ac2,         %[p5],          %[filter45]  \n\t" /* odd 6 */
452           "extp             %[Temp2],     $ac2,           31           \n\t" /* odd 6 */
453           "lbux             %[st1],       %[Temp1](%[cm])              \n\t" /* odd 5 */
454 
455           /* odd 7. pixel */
456           "mtlo             %[vector_64], $ac1                         \n\t" /* odd 8 */
457           "mthi             $zero,        $ac1                         \n\t"
458           "addqh_r.w        %[qload1],    %[qload1],      %[st1]       \n\t" /* average odd 5 */
459           "sb               %[qload1],    9(%[dst])                    \n\t" /* store odd 5 to dst */
460           "lbu              %[qload2],    11(%[dst])                   \n\t" /* load odd 6 from dst */
461           "dpa.w.ph         $ac3,         %[p2],          %[filter45]  \n\t" /* odd 7 */
462           "extp             %[Temp3],     $ac3,           31           \n\t" /* odd 7 */
463 
464           "lbu              %[qload3],    13(%[dst])                   \n\t" /* load odd 7 from dst */
465 
466           /* odd 8. pixel */
467           "dpa.w.ph         $ac1,         %[p3],          %[filter45]  \n\t" /* odd 8 */
468           "extp             %[Temp1],     $ac1,           31           \n\t" /* odd 8 */
469 
470           "lbu              %[qload1],    15(%[dst])                   \n\t" /* load odd 8 from dst */
471 
472           "lbux             %[st2],       %[Temp2](%[cm])              \n\t" /* odd 6 */
473           "addqh_r.w        %[qload2],    %[qload2],      %[st2]       \n\t" /* average odd 6 */
474 
475           "lbux             %[st3],       %[Temp3](%[cm])              \n\t" /* odd 7 */
476           "addqh_r.w        %[qload3],    %[qload3],      %[st3]       \n\t" /* average odd 7 */
477 
478           "lbux             %[st1],       %[Temp1](%[cm])              \n\t" /* odd 8 */
479           "addqh_r.w        %[qload1],    %[qload1],      %[st1]       \n\t" /* average odd 8 */
480 
481           "sb               %[qload2],    11(%[dst])                   \n\t" /* store odd 6 to dst */
482           "sb               %[qload3],    13(%[dst])                   \n\t" /* store odd 7 to dst */
483           "sb               %[qload1],    15(%[dst])                   \n\t" /* store odd 8 to dst */
484 
485           : [qload1] "=&r"(qload1), [qload2] "=&r"(qload2), [st1] "=&r"(st1),
486             [st2] "=&r"(st2), [st3] "=&r"(st3), [p1] "=&r"(p1), [p2] "=&r"(p2),
487             [p3] "=&r"(p3), [p4] "=&r"(p4), [qload3] "=&r"(qload3),
488             [p5] "=&r"(p5), [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2),
489             [Temp3] "=&r"(Temp3)
490           : [filter45] "r"(filter45), [vector_64] "r"(vector_64), [cm] "r"(cm),
491             [dst] "r"(dst), [src] "r"(src));
492 
493       src += 16;
494       dst += 16;
495     }
496 
497     /* Next row... */
498     src_ptr += src_stride;
499     dst_ptr += dst_stride;
500   }
501 }
502 
convolve_bi_avg_horiz_64_dspr2(const uint8_t * src_ptr,int32_t src_stride,uint8_t * dst_ptr,int32_t dst_stride,const int16_t * filter_x0,int32_t h)503 static void convolve_bi_avg_horiz_64_dspr2(const uint8_t *src_ptr,
504                                            int32_t src_stride, uint8_t *dst_ptr,
505                                            int32_t dst_stride,
506                                            const int16_t *filter_x0,
507                                            int32_t h) {
508   int32_t y, c;
509   const uint8_t *src;
510   uint8_t *dst;
511   uint8_t *cm = vpx_ff_cropTbl;
512   uint32_t vector_64 = 64;
513   int32_t Temp1, Temp2, Temp3;
514   uint32_t qload1, qload2, qload3;
515   uint32_t p1, p2, p3, p4, p5;
516   uint32_t st1, st2, st3;
517   const int16_t *filter = &filter_x0[3];
518   uint32_t filter45;
519 
520   filter45 = ((const int32_t *)filter)[0];
521 
522   for (y = h; y--;) {
523     src = src_ptr;
524     dst = dst_ptr;
525 
526     /* prefetch data to cache memory */
527     prefetch_load(src_ptr + src_stride);
528     prefetch_load(src_ptr + src_stride + 32);
529     prefetch_load(src_ptr + src_stride + 64);
530     prefetch_store(dst_ptr + dst_stride);
531     prefetch_store(dst_ptr + dst_stride + 32);
532 
533     for (c = 0; c < 4; c++) {
534       __asm__ __volatile__(
535           "ulw              %[qload1],    0(%[src])                    \n\t"
536           "ulw              %[qload2],    4(%[src])                    \n\t"
537 
538           /* even 1. pixel */
539           "mtlo             %[vector_64], $ac1                         \n\t" /* even 1 */
540           "mthi             $zero,        $ac1                         \n\t"
541           "mtlo             %[vector_64], $ac2                         \n\t" /* even 2 */
542           "mthi             $zero,        $ac2                         \n\t"
543           "preceu.ph.qbr    %[p1],        %[qload1]                    \n\t"
544           "preceu.ph.qbl    %[p2],        %[qload1]                    \n\t"
545           "preceu.ph.qbr    %[p3],        %[qload2]                    \n\t"
546           "preceu.ph.qbl    %[p4],        %[qload2]                    \n\t"
547           "ulw              %[qload3],    8(%[src])                    \n\t"
548           "dpa.w.ph         $ac1,         %[p1],          %[filter45]  \n\t" /* even 1 */
549           "extp             %[Temp1],     $ac1,           31           \n\t" /* even 1 */
550           "lbu              %[st2],       0(%[dst])                    \n\t" /* load even 1 from dst */
551 
552           /* even 2. pixel */
553           "mtlo             %[vector_64], $ac3                         \n\t" /* even 3 */
554           "mthi             $zero,        $ac3                         \n\t"
555           "preceu.ph.qbr    %[p1],        %[qload3]                    \n\t"
556           "preceu.ph.qbl    %[p5],        %[qload3]                    \n\t"
557           "ulw              %[qload1],    12(%[src])                   \n\t"
558           "dpa.w.ph         $ac2,         %[p2],          %[filter45]  \n\t" /* even 1 */
559           "extp             %[Temp2],     $ac2,           31           \n\t" /* even 1 */
560           "lbux             %[st1],       %[Temp1](%[cm])              \n\t" /* even 1 */
561 
562           "lbu              %[qload3],    2(%[dst])                    \n\t" /* load even 2 from dst */
563 
564           /* even 3. pixel */
565           "mtlo             %[vector_64], $ac1                         \n\t" /* even 4 */
566           "mthi             $zero,        $ac1                         \n\t"
567           "addqh_r.w        %[st2],       %[st2],         %[st1]       \n\t" /* average even 1 */
568           "preceu.ph.qbr    %[p2],        %[qload1]                    \n\t"
569           "sb               %[st2],       0(%[dst])                    \n\t" /* store even 1 to dst */
570           "dpa.w.ph         $ac3,         %[p3],          %[filter45]  \n\t" /* even 3 */
571           "extp             %[Temp3],     $ac3,           31           \n\t" /* even 3 */
572           "lbux             %[st2],       %[Temp2](%[cm])              \n\t" /* even 1 */
573 
574           /* even 4. pixel */
575           "mtlo             %[vector_64], $ac2                         \n\t" /* even 5 */
576           "mthi             $zero,        $ac2                         \n\t"
577           "addqh_r.w        %[qload3],    %[qload3],      %[st2]       \n\t" /* average even 2 */
578           "preceu.ph.qbl    %[p3],        %[qload1]                    \n\t"
579           "sb               %[qload3],    2(%[dst])                    \n\t" /* store even 2 to dst */
580           "lbu              %[qload3],    4(%[dst])                    \n\t" /* load even 3 from dst */
581           "lbu              %[qload1],    6(%[dst])                    \n\t" /* load even 4 from dst */
582           "dpa.w.ph         $ac1,         %[p4],          %[filter45]  \n\t" /* even 4 */
583           "extp             %[Temp1],     $ac1,           31           \n\t" /* even 4 */
584           "lbux             %[st3],       %[Temp3](%[cm])              \n\t" /* even 3 */
585 
586           /* even 5. pixel */
587           "mtlo             %[vector_64], $ac3                         \n\t" /* even 6 */
588           "mthi             $zero,        $ac3                         \n\t"
589           "addqh_r.w        %[qload3],    %[qload3],      %[st3]       \n\t" /* average even 3 */
590           "sb               %[qload3],    4(%[dst])                    \n\t" /* store even 3 to dst */
591           "dpa.w.ph         $ac2,         %[p1],          %[filter45]  \n\t" /* even 5 */
592           "extp             %[Temp2],     $ac2,           31           \n\t" /* even 5 */
593           "lbux             %[st1],       %[Temp1](%[cm])              \n\t" /* even 4 */
594 
595           /* even 6. pixel */
596           "mtlo             %[vector_64], $ac1                         \n\t" /* even 7 */
597           "mthi             $zero,        $ac1                         \n\t"
598           "addqh_r.w        %[qload1],    %[qload1],      %[st1]       \n\t" /* average even 4 */
599           "sb               %[qload1],    6(%[dst])                    \n\t" /* store even 4 to dst */
600           "dpa.w.ph         $ac3,         %[p5],          %[filter45]  \n\t" /* even 6 */
601           "lbu              %[qload2],    8(%[dst])                    \n\t" /* load even 5 from dst */
602           "extp             %[Temp3],     $ac3,           31           \n\t" /* even 6 */
603           "lbux             %[st2],       %[Temp2](%[cm])              \n\t" /* even 5 */
604 
605           /* even 7. pixel */
606           "mtlo             %[vector_64], $ac2                         \n\t" /* even 8 */
607           "mthi             $zero,        $ac2                         \n\t"
608           "addqh_r.w        %[qload2],    %[qload2],      %[st2]       \n\t" /* average even 5 */
609           "sb               %[qload2],    8(%[dst])                    \n\t" /* store even 5 to dst */
610           "dpa.w.ph         $ac1,         %[p2],          %[filter45]  \n\t" /* even 7 */
611           "lbu              %[qload3],    10(%[dst])                   \n\t" /* load even 6 from dst */
612           "extp             %[Temp1],     $ac1,           31           \n\t" /* even 7 */
613           "lbux             %[st3],       %[Temp3](%[cm])              \n\t" /* even 6 */
614 
615           "lbu              %[st2],       12(%[dst])                   \n\t" /* load even 7 from dst */
616 
617           /* even 8. pixel */
618           "mtlo             %[vector_64], $ac3                         \n\t" /* odd 1 */
619           "mthi             $zero,        $ac3                         \n\t"
620           "addqh_r.w        %[qload3],    %[qload3],      %[st3]       \n\t" /* average even 6 */
621           "dpa.w.ph         $ac2,         %[p3],          %[filter45]  \n\t" /* even 8 */
622           "sb               %[qload3],    10(%[dst])                   \n\t" /* store even 6 to dst */
623           "extp             %[Temp2],     $ac2,           31           \n\t" /* even 8 */
624           "lbux             %[st1],       %[Temp1](%[cm])              \n\t" /* even 7 */
625 
626           /* ODD pixels */
627           "ulw              %[qload1],    1(%[src])                   \n\t"
628           "ulw              %[qload2],    5(%[src])                    \n\t"
629 
630           "addqh_r.w        %[st2],       %[st2],         %[st1]       \n\t" /* average even 7 */
631 
632           /* odd 1. pixel */
633           "mtlo             %[vector_64], $ac1                         \n\t" /* odd 2 */
634           "mthi             $zero,        $ac1                         \n\t"
635           "preceu.ph.qbr    %[p1],        %[qload1]                    \n\t"
636           "preceu.ph.qbl    %[p2],        %[qload1]                    \n\t"
637           "preceu.ph.qbr    %[p3],        %[qload2]                    \n\t"
638           "preceu.ph.qbl    %[p4],        %[qload2]                    \n\t"
639           "sb               %[st2],       12(%[dst])                   \n\t" /* store even 7 to dst */
640           "ulw              %[qload3],    9(%[src])                    \n\t"
641           "dpa.w.ph         $ac3,         %[p1],          %[filter45]  \n\t" /* odd 1 */
642           "lbu              %[qload2],    14(%[dst])                   \n\t" /* load even 8 from dst */
643           "extp             %[Temp3],     $ac3,           31           \n\t" /* odd 1 */
644           "lbux             %[st2],       %[Temp2](%[cm])              \n\t" /* even 8 */
645 
646           "lbu              %[st1],       1(%[dst])                    \n\t" /* load odd 1 from dst */
647 
648           /* odd 2. pixel */
649           "mtlo             %[vector_64], $ac2                         \n\t" /* odd 3 */
650           "mthi             $zero,        $ac2                         \n\t"
651           "addqh_r.w        %[qload2],    %[qload2],      %[st2]       \n\t" /* average even 8 */
652           "preceu.ph.qbr    %[p1],        %[qload3]                    \n\t"
653           "preceu.ph.qbl    %[p5],        %[qload3]                    \n\t"
654           "sb               %[qload2],    14(%[dst])                   \n\t" /* store even 8 to dst */
655           "ulw              %[qload1],    13(%[src])                   \n\t"
656           "dpa.w.ph         $ac1,         %[p2],          %[filter45]  \n\t" /* odd 2 */
657           "lbu              %[qload3],    3(%[dst])                    \n\t" /* load odd 2 from dst */
658           "extp             %[Temp1],     $ac1,           31           \n\t" /* odd 2 */
659           "lbux             %[st3],       %[Temp3](%[cm])              \n\t" /* odd 1 */
660 
661           /* odd 3. pixel */
662           "mtlo             %[vector_64], $ac3                         \n\t" /* odd 4 */
663           "mthi             $zero,        $ac3                         \n\t"
664           "addqh_r.w        %[st3],       %[st3],         %[st1]       \n\t" /* average odd 1 */
665           "preceu.ph.qbr    %[p2],        %[qload1]                    \n\t"
666           "dpa.w.ph         $ac2,         %[p3],          %[filter45]  \n\t" /* odd 3 */
667           "sb               %[st3],       1(%[dst])                    \n\t" /* store odd 1 to dst */
668           "extp             %[Temp2],     $ac2,           31           \n\t" /* odd 3 */
669           "lbux             %[st1],       %[Temp1](%[cm])              \n\t" /* odd 2 */
670 
671           /* odd 4. pixel */
672           "mtlo             %[vector_64], $ac1                         \n\t" /* odd 5 */
673           "mthi             $zero,        $ac1                         \n\t"
674           "addqh_r.w        %[qload3],    %[qload3],      %[st1]       \n\t" /* average odd 2 */
675           "preceu.ph.qbl    %[p3],        %[qload1]                    \n\t"
676           "sb               %[qload3],    3(%[dst])                    \n\t" /* store odd 2 to dst */
677           "lbu              %[qload1],    5(%[dst])                    \n\t" /* load odd 3 from dst */
678           "dpa.w.ph         $ac3,         %[p4],          %[filter45]  \n\t" /* odd 4 */
679           "extp             %[Temp3],     $ac3,           31           \n\t" /* odd 4 */
680           "lbux             %[st2],       %[Temp2](%[cm])              \n\t" /* odd 3 */
681 
682           "lbu              %[st1],       7(%[dst])                    \n\t" /* load odd 4 from dst */
683 
684           /* odd 5. pixel */
685           "mtlo             %[vector_64], $ac2                         \n\t" /* odd 6 */
686           "mthi             $zero,        $ac2                         \n\t"
687           "addqh_r.w        %[qload1],    %[qload1],      %[st2]       \n\t" /* average odd 3 */
688           "sb               %[qload1],    5(%[dst])                    \n\t" /* store odd 3 to dst */
689           "dpa.w.ph         $ac1,         %[p1],          %[filter45]  \n\t" /* odd 5 */
690           "extp             %[Temp1],     $ac1,           31           \n\t" /* odd 5 */
691           "lbux             %[st3],       %[Temp3](%[cm])              \n\t" /* odd 4 */
692 
693           "lbu              %[qload1],    9(%[dst])                    \n\t" /* load odd 5 from dst */
694 
695           /* odd 6. pixel */
696           "mtlo             %[vector_64], $ac3                         \n\t" /* odd 7 */
697           "mthi             $zero,        $ac3                         \n\t"
698           "addqh_r.w        %[st1],       %[st1],         %[st3]       \n\t" /* average odd 4 */
699           "sb               %[st1],       7(%[dst])                    \n\t" /* store odd 4 to dst */
700           "dpa.w.ph         $ac2,         %[p5],          %[filter45]  \n\t" /* odd 6 */
701           "extp             %[Temp2],     $ac2,           31           \n\t" /* odd 6 */
702           "lbux             %[st1],       %[Temp1](%[cm])              \n\t" /* odd 5 */
703 
704           /* odd 7. pixel */
705           "mtlo             %[vector_64], $ac1                         \n\t" /* odd 8 */
706           "mthi             $zero,        $ac1                         \n\t"
707           "addqh_r.w        %[qload1],    %[qload1],      %[st1]       \n\t" /* average odd 5 */
708           "sb               %[qload1],    9(%[dst])                    \n\t" /* store odd 5 to dst */
709           "lbu              %[qload2],    11(%[dst])                   \n\t" /* load odd 6 from dst */
710           "dpa.w.ph         $ac3,         %[p2],          %[filter45]  \n\t" /* odd 7 */
711           "extp             %[Temp3],     $ac3,           31           \n\t" /* odd 7 */
712 
713           "lbu              %[qload3],    13(%[dst])                   \n\t" /* load odd 7 from dst */
714 
715           /* odd 8. pixel */
716           "dpa.w.ph         $ac1,         %[p3],          %[filter45]  \n\t" /* odd 8 */
717           "extp             %[Temp1],     $ac1,           31           \n\t" /* odd 8 */
718 
719           "lbu              %[qload1],    15(%[dst])                   \n\t" /* load odd 8 from dst */
720 
721           "lbux             %[st2],       %[Temp2](%[cm])              \n\t" /* odd 6 */
722           "addqh_r.w        %[qload2],    %[qload2],      %[st2]       \n\t" /* average odd 6 */
723 
724           "lbux             %[st3],       %[Temp3](%[cm])              \n\t" /* odd 7 */
725           "addqh_r.w        %[qload3],    %[qload3],      %[st3]       \n\t" /* average odd 7 */
726 
727           "lbux             %[st1],       %[Temp1](%[cm])              \n\t" /* odd 8 */
728           "addqh_r.w        %[qload1],    %[qload1],      %[st1]       \n\t" /* average odd 8 */
729 
730           "sb               %[qload2],    11(%[dst])                   \n\t" /* store odd 6 to dst */
731           "sb               %[qload3],    13(%[dst])                   \n\t" /* store odd 7 to dst */
732           "sb               %[qload1],    15(%[dst])                   \n\t" /* store odd 8 to dst */
733 
734           : [qload1] "=&r"(qload1), [qload2] "=&r"(qload2), [st1] "=&r"(st1),
735             [st2] "=&r"(st2), [st3] "=&r"(st3), [p1] "=&r"(p1), [p2] "=&r"(p2),
736             [p3] "=&r"(p3), [p4] "=&r"(p4), [qload3] "=&r"(qload3),
737             [p5] "=&r"(p5), [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2),
738             [Temp3] "=&r"(Temp3)
739           : [filter45] "r"(filter45), [vector_64] "r"(vector_64), [cm] "r"(cm),
740             [dst] "r"(dst), [src] "r"(src));
741 
742       src += 16;
743       dst += 16;
744     }
745 
746     /* Next row... */
747     src_ptr += src_stride;
748     dst_ptr += dst_stride;
749   }
750 }
751 
vpx_convolve2_avg_horiz_dspr2(const uint8_t * src,ptrdiff_t src_stride,uint8_t * dst,ptrdiff_t dst_stride,const InterpKernel * filter,int x0_q4,int32_t x_step_q4,int y0_q4,int y_step_q4,int w,int h)752 void vpx_convolve2_avg_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride,
753                                    uint8_t *dst, ptrdiff_t dst_stride,
754                                    const InterpKernel *filter, int x0_q4,
755                                    int32_t x_step_q4, int y0_q4, int y_step_q4,
756                                    int w, int h) {
757   const int16_t *const filter_x = filter[x0_q4];
758   uint32_t pos = 38;
759 
760   assert(x_step_q4 == 16);
761 
762   /* bit positon for extract from acc */
763   __asm__ __volatile__("wrdsp      %[pos],     1           \n\t"
764                        :
765                        : [pos] "r"(pos));
766 
767   /* prefetch data to cache memory */
768   prefetch_load(src);
769   prefetch_load(src + 32);
770   prefetch_store(dst);
771 
772   switch (w) {
773     case 4:
774       convolve_bi_avg_horiz_4_dspr2(src, src_stride, dst, dst_stride, filter_x,
775                                     h);
776       break;
777     case 8:
778       convolve_bi_avg_horiz_8_dspr2(src, src_stride, dst, dst_stride, filter_x,
779                                     h);
780       break;
781     case 16:
782       convolve_bi_avg_horiz_16_dspr2(src, src_stride, dst, dst_stride, filter_x,
783                                      h, 1);
784       break;
785     case 32:
786       convolve_bi_avg_horiz_16_dspr2(src, src_stride, dst, dst_stride, filter_x,
787                                      h, 2);
788       break;
789     case 64:
790       prefetch_load(src + 64);
791       prefetch_store(dst + 32);
792 
793       convolve_bi_avg_horiz_64_dspr2(src, src_stride, dst, dst_stride, filter_x,
794                                      h);
795       break;
796     default:
797       vpx_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride, filter, x0_q4,
798                                 x_step_q4, y0_q4, y_step_q4, w, h);
799       break;
800   }
801 }
802 #endif
803