/*
 *  Copyright (c) 2017 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "./vpx_dsp_rtcd.h"
#include "vpx_ports/asmdefs_mmi.h"
#include "vpx/vpx_integer.h"
#include "vpx_ports/mem.h"
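// The SAD_SRC_REF_ABS_SUB_<width> macros accumulate the sum of absolute
// differences for one row of a <width>-pixel-wide block: they load src and
// ref bytes with unaligned MMI loads, take per-byte absolute differences
// (pasubub), horizontally sum them (biadd) and add the result to the running
// total kept in ftmp5 (ftmp3 for the 8- and 4-wide variants).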
#define SAD_SRC_REF_ABS_SUB_64                                      \
  "gsldlc1    %[ftmp1],   0x07(%[src])                        \n\t" \
  "gsldrc1    %[ftmp1],   0x00(%[src])                        \n\t" \
  "gsldlc1    %[ftmp2],   0x0f(%[src])                        \n\t" \
  "gsldrc1    %[ftmp2],   0x08(%[src])                        \n\t" \
  "gsldlc1    %[ftmp3],   0x07(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp3],   0x00(%[ref])                        \n\t" \
  "gsldlc1    %[ftmp4],   0x0f(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp4],   0x08(%[ref])                        \n\t" \
  "pasubub    %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t" \
  "pasubub    %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t" \
  "biadd      %[ftmp1],   %[ftmp1]                            \n\t" \
  "biadd      %[ftmp2],   %[ftmp2]                            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp1]            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp2]            \n\t" \
  "gsldlc1    %[ftmp1],   0x17(%[src])                        \n\t" \
  "gsldrc1    %[ftmp1],   0x10(%[src])                        \n\t" \
  "gsldlc1    %[ftmp2],   0x1f(%[src])                        \n\t" \
  "gsldrc1    %[ftmp2],   0x18(%[src])                        \n\t" \
  "gsldlc1    %[ftmp3],   0x17(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp3],   0x10(%[ref])                        \n\t" \
  "gsldlc1    %[ftmp4],   0x1f(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp4],   0x18(%[ref])                        \n\t" \
  "pasubub    %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t" \
  "pasubub    %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t" \
  "biadd      %[ftmp1],   %[ftmp1]                            \n\t" \
  "biadd      %[ftmp2],   %[ftmp2]                            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp1]            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp2]            \n\t" \
  "gsldlc1    %[ftmp1],   0x27(%[src])                        \n\t" \
  "gsldrc1    %[ftmp1],   0x20(%[src])                        \n\t" \
  "gsldlc1    %[ftmp2],   0x2f(%[src])                        \n\t" \
  "gsldrc1    %[ftmp2],   0x28(%[src])                        \n\t" \
  "gsldlc1    %[ftmp3],   0x27(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp3],   0x20(%[ref])                        \n\t" \
  "gsldlc1    %[ftmp4],   0x2f(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp4],   0x28(%[ref])                        \n\t" \
  "pasubub    %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t" \
  "pasubub    %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t" \
  "biadd      %[ftmp1],   %[ftmp1]                            \n\t" \
  "biadd      %[ftmp2],   %[ftmp2]                            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp1]            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp2]            \n\t" \
  "gsldlc1    %[ftmp1],   0x37(%[src])                        \n\t" \
  "gsldrc1    %[ftmp1],   0x30(%[src])                        \n\t" \
  "gsldlc1    %[ftmp2],   0x3f(%[src])                        \n\t" \
  "gsldrc1    %[ftmp2],   0x38(%[src])                        \n\t" \
  "gsldlc1    %[ftmp3],   0x37(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp3],   0x30(%[ref])                        \n\t" \
  "gsldlc1    %[ftmp4],   0x3f(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp4],   0x38(%[ref])                        \n\t" \
  "pasubub    %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t" \
  "pasubub    %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t" \
  "biadd      %[ftmp1],   %[ftmp1]                            \n\t" \
  "biadd      %[ftmp2],   %[ftmp2]                            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp1]            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp2]            \n\t"

#define SAD_SRC_REF_ABS_SUB_32                                      \
  "gsldlc1    %[ftmp1],   0x07(%[src])                        \n\t" \
  "gsldrc1    %[ftmp1],   0x00(%[src])                        \n\t" \
  "gsldlc1    %[ftmp2],   0x0f(%[src])                        \n\t" \
  "gsldrc1    %[ftmp2],   0x08(%[src])                        \n\t" \
  "gsldlc1    %[ftmp3],   0x07(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp3],   0x00(%[ref])                        \n\t" \
  "gsldlc1    %[ftmp4],   0x0f(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp4],   0x08(%[ref])                        \n\t" \
  "pasubub    %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t" \
  "pasubub    %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t" \
  "biadd      %[ftmp1],   %[ftmp1]                            \n\t" \
  "biadd      %[ftmp2],   %[ftmp2]                            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp1]            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp2]            \n\t" \
  "gsldlc1    %[ftmp1],   0x17(%[src])                        \n\t" \
  "gsldrc1    %[ftmp1],   0x10(%[src])                        \n\t" \
  "gsldlc1    %[ftmp2],   0x1f(%[src])                        \n\t" \
  "gsldrc1    %[ftmp2],   0x18(%[src])                        \n\t" \
  "gsldlc1    %[ftmp3],   0x17(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp3],   0x10(%[ref])                        \n\t" \
  "gsldlc1    %[ftmp4],   0x1f(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp4],   0x18(%[ref])                        \n\t" \
  "pasubub    %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t" \
  "pasubub    %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t" \
  "biadd      %[ftmp1],   %[ftmp1]                            \n\t" \
  "biadd      %[ftmp2],   %[ftmp2]                            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp1]            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp2]            \n\t"

#define SAD_SRC_REF_ABS_SUB_16                                      \
  "gsldlc1    %[ftmp1],   0x07(%[src])                        \n\t" \
  "gsldrc1    %[ftmp1],   0x00(%[src])                        \n\t" \
  "gsldlc1    %[ftmp2],   0x0f(%[src])                        \n\t" \
  "gsldrc1    %[ftmp2],   0x08(%[src])                        \n\t" \
  "gsldlc1    %[ftmp3],   0x07(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp3],   0x00(%[ref])                        \n\t" \
  "gsldlc1    %[ftmp4],   0x0f(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp4],   0x08(%[ref])                        \n\t" \
  "pasubub    %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t" \
  "pasubub    %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t" \
  "biadd      %[ftmp1],   %[ftmp1]                            \n\t" \
  "biadd      %[ftmp2],   %[ftmp2]                            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp1]            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp2]            \n\t"

#define SAD_SRC_REF_ABS_SUB_8                                       \
  "gsldlc1    %[ftmp1],   0x07(%[src])                        \n\t" \
  "gsldrc1    %[ftmp1],   0x00(%[src])                        \n\t" \
  "gsldlc1    %[ftmp2],   0x07(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp2],   0x00(%[ref])                        \n\t" \
  "pasubub    %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t" \
  "biadd      %[ftmp1],   %[ftmp1]                            \n\t" \
  "paddw      %[ftmp3],   %[ftmp3],       %[ftmp1]            \n\t"

#if _MIPS_SIM == _ABIO32
#define SAD_SRC_REF_ABS_SUB_4                                       \
  "ulw        %[tmp0],    0x00(%[src])                        \n\t" \
  "mtc1       %[tmp0],    %[ftmp1]                            \n\t" \
  "ulw        %[tmp0],    0x00(%[ref])                        \n\t" \
  "mtc1       %[tmp0],    %[ftmp2]                            \n\t" \
  "pasubub    %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t" \
  "mthc1      $0,         %[ftmp1]                            \n\t" \
  "biadd      %[ftmp1],   %[ftmp1]                            \n\t" \
  "paddw      %[ftmp3],   %[ftmp3],       %[ftmp1]            \n\t"
#else /* _MIPS_SIM == _ABI64 || _MIPS_SIM == _ABIN32 */
#define SAD_SRC_REF_ABS_SUB_4                                       \
  "gslwlc1    %[ftmp1],   0x03(%[src])                        \n\t" \
  "gslwrc1    %[ftmp1],   0x00(%[src])                        \n\t" \
  "gslwlc1    %[ftmp2],   0x03(%[ref])                        \n\t" \
  "gslwrc1    %[ftmp2],   0x00(%[ref])                        \n\t" \
  "pasubub    %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t" \
  "mthc1      $0,         %[ftmp1]                            \n\t" \
  "biadd      %[ftmp1],   %[ftmp1]                            \n\t" \
  "paddw      %[ftmp3],   %[ftmp3],       %[ftmp1]            \n\t"
#endif /* _MIPS_SIM == _ABIO32 */
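// The SAD_SRC_AVGREF_ABS_SUB_<width> macros follow the same pattern, but the
// ref row is first averaged with the matching second_pred row (pavgb) before
// the absolute differences against src are taken.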
#define SAD_SRC_AVGREF_ABS_SUB_64                                   \
  "gsldlc1    %[ftmp1],   0x07(%[second_pred])                \n\t" \
  "gsldrc1    %[ftmp1],   0x00(%[second_pred])                \n\t" \
  "gsldlc1    %[ftmp2],   0x0f(%[second_pred])                \n\t" \
  "gsldrc1    %[ftmp2],   0x08(%[second_pred])                \n\t" \
  "gsldlc1    %[ftmp3],   0x07(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp3],   0x00(%[ref])                        \n\t" \
  "gsldlc1    %[ftmp4],   0x0f(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp4],   0x08(%[ref])                        \n\t" \
  "pavgb      %[ftmp3],   %[ftmp1],       %[ftmp3]            \n\t" \
  "pavgb      %[ftmp4],   %[ftmp2],       %[ftmp4]            \n\t" \
  "gsldlc1    %[ftmp1],   0x07(%[src])                        \n\t" \
  "gsldrc1    %[ftmp1],   0x00(%[src])                        \n\t" \
  "gsldlc1    %[ftmp2],   0x0f(%[src])                        \n\t" \
  "gsldrc1    %[ftmp2],   0x08(%[src])                        \n\t" \
  "pasubub    %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t" \
  "pasubub    %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t" \
  "biadd      %[ftmp1],   %[ftmp1]                            \n\t" \
  "biadd      %[ftmp2],   %[ftmp2]                            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp1]            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp2]            \n\t" \
  "gsldlc1    %[ftmp1],   0x17(%[second_pred])                \n\t" \
  "gsldrc1    %[ftmp1],   0x10(%[second_pred])                \n\t" \
  "gsldlc1    %[ftmp2],   0x1f(%[second_pred])                \n\t" \
  "gsldrc1    %[ftmp2],   0x18(%[second_pred])                \n\t" \
  "gsldlc1    %[ftmp3],   0x17(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp3],   0x10(%[ref])                        \n\t" \
  "gsldlc1    %[ftmp4],   0x1f(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp4],   0x18(%[ref])                        \n\t" \
  "pavgb      %[ftmp3],   %[ftmp1],       %[ftmp3]            \n\t" \
  "pavgb      %[ftmp4],   %[ftmp2],       %[ftmp4]            \n\t" \
  "gsldlc1    %[ftmp1],   0x17(%[src])                        \n\t" \
  "gsldrc1    %[ftmp1],   0x10(%[src])                        \n\t" \
  "gsldlc1    %[ftmp2],   0x1f(%[src])                        \n\t" \
  "gsldrc1    %[ftmp2],   0x18(%[src])                        \n\t" \
  "pasubub    %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t" \
  "pasubub    %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t" \
  "biadd      %[ftmp1],   %[ftmp1]                            \n\t" \
  "biadd      %[ftmp2],   %[ftmp2]                            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp1]            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp2]            \n\t" \
  "gsldlc1    %[ftmp1],   0x27(%[second_pred])                \n\t" \
  "gsldrc1    %[ftmp1],   0x20(%[second_pred])                \n\t" \
  "gsldlc1    %[ftmp2],   0x2f(%[second_pred])                \n\t" \
  "gsldrc1    %[ftmp2],   0x28(%[second_pred])                \n\t" \
  "gsldlc1    %[ftmp3],   0x27(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp3],   0x20(%[ref])                        \n\t" \
  "gsldlc1    %[ftmp4],   0x2f(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp4],   0x28(%[ref])                        \n\t" \
  "pavgb      %[ftmp3],   %[ftmp1],       %[ftmp3]            \n\t" \
  "pavgb      %[ftmp4],   %[ftmp2],       %[ftmp4]            \n\t" \
  "gsldlc1    %[ftmp1],   0x27(%[src])                        \n\t" \
  "gsldrc1    %[ftmp1],   0x20(%[src])                        \n\t" \
  "gsldlc1    %[ftmp2],   0x2f(%[src])                        \n\t" \
  "gsldrc1    %[ftmp2],   0x28(%[src])                        \n\t" \
  "pasubub    %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t" \
  "pasubub    %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t" \
  "biadd      %[ftmp1],   %[ftmp1]                            \n\t" \
  "biadd      %[ftmp2],   %[ftmp2]                            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp1]            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp2]            \n\t" \
  "gsldlc1    %[ftmp1],   0x37(%[second_pred])                \n\t" \
  "gsldrc1    %[ftmp1],   0x30(%[second_pred])                \n\t" \
  "gsldlc1    %[ftmp2],   0x3f(%[second_pred])                \n\t" \
  "gsldrc1    %[ftmp2],   0x38(%[second_pred])                \n\t" \
  "gsldlc1    %[ftmp3],   0x37(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp3],   0x30(%[ref])                        \n\t" \
  "gsldlc1    %[ftmp4],   0x3f(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp4],   0x38(%[ref])                        \n\t" \
  "pavgb      %[ftmp3],   %[ftmp1],       %[ftmp3]            \n\t" \
  "pavgb      %[ftmp4],   %[ftmp2],       %[ftmp4]            \n\t" \
  "gsldlc1    %[ftmp1],   0x37(%[src])                        \n\t" \
  "gsldrc1    %[ftmp1],   0x30(%[src])                        \n\t" \
  "gsldlc1    %[ftmp2],   0x3f(%[src])                        \n\t" \
  "gsldrc1    %[ftmp2],   0x38(%[src])                        \n\t" \
  "pasubub    %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t" \
  "pasubub    %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t" \
  "biadd      %[ftmp1],   %[ftmp1]                            \n\t" \
  "biadd      %[ftmp2],   %[ftmp2]                            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp1]            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp2]            \n\t"

#define SAD_SRC_AVGREF_ABS_SUB_32                                   \
  "gsldlc1    %[ftmp1],   0x07(%[second_pred])                \n\t" \
  "gsldrc1    %[ftmp1],   0x00(%[second_pred])                \n\t" \
  "gsldlc1    %[ftmp2],   0x0f(%[second_pred])                \n\t" \
  "gsldrc1    %[ftmp2],   0x08(%[second_pred])                \n\t" \
  "gsldlc1    %[ftmp3],   0x07(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp3],   0x00(%[ref])                        \n\t" \
  "gsldlc1    %[ftmp4],   0x0f(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp4],   0x08(%[ref])                        \n\t" \
  "pavgb      %[ftmp3],   %[ftmp1],       %[ftmp3]            \n\t" \
  "pavgb      %[ftmp4],   %[ftmp2],       %[ftmp4]            \n\t" \
  "gsldlc1    %[ftmp1],   0x07(%[src])                        \n\t" \
  "gsldrc1    %[ftmp1],   0x00(%[src])                        \n\t" \
  "gsldlc1    %[ftmp2],   0x0f(%[src])                        \n\t" \
  "gsldrc1    %[ftmp2],   0x08(%[src])                        \n\t" \
  "pasubub    %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t" \
  "pasubub    %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t" \
  "biadd      %[ftmp1],   %[ftmp1]                            \n\t" \
  "biadd      %[ftmp2],   %[ftmp2]                            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp1]            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp2]            \n\t" \
  "gsldlc1    %[ftmp1],   0x17(%[second_pred])                \n\t" \
  "gsldrc1    %[ftmp1],   0x10(%[second_pred])                \n\t" \
  "gsldlc1    %[ftmp2],   0x1f(%[second_pred])                \n\t" \
  "gsldrc1    %[ftmp2],   0x18(%[second_pred])                \n\t" \
  "gsldlc1    %[ftmp3],   0x17(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp3],   0x10(%[ref])                        \n\t" \
  "gsldlc1    %[ftmp4],   0x1f(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp4],   0x18(%[ref])                        \n\t" \
  "pavgb      %[ftmp3],   %[ftmp1],       %[ftmp3]            \n\t" \
  "pavgb      %[ftmp4],   %[ftmp2],       %[ftmp4]            \n\t" \
  "gsldlc1    %[ftmp1],   0x17(%[src])                        \n\t" \
  "gsldrc1    %[ftmp1],   0x10(%[src])                        \n\t" \
  "gsldlc1    %[ftmp2],   0x1f(%[src])                        \n\t" \
  "gsldrc1    %[ftmp2],   0x18(%[src])                        \n\t" \
  "pasubub    %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t" \
  "pasubub    %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t" \
  "biadd      %[ftmp1],   %[ftmp1]                            \n\t" \
  "biadd      %[ftmp2],   %[ftmp2]                            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp1]            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp2]            \n\t"

#define SAD_SRC_AVGREF_ABS_SUB_16                                   \
  "gsldlc1    %[ftmp1],   0x07(%[second_pred])                \n\t" \
  "gsldrc1    %[ftmp1],   0x00(%[second_pred])                \n\t" \
  "gsldlc1    %[ftmp2],   0x0f(%[second_pred])                \n\t" \
  "gsldrc1    %[ftmp2],   0x08(%[second_pred])                \n\t" \
  "gsldlc1    %[ftmp3],   0x07(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp3],   0x00(%[ref])                        \n\t" \
  "gsldlc1    %[ftmp4],   0x0f(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp4],   0x08(%[ref])                        \n\t" \
  "pavgb      %[ftmp3],   %[ftmp1],       %[ftmp3]            \n\t" \
  "pavgb      %[ftmp4],   %[ftmp2],       %[ftmp4]            \n\t" \
  "gsldlc1    %[ftmp1],   0x07(%[src])                        \n\t" \
  "gsldrc1    %[ftmp1],   0x00(%[src])                        \n\t" \
  "gsldlc1    %[ftmp2],   0x0f(%[src])                        \n\t" \
  "gsldrc1    %[ftmp2],   0x08(%[src])                        \n\t" \
  "pasubub    %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t" \
  "pasubub    %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t" \
  "biadd      %[ftmp1],   %[ftmp1]                            \n\t" \
  "biadd      %[ftmp2],   %[ftmp2]                            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp1]            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp2]            \n\t"

#define SAD_SRC_AVGREF_ABS_SUB_8                                    \
  "gsldlc1    %[ftmp1],   0x07(%[second_pred])                \n\t" \
  "gsldrc1    %[ftmp1],   0x00(%[second_pred])                \n\t" \
  "gsldlc1    %[ftmp2],   0x07(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp2],   0x00(%[ref])                        \n\t" \
  "pavgb      %[ftmp2],   %[ftmp1],       %[ftmp2]            \n\t" \
  "gsldlc1    %[ftmp1],   0x07(%[src])                        \n\t" \
  "gsldrc1    %[ftmp1],   0x00(%[src])                        \n\t" \
  "pasubub    %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t" \
  "biadd      %[ftmp1],   %[ftmp1]                            \n\t" \
  "paddw      %[ftmp3],   %[ftmp3],       %[ftmp1]            \n\t"

#if _MIPS_SIM == _ABIO32
#define SAD_SRC_AVGREF_ABS_SUB_4                                    \
  "ulw        %[tmp0],    0x00(%[second_pred])                \n\t" \
  "mtc1       %[tmp0],    %[ftmp1]                            \n\t" \
  "ulw        %[tmp0],    0x00(%[ref])                        \n\t" \
  "mtc1       %[tmp0],    %[ftmp2]                            \n\t" \
  "pavgb      %[ftmp2],   %[ftmp1],       %[ftmp2]            \n\t" \
  "gsldlc1    %[ftmp1],   0x07(%[src])                        \n\t" \
  "gsldrc1    %[ftmp1],   0x00(%[src])                        \n\t" \
  "pasubub    %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t" \
  "mthc1      $0,         %[ftmp1]                            \n\t" \
  "biadd      %[ftmp1],   %[ftmp1]                            \n\t" \
  "paddw      %[ftmp3],   %[ftmp3],       %[ftmp1]            \n\t"
#else /* _MIPS_SIM == _ABI64 || _MIPS_SIM == _ABIN32 */
#define SAD_SRC_AVGREF_ABS_SUB_4                                    \
  "gslwlc1    %[ftmp1],   0x03(%[second_pred])                \n\t" \
  "gslwrc1    %[ftmp1],   0x00(%[second_pred])                \n\t" \
  "gslwlc1    %[ftmp2],   0x03(%[ref])                        \n\t" \
  "gslwrc1    %[ftmp2],   0x00(%[ref])                        \n\t" \
  "pavgb      %[ftmp2],   %[ftmp1],       %[ftmp2]            \n\t" \
  "gsldlc1    %[ftmp1],   0x07(%[src])                        \n\t" \
  "gsldrc1    %[ftmp1],   0x00(%[src])                        \n\t" \
  "pasubub    %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t" \
  "mthc1      $0,         %[ftmp1]                            \n\t" \
  "biadd      %[ftmp1],   %[ftmp1]                            \n\t" \
  "paddw      %[ftmp3],   %[ftmp3],       %[ftmp1]            \n\t"
#endif /* _MIPS_SIM == _ABIO32 */
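// sadMxNxK_mmi() computes the SAD of src against k reference blocks starting
// at consecutive byte offsets within one reference row; sadMxNx4D_mmi()
// computes it against four independent reference blocks.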
// depending on call sites, pass **ref_array to avoid & in subsequent call and
// de-dup with 4D below.
#define sadMxNxK_mmi(m, n, k)                                                 \
  void vpx_sad##m##x##n##x##k##_mmi(const uint8_t *src, int src_stride,       \
                                    const uint8_t *ref_array, int ref_stride, \
                                    uint32_t *sad_array) {                    \
    int i;                                                                    \
    for (i = 0; i < (k); ++i)                                                 \
      sad_array[i] =                                                          \
          vpx_sad##m##x##n##_mmi(src, src_stride, &ref_array[i], ref_stride); \
  }

// This appears to be equivalent to the above when k == 4 and refs is const
#define sadMxNx4D_mmi(m, n)                                                  \
  void vpx_sad##m##x##n##x4d_mmi(const uint8_t *src, int src_stride,         \
                                 const uint8_t *const ref_array[],           \
                                 int ref_stride, uint32_t *sad_array) {      \
    int i;                                                                   \
    for (i = 0; i < 4; ++i)                                                  \
      sad_array[i] =                                                         \
          vpx_sad##m##x##n##_mmi(src, src_stride, ref_array[i], ref_stride); \
  }
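// SAD for 64-wide blocks; counter is the block height, processed two rows
// per iteration.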
static inline unsigned int vpx_sad64x(const uint8_t *src, int src_stride,
                                      const uint8_t *ref, int ref_stride,
                                      int counter) {
  unsigned int sad;
  double ftmp1, ftmp2, ftmp3, ftmp4, ftmp5;
  mips_reg l_counter = counter;

  __asm__ volatile (
    "xor        %[ftmp5],   %[ftmp5],       %[ftmp5]            \n\t"
    "1:                                                         \n\t"
    // Two rows are processed per iteration to reduce loop overhead.
    SAD_SRC_REF_ABS_SUB_64
    MMI_ADDU(%[src],     %[src],         %[src_stride])
    MMI_ADDU(%[ref],     %[ref],         %[ref_stride])
    SAD_SRC_REF_ABS_SUB_64
    MMI_ADDU(%[src],     %[src],         %[src_stride])
    MMI_ADDU(%[ref],     %[ref],         %[ref_stride])
    MMI_ADDIU(%[counter], %[counter], -0x02)
    "bnez       %[counter], 1b                                  \n\t"
    "mfc1       %[sad],     %[ftmp5]                            \n\t"
    : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3),
      [ftmp4]"=&f"(ftmp4), [ftmp5]"=&f"(ftmp5), [counter]"+&r"(l_counter),
      [src]"+&r"(src), [ref]"+&r"(ref), [sad]"=&r"(sad)
    : [src_stride]"r"((mips_reg)src_stride),
      [ref_stride]"r"((mips_reg)ref_stride)
  );

  return sad;
}

#define vpx_sad64xN(H)                                                   \
  unsigned int vpx_sad64x##H##_mmi(const uint8_t *src, int src_stride,   \
                                   const uint8_t *ref, int ref_stride) { \
    return vpx_sad64x(src, src_stride, ref, ref_stride, H);              \
  }

vpx_sad64xN(64);
vpx_sad64xN(32);
sadMxNx4D_mmi(64, 64);
sadMxNx4D_mmi(64, 32);
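// SAD for 64-wide blocks, with ref averaged against second_pred.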
static inline unsigned int vpx_sad_avg64x(const uint8_t *src, int src_stride,
                                          const uint8_t *ref, int ref_stride,
                                          const uint8_t *second_pred,
                                          int counter) {
  unsigned int sad;
  double ftmp1, ftmp2, ftmp3, ftmp4, ftmp5;
  mips_reg l_counter = counter;

  __asm__ volatile (
    "xor        %[ftmp5],   %[ftmp5],       %[ftmp5]            \n\t"
    "1:                                                         \n\t"
    // Two rows are processed per iteration to reduce loop overhead.
    SAD_SRC_AVGREF_ABS_SUB_64
    MMI_ADDIU(%[second_pred], %[second_pred], 0x40)
    MMI_ADDU(%[src],     %[src],         %[src_stride])
    MMI_ADDU(%[ref],     %[ref],         %[ref_stride])
    SAD_SRC_AVGREF_ABS_SUB_64
    MMI_ADDIU(%[second_pred], %[second_pred], 0x40)
    MMI_ADDU(%[src],     %[src],         %[src_stride])
    MMI_ADDU(%[ref],     %[ref],         %[ref_stride])
    MMI_ADDIU(%[counter], %[counter], -0x02)
    "bnez       %[counter], 1b                                  \n\t"
    "mfc1       %[sad],     %[ftmp5]                            \n\t"
    : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3),
      [ftmp4]"=&f"(ftmp4), [ftmp5]"=&f"(ftmp5), [counter]"+&r"(l_counter),
      [src]"+&r"(src), [ref]"+&r"(ref),
      [second_pred]"+&r"((mips_reg)second_pred),
      [sad]"=&r"(sad)
    : [src_stride]"r"((mips_reg)src_stride),
      [ref_stride]"r"((mips_reg)ref_stride)
  );

  return sad;
}

#define vpx_sad_avg64xN(H)                                                   \
  unsigned int vpx_sad64x##H##_avg_mmi(const uint8_t *src, int src_stride,   \
                                       const uint8_t *ref, int ref_stride,   \
                                       const uint8_t *second_pred) {         \
    return vpx_sad_avg64x(src, src_stride, ref, ref_stride, second_pred, H); \
  }

vpx_sad_avg64xN(64);
vpx_sad_avg64xN(32);
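// SAD for 32-wide blocks; two rows are processed per iteration.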
static inline unsigned int vpx_sad32x(const uint8_t *src, int src_stride,
                                      const uint8_t *ref, int ref_stride,
                                      int counter) {
  unsigned int sad;
  double ftmp1, ftmp2, ftmp3, ftmp4, ftmp5;
  mips_reg l_counter = counter;

  __asm__ volatile (
    "xor        %[ftmp5],   %[ftmp5],       %[ftmp5]            \n\t"
    "1:                                                         \n\t"
    // Two rows are processed per iteration to reduce loop overhead.
    SAD_SRC_REF_ABS_SUB_32
    MMI_ADDU(%[src],     %[src],         %[src_stride])
    MMI_ADDU(%[ref],     %[ref],         %[ref_stride])
    SAD_SRC_REF_ABS_SUB_32
    MMI_ADDU(%[src],     %[src],         %[src_stride])
    MMI_ADDU(%[ref],     %[ref],         %[ref_stride])
    MMI_ADDIU(%[counter], %[counter], -0x02)
    "bnez       %[counter], 1b                                  \n\t"
    "mfc1       %[sad],     %[ftmp5]                            \n\t"
    : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3),
      [ftmp4]"=&f"(ftmp4), [ftmp5]"=&f"(ftmp5), [counter]"+&r"(l_counter),
      [src]"+&r"(src), [ref]"+&r"(ref), [sad]"=&r"(sad)
    : [src_stride]"r"((mips_reg)src_stride),
      [ref_stride]"r"((mips_reg)ref_stride)
  );

  return sad;
}

#define vpx_sad32xN(H)                                                   \
  unsigned int vpx_sad32x##H##_mmi(const uint8_t *src, int src_stride,   \
                                   const uint8_t *ref, int ref_stride) { \
    return vpx_sad32x(src, src_stride, ref, ref_stride, H);              \
  }

vpx_sad32xN(64);
vpx_sad32xN(32);
vpx_sad32xN(16);
sadMxNx4D_mmi(32, 64);
sadMxNx4D_mmi(32, 32);
sadMxNx4D_mmi(32, 16);
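// SAD for 32-wide blocks, with ref averaged against second_pred.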
static inline unsigned int vpx_sad_avg32x(const uint8_t *src, int src_stride,
                                          const uint8_t *ref, int ref_stride,
                                          const uint8_t *second_pred,
                                          int counter) {
  unsigned int sad;
  double ftmp1, ftmp2, ftmp3, ftmp4, ftmp5;
  mips_reg l_counter = counter;

  __asm__ volatile (
    "xor        %[ftmp5],   %[ftmp5],       %[ftmp5]            \n\t"
    "1:                                                         \n\t"
    // Two rows are processed per iteration to reduce loop overhead.
    SAD_SRC_AVGREF_ABS_SUB_32
    MMI_ADDIU(%[second_pred], %[second_pred], 0x20)
    MMI_ADDU(%[src],     %[src],         %[src_stride])
    MMI_ADDU(%[ref],     %[ref],         %[ref_stride])
    SAD_SRC_AVGREF_ABS_SUB_32
    MMI_ADDIU(%[second_pred], %[second_pred], 0x20)
    MMI_ADDU(%[src],     %[src],         %[src_stride])
    MMI_ADDU(%[ref],     %[ref],         %[ref_stride])
    MMI_ADDIU(%[counter], %[counter], -0x02)
    "bnez       %[counter], 1b                                  \n\t"
    "mfc1       %[sad],     %[ftmp5]                            \n\t"
    : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3),
      [ftmp4]"=&f"(ftmp4), [ftmp5]"=&f"(ftmp5), [counter]"+&r"(l_counter),
      [src]"+&r"(src), [ref]"+&r"(ref),
      [second_pred]"+&r"((mips_reg)second_pred),
      [sad]"=&r"(sad)
    : [src_stride]"r"((mips_reg)src_stride),
      [ref_stride]"r"((mips_reg)ref_stride)
  );

  return sad;
}

#define vpx_sad_avg32xN(H)                                                   \
  unsigned int vpx_sad32x##H##_avg_mmi(const uint8_t *src, int src_stride,   \
                                       const uint8_t *ref, int ref_stride,   \
                                       const uint8_t *second_pred) {         \
    return vpx_sad_avg32x(src, src_stride, ref, ref_stride, second_pred, H); \
  }

vpx_sad_avg32xN(64);
vpx_sad_avg32xN(32);
vpx_sad_avg32xN(16);
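// SAD for 16-wide blocks; two rows are processed per iteration.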
static inline unsigned int vpx_sad16x(const uint8_t *src, int src_stride,
                                      const uint8_t *ref, int ref_stride,
                                      int counter) {
  unsigned int sad;
  double ftmp1, ftmp2, ftmp3, ftmp4, ftmp5;
  mips_reg l_counter = counter;

  __asm__ volatile (
    "xor        %[ftmp5],   %[ftmp5],       %[ftmp5]            \n\t"
    "1:                                                         \n\t"
    // Two rows are processed per iteration to reduce loop overhead.
    SAD_SRC_REF_ABS_SUB_16
    MMI_ADDU(%[src],     %[src],         %[src_stride])
    MMI_ADDU(%[ref],     %[ref],         %[ref_stride])
    SAD_SRC_REF_ABS_SUB_16
    MMI_ADDU(%[src],     %[src],         %[src_stride])
    MMI_ADDU(%[ref],     %[ref],         %[ref_stride])
    MMI_ADDIU(%[counter], %[counter], -0x02)
    "bnez       %[counter], 1b                                  \n\t"
    "mfc1       %[sad],     %[ftmp5]                            \n\t"
    : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3),
      [ftmp4]"=&f"(ftmp4), [ftmp5]"=&f"(ftmp5), [counter]"+&r"(l_counter),
      [src]"+&r"(src), [ref]"+&r"(ref), [sad]"=&r"(sad)
    : [src_stride]"r"((mips_reg)src_stride),
      [ref_stride]"r"((mips_reg)ref_stride)
  );

  return sad;
}

#define vpx_sad16xN(H)                                                   \
  unsigned int vpx_sad16x##H##_mmi(const uint8_t *src, int src_stride,   \
                                   const uint8_t *ref, int ref_stride) { \
    return vpx_sad16x(src, src_stride, ref, ref_stride, H);              \
  }

vpx_sad16xN(32);
vpx_sad16xN(16);
vpx_sad16xN(8);
sadMxNxK_mmi(16, 16, 3);
sadMxNxK_mmi(16, 16, 8);
sadMxNxK_mmi(16, 8, 3);
sadMxNxK_mmi(16, 8, 8);
sadMxNx4D_mmi(16, 32);
sadMxNx4D_mmi(16, 16);
sadMxNx4D_mmi(16, 8);
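// SAD for 16-wide blocks, with ref averaged against second_pred.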
static inline unsigned int vpx_sad_avg16x(const uint8_t *src, int src_stride,
                                          const uint8_t *ref, int ref_stride,
                                          const uint8_t *second_pred,
                                          int counter) {
  unsigned int sad;
  double ftmp1, ftmp2, ftmp3, ftmp4, ftmp5;
  mips_reg l_counter = counter;

  __asm__ volatile (
    "xor        %[ftmp5],   %[ftmp5],       %[ftmp5]            \n\t"
    "1:                                                         \n\t"
    // Two rows are processed per iteration to reduce loop overhead.
    SAD_SRC_AVGREF_ABS_SUB_16
    MMI_ADDIU(%[second_pred], %[second_pred], 0x10)
    MMI_ADDU(%[src],     %[src],         %[src_stride])
    MMI_ADDU(%[ref],     %[ref],         %[ref_stride])
    SAD_SRC_AVGREF_ABS_SUB_16
    MMI_ADDIU(%[second_pred], %[second_pred], 0x10)
    MMI_ADDU(%[src],     %[src],         %[src_stride])
    MMI_ADDU(%[ref],     %[ref],         %[ref_stride])
    MMI_ADDIU(%[counter], %[counter], -0x02)
    "bnez       %[counter], 1b                                  \n\t"
    "mfc1       %[sad],     %[ftmp5]                            \n\t"
    : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3),
      [ftmp4]"=&f"(ftmp4), [ftmp5]"=&f"(ftmp5), [counter]"+&r"(l_counter),
      [src]"+&r"(src), [ref]"+&r"(ref),
      [second_pred]"+&r"((mips_reg)second_pred),
      [sad]"=&r"(sad)
    : [src_stride]"r"((mips_reg)src_stride),
      [ref_stride]"r"((mips_reg)ref_stride)
  );

  return sad;
}

#define vpx_sad_avg16xN(H)                                                   \
  unsigned int vpx_sad16x##H##_avg_mmi(const uint8_t *src, int src_stride,   \
                                       const uint8_t *ref, int ref_stride,   \
                                       const uint8_t *second_pred) {         \
    return vpx_sad_avg16x(src, src_stride, ref, ref_stride, second_pred, H); \
  }

vpx_sad_avg16xN(32);
vpx_sad_avg16xN(16);
vpx_sad_avg16xN(8);
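// SAD for 8-wide blocks; two rows are processed per iteration.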
static inline unsigned int vpx_sad8x(const uint8_t *src, int src_stride,
                                     const uint8_t *ref, int ref_stride,
                                     int counter) {
  unsigned int sad;
  double ftmp1, ftmp2, ftmp3;
  mips_reg l_counter = counter;

  __asm__ volatile (
    "xor        %[ftmp3],   %[ftmp3],       %[ftmp3]            \n\t"
    "1:                                                         \n\t"
    // Two rows are processed per iteration to reduce loop overhead.
    SAD_SRC_REF_ABS_SUB_8
    MMI_ADDU(%[src],     %[src],         %[src_stride])
    MMI_ADDU(%[ref],     %[ref],         %[ref_stride])
    SAD_SRC_REF_ABS_SUB_8
    MMI_ADDU(%[src],     %[src],         %[src_stride])
    MMI_ADDU(%[ref],     %[ref],         %[ref_stride])
    MMI_ADDIU(%[counter], %[counter], -0x02)
    "bnez       %[counter], 1b                                  \n\t"
    "mfc1       %[sad],     %[ftmp3]                            \n\t"
    : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3),
      [counter]"+&r"(l_counter), [src]"+&r"(src), [ref]"+&r"(ref),
      [sad]"=&r"(sad)
    : [src_stride]"r"((mips_reg)src_stride),
      [ref_stride]"r"((mips_reg)ref_stride)
  );

  return sad;
}

#define vpx_sad8xN(H)                                                   \
  unsigned int vpx_sad8x##H##_mmi(const uint8_t *src, int src_stride,   \
                                  const uint8_t *ref, int ref_stride) { \
    return vpx_sad8x(src, src_stride, ref, ref_stride, H);              \
  }

vpx_sad8xN(16);
vpx_sad8xN(8);
vpx_sad8xN(4);
sadMxNxK_mmi(8, 16, 3);
sadMxNxK_mmi(8, 16, 8);
sadMxNxK_mmi(8, 8, 3);
sadMxNxK_mmi(8, 8, 8);
sadMxNx4D_mmi(8, 16);
sadMxNx4D_mmi(8, 8);
sadMxNx4D_mmi(8, 4);
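// SAD for 8-wide blocks, with ref averaged against second_pred.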
static inline unsigned int vpx_sad_avg8x(const uint8_t *src, int src_stride,
                                         const uint8_t *ref, int ref_stride,
                                         const uint8_t *second_pred,
                                         int counter) {
  unsigned int sad;
  double ftmp1, ftmp2, ftmp3;
  mips_reg l_counter = counter;

  __asm__ volatile (
    "xor        %[ftmp3],   %[ftmp3],       %[ftmp3]            \n\t"
    "1:                                                         \n\t"
    // Two rows are processed per iteration to reduce loop overhead.
    SAD_SRC_AVGREF_ABS_SUB_8
    MMI_ADDIU(%[second_pred], %[second_pred], 0x08)
    MMI_ADDU(%[src],     %[src],         %[src_stride])
    MMI_ADDU(%[ref],     %[ref],         %[ref_stride])
    SAD_SRC_AVGREF_ABS_SUB_8
    MMI_ADDIU(%[second_pred], %[second_pred], 0x08)
    MMI_ADDU(%[src],     %[src],         %[src_stride])
    MMI_ADDU(%[ref],     %[ref],         %[ref_stride])
    MMI_ADDIU(%[counter], %[counter], -0x02)
    "bnez       %[counter], 1b                                  \n\t"
    "mfc1       %[sad],     %[ftmp3]                            \n\t"
    : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3),
      [counter]"+&r"(l_counter), [src]"+&r"(src), [ref]"+&r"(ref),
      [second_pred]"+&r"((mips_reg)second_pred),
      [sad]"=&r"(sad)
    : [src_stride]"r"((mips_reg)src_stride),
      [ref_stride]"r"((mips_reg)ref_stride)
  );

  return sad;
}

#define vpx_sad_avg8xN(H)                                                   \
  unsigned int vpx_sad8x##H##_avg_mmi(const uint8_t *src, int src_stride,   \
                                      const uint8_t *ref, int ref_stride,   \
                                      const uint8_t *second_pred) {         \
    return vpx_sad_avg8x(src, src_stride, ref, ref_stride, second_pred, H); \
  }

vpx_sad_avg8xN(16);
vpx_sad_avg8xN(8);
vpx_sad_avg8xN(4);
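// SAD for 4-wide blocks; two rows are processed per iteration.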
static inline unsigned int vpx_sad4x(const uint8_t *src, int src_stride,
                                     const uint8_t *ref, int ref_stride,
                                     int counter) {
  unsigned int sad;
  double ftmp1, ftmp2, ftmp3;
  mips_reg l_counter = counter;

  __asm__ volatile (
    "xor        %[ftmp3],   %[ftmp3],       %[ftmp3]            \n\t"
    "1:                                                         \n\t"
    // Two rows are processed per iteration to reduce loop overhead.
    SAD_SRC_REF_ABS_SUB_4
    MMI_ADDU(%[src],     %[src],         %[src_stride])
    MMI_ADDU(%[ref],     %[ref],         %[ref_stride])
    SAD_SRC_REF_ABS_SUB_4
    MMI_ADDU(%[src],     %[src],         %[src_stride])
    MMI_ADDU(%[ref],     %[ref],         %[ref_stride])
    MMI_ADDIU(%[counter], %[counter], -0x02)
    "bnez       %[counter], 1b                                  \n\t"
    "mfc1       %[sad],     %[ftmp3]                            \n\t"
    : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3),
      [counter]"+&r"(l_counter), [src]"+&r"(src), [ref]"+&r"(ref),
      [sad]"=&r"(sad)
    : [src_stride]"r"((mips_reg)src_stride),
      [ref_stride]"r"((mips_reg)ref_stride)
  );

  return sad;
}

#define vpx_sad4xN(H)                                                   \
  unsigned int vpx_sad4x##H##_mmi(const uint8_t *src, int src_stride,   \
                                  const uint8_t *ref, int ref_stride) { \
    return vpx_sad4x(src, src_stride, ref, ref_stride, H);              \
  }

vpx_sad4xN(8);
vpx_sad4xN(4);
sadMxNxK_mmi(4, 4, 3);
sadMxNxK_mmi(4, 4, 8);
sadMxNx4D_mmi(4, 8);
sadMxNx4D_mmi(4, 4);
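// SAD for 4-wide blocks, with ref averaged against second_pred.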
static inline unsigned int vpx_sad_avg4x(const uint8_t *src, int src_stride,
                                         const uint8_t *ref, int ref_stride,
                                         const uint8_t *second_pred,
                                         int counter) {
  unsigned int sad;
  double ftmp1, ftmp2, ftmp3;
  mips_reg l_counter = counter;

  __asm__ volatile (
    "xor        %[ftmp3],   %[ftmp3],       %[ftmp3]            \n\t"
    "1:                                                         \n\t"
    // Two rows are processed per iteration to reduce loop overhead.
    SAD_SRC_AVGREF_ABS_SUB_4
    MMI_ADDIU(%[second_pred], %[second_pred], 0x04)
    MMI_ADDU(%[src],     %[src],         %[src_stride])
    MMI_ADDU(%[ref],     %[ref],         %[ref_stride])
    SAD_SRC_AVGREF_ABS_SUB_4
    MMI_ADDIU(%[second_pred], %[second_pred], 0x04)
    MMI_ADDU(%[src],     %[src],         %[src_stride])
    MMI_ADDU(%[ref],     %[ref],         %[ref_stride])
    MMI_ADDIU(%[counter], %[counter], -0x02)
    "bnez       %[counter], 1b                                  \n\t"
    "mfc1       %[sad],     %[ftmp3]                            \n\t"
    : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3),
      [counter]"+&r"(l_counter), [src]"+&r"(src), [ref]"+&r"(ref),
      [second_pred]"+&r"((mips_reg)second_pred),
      [sad]"=&r"(sad)
    : [src_stride]"r"((mips_reg)src_stride),
      [ref_stride]"r"((mips_reg)ref_stride)
  );

  return sad;
}

#define vpx_sad_avg4xN(H)                                                   \
  unsigned int vpx_sad4x##H##_avg_mmi(const uint8_t *src, int src_stride,   \
                                      const uint8_t *ref, int ref_stride,   \
                                      const uint8_t *second_pred) {         \
    return vpx_sad_avg4x(src, src_stride, ref, ref_stride, second_pred, H); \
  }

vpx_sad_avg4xN(8);
vpx_sad_avg4xN(4);