/*
 *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10 
11 #include "libyuv/basic_types.h"
12 
13 #include "libyuv/compare_row.h"
14 
15 #ifdef __cplusplus
16 namespace libyuv {
17 extern "C" {
18 #endif
19 
// This module is for MIPS MMI.
21 #if !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A)
22 
23 // Hakmem method for hamming distance.
HammingDistance_MMI(const uint8_t * src_a,const uint8_t * src_b,int count)24 uint32_t HammingDistance_MMI(const uint8_t* src_a,
25                              const uint8_t* src_b,
26                              int count) {
27   uint32_t diff = 0u;
28 
29   uint64_t temp = 0, temp1 = 0, ta = 0, tb = 0;
30   uint64_t c1 = 0x5555555555555555;
31   uint64_t c2 = 0x3333333333333333;
32   uint64_t c3 = 0x0f0f0f0f0f0f0f0f;
33   uint32_t c4 = 0x01010101;
34   uint64_t s1 = 1, s2 = 2, s3 = 4;
35   __asm__ volatile(
36       "1:	\n\t"
37       "ldc1   %[ta],    0(%[src_a])          \n\t"
38       "ldc1   %[tb],    0(%[src_b])          \n\t"
39       "xor    %[temp],  %[ta],      %[tb]    \n\t"
40       "psrlw  %[temp1], %[temp],    %[s1]    \n\t"  // temp1=x>>1
41       "and    %[temp1], %[temp1],   %[c1]    \n\t"  // temp1&=c1
42       "psubw  %[temp1], %[temp],    %[temp1] \n\t"  // x-temp1
43       "and    %[temp],  %[temp1],   %[c2]    \n\t"  // t = (u&c2)
44       "psrlw  %[temp1], %[temp1],   %[s2]    \n\t"  // u>>2
45       "and    %[temp1], %[temp1],   %[c2]    \n\t"  // u>>2 & c2
46       "paddw  %[temp1], %[temp1],   %[temp]  \n\t"  // t1 = t1+t
47       "psrlw  %[temp],  %[temp1],   %[s3]    \n\t"  // u>>4
48       "paddw  %[temp1], %[temp1],   %[temp]  \n\t"  // u+(u>>4)
49       "and    %[temp1], %[temp1],   %[c3]    \n\t"  //&c3
50       "dmfc1  $t0,      %[temp1]             \n\t"
51       "dsrl32 $t0,      $t0,        0        \n\t "
52       "mul    $t0,      $t0,        %[c4]    \n\t"
53       "dsrl   $t0,      $t0,        24       \n\t"
54       "dadd   %[diff],  %[diff],    $t0      \n\t"
55       "dmfc1  $t0,      %[temp1]             \n\t"
56       "mul    $t0,      $t0,        %[c4]    \n\t"
57       "dsrl   $t0,      $t0,        24       \n\t"
58       "dadd   %[diff],  %[diff],    $t0      \n\t"
59       "daddiu %[src_a], %[src_a],   8        \n\t"
60       "daddiu %[src_b], %[src_b],   8        \n\t"
61       "addiu  %[count], %[count],  -8        \n\t"
62       "bgtz   %[count], 1b \n\t"
63       "nop                            \n\t"
64       : [diff] "+r"(diff), [src_a] "+r"(src_a), [src_b] "+r"(src_b),
65         [count] "+r"(count), [ta] "+f"(ta), [tb] "+f"(tb), [temp] "+f"(temp),
66         [temp1] "+f"(temp1)
67       : [c1] "f"(c1), [c2] "f"(c2), [c3] "f"(c3), [c4] "r"(c4), [s1] "f"(s1),
68         [s2] "f"(s2), [s3] "f"(s3)
69       : "memory");
70   return diff;
71 }
72 
SumSquareError_MMI(const uint8_t * src_a,const uint8_t * src_b,int count)73 uint32_t SumSquareError_MMI(const uint8_t* src_a,
74                             const uint8_t* src_b,
75                             int count) {
76   uint32_t sse = 0u;
77   uint32_t sse_hi = 0u, sse_lo = 0u;
78 
79   uint64_t src1, src2;
80   uint64_t diff, diff_hi, diff_lo;
81   uint64_t sse_sum, sse_tmp;
82 
83   const uint64_t mask = 0x0ULL;
84 
85   __asm__ volatile(
86       "xor        %[sse_sum],      %[sse_sum],        %[sse_sum]    \n\t"
87 
88       "1:                                                           \n\t"
89       "ldc1       %[src1],         0x00(%[src_a])                   \n\t"
90       "ldc1       %[src2],         0x00(%[src_b])                   \n\t"
91       "pasubub    %[diff],         %[src1],           %[src2]       \n\t"
92       "punpcklbh  %[diff_lo],      %[diff],           %[mask]       \n\t"
93       "punpckhbh  %[diff_hi],      %[diff],           %[mask]       \n\t"
94       "pmaddhw    %[sse_tmp],      %[diff_lo],        %[diff_lo]    \n\t"
95       "paddw      %[sse_sum],      %[sse_sum],        %[sse_tmp]    \n\t"
96       "pmaddhw    %[sse_tmp],      %[diff_hi],        %[diff_hi]    \n\t"
97       "paddw      %[sse_sum],      %[sse_sum],        %[sse_tmp]    \n\t"
98 
99       "daddiu     %[src_a],        %[src_a],          0x08          \n\t"
100       "daddiu     %[src_b],        %[src_b],          0x08          \n\t"
101       "daddiu     %[count],        %[count],         -0x08          \n\t"
102       "bnez       %[count],        1b                               \n\t"
103 
104       "mfc1       %[sse_lo],       %[sse_sum]                       \n\t"
105       "mfhc1      %[sse_hi],       %[sse_sum]                       \n\t"
106       "daddu      %[sse],          %[sse_hi],         %[sse_lo]     \n\t"
107       : [sse] "+&r"(sse), [diff] "=&f"(diff), [src1] "=&f"(src1),
108         [src2] "=&f"(src2), [diff_lo] "=&f"(diff_lo), [diff_hi] "=&f"(diff_hi),
109         [sse_sum] "=&f"(sse_sum), [sse_tmp] "=&f"(sse_tmp),
110         [sse_hi] "+&r"(sse_hi), [sse_lo] "+&r"(sse_lo)
111       : [src_a] "r"(src_a), [src_b] "r"(src_b), [count] "r"(count),
112         [mask] "f"(mask)
113       : "memory");
114 
115   return sse;
116 }
117 
118 #endif  // !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A)
119 
120 #ifdef __cplusplus
121 }  // extern "C"
122 }  // namespace libyuv
123 #endif
124