1 /*
2  *  Copyright 2017 The LibYuv Project Authors. All rights reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS. All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "libyuv/basic_types.h"
12 
13 #include "libyuv/compare_row.h"
14 #include "libyuv/row.h"
15 
16 // This module is for GCC MSA
17 #if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
18 #include "libyuv/macros_msa.h"
19 
20 #ifdef __cplusplus
21 namespace libyuv {
22 extern "C" {
23 #endif
24 
HammingDistance_MSA(const uint8_t * src_a,const uint8_t * src_b,int count)25 uint32_t HammingDistance_MSA(const uint8_t* src_a,
26                              const uint8_t* src_b,
27                              int count) {
28   uint32_t diff = 0u;
29   int i;
30   v16u8 src0, src1, src2, src3;
31   v2i64 vec0 = {0}, vec1 = {0};
32 
33   for (i = 0; i < count; i += 32) {
34     src0 = (v16u8)__msa_ld_b((v16i8*)src_a, 0);
35     src1 = (v16u8)__msa_ld_b((v16i8*)src_a, 16);
36     src2 = (v16u8)__msa_ld_b((v16i8*)src_b, 0);
37     src3 = (v16u8)__msa_ld_b((v16i8*)src_b, 16);
38     src0 ^= src2;
39     src1 ^= src3;
40     vec0 += __msa_pcnt_d((v2i64)src0);
41     vec1 += __msa_pcnt_d((v2i64)src1);
42     src_a += 32;
43     src_b += 32;
44   }
45 
46   vec0 += vec1;
47   diff = (uint32_t)__msa_copy_u_w((v4i32)vec0, 0);
48   diff += (uint32_t)__msa_copy_u_w((v4i32)vec0, 2);
49   return diff;
50 }
51 
SumSquareError_MSA(const uint8_t * src_a,const uint8_t * src_b,int count)52 uint32_t SumSquareError_MSA(const uint8_t* src_a,
53                             const uint8_t* src_b,
54                             int count) {
55   uint32_t sse = 0u;
56   int i;
57   v16u8 src0, src1, src2, src3;
58   v8i16 vec0, vec1, vec2, vec3;
59   v4i32 reg0 = {0}, reg1 = {0}, reg2 = {0}, reg3 = {0};
60   v2i64 tmp0;
61 
62   for (i = 0; i < count; i += 32) {
63     src0 = (v16u8)__msa_ld_b((v16i8*)src_a, 0);
64     src1 = (v16u8)__msa_ld_b((v16i8*)src_a, 16);
65     src2 = (v16u8)__msa_ld_b((v16i8*)src_b, 0);
66     src3 = (v16u8)__msa_ld_b((v16i8*)src_b, 16);
67     vec0 = (v8i16)__msa_ilvr_b((v16i8)src2, (v16i8)src0);
68     vec1 = (v8i16)__msa_ilvl_b((v16i8)src2, (v16i8)src0);
69     vec2 = (v8i16)__msa_ilvr_b((v16i8)src3, (v16i8)src1);
70     vec3 = (v8i16)__msa_ilvl_b((v16i8)src3, (v16i8)src1);
71     vec0 = __msa_hsub_u_h((v16u8)vec0, (v16u8)vec0);
72     vec1 = __msa_hsub_u_h((v16u8)vec1, (v16u8)vec1);
73     vec2 = __msa_hsub_u_h((v16u8)vec2, (v16u8)vec2);
74     vec3 = __msa_hsub_u_h((v16u8)vec3, (v16u8)vec3);
75     reg0 = __msa_dpadd_s_w(reg0, vec0, vec0);
76     reg1 = __msa_dpadd_s_w(reg1, vec1, vec1);
77     reg2 = __msa_dpadd_s_w(reg2, vec2, vec2);
78     reg3 = __msa_dpadd_s_w(reg3, vec3, vec3);
79     src_a += 32;
80     src_b += 32;
81   }
82 
83   reg0 += reg1;
84   reg2 += reg3;
85   reg0 += reg2;
86   tmp0 = __msa_hadd_s_d(reg0, reg0);
87   sse = (uint32_t)__msa_copy_u_w((v4i32)tmp0, 0);
88   sse += (uint32_t)__msa_copy_u_w((v4i32)tmp0, 2);
89   return sse;
90 }
91 
92 #ifdef __cplusplus
93 }  // extern "C"
94 }  // namespace libyuv
95 #endif
96 
97 #endif  // !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
98