1 /*
2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "./vp8_rtcd.h"
12 #include "vpx_config.h"
13 #include "vp8/common/variance.h"
14 #include "vpx_ports/mem.h"
15 
/*
 * Variance kernel defined outside this file.
 *
 * The sub-pixel variance wrappers in this file call it when both xoffset and
 * yoffset equal 4, passing a Height of 16 or 8. On return they read *sum and
 * *sumsquared to form the variance.
 *
 * NOTE(review): presumably the half-pel horizontal+vertical case, implemented
 * in SSE2 assembly -- confirm against the definition.
 */
extern void vp8_half_horiz_vert_variance16x_h_sse2(
    const unsigned char *ref_ptr, int ref_pixels_per_line,
    const unsigned char *src_ptr, int src_pixels_per_line,
    unsigned int Height, int *sum, unsigned int *sumsquared);
/*
 * Variance kernel defined outside this file.
 *
 * The sub-pixel variance wrappers in this file call it when xoffset == 4 and
 * yoffset == 0, passing a Height of 16 or 8. On return they read *sum and
 * *sumsquared to form the variance.
 *
 * NOTE(review): presumably the half-pel horizontal-only case, implemented in
 * SSE2 assembly -- confirm against the definition.
 */
extern void vp8_half_horiz_variance16x_h_sse2(
    const unsigned char *ref_ptr, int ref_pixels_per_line,
    const unsigned char *src_ptr, int src_pixels_per_line,
    unsigned int Height, int *sum, unsigned int *sumsquared);
/*
 * Variance kernel defined outside this file.
 *
 * The sub-pixel variance wrappers in this file call it when xoffset == 0 and
 * yoffset == 4, passing a Height of 16 or 8. On return they read *sum and
 * *sumsquared to form the variance.
 *
 * NOTE(review): presumably the half-pel vertical-only case, implemented in
 * SSE2 assembly -- confirm against the definition.
 */
extern void vp8_half_vert_variance16x_h_sse2(
    const unsigned char *ref_ptr, int ref_pixels_per_line,
    const unsigned char *src_ptr, int src_pixels_per_line,
    unsigned int Height, int *sum, unsigned int *sumsquared);
/*
 * Variance kernel defined outside this file.
 *
 * The sub-pixel variance wrappers in this file fall back to it for every
 * (xoffset, yoffset) pair not covered by the three half-offset kernels,
 * forwarding both offsets unchanged together with a Height of 16 or 8. On
 * return they read *sum and *sumsquared to form the variance.
 *
 * NOTE(review): presumably a general bilinear-filter + variance routine
 * implemented in SSSE3 assembly -- confirm against the definition.
 */
extern void vp8_filter_block2d_bil_var_ssse3(
    const unsigned char *ref_ptr, int ref_pixels_per_line,
    const unsigned char *src_ptr, int src_pixels_per_line,
    unsigned int Height, int xoffset, int yoffset,
    int *sum, unsigned int *sumsquared);
58 
/*
 * Sub-pixel variance for a 16x16 block.
 *
 * Selects one of four external kernels from (xoffset, yoffset) and has it
 * accumulate a signed sum and an unsigned sum of squares over 16 rows.
 * The sum of squares is stored through sse; the return value is
 * sse - (sum * sum) / 256.
 *
 * Argument forwarding: src_ptr / src_pixels_per_line go to the kernels'
 * ref_ptr / ref_pixels_per_line slots, and dst_ptr / dst_pixels_per_line go
 * to their src_ptr / src_pixels_per_line slots.
 *
 * The offset dispatch could be dropped if callers invoked the specialised
 * kernels directly.
 */
unsigned int vp8_sub_pixel_variance16x16_ssse3(const unsigned char *src_ptr,
                                               int src_pixels_per_line,
                                               int xoffset, int yoffset,
                                               const unsigned char *dst_ptr,
                                               int dst_pixels_per_line,
                                               unsigned int *sse)
{
    const int x_is_half = (xoffset == 4);
    const int y_is_half = (yoffset == 4);
    int sum;
    unsigned int sum_sq;
    unsigned int mean_sq;

    if (x_is_half && y_is_half)
    {
        vp8_half_horiz_vert_variance16x_h_sse2(src_ptr, src_pixels_per_line,
                                               dst_ptr, dst_pixels_per_line,
                                               16, &sum, &sum_sq);
    }
    else if (x_is_half && yoffset == 0)
    {
        vp8_half_horiz_variance16x_h_sse2(src_ptr, src_pixels_per_line,
                                          dst_ptr, dst_pixels_per_line,
                                          16, &sum, &sum_sq);
    }
    else if (y_is_half && xoffset == 0)
    {
        vp8_half_vert_variance16x_h_sse2(src_ptr, src_pixels_per_line,
                                         dst_ptr, dst_pixels_per_line,
                                         16, &sum, &sum_sq);
    }
    else
    {
        /* Every other offset pair goes through the general kernel. */
        vp8_filter_block2d_bil_var_ssse3(src_ptr, src_pixels_per_line,
                                         dst_ptr, dst_pixels_per_line,
                                         16, xoffset, yoffset,
                                         &sum, &sum_sq);
    }

    *sse = sum_sq;

    /* The cast makes the multiplication unsigned, so it cannot hit signed
     * overflow; >> 8 divides by 256. */
    mean_sq = ((unsigned int)sum * sum) >> 8;
    return sum_sq - mean_sq;
}
109 
/*
 * Sub-pixel variance for a 16x8 block.
 *
 * Selects one of four external kernels from (xoffset, yoffset) and has it
 * accumulate a signed sum and an unsigned sum of squares over 8 rows.
 * The sum of squares is stored through sse; the return value is
 * sse - (sum * sum) / 128.
 *
 * Argument forwarding: src_ptr / src_pixels_per_line go to the kernels'
 * ref_ptr / ref_pixels_per_line slots, and dst_ptr / dst_pixels_per_line go
 * to their src_ptr / src_pixels_per_line slots.
 */
unsigned int vp8_sub_pixel_variance16x8_ssse3(const unsigned char *src_ptr,
                                              int src_pixels_per_line,
                                              int xoffset, int yoffset,
                                              const unsigned char *dst_ptr,
                                              int dst_pixels_per_line,
                                              unsigned int *sse)
{
    const int x_is_half = (xoffset == 4);
    const int y_is_half = (yoffset == 4);
    int sum;
    unsigned int sum_sq;
    unsigned int mean_sq;

    if (x_is_half && y_is_half)
    {
        vp8_half_horiz_vert_variance16x_h_sse2(src_ptr, src_pixels_per_line,
                                               dst_ptr, dst_pixels_per_line,
                                               8, &sum, &sum_sq);
    }
    else if (x_is_half && yoffset == 0)
    {
        vp8_half_horiz_variance16x_h_sse2(src_ptr, src_pixels_per_line,
                                          dst_ptr, dst_pixels_per_line,
                                          8, &sum, &sum_sq);
    }
    else if (y_is_half && xoffset == 0)
    {
        vp8_half_vert_variance16x_h_sse2(src_ptr, src_pixels_per_line,
                                         dst_ptr, dst_pixels_per_line,
                                         8, &sum, &sum_sq);
    }
    else
    {
        /* Every other offset pair goes through the general kernel. */
        vp8_filter_block2d_bil_var_ssse3(src_ptr, src_pixels_per_line,
                                         dst_ptr, dst_pixels_per_line,
                                         8, xoffset, yoffset,
                                         &sum, &sum_sq);
    }

    *sse = sum_sq;

    /* The cast makes the multiplication unsigned, so it cannot hit signed
     * overflow; >> 7 divides by 128. */
    mean_sq = ((unsigned int)sum * sum) >> 7;
    return sum_sq - mean_sq;
}
158