1 // Copyright (c) 2012-2018 Intel Corporation
2 //
3 // Permission is hereby granted, free of charge, to any person obtaining a copy
4 // of this software and associated documentation files (the "Software"), to deal
5 // in the Software without restriction, including without limitation the rights
6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 // copies of the Software, and to permit persons to whom the Software is
8 // furnished to do so, subject to the following conditions:
9 //
10 // The above copyright notice and this permission notice shall be included in all
11 // copies or substantial portions of the Software.
12 //
13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19 // SOFTWARE.
20 #include <cm/cm.h>
21 #include <cm/cmtl.h>
22
23 _GENX_ inline
Mean4x4Calculator(matrix<uchar,12,12> src)24 matrix<ushort, 4, 4> Mean4x4Calculator(
25 matrix<uchar, 12, 12> src
26 )
27 {
28 matrix<ushort, 4, 4>
29 mean4x4 = 0;
30 mean4x4(0, 0) = cm_sum<ushort>(src.select<4, 1, 4, 1>(2, 2));
31 mean4x4(0, 1) = cm_sum<ushort>(src.select<4, 1, 4, 1>(2, 4));
32 mean4x4(0, 2) = cm_sum<ushort>(src.select<4, 1, 4, 1>(2, 6));
33 mean4x4(0, 3) = cm_sum<ushort>(src.select<4, 1, 4, 1>(2, 8));
34 mean4x4(1, 0) = cm_sum<ushort>(src.select<4, 1, 4, 1>(4, 2));
35 mean4x4(1, 1) = cm_sum<ushort>(src.select<4, 1, 4, 1>(4, 4));
36 mean4x4(1, 2) = cm_sum<ushort>(src.select<4, 1, 4, 1>(4, 6));
37 mean4x4(1, 3) = cm_sum<ushort>(src.select<4, 1, 4, 1>(4, 8));
38 mean4x4(2, 0) = cm_sum<ushort>(src.select<4, 1, 4, 1>(6, 2));
39 mean4x4(2, 1) = cm_sum<ushort>(src.select<4, 1, 4, 1>(6, 4));
40 mean4x4(2, 2) = cm_sum<ushort>(src.select<4, 1, 4, 1>(6, 6));
41 mean4x4(2, 3) = cm_sum<ushort>(src.select<4, 1, 4, 1>(6, 8));
42 mean4x4(3, 0) = cm_sum<ushort>(src.select<4, 1, 4, 1>(8, 2));
43 mean4x4(3, 1) = cm_sum<ushort>(src.select<4, 1, 4, 1>(8, 4));
44 mean4x4(3, 2) = cm_sum<ushort>(src.select<4, 1, 4, 1>(8, 6));
45 mean4x4(3, 3) = cm_sum<ushort>(src.select<4, 1, 4, 1>(8, 8));
46 return (mean4x4 >>= 4);
47 }
48
49 _GENX_ inline
DispersionCalculator(matrix<uchar,12,12> src)50 matrix<uint, 4, 4> DispersionCalculator(
51 matrix<uchar, 12, 12> src
52 )
53 {
54 matrix<uint, 4, 4>
55 disper = 0;
56 matrix<ushort, 16, 16>
57 var = 0;
58 matrix<ushort, 4, 4>
59 mean4x4 = Mean4x4Calculator(src);
60
61 var.select<4, 1, 4, 1>(0, 0) = cm_abs<ushort>(src.select<4, 1, 4, 1>(0, 0) - mean4x4(0, 0));
62 var.select<4, 1, 4, 1>(0, 4) = cm_abs<ushort>(src.select<4, 1, 4, 1>(0, 2) - mean4x4(0, 1));
63 var.select<4, 1, 4, 1>(0, 8) = cm_abs<ushort>(src.select<4, 1, 4, 1>(0, 4) - mean4x4(0, 2));
64 var.select<4, 1, 4, 1>(0, 12) = cm_abs<ushort>(src.select<4, 1, 4, 1>(0, 6) - mean4x4(0, 3));
65 var.select<4, 1, 4, 1>(4, 0) = cm_abs<ushort>(src.select<4, 1, 4, 1>(2, 0) - mean4x4(1, 0));
66 var.select<4, 1, 4, 1>(4, 4) = cm_abs<ushort>(src.select<4, 1, 4, 1>(2, 2) - mean4x4(1, 1));
67 var.select<4, 1, 4, 1>(4, 8) = cm_abs<ushort>(src.select<4, 1, 4, 1>(2, 4) - mean4x4(1, 2));
68 var.select<4, 1, 4, 1>(4, 12) = cm_abs<ushort>(src.select<4, 1, 4, 1>(2, 6) - mean4x4(1, 3));
69 var.select<4, 1, 4, 1>(8, 0) = cm_abs<ushort>(src.select<4, 1, 4, 1>(4, 0) - mean4x4(2, 0));
70 var.select<4, 1, 4, 1>(8, 4) = cm_abs<ushort>(src.select<4, 1, 4, 1>(4, 2) - mean4x4(2, 1));
71 var.select<4, 1, 4, 1>(8, 8) = cm_abs<ushort>(src.select<4, 1, 4, 1>(4, 4) - mean4x4(2, 2));
72 var.select<4, 1, 4, 1>(8, 12) = cm_abs<ushort>(src.select<4, 1, 4, 1>(4, 6) - mean4x4(2, 3));
73 var.select<4, 1, 4, 1>(12, 0) = cm_abs<ushort>(src.select<4, 1, 4, 1>(6, 0) - mean4x4(3, 0));
74 var.select<4, 1, 4, 1>(12, 4) = cm_abs<ushort>(src.select<4, 1, 4, 1>(6, 2) - mean4x4(3, 1));
75 var.select<4, 1, 4, 1>(12, 8) = cm_abs<ushort>(src.select<4, 1, 4, 1>(6, 4) - mean4x4(3, 2));
76 var.select<4, 1, 4, 1>(12, 12) = cm_abs<ushort>(src.select<4, 1, 4, 1>(6, 6) - mean4x4(3, 3));
77 var *= var;
78
79 disper(0, 0) = cm_sum<uint>(var.select<4, 1, 4, 1>(0, 0));
80 disper(0, 1) = cm_sum<uint>(var.select<4, 1, 4, 1>(0, 4));
81 disper(0, 2) = cm_sum<uint>(var.select<4, 1, 4, 1>(0, 8));
82 disper(0, 3) = cm_sum<uint>(var.select<4, 1, 4, 1>(0, 12));
83 disper(1, 0) = cm_sum<uint>(var.select<4, 1, 4, 1>(4, 0));
84 disper(1, 1) = cm_sum<uint>(var.select<4, 1, 4, 1>(4, 4));
85 disper(1, 2) = cm_sum<uint>(var.select<4, 1, 4, 1>(4, 8));
86 disper(1, 3) = cm_sum<uint>(var.select<4, 1, 4, 1>(4, 12));
87 disper(2, 0) = cm_sum<uint>(var.select<4, 1, 4, 1>(8, 0));
88 disper(2, 1) = cm_sum<uint>(var.select<4, 1, 4, 1>(8, 4));
89 disper(2, 2) = cm_sum<uint>(var.select<4, 1, 4, 1>(8, 8));
90 disper(2, 3) = cm_sum<uint>(var.select<4, 1, 4, 1>(8, 12));
91 disper(3, 0) = cm_sum<uint>(var.select<4, 1, 4, 1>(12, 0));
92 disper(3, 1) = cm_sum<uint>(var.select<4, 1, 4, 1>(12, 4));
93 disper(3, 2) = cm_sum<uint>(var.select<4, 1, 4, 1>(12, 8));
94 disper(3, 3) = cm_sum<uint>(var.select<4, 1, 4, 1>(12, 12));
95
96 return(disper >>= 4);
97 }
98
99 _GENX_ inline
SpatialDenoiser_8x8_NV12_Chroma(SurfaceIndex SURF_SRC,matrix<uchar,12,12> src,uint mbX,uint mbY,short th)100 matrix<uchar, 4, 8> SpatialDenoiser_8x8_NV12_Chroma(
101 SurfaceIndex SURF_SRC,
102 matrix<uchar, 12, 12> src,
103 uint mbX,
104 uint mbY,
105 short th
106 )
107 {
108 uint
109 x = mbX * 8,
110 y = mbY * 8,
111 xch = mbX * 8,
112 ych = mbY * 4;
113
114 matrix<uchar, 6, 12>
115 scm = 0;
116 matrix<uchar, 8, 8>
117 out = 0;
118 matrix<uchar, 4, 8>
119 och = 0;
120 if (th > 0)
121 {
122 float
123 stVal = th;
124 read_plane(SURF_SRC, GENX_SURFACE_UV_PLANE, xch - 2, ych - 1, scm);
125 matrix<uint, 4, 4>
126 Disp = DispersionCalculator(src);
127
128 matrix<float, 4, 4>
129 f_disp = Disp,
130 h1 = 0.0f,
131 h2 = 0.0f,
132 hh = 0.0f,
133 k0 = 0.0f,
134 k1 = 0.0f,
135 k2 = 0.0f;
136 h1 = cm_exp(-(f_disp / (stVal / 10.0f)));
137 f_disp = Disp * 2.0f;
138 h2 = cm_exp(-(f_disp / (stVal / 10.0f)));
139 hh = 1.0f + 4.0f * (h1 + h2);
140 k0 = 1.0f / hh;
141 k1 = h1 / hh;
142 k2 = h2 / hh;
143
144 matrix<float, 4, 8>
145 k0c = 0.0f,
146 k1c = 0.0f,
147 k2c = 0.0f;
148 k0c.select<4, 1, 4, 2>(0, 0) = k0c.select<4, 1, 4, 2>(0, 1) = k0;
149 k1c.select<4, 1, 4, 2>(0, 0) = k1c.select<4, 1, 4, 2>(0, 1) = k1;
150 k2c.select<4, 1, 4, 2>(0, 0) = k2c.select<4, 1, 4, 2>(0, 1) = k2;
151
152 och = (scm.select<4, 1, 8, 1>(1, 2) * k0c) + (
153 (scm.select<4, 1, 8, 1>(1, 0) + scm.select<4, 1, 8, 1>(1, 4) + scm.select<4, 1, 8, 1>(0, 2) + scm.select<4, 1, 8, 1>(2, 2)) * k1c) + (
154 (scm.select<4, 1, 8, 1>(0, 0) + scm.select<4, 1, 8, 1>(0, 4) + scm.select<4, 1, 8, 1>(2, 0) + scm.select<4, 1, 8, 1>(2, 4)) * k2c) + 0.5f;
155 }
156 else
157 read_plane(SURF_SRC, GENX_SURFACE_UV_PLANE, xch, ych, och);
158
159 return och;
160 }
161
162 _GENX_ inline
SpatialDenoiser_8x8_Y(matrix<uchar,12,12> src,short th)163 matrix<uchar, 8, 8> SpatialDenoiser_8x8_Y(
164 matrix<uchar, 12, 12> src,
165 short th
166 )
167 {
168 if (th > 0)
169 {
170 float
171 stVal = th;
172 matrix<uchar, 8, 8>
173 out = 0;
174 matrix<uint, 4, 4>
175 Disp = DispersionCalculator(src);
176
177 matrix<float, 4, 4>
178 f_disp = Disp,
179 h1 = 0.0f,
180 h2 = 0.0f,
181 hh = 0.0f,
182 k0 = 0.0f,
183 k1 = 0.0f,
184 k2 = 0.0f;
185 h1 = cm_exp(-(f_disp / (stVal / 10.0f)));
186 f_disp = Disp * 2.0f;
187 h2 = cm_exp(-(f_disp / (stVal / 10.0f)));
188 hh = 1.0f + 4.0f * (h1 + h2);
189 k0 = 1.0f / hh;
190 k1 = h1 / hh;
191 k2 = h2 / hh;
192
193 #pragma unroll
194 for (uint i = 0; i < 4; i++)
195 #pragma unroll
196 for (uint j = 0; j < 4; j++)
197 out.select<2, 1, 2, 1>(i * 2, j * 2) = (src.select<2, 1, 2, 1>(2 + 2 * i, 2 + 2 * j) * k0(i, j)) + (
198 (src.select<2, 1, 2, 1>(2 + 2 * i, 1 + 2 * j) + src.select<2, 1, 2, 1>(2 + 2 * i, 3 + 2 * j) + src.select<2, 1, 2, 1>(1 + 2 * i, 2 + 2 * j) + src.select<2, 1, 2, 1>(3 + 2 * i, 2 + 2 * j))*k1(i, j)) + (
199 (src.select<2, 1, 2, 1>(1 + 2 * i, 1 + 2 * j) + src.select<2, 1, 2, 1>(1 + 2 * i, 3 + 2 * j) + src.select<2, 1, 2, 1>(3 + 2 * i, 1 + 2 * j) + src.select<2, 1, 2, 1>(3 + 2 * i, 3 + 2 * j))*k2(i, j)) + 0.5f;
200
201 return out;
202 }
203 else
204 return src.select<8, 1, 8, 1>(2, 2);
205 }