1 // Copyright (c) 2012-2020 Intel Corporation
2 //
3 // Permission is hereby granted, free of charge, to any person obtaining a copy
4 // of this software and associated documentation files (the "Software"), to deal
5 // in the Software without restriction, including without limitation the rights
6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 // copies of the Software, and to permit persons to whom the Software is
8 // furnished to do so, subject to the following conditions:
9 //
10 // The above copyright notice and this permission notice shall be included in all
11 // copies or substantial portions of the Software.
12 //
13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19 // SOFTWARE.
20 #include "../include/genx_me_common.h"
21 #include "../include/genx_blend_mc.h"
22 #include "../include/genx_sd_common.h"
23 
24 extern "C" _GENX_MAIN_
McP16_4MV_1SURF_WITH_CHR(SurfaceIndex SURF_CONTROL,SurfaceIndex SURF_REF1,SurfaceIndex SURF_MV16x16_1,SurfaceIndex SURF_SRC,SurfaceIndex SURF_OUT,uint start_xy,uint scene_nums)25 void McP16_4MV_1SURF_WITH_CHR(
26     SurfaceIndex SURF_CONTROL,
27     SurfaceIndex SURF_REF1,
28     SurfaceIndex SURF_MV16x16_1,
29     SurfaceIndex SURF_SRC,
30     SurfaceIndex SURF_OUT,
31     uint         start_xy,
32     uint         scene_nums
33 )
34 {
35     vector<ushort, 2>
36         start_mbXY = start_xy;
37     vector<uint, 1>
38         scene_numbers = scene_nums;
39     uchar
40         scnFref = scene_numbers.format<uchar>()[0],
41         scnSrc = scene_numbers.format<uchar>()[1];
42     uint
43         mbX = get_thread_origin_x() + start_mbXY.format<ushort>()[0],
44         mbY = get_thread_origin_y() + start_mbXY.format<ushort>()[1],
45         x = mbX << 3,
46         y = mbY << 3;
47     vector<uchar, 96>
48         control;
49     read(SURF_CONTROL, 0, control);
50     ushort
51         width = control.format<ushort>()[30],
52         height = control.format<ushort>()[31],
53         th = control.format<short>()[32],
54         sTh = control.format<short>()[35];
55     matrix<short, 2, 4>
56         mv8_g4 = 0,
57         mv = 0;
58     matrix<uchar, 12, 12>
59         srcCh = 0,
60         preFil = 0;
61     matrix<uchar, 8, 8>
62         src = 0,
63         out = 0,
64         out2 = 0,
65         fil = 0;
66     matrix<uchar, 4, 8>
67         och = 0,
68         scm = 0;
69     vector<float, 2>
70         RsCsT = 0.0f;
71     short
72         nsc = scnFref == scnSrc;
73     read_plane(SURF_SRC, GENX_SURFACE_Y_PLANE, x - 2, y - 2, srcCh);
74     if (th > 0)
75     {
76         RsCsT = Genx_RsCs_aprox_8x8Block(srcCh.select<8, 1, 8, 1>(2, 2));
77         //Reference generation
78         mv8_g4 = MV_Neighborhood_read(SURF_MV16x16_1, width, height, mbX, mbY);
79         out = OMC_Ref_Generation(SURF_REF1, width, height, mbX, mbY, mv8_g4);
80         mv = (mv8_g4 * mv8_g4) / 16 * nsc;
81         int size = cm_sum<int>(mv);
82         int simFactor1 = 0;
83         if (nsc)
84         {
85             simFactor1 = mergeStrengthCalculator(out, srcCh, RsCsT, nsc, th, size);
86             srcCh.select<8, 1, 8, 1>(2, 2) = mergeBlocksRef(srcCh, out, simFactor1);
87         }
88         fil = srcCh.select<8, 1, 8, 1>(2, 2);
89         och = SpatialDenoiser_8x8_NV12_Chroma(SURF_SRC, srcCh, mbX, mbY, sTh);
90     }
91     else
92     {
93         read_plane(SURF_SRC, GENX_SURFACE_Y_PLANE, x, y, fil);
94         read_plane(SURF_SRC, GENX_SURFACE_UV_PLANE, x, y >> 1, och);
95     }
96     write_plane(SURF_OUT, GENX_SURFACE_Y_PLANE, x, y, fil);
97     write_plane(SURF_OUT, GENX_SURFACE_UV_PLANE, x, y >> 1, och);
98 }
99 
100 #define TEST 1
101 
102 extern "C" _GENX_MAIN_
McP16_4MV_2SURF_WITH_CHR(SurfaceIndex SURF_CONTROL,SurfaceIndex SURF_REF1,SurfaceIndex SURF_MV16x16_1,SurfaceIndex SURF_REF2,SurfaceIndex SURF_MV16x16_2,SurfaceIndex SURF_SRC,SurfaceIndex SURF_OUT,uint start_xy,uint scene_nums)103 void McP16_4MV_2SURF_WITH_CHR(
104     SurfaceIndex SURF_CONTROL,
105     SurfaceIndex SURF_REF1,
106     SurfaceIndex SURF_MV16x16_1,
107     SurfaceIndex SURF_REF2,
108     SurfaceIndex SURF_MV16x16_2,
109     SurfaceIndex SURF_SRC,
110     SurfaceIndex SURF_OUT,
111     uint         start_xy,
112     uint         scene_nums
113 )
114 {
115     vector<ushort, 2>
116         start_mbXY = start_xy;
117     vector<uint,1>
118         scene_numbers  = scene_nums;
119     uchar
120         scnFref = scene_numbers.format<uchar>()[0],
121         scnBref = scene_numbers.format<uchar>()[2],
122         scnSrc  = scene_numbers.format<uchar>()[1],
123         run     = 0;
124     uint
125         mbX  = get_thread_origin_x() + start_mbXY.format<ushort>()[0],
126         mbY  = get_thread_origin_y() + start_mbXY.format<ushort>()[1],
127         x    = mbX << 3,
128         y    = mbY << 3;
129     vector<uchar, 96>
130         control;
131     read(SURF_CONTROL, 0, control);
132     ushort
133         width  = control.format<ushort>()[30],
134         height = control.format<ushort>()[31],
135         th     = control.format<short>()[32],
136         sTh    = control.format<short>()[35];
137     matrix<short,2,4>
138         mv8_g4 = 0,
139         mv = 0;
140 #if TEST
141     matrix<uchar, 12, 16>
142         srcCh = 0,
143         preFil = 0;
144 #else
145     matrix<uchar, 12, 12>
146         srcCh  = 0,
147         preFil = 0;
148 #endif
149     matrix<uchar, 8, 8>
150         src  = 0,
151         out  = 0,
152         out2 = 0,
153         out3 = 0,
154         out4 = 0,
155         fil  = 0;
156     matrix<uchar, 4, 8>
157         och = 0;
158     matrix<uchar, 4, 8>
159         scm = 0;
160     vector<float, 2>
161         RsCsT = 0.0f,
162         RsCsT1 = 0.0f,
163         RsCsT2 = 0.0f;
164     short
165         dif1 = scnFref == scnSrc,
166         dif2 = scnSrc == scnBref,
167         dift = !dif1 + !dif2;
168     read_plane(SURF_SRC, GENX_SURFACE_UV_PLANE, x, y >> 1, och);
169     if (th > 0)
170     {
171         read_plane(SURF_SRC, GENX_SURFACE_Y_PLANE, x - 2, y - 2, srcCh);
172         RsCsT = Genx_RsCs_aprox_8x8Block(srcCh.select<8, 1, 8, 1>(2, 2));
173         //First reference
174         mv8_g4 = MV_Neighborhood_read(SURF_MV16x16_1, width, height, mbX, mbY);
175         out = OMC_Ref_Generation(SURF_REF1, width, height, mbX, mbY, mv8_g4);
176         mv = (mv8_g4 * mv8_g4) / 16 * dif1;
177         int size1 = cm_sum<int>(mv);
178         //Second reference
179         mv8_g4 = MV_Neighborhood_read(SURF_MV16x16_2, width, height, mbX, mbY);
180         out2 = OMC_Ref_Generation(SURF_REF2, width, height, mbX, mbY, mv8_g4);
181         mv = (mv8_g4 * mv8_g4) / 16 * dif2;
182         int size2 = cm_sum<int>(mv);
183 
184         int size = ((size1 * dif1) + (size2 * dif2));
185         if (dif1 + dif2)
186             size /= (dif1 + dif2);
187 
188         if (size >= DISTANCETH || dift)
189         {
190             int simFactor1 = mergeStrengthCalculator(out,  srcCh, RsCsT, dif1, th, size1);
191             int simFactor2 = mergeStrengthCalculator(out2, srcCh, RsCsT, dif2, th, size2);
192             fil = mergeBlocks2Ref(srcCh, out, simFactor1, out2, simFactor2);
193         }
194         else
195         {
196             out = MedianIdx_8x8_3ref(out, srcCh.select<8, 1, 8, 1>(2, 2), out2);
197             int simFactor1 = mergeStrengthCalculator(out, srcCh, RsCsT, true, th, size);
198             fil = mergeBlocksRef(srcCh, out, simFactor1);
199         }
200     }
201     else
202     {
203         read_plane(SURF_SRC, GENX_SURFACE_Y_PLANE, x, y, fil);
204     }
205     write_plane(SURF_OUT, GENX_SURFACE_Y_PLANE, x, y, fil);
206     write_plane(SURF_OUT, GENX_SURFACE_UV_PLANE, x, y >> 1, och);
207 }
208 
209 extern "C" _GENX_MAIN_
MC_MERGE4(SurfaceIndex SURF_REF1,SurfaceIndex SURF_REF2,uint start_xy)210 void MC_MERGE4(
211     SurfaceIndex SURF_REF1,
212     SurfaceIndex SURF_REF2,
213     uint         start_xy
214 )
215 {
216     vector<ushort, 2>
217         start_mbXY = start_xy;
218     uint
219         mbX = get_thread_origin_x() + start_mbXY.format<ushort>()[0],
220         mbY = get_thread_origin_y() + start_mbXY.format<ushort>()[1],
221         x = mbX << 4,
222         y = mbY << 4;
223     matrix<uchar, 16, 16>
224         ref1 = 0,
225         ref2 = 0;
226 
227     read_plane(SURF_REF1, GENX_SURFACE_Y_PLANE, x, y, ref1);
228     read_plane(SURF_REF2, GENX_SURFACE_Y_PLANE, x, y, ref2);
229 
230     ref1 = (ref1 + ref2 + 1) >> 1;
231     write_plane(SURF_REF1, GENX_SURFACE_Y_PLANE, x, y, ref1);
232 }
233 
234 #define VAR_SC_DATA_SIZE 8 //2 * size of float in bytes
235 extern "C" _GENX_MAIN_
MC_VAR_SC_CALC(SurfaceIndex SURF_SRC,SurfaceIndex SURF_NOISE,uint start_xy)236 void MC_VAR_SC_CALC(
237     SurfaceIndex SURF_SRC,
238     SurfaceIndex SURF_NOISE,
239     uint         start_xy
240 )
241 {
242     vector<ushort, 2>
243         start_mbXY = start_xy;
244     int
245         mbX = get_thread_origin_x() + start_mbXY.format<ushort>()[0],
246         mbY = get_thread_origin_y() + start_mbXY.format<ushort>()[1],
247         x = mbX * 16,
248         y = mbY * 16;
249     matrix<uchar, 17, 32>
250         src = 0;
251     matrix<short, 16, 16>
252         tmp = 0;
253     matrix<float, 1, 2>
254         var_sc = 0.0f;
255 
256     read_plane(SURF_SRC, GENX_SURFACE_Y_PLANE, x - 1, y - 1, src.select<8, 1, 32, 1>(0, 0));
257     read_plane(SURF_SRC, GENX_SURFACE_Y_PLANE, x - 1, y + 7, src.select<8, 1, 32, 1>(8, 0));
258     read_plane(SURF_SRC, GENX_SURFACE_Y_PLANE, x - 1, y + 15, src.select<1, 1, 32, 1>(16, 0));
259     matrix<ushort, 4, 4>
260         rs4x4,
261         cs4x4;
262     matrix<short, 16, 16>
263         tmpRs = src.select<16, 1, 16, 1>(0, 1) - src.select<16, 1, 16, 1>(1, 1),
264         tmpCs = src.select<16, 1, 16, 1>(1, 0) - src.select<16, 1, 16, 1>(1, 1);
265 #pragma unroll
266     for (uchar i = 0; i < 4; i++)
267     {
268 #pragma unroll
269         for (uchar j = 0; j < 4; j++)
270         {
271             rs4x4[i][j] = cm_shr<ushort>(cm_sum<uint>(cm_mul<ushort>(tmpRs.select<4, 1, 4, 1>(i << 2, j << 2), tmpRs.select<4, 1, 4, 1>(i << 2, j << 2))), 4, SAT);
272             cs4x4[i][j] = cm_shr<ushort>(cm_sum<uint>(cm_mul<ushort>(tmpCs.select<4, 1, 4, 1>(i << 2, j << 2), tmpCs.select<4, 1, 4, 1>(i << 2, j << 2))), 4, SAT);
273         }
274     }
275     float
276         average = cm_sum<float>(src.select<16, 1, 16, 1>(1, 1)) / 256.0f,
277         square = cm_sum<float>(cm_mul<ushort>(src.select<16, 1, 16, 1>(1, 1), src.select<16, 1, 16, 1>(1, 1))) / 256.0f;
278     var_sc(0, 0) = (square - average * average);//Variance value
279     float
280         RsFull = cm_sum<float>(rs4x4),
281         CsFull = cm_sum<float>(cs4x4);
282     var_sc(0, 1) = (RsFull + CsFull) / 16.0f;     //RsCs value
283 
284     write(SURF_NOISE, mbX * VAR_SC_DATA_SIZE, mbY, var_sc);
285 }