1 // Copyright (c) 2012-2020 Intel Corporation
2 //
3 // Permission is hereby granted, free of charge, to any person obtaining a copy
4 // of this software and associated documentation files (the "Software"), to deal
5 // in the Software without restriction, including without limitation the rights
6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 // copies of the Software, and to permit persons to whom the Software is
8 // furnished to do so, subject to the following conditions:
9 //
10 // The above copyright notice and this permission notice shall be included in all
11 // copies or substantial portions of the Software.
12 //
13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19 // SOFTWARE.
20 #include "../include/genx_me_common.h"
21 #define COMPLEX_BIDIR 1
22 #define INVERTMOTION 1
23
24 typedef matrix<uchar, 4, 32> UniIn;
25
26 _GENX_ inline
SetRef(vector_ref<short,2>,vector<short,2> mv_predictor,vector_ref<char,2> searchWindow,vector<uchar,2>,vector_ref<short,2> reference)27 void SetRef(
28 vector_ref<short, 2> /*source*/, // IN: SourceX, SourceY
29 vector<short, 2> mv_predictor, // IN: mv predictor
30 vector_ref<char, 2> searchWindow, // IN: reference window w/h
31 vector<uchar, 2> /*picSize*/, // IN: pic size w/h
32 vector_ref<short, 2> reference
33 ) // OUT: Ref0X, Ref0Y
34 {
35 vector<short, 2>
36 Width = (searchWindow - 16) >> 1,
37 MaxMvLen,
38 mask,
39 res,
40 otherRes;
41
42 // set up parameters
43 MaxMvLen[0] = 0x7fff / 4;
44 MaxMvLen[1] = 0x7fff / 4;
45
46 // fields and MBAFF are not supported
47 // remove quater pixel fraction
48 mv_predictor = mv_predictor >> 2;
49
50 //
51 // set the reference position
52 //
53 reference = mv_predictor;
54 reference[1] &= -2;
55 reference -= Width;
56
57 res = MaxMvLen - Width;
58 mask = (mv_predictor > res);
59 otherRes = MaxMvLen - (searchWindow - 16);
60 reference.merge(otherRes, mask);
61
62 res = -res;
63 mask = (mv_predictor < res);
64 otherRes = -MaxMvLen;
65 reference.merge(otherRes, mask);
66 }
67
68 extern "C" _GENX_MAIN_
MeP16_1MV_MRE(SurfaceIndex SURF_CONTROL,SurfaceIndex SURF_SRC_AND_REF,SurfaceIndex SURF_DIST16x16,SurfaceIndex SURF_MV16x16,uint start_xy,uchar blSize)69 void MeP16_1MV_MRE(
70 SurfaceIndex SURF_CONTROL,
71 SurfaceIndex SURF_SRC_AND_REF,
72 SurfaceIndex SURF_DIST16x16,
73 SurfaceIndex SURF_MV16x16,
74 uint start_xy,
75 uchar blSize
76 )
77 {
78 vector<uint, 1>
79 start_mbXY = start_xy;
80 uint
81 mbX = get_thread_origin_x() + start_mbXY.format<ushort>()[0],
82 mbY = get_thread_origin_y() + start_mbXY.format<ushort>()[1],
83 x = mbX * blSize,
84 y = mbY * blSize;
85
86 vector<uchar, 96> control;
87 read(SURF_CONTROL, 0, control);
88
89 uchar
90 maxNumSu = control.format<uchar>()[56],
91 lenSp = control.format<uchar>()[57];
92 ushort
93 width = control.format<ushort>()[30],
94 height = control.format<ushort>()[31],
95 mre_width = control.format<ushort>()[33],
96 mre_height = control.format<ushort>()[34],
97 precision = control.format<ushort>()[36];
98
99 cm_assert(x > width);
100 // read MB record data
101 UniIn
102 uniIn = 0;
103 matrix<uchar, 9, 32>
104 imeOut;
105 matrix<uchar, 2, 32>
106 imeIn = 0;
107 matrix<uchar, 4, 32>
108 fbrIn;
109
110 // declare parameters for VME
111 matrix<uint, 16, 2>
112 costs = 0;
113 vector<short, 2>
114 mvPred = 0,
115 mvPred2 = 0;
116 uchar
117 x_r = 64,
118 y_r = 32;
119
120 // load search path
121 imeIn.select<2, 1, 32, 1>(0) = control.select<64, 1>(0);
122
123 // M0.2
124 VME_SET_UNIInput_SrcX(uniIn, x);
125 VME_SET_UNIInput_SrcY(uniIn, y);
126
127 // M0.3 various prediction parameters
128 VME_SET_DWORD(uniIn, 0, 3, 0x76a40000); // BMEDisableFBR=1 InterSAD=2 8x8 16x16
129 //VME_SET_DWORD(uniIn, 0, 3, 0x76a00000); // BMEDisableFBR=0 InterSAD=2 SubMbPartMask=0x76: 8x8 16x16
130 //VME_SET_UNIInput_BMEDisableFBR(uniIn);
131 // M1.1 MaxNumMVs
132 VME_SET_UNIInput_MaxNumMVs(uniIn, 32);
133 // M0.5 Reference Window Width & Height
134 VME_SET_UNIInput_RefW(uniIn, x_r);//48);
135 VME_SET_UNIInput_RefH(uniIn, y_r);//40);
136 VME_SET_UNIInput_EarlyImeSuccessEn(uniIn);
137
138 // M0.0 Ref0X, Ref0Y
139 vector_ref<short, 2>
140 sourceXY = uniIn.row(0).format<short>().select<2, 1>(4);
141 vector<uchar, 2>
142 widthHeight;
143 widthHeight[0] = (height >> 4) - 1;
144 widthHeight[1] = (width >> 4);
145 vector_ref<char, 2>
146 searchWindow = uniIn.row(0).format<char>().select<2, 1>(22);
147
148 vector_ref<short, 2>
149 ref0XY = uniIn.row(0).format<short>().select<2, 1>(0);
150 SetRef(sourceXY, mvPred, searchWindow, widthHeight, ref0XY);
151
152 vector_ref<short, 2>
153 ref1XY = uniIn.row(0).format<short>().select<2, 1>(2);
154 SetRef(sourceXY, mvPred2, searchWindow, widthHeight, ref1XY);
155
156 // M1.0-3 Search path parameters & start centers & MaxNumMVs again!!!
157 VME_SET_UNIInput_AdaptiveEn(uniIn);
158 VME_SET_UNIInput_T8x8FlagForInterEn(uniIn);
159 VME_SET_UNIInput_MaxNumMVs(uniIn, 0x3f);
160 VME_SET_UNIInput_MaxNumSU(uniIn, maxNumSu);
161 VME_SET_UNIInput_LenSP(uniIn, lenSp);
162 //VME_SET_UNIInput_BiWeight(uniIn, 32);
163
164 // M1.2 Start0X, Start0Y
165 vector<char, 2>
166 start0 = searchWindow;
167 start0 = ((start0 - 16) >> 3) & 0x0f;
168 uniIn.row(1)[10] = start0[0] | (start0[1] << 4);
169
170 uniIn.row(1)[6] = 0x20;
171 uniIn.row(1)[31] = 0x1;
172
173 vector<short, 2>
174 ref0 = uniIn.row(0).format<short>().select<2, 1>(0);
175 vector<ushort, 16>
176 costCenter = uniIn.row(3).format<ushort>().select<16, 1>(0);
177
178 vector<short, 2>
179 mv16;
180 matrix<uint, 1, 1>
181 dist16x16;
182 run_vme_ime(uniIn, imeIn,
183 VME_STREAM_OUT, VME_SEARCH_SINGLE_REF_SINGLE_REC_SINGLE_START,
184 SURF_SRC_AND_REF, ref0XY, NULL, costCenter, imeOut);
185 VME_GET_IMEOutput_Rec0_16x16_Mv(imeOut, mv16);
186 VME_GET_IMEOutput_Rec0_16x16_Distortion(imeOut, dist16x16);
187
188 // distortions calculated before updates (subpel, bidir search)
189 write(SURF_DIST16x16, mbX * DIST_SIZE, mbY, dist16x16); //16x16 Forward SAD
190
191 if (precision)
192 {//QPEL
193 VME_SET_UNIInput_SubPelMode(uniIn, 3);
194 VME_CLEAR_UNIInput_BMEDisableFBR(uniIn);
195 SLICE(fbrIn.format<uint>(), 1, 16, 2) = 0; // zero L1 motion vectors
196 VME_SET_UNIInput_FBRMbModeInput(uniIn, 0);
197 VME_SET_UNIInput_FBRSubMBShapeInput(uniIn, 0);
198 VME_SET_UNIInput_FBRSubPredModeInput(uniIn, 3);
199 matrix<uchar, 7, 32>
200 fbrOut16x16;
201 fbrIn.format<uint, 4, 8>().select<4, 1, 4, 2>(0, 0) = mv16.format<uint>()[0]; // motion vectors 16x16
202 run_vme_fbr(uniIn, fbrIn, SURF_SRC_AND_REF, 0, 0, 0, fbrOut16x16);
203 VME_GET_FBROutput_Rec0_16x16_Mv(fbrOut16x16, mv16);
204 VME_GET_FBROutput_Dist_16x16_Bi(fbrOut16x16, dist16x16);
205 }
206
207 // distortions Actual complete distortion
208 //write(SURF_DIST16x16, mbX * DIST_SIZE, mbY, dist16x16);
209
210 // motion vectors
211 write(SURF_MV16x16, mbX * MVDATA_SIZE, mbY, mv16); //16x16mv Ref0
212 }
213
214 extern "C" _GENX_MAIN_
MeP16_1MV_MRE_8x8(SurfaceIndex SURF_CONTROL,SurfaceIndex SURF_SRC_AND_REF,SurfaceIndex SURF_DIST8x8,SurfaceIndex SURF_MV8x8,uint start_xy,uchar blSize)215 void MeP16_1MV_MRE_8x8(
216 SurfaceIndex SURF_CONTROL,
217 SurfaceIndex SURF_SRC_AND_REF,
218 SurfaceIndex SURF_DIST8x8,
219 SurfaceIndex SURF_MV8x8,
220 uint start_xy,
221 uchar blSize
222 )
223 {
224 vector<uint, 1>
225 start_mbXY = start_xy;
226 uint
227 mbX = get_thread_origin_x() + start_mbXY.format<ushort>()[0],
228 mbY = get_thread_origin_y() + start_mbXY.format<ushort>()[1],
229 x = mbX * blSize,
230 y = mbY * blSize;
231
232 vector<uchar, 96>
233 control;
234 read(SURF_CONTROL, 0, control);
235
236 uchar
237 maxNumSu = control.format<uchar>()[56],
238 lenSp = control.format<uchar>()[57];
239 ushort
240 width = control.format<ushort>()[30],
241 height = control.format<ushort>()[31],
242 mre_width = control.format<ushort>()[33],
243 mre_height = control.format<ushort>()[34],
244 precision = control.format<ushort>()[36];
245
246
247 // read MB record data
248 UniIn
249 uniIn = 0;
250 matrix<uchar, 9, 32>
251 imeOut;
252 matrix<uchar, 2, 32>
253 imeIn = 0;
254 matrix<uchar, 4, 32>
255 fbrIn;
256
257 // declare parameters for VME
258 matrix<uint, 16, 2>
259 costs = 0;
260 vector<short, 2>
261 mvPred = 0,
262 mvPred2 = 0;
263 //read(SURF_MV16x16, mbX * MVDATA_SIZE, mbY, mvPred); // these pred MVs will be updated later here
264 uchar
265 x_r = 64,
266 y_r = 32;
267
268 // load search path
269 imeIn.select<2, 1, 32, 1>(0) = control.select<64, 1>(0);
270
271 // M0.2
272 VME_SET_UNIInput_SrcX(uniIn, x);
273 VME_SET_UNIInput_SrcY(uniIn, y);
274
275 // M0.3 various prediction parameters
276 //VME_SET_DWORD(uniIn, 0, 3, 0x76a40000); // BMEDisableFBR=1 InterSAD=2 8x8 16x16
277 //VME_SET_DWORD(uniIn, 0, 3, 0x76a00000); // BMEDisableFBR=0 InterSAD=2 SubMbPartMask=0x76: 8x8 16x16
278 VME_SET_DWORD(uniIn, 0, 3, 0x77a00000); // BMEDisableFBR=0 InterSAD=2 SubMbPartMask=0x77: 8x8
279 //VME_SET_UNIInput_BMEDisableFBR(uniIn);
280 // M1.1 MaxNumMVs
281 VME_SET_UNIInput_MaxNumMVs(uniIn, 32);
282 // M0.5 Reference Window Width & Height
283 VME_SET_UNIInput_RefW(uniIn, x_r);//48);
284 VME_SET_UNIInput_RefH(uniIn, y_r);//40);
285
286 // M0.0 Ref0X, Ref0Y
287 vector_ref<short, 2>
288 sourceXY = uniIn.row(0).format<short>().select<2, 1>(4);
289 vector<uchar, 2>
290 widthHeight;
291 widthHeight[0] = (height >> 4) - 1;
292 widthHeight[1] = (width >> 4);
293 vector_ref<char, 2>
294 searchWindow = uniIn.row(0).format<char>().select<2, 1>(22);
295
296 vector_ref<short, 2>
297 ref0XY = uniIn.row(0).format<short>().select<2, 1>(0);
298 SetRef(sourceXY, mvPred, searchWindow, widthHeight, ref0XY);
299
300 // M1.0-3 Search path parameters & start centers & MaxNumMVs again!!!
301 VME_SET_UNIInput_AdaptiveEn(uniIn);
302 VME_SET_UNIInput_T8x8FlagForInterEn(uniIn);
303 VME_SET_UNIInput_MaxNumMVs(uniIn, 0x3f);
304 VME_SET_UNIInput_MaxNumSU(uniIn, maxNumSu);
305 VME_SET_UNIInput_LenSP(uniIn, lenSp);
306 //VME_SET_UNIInput_BiWeight(uniIn, 32);
307
308 // M1.2 Start0X, Start0Y
309 vector<char, 2>
310 start0 = searchWindow;
311 start0 = ((start0 - 16) >> 3) & 0x0f;
312 uniIn.row(1)[10] = start0[0] | (start0[1] << 4);
313
314 uniIn.row(1)[6] = 0x20;
315 uniIn.row(1)[31] = 0x1;
316
317 vector<short, 2>
318 ref0 = uniIn.row(0).format<short>().select<2, 1>(0);
319 vector<ushort, 16>
320 costCenter = uniIn.row(3).format<ushort>().select<16, 1>(0);
321
322 VME_SET_UNIInput_EarlyImeSuccessEn(uniIn);
323 matrix<short, 2, 4>
324 mv8;
325 vector<uint, 4>
326 dist8;
327
328 run_vme_ime(uniIn, imeIn,
329 VME_STREAM_OUT, VME_SEARCH_SINGLE_REF_SINGLE_REC_SINGLE_START,
330 SURF_SRC_AND_REF, ref0XY, NULL, costCenter, imeOut);
331 mv8 = imeOut.row(8).format<short>().select<8, 1>(8); // 4 MVs
332 dist8 = imeOut.row(7).format<ushort>().select<4, 1>(4);
333 // distortions Integer search results
334 // 8x8
335 write(SURF_DIST8x8, mbX * DIST_SIZE * 2, mbY * 2, dist8.format<uint, 2, 2>()); //8x8 Forward SAD
336 if (precision)
337 {//QPEL
338 VME_SET_UNIInput_SubPelMode(uniIn, 3);
339 VME_CLEAR_UNIInput_BMEDisableFBR(uniIn);
340 SLICE(fbrIn.format<uint>(), 1, 16, 2) = 0; // zero L1 motion vectors
341 matrix<uchar, 7, 32> fbrOut8x8;
342 VME_SET_UNIInput_FBRMbModeInput(uniIn, 3);
343 VME_SET_UNIInput_FBRSubMBShapeInput(uniIn, 0);
344 VME_SET_UNIInput_FBRSubPredModeInput(uniIn, 3);
345 fbrIn.format<uint, 4, 8>().select<1, 1, 4, 2>(0, 0) = mv8.format<uint>()[0]; // motion vectors 8x8_0
346 fbrIn.format<uint, 4, 8>().select<1, 1, 4, 2>(1, 0) = mv8.format<uint>()[1]; // motion vectors 8x8_1
347 fbrIn.format<uint, 4, 8>().select<1, 1, 4, 2>(2, 0) = mv8.format<uint>()[2]; // motion vectors 8x8_2
348 fbrIn.format<uint, 4, 8>().select<1, 1, 4, 2>(3, 0) = mv8.format<uint>()[3]; // motion vectors 8x8_3
349 run_vme_fbr(uniIn, fbrIn, SURF_SRC_AND_REF, 3, 0, 0, fbrOut8x8);
350 VME_GET_FBROutput_Rec0_8x8_4Mv(fbrOut8x8, mv8.format<uint>());
351 VME_GET_FBROutput_Dist_8x8_Bi(fbrOut8x8, dist8);
352 }
353
354 // distortions actual complete distortion calculation
355 // 8x8
356 //write(SURF_DIST8x8 , mbX * DIST_SIZE * 2 , mbY * 2, dist8.format<uint,2,2>()); //8x8 Bidir distortions
357
358 // motion vectors
359 // 8x8
360 write(SURF_MV8x8, mbX * MVDATA_SIZE * 2, mbY * 2, mv8); //8x8mvs Ref0
361 }
362
363 extern "C" _GENX_MAIN_
MeP16bi_1MV2_MRE(SurfaceIndex SURF_CONTROL,SurfaceIndex SURF_SRC_AND_REF,SurfaceIndex SURF_SRC_AND_REF2,SurfaceIndex SURF_DIST16x16,SurfaceIndex SURF_MV16x16,SurfaceIndex SURF_MV16x16_2,uint start_xy,uchar blSize,char forwardRefDist,char backwardRefDist)364 void MeP16bi_1MV2_MRE(
365 SurfaceIndex SURF_CONTROL,
366 SurfaceIndex SURF_SRC_AND_REF,
367 SurfaceIndex SURF_SRC_AND_REF2,
368 SurfaceIndex SURF_DIST16x16,
369 SurfaceIndex SURF_MV16x16,
370 SurfaceIndex SURF_MV16x16_2,
371 uint start_xy,
372 uchar blSize,
373 char forwardRefDist,
374 char backwardRefDist
375 )
376 {
377 vector<uint, 1>
378 start_mbXY = start_xy;
379 uint
380 mbX = get_thread_origin_x() + start_mbXY.format<ushort>()[0],
381 mbY = get_thread_origin_y() + start_mbXY.format<ushort>()[1],
382 x = mbX * blSize,
383 y = mbY * blSize;
384
385 vector<uchar, 96>
386 control;
387 read(SURF_CONTROL, 0, control);
388
389 uchar
390 maxNumSu = control.format<uchar>()[56],
391 lenSp = control.format<uchar>()[57];
392 ushort
393 width = control.format<ushort>()[30],
394 height = control.format<ushort>()[31],
395 mre_width = control.format<ushort>()[33],
396 mre_height = control.format<ushort>()[34],
397 precision = control.format<ushort>()[36];
398
399 // read MB record data
400 UniIn
401 uniIn = 0;
402 #if COMPLEX_BIDIR
403 matrix<uchar, 9, 32>
404 imeOut;
405 #else
406 matrix<uchar, 11, 32>
407 imeOut;
408 #endif
409 matrix<uchar, 2, 32>
410 imeIn = 0;
411 matrix<uchar, 4, 32>
412 fbrIn;
413
414 // declare parameters for VME
415 matrix<uint, 16, 2> costs = 0;
416 vector<short, 2>
417 mvPred = 0,
418 mvPred2 = 0;
419 //read(SURF_MV16x16, mbX * MVDATA_SIZE, mbY, mvPred); // these pred MVs will be updated later here
420
421 #if COMPLEX_BIDIR
422 uchar
423 x_r = 64,
424 y_r = 32;
425 #else
426 uchar
427 x_r = 32,
428 y_r = 32;
429 #endif
430
431 // load search path
432 imeIn.select<2, 1, 32, 1>(0) = control.select<64, 1>(0);
433
434 // M0.2
435 VME_SET_UNIInput_SrcX(uniIn, x);
436 VME_SET_UNIInput_SrcY(uniIn, y);
437
438 // M0.3 various prediction parameters
439 #if COMPLEX_BIDIR
440 VME_SET_DWORD(uniIn, 0, 3, 0x76a40000); // BMEDisableFBR=1 InterSAD=2 8x8 16x16
441 #else
442 VME_SET_DWORD(uniIn, 0, 3, 0x76a00000); // BMEDisableFBR=0 InterSAD=2 SubMbPartMask=0x76: 8x8 16x16
443 #endif
444 //VME_SET_UNIInput_BMEDisableFBR(uniIn);
445 // M1.1 MaxNumMVs
446 VME_SET_UNIInput_MaxNumMVs(uniIn, 32);
447 // M0.5 Reference Window Width & Height
448 VME_SET_UNIInput_RefW(uniIn, x_r);//48);
449 VME_SET_UNIInput_RefH(uniIn, y_r);//40);
450
451 // M0.0 Ref0X, Ref0Y
452 vector_ref<short, 2>
453 sourceXY = uniIn.row(0).format<short>().select<2, 1>(4);
454 vector<uchar, 2>
455 widthHeight;
456 widthHeight[0] = (height >> 4) - 1;
457 widthHeight[1] = (width >> 4);
458 vector_ref<char, 2>
459 searchWindow = uniIn.row(0).format<char>().select<2, 1>(22);
460
461 vector_ref<short, 2>
462 ref0XY = uniIn.row(0).format<short>().select<2, 1>(0);
463 SetRef(sourceXY, mvPred, searchWindow, widthHeight, ref0XY);
464
465 vector_ref<short, 2>
466 ref1XY = uniIn.row(0).format<short>().select<2, 1>(2);
467
468 // M1.0-3 Search path parameters & start centers & MaxNumMVs again!!!
469 VME_SET_UNIInput_AdaptiveEn(uniIn);
470 VME_SET_UNIInput_T8x8FlagForInterEn(uniIn);
471 VME_SET_UNIInput_MaxNumMVs(uniIn, 0x3f);
472 VME_SET_UNIInput_MaxNumSU(uniIn, maxNumSu);
473 VME_SET_UNIInput_LenSP(uniIn, lenSp);
474 //VME_SET_UNIInput_BiWeight(uniIn, 32);
475
476 // M1.2 Start0X, Start0Y
477 vector<char, 2>
478 start0 = searchWindow;
479 start0 = ((start0 - 16) >> 3) & 0x0f;
480 uniIn.row(1)[10] = start0[0] | (start0[1] << 4);
481
482 uniIn.row(1)[6] = 0x20;
483 uniIn.row(1)[31] = 0x1;
484
485 vector<short, 2>
486 ref0 = uniIn.row(0).format<short>().select<2, 1>(0);
487 vector<ushort, 16>
488 costCenter = uniIn.row(3).format<ushort>().select<16, 1>(0);
489
490 VME_SET_UNIInput_EarlyImeSuccessEn(uniIn);
491 vector<short, 2>
492 mv16, mv16_2;
493 matrix<uint, 1, 1>
494 dist16x16,
495 dist16x16_2;
496 #if COMPLEX_BIDIR
497 run_vme_ime(uniIn, imeIn,
498 VME_STREAM_OUT, VME_SEARCH_SINGLE_REF_SINGLE_REC_SINGLE_START,
499 SURF_SRC_AND_REF, ref0XY, NULL, costCenter, imeOut);
500 VME_GET_IMEOutput_Rec0_16x16_Mv(imeOut, mv16);
501 VME_GET_IMEOutput_Rec0_16x16_Distortion(imeOut, dist16x16);
502
503 mvPred2 = mv16 * backwardRefDist / forwardRefDist;
504 SetRef(sourceXY, mvPred2, searchWindow, widthHeight, ref1XY);
505 run_vme_ime(uniIn, imeIn,
506 VME_STREAM_OUT, VME_SEARCH_SINGLE_REF_SINGLE_REC_SINGLE_START,
507 SURF_SRC_AND_REF2, ref1XY, NULL, costCenter, imeOut);
508 VME_GET_IMEOutput_Rec0_16x16_Mv(imeOut, mv16_2);
509 VME_GET_IMEOutput_Rec0_16x16_Distortion(imeOut, dist16x16_2);
510 #else
511 run_vme_ime(uniIn, imeIn,
512 VME_STREAM_OUT, VME_SEARCH_DUAL_REF_DUAL_REC,
513 SURF_SRC_AND_REF, ref0XY, ref1XY, costCenter, imeOut);
514
515 VME_GET_IMEOutput_Rec0_16x16_Mv(imeOut, mv16);
516 VME_GET_IMEOutput_Rec0_16x16_Distortion(imeOut, dist16x16);
517
518 VME_GET_IMEOutput_Rec1_16x16_Mv(imeOut, mv16_2);
519 VME_GET_IMEOutput_Rec1_16x16_Distortion(imeOut, dist16x16_2);
520 #endif
521 // distortions calculated before updates (subpel, bidir search)
522 write(SURF_DIST16x16, mbX * DIST_SIZE, mbY, dist16x16); //16x16 Forward SAD
523
524 if (precision)//QPEL
525 VME_SET_UNIInput_SubPelMode(uniIn, 3);
526 else
527 VME_SET_UNIInput_SubPelMode(uniIn, 0);
528 VME_SET_UNIInput_BiWeight(uniIn, 32);
529
530 VME_CLEAR_UNIInput_BMEDisableFBR(uniIn);
531 SLICE(fbrIn.format<uint>(), 1, 16, 2) = 0; // zero L1 motion vectors
532 VME_SET_UNIInput_FBRMbModeInput(uniIn, 0);
533 VME_SET_UNIInput_FBRSubMBShapeInput(uniIn, 0);
534 if (precision)//QPEL
535 VME_SET_UNIInput_FBRSubPredModeInput(uniIn, 3);
536 else
537 VME_SET_UNIInput_FBRSubPredModeInput(uniIn, 0);
538
539 matrix<uchar, 7, 32>
540 fbrOut16x16;
541 fbrIn.format<uint, 4, 8>().select<4, 1, 4, 2>(0, 0) = mv16.format<uint>()[0]; // motion vectors 16x16
542 fbrIn.format<uint, 4, 8>().select<4, 1, 4, 2>(0, 1) = mv16_2.format<uint>()[0];
543 run_vme_fbr(uniIn, fbrIn, SURF_SRC_AND_REF, 0, 0, 170, fbrOut16x16);
544 VME_GET_FBROutput_Rec0_16x16_Mv(fbrOut16x16, mv16);
545 VME_GET_FBROutput_Rec1_16x16_Mv(fbrOut16x16, mv16_2);
546 VME_GET_FBROutput_Dist_16x16_Bi(fbrOut16x16, dist16x16);
547
548
549 // distortions Actual complete distortion
550 //write(SURF_DIST16x16, mbX * DIST_SIZE, mbY, dist16x16);
551
552 // motion vectors
553 write(SURF_MV16x16, mbX * MVDATA_SIZE, mbY, mv16); //16x16mv Ref0
554 write(SURF_MV16x16_2, mbX * MVDATA_SIZE, mbY, mv16_2); //16x16mv Ref1
555 }
556
557 extern "C" _GENX_MAIN_
MeP16bi_1MV2_MRE_8x8(SurfaceIndex SURF_CONTROL,SurfaceIndex SURF_SRC_AND_REF,SurfaceIndex SURF_SRC_AND_REF2,SurfaceIndex SURF_DIST8x8,SurfaceIndex SURF_MV8x8,SurfaceIndex SURF_MV8x8_2,uint start_xy,uchar blSize,char forwardRefDist,char backwardRefDist)558 void MeP16bi_1MV2_MRE_8x8(
559 SurfaceIndex SURF_CONTROL,
560 SurfaceIndex SURF_SRC_AND_REF,
561 SurfaceIndex SURF_SRC_AND_REF2,
562 SurfaceIndex SURF_DIST8x8,
563 SurfaceIndex SURF_MV8x8,
564 SurfaceIndex SURF_MV8x8_2,
565 uint start_xy,
566 uchar blSize,
567 char forwardRefDist,
568 char backwardRefDist
569 )
570 {
571 vector<uint, 1>
572 start_mbXY = start_xy;
573 uint
574 mbX = get_thread_origin_x() + start_mbXY.format<ushort>()[0],
575 mbY = get_thread_origin_y() + start_mbXY.format<ushort>()[1],
576 x = mbX * blSize,
577 y = mbY * blSize;
578
579 vector<uchar, 96>
580 control;
581 read(SURF_CONTROL, 0, control);
582
583 uchar
584 maxNumSu = control.format<uchar>()[56],
585 lenSp = control.format<uchar>()[57];
586 ushort
587 width = control.format<ushort>()[30],
588 height = control.format<ushort>()[31],
589 mre_width = control.format<ushort>()[33],
590 mre_height = control.format<ushort>()[34],
591 precision = control.format<ushort>()[36];
592 // read MB record data
593 #if CMRT_EMU
594 if (x >= width)
595 return;
596 cm_assert(x < width);
597 #endif
598 UniIn
599 uniIn = 0;
600 #if COMPLEX_BIDIR
601 matrix<uchar, 9, 32>
602 imeOut;
603 #else
604 matrix<uchar, 11, 32>
605 imeOut;
606 #endif
607 matrix<uchar, 2, 32>
608 imeIn = 0;
609 matrix<uchar, 4, 32>
610 fbrIn;
611
612 // declare parameters for VME
613 matrix<uint, 16, 2>
614 costs = 0;
615 vector<short, 2>
616 mvPred = 0,
617 mvPred2 = 0;
618 //read(SURF_MV16x16, mbX * MVDATA_SIZE, mbY, mvPred); // these pred MVs will be updated later here
619 #if COMPLEX_BIDIR
620 uchar x_r = 48;
621 uchar y_r = 40;
622 #else
623 uchar
624 x_r = 32,
625 y_r = 32;
626 #endif
627
628 // load search path
629 imeIn.select<2, 1, 32, 1>(0) = control.select<64, 1>(0);
630
631 // M0.2
632 VME_SET_UNIInput_SrcX(uniIn, x);
633 VME_SET_UNIInput_SrcY(uniIn, y);
634
635 // M0.3 various prediction parameters
636 //VME_SET_DWORD(uniIn, 0, 3, 0x76a40000); // BMEDisableFBR=1 InterSAD=2 8x8 16x16
637 VME_SET_DWORD(uniIn, 0, 3, 0x76a00000); // BMEDisableFBR=0 InterSAD=2 SubMbPartMask=0x76: 8x8 16x16
638 //VME_SET_DWORD(uniIn, 0, 3, 0x77a00000); // BMEDisableFBR=0 InterSAD=2 SubMbPartMask=0x77: 8x8
639 //VME_SET_UNIInput_BMEDisableFBR(uniIn);
640 // M1.1 MaxNumMVs
641 VME_SET_UNIInput_MaxNumMVs(uniIn, 32);
642 // M0.5 Reference Window Width & Height
643 VME_SET_UNIInput_RefW(uniIn, x_r);//48);
644 VME_SET_UNIInput_RefH(uniIn, y_r);//40);
645
646 // M0.0 Ref0X, Ref0Y
647 vector_ref<short, 2>
648 sourceXY = uniIn.row(0).format<short>().select<2, 1>(4);
649 vector<uchar, 2>
650 widthHeight;
651 widthHeight[0] = (height >> 4) - 1;
652 widthHeight[1] = (width >> 4);
653 vector_ref<char, 2>
654 searchWindow = uniIn.row(0).format<char>().select<2, 1>(22);
655
656 vector_ref<short, 2>
657 ref0XY = uniIn.row(0).format<short>().select<2, 1>(0);
658 SetRef(sourceXY, mvPred, searchWindow, widthHeight, ref0XY);
659
660 vector_ref<short, 2>
661 ref1XY = uniIn.row(0).format<short>().select<2, 1>(2);
662 SetRef(sourceXY, mvPred2, searchWindow, widthHeight, ref1XY);
663
664 // M1.0-3 Search path parameters & start centers & MaxNumMVs again!!!
665 VME_SET_UNIInput_AdaptiveEn(uniIn);
666 VME_SET_UNIInput_T8x8FlagForInterEn(uniIn);
667 VME_SET_UNIInput_MaxNumMVs(uniIn, 0x3f);
668 VME_SET_UNIInput_MaxNumSU(uniIn, maxNumSu);
669 VME_SET_UNIInput_LenSP(uniIn, lenSp);
670 //VME_SET_UNIInput_BiWeight(uniIn, 32);
671
672 // M1.2 Start0X, Start0Y
673 vector<char, 2>
674 start0 = searchWindow;
675 start0 = ((start0 - 16) >> 3) & 0x0f;
676 uniIn.row(1)[10] = start0[0] | (start0[1] << 4);
677
678 uniIn.row(1)[6] = 0x20;
679 uniIn.row(1)[31] = 0x1;
680
681 vector<short, 2>
682 ref0 = uniIn.row(0).format<short>().select<2, 1>(0);
683 vector<ushort, 16>
684 costCenter = uniIn.row(3).format<ushort>().select<16, 1>(0);
685 VME_SET_UNIInput_EarlyImeSuccessEn(uniIn);
686 #if COMPLEX_BIDIR
687 matrix<short, 2, 4>
688 mv8,
689 mv8_2;
690 #else
691 matrix<uint, 2, 2>
692 mv8,
693 mv8_2;
694 #endif
695 vector<uint, 4>
696 dist8,
697 dist8_2;
698 #if COMPLEX_BIDIR
699 run_vme_ime(uniIn, imeIn,
700 VME_STREAM_OUT, VME_SEARCH_SINGLE_REF_SINGLE_REC_SINGLE_START,
701 SURF_SRC_AND_REF, ref0XY, NULL, costCenter, imeOut);
702 mv8 = imeOut.row(8).format<short>().select<8, 1>(8); // 4 MVs
703 dist8 = imeOut.row(7).format<ushort>().select<4, 1>(4);
704 vector<short, 2>
705 mv16;
706 VME_GET_IMEOutput_Rec0_16x16_Mv(imeOut, mv16);
707
708 #if !INVERTMOTION
709 run_vme_ime(uniIn, imeIn,
710 VME_STREAM_OUT, VME_SEARCH_SINGLE_REF_SINGLE_REC_SINGLE_START,
711 SURF_SRC_AND_REF2, ref1XY, NULL, costCenter, imeOut);
712 mv8_2 = imeOut.row(8).format<short>().select<8, 1>(8); // 4 MVs
713 dist8_2 = imeOut.row(7).format<ushort>().select<4, 1>(4);
714
715 #else
716 mvPred2 = mv16 * backwardRefDist / forwardRefDist;
717 SetRef(sourceXY, mvPred2, searchWindow, widthHeight, ref1XY);
718 run_vme_ime(uniIn, imeIn,
719 VME_STREAM_OUT, VME_SEARCH_SINGLE_REF_SINGLE_REC_SINGLE_START,
720 SURF_SRC_AND_REF2, ref1XY, NULL, costCenter, imeOut);
721 mv8_2 = imeOut.row(8).format<short>().select<8, 1>(8); // 4 MVs
722 dist8_2 = imeOut.row(7).format<ushort>().select<4, 1>(4);
723 //mv8_2 = mv8 * -1;
724 #endif
725 #else
726 run_vme_ime(uniIn, imeIn,
727 VME_STREAM_OUT, VME_SEARCH_DUAL_REF_DUAL_REC,
728 SURF_SRC_AND_REF, ref0XY, ref1XY, costCenter, imeOut);
729
730 //VME_GET_IMEOutput_Rec0_16x16_Mv(imeOut, mv16);
731 VME_GET_IMEOutput_Rec0_8x8_4Mv(imeOut, mv8);
732 //VME_GET_IMEOutput_Rec0_16x16_Distortion(imeOut, dist16x16);
733 VME_GET_IMEOutput_Rec0_8x8_4Distortion(imeOut, dist8);
734
735 //VME_GET_IMEOutput_Rec1_16x16_Mv(imeOut, mv16_2);
736 VME_GET_IMEOutput_Rec1_8x8_4Mv(imeOut, mv8_2);
737 //VME_GET_IMEOutput_Rec1_16x16_Distortion(imeOut, dist16x16_2);
738 VME_GET_IMEOutput_Rec1_8x8_4Distortion(imeOut, dist8_2);
739 #endif
740
741
742 // distortions Integer search results
743 // 8x8
744 write(SURF_DIST8x8, mbX * DIST_SIZE * 2, mbY * 2, dist8.format<uint, 2, 2>()); //8x8 Forward SAD
745
746
747 if (precision)//QPEL
748 VME_SET_UNIInput_SubPelMode(uniIn, 3);
749 else
750 VME_SET_UNIInput_SubPelMode(uniIn, 0);
751 VME_SET_UNIInput_BiWeight(uniIn, 32);
752 VME_CLEAR_UNIInput_BMEDisableFBR(uniIn);
753 SLICE(fbrIn.format<uint>(), 1, 16, 2) = 0; // zero L1 motion vectors
754 matrix<uchar, 7, 32> fbrOut8x8;
755 VME_SET_UNIInput_FBRMbModeInput(uniIn, 3);
756 VME_SET_UNIInput_FBRSubMBShapeInput(uniIn, 0);
757 if (precision)//QPEL
758 VME_SET_UNIInput_FBRSubPredModeInput(uniIn, 3);
759 else
760 VME_SET_UNIInput_FBRSubPredModeInput(uniIn, 0);
761 fbrIn.format<uint, 4, 8>().select<1, 1, 4, 2>(0, 0) = mv8.format<uint>()[0]; // motion vectors 8x8_0
762 fbrIn.format<uint, 4, 8>().select<1, 1, 4, 2>(1, 0) = mv8.format<uint>()[1]; // motion vectors 8x8_1
763 fbrIn.format<uint, 4, 8>().select<1, 1, 4, 2>(2, 0) = mv8.format<uint>()[2]; // motion vectors 8x8_2
764 fbrIn.format<uint, 4, 8>().select<1, 1, 4, 2>(3, 0) = mv8.format<uint>()[3]; // motion vectors 8x8_3
765 fbrIn.format<uint, 4, 8>().select<1, 1, 4, 2>(0, 1) = mv8_2.format<uint>()[0]; // motion vectors 8x8_2_0
766 fbrIn.format<uint, 4, 8>().select<1, 1, 4, 2>(1, 1) = mv8_2.format<uint>()[1]; // motion vectors 8x8_2_1
767 fbrIn.format<uint, 4, 8>().select<1, 1, 4, 2>(2, 1) = mv8_2.format<uint>()[2]; // motion vectors 8x8_2_2
768 fbrIn.format<uint, 4, 8>().select<1, 1, 4, 2>(3, 1) = mv8_2.format<uint>()[3]; // motion vectors 8x8_2_3
769 run_vme_fbr(uniIn, fbrIn, SURF_SRC_AND_REF, 3, 0, 170, fbrOut8x8);
770 VME_GET_FBROutput_Rec0_8x8_4Mv(fbrOut8x8, mv8.format<uint>());
771 VME_GET_FBROutput_Rec1_8x8_4Mv(fbrOut8x8, mv8_2.format<uint>());
772 VME_GET_FBROutput_Dist_8x8_Bi(fbrOut8x8, dist8);
773
774
775 // distortions actual complete distortion calculation
776 // 8x8
777 //write(SURF_DIST8x8 , mbX * DIST_SIZE * 2 , mbY * 2, dist8.format<uint,2,2>()); //8x8 Bidir distortions
778
779 // motion vectors
780 // 8x8
781 write(SURF_MV8x8, mbX * MVDATA_SIZE * 2, mbY * 2, mv8); //8x8mvs Ref0
782 write(SURF_MV8x8_2, mbX * MVDATA_SIZE * 2, mbY * 2, mv8_2); //8x8mvs Ref1
783 }
784
785 extern "C" _GENX_MAIN_
MeP16_1ME_2BiRef_MRE_8x8(SurfaceIndex SURF_CONTROL,SurfaceIndex SURF_SRC_AND_REF,SurfaceIndex SURF_SRC_AND_REF2,SurfaceIndex SURF_DIST8x8,SurfaceIndex SURF_MV8x8,SurfaceIndex SURF_MV8x8_2,uint start_xy,uchar blSize,char forwardRefDist,char backwardRefDist)786 void MeP16_1ME_2BiRef_MRE_8x8(
787 SurfaceIndex SURF_CONTROL,
788 SurfaceIndex SURF_SRC_AND_REF,
789 SurfaceIndex SURF_SRC_AND_REF2,
790 SurfaceIndex SURF_DIST8x8,
791 SurfaceIndex SURF_MV8x8,
792 SurfaceIndex SURF_MV8x8_2,
793 uint start_xy,
794 uchar blSize,
795 char forwardRefDist,
796 char backwardRefDist
797 )
798 {
799 vector<uint, 1>
800 start_mbXY = start_xy;
801 uint
802 mbX = get_thread_origin_x() + start_mbXY.format<ushort>()[0],
803 mbY = get_thread_origin_y() + start_mbXY.format<ushort>()[1],
804 x = mbX * blSize,
805 y = mbY * blSize;
806
807 vector<uchar, 96>
808 control;
809 read(SURF_CONTROL, 0, control);
810
811 uchar
812 maxNumSu = control.format<uchar>()[56],
813 lenSp = control.format<uchar>()[57];
814 ushort
815 width = control.format<ushort>()[30],
816 height = control.format<ushort>()[31],
817 mre_width = control.format<ushort>()[33],
818 mre_height = control.format<ushort>()[34],
819 precision = control.format<ushort>()[36];
820 // read MB record data
821 #if CMRT_EMU
822 if (x >= width)
823 return;
824 cm_assert(x < width);
825 #endif
826 UniIn
827 uniIn = 0;
828 #if COMPLEX_BIDIR
829 matrix<uchar, 9, 32>
830 imeOut;
831 #else
832 matrix<uchar, 11, 32>
833 imeOut;
834 #endif
835 matrix<uchar, 2, 32>
836 imeIn = 0;
837 matrix<uchar, 4, 32>
838 fbrIn;
839
840 // declare parameters for VME
841 matrix<uint, 16, 2>
842 costs = 0;
843 vector<short, 2>
844 mvPred = 0,
845 mvPred2 = 0;
846 //read(SURF_MV16x16, mbX * MVDATA_SIZE, mbY, mvPred); // these pred MVs will be updated later here
847 #if COMPLEX_BIDIR
848 uchar x_r = 48;
849 uchar y_r = 40;
850 #else
851 uchar
852 x_r = 32,
853 y_r = 32;
854 #endif
855
856 // load search path
857 imeIn.select<2, 1, 32, 1>(0) = control.select<64, 1>(0);
858
859 // M0.2
860 VME_SET_UNIInput_SrcX(uniIn, x);
861 VME_SET_UNIInput_SrcY(uniIn, y);
862
863 // M0.3 various prediction parameters
864 //VME_SET_DWORD(uniIn, 0, 3, 0x76a40000); // BMEDisableFBR=1 InterSAD=2 8x8 16x16
865 VME_SET_DWORD(uniIn, 0, 3, 0x76a00000); // BMEDisableFBR=0 InterSAD=2 SubMbPartMask=0x76: 8x8 16x16
866 //VME_SET_DWORD(uniIn, 0, 3, 0x77a00000); // BMEDisableFBR=0 InterSAD=2 SubMbPartMask=0x77: 8x8
867 //VME_SET_UNIInput_BMEDisableFBR(uniIn);
868 // M1.1 MaxNumMVs
869 VME_SET_UNIInput_MaxNumMVs(uniIn, 32);
870 // M0.5 Reference Window Width & Height
871 VME_SET_UNIInput_RefW(uniIn, x_r);//48);
872 VME_SET_UNIInput_RefH(uniIn, y_r);//40);
873
874 // M0.0 Ref0X, Ref0Y
875 vector_ref<short, 2>
876 sourceXY = uniIn.row(0).format<short>().select<2, 1>(4);
877 vector<uchar, 2>
878 widthHeight;
879 widthHeight[0] = (height >> 4) - 1;
880 widthHeight[1] = (width >> 4);
881 vector_ref<char, 2>
882 searchWindow = uniIn.row(0).format<char>().select<2, 1>(22);
883
884 vector_ref<short, 2>
885 ref0XY = uniIn.row(0).format<short>().select<2, 1>(0);
886 SetRef(sourceXY, mvPred, searchWindow, widthHeight, ref0XY);
887
888 vector_ref<short, 2>
889 ref1XY = uniIn.row(0).format<short>().select<2, 1>(2);
890 SetRef(sourceXY, mvPred2, searchWindow, widthHeight, ref1XY);
891
892 // M1.0-3 Search path parameters & start centers & MaxNumMVs again!!!
893 VME_SET_UNIInput_AdaptiveEn(uniIn);
894 VME_SET_UNIInput_T8x8FlagForInterEn(uniIn);
895 VME_SET_UNIInput_MaxNumMVs(uniIn, 0x3f);
896 VME_SET_UNIInput_MaxNumSU(uniIn, maxNumSu);
897 VME_SET_UNIInput_LenSP(uniIn, lenSp);
898 //VME_SET_UNIInput_BiWeight(uniIn, 32);
899
900 // M1.2 Start0X, Start0Y
901 vector<char, 2>
902 start0 = searchWindow;
903 start0 = ((start0 - 16) >> 3) & 0x0f;
904 uniIn.row(1)[10] = start0[0] | (start0[1] << 4);
905
906 uniIn.row(1)[6] = 0x20;
907 uniIn.row(1)[31] = 0x1;
908
909 vector<short, 2>
910 ref0 = uniIn.row(0).format<short>().select<2, 1>(0);
911 vector<ushort, 16>
912 costCenter = uniIn.row(3).format<ushort>().select<16, 1>(0);
913 VME_SET_UNIInput_EarlyImeSuccessEn(uniIn);
914
915 matrix<short, 2, 4>
916 mv8,
917 mv8_2;
918
919 vector<uint, 4>
920 dist8,
921 dist8_2;
922
923 run_vme_ime(uniIn, imeIn,
924 VME_STREAM_OUT, VME_SEARCH_SINGLE_REF_SINGLE_REC_SINGLE_START,
925 SURF_SRC_AND_REF, ref0XY, NULL, costCenter, imeOut);
926 mv8 = imeOut.row(8).format<short>().select<8, 1>(8); // 4 MVs
927 dist8 = imeOut.row(7).format<ushort>().select<4, 1>(4);
928 vector<short, 2>
929 mv16;
930 VME_GET_IMEOutput_Rec0_16x16_Mv(imeOut, mv16);
931
932
933 #if 0
934 mvPred2 = -mv16;
935 // M0.5 Reference Window Width & Height
936 VME_SET_UNIInput_RefW(uniIn, 32);//48);
937 VME_SET_UNIInput_RefH(uniIn, 32);//40);
938 SetRef(sourceXY, mvPred2, searchWindow, widthHeight, ref1XY);
939 run_vme_ime(uniIn, imeIn,
940 VME_STREAM_OUT, VME_SEARCH_SINGLE_REF_SINGLE_REC_SINGLE_START,
941 SURF_SRC_AND_REF2, ref1XY, NULL, costCenter, imeOut);
942 mv8_2 = imeOut.row(8).format<short>().select<8, 1>(8); // 4 MVs
943 dist8_2 = imeOut.row(7).format<ushort>().select<4, 1>(4);
944 #else
945 mv8_2 = -mv8; // 4 MVs
946 #endif
947 // distortions Integer search results
948 // 8x8
949 write(SURF_DIST8x8, mbX * DIST_SIZE * 2, mbY * 2, dist8.format<uint, 2, 2>()); //8x8 Forward SAD
950
951
952 if (precision)//QPEL
953 VME_SET_UNIInput_SubPelMode(uniIn, 3);
954 else
955 VME_SET_UNIInput_SubPelMode(uniIn, 0);
956 VME_SET_UNIInput_BiWeight(uniIn, 32);
957 VME_CLEAR_UNIInput_BMEDisableFBR(uniIn);
958 SLICE(fbrIn.format<uint>(), 1, 16, 2) = 0; // zero L1 motion vectors
959 matrix<uchar, 7, 32> fbrOut8x8;
960 VME_SET_UNIInput_FBRMbModeInput(uniIn, 3);
961 VME_SET_UNIInput_FBRSubMBShapeInput(uniIn, 0);
962 if (precision)//QPEL
963 VME_SET_UNIInput_FBRSubPredModeInput(uniIn, 3);
964 else
965 VME_SET_UNIInput_FBRSubPredModeInput(uniIn, 0);
966 fbrIn.format<uint, 4, 8>().select<1, 1, 4, 2>(0, 0) = mv8.format<uint>()[0]; // motion vectors 8x8_0
967 fbrIn.format<uint, 4, 8>().select<1, 1, 4, 2>(1, 0) = mv8.format<uint>()[1]; // motion vectors 8x8_1
968 fbrIn.format<uint, 4, 8>().select<1, 1, 4, 2>(2, 0) = mv8.format<uint>()[2]; // motion vectors 8x8_2
969 fbrIn.format<uint, 4, 8>().select<1, 1, 4, 2>(3, 0) = mv8.format<uint>()[3]; // motion vectors 8x8_3
970 fbrIn.format<uint, 4, 8>().select<1, 1, 4, 2>(0, 1) = mv8_2.format<uint>()[0]; // motion vectors 8x8_2_0
971 fbrIn.format<uint, 4, 8>().select<1, 1, 4, 2>(1, 1) = mv8_2.format<uint>()[1]; // motion vectors 8x8_2_1
972 fbrIn.format<uint, 4, 8>().select<1, 1, 4, 2>(2, 1) = mv8_2.format<uint>()[2]; // motion vectors 8x8_2_2
973 fbrIn.format<uint, 4, 8>().select<1, 1, 4, 2>(3, 1) = mv8_2.format<uint>()[3]; // motion vectors 8x8_2_3
974 run_vme_fbr(uniIn, fbrIn, SURF_SRC_AND_REF, 3, 0, 170, fbrOut8x8);
975 VME_GET_FBROutput_Rec0_8x8_4Mv(fbrOut8x8, mv8.format<uint>());
976 VME_GET_FBROutput_Rec1_8x8_4Mv(fbrOut8x8, mv8_2.format<uint>());
977 VME_GET_FBROutput_Dist_8x8_Bi(fbrOut8x8, dist8);
978
979
980 // distortions actual complete distortion calculation
981 // 8x8
982 //write(SURF_DIST8x8 , mbX * DIST_SIZE * 2 , mbY * 2, dist8.format<uint,2,2>()); //8x8 Bidir distortions
983
984 // motion vectors
985 // 8x8
986 write(SURF_MV8x8, mbX * MVDATA_SIZE * 2, mbY * 2, mv8); //8x8mvs Ref0
987 write(SURF_MV8x8_2, mbX * MVDATA_SIZE * 2, mbY * 2, mv8_2); //8x8mvs Ref1
988 }