1 /* ***** BEGIN LICENSE BLOCK *****
2 *
3 * $Id: me_utils.cpp,v 1.23 2008/10/21 04:55:46 asuraparaju Exp $ $Name: Dirac_1_0_2 $
4 *
5 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
6 *
7 * The contents of this file are subject to the Mozilla Public License
8 * Version 1.1 (the "License"); you may not use this file except in compliance
9 * with the License. You may obtain a copy of the License at
10 * http://www.mozilla.org/MPL/
11 *
12 * Software distributed under the License is distributed on an "AS IS" basis,
13 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for
14 * the specific language governing rights and limitations under the License.
15 *
16 * The Original Code is BBC Research and Development code.
17 *
18 * The Initial Developer of the Original Code is the British Broadcasting
19 * Corporation.
20 * Portions created by the Initial Developer are Copyright (C) 2004.
21 * All Rights Reserved.
22 *
23 * Contributor(s): Thomas Davies (Original Author),
24 *                 Peter Meerwald (pmeerw@users.sourceforge.net)
25 *                 Steve Bearcroft (bearcrsw@users.sourceforge.net)
26 *
27 * Alternatively, the contents of this file may be used under the terms of
28 * the GNU General Public License Version 2 (the "GPL"), or the GNU Lesser
29 * Public License Version 2.1 (the "LGPL"), in which case the provisions of
30 * the GPL or the LGPL are applicable instead of those above. If you wish to
31 * allow use of your version of this file only under the terms of the either
32 * the GPL or LGPL and not to allow others to use your version of this file
33 * under the MPL, indicate your decision by deleting the provisions above
34 * and replace them with the notice and other provisions required by the GPL
35 * or LGPL. If you do not delete the provisions above, a recipient may use
36 * your version of this file under the terms of any one of the MPL, the GPL
37 * or the LGPL.
38 * ***** END LICENSE BLOCK ***** */
39 
40 ///////////////////////////////////
41 //-------------------------------//
42 //utilities for motion estimation//
43 //-------------------------------//
44 ///////////////////////////////////
45 
46 #if defined(HAVE_MMX)
47 #include <climits>
48 #endif
49 #include <libdirac_motionest/me_utils.h>
50 #include <libdirac_motionest/me_utils_mmx.h>
51 #include <libdirac_common/common.h>
52 
53 using namespace dirac;
54 
55 #include <algorithm>
56 //#define INTRA_HAAR
57 
SetBlockLimits(const OLBParams & bparams,const PicArray & m_pic_data,const int xbpos,const int ybpos)58 void BlockDiffParams::SetBlockLimits( const OLBParams& bparams ,
59                                       const PicArray& m_pic_data ,
60                                       const int xbpos , const int ybpos)
61 {
62     const int loc_xp = xbpos * bparams.Xbsep() - bparams.Xoffset();
63     const int loc_yp = ybpos * bparams.Ybsep() - bparams.Yoffset();
64 
65     m_xp=std::max( loc_xp , 0 );
66     m_yp=std::max( loc_yp , 0 );
67 
68     m_xl = bparams.Xblen() - m_xp + loc_xp;
69     m_yl = bparams.Yblen() - m_yp + loc_yp;
70 
71      //constrain block lengths to fall within the picture
72     m_xl = ( ( m_xp + m_xl - 1) > m_pic_data.LastX() ) ? ( m_pic_data.LastX() + 1 - m_xp ): m_xl;
73     m_yl = ( ( m_yp + m_yl - 1) > m_pic_data.LastY() ) ? ( m_pic_data.LastY() + 1 - m_yp ) : m_yl;
74 
75     m_xend = m_xp+m_xl;
76     m_yend = m_yp+m_yl;
77 
78 }
79 
80 // Block difference class functions
81 
82 // Constructors ...
83 
BlockDiff(const PicArray & ref,const PicArray & pic)84 BlockDiff::BlockDiff(const PicArray& ref,const PicArray& pic) :
85     m_pic_data( pic ),
86     m_ref_data( ref )
87 {}
88 
PelBlockDiff(const PicArray & ref,const PicArray & pic)89 PelBlockDiff::PelBlockDiff( const PicArray& ref , const PicArray& pic ) :
90     BlockDiff( ref , pic )
91 {}
92 
IntraBlockDiff(const PicArray & pic)93 IntraBlockDiff::IntraBlockDiff( const PicArray& pic ) :
94     m_pic_data( pic )
95 {}
96 
BiBlockDiff(const PicArray & ref1,const PicArray & ref2,const PicArray & pic)97 BiBlockDiff::BiBlockDiff( const PicArray& ref1 , const PicArray& ref2 ,
98                           const PicArray& pic) :
99     m_pic_data( pic ),
100     m_ref_data1( ref1 ),
101     m_ref_data2( ref2 )
102 {}
103 
BlockDiffUp(const PicArray & ref,const PicArray & pic)104 BlockDiffUp::BlockDiffUp( const PicArray& ref , const PicArray& pic ):
105     BlockDiff( ref , pic )
106 {}
107 
BlockDiffHalfPel(const PicArray & ref,const PicArray & pic)108 BlockDiffHalfPel::BlockDiffHalfPel( const PicArray& ref , const PicArray& pic ) :
109     BlockDiffUp( ref , pic )
110 {}
111 
BlockDiffQuarterPel(const PicArray & ref,const PicArray & pic)112 BlockDiffQuarterPel::BlockDiffQuarterPel( const PicArray& ref , const PicArray& pic ) :
113     BlockDiffUp( ref , pic )
114 {}
115 
BlockDiffEighthPel(const PicArray & ref,const PicArray & pic)116 BlockDiffEighthPel::BlockDiffEighthPel( const PicArray& ref , const PicArray& pic ) :
117     BlockDiffUp( ref , pic )
118 {}
119 
BiBlockHalfPel(const PicArray & ref1,const PicArray & ref2,const PicArray & pic)120 BiBlockHalfPel::BiBlockHalfPel( const PicArray& ref1 , const PicArray& ref2 ,
121                                           const PicArray& pic ):
122     BiBlockDiff( ref1 , ref2 , pic)
123 {}
124 
BiBlockQuarterPel(const PicArray & ref1,const PicArray & ref2,const PicArray & pic)125 BiBlockQuarterPel::BiBlockQuarterPel( const PicArray& ref1 , const PicArray& ref2 ,
126                                           const PicArray& pic ):
127     BiBlockDiff( ref1 , ref2 , pic)
128 {}
129 
BiBlockEighthPel(const PicArray & ref1,const PicArray & ref2,const PicArray & pic)130 BiBlockEighthPel::BiBlockEighthPel( const PicArray& ref1 , const PicArray& ref2 ,
131                                           const PicArray& pic ):
132     BiBlockDiff( ref1 , ref2 , pic)
133 {}
134 
135 
136 // Difference functions ...
137 
Diff(const BlockDiffParams & dparams,const MVector & mv)138 float PelBlockDiff::Diff( const BlockDiffParams& dparams, const MVector& mv )
139 {
140     if (dparams.Xl() <= 0 || dparams.Yl() <= 0)
141     {
142         return 0;
143     }
144 
145     CalcValueType sum( 0 );
146 
147     const ImageCoords ref_start( dparams.Xp()+mv.x , dparams.Yp()+mv.y );
148     const ImageCoords ref_stop( dparams.Xend()+mv.x , dparams.Yend()+mv.y );
149 
150     bool bounds_check( false );
151 
152     if ( ref_start.x<0 ||
153          ref_stop.x >= m_ref_data.LengthX() ||
154          ref_start.y<0 ||
155          ref_stop.y >= m_ref_data.LengthY() )
156         bounds_check = true;
157 
158     if ( !bounds_check )
159     {
160 #if defined(HAVE_MMX)
161         return static_cast<float>(simple_block_diff_mmx_4(dparams, mv, m_pic_data, m_ref_data, INT_MAX));
162 #else
163         ValueType diff;
164         for ( int j=dparams.Yp() ; j<dparams.Yp()+dparams.Yl() ; ++j )
165         {
166             for(int i=dparams.Xp() ; i< dparams.Xp()+dparams.Xl() ; ++i )
167             {
168                 diff = m_pic_data[j][i]-m_ref_data[j+mv.y][i+mv.x];
169                 sum += std::abs( diff );
170             }// i
171         }// j
172 #endif /* HAVE_MMX */
173     }
174     else
175     {
176 #if defined (HAVE_MMX)
177         return static_cast<float>(bchk_simple_block_diff_mmx_4(dparams, mv, m_pic_data, m_ref_data, INT_MAX));
178 #else
179         ValueType diff;
180         for ( int j=dparams.Yp() ; j < dparams.Yp()+dparams.Yl() ; ++j )
181         {
182             for( int i=dparams.Xp() ; i < dparams.Xp()+dparams.Xl() ; ++i )
183             {
184                 diff = m_pic_data[j][i] - m_ref_data[BChk(j+mv.y , m_ref_data.LengthY())][BChk(i+mv.x , m_ref_data.LengthX())];
185                 sum += std::abs( diff );
186 
187             }// i
188         }// j
189 
190 #endif /* HAVE_MMX */
191     }
192 
193     return static_cast<float>( sum );
194 }
195 
Diff(const BlockDiffParams & dparams,const MVector & mv,float & best_sum,MVector & best_mv)196 void PelBlockDiff::Diff( const BlockDiffParams& dparams,
197                          const MVector& mv,
198                          float& best_sum,
199                          MVector& best_mv )
200 {
201     if (dparams.Xl() <= 0 || dparams.Yl() <= 0)
202     {
203         return;
204     }
205 
206     CalcValueType sum( 0 );
207 
208     const ImageCoords ref_start( dparams.Xp()+mv.x , dparams.Yp()+mv.y );
209     const ImageCoords ref_stop( dparams.Xend()+mv.x , dparams.Yend()+mv.y );
210 
211     bool bounds_check( false );
212 
213     if ( ref_start.x<0 ||
214          ref_stop.x >= m_ref_data.LengthX() ||
215          ref_start.y<0 ||
216          ref_stop.y >= m_ref_data.LengthY() )
217         bounds_check = true;
218 
219     if ( !bounds_check )
220     {
221 #if defined (HAVE_MMX)
222         sum  = simple_block_diff_mmx_4(dparams, mv, m_pic_data, m_ref_data, static_cast<int>(best_sum));
223         if (sum < best_sum)
224         {
225                best_sum = sum;
226                best_mv = mv;
227         }
228         return;
229 #else
230         ValueType diff;
231         ValueType *pic_curr = &m_pic_data[dparams.Yp()][dparams.Xp()];
232         const int pic_next( m_pic_data.LengthX() - dparams.Xl() ); // - go down a row and back along
233 
234 
235         ValueType *ref_curr = &m_ref_data[ref_start.y][ref_start.x];
236 
237         for( int y=dparams.Yl(); y>0; --y, pic_curr+=pic_next, ref_curr+=pic_next )
238         {
239             for( int x=dparams.Xl(); x>0; --x, ++pic_curr, ++ref_curr )
240             {
241                 diff = (*pic_curr)-(*ref_curr);
242                 sum += std::abs( diff );
243             }// x
244 
245             if ( sum>=best_sum )
246                 return;
247 
248         }// y
249 #endif /* HAVE_MMX */
250     }
251     else
252     {
253 #if defined (HAVE_MMX)
254            sum = (bchk_simple_block_diff_mmx_4(dparams, mv, m_pic_data, m_ref_data, static_cast<int>(best_sum)));
255         if (sum < best_sum)
256         {
257             best_sum = sum;
258             best_mv = mv;
259         }
260         return;
261 #else
262         ValueType diff;
263         for ( int j=dparams.Yp() ; j<dparams.Yend() ; ++j )
264         {
265             for( int i=dparams.Xp() ; i<dparams.Xend() ; ++i )
266             {
267                 diff = m_pic_data[j][i] - m_ref_data[BChk(j+mv.y , m_ref_data.LengthY())][BChk(i+mv.x , m_ref_data.LengthX())];
268                 sum += std::abs( diff );
269 
270             }// i
271 
272             if ( sum>=best_sum )
273                 return;
274 
275         }// j
276 #endif /* HAVE_MMX */
277     }
278 
279     best_sum = sum;
280     best_mv = mv;
281 
282 }
283 
CalcDC(const BlockDiffParams & dparams)284 ValueType IntraBlockDiff::CalcDC( const BlockDiffParams& dparams ){
285 
286     CalcValueType int_dc( 0 );
287     if (dparams.Xl() <= 0 || dparams.Yl() <= 0)
288     {
289         return 0;
290     }
291 
292     for ( int j=dparams.Yp() ; j<dparams.Yp()+dparams.Yl() ; ++j)
293         for(int i=dparams.Xp(); i<dparams.Xp()+dparams.Xl() ; ++i )
294             int_dc += static_cast<int>( m_pic_data[j][i] );
295 
296     int_dc /= ( dparams.Xl() * dparams.Yl() );
297 
298     return static_cast<ValueType>( int_dc );
299 }
300 
301 #ifdef INTRA_HAAR
Diff(const BlockDiffParams & dparams,ValueType & dc_val)302 float IntraBlockDiff::Diff( const BlockDiffParams& dparams , ValueType& dc_val )
303 {
304     if (dparams.Xl() <= 0 || dparams.Yl() <= 0)
305     {
306         dc_val = 0;
307         return 0;
308     }
309 
310     dc_val = CalcDC(dparams);
311 
312     // Now compute the resulting SAD
313     ValueType dc( dc_val );
314     CalcValueType intra_cost( 0 );
315 
316     for (int j=dparams.Yp(); j<dparams.Yend() ; j+=2){
317         for( int i=dparams.Xp() ; i<dparams.Xend() ;i+=2 ){
318             intra_cost += std::abs( m_pic_data[j][i]
319 	                          + m_pic_data[j][i+1]
320 	                          + m_pic_data[j+1][i]
321 	                          + m_pic_data[j+1][i+1]
322 				  - 4*dc );
323             intra_cost += std::abs( m_pic_data[j][i]
324 	                          + m_pic_data[j][i+1]
325 	                          - m_pic_data[j+1][i]
326 	                          - m_pic_data[j+1][i+1] );
327             intra_cost += std::abs( m_pic_data[j][i]
328 	                          - m_pic_data[j][i+1]
329 	                          + m_pic_data[j+1][i]
330 	                          - m_pic_data[j+1][i+1] );
331             intra_cost += std::abs( m_pic_data[j][i]
332 	                          - m_pic_data[j][i+1]
333 	                          - m_pic_data[j+1][i]
334 	                          + m_pic_data[j+1][i+1] );
335         }
336     }
337 
338     return static_cast<float>( intra_cost );
339 }
340 
341 #else
342 
Diff(const BlockDiffParams & dparams,ValueType & dc_val)343 float IntraBlockDiff::Diff( const BlockDiffParams& dparams , ValueType& dc_val )
344 {
345     if (dparams.Xl() <= 0 || dparams.Yl() <= 0)
346     {
347         dc_val = 0;
348         return 0;
349     }
350 
351      //computes the cost if block is predicted by its dc component
352 #if defined(HAVE_MMX)
353     CalcValueType intra_cost =
354             simple_intra_block_diff_mmx_4 (dparams, m_pic_data, dc_val);
355 
356 #ifdef DIRAC_DEBUG
357     CalcValueType int_dc( 0 );
358     ValueType non_mmx_dc(0);
359 
360     for ( int j=dparams.Yp() ; j<dparams.Yp()+dparams.Yl() ; ++j)
361         for(int i=dparams.Xp(); i<dparams.Xp()+dparams.Xl() ; ++i )
362             int_dc += static_cast<int>( m_pic_data[j][i] );
363 
364     int_dc /= ( dparams.Xl() * dparams.Yl() );
365 
366     non_mmx_dc = static_cast<ValueType>( int_dc );
367 
368     // Now compute the resulting SAD
369     ValueType dc( non_mmx_dc );
370     CalcValueType non_mmx_intra_cost( 0 );
371 
372     for (int j=dparams.Yp(); j<dparams.Yend() ; ++j)
373         for( int i=dparams.Xp() ; i<dparams.Xend() ;++i )
374             non_mmx_intra_cost += std::abs( m_pic_data[j][i] - dc );
375 
376     if (non_mmx_dc != dc_val || non_mmx_intra_cost != intra_cost)
377     {
378         std::cerr << "MMX vals: dc=" << dc_val;
379         std::cerr << " cost=" << intra_cost << std::endl;
380         //print_arr (pic_data, width[i%5], height[i%5]);
381         std::cerr << "non-MMX vals: dc=" << non_mmx_dc;
382         std::cerr << " cost=" << non_mmx_intra_cost << std::endl;
383     }
384 #endif
385     return static_cast<float>( intra_cost );
386 #else
387     CalcValueType int_dc( 0 );
388 
389     for ( int j=dparams.Yp() ; j<dparams.Yp()+dparams.Yl() ; ++j)
390         for(int i=dparams.Xp(); i<dparams.Xp()+dparams.Xl() ; ++i )
391             int_dc += static_cast<int>( m_pic_data[j][i] );
392 
393     int_dc /= ( dparams.Xl() * dparams.Yl() );
394 
395     dc_val = static_cast<ValueType>( int_dc );
396 
397     // Now compute the resulting SAD
398     ValueType dc( dc_val );
399     CalcValueType intra_cost( 0 );
400 
401     for (int j=dparams.Yp(); j<dparams.Yend() ; ++j)
402         for( int i=dparams.Xp() ; i<dparams.Xend() ;++i )
403             intra_cost += std::abs( m_pic_data[j][i] - dc );
404 
405     return static_cast<float>( intra_cost );
406 #endif //HAVE_MMX
407 }
408 #endif
409 
Diff(const BlockDiffParams & dparams,const MVector & mv)410 float BlockDiffHalfPel::Diff(  const BlockDiffParams& dparams ,
411                                       const MVector& mv )
412 {
413     if (dparams.Xl() <= 0 || dparams.Yl() <= 0)
414     {
415         return 0;
416     }
417    //Where to start in the upconverted image
418     const ImageCoords ref_start( ( dparams.Xp()<<1 ) + mv.x ,( dparams.Yp()<<1 ) + mv.y );
419     const ImageCoords ref_stop( ref_start.x+(dparams.Xl()<<1) , ref_start.y+(dparams.Yl()<<1));
420 
421 
422     bool bounds_check( false );
423 
424     if ( ref_start.x<0 ||
425          ref_stop.x >= m_ref_data.LengthX() ||
426          ref_start.y<0 ||
427          ref_stop.y >= m_ref_data.LengthY() )
428         bounds_check = true;
429 
430     float sum( 0 );
431 
432     if ( !bounds_check )
433     {
434 #if defined (HAVE_MMX)
435         MVector rmdr(0,0);
436         const ImageCoords start_pos(dparams.Xp(), dparams.Yp());
437         const ImageCoords end_pos(dparams.Xp() + dparams.Xl(), dparams.Yp() + dparams.Yl());
438         sum = simple_block_diff_up_mmx_4 (m_pic_data, m_ref_data,
439                                         start_pos, end_pos,
440                                         ref_start, ref_stop,
441                                         rmdr,
442                                         sum,
443                                         static_cast<float>(INT_MAX));
444 #else
445         ValueType* pic_curr = &m_pic_data[dparams.Yp()][dparams.Xp()];
446         const int pic_next( m_pic_data.LengthX() - dparams.Xl() );// go down a row and back up
447         ValueType *ref_curr = &m_ref_data[ref_start.y][ref_start.x];
448         const int ref_next( (m_ref_data.LengthX() - dparams.Xl())*2 );// go down 2 rows and back up
449 
450         for( int y=dparams.Yl(); y > 0; --y, pic_curr+=pic_next, ref_curr+=ref_next )
451         {
452             for( int x=dparams.Xl(); x > 0; --x, ++pic_curr, ref_curr+=2 )
453             {
454                 sum += std::abs( *ref_curr - *pic_curr );
455             }// x
456         }// y
457 #endif
458 
459     }
460     else
461     {
462         // We're doing bounds checking because we'll fall off the edge of the reference otherwise.
463         ValueType* pic_curr = &m_pic_data[dparams.Yp()][dparams.Xp()];
464         const int pic_next( m_pic_data.LengthX() - dparams.Xl() );// go down a row and back up
465         for( int y=dparams.Yl(), ry=ref_start.y, by=BChk(ry,m_ref_data.LengthY());
466              y>0;
467              --y, pic_curr+=pic_next, ry+=2 , by=BChk(ry,m_ref_data.LengthY()))
468         {
469              for( int x=dparams.Xl() , rx=ref_start.x , bx=BChk(rx,m_ref_data.LengthX());
470                   x>0 ;
471                   --x, ++pic_curr, rx+=2 , bx=BChk(rx,m_ref_data.LengthX()))
472              {
473                  sum += std::abs( m_ref_data[by][bx] -*pic_curr);
474              }// x
475         }// y
476 
477     }
478 
479     return sum;
480 
481 }
482 
Diff(const BlockDiffParams & dparams,const MVector & mv,const float mvcost,const float lambda,MvCostData & best_costs,MVector & best_mv)483 void BlockDiffHalfPel::Diff( const BlockDiffParams& dparams,
484                                    const MVector& mv ,
485                                    const float mvcost,
486                                    const float lambda,
487                                    MvCostData& best_costs ,
488                                    MVector& best_mv )
489 {
490     if (dparams.Xl() <= 0 || dparams.Yl() <= 0)
491     {
492         return;
493     }
494 
495     //Where to start in the upconverted image
496     const ImageCoords ref_start( ( dparams.Xp()<<1 ) + mv.x ,( dparams.Yp()<<1 ) + mv.y );
497     const ImageCoords ref_stop( ref_start.x+(dparams.Xl()<<1) , ref_start.y+(dparams.Yl()<<1));
498 
499 
500     bool bounds_check( false );
501 
502     if ( ref_start.x<0 ||
503          ref_stop.x >= m_ref_data.LengthX() ||
504          ref_start.y<0 ||
505          ref_stop.y >= m_ref_data.LengthY() )
506         bounds_check = true;
507 
508     const float start_val( mvcost*lambda );
509     float sum( start_val );
510 
511     if ( !bounds_check )
512     {
513 #if defined (HAVE_MMX)
514 
515         const ImageCoords start_pos(dparams.Xp(), dparams.Yp());
516         const ImageCoords end_pos(dparams.Xp() + dparams.Xl(), dparams.Yp() + dparams.Yl());
517         MVector rmdr(0,0);
518         sum = simple_block_diff_up_mmx_4 (m_pic_data, m_ref_data,
519                                         start_pos, end_pos,
520                                         ref_start, ref_stop,
521                                         rmdr,
522                                         sum,
523                                         best_costs.total);
524         if ( sum>=best_costs.total )
525             return;
526 #else
527            ValueType* pic_curr = &m_pic_data[dparams.Yp()][dparams.Xp()];
528            const int pic_next( m_pic_data.LengthX() - dparams.Xl() );// go down a row and back up
529         ValueType *ref_curr = &m_ref_data[ref_start.y][ref_start.x];
530         const int ref_next( (m_ref_data.LengthX() - dparams.Xl())*2 );// go down 2 rows and back up
531 
532         for( int y=dparams.Yl(); y > 0; --y, pic_curr+=pic_next, ref_curr+=ref_next )
533         {
534             for( int x=dparams.Xl(); x > 0; --x, ++pic_curr, ref_curr+=2 )
535             {
536                 sum += std::abs( *ref_curr - *pic_curr );
537             }// x
538 
539             if ( sum>=best_costs.total )
540                 return;
541 
542         }// y
543 #endif
544     }
545     else
546     {
547         // We're doing bounds checking because we'll fall off the edge of the reference otherwise.
548            ValueType* pic_curr = &m_pic_data[dparams.Yp()][dparams.Xp()];
549            const int pic_next( m_pic_data.LengthX() - dparams.Xl() );// go down a row and back up
550         for( int y=dparams.Yl(), ry=ref_start.y, by=BChk(ry,m_ref_data.LengthY());
551              y>0;
552              --y, pic_curr+=pic_next, ry+=2 , by=BChk(ry,m_ref_data.LengthY()))
553         {
554              for( int x=dparams.Xl() , rx=ref_start.x , bx=BChk(rx,m_ref_data.LengthX());
555                   x>0 ;
556                   --x, ++pic_curr, rx+=2 , bx=BChk(rx,m_ref_data.LengthX()))
557              {
558                  sum += std::abs( m_ref_data[by][bx] -*pic_curr);
559              }// x
560 
561              if ( sum>=best_costs.total )
562                 return;
563 
564         }// y
565 
566     }
567 
568     best_mv = mv;
569     best_costs.total = sum;
570     best_costs.mvcost = mvcost;
571     best_costs.SAD = sum - start_val;
572 }
573 
Diff(const BlockDiffParams & dparams,const MVector & mv)574 float BlockDiffQuarterPel::Diff(  const BlockDiffParams& dparams , const MVector& mv )
575 {
576     if (dparams.Xl() <= 0 || dparams.Yl() <= 0)
577     {
578         return 0;
579     }
580    // Set up the start point in the reference image by rounding the motion vector
581     // to 1/2 pel accuracy.NB: bit shift rounds negative values DOWN, as required
582     const MVector roundvec( mv.x>>1 , mv.y>>1 );
583 
584     //Get the remainder after rounding. NB rmdr values always 0 or 1
585     const MVector rmdr( mv.x & 1 , mv.y & 1 );
586 
587     //Where to start in the upconverted image
588     const ImageCoords ref_start( ( dparams.Xp()<<1 ) + roundvec.x ,( dparams.Yp()<<1 ) + roundvec.y );
589     const ImageCoords ref_stop( ref_start.x+(dparams.Xl()<<1) , ref_start.y+(dparams.Yl()<<1));
590     bool bounds_check( false );
591 
592     if ( ref_start.x<0 ||
593          ref_stop.x >= m_ref_data.LengthX() ||
594          ref_start.y<0 ||
595          ref_stop.y >= m_ref_data.LengthY() )
596         bounds_check = true;
597 
598     float sum( 0.0f );
599        CalcValueType temp;
600 
601 
602     if ( !bounds_check )
603     {
604 #if defined (HAVE_MMX)
605         const ImageCoords start_pos(dparams.Xp(), dparams.Yp());
606         const ImageCoords end_pos(dparams.Xp() + dparams.Xl(), dparams.Yp() + dparams.Yl());
607 
608         sum = simple_block_diff_up_mmx_4 (m_pic_data, m_ref_data,
609                                         start_pos, end_pos,
610                                         ref_start, ref_stop,
611                                         rmdr,
612                                         sum,
613                                         static_cast<float>(INT_MAX));
614 
615 #else
616         ValueType* pic_curr = &m_pic_data[dparams.Yp()][dparams.Xp()];
617         const int pic_next( m_pic_data.LengthX() - dparams.Xl() );// go down a row and back up
618         if( rmdr.x == 0 && rmdr.y == 0 )
619         {
620             ValueType *ref_curr = &m_ref_data[ref_start.y][ref_start.x];
621             const int ref_next( (m_ref_data.LengthX() - dparams.Xl())*2 );// go down 2 rows and back up
622             for( int y=dparams.Yl(); y > 0; --y, pic_curr+=pic_next, ref_curr+=ref_next )
623             {
624                 for( int x=dparams.Xl(); x > 0; --x, ++pic_curr, ref_curr+=2 )
625                 {
626                     sum += std::abs( *ref_curr - *pic_curr );
627                 }// x
628             }// y
629         }
630         else if( rmdr.y == 0 )
631         {
632             ValueType *ref_curr = &m_ref_data[ref_start.y][ref_start.x];
633             const int ref_next( (m_ref_data.LengthX() - dparams.Xl())*2 );// go down 2 rows and back up
634             for( int y=dparams.Yl(); y > 0; --y, pic_curr+=pic_next, ref_curr+=ref_next )
635             {
636                 for( int x=dparams.Xl(); x > 0; --x, ++pic_curr, ref_curr+=2 )
637                 {
638                     temp = (    CalcValueType( ref_curr[0] ) +
639                                 CalcValueType( ref_curr[1] ) +
640                                 1
641                             ) >> 1;
642                     sum += std::abs( temp - *pic_curr );
643                 }// x
644             }// y
645         }
646         else if( rmdr.x == 0 )
647         {
648             ValueType *ref_curr = &m_ref_data[ref_start.y][ref_start.x];
649             const int ref_next( (m_ref_data.LengthX() - dparams.Xl())*2 );// go down 2 rows and back up
650             for( int y=dparams.Yl(); y > 0; --y, pic_curr+=pic_next, ref_curr+=ref_next )
651             {
652                 for( int x=dparams.Xl(); x > 0; --x, ++pic_curr, ref_curr+=2 )
653                 {
654                     temp = (    CalcValueType( ref_curr[0] ) +
655                                 CalcValueType( ref_curr[m_ref_data.LengthX()] ) +
656                                 1
657                             ) >> 1;
658                     sum += std::abs( temp - *pic_curr );
659                 }// x
660             }// y
661         }
662         else
663         {
664             ValueType *ref_curr = &m_ref_data[ref_start.y][ref_start.x];
665             const int ref_next( (m_ref_data.LengthX() - dparams.Xl())*2 );// go down 2 rows and back up
666             for( int y=dparams.Yl(); y > 0; --y, pic_curr+=pic_next, ref_curr+=ref_next )
667             {
668                 for( int x=dparams.Xl(); x > 0; --x, ++pic_curr, ref_curr+=2 )
669                 {
670                     temp = (    CalcValueType( ref_curr[0] ) +
671                                 CalcValueType( ref_curr[1] ) +
672                                 CalcValueType( ref_curr[m_ref_data.LengthX()+0] ) +
673                                 CalcValueType( ref_curr[m_ref_data.LengthX()+1] ) +
674                                 2
675                             ) >> 2;
676                     sum += std::abs( temp - *pic_curr );
677                 }// x
678             }// y
679         }
680 #endif // HAVE_MMX
681     }
682     else
683     {
684         // We're doing bounds checking because we'll fall off the edge of the reference otherwise.
685 
686        // weights for doing linear interpolation, calculated from the remainder values
687         const ValueType linear_wts[4] = {  (2 - rmdr.x) * (2 - rmdr.y),    //tl
688                                            rmdr.x * (2 - rmdr.y),          //tr
689                                            (2 - rmdr.x) * rmdr.y,          //bl
690                                            rmdr.x * rmdr.y };              //br
691 
692         const int refXlen( m_ref_data.LengthX() );
693         const int refYlen( m_ref_data.LengthY() );
694 
695         for(int y = dparams.Yp(), uY = ref_start.y,BuY=BChk(uY,refYlen),BuY1=BChk(uY+1,refYlen);
696             y < dparams.Yend(); ++y, uY += 2,BuY=BChk(uY,refYlen),BuY1=BChk(uY+1,refYlen))
697         {
698             for(int x = dparams.Xp(), uX = ref_start.x,BuX=BChk(uX,refXlen),BuX1=BChk(uX+1,refXlen);
699                 x < dparams.Xend(); ++x, uX += 2,BuX=BChk(uX,refXlen),BuX1=BChk(uX+1,refXlen))
700             {
701 
702                 temp = (     linear_wts[0] * CalcValueType( m_ref_data[BuY][BuX] ) +
703                              linear_wts[1] * CalcValueType( m_ref_data[BuY][BuX1] ) +
704                              linear_wts[2] * CalcValueType( m_ref_data[BuY1][BuX] )+
705                              linear_wts[3] * CalcValueType( m_ref_data[BuY1][BuX1] ) +
706                              2
707                         ) >> 2;
708                 sum += std::abs( temp - m_pic_data[y][x] );
709             }// x
710         }// y
711 
712     }
713 
714     return sum;
715 
716 }
717 
Diff(const BlockDiffParams & dparams,const MVector & mv,const float mvcost,const float lambda,MvCostData & best_costs,MVector & best_mv)718 void BlockDiffQuarterPel::Diff( const BlockDiffParams& dparams,
719                                    const MVector& mv ,
720                                    const float mvcost,
721                                    const float lambda,
722                                    MvCostData& best_costs ,
723                                    MVector& best_mv)
724 {
725     if (dparams.Xl() <= 0 || dparams.Yl() <= 0)
726     {
727         return;
728     }
729 
730     // Set up the start point in the reference image by rounding the motion vector
731     // to 1/2 pel accuracy.NB: bit shift rounds negative values DOWN, as required
732     const MVector roundvec( mv.x>>1 , mv.y>>1 );
733 
734     //Get the remainder after rounding. NB rmdr values always 0 or 1
735     const MVector rmdr( mv.x & 1 , mv.y & 1 );
736 
737     //Where to start in the upconverted image
738     const ImageCoords ref_start( ( dparams.Xp()<<1 ) + roundvec.x ,( dparams.Yp()<<1 ) + roundvec.y );
739     const ImageCoords ref_stop( ref_start.x+(dparams.Xl()<<1) , ref_start.y+(dparams.Yl()<<1));
740 
741     bool bounds_check( false );
742 
743     if ( ref_start.x<0 ||
744          ref_stop.x >= m_ref_data.LengthX() ||
745          ref_start.y<0 ||
746          ref_stop.y >= m_ref_data.LengthY() )
747         bounds_check = true;
748 
749     const float start_val( mvcost*lambda );
750     float sum( start_val );
751 
752     CalcValueType temp;
753 
754     if ( !bounds_check )
755     {
756 #if defined (HAVE_MMX)
757         const ImageCoords start_pos(dparams.Xp(), dparams.Yp());
758         const ImageCoords end_pos(dparams.Xp() + dparams.Xl(), dparams.Yp() + dparams.Yl());
759 
760         sum = simple_block_diff_up_mmx_4 (m_pic_data, m_ref_data,
761                                         start_pos, end_pos,
762                                         ref_start, ref_stop,
763                                         rmdr,
764                                         sum,
765                                         best_costs.total);
766 
767         if ( sum>=best_costs.total )
768             return;
769 #else
770         ValueType* pic_curr = &m_pic_data[dparams.Yp()][dparams.Xp()];
771         const int pic_next( m_pic_data.LengthX() - dparams.Xl() );// go down a row and back up
772         ValueType *ref_curr = &m_ref_data[ref_start.y][ref_start.x];
773         const int ref_next( (m_ref_data.LengthX() - dparams.Xl())*2 );// go down 2 rows and back up
774 
775         if( rmdr.x == 0 && rmdr.y == 0 )
776         {
777             for( int y=dparams.Yl(); y > 0; --y, pic_curr+=pic_next, ref_curr+=ref_next )
778             {
779                 for( int x=dparams.Xl(); x > 0; --x, ++pic_curr, ref_curr+=2 )
780                 {
781                     sum += std::abs( *ref_curr - *pic_curr );
782                 }// x
783 
784                 if ( sum>=best_costs.total )
785                     return;
786             }// y
787         }
788         else if( rmdr.y == 0 )
789         {
790             for( int y=dparams.Yl(); y > 0; --y, pic_curr+=pic_next, ref_curr+=ref_next )
791             {
792                 for( int x=dparams.Xl(); x > 0; --x, ++pic_curr, ref_curr+=2 )
793                 {
794                     temp = (    CalcValueType( ref_curr[0] ) +
795                                 CalcValueType( ref_curr[1] ) +
796                                 1
797                             ) >> 1;
798                     sum += std::abs( temp - *pic_curr );
799                 }// x
800 
801                 if ( sum>=best_costs.total )
802                     return;
803             }// y
804         }
805         else if( rmdr.x == 0 )
806         {
807             for( int y=dparams.Yl(); y > 0; --y, pic_curr+=pic_next, ref_curr+=ref_next )
808             {
809                 for( int x=dparams.Xl(); x > 0; --x, ++pic_curr, ref_curr+=2 )
810                 {
811                     temp = (    CalcValueType( ref_curr[0] ) +
812                                 CalcValueType( ref_curr[m_ref_data.LengthX()] ) +
813                                 1
814                             ) >> 1;
815                     sum += std::abs( temp - *pic_curr );
816                 }// x
817 
818                 if ( sum>=best_costs.total )
819                     return;
820 
821             }// y
822         }
823         else
824         {
825             for( int y=dparams.Yl(); y > 0; --y, pic_curr+=pic_next, ref_curr+=ref_next )
826             {
827                 for( int x=dparams.Xl(); x > 0; --x, ++pic_curr, ref_curr+=2 )
828                 {
829                     temp = (    CalcValueType( ref_curr[0] ) +
830                                 CalcValueType( ref_curr[1] ) +
831                                 CalcValueType( ref_curr[m_ref_data.LengthX()+0] ) +
832                                 CalcValueType( ref_curr[m_ref_data.LengthX()+1] ) +
833                                 2
834                             ) >> 2;
835                     sum += std::abs( temp - *pic_curr );
836                 }// x
837 
838                 if ( sum>=best_costs.total )
839                     return;
840 
841             }// y
842 
843         }
844 #endif // HAVE_MMX
845     }
846     else
847     {
848         // We're doing bounds checking because we'll fall off the edge of the reference otherwise.
849 
850        // weights for doing linear interpolation, calculated from the remainder values
851         const ValueType linear_wts[4] = {  (2 - rmdr.x) * (2 - rmdr.y),    //tl
852                                            rmdr.x * (2 - rmdr.y),          //tr
853                                            (2 - rmdr.x) * rmdr.y,          //bl
854                                            rmdr.x * rmdr.y };              //br
855 
856         const int refXlen( m_ref_data.LengthX() );
857         const int refYlen( m_ref_data.LengthY() );
858 
859         for(int y = dparams.Yp(), uY = ref_start.y,BuY=BChk(uY,refYlen),BuY1=BChk(uY+1,refYlen);
860             y < dparams.Yend(); ++y, uY += 2,BuY=BChk(uY,refYlen),BuY1=BChk(uY+1,refYlen))
861         {
862             for(int x = dparams.Xp(), uX = ref_start.x,BuX=BChk(uX,refXlen),BuX1=BChk(uX+1,refXlen);
863                 x < dparams.Xend(); ++x, uX += 2,BuX=BChk(uX,refXlen),BuX1=BChk(uX+1,refXlen))
864             {
865 
866                 temp = (     linear_wts[0] * CalcValueType( m_ref_data[BuY][BuX] ) +
867                              linear_wts[1] * CalcValueType( m_ref_data[BuY][BuX1] ) +
868                              linear_wts[2] * CalcValueType( m_ref_data[BuY1][BuX] )+
869                              linear_wts[3] * CalcValueType( m_ref_data[BuY1][BuX1] ) +
870                              2
871                         ) >> 2;
872                 sum += std::abs( temp - m_pic_data[y][x] );
873             }// x
874 
875             if ( sum>=best_costs.total )
876                 return;
877 
878         }// y
879 
880     }
881 
882     // Since we've got here, we must have beaten the best cost to date
883 
884     best_mv = mv;
885     best_costs.total = sum;
886     best_costs.mvcost = mvcost;
887     best_costs.SAD = sum - start_val;
888 }
889 
Diff(const BlockDiffParams & dparams,const MVector & mv)890 float BlockDiffEighthPel::Diff(  const BlockDiffParams& dparams , const MVector& mv )
891 {
892     if (dparams.Xl() <= 0 || dparams.Yl() <= 0)
893     {
894         return 0;
895     }
896    //Set up the start point in the reference image by rounding the motion vector
897     //NB: bit shift rounds negative values DOWN, as required
898     const MVector roundvec( mv.x>>2 , mv.y>>2 );
899 
900     //Get the remainder after rounding. NB rmdr values always 0,1,2 or 3
901     const MVector rmdr( mv.x & 3 , mv.y & 3 );
902 
903     //Where to start in the upconverted image
904     const ImageCoords ref_start( ( dparams.Xp()<<1 ) + roundvec.x ,( dparams.Yp()<<1 ) + roundvec.y );
905     const ImageCoords ref_stop( ref_start.x+(dparams.Xl()<<1) , ref_start.y+(dparams.Yl()<<1));
906 
907     ValueType* pic_curr = &m_pic_data[dparams.Yp()][dparams.Xp()];
908     const int pic_next( m_pic_data.LengthX() - dparams.Xl() );// go down a row and back up
909 
910     //weights for doing linear interpolation, calculated from the remainder values
911     const ValueType linear_wts[4] = {  (4 - rmdr.x) * (4 - rmdr.y),    //tl
912                                        rmdr.x * (4 - rmdr.y),          //tr
913                                        (4 - rmdr.x) * rmdr.y,          //bl
914                                        rmdr.x * rmdr.y };              //br
915 
916     bool bounds_check( false );
917 
918     if ( ref_start.x<0 ||
919          ref_stop.x >= m_ref_data.LengthX() ||
920          ref_start.y<0 ||
921          ref_stop.y >= m_ref_data.LengthY() )
922         bounds_check = true;
923 
924     float sum( 0.0f );
925 
926     CalcValueType temp;
927 
928     if ( !bounds_check )
929     {
930         ValueType *ref_curr = &m_ref_data[ref_start.y][ref_start.x];
931         const int ref_next( (m_ref_data.LengthX() - dparams.Xl())*2 );// go down 2 rows and back up
932 
933         if( rmdr.x == 0 && rmdr.y == 0 )
934         {
935             for( int y=dparams.Yl(); y > 0; --y, pic_curr+=pic_next, ref_curr+=ref_next )
936             {
937                 for( int x=dparams.Xl(); x > 0; --x, ++pic_curr, ref_curr+=2 )
938                 {
939                     sum += CalcValueType( std::abs( ref_curr[0] - *pic_curr ) );
940                 }// x
941             }// y
942         }
943         else if( rmdr.y == 0 )
944         {
945             for( int y=dparams.Yl(); y > 0; --y, pic_curr+=pic_next, ref_curr+=ref_next )
946             {
947                 for( int x=dparams.Xl(); x > 0; --x, ++pic_curr, ref_curr+=2 )
948                 {
949                     temp = ((    linear_wts[0] * CalcValueType( ref_curr[0] ) +
950                                  linear_wts[1] * CalcValueType( ref_curr[1] ) +
951                                  8
952                             ) >> 4);
953                     sum += std::abs( temp - *pic_curr );
954                 }// x
955             }// y
956         }
957         else if( rmdr.x == 0 )
958         {
959             for( int y=dparams.Yl(); y > 0; --y, pic_curr+=pic_next, ref_curr+=ref_next )
960             {
961                 for( int x=dparams.Xl(); x > 0; --x, ++pic_curr, ref_curr+=2 )
962                 {
963                     temp = ((    linear_wts[0] * CalcValueType( ref_curr[0] ) +
964                                  linear_wts[2] * CalcValueType( ref_curr[m_ref_data.LengthX()+0] ) +
965                                        8
966                                    ) >> 4);
967                     sum += std::abs( temp - *pic_curr );
968                 }// x
969             }// y
970         }
971         else
972         {
973             for( int y=dparams.Yl(); y > 0; --y, pic_curr+=pic_next, ref_curr+=ref_next )
974             {
975                 for( int x=dparams.Xl(); x > 0; --x, ++pic_curr, ref_curr+=2 )
976                 {
977                     temp = ((    linear_wts[0] * CalcValueType( ref_curr[0] ) +
978                                  linear_wts[1] * CalcValueType( ref_curr[1] ) +
979                                  linear_wts[2] * CalcValueType( ref_curr[m_ref_data.LengthX()+0] ) +
980                                  linear_wts[3] * CalcValueType( ref_curr[m_ref_data.LengthX()+1] ) +
981                                  8
982                             ) >> 4);
983                     sum += std::abs( temp - *pic_curr );
984                 }// x
985             }// y
986         }
987     }
988     else
989     {
990         // We're doing bounds checking because we'll fall off the edge of the reference otherwise.
991        const int refXlen( m_ref_data.LengthX() );
992        const int refYlen( m_ref_data.LengthY() );
993 
994        for(int y = dparams.Yp(), uY = ref_start.y,BuY=BChk(uY,refYlen),BuY1=BChk(uY+1,refYlen);
995             y < dparams.Yend(); ++y, uY += 2,BuY=BChk(uY,refYlen),BuY1=BChk(uY+1,refYlen))
996         {
997             for(int x = dparams.Xp(), uX = ref_start.x,BuX=BChk(uX,refXlen),BuX1=BChk(uX+1,refXlen);
998                 x < dparams.Xend(); ++x, uX += 2,BuX=BChk(uX,refXlen),BuX1=BChk(uX+1,refXlen))
999             {
1000 
1001                 temp = ( linear_wts[0] * CalcValueType( m_ref_data[BuY][BuX] ) +
1002                          linear_wts[1] * CalcValueType( m_ref_data[BuY][BuX1] ) +
1003                          linear_wts[2] * CalcValueType( m_ref_data[BuY1][BuX] )+
1004                          linear_wts[3] * CalcValueType( m_ref_data[BuY1][BuX1] ) +
1005                          8
1006                         ) >> 4;
1007                 sum += std::abs( temp - m_pic_data[y][x] );
1008             }// x
1009         }// y
1010 
1011     }
1012 
1013     return sum;
1014 }
1015 
Diff(const BlockDiffParams & dparams,const MVector & mv,const float mvcost,const float lambda,MvCostData & best_costs,MVector & best_mv)1016 void BlockDiffEighthPel::Diff( const BlockDiffParams& dparams,
1017                                    const MVector& mv ,
1018                                    const float mvcost,
1019                                    const float lambda,
1020                                    MvCostData& best_costs ,
1021                                    MVector& best_mv)
1022 {
1023     if (dparams.Xl() <= 0 || dparams.Yl() <= 0)
1024     {
1025         return;
1026     }
1027     //Set up the start point in the reference image by rounding the motion vector
1028     //NB: bit shift rounds negative values DOWN, as required
1029     const MVector roundvec( mv.x>>2 , mv.y>>2 );
1030 
1031     //Get the remainder after rounding. NB rmdr values always 0,1,2 or 3
1032     const MVector rmdr( mv.x & 3 , mv.y & 3 );
1033 
1034     //Where to start in the upconverted image
1035     const ImageCoords ref_start( ( dparams.Xp()<<1 ) + roundvec.x ,( dparams.Yp()<<1 ) + roundvec.y );
1036     const ImageCoords ref_stop( ref_start.x+(dparams.Xl()<<1) , ref_start.y+(dparams.Yl()<<1));
1037 
1038     ValueType* pic_curr = &m_pic_data[dparams.Yp()][dparams.Xp()];
1039     const int pic_next( m_pic_data.LengthX() - dparams.Xl() );// go down a row and back up
1040 
1041     //weights for doing linear interpolation, calculated from the remainder values
1042     const ValueType linear_wts[4] = {  (4 - rmdr.x) * (4 - rmdr.y),    //tl
1043                                        rmdr.x * (4 - rmdr.y),          //tr
1044                                        (4 - rmdr.x) * rmdr.y,          //bl
1045                                        rmdr.x * rmdr.y };              //br
1046 
1047     bool bounds_check( false );
1048 
1049     if ( ref_start.x<0 ||
1050          ref_stop.x >= m_ref_data.LengthX() ||
1051          ref_start.y<0 ||
1052          ref_stop.y >= m_ref_data.LengthY() )
1053         bounds_check = true;
1054 
1055     const float start_val( mvcost*lambda );
1056     float sum( start_val );
1057 
1058     CalcValueType temp;
1059 
1060     if ( !bounds_check )
1061     {
1062         ValueType *ref_curr = &m_ref_data[ref_start.y][ref_start.x];
1063         const int ref_next( (m_ref_data.LengthX() - dparams.Xl())*2 );// go down 2 rows and back up
1064 
1065         if( rmdr.x == 0 && rmdr.y == 0 )
1066         {
1067             for( int y=dparams.Yl(); y > 0; --y, pic_curr+=pic_next, ref_curr+=ref_next )
1068             {
1069                 for( int x=dparams.Xl(); x > 0; --x, ++pic_curr, ref_curr+=2 )
1070                 {
1071                     sum += CalcValueType( std::abs( ref_curr[0] - *pic_curr ) );
1072                 }// x
1073 
1074                 if ( sum>=best_costs.total )
1075                     return;
1076 
1077             }// y
1078         }
1079         else if( rmdr.y == 0 )
1080         {
1081             for( int y=dparams.Yl(); y > 0; --y, pic_curr+=pic_next, ref_curr+=ref_next )
1082             {
1083                 for( int x=dparams.Xl(); x > 0; --x, ++pic_curr, ref_curr+=2 )
1084                 {
1085                     temp = ((    linear_wts[0] * CalcValueType( ref_curr[0] ) +
1086                                  linear_wts[1] * CalcValueType( ref_curr[1] ) +
1087                                  8
1088                             ) >> 4);
1089                     sum += std::abs( temp - *pic_curr );
1090                 }// x
1091 
1092                 if ( sum>=best_costs.total )
1093                     return;
1094 
1095             }// y
1096         }
1097         else if( rmdr.x == 0 )
1098         {
1099             for( int y=dparams.Yl(); y > 0; --y, pic_curr+=pic_next, ref_curr+=ref_next )
1100             {
1101                 for( int x=dparams.Xl(); x > 0; --x, ++pic_curr, ref_curr+=2 )
1102                 {
1103                     temp = ((    linear_wts[0] * CalcValueType( ref_curr[0] ) +
1104                                  linear_wts[2] * CalcValueType( ref_curr[m_ref_data.LengthX()+0] ) +
1105                                        8
1106                                    ) >> 4);
1107                     sum += std::abs( temp - *pic_curr );
1108                 }// x
1109 
1110                 if ( sum>=best_costs.total )
1111                     return;
1112 
1113             }// y
1114         }
1115         else
1116         {
1117             for( int y=dparams.Yl(); y > 0; --y, pic_curr+=pic_next, ref_curr+=ref_next )
1118             {
1119                 for( int x=dparams.Xl(); x > 0; --x, ++pic_curr, ref_curr+=2 )
1120                 {
1121                     temp = ((    linear_wts[0] * CalcValueType( ref_curr[0] ) +
1122                                  linear_wts[1] * CalcValueType( ref_curr[1] ) +
1123                                  linear_wts[2] * CalcValueType( ref_curr[m_ref_data.LengthX()+0] ) +
1124                                  linear_wts[3] * CalcValueType( ref_curr[m_ref_data.LengthX()+1] ) +
1125                                  8
1126                             ) >> 4);
1127                     sum += std::abs( temp - *pic_curr );
1128                 }// x
1129 
1130                 if ( sum>=best_costs.total )
1131                     return;
1132 
1133             }// y
1134         }
1135     }
1136     else
1137     {
1138         // We're doing bounds checking because we'll fall off the edge of the reference otherwise.
1139        const int refXlen( m_ref_data.LengthX() );
1140        const int refYlen( m_ref_data.LengthY() );
1141 
1142        for(int y = dparams.Yp(), uY = ref_start.y,BuY=BChk(uY,refYlen),BuY1=BChk(uY+1,refYlen);
1143             y < dparams.Yend(); ++y, uY += 2,BuY=BChk(uY,refYlen),BuY1=BChk(uY+1,refYlen))
1144         {
1145             for(int x = dparams.Xp(), uX = ref_start.x,BuX=BChk(uX,refXlen),BuX1=BChk(uX+1,refXlen);
1146                 x < dparams.Xend(); ++x, uX += 2,BuX=BChk(uX,refXlen),BuX1=BChk(uX+1,refXlen))
1147             {
1148 
1149                 temp = ( linear_wts[0] * CalcValueType( m_ref_data[BuY][BuX] ) +
1150                          linear_wts[1] * CalcValueType( m_ref_data[BuY][BuX1] ) +
1151                          linear_wts[2] * CalcValueType( m_ref_data[BuY1][BuX] )+
1152                          linear_wts[3] * CalcValueType( m_ref_data[BuY1][BuX1] ) +
1153                          8
1154                         ) >> 4;
1155                 sum += std::abs( temp - m_pic_data[y][x] );
1156             }// x
1157 
1158             if ( sum>=best_costs.total )
1159                 return;
1160 
1161         }// y
1162 
1163     }
1164 
1165     // If we've got here we must have done better than the best costs so far
1166     best_mv = mv;
1167     best_costs.total = sum;
1168     best_costs.mvcost = mvcost;
1169     best_costs.SAD = sum - start_val;
1170 }
1171 
Diff(const BlockDiffParams & dparams,const MVector & mv1,const MVector & mv2)1172 float BiBlockHalfPel::Diff(  const BlockDiffParams& dparams ,
1173                              const MVector& mv1 ,
1174                              const MVector& mv2 )
1175 {
1176     if (dparams.Xl() <= 0 || dparams.Yl() <= 0)
1177     {
1178         return 0;
1179     }
1180     // First create a difference array, and subtract the reference 1 data into it
1181     TwoDArray<ValueType> diff_array( dparams.Yl() , dparams.Xl() );
1182 
1183     //Where to start in the upconverted images
1184     const ImageCoords ref_start1( ( dparams.Xp()<<1 ) + mv1.x ,( dparams.Yp()<<1 ) + mv1.y );
1185     const ImageCoords ref_stop1( ref_start1.x+(dparams.Xl()<<1) , ref_start1.y+(dparams.Yl()<<1));
1186 
1187     const ImageCoords ref_start2( ( dparams.Xp()<<1 ) + mv2.x ,( dparams.Yp()<<1 ) + mv2.y );
1188     const ImageCoords ref_stop2( ref_start2.x+(dparams.Xl()<<1) , ref_start2.y+(dparams.Yl()<<1));
1189 
1190     ValueType* pic_curr = &m_pic_data[dparams.Yp()][dparams.Xp()];
1191     const int pic_next( m_pic_data.LengthX() - dparams.Xl() );// go down a row and back up
1192 
1193     ValueType* diff_curr = &diff_array[0][0];
1194 
1195     bool bounds_check( false );
1196 
1197     if ( ref_start1.x<0 ||
1198          ref_stop1.x >= m_ref_data1.LengthX() ||
1199          ref_start1.y<0 ||
1200          ref_stop1.y >= m_ref_data1.LengthY() )
1201         bounds_check = true;
1202 
1203     if ( !bounds_check )
1204     {
1205         ValueType *ref_curr = &m_ref_data1[ref_start1.y][ref_start1.x];
1206         const int ref_next( (m_ref_data1.LengthX() - dparams.Xl())*2 );// go down 2 rows and back up
1207 
1208         for( int y=dparams.Yl(); y > 0; --y, pic_curr+=pic_next, ref_curr+=ref_next)
1209         {
1210             for( int x=dparams.Xl(); x > 0; --x, ++pic_curr, ref_curr+=2, ++diff_curr )
1211             {
1212                 *diff_curr = ( (*pic_curr)<<1 ) - *ref_curr;
1213 
1214             }// x
1215         }// y
1216 
1217     }
1218     else
1219     {
1220         // We're doing bounds checking because we'll fall off the edge of the reference otherwise.
1221         for( int y=dparams.Yl(), ry=ref_start1.y, by=BChk(ry,m_ref_data1.LengthY());
1222              y>0;
1223              --y, pic_curr+=pic_next, ry+=2 , by=BChk(ry,m_ref_data1.LengthY()))
1224         {
1225              for( int x=dparams.Xl() , rx=ref_start1.x , bx=BChk(rx,m_ref_data1.LengthX());
1226                   x>0 ;
1227                   --x, ++pic_curr, rx+=2 , ++diff_curr, bx=BChk(rx,m_ref_data1.LengthX()))
1228              {
1229                  *diff_curr = ( (*pic_curr)<<1 ) - m_ref_data1[by][bx];
1230              }// x
1231         }// y
1232 
1233     }
1234 
1235     // Now do the other reference
1236 
1237     bounds_check = false;
1238 
1239     if ( ref_start2.x<0 ||
1240          ref_stop2.x >= m_ref_data2.LengthX() ||
1241          ref_start2.y<0 ||
1242          ref_stop2.y >= m_ref_data2.LengthY() )
1243         bounds_check = true;
1244 
1245     float sum( 0 );
1246 
1247     diff_curr = &diff_array[0][0];
1248     ValueType temp;
1249 
1250     if ( !bounds_check )
1251     {
1252         ValueType *ref_curr = &m_ref_data2[ref_start2.y][ref_start2.x];
1253         const int ref_next( (m_ref_data2.LengthX() - dparams.Xl())*2 );// go down 2 rows and back up
1254 
1255         for( int y=dparams.Yl(); y > 0; --y, pic_curr+=pic_next, ref_curr+=ref_next)
1256         {
1257             for( int x=dparams.Xl(); x > 0; --x, ++pic_curr, ref_curr+=2, ++diff_curr )
1258             {
1259                 temp = (*diff_curr - *ref_curr )>>1;
1260                 sum += std::abs( temp );
1261 
1262             }// x
1263         }// y
1264 
1265     }
1266     else
1267     {
1268         // We're doing bounds checking because we'll fall off the edge of the reference otherwise.
1269         for( int y=dparams.Yl(), ry=ref_start2.y, by=BChk(ry,m_ref_data2.LengthY());
1270              y>0;
1271              --y, pic_curr+=pic_next, ry+=2 , by=BChk(ry,m_ref_data2.LengthY()))
1272         {
1273              for( int x=dparams.Xl() , rx=ref_start2.x , bx=BChk(rx,m_ref_data2.LengthX());
1274                   x>0 ;
1275                   --x, ++pic_curr, rx+=2 , ++diff_curr, bx=BChk(rx,m_ref_data2.LengthX()))
1276              {
1277                 temp = (*diff_curr - m_ref_data2[by][bx] )>>1;
1278                 sum += std::abs( temp );
1279              }// x
1280         }// y
1281 
1282     }
1283 
1284     return sum;
1285 
1286 }
1287 
Diff(const BlockDiffParams & dparams,const MVector & mv1,const MVector & mv2)1288 float BiBlockQuarterPel::Diff(  const BlockDiffParams& dparams ,
1289                              const MVector& mv1 ,
1290                              const MVector& mv2 )
1291 {
1292     if (dparams.Xl() <= 0 || dparams.Yl() <= 0)
1293     {
1294         return 0;
1295     }
1296     // First create a difference array, and subtract the reference 1 data into it
1297     TwoDArray<ValueType> diff_array( dparams.Yl() , dparams.Xl() );
1298 
1299    // Set up the start point in the reference images by rounding the motion vectors
1300     // to 1/2 pel accuracy.NB: bit shift rounds negative values DOWN, as required
1301     const MVector roundvec1 ( mv1.x>>1 , mv1.y>>1 );
1302     const MVector roundvec2 ( mv2.x>>1 , mv2.y>>1 );
1303 
1304    //Get the remainders after rounding. NB rmdr values always 0 or 1
1305     const MVector rmdr1( mv1.x & 1 , mv1.y & 1 );
1306     const MVector rmdr2( mv2.x & 1 , mv2.y & 1 );
1307 
1308     //Where to start in the upconverted images
1309     const ImageCoords ref_start1( ( dparams.Xp()<<1 ) + roundvec1.x ,( dparams.Yp()<<1 ) + roundvec1.y );
1310     const ImageCoords ref_stop1( ref_start1.x+(dparams.Xl()<<1) , ref_start1.y+(dparams.Yl()<<1));
1311 
1312     const ImageCoords ref_start2( ( dparams.Xp()<<1 ) + roundvec2.x ,( dparams.Yp()<<1 ) + roundvec2.y );
1313     const ImageCoords ref_stop2( ref_start2.x+(dparams.Xl()<<1) , ref_start2.y+(dparams.Yl()<<1));
1314 
1315     ValueType* pic_curr = &m_pic_data[dparams.Yp()][dparams.Xp()];
1316     const int pic_next( m_pic_data.LengthX() - dparams.Xl() );// go down a row and back up
1317 
1318     ValueType* diff_curr = &diff_array[0][0];
1319 
1320     bool bounds_check( false );
1321 
1322     if ( ref_start1.x<0 ||
1323          ref_stop1.x >= m_ref_data1.LengthX() ||
1324          ref_start1.y<0 ||
1325          ref_stop1.y >= m_ref_data1.LengthY() )
1326         bounds_check = true;
1327 
1328     ValueType temp;
1329 
1330     if ( !bounds_check )
1331     {
1332 #if defined (HAVE_MMX)
1333         const ImageCoords start_pos(dparams.Xp(), dparams.Yp());
1334         const ImageCoords end_pos(dparams.Xp() + dparams.Xl(), dparams.Yp() + dparams.Yl());
1335 
1336         simple_biblock_diff_pic_mmx_4 (m_pic_data, m_ref_data1, diff_array,
1337                                     start_pos, end_pos,
1338                                     ref_start1, ref_stop1,
1339                                     rmdr1);
1340 #else
1341         ValueType *ref_curr = &m_ref_data1[ref_start1.y][ref_start1.x];
1342         const int ref_next( (m_ref_data1.LengthX() - dparams.Xl())*2 );// go down 2 rows and back up
1343 
1344 
1345 
1346         if( rmdr1.x == 0 && rmdr1.y == 0 )
1347         {
1348             for( int y=dparams.Yl(); y > 0; --y, pic_curr+=pic_next, ref_curr+=ref_next )
1349             {
1350                 for( int x=dparams.Xl(); x > 0; --x, ++pic_curr, ref_curr+=2, ++diff_curr )
1351                 {
1352                     *diff_curr = ( (*pic_curr)<<1 ) - *ref_curr;
1353                 }// x
1354             }// y
1355         }
1356         else if( rmdr1.y == 0 )
1357         {
1358             for( int y=dparams.Yl(); y > 0; --y, pic_curr+=pic_next, ref_curr+=ref_next )
1359             {
1360                 for( int x=dparams.Xl(); x > 0; --x, ++pic_curr, ref_curr+=2, ++diff_curr )
1361                 {
1362                     temp = (    CalcValueType( ref_curr[0] ) +
1363                                 CalcValueType( ref_curr[1] ) +
1364                                 1
1365                             ) >> 1;
1366 
1367                     *diff_curr = ( (*pic_curr)<<1 ) - temp;
1368                 }// x
1369             }// y
1370         }
1371         else if( rmdr1.x == 0 )
1372         {
1373             for( int y=dparams.Yl(); y > 0; --y, pic_curr+=pic_next, ref_curr+=ref_next )
1374             {
1375                 for( int x=dparams.Xl(); x > 0; --x, ++pic_curr, ref_curr+=2, ++diff_curr )
1376                 {
1377                     temp = (    CalcValueType( ref_curr[0] ) +
1378                                 CalcValueType( ref_curr[m_ref_data1.LengthX()] ) +
1379                                 1
1380                             ) >> 1;
1381                   *diff_curr = ( (*pic_curr)<<1 ) - temp;
1382                 }// x
1383             }// y
1384         }
1385         else
1386         {
1387             for( int y=dparams.Yl(); y > 0; --y, pic_curr+=pic_next, ref_curr+=ref_next )
1388             {
1389                 for( int x=dparams.Xl(); x > 0; --x, ++pic_curr, ref_curr+=2, ++diff_curr )
1390                 {
1391                     temp = (    CalcValueType( ref_curr[0] ) +
1392                                 CalcValueType( ref_curr[1] ) +
1393                                 CalcValueType( ref_curr[m_ref_data1.LengthX()+0] ) +
1394                                 CalcValueType( ref_curr[m_ref_data1.LengthX()+1] ) +
1395                                 2
1396                             ) >> 2;
1397                   *diff_curr = ( (*pic_curr)<<1 ) - temp;
1398                 }// x
1399             }// y
1400         }
1401 
1402 #endif
1403     }
1404     else
1405     {
1406         const ValueType linear_wts[4] = {  (2 - rmdr1.x) * (2 - rmdr1.y),    //tl
1407                                            rmdr1.x * (2 - rmdr1.y),          //tr
1408                                            (2 - rmdr1.x) * rmdr1.y,          //bl
1409                                            rmdr1.x * rmdr1.y };              //br
1410 
1411         // We're doing bounds checking because we'll fall off the edge of the reference otherwise.
1412         for( int y=dparams.Yl(), ry=ref_start1.y, by=BChk(ry,m_ref_data1.LengthY()), by1=BChk(ry+1,m_ref_data1.LengthY());
1413              y>0;
1414              --y, pic_curr+=pic_next, ry+=2 , by=BChk(ry,m_ref_data1.LengthY()), by1=BChk(ry+1,m_ref_data1.LengthY()) )
1415         {
1416              for( int x=dparams.Xl() , rx=ref_start1.x , bx=BChk(rx,m_ref_data1.LengthX()), bx1=BChk(rx+1,m_ref_data1.LengthX());
1417                   x>0 ;
1418                   --x, ++pic_curr, rx+=2 , ++diff_curr, bx=BChk(rx,m_ref_data1.LengthX()), bx1=BChk(rx+1,m_ref_data1.LengthX()))
1419              {
1420                 temp = (     linear_wts[0] * CalcValueType( m_ref_data1[by][bx] ) +
1421                              linear_wts[1] * CalcValueType( m_ref_data1[by][bx1] ) +
1422                              linear_wts[2] * CalcValueType( m_ref_data1[by1][bx] )+
1423                              linear_wts[3] * CalcValueType( m_ref_data1[by1][bx1] ) +
1424                              2
1425                         ) >> 2;
1426                  *diff_curr = ( (*pic_curr)<<1 ) - temp;
1427              }// x
1428         }// y
1429     }
1430 
1431     // Now do the other reference
1432 
1433     bounds_check = false;
1434 
1435     if ( ref_start2.x<0 ||
1436          ref_stop2.x >= m_ref_data2.LengthX() ||
1437          ref_start2.y<0 ||
1438          ref_stop2.y >= m_ref_data2.LengthY() )
1439         bounds_check = true;
1440 
1441     float sum( 0 );
1442 
1443     diff_curr = &diff_array[0][0];
1444 
1445     if ( !bounds_check )
1446     {
1447 
1448 
1449 #if defined (HAVE_MMX)
1450             sum = static_cast<float>( simple_biblock_diff_up_mmx_4 (diff_array,
1451                                                 m_ref_data2,
1452                                                 ref_start2, ref_stop2,
1453                                                 rmdr2) );
1454 #else
1455         ValueType *ref_curr = &m_ref_data2[ref_start2.y][ref_start2.x];
1456         const int ref_next( (m_ref_data2.LengthX() - dparams.Xl())*2 );// go down 2 rows and back up
1457         if( rmdr2.x == 0 && rmdr2.y == 0 )
1458         {
1459             for( int y=dparams.Yl(); y > 0; --y, pic_curr+=pic_next, ref_curr+=ref_next )
1460             {
1461                 for( int x=dparams.Xl(); x > 0; --x, ++pic_curr, ref_curr+=2, ++diff_curr )
1462                 {
1463                     sum += std::abs( (*diff_curr - *ref_curr)>>1 );
1464                 }// x
1465             }// y
1466         }
1467         else if( rmdr2.y == 0 )
1468         {
1469             for( int y=dparams.Yl(); y > 0; --y, pic_curr+=pic_next, ref_curr+=ref_next )
1470             {
1471                 for( int x=dparams.Xl(); x > 0; --x, ++pic_curr, ref_curr+=2, ++diff_curr )
1472                 {
1473                     temp = (    CalcValueType( ref_curr[0] ) +
1474                                 CalcValueType( ref_curr[1] ) +
1475                                 1
1476                             ) >> 1;
1477 
1478                     sum += std::abs( (*diff_curr - temp)>>1 );
1479                 }// x
1480             }// y
1481         }
1482         else if( rmdr2.x == 0 )
1483         {
1484             for( int y=dparams.Yl(); y > 0; --y, pic_curr+=pic_next, ref_curr+=ref_next )
1485             {
1486                 for( int x=dparams.Xl(); x > 0; --x, ++pic_curr, ref_curr+=2, ++diff_curr )
1487                 {
1488                     temp = (    CalcValueType( ref_curr[0] ) +
1489                                 CalcValueType( ref_curr[m_ref_data2.LengthX()] ) +
1490                                 1
1491                             ) >> 1;
1492                     sum += std::abs( (*diff_curr - temp)>>1 );
1493                 }// x
1494             }// y
1495         }
1496         else
1497         {
1498             for( int y=dparams.Yl(); y > 0; --y, pic_curr+=pic_next, ref_curr+=ref_next )
1499             {
1500                 for( int x=dparams.Xl(); x > 0; --x, ++pic_curr, ref_curr+=2, ++diff_curr )
1501                 {
1502                     temp = (    CalcValueType( ref_curr[0] ) +
1503                                 CalcValueType( ref_curr[1] ) +
1504                                 CalcValueType( ref_curr[m_ref_data2.LengthX()+0] ) +
1505                                 CalcValueType( ref_curr[m_ref_data2.LengthX()+1] ) +
1506                                 2
1507                             ) >> 2;
1508                     sum += std::abs( (*diff_curr - temp)>>1 );
1509                 }// x
1510             }// y
1511         }
1512 #endif
1513     }
1514     else
1515     {
1516         const ValueType linear_wts[4] = {  (2 - rmdr2.x) * (2 - rmdr2.y),    //tl
1517                                            rmdr2.x * (2 - rmdr2.y),          //tr
1518                                            (2 - rmdr2.x) * rmdr2.y,          //bl
1519                                            rmdr2.x * rmdr2.y };              //br
1520 
1521         // We're doing bounds checking because we'll fall off the edge of the reference otherwise.
1522         for( int y=dparams.Yl(), ry=ref_start2.y, by=BChk(ry,m_ref_data2.LengthY()),by1=BChk(ry+1,m_ref_data2.LengthY());
1523              y>0;
1524              --y, pic_curr+=pic_next, ry+=2 , by=BChk(ry,m_ref_data2.LengthY()),by1=BChk(ry+1,m_ref_data2.LengthY()))
1525         {
1526              for( int x=dparams.Xl() , rx=ref_start2.x , bx=BChk(rx,m_ref_data2.LengthX()), bx1=BChk(rx+1,m_ref_data2.LengthX());
1527                   x>0 ;
1528                   --x, ++pic_curr, rx+=2 , ++diff_curr, bx=BChk(rx,m_ref_data2.LengthX()), bx1=BChk(rx+1,m_ref_data2.LengthX()))
1529              {
1530                 temp = (     linear_wts[0] * CalcValueType( m_ref_data2[by][bx] ) +
1531                              linear_wts[1] * CalcValueType( m_ref_data2[by][bx1] ) +
1532                              linear_wts[2] * CalcValueType( m_ref_data2[by1][bx] )+
1533                              linear_wts[3] * CalcValueType( m_ref_data2[by1][bx1] ) +
1534                              2
1535                         ) >> 2;
1536                 sum += std::abs( (*diff_curr - temp)>>1 );
1537              }// x
1538         }// y
1539     }
1540 
1541     return sum;
1542 
1543 }
1544 
Diff(const BlockDiffParams & dparams,const MVector & mv1,const MVector & mv2)1545 float BiBlockEighthPel::Diff(  const BlockDiffParams& dparams ,
1546                              const MVector& mv1 ,
1547                              const MVector& mv2 )
1548 {
1549     if (dparams.Xl() <= 0 || dparams.Yl() <= 0)
1550     {
1551         return 0;
1552     }
1553 
1554     // First create a difference array, and subtract the reference 1 data into it
1555     TwoDArray<ValueType> diff_array( dparams.Yl() , dparams.Xl() );
1556 
1557    // Set up the start point in the reference images by rounding the motion vectors
1558     // to 1/2 pel accuracy.NB: bit shift rounds negative values DOWN, as required
1559     const MVector roundvec1 ( mv1.x>>2 , mv1.y>>2 );
1560     const MVector roundvec2 ( mv2.x>>2 , mv2.y>>2 );
1561 
1562    //Get the remainders after rounding. NB rmdr values always 0-3
1563     const MVector rmdr1( mv1.x & 3 , mv1.y & 3 );
1564     const MVector rmdr2( mv2.x & 3 , mv2.y & 3 );
1565 
1566     //weights for doing linear interpolation, calculated from the remainder values
1567     const ValueType linear_wts1[4] = {  (4 - rmdr1.x) * (4 - rmdr1.y),    //tl
1568                                        rmdr1.x * (4 - rmdr1.y),          //tr
1569                                        (4 - rmdr1.x) * rmdr1.y,          //bl
1570                                        rmdr1.x * rmdr1.y };              //br
1571     const ValueType linear_wts2[4] = {  (4 - rmdr2.x) * (4 - rmdr2.y),    //tl
1572                                        rmdr2.x * (4 - rmdr2.y),          //tr
1573                                        (4 - rmdr2.x) * rmdr2.y,          //bl
1574                                        rmdr2.x * rmdr2.y };              //br
1575 
1576     //Where to start in the upconverted images
1577     const ImageCoords ref_start1( ( dparams.Xp()<<1 ) + roundvec1.x ,( dparams.Yp()<<1 ) + roundvec1.y );
1578     const ImageCoords ref_stop1( ref_start1.x+(dparams.Xl()<<1) , ref_start1.y+(dparams.Yl()<<1));
1579 
1580     const ImageCoords ref_start2( ( dparams.Xp()<<1 ) + roundvec2.x ,( dparams.Yp()<<1 ) + roundvec2.y );
1581     const ImageCoords ref_stop2( ref_start2.x+(dparams.Xl()<<1) , ref_start2.y+(dparams.Yl()<<1));
1582 
1583     ValueType* pic_curr = &m_pic_data[dparams.Yp()][dparams.Xp()];
1584     const int pic_next( m_pic_data.LengthX() - dparams.Xl() );// go down a row and back up
1585 
1586     ValueType* diff_curr = &diff_array[0][0];
1587 
1588     bool bounds_check( false );
1589 
1590     if ( ref_start1.x<0 ||
1591          ref_stop1.x >= m_ref_data1.LengthX() ||
1592          ref_start1.y<0 ||
1593          ref_stop1.y >= m_ref_data1.LengthY() )
1594         bounds_check = true;
1595 
1596     ValueType temp;
1597 
1598     if ( !bounds_check )
1599     {
1600         ValueType *ref_curr = &m_ref_data1[ref_start1.y][ref_start1.x];
1601         const int ref_next( (m_ref_data1.LengthX() - dparams.Xl())*2 );// go down 2 rows and back up
1602 
1603         if( rmdr1.x == 0 && rmdr1.y == 0 )
1604         {
1605             for( int y=dparams.Yl(); y > 0; --y, pic_curr+=pic_next, ref_curr+=ref_next )
1606             {
1607                 for( int x=dparams.Xl(); x > 0; --x, ++pic_curr, ref_curr+=2, ++diff_curr )
1608                 {
1609                     *diff_curr = ( (*pic_curr)<<1 ) - *ref_curr;
1610                 }// x
1611             }// y
1612         }
1613         else if( rmdr1.y == 0 )
1614         {
1615             for( int y=dparams.Yl(); y > 0; --y, pic_curr+=pic_next, ref_curr+=ref_next )
1616             {
1617                 for( int x=dparams.Xl(); x > 0; --x, ++pic_curr, ref_curr+=2, ++diff_curr )
1618                 {
1619                     temp = ((    linear_wts1[0] * CalcValueType( ref_curr[0] ) +
1620                                  linear_wts1[1] * CalcValueType( ref_curr[1] ) +
1621                                  8
1622                             ) >> 4);
1623 
1624                     *diff_curr = ( (*pic_curr)<<1 ) - temp;
1625                 }// x
1626             }// y
1627         }
1628         else if( rmdr1.x == 0 )
1629         {
1630             for( int y=dparams.Yl(); y > 0; --y, pic_curr+=pic_next, ref_curr+=ref_next )
1631             {
1632                 for( int x=dparams.Xl(); x > 0; --x, ++pic_curr, ref_curr+=2, ++diff_curr )
1633                 {
1634                     temp = ((    linear_wts1[0] * CalcValueType( ref_curr[0] ) +
1635                                  linear_wts1[2] * CalcValueType( ref_curr[m_ref_data1.LengthX()+0] ) +
1636                                        8
1637                                    ) >> 4);
1638 
1639                     *diff_curr = ( (*pic_curr)<<1 ) - temp;
1640                 }// x
1641             }// y
1642         }
1643         else
1644         {
1645             for( int y=dparams.Yl(); y > 0; --y, pic_curr+=pic_next, ref_curr+=ref_next )
1646             {
1647                 for( int x=dparams.Xl(); x > 0; --x, ++pic_curr, ref_curr+=2, ++diff_curr )
1648                 {
1649                     temp = ((    linear_wts1[0] * CalcValueType( ref_curr[0] ) +
1650                                  linear_wts1[1] * CalcValueType( ref_curr[1] ) +
1651                                  linear_wts1[2] * CalcValueType( ref_curr[m_ref_data1.LengthX()+0] ) +
1652                                  linear_wts1[3] * CalcValueType( ref_curr[m_ref_data1.LengthX()+1] ) +
1653                                  8
1654                             ) >> 4);
1655                   *diff_curr = ( (*pic_curr)<<1 ) - temp;
1656                 }// x
1657             }// y
1658         }
1659 
1660 
1661     }
1662     else
1663     {
1664         // We're doing bounds checking because we'll fall off the edge of the reference otherwise.
1665         for( int y=dparams.Yl(), ry=ref_start1.y, by=BChk(ry,m_ref_data1.LengthY()), by1=BChk(ry+1,m_ref_data1.LengthY());
1666              y>0;
1667              --y, pic_curr+=pic_next, ry+=2 , by=BChk(ry,m_ref_data1.LengthY()), by1=BChk(ry+1,m_ref_data1.LengthY()) )
1668         {
1669              for( int x=dparams.Xl() , rx=ref_start1.x , bx=BChk(rx,m_ref_data1.LengthX()), bx1=BChk(rx+1,m_ref_data1.LengthX());
1670                   x>0 ;
1671                   --x, ++pic_curr, rx+=2 , ++diff_curr, bx=BChk(rx,m_ref_data1.LengthX()), bx1=BChk(rx+1,m_ref_data1.LengthX()))
1672              {
1673                 temp = (     linear_wts1[0] * CalcValueType( m_ref_data1[by][bx] ) +
1674                              linear_wts1[1] * CalcValueType( m_ref_data1[by][bx1] ) +
1675                              linear_wts1[2] * CalcValueType( m_ref_data1[by1][bx] )+
1676                              linear_wts1[3] * CalcValueType( m_ref_data1[by1][bx1] ) +
1677                              8
1678                         ) >> 4;
1679                  *diff_curr = ( (*pic_curr)<<1 ) - temp;
1680              }// x
1681         }// y
1682     }
1683 
1684     // Now do the other reference
1685 
1686     bounds_check = false;
1687 
1688     if ( ref_start2.x<0 ||
1689          ref_stop2.x >= m_ref_data2.LengthX() ||
1690          ref_start2.y<0 ||
1691          ref_stop2.y >= m_ref_data2.LengthY() )
1692         bounds_check = true;
1693 
1694     float sum( 0 );
1695 
1696     diff_curr = &diff_array[0][0];
1697 
1698     if ( !bounds_check )
1699     {
1700         ValueType *ref_curr = &m_ref_data2[ref_start2.y][ref_start2.x];
1701         const int ref_next( (m_ref_data2.LengthX() - dparams.Xl())*2 );// go down 2 rows and back up
1702 
1703         if( rmdr2.x == 0 && rmdr2.y == 0 )
1704         {
1705             for( int y=dparams.Yl(); y > 0; --y, pic_curr+=pic_next, ref_curr+=ref_next )
1706             {
1707                 for( int x=dparams.Xl(); x > 0; --x, ++pic_curr, ref_curr+=2, ++diff_curr )
1708                 {
1709                     sum += std::abs( (*diff_curr - *ref_curr)>>1 );
1710                 }// x
1711             }// y
1712         }
1713         else if( rmdr2.y == 0 )
1714         {
1715             for( int y=dparams.Yl(); y > 0; --y, pic_curr+=pic_next, ref_curr+=ref_next )
1716             {
1717                 for( int x=dparams.Xl(); x > 0; --x, ++pic_curr, ref_curr+=2, ++diff_curr )
1718                 {
1719                     temp = ((    linear_wts2[0] * CalcValueType( ref_curr[0] ) +
1720                                  linear_wts2[1] * CalcValueType( ref_curr[1] ) +
1721                                  8
1722                             ) >> 4);
1723 
1724                     sum += std::abs( (*diff_curr - temp)>>1 );
1725                 }// x
1726             }// y
1727         }
1728         else if( rmdr2.x == 0 )
1729         {
1730             for( int y=dparams.Yl(); y > 0; --y, pic_curr+=pic_next, ref_curr+=ref_next )
1731             {
1732                 for( int x=dparams.Xl(); x > 0; --x, ++pic_curr, ref_curr+=2, ++diff_curr )
1733                 {
1734                     temp = ((    linear_wts2[0] * CalcValueType( ref_curr[0] ) +
1735                                  linear_wts2[2] * CalcValueType( ref_curr[m_ref_data2.LengthX()+0] ) +
1736                                        8
1737                                    ) >> 4);
1738 
1739                     sum += std::abs( (*diff_curr - temp)>>1 );
1740                 }// x
1741             }// y
1742         }
1743         else
1744         {
1745             for( int y=dparams.Yl(); y > 0; --y, pic_curr+=pic_next, ref_curr+=ref_next )
1746             {
1747                 for( int x=dparams.Xl(); x > 0; --x, ++pic_curr, ref_curr+=2, ++diff_curr )
1748                 {
1749                     temp = ((    linear_wts2[0] * CalcValueType( ref_curr[0] ) +
1750                                  linear_wts2[1] * CalcValueType( ref_curr[1] ) +
1751                                  linear_wts2[2] * CalcValueType( ref_curr[m_ref_data2.LengthX()+0] ) +
1752                                  linear_wts2[3] * CalcValueType( ref_curr[m_ref_data2.LengthX()+1] ) +
1753                                  8
1754                             ) >> 4);
1755                     sum += std::abs( (*diff_curr - temp)>>1 );
1756                 }// x
1757             }// y
1758         }
1759 
1760     }
1761     else
1762     {
1763         // We're doing bounds checking because we'll fall off the edge of the reference otherwise.
1764         for( int y=dparams.Yl(), ry=ref_start1.y, by=BChk(ry,m_ref_data2.LengthY()),by1=BChk(ry+1,m_ref_data2.LengthY());
1765              y>0;
1766              --y, pic_curr+=pic_next, ry+=2 , by=BChk(ry,m_ref_data2.LengthY()),by1=BChk(ry+1,m_ref_data2.LengthY()))
1767         {
1768              for( int x=dparams.Xl() , rx=ref_start1.x , bx=BChk(rx,m_ref_data2.LengthX()), bx1=BChk(rx+1,m_ref_data2.LengthX());
1769                   x>0 ;
1770                   --x, ++pic_curr, rx+=2 , ++diff_curr, bx=BChk(rx,m_ref_data2.LengthX()), bx1=BChk(rx+1,m_ref_data2.LengthX()))
1771              {
1772                 temp = (     linear_wts2[0] * CalcValueType( m_ref_data2[by][bx] ) +
1773                              linear_wts2[1] * CalcValueType( m_ref_data2[by][bx1] ) +
1774                              linear_wts2[2] * CalcValueType( m_ref_data2[by1][bx] )+
1775                              linear_wts2[3] * CalcValueType( m_ref_data2[by1][bx1] ) +
1776                              8
1777                         ) >> 4;
1778                 sum += std::abs( (*diff_curr - temp)>>1 );
1779              }// x
1780         }// y
1781     }
1782 
1783     return sum;
1784 
1785 }
1786