1 /* eedi2.c
2 
3    Copyright (c) 2003-2021 HandBrake Team
4    This file is part of the HandBrake source code
5    Homepage: <http://handbrake.fr/>.
6    It may be used under the terms of the GNU General Public License v2.
7    For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html
8 
9    The EEDI2 interpolator was created by tritical:
10    http://web.missouri.edu/~kes25c/
11 */
12 
13 #include "handbrake/handbrake.h"
14 #include "handbrake/eedi2.h"
15 
/**
 * EEDI2 directional limit lookup table
 *
 * Maps a direction magnitude (0..32) to the largest deviation that is still
 * accepted when direction candidates are compared against their median.
 * eedi2_calc_directions() indexes it with abs(direction);
 * eedi2_filter_dir_map() indexes it with abs(code - 128) >> 2.
 * The two trailing -1 entries can never satisfy an "abs(...) <= limit" test,
 * so every candidate is rejected for those magnitudes.
 */
const int eedi2_limlut[33] __attribute__ ((aligned (16))) = {
     6,  6,  7,  7,  8,  8,  9,  9,   /*  0 ..  7 */
     9, 10, 10, 11, 11, 12, 12, 12,   /*  8 .. 15 */
    12, 12, 12, 12, 12, 12, 12, 12,   /* 16 .. 23 */
    12, 12, 12, 12, 12, 12, 12, -1,   /* 24 .. 31 */
    -1                                /* 32       */
};
26 
/**
 * Analog of _aligned_malloc
 *
 * Over-allocates with malloc(), rounds the address up to the next multiple of
 * align_size, and records the distance back to the start of the raw
 * allocation as an int stored immediately before the returned pointer. That
 * int is what eedi2_aligned_free() reads to recover the pointer to free().
 *
 * @param size Number of usable bytes requested
 * @param align_size Alignment of the returned pointer in bytes (must be a non-zero power of 2)
 * @return The aligned pointer, or NULL when align_size is not a non-zero power
 *         of 2, when the padded request would overflow, or when malloc() fails.
 *         The result must be released with eedi2_aligned_free(), never free().
 */
void *eedi2_aligned_malloc( size_t size, size_t align_size )
{
    const size_t header = sizeof( int );

    /* The mask arithmetic below is only meaningful for powers of two. */
    if( align_size == 0 || ( align_size & ( align_size - 1 ) ) != 0 )
        return( NULL );

    /* Refuse requests whose padded size would wrap around size_t. */
    if( size > SIZE_MAX - align_size - header )
        return( NULL );

    char *raw = malloc( size + align_size + header );
    if( raw == NULL )
        return( NULL );

    /* Always advance by 1..align_size bytes past the header slot, so there
       is guaranteed room for the header in front of the aligned address. */
    char *after_header = raw + header;
    const size_t misalign = (size_t)( (uintptr_t)after_header & ( align_size - 1 ) );
    char *aligned_ptr = after_header + ( align_size - misalign );

    /* The header is an int; give up if the distance cannot be represented. */
    const ptrdiff_t distance = aligned_ptr - raw;
    const int stored = (int)distance;
    if( stored != distance )
    {
        free( raw );
        return( NULL );
    }

    /* memcpy avoids a misaligned int store when align_size < alignof(int). */
    memcpy( aligned_ptr - header, &stored, sizeof( stored ) );

    return( aligned_ptr );
}
49 
/**
 * Analog of _aligned_free
 *
 * Reads the int stored immediately before ptr (the distance back to the start
 * of the underlying malloc() block, as written by eedi2_aligned_malloc) and
 * frees that underlying block.
 *
 * @param ptr The aligned pointer, created with eedi2_aligned_malloc, to be freed.
 *            NULL is accepted and ignored, mirroring free().
 */
void eedi2_aligned_free( void *ptr )
{
    if( ptr == NULL )
        return;

    /* Work on a char pointer: arithmetic on void * is a compiler extension. */
    char *aligned_ptr = ptr;
    int distance;

    /* memcpy keeps the header read valid even if it is not int-aligned. */
    memcpy( &distance, aligned_ptr - sizeof( int ), sizeof( distance ) );

    free( aligned_ptr - distance );
}
60 
/**
 * Sorts metrics for median filtering
 *
 * In-place ascending insertion sort; equal values keep their relative order.
 * A length of 1 or less leaves the array untouched.
 *
 * @param order Pointer to the table of values to sort
 * @param length Length of the order array
 */
void eedi2_sort_metrics( int *order, const int length )
{
    for( int pos = 1; pos < length; pos++ )
    {
        const int key = order[pos];
        int slot = pos - 1;

        /* Shift every larger element one place right to open a gap for key. */
        for( ; slot >= 0 && order[slot] > key; slot-- )
            order[slot + 1] = order[slot];

        order[slot + 1] = key;
    }
}
81 
/**
 * Bitblits an image plane (overwrites one bitmap with another)
 *
 * When height is 1, or when row_size, dst_pitch, and src_pitch are all equal,
 * the whole plane is copied with a single memcpy(); otherwise it is copied
 * one row at a time. A non-positive row_size or height copies nothing.
 *
 * @param dstp Pointer to destination bitmap
 * @param dst_pitch Stride of destination bitmap
 * @param srcp Pointer to source bitmap
 * @param src_pitch Stride of source bitmap
 * @param row_size Width of the bitmap being copied
 * @param height Height of the source bitmap
 */
void eedi2_bit_blit( uint8_t * dstp, int dst_pitch,
                     const uint8_t * srcp, int src_pitch,
                     int row_size, int height )
{
    /* Negative sizes would turn into enormous size_t lengths in memcpy(). */
    if( height <= 0 || row_size <= 0 )
        return;

    if( height == 1 || ( dst_pitch == src_pitch && src_pitch == row_size ) )
    {
        /* Multiply in size_t so large planes cannot overflow int. */
        memcpy( dstp, srcp, (size_t)row_size * (size_t)height );
        return;
    }

    for( int y = 0; y < height; y++ )
    {
        memcpy( dstp, srcp, (size_t)row_size );
        dstp += dst_pitch;
        srcp += src_pitch;
    }
}
115 
/**
 * A specialized variant of bit_blit, just for setting up the initial, field-sized bitmap planes that EEDI2 interpolates from.
 *
 * Copies every second row of src into consecutive rows of dst, starting with
 * the row src points at. For a positive height, ceil(height / 2) rows are
 * copied; a non-positive height copies nothing.
 *
 * @param src Pointer to source bitmap plane being copied from
 * @param dst Pointer to the destination bitmap plane being copied to
 * @param pitch Stride of both bitmaps
 * @param height Height of the original, full-size src plane being copied from
 */
void eedi2_fill_half_height_buffer_plane( uint8_t * src, uint8_t * dst, int pitch, int height )
{
    /* The original note here says TFF should copy alternating lines starting
       at line 0 and BFF at line 1. This function always begins at src, so
       presumably the caller selects the field through the pointer it passes
       in -- verify against the caller. */
    const int field_rows = ( height > 0 ) ? ( height / 2 + height % 2 ) : 0;

    for( int row = 0; row < field_rows; ++row )
    {
        memcpy( dst, src, pitch );
        dst += pitch;
        src += pitch * 2;
    }
}
136 
/**
 * A specialized variant of bit_blit, just for resizing the field-height maps EEDI2 generates to frame-height...a simple line doubler
 *
 * Each source row is written twice, to two consecutive destination rows, so
 * the destination receives 2 * height rows.
 *
 * @param srcp Pointer to source bitmap plane being copied from
 * @param dstp Pointer to the destination bitmap plane being copied to
 * @param height Height of the input, half-size src plane being copied from
 * @param pitch Stride of both bitmaps
 */
void eedi2_upscale_by_2( uint8_t * srcp, uint8_t * dstp, int height, int pitch )
{
    for( int row = 0; row < height; ++row )
    {
        uint8_t *upper = dstp;
        uint8_t *lower = upper + pitch;

        memcpy( upper, srcp, pitch );
        memcpy( lower, srcp, pitch );

        srcp += pitch;
        dstp = lower + pitch;
    }
}
156 
/**
 * Finds places where vertically adjacent pixels abruptly change in intensity, i.e., sharp edges.
 *
 * The whole destination plane (height rows of dst_pitch bytes) is cleared to 0
 * first; interior pixels that qualify as edges are then set to 255. The outer
 * one-pixel border is never examined and therefore stays 0.
 *
 * A pixel is skipped (left at 0) when its column, or both neighbouring
 * columns, differ by less than 10 across the three rows, or when the 3x3
 * neighbourhood's scaled variance is below the variance threshold. Otherwise
 * it is marked when either the gradient test or the second-derivative test
 * passes.
 *
 * @param dstp Pointer to the destination bitmap
 * @param dst_pitch Stride of dstp
 * @param srcp Pointer to the source bitmap
 * @param src_pitch Stride of srcp
 * @param mthresh Magnitude threshold, ensures it doesn't mark edges on pixels that are too similar (10 is a good default value)
 * @param lthresh Laplacian threshold, ensures edges are still prominent in the 2nd spatial derivative of the srcp plane (20 is a good default value)
 * @param vthresh Variance threshold, ensures it doesn't look for edges in highly random pixel blocks (20 is a good default value)
 * @param height Height of half-height single-field frame
 * @param width Width of srcp bitmap rows, as opposed to the padded stride in src_pitch
 */
void eedi2_build_edge_mask( uint8_t * dstp, int dst_pitch, uint8_t *srcp, int src_pitch,
                            int mthresh, int lthresh, int vthresh, int height, int width )
{
    int x, y;

    /* Nothing to clear or scan; also keeps the memset length non-negative. */
    if( height <= 0 || dst_pitch <= 0 )
        return;

    /* NOTE(review): the value compared against mthresh below is a squared
       magnitude (Ix*Ix + Iy*Iy); mthresh * 10 equals mthresh squared only for
       mthresh == 10 -- confirm this scaling is intended. */
    mthresh = mthresh * 10;
    /* 9 * sumsq - sum * sum is 81 times the variance of the nine pixels. */
    vthresh = vthresh * 81;

    /* Clear every row that this pass and the following mask passes treat as
       part of the plane, not just the first height / 2 of them; otherwise
       skipped pixels and the border in the lower rows keep stale data. */
    memset( dstp, 0, (size_t)height * (size_t)dst_pitch );

    srcp += src_pitch;
    dstp += dst_pitch;
    unsigned char *srcpp = srcp - src_pitch;
    unsigned char *srcpn = srcp + src_pitch;
    for( y = 1; y < height - 1; ++y )
    {
        for( x = 1; x < width - 1; ++x )
        {
            /* Flat area: the centre column, or both side columns, barely
               change across the three rows. */
            if( ( abs( srcpp[x]  -   srcp[x] ) < 10 &&
                  abs(  srcp[x]  -  srcpn[x] ) < 10 &&
                  abs( srcpp[x]  -  srcpn[x] ) < 10 )
              ||
                ( abs( srcpp[x-1] -  srcp[x-1] ) < 10 &&
                  abs(  srcp[x-1] - srcpn[x-1] ) < 10 &&
                  abs( srcpp[x-1] - srcpn[x-1] ) < 10 &&
                  abs( srcpp[x+1] -  srcp[x+1] ) < 10 &&
                  abs(  srcp[x+1] - srcpn[x+1] ) < 10 &&
                  abs( srcpp[x+1] - srcpn[x+1] ) < 10) )
                continue;

            const int sum = srcpp[x-1] + srcpp[x] + srcpp[x+1] +
                             srcp[x-1] +  srcp[x]+   srcp[x+1] +
                            srcpn[x-1] + srcpn[x] + srcpn[x+1];

            const int sumsq = srcpp[x-1] * srcpp[x-1] +
                              srcpp[x]   * srcpp[x]   +
                              srcpp[x+1] * srcpp[x+1] +
                               srcp[x-1] *  srcp[x-1] +
                               srcp[x]   *  srcp[x]   +
                               srcp[x+1] *  srcp[x+1] +
                              srcpn[x-1] * srcpn[x-1] +
                              srcpn[x]   * srcpn[x]   +
                              srcpn[x+1] * srcpn[x+1];

            if( 9 * sumsq-sum * sum < vthresh )
                continue;

            /* Horizontal gradient, and the largest of the three vertical
               differences in the centre column. */
            const int Ix = srcp[x+1] - srcp[x-1];
            const int d_prev_next = abs( srcpp[x] - srcpn[x] );
            const int d_prev_cur  = abs( srcpp[x] -  srcp[x] );
            const int d_cur_next  = abs(  srcp[x] - srcpn[x] );
            int Iy = ( d_prev_next > d_prev_cur ) ? d_prev_next : d_prev_cur;
            if( d_cur_next > Iy )
                Iy = d_cur_next;

            if( Ix * Ix + Iy * Iy >= mthresh )
            {
                dstp[x] = 255;
                continue;
            }

            /* Second derivatives along x and y. */
            const int Ixx =  srcp[x-1] - 2 * srcp[x] +  srcp[x+1];
            const int Iyy = srcpp[x]   - 2 * srcp[x] + srcpn[x];
            if( abs( Ixx ) + abs( Iyy ) >= lthresh )
                dstp[x] = 255;
        }
        dstp += dst_pitch;
        srcpp += src_pitch;
        srcp += src_pitch;
        srcpn += src_pitch;
    }
}
237 
/**
 * Expands and smooths out the edge mask
 *
 * dstp starts as a copy of mskp. Every interior pixel that is 0 in mskp is
 * then set to 0xFF in dstp when at least dstr of its eight neighbours in mskp
 * are 0xFF. The outer one-pixel border is only copied.
 *
 * @param mskp Pointer to the source edge mask being read from
 * @param msk_pitch Stride of mskp
 * @param dstp Pointer to the destination to store the dilated edge mask
 * @param dst_pitch Stride of dstp
 * @param dstr Dilation threshold, the minimum number of adjacent edge pixels in mskp needed to turn a non-edge pixel into an edge (4 is a good default value)
 * @param height Height of half-height field-sized frame
 * @param width Width of mskp bitmap rows, as opposed to the padded stride in msk_pitch
 */
void eedi2_dilate_edge_mask( uint8_t *mskp, int msk_pitch, uint8_t *dstp, int dst_pitch,
                             int dstr, int height, int width )
{
    eedi2_bit_blit( dstp, dst_pitch, mskp, msk_pitch, width, height );

    /* Three-row sliding window over the source mask. */
    const uint8_t *above = mskp;
    const uint8_t *here  = above + msk_pitch;
    const uint8_t *below = here + msk_pitch;
    uint8_t *out = dstp + dst_pitch;

    for( int row = 1; row < height - 1; ++row )
    {
        for( int col = 1; col < width - 1; ++col )
        {
            if( here[col] == 0 )
            {
                const int neighbours =
                    ( above[col-1] == 0xFF ) + ( above[col] == 0xFF ) + ( above[col+1] == 0xFF ) +
                    (  here[col-1] == 0xFF ) +                          (  here[col+1] == 0xFF ) +
                    ( below[col-1] == 0xFF ) + ( below[col] == 0xFF ) + ( below[col+1] == 0xFF );

                if( neighbours >= dstr )
                    out[col] = 0xFF;
            }
        }
        above = here;
        here = below;
        below += msk_pitch;
        out += dst_pitch;
    }
}
285 
/**
 * Contracts the edge mask
 *
 * dstp starts as a copy of mskp. Every interior pixel that is 0xFF in mskp is
 * then cleared to 0 in dstp when fewer than estr of its eight neighbours in
 * mskp are 0xFF. The outer one-pixel border is only copied.
 *
 * @param mskp Pointer to the source edge mask being read from
 * @param msk_pitch Stride of mskp
 * @param dstp Pointer to the destination to store the eroded edge mask
 * @param dst_pitch Stride of dstp
 * @param estr Erosion threshold, the minimum number of adjacent edge pixels in mskp an edge pixel needs in order to be kept (2 is a good default value)
 * @param height Height of half-height field-sized frame
 * @param width Width of mskp bitmap rows, as opposed to the padded stride in msk_pitch
 */
void eedi2_erode_edge_mask( uint8_t *mskp, int msk_pitch, uint8_t *dstp, int dst_pitch,
                            int estr, int height, int width )
{
    eedi2_bit_blit( dstp, dst_pitch, mskp, msk_pitch, width, height );

    /* Three-row sliding window over the source mask. */
    const uint8_t *above = mskp;
    const uint8_t *here  = above + msk_pitch;
    const uint8_t *below = here + msk_pitch;
    uint8_t *out = dstp + dst_pitch;

    for( int row = 1; row < height - 1; ++row )
    {
        for( int col = 1; col < width - 1; ++col )
        {
            if( here[col] == 0xFF )
            {
                int neighbours = 0;
                neighbours += ( above[col-1] == 0xFF );
                neighbours += ( above[col]   == 0xFF );
                neighbours += ( above[col+1] == 0xFF );
                neighbours += (  here[col-1] == 0xFF );
                neighbours += (  here[col+1] == 0xFF );
                neighbours += ( below[col-1] == 0xFF );
                neighbours += ( below[col]   == 0xFF );
                neighbours += ( below[col+1] == 0xFF );

                if( neighbours < estr )
                    out[col] = 0;
            }
        }
        above = here;
        here = below;
        below += msk_pitch;
        out += dst_pitch;
    }
}
331 
/**
 * Smooths out horizontally aligned holes in the mask
 *
 * dstp starts as a copy of mskp; rows 1..height-2 and columns 3..width-4 are
 * then adjusted using only pixels of the same row in mskp:
 *  - an edge pixel with no edge among its three left and three right
 *    neighbours is cleared to 0;
 *  - a non-edge pixel is set to 0xFF when it has an edge on both sides and
 *    the two distances (each 1..3) add up to at most 4.
 *
 * @param mskp Pointer to the source edge mask being read from
 * @param msk_pitch Stride of mskp
 * @param dstp Pointer to the destination to store the smoothed edge mask
 * @param dst_pitch Stride of dstp
 * @param height Height of half-height field-sized frame
 * @param width Width of mskp bitmap rows, as opposed to the padded stride in msk_pitch
 */
void eedi2_remove_small_gaps( uint8_t * mskp, int msk_pitch, uint8_t * dstp, int dst_pitch,
                              int height, int width )
{
    eedi2_bit_blit( dstp, dst_pitch, mskp, msk_pitch, width, height );

    const uint8_t *in_row = mskp + msk_pitch;
    uint8_t *out_row = dstp + dst_pitch;

    for( int row = 1; row < height - 1; ++row )
    {
        for( int col = 3; col < width - 3; ++col )
        {
            const uint8_t *p = in_row + col;

            if( p[0] )
            {
                /* Isolated edge pixel: nothing within three pixels either way. */
                const int has_company = p[-3] || p[-2] || p[-1] ||
                                        p[1]  || p[2]  || p[3];
                if( !has_company )
                    out_row[col] = 0;
            }
            else if( ( p[1] && ( p[-1] || p[-2] || p[-3] ) ) ||
                     ( p[2] && ( p[-1] || p[-2] ) ) ||
                     ( p[3] && p[-1] ) )
            {
                /* Short gap bracketed by edges on both sides: fill it. */
                out_row[col] = 0xFF;
            }
        }
        in_row += msk_pitch;
        out_row += dst_pitch;
    }
}
380 
381 /**
382  * Calculates spatial direction vectors for the edges. This is EEDI2's timesink, and can be thought of as YADIF_CHECK on steroids, as both try to discern which angle a given edge follows
383  * @param plane The plane of the image being processed, to know to reduce maxd for chroma planes (HandBrake only works with YUV420 video so it is assumed they are half-height)
384  * @param mskp Pointer to the source edge mask being read from
385  * @param msk_pitch Stride of mskp
386  * @param srcp Pointer to the source image being filtered
387  * @param src_pitch Stride of srcp
388  * @param dstp Pointer to the destination to store the dilated edge mask
389  * @param dst_pitch Stride of dstp
390  * @param maxd Maximum pixel distance to search (24 is a good default value)
391  * @param nt Noise threshold (50 is a good default value)
392  * @param height Height of half-height field-sized frame
393  * @param width Width of srcp bitmap rows, as opposed to the pdded stride in src_pitch
394  */
eedi2_calc_directions(const int plane,uint8_t * mskp,int msk_pitch,uint8_t * srcp,int src_pitch,uint8_t * dstp,int dst_pitch,int maxd,int nt,int height,int width)395 void eedi2_calc_directions( const int plane, uint8_t * mskp, int msk_pitch, uint8_t * srcp, int src_pitch,
396                             uint8_t * dstp, int dst_pitch, int maxd, int nt, int height, int width  )
397 {
398     int x, y, u, i;
399 
400     memset( dstp, 255, dst_pitch * height );
401     mskp += msk_pitch;
402     dstp += dst_pitch;
403     srcp += src_pitch;
404     unsigned char *src2p = srcp - src_pitch * 2;
405     unsigned char *srcpp = srcp - src_pitch;
406     unsigned char *srcpn = srcp + src_pitch;
407     unsigned char *src2n = srcp + src_pitch * 2;
408     unsigned char *mskpp = mskp - msk_pitch;
409     unsigned char *mskpn = mskp + msk_pitch;
410     const int maxdt = plane == 0 ? maxd : ( maxd >> 1 );
411 
412     for( y = 1; y < height - 1; ++y )
413     {
414         for( x = 1; x < width - 1; ++x )
415         {
416             if( mskp[x] != 0xFF || ( mskp[x-1] != 0xFF && mskp[x+1] != 0xFF ) )
417                 continue;
418             const int startu = MAX( -x + 1, -maxdt );
419             const int stopu = MIN( width - 2 - x, maxdt );
420             int minb = MIN( 13 * nt,
421                             ( abs( srcp[x] - srcpn[x] ) +
422                               abs( srcp[x] - srcpp[x] ) ) * 6 );
423             int mina = MIN( 19 * nt,
424                             ( abs( srcp[x] - srcpn[x] ) +
425                               abs( srcp[x] - srcpp[x] ) ) * 9 );
426             int minc = mina;
427             int mind = minb;
428             int mine = minb;
429             int dira = -5000, dirb = -5000, dirc = -5000, dird = -5000, dire = -5000;
430             for( u = startu; u <= stopu; ++u )
431             {
432                 if( y == 1 ||
433                       mskpp[x-1+u] == 0xFF || mskpp[x+u] == 0xFF || mskpp[x+1+u] == 0xFF )
434                 {
435                     if( y == height - 2 ||
436                         mskpn[x-1-u] == 0xFF || mskpn[x-u] == 0xFF || mskpn[x+1-u] == 0xFF )
437                     {
438                         const int diffsn = abs(  srcp[x-1] - srcpn[x-1-u] ) +
439                                            abs(  srcp[x]   - srcpn[x-u] )   +
440                                            abs(  srcp[x+1] - srcpn[x+1-u] );
441 
442                         const int diffsp = abs(  srcp[x-1] - srcpp[x-1+u] ) +
443                                            abs(  srcp[x]   - srcpp[x+u] )   +
444                                            abs(  srcp[x+1] - srcpp[x+1+u] );
445 
446                         const int diffps = abs( srcpp[x-1] -  srcp[x-1-u] ) +
447                                            abs( srcpp[x]   -  srcp[x-u] )   +
448                                            abs( srcpp[x+1] -  srcp[x+1-u] );
449 
450                         const int diffns = abs( srcpn[x-1] -  srcp[x-1+u] ) +
451                                            abs( srcpn[x]   -  srcp[x+u] )   +
452                                            abs( srcpn[x+1] -  srcp[x+1+u] );
453 
454                         const int diff = diffsn + diffsp + diffps + diffns;
455                         int diffd = diffsp + diffns;
456                         int diffe = diffsn + diffps;
457                         if( diff < minb )
458                         {
459                             dirb = u;
460                             minb = diff;
461                         }
462                         if( __builtin_expect( y > 1, 1) )
463                         {
464                             const int diff2pp = abs( src2p[x-1] - srcpp[x-1-u] ) +
465                                             abs( src2p[x]   - srcpp[x-u] )   +
466                                             abs( src2p[x+1] - srcpp[x+1-u] );
467                             const int diffp2p = abs( srcpp[x-1] - src2p[x-1+u] ) +
468                                             abs( srcpp[x]   - src2p[x+u] )   +
469                                             abs( srcpp[x+1] - src2p[x+1+u] );
470                             const int diffa = diff + diff2pp + diffp2p;
471                             diffd += diffp2p;
472                             diffe += diff2pp;
473                             if( diffa < mina )
474                             {
475                                 dira = u;
476                                 mina = diffa;
477                             }
478                         }
479                         if( __builtin_expect( y < height-2, 1) )
480                         {
481                             const int diff2nn = abs( src2n[x-1] - srcpn[x-1+u] ) +
482                                                 abs( src2n[x]   - srcpn[x+u] )   +
483                                                 abs( src2n[x+1] - srcpn[x+1+u] );
484                             const int diffn2n = abs( srcpn[x-1] - src2n[x-1-u] ) +
485                                                 abs( srcpn[x]   - src2n[x-u] )   +
486                                                 abs( srcpn[x+1] - src2n[x+1-u] );
487                             const int diffc = diff + diff2nn + diffn2n;
488                             diffd += diff2nn;
489                             diffe += diffn2n;
490                             if( diffc < minc )
491                             {
492                                 dirc = u;
493                                 minc = diffc;
494                             }
495                         }
496                         if( diffd < mind )
497                         {
498                             dird = u;
499                             mind = diffd;
500                         }
501                         if( diffe < mine )
502                         {
503                             dire = u;
504                             mine = diffe;
505                         }
506                     }
507                 }
508             }
509             int order[5], k=0;
510             if( dira != -5000 ) order[k++] = dira;
511             if( dirb != -5000 ) order[k++] = dirb;
512             if( dirc != -5000 ) order[k++] = dirc;
513             if( dird != -5000 ) order[k++] = dird;
514             if( dire != -5000 ) order[k++] = dire;
515             if( k > 1 )
516             {
517                 eedi2_sort_metrics( order, k );
518                 const int mid = ( k & 1 ) ?
519                                     order[k>>1] :
520                                     ( order[(k-1)>>1] + order[k>>1] + 1 ) >> 1;
521                 const int tlim = MAX( eedi2_limlut[abs(mid)] >> 2, 2 );
522                 int sum = 0, count = 0;
523                 for( i = 0; i < k; ++i )
524                 {
525                     if( abs( order[i] - mid ) <= tlim )
526                     {
527                         ++count;
528                         sum += order[i];
529                     }
530                 }
531                 if( count > 1 )
532                     dstp[x] = 128 + ( (int)( (float)sum / (float)count ) * 4 );
533                 else
534                     dstp[x] = 128;
535             }
536             else dstp[x] = 128;
537         }
538         mskpp += msk_pitch;
539         mskp += msk_pitch;
540         mskpn += msk_pitch;
541         src2p += src_pitch;
542         srcpp += src_pitch;
543         srcp += src_pitch;
544         srcpn += src_pitch;
545         src2n += src_pitch;
546         dstp += dst_pitch;
547     }
548 }
549 
/**
 * Scans one neighbouring row (adj) together with the current row (cur) of the
 * direction map, from column x towards x + reach (clipped to the row), and
 * reports whether any scanned column conflicts with the value at cur[x]:
 * either row holds a direction (not 0xFF) that differs from cur[x] by more
 * than lim, or both rows hold 0xFF at that column.
 *
 * @return 1 when a conflict is found, 0 otherwise
 */
static int eedi2_filter_map_conflict( const unsigned char *adj, const unsigned char *cur,
                                      int x, int reach, int width, int lim )
{
    const int centre = cur[x];
    int first, last, j;

    if( reach < 0 )
    {
        first = MAX( -x, reach );
        last = 0;
    }
    else
    {
        first = 0;
        last = MIN( width - x - 1, reach );
    }

    for( j = first; j <= last; ++j )
    {
        const int a = adj[x+j];
        const int c = cur[x+j];

        if( ( a != 0xFF && abs( a - centre ) > lim ) ||
            ( a == 0xFF && c == 0xFF ) ||
            ( c != 0xFF && abs( c - centre ) > lim ) )
            return 1;
    }
    return 0;
}

/**
 * Filters the edge mask
 *
 * dstp starts as a copy of dmskp. For each interior pixel that is an edge in
 * mskp and has a direction in dmskp (value other than 0xFF), the direction is
 * decoded as (value - 128) >> 2. The row above is scanned from the pixel
 * towards that direction and the row below towards the opposite direction;
 * when both scans find a conflicting neighbour, the pixel is set to 255 in
 * dstp.
 *
 * @param mskp Pointer to the source edge mask being read from
 * @param msk_pitch Stride of mskp
 * @param dmskp Pointer to the edge direction mask
 * @param dmsk_pitch Stride of dmskp
 * @param dstp Pointer to the destination to store the filtered edge direction mask
 * @param dst_pitch Stride of dstp
 * @param height Height of half-height field-sized frame
 * @param width Width of mskp bitmap rows, as opposed to the padded stride in msk_pitch
 */
void eedi2_filter_map( uint8_t * mskp, int msk_pitch, uint8_t * dmskp, int dmsk_pitch,
                       uint8_t * dstp, int dst_pitch, int height, int width )
{
    int row, col;

    eedi2_bit_blit( dstp, dst_pitch, dmskp, dmsk_pitch, width, height );

    const unsigned char *edge    = mskp + msk_pitch;
    const unsigned char *dir_up  = dmskp;
    const unsigned char *dir_cur = dir_up + dmsk_pitch;
    const unsigned char *dir_dn  = dir_cur + dmsk_pitch;
    unsigned char *out = dstp + dst_pitch;

    for( row = 1; row < height - 1; ++row )
    {
        for( col = 1; col < width - 1; ++col )
        {
            if( dir_cur[col] == 0xFF || edge[col] != 0xFF )
                continue;

            const int dir = ( dir_cur[col] - 128 ) >> 2;
            const int lim = MAX( abs( dir ) * 2, 12 );

            /* The row below is only examined once the row above conflicts. */
            if( eedi2_filter_map_conflict( dir_up, dir_cur, col, dir, width, lim ) &&
                eedi2_filter_map_conflict( dir_dn, dir_cur, col, -dir, width, lim ) )
                out[col] = 255;
        }
        edge += msk_pitch;
        dir_up += dmsk_pitch;
        dir_cur += dmsk_pitch;
        dir_dn += dmsk_pitch;
        out += dst_pitch;
    }
}
652 
653 
654 /**
655  * Filters the edge direction mask
656  * @param mskp Pointer to the edge mask
657  * @param msk_pitch Stride of mskp
658  * @param dmskp Pointer to the edge direction mask being read from
659  * @param dmsk_pitch Stride of dmskp
660  * @param dstp Pointer to the destination to store the filtered edge direction mask
661  * @param dst_pitch Stride of dstp
662  * @param height Height of half_height field-sized frame
663  * @param width Width of dmskp bitmap rows, as opposed to the pdded stride in dmsk_pitch
664  */
eedi2_filter_dir_map(uint8_t * mskp,int msk_pitch,uint8_t * dmskp,int dmsk_pitch,uint8_t * dstp,int dst_pitch,int height,int width)665 void eedi2_filter_dir_map( uint8_t * mskp, int msk_pitch, uint8_t * dmskp, int dmsk_pitch,
666                            uint8_t * dstp, int dst_pitch, int height, int width )
667 {
668     int x, y, i;
669 
670     eedi2_bit_blit( dstp, dst_pitch, dmskp, dmsk_pitch, width, height );
671 
672     dmskp += dmsk_pitch;
673     unsigned char *dmskpp = dmskp - dmsk_pitch;
674     unsigned char *dmskpn = dmskp + dmsk_pitch;
675     dstp += dst_pitch;
676     mskp += msk_pitch;
677     for( y = 1; y < height - 1; ++y )
678     {
679         for( x = 1; x < width - 1; ++x )
680         {
681             if( mskp[x] != 0xFF ) continue;
682             int u = 0, order[9];
683             if( dmskpp[x-1] != 0xFF ) order[u++] = dmskpp[x-1];
684             if( dmskpp[x]   != 0xFF ) order[u++] = dmskpp[x];
685             if( dmskpp[x+1] != 0xFF ) order[u++] = dmskpp[x+1];
686             if(  dmskp[x-1] != 0xFF ) order[u++] =  dmskp[x-1];
687             if(  dmskp[x]   != 0xFF ) order[u++] =  dmskp[x];
688             if(  dmskp[x+1] != 0xFF ) order[u++] =  dmskp[x+1];
689             if( dmskpn[x-1] != 0xFF ) order[u++] = dmskpn[x-1];
690             if( dmskpn[x]   != 0xFF ) order[u++] = dmskpn[x];
691             if( dmskpn[x+1] != 0xFF ) order[u++] = dmskpn[x+1];
692             if( u < 4 )
693             {
694                 dstp[x] = 255;
695                 continue;
696             }
697             eedi2_sort_metrics( order, u );
698             const int mid = ( u & 1 ) ?
699                 order[u>>1] : ( order[(u-1)>>1] + order[u>>1] + 1 ) >> 1;
700             int sum = 0, count = 0;
701             const int lim = eedi2_limlut[abs(mid-128)>>2];
702             for( i = 0; i < u; ++i )
703             {
704                 if( abs( order[i] - mid ) <= lim )
705                 {
706                     ++count;
707                     sum += order[i];
708                 }
709             }
710             if( count < 4 || ( count < 5 && dmskp[x] == 0xFF ) )
711             {
712                 dstp[x] = 255;
713                 continue;
714             }
715             dstp[x] = (int)( ( (float)( sum + mid ) / (float)( count + 1 ) ) + 0.5f );
716         }
717         dmskpp += dmsk_pitch;
718         dmskp += dmsk_pitch;
719         dmskpn += dmsk_pitch;
720         dstp += dst_pitch;
721         mskp += msk_pitch;
722     }
723 }
724 
725 /**
726  * Smoothes out the edge direction map
727  * @param mskp Pointer to the edge mask
728  * @param msk_pitch Stride of mskp
729  * @param dmskp Pointer to the edge direction mask being read from
730  * @param dmsk_pitch Stride of dmskp
731  * @param dstp Pointer to the destination to store the expanded edge direction mask
732  * @param dst_pitch Stride of dstp
733  * @param height Height of half-height field-sized frame
 * @param width Width of dmskp bitmap rows, as opposed to the padded stride in dmsk_pitch
735  */
eedi2_expand_dir_map(uint8_t * mskp,int msk_pitch,uint8_t * dmskp,int dmsk_pitch,uint8_t * dstp,int dst_pitch,int height,int width)736 void eedi2_expand_dir_map( uint8_t * mskp, int msk_pitch, uint8_t * dmskp, int dmsk_pitch,
737                            uint8_t * dstp, int dst_pitch, int height, int width )
738 {
739     int x, y, i;
740 
741     eedi2_bit_blit( dstp, dst_pitch, dmskp, dmsk_pitch, width, height );
742 
743     dmskp += dmsk_pitch;
744     unsigned char *dmskpp = dmskp - dmsk_pitch;
745     unsigned char *dmskpn = dmskp + dmsk_pitch;
746     dstp += dst_pitch;
747     mskp += msk_pitch;
748     for( y = 1; y < height - 1; ++y )
749     {
750         for( x = 1; x < width - 1; ++x )
751         {
752             if( dmskp[x] != 0xFF || mskp[x] != 0xFF ) continue;
753             int u = 0, order[9];
754             if( dmskpp[x-1] != 0xFF ) order[u++] = dmskpp[x-1];
755             if( dmskpp[x]   != 0xFF ) order[u++] = dmskpp[x];
756             if( dmskpp[x+1] != 0xFF ) order[u++] = dmskpp[x+1];
757             if(  dmskp[x-1] != 0xFF ) order[u++] =  dmskp[x-1];
758             if(  dmskp[x+1] != 0xFF ) order[u++] =  dmskp[x+1];
759             if( dmskpn[x-1] != 0xFF ) order[u++] = dmskpn[x-1];
760             if( dmskpn[x]   != 0xFF ) order[u++] = dmskpn[x];
761             if( dmskpn[x+1] != 0xFF ) order[u++] = dmskpn[x+1];
762             if( u < 5 ) continue;
763             eedi2_sort_metrics( order, u );
764             const int mid = ( u & 1 ) ?
765                 order[u>>1] : ( order[(u-1)>>1] + order[u>>1] + 1 ) >> 1;
766             int sum = 0, count = 0;
767             const int lim = eedi2_limlut[abs(mid-128)>>2];
768             for( i = 0; i < u; ++i )
769             {
770                 if( abs( order[i] - mid ) <= lim )
771                 {
772                     ++count;
773                     sum += order[i];
774                 }
775             }
776             if( count < 5 ) continue;
777             dstp[x] = (int)( ( (float)( sum + mid ) / (float)( count + 1 ) ) + 0.5f );
778         }
779         dmskpp += dmsk_pitch;
780         dmskp += dmsk_pitch;
781         dmskpn += dmsk_pitch;
782         dstp += dst_pitch;
783         mskp += msk_pitch;
784     }
785 }
786 
787 /**
788  * Re-draws a clearer, less blocky frame-height edge direction mask
789  * @param mskp Pointer to the edge mask
790  * @param msk_pitch Stride of mskp
791  * @param dmskp Pointer to the edge direction mask being read from
792  * @param dmsk_pitch Stride of dmskp
793  * @param dstp Pointer to the destination to store the redrawn direction mask
794  * @param dst_pitch Stride of dstp
795  * @param tff Whether or not the frame parity is Top Field First
796  * @param height Height of the full-frame output
 * @param width Width of dmskp bitmap rows, as opposed to the padded stride in dmsk_pitch
798  */
eedi2_mark_directions_2x(uint8_t * mskp,int msk_pitch,uint8_t * dmskp,int dmsk_pitch,uint8_t * dstp,int dst_pitch,int tff,int height,int width)799 void eedi2_mark_directions_2x( uint8_t * mskp, int msk_pitch, uint8_t * dmskp, int dmsk_pitch,
800                                uint8_t * dstp, int dst_pitch, int tff, int height, int width )
801 {
802     int x, y, i;
803     memset( dstp, 255, dst_pitch * height );
804     dstp  += dst_pitch  * ( 2 - tff );
805     dmskp += dmsk_pitch * ( 1 - tff );
806     mskp  += msk_pitch  * ( 1 - tff );
807     unsigned char *dmskpn = dmskp + dmsk_pitch * 2;
808     unsigned char *mskpn = mskp + msk_pitch * 2;
809     for( y = 2 - tff; y < height - 1; y += 2 )
810     {
811         for( x = 1; x < width - 1; ++x )
812         {
813             if( mskp[x] != 0xFF && mskpn[x] != 0xFF ) continue;
814             int v = 0, order[6];
815             if(  dmskp[x-1] != 0xFF ) order[v++] = dmskp[x-1];
816             if(  dmskp[x]   != 0xFF ) order[v++] = dmskp[x];
817             if(  dmskp[x+1] != 0xFF ) order[v++] = dmskp[x+1];
818             if( dmskpn[x-1] != 0xFF ) order[v++] = dmskpn[x-1];
819             if( dmskpn[x]   != 0xFF ) order[v++] = dmskpn[x];
820             if( dmskpn[x+1] != 0xFF ) order[v++] = dmskpn[x+1];
821             if( v < 3 ) continue;
822             else
823             {
824                 eedi2_sort_metrics( order, v );
825                 const int mid = ( v & 1 ) ? order[v>>1] : ( order[(v-1)>>1] + order[v>>1]+1) >> 1;
826                 const int lim = eedi2_limlut[abs(mid-128)>>2];
827                 int u = 0;
828                 if( abs( dmskp[x-1] - dmskpn[x-1] ) <= lim ||
829                     dmskp[x-1] == 0xFF || dmskpn[x-1] == 0xFF )
830                         ++u;
831                 if( abs( dmskp[x]   - dmskpn[x] )   <= lim ||
832                     dmskp[x]   == 0xFF || dmskpn[x]   == 0xFF )
833                         ++u;
834                 if( abs( dmskp[x+1] - dmskpn[x-1] ) <= lim ||
835                     dmskp[x+1] == 0xFF || dmskpn[x+1] == 0xFF)
836                         ++u;
837                 if( u < 2 ) continue;
838                 int count = 0, sum = 0;
839                 for( i = 0; i < v; ++i )
840                 {
841                     if( abs( order[i] - mid ) <= lim )
842                     {
843                         ++count;
844                         sum += order[i];
845                     }
846                 }
847                 if( count < v - 2 || count < 2 ) continue;
848                 dstp[x] = (int)( ( (float)( sum + mid ) / (float)( count + 1 ) ) + 0.5f );
849             }
850         }
851         mskp += msk_pitch * 2;
852         mskpn += msk_pitch * 2;
853         dstp += dst_pitch * 2;
854         dmskp += dmsk_pitch * 2;
855         dmskpn += dmsk_pitch * 2;
856     }
857 }
858 
859 /**
860  * Filters the frame-height edge direction mask
861  * @param mskp Pointer to the edge mask
862  * @param msk_pitch Stride of mskp
863  * @param dmskp Pointer to the edge direction mask being read from
864  * @param dmsk_pitch Stride of dmskp
865  * @param dstp Pointer to the destination to store the filtered direction mask
866  * @param dst_pitch Stride of dstp
867  * @param field Field to filter
868  * @param height Height of the full-frame output
 * @param width Width of dmskp bitmap rows, as opposed to the padded stride in dmsk_pitch
870  */
eedi2_filter_dir_map_2x(uint8_t * mskp,int msk_pitch,uint8_t * dmskp,int dmsk_pitch,uint8_t * dstp,int dst_pitch,int field,int height,int width)871 void eedi2_filter_dir_map_2x( uint8_t * mskp, int msk_pitch, uint8_t * dmskp, int dmsk_pitch,
872                               uint8_t * dstp, int dst_pitch, int field, int height, int width )
873 {
874     int x, y, i;
875     eedi2_bit_blit( dstp, dst_pitch, dmskp, dmsk_pitch, width, height );
876     dmskp += dmsk_pitch * ( 2 - field );
877     unsigned char *dmskpp = dmskp - dmsk_pitch * 2;
878     unsigned char *dmskpn = dmskp + dmsk_pitch * 2;
879     mskp += msk_pitch * ( 1 - field );
880     unsigned char *mskpn = mskp + msk_pitch * 2;
881     dstp += dst_pitch * ( 2 - field );
882     for( y = 2 - field; y < height - 1; y += 2 )
883     {
884         for( x = 1; x < width - 1; ++x )
885         {
886             if( mskp[x] != 0xFF && mskpn[x] != 0xFF ) continue;
887             int u = 0, order[9];
888             if( y > 1 )
889             {
890                 if( dmskpp[x-1] != 0xFF ) order[u++] = dmskpp[x-1];
891                 if( dmskpp[x]   != 0xFF ) order[u++] = dmskpp[x];
892                 if( dmskpp[x+1] != 0xFF ) order[u++] = dmskpp[x+1];
893             }
894             if( dmskp[x-1] != 0xFF ) order[u++] = dmskp[x-1];
895             if( dmskp[x]   != 0xFF ) order[u++] = dmskp[x];
896             if( dmskp[x+1] != 0xFF ) order[u++] = dmskp[x+1];
897             if( y < height - 2 )
898             {
899                 if( dmskpn[x-1] != 0xFF ) order[u++] = dmskpn[x-1];
900                 if( dmskpn[x]   != 0xFF ) order[u++] = dmskpn[x];
901                 if( dmskpn[x+1] != 0xFF ) order[u++] = dmskpn[x+1];
902             }
903             if( u < 4 )
904             {
905                 dstp[x] = 255;
906                 continue;
907             }
908             eedi2_sort_metrics( order, u );
909             const int mid = ( u & 1 ) ? order[u>>1] : (order[(u-1)>>1] + order[u>>1] + 1 ) >> 1;
910             int sum = 0, count = 0;
911             const int lim = eedi2_limlut[abs(mid-128)>>2];
912             for( i = 0; i < u; ++i )
913             {
914                 if( abs( order[i] - mid ) <= lim )
915                 {
916                     ++count;
917                     sum += order[i];
918                 }
919             }
920             if( count < 4 || ( count < 5 && dmskp[x] == 0xFF ) )
921             {
922                 dstp[x] = 255;
923                 continue;
924             }
925             dstp[x] = (int)( ( (float)( sum + mid ) / (float)( count + 1 ) ) + 0.5f );
926         }
927         mskp += msk_pitch * 2;
928         mskpn += msk_pitch * 2;
929         dmskpp += dmsk_pitch * 2;
930         dmskp += dmsk_pitch * 2;
931         dmskpn += dmsk_pitch * 2;
932         dstp += dst_pitch * 2;
933     }
934 }
935 
936 /**
937  * Smoothes out the frame-height edge direction mask
938  * @param mskp Pointer to the edge mask
939  * @param msk_pitch Stride of mskp
940  * @param dmskp Pointer to the edge direction mask being read from
941  * @param dmsk_pitch Stride of dmskp
942  * @param dstp Pointer to the destination to store the expanded direction mask
943  * @param dst_pitch Stride of dstp
944  * @param field Field to filter
945  * @param height Height of the full-frame output
 * @param width Width of dmskp bitmap rows, as opposed to the padded stride in dmsk_pitch
947  */
eedi2_expand_dir_map_2x(uint8_t * mskp,int msk_pitch,uint8_t * dmskp,int dmsk_pitch,uint8_t * dstp,int dst_pitch,int field,int height,int width)948 void eedi2_expand_dir_map_2x( uint8_t * mskp, int msk_pitch, uint8_t * dmskp, int dmsk_pitch,
949                               uint8_t * dstp, int dst_pitch, int field, int height, int width )
950 {
951     int x, y, i;
952 
953     eedi2_bit_blit( dstp, dst_pitch, dmskp, dmsk_pitch, width, height );
954 
955     dmskp += dmsk_pitch * ( 2 - field );
956     unsigned char *dmskpp = dmskp - dmsk_pitch * 2;
957     unsigned char *dmskpn = dmskp + dmsk_pitch * 2;
958     mskp += msk_pitch * ( 1 - field );
959     unsigned char *mskpn = mskp + msk_pitch * 2;
960     dstp += dst_pitch * ( 2 - field );
961     for( y = 2 - field; y < height - 1; y += 2)
962     {
963         for( x = 1; x < width - 1; ++x )
964         {
965             if( dmskp[x] != 0xFF || ( mskp[x] != 0xFF && mskpn[x] != 0xFF ) ) continue;
966             int u = 0, order[9];
967             if( y > 1 )
968             {
969                 if( dmskpp[x-1] != 0xFF ) order[u++] = dmskpp[x-1];
970                 if( dmskpp[x]   != 0xFF ) order[u++] = dmskpp[x];
971                 if( dmskpp[x+1] != 0xFF ) order[u++] = dmskpp[x+1];
972             }
973             if( dmskp[x-1] != 0xFF ) order[u++] = dmskp[x-1];
974             if( dmskp[x+1] != 0xFF ) order[u++] = dmskp[x+1];
975             if( y < height - 2 )
976             {
977                 if( dmskpn[x-1] != 0xFF) order[u++] = dmskpn[x-1];
978                 if( dmskpn[x]   != 0xFF) order[u++] = dmskpn[x];
979                 if( dmskpn[x+1] != 0xFF) order[u++] = dmskpn[x+1];
980             }
981             if( u < 5 ) continue;
982             eedi2_sort_metrics( order, u );
983             const int mid = ( u & 1 ) ? order[u>>1] : ( order[(u-1)>>1] + order[u>>1] + 1 ) >> 1;
984             int sum = 0, count = 0;
985             const int lim = eedi2_limlut[abs(mid-128)>>2];
986             for( i = 0; i < u; ++i )
987             {
988                 if( abs( order[i] - mid ) <= lim )
989                 {
990                     ++count;
991                     sum += order[i];
992                 }
993             }
994             if( count < 5 ) continue;
995             dstp[x] = (int)( ( (float)( sum + mid ) / (float)( count + 1 ) ) + 0.5f );
996         }
997         mskp += msk_pitch * 2;
998         mskpn += msk_pitch * 2;
999         dmskpp += dmsk_pitch * 2;
1000         dmskp += dmsk_pitch * 2;
1001         dmskpn += dmsk_pitch * 2;
1002         dstp += dst_pitch * 2;
1003     }
1004 }
1005 
1006 /**
1007  * Like the name suggests, this function fills in gaps in the frame-height edge direction mask
1008  * @param mskp Pointer to the edge mask
1009  * @param msk_pitch Stride of mskp
1010  * @param dmskp Pointer to the edge direction mask being read from
1011  * @param dmsk_pitch Stride of dmskp
1012  * @param dstp Pointer to the destination to store the filled-in direction mask
1013  * @param dst_pitch Stride of dstp
1014  * @param field Field to filter
1015  * @param height Height of the full-frame output
 * @param width Width of dmskp bitmap rows, as opposed to the padded stride in dmsk_pitch
1017  */
void eedi2_fill_gaps_2x( uint8_t *mskp, int msk_pitch, uint8_t * dmskp, int dmsk_pitch,
                         uint8_t * dstp, int dst_pitch, int field, int height, int width )
{
    int x, y, j;

    /* Start from a copy of the input map; gaps are filled on top of it. */
    eedi2_bit_blit( dstp, dst_pitch, dmskp, dmsk_pitch, width, height );

    /* Direction map: dmskp is the row being processed (y), dmskpp / dmskpn
       are the rows two above / two below it. */
    dmskp += dmsk_pitch * ( 2 - field );
    unsigned char *dmskpp = dmskp - dmsk_pitch * 2;
    unsigned char *dmskpn = dmskp + dmsk_pitch * 2;
    /* Edge mask: mskp is row y - 1, mskpn row y + 1, mskpp row y - 3 and
       mskpnn row y + 3. */
    mskp += msk_pitch * ( 1 - field );
    unsigned char *mskpp = mskp - msk_pitch * 2;
    unsigned char *mskpn = mskp + msk_pitch * 2;
    unsigned char *mskpnn = mskpn + msk_pitch * 2;
    dstp += dst_pitch * ( 2 - field );
    for( y = 2 - field; y < height - 1; y += 2 )
    {
        for( x = 1; x < width - 1; ++x )
        {
            /* Only pixels without a direction (0xFF) are gap candidates, and
               only where the edge mask is 0xFF above or below. */
            if( dmskp[x] != 0xFF ||
                ( mskp[x] != 0xFF && mskpn[x] != 0xFF ) ) continue;
            /* back / forward start at sentinel values far from valid
               directions. NOTE(review): presumably chosen so the distance
               test further down rejects spans where a search found nothing -
               confirm. */
            int u = x - 1, back = 500, forward = -500;
            /* Walk left for the nearest pixel with a direction. The walk stops
               early at a pixel with no 0xFF edge flag above or below, and
               never inspects column 0. */
            while( u )
            {
                if( dmskp[u] != 0xFF )
                {
                    back = dmskp[u];
                    break;
                }
                if( mskp[u] != 0xFF && mskpn[u] != 0xFF ) break;
                --u;
            }
            /* Same walk to the right; v may end up equal to width. */
            int v = x + 1;
            while( v < width )
            {
                if( dmskp[v] != 0xFF )
                {
                    forward = dmskp[v];
                    break;
                }
                if( mskp[v] != 0xFF && mskpn[v] != 0xFF ) break;
                ++v;
            }
            /* tc / bc stay set only while every column in [u, v] has a usable
               direction in the row above / below; mint..maxt and minb..maxb
               track the range of those directions. */
            int tc = 1, bc = 1;
            int mint = 500, maxt = -20;
            int minb = 500, maxb = -20;
            /* NOTE(review): when the right-hand walk ran off the row, j
               reaches width here and column `width` is read - confirm the
               buffers are padded beyond width. */
            for( j = u; j <= v; ++j )
            {
                if( tc )
                {
                    /* The y <= 2 test short-circuits before dmskpp / mskpp are
                       read near the top of the frame. */
                    if( y <= 2 || dmskpp[j] == 0xFF || ( mskpp[j] != 0xFF && mskp[j] != 0xFF ) )
                    {
                        tc = 0;
                        /* Equal values make abs( mint - maxt ) zero below. */
                        mint = maxt = 20;
                    }
                    else
                    {
                        if( dmskpp[j] < mint ) mint = dmskpp[j];
                        if( dmskpp[j] > maxt ) maxt = dmskpp[j];
                    }
                }
                if( bc )
                {
                    /* The y >= height - 3 test short-circuits before dmskpn /
                       mskpnn are read near the bottom of the frame. */
                    if( y >= height - 3 || dmskpn[j] == 0xFF || ( mskpn[j] != 0xFF && mskpnn[j] != 0xFF ) )
                    {
                        bc = 0;
                        minb = maxb = 20;
                    }
                    else
                    {
                        if( dmskpn[j] < minb ) minb = dmskpn[j];
                        if( dmskpn[j] > maxb ) maxb = dmskpn[j];
                    }
                }
            }
            /* maxt / maxb are still -20 only if the loop above never touched
               them; collapse the range to zero width in that case. */
            if( maxt == -20 ) maxt = mint = 20;
            if( maxb == -20 ) maxb = minb = 20;
            /* Allowed difference between the two end directions: at least 8,
               growing with the ends' distance from 128 and with the spread of
               directions seen in the rows above / below. */
            int thresh = MAX(
                            MAX( MAX( abs( forward - 128 ), abs( back - 128 ) ) >> 2, 8 ),
                            MAX( abs( mint - maxt ), abs( minb - maxb ) ) );
            /* Longest gap (at most 6 pixels) that may be filled without
               support from a complete row above or below. */
            const int flim = MIN(
                                MAX( abs( forward - 128 ), abs( back - 128 ) ) >> 2,
                                6 );
            if( abs( forward - back ) <= thresh && ( v - u - 1 <= flim || tc || bc ) )
            {
                /* Linear ramp from back towards forward across the gap; the
                   first filled pixel (j == 0) receives exactly `back`. */
                double step = (double)( forward - back ) / (double)( v - u );
                for( j = 0; j < v - u - 1; ++j )
                    dstp[u+j+1] = back + (int)( j * step + 0.5 );
            }
        }
        /* Advance all row pointers by two rows. */
        mskpp += msk_pitch * 2;
        mskp += msk_pitch * 2;
        mskpn += msk_pitch * 2;
        mskpnn += msk_pitch * 2;
        dmskpp += dmsk_pitch * 2;
        dmskp += dmsk_pitch * 2;
        dmskpn += dmsk_pitch * 2;
        dstp += dst_pitch * 2;
    }
}
1118 
1119 /**
1120  * Actually renders the output frame, based on the edge and edge direction masks
1121  * @param plane The plane of the image being processed, to know to reduce a search distance for chroma planes (HandBrake only works with YUV420 video so it is assumed they are half-height)
1122  * @param dmskp Pointer to the edge direction mask being read from
1123  * @param dmsk_pitch Stride of dmskp
 * @param dstp Pointer to the line-doubled source field, which is filtered in place
1125  * @param dst_pitch Stride of dstp
1126  * @param omskp Pointer to the destination to store the output edge mask used for post-processing
 * @param omsk_pitch Stride of omskp
1128  * @param field Field to filter
 * @param nt Noise threshold (50 is a good default value)
1130  * @param height Height of the full-frame output
 * @param width Width of dstp bitmap rows, as opposed to the padded stride in dst_pitch
1132  */
void eedi2_interpolate_lattice( const int plane, uint8_t * dmskp, int dmsk_pitch, uint8_t * dstp,
                                int dst_pitch, uint8_t * omskp, int omsk_pitch, int field, int nt,
                                int height, int width )
{
    int x, y, u;

    /* The loop below never writes the outermost missing row, so duplicate its
       only neighbour into it: the last row for field 1, the first row
       otherwise. */
    if( field == 1 )
    {
        eedi2_bit_blit( dstp + ( height - 1 ) * dst_pitch,
                  dst_pitch,
                  dstp + ( height - 2 ) * dst_pitch,
                  dst_pitch,
                  width,
                  1 );
    }
    else
    {
        eedi2_bit_blit( dstp,
                  dst_pitch,
                  dstp + dst_pitch,
                  dst_pitch,
                  width,
                  1 );
    }

    /* For the row y being interpolated: dstp is row y - 1, dstpn is row y
       (the one written), dstpnn is row y + 1.  omskp / omskn are rows y - 1
       and y + 1 of omskp, dmskp is row y of the direction map. */
    dstp += dst_pitch * ( 1 - field );
    omskp += omsk_pitch * ( 1 - field );
    unsigned char *dstpn = dstp + dst_pitch;
    unsigned char *dstpnn = dstp + dst_pitch * 2;
    unsigned char *omskn = omskp + omsk_pitch * 2;
    dmskp += dmsk_pitch * ( 2 - field );
    for( y = 2 - field; y < height - 1; y += 2 )
    {
        for( x = 0; x < width; ++x )
        {
            int dir = dmskp[x];
            const int lim = eedi2_limlut[abs(dir-128)>>2];
            /* No direction, or a direction that disagrees with both
               horizontal neighbours: plain vertical average.
               NOTE(review): at x == 0 and x == width - 1 the neighbour reads
               leave the row unless dir == 255 short-circuits first - confirm
               the border columns are always 255. */
            if( dir == 255 ||
                ( abs( dmskp[x] - dmskp[x-1] ) > lim &&
                  abs( dmskp[x] - dmskp[x+1] ) > lim ) )
            {
                dstpn[x] = ( dstp[x] + dstpnn[x] + 1 ) >> 1;
                if( dir != 255 ) dmskp[x] = 128;
                continue;
            }
            if( lim < 9 )
            {
                /* 6 * sumsq - sum * sum is 36 times the population variance
                   of the six samples, so this rejects neighbourhoods whose
                   variance is below 16. */
                const int sum =   dstp[x-1] +   dstp[x] +   dstp[x+1] +
                                dstpnn[x-1] + dstpnn[x] + dstpnn[x+1];
                const int sumsq = dstp[x-1] *   dstp[x-1] +
                                  dstp[x]   *   dstp[x]   +
                                  dstp[x+1] *   dstp[x+1] +
                                dstpnn[x-1] * dstpnn[x-1] +
                                dstpnn[x]   * dstpnn[x]   +
                                dstpnn[x+1] * dstpnn[x+1];
                if( 6 * sumsq - sum * sum < 576 )
                {
                    dstpn[x] = ( dstp[x] + dstpnn[x] + 1 ) >> 1;
                    dmskp[x] = 255;
                    continue;
                }
            }
            /* Pixel that is darker (first group) or brighter (second group)
               than its horizontal neighbours by more than 3, in both the row
               above and the row below: vertical average, direction reset
               to 128. */
            if( x > 1 && x < width - 2 &&
                ( (   dstp[x] < MAX(   dstp[x-2],   dstp[x-1] ) - 3 &&
                      dstp[x] < MAX(   dstp[x+2],   dstp[x+1] ) - 3 &&
                    dstpnn[x] < MAX( dstpnn[x-2], dstpnn[x-1] ) - 3 &&
                    dstpnn[x] < MAX( dstpnn[x+2], dstpnn[x+1] ) - 3 )
                ||
                  (   dstp[x] > MIN(   dstp[x-2],   dstp[x-1] ) + 3 &&
                      dstp[x] > MIN(   dstp[x+2],   dstp[x+1] ) + 3 &&
                    dstpnn[x] > MIN( dstpnn[x-2], dstpnn[x-1] ) + 3 &&
                    dstpnn[x] > MIN( dstpnn[x+2], dstpnn[x+1] ) + 3 ) ) )
            {
                dstpn[x] = ( dstp[x] + dstpnn[x] + 1 ) >> 1;
                dmskp[x] = 128;
                continue;
            }
            /* Convert the stored direction into a signed pixel offset (the
               inverse of the 128 + dir * 4 encoding written back below). */
            dir = ( dir - 128 + 2 ) >> 2;
            /* Default result if no candidate offset is accepted. */
            int val = ( dstp[x] + dstpnn[x] + 1 ) >> 1;
            /* Search offsets dir - 2 .. dir + 2, clamped using x and width. */
            const int startu = ( dir - 2 < 0 ) ?
                        MAX( -x + 1, MAX( dir - 2, -width + 2 + x ) )
                        :
                        MIN(  x - 1, MIN( dir - 2,  width - 2 - x ) );
            const int stopu =  ( dir + 2 < 0 ) ?
                        MAX( -x + 1, MAX( dir + 2, -width + 2 + x ) )
                        :
                        MIN(  x - 1, MIN( dir + 2,  width - 2 - x ) );
            /* min doubles as the "nothing accepted" marker: it is still
               8 * nt after the loop if no offset passed every test. */
            int min = 8 * nt;
            for( u = startu; u <= stopu; ++u )
            {
                /* Absolute difference between three-pixel runs of the two
                   rows, shifted by u in opposite directions. */
                const int diff =
                    abs(   dstp[x-1] - dstpnn[x-u-1] ) +
                    abs(   dstp[x]   - dstpnn[x-u] )   +
                    abs(   dstp[x+1] - dstpnn[x-u+1] ) +
                    abs( dstpnn[x-1] -   dstp[x+u-1] ) +
                    abs( dstpnn[x]   -   dstp[x+u] )   +
                    abs( dstpnn[x+1] -   dstp[x+u+1] );
                /* Better than the best so far, and both omskp (around x + u)
                   and omskn (around x - u) hold a valid value within lim of
                   this pixel's direction. */
                if( diff < min &&
                    ( ( omskp[x-1+u] != 0xFF && abs( omskp[x-1+u] - dmskp[x] ) <= lim ) ||
                     (  omskp[x+u]   != 0xFF && abs( omskp[x+u]   - dmskp[x]) <= lim )  ||
                     (  omskp[x+1+u] != 0xFF && abs( omskp[x+1+u] - dmskp[x]) <= lim ) ) &&
                    ( ( omskn[x-1-u] != 0xFF && abs( omskn[x-1-u] - dmskp[x]) <= lim ) ||
                     (  omskn[x-u]   != 0xFF && abs( omskn[x-u]   - dmskp[x]) <= lim ) ||
                     (  omskn[x+1-u] != 0xFF && abs( omskn[x+1-u] - dmskp[x]) <= lim ) ) )
                {
                    /* Second test at half the offset (u >> 1), with a
                       tighter threshold of 4 * nt. */
                    const int diff2 =
                        abs( dstp[x+(u>>1)-1] - dstpnn[x-(u>>1)-1] ) +
                        abs( dstp[x+(u>>1)]   - dstpnn[x-(u>>1)]   ) +
                        abs( dstp[x+(u>>1)+1] - dstpnn[x-(u>>1)+1] );
                    if( diff2 < 4 * nt &&
                        ( ( ( abs( omskp[x+(u>>1)] - omskn[x-(u>>1)]     ) <= lim ||
                              abs( omskp[x+(u>>1)] - omskn[x-((u+1)>>1)] ) <= lim ) &&
                            omskp[x+(u>>1)] != 0xFF )
                          ||
                          ( ( abs( omskp[x+((u+1)>>1)] - omskn[x-(u>>1)] )     <= lim ||
                              abs( omskp[x+((u+1)>>1)] - omskn[x-((u+1)>>1)] ) <= lim ) &&
                            omskp[x+((u+1)>>1)] != 0xFF ) ) )
                    {
                        if( ( abs( dmskp[x] - omskp[x+(u>>1)] )     <= lim ||
                              abs( dmskp[x] - omskp[x+((u+1)>>1)] ) <= lim ) &&
                            ( abs( dmskp[x] - omskn[x-(u>>1)] )     <= lim ||
                              abs( dmskp[x] - omskn[x-((u+1)>>1)] ) <= lim ) )
                        {
                            /* Rounded average of the four pixels at the two
                                half-offsets (u >> 1 and (u + 1) >> 1) in the
                                rows above and below. */
                            val = (   dstp[x+(u>>1)] +   dstp[x+((u+1)>>1)] +
                                    dstpnn[x-(u>>1)] + dstpnn[x-((u+1)>>1)] + 2 ) >> 2;
                            min = diff;
                            dir = u;
                        }
                    }
                }
            }
            if( min != 8 * nt )
            {
                /* An offset was accepted: store the pixel and write the
                   refined direction back into the map. */
                dstpn[x] = val;
                dmskp[x] = 128 + dir * 4;
            }
            else
            {
                /* Fallback: search offsets -d .. d (d is 4 for plane 0, 2
                   otherwise) without consulting omskp / omskn, accepting only
                   values that lie between the pixels directly above and
                   below. */
                const int minm = MIN( dstp[x], dstpnn[x] );
                const int maxm = MAX( dstp[x], dstpnn[x] );
                const int d = plane == 0 ? 4 : 2;
                const int startu = MAX( -x + 1, -d );
                const int stopu = MIN( width - 2 - x, d );
                min = 7 * nt;
                for( u = startu; u <= stopu; ++u )
                {
                    const int p1 =   dstp[x+(u>>1)] +   dstp[x+((u+1)>>1)];
                    const int p2 = dstpnn[x-(u>>1)] + dstpnn[x-((u+1)>>1)];
                    const int diff =
                        abs(   dstp[x-1] - dstpnn[x-u-1] ) +
                        abs(   dstp[x]   - dstpnn[x-u] )   +
                        abs(   dstp[x+1] - dstpnn[x-u+1] ) +
                        abs( dstpnn[x-1] - dstp[x+u-1] )   +
                        abs( dstpnn[x]   - dstp[x+u] )     +
                        abs( dstpnn[x+1] - dstp[x+u+1] )   +
                        abs( p1 - p2 );
                    if( diff < min )
                    {
                        const int valt = ( p1 + p2 + 2 ) >> 2;
                        if( valt >= minm && valt <= maxm )
                        {
                            val = valt;
                            min = diff;
                            dir = u;
                        }
                    }
                }
                /* val is still the vertical average if nothing was accepted. */
                dstpn[x] = val;
                if( min == 7*nt ) dmskp[x] = 128;
                else dmskp[x] = 128 + dir * 4;
            }
        }
        /* Advance all row pointers by two rows. */
        dstp += dst_pitch * 2;
        dstpn += dst_pitch * 2;
        dstpnn += dst_pitch * 2;
        dmskp += dmsk_pitch * 2;
        omskp += omsk_pitch * 2;
        omskn += omsk_pitch * 2;
    }
}
1313 
1314 /**
1315  * Applies some extra filtering to smooth the edge direction mask
1316  * @param nmskp Pointer to the newly-filtered edge direction mask being read from
1317  * @param nmsk_pitch Stride of nmskp
1318  * @param omskp Pointer to the old unfiltered edge direction mask being read from
 * @param omsk_pitch Stride of omskp
1320  * @param dstp Pointer to the output image being filtered in place
 * @param src_pitch Stride of dstp (despite its name, this is the pitch of the image filtered in place)
1322  * @param field Field to filter
1323  * @param height Height of the full-frame output
 * @param width Width of dstp bitmap rows, as opposed to the padded stride in src_pitch
1325  */
eedi2_post_process(uint8_t * nmskp,int nmsk_pitch,uint8_t * omskp,int omsk_pitch,uint8_t * dstp,int src_pitch,int field,int height,int width)1326 void eedi2_post_process( uint8_t * nmskp, int nmsk_pitch, uint8_t * omskp, int omsk_pitch,
1327                          uint8_t * dstp, int src_pitch, int field, int height, int width )
1328 {
1329     int x, y;
1330 
1331     nmskp += ( 2 - field ) * nmsk_pitch;
1332     omskp += ( 2 - field ) * omsk_pitch;
1333     dstp += ( 2 - field ) * src_pitch;
1334     unsigned char *srcpp = dstp - src_pitch;
1335     unsigned char *srcpn = dstp + src_pitch;
1336     for( y = 2 - field; y < height - 1; y += 2 )
1337     {
1338         for( x = 0; x < width; ++x )
1339         {
1340             const int lim = eedi2_limlut[abs(nmskp[x]-128)>>2];
1341             if( abs( nmskp[x] - omskp[x] ) > lim && omskp[x] != 255 && omskp[x] != 128 )
1342                 dstp[x] = ( srcpp[x] + srcpn[x] + 1 ) >> 1;
1343         }
1344         nmskp += nmsk_pitch * 2;
1345         omskp += omsk_pitch * 2;
1346         srcpp += src_pitch * 2;
1347         dstp += src_pitch * 2;
1348         srcpn += src_pitch * 2;
1349     }
1350 }
1351 
1352 /**
1353  * Blurs the source field plane
1354  * @param src Pointer to the half-height source field plane
1355  * @param src_pitch Stride of src
1356  * @param tmp Pointer to a temporary buffer for juggling bitmaps
1357  * @param tmp_pitch Stride of tmp
1358  * @param dst Pointer to the destination to store the blurred field plane
1359  * @param dst_pitch Stride of dst
1360  * @param height Height of the half-height field-sized frame
1361  * @param width Width of dstp bitmap rows, as opposed to the padded stride in dst_pitch
1362  */
eedi2_gaussian_blur1(uint8_t * src,int src_pitch,uint8_t * tmp,int tmp_pitch,uint8_t * dst,int dst_pitch,int height,int width)1363 void eedi2_gaussian_blur1( uint8_t * src, int src_pitch, uint8_t * tmp, int tmp_pitch, uint8_t * dst, int dst_pitch, int height, int width )
1364 {
1365     uint8_t * srcp = src;
1366     uint8_t * dstp = tmp;
1367     int x, y;
1368 
1369     for( y = 0; y < height; ++y )
1370     {
1371         dstp[0] = ( srcp[3] * 582 + srcp[2] * 7078 + srcp[1] * 31724 +
1372                     srcp[0] * 26152 + 32768 ) >> 16;
1373         dstp[1] = ( srcp[4] * 582 + srcp[3] * 7078 +
1374                     ( srcp[0] + srcp[2] ) * 15862 +
1375                     srcp[1] * 26152 + 32768 ) >> 16;
1376         dstp[2] = ( srcp[5] * 582 + ( srcp[0] + srcp[4] ) * 3539 +
1377                     ( srcp[1] + srcp[3] ) * 15862 +
1378                     srcp[2]*26152 + 32768 ) >> 16;
1379         for( x = 3; x < width - 3; ++x )
1380         {
1381             dstp[x] = ( ( srcp[x-3] + srcp[x+3] ) * 291 +
1382                         ( srcp[x-2] + srcp[x+2] ) * 3539 +
1383                         ( srcp[x-1] + srcp[x+1] ) * 15862 +
1384                         srcp[x] * 26152 + 32768 ) >> 16;
1385         }
1386         dstp[x] = ( srcp[x-3] * 582 + ( srcp[x-2] + srcp[x+2] ) * 3539 +
1387                     ( srcp[x-1] + srcp[x+1] ) * 15862 +
1388                     srcp[x]   * 26152 + 32768 ) >> 16;
1389         ++x;
1390         dstp[x] = ( srcp[x-3] * 582 + srcp[x-2] * 7078 +
1391                     ( srcp[x-1] + srcp[x+1] ) * 15862 +
1392                     srcp[x] * 26152 + 32768 ) >> 16;
1393         ++x;
1394         dstp[x] = ( srcp[x-3] * 582 + srcp[x-2] * 7078 +
1395                     srcp[x-1] * 31724 + srcp[x] * 26152 + 32768 ) >> 16;
1396         srcp += src_pitch;
1397         dstp += tmp_pitch;
1398     }
1399     srcp = tmp;
1400     dstp = dst;
1401     unsigned char *src3p = srcp - tmp_pitch * 3;
1402     unsigned char *src2p = srcp - tmp_pitch * 2;
1403     unsigned char *srcpp = srcp - tmp_pitch;
1404     unsigned char *srcpn = srcp + tmp_pitch;
1405     unsigned char *src2n = srcp + tmp_pitch * 2;
1406     unsigned char *src3n = srcp + tmp_pitch * 3;
1407     for( x = 0; x < width; ++x )
1408     {
1409         dstp[x] = ( src3n[x] * 582 + src2n[x] * 7078 + srcpn[x] * 31724 +
1410                      srcp[x] * 26152 + 32768 ) >> 16;
1411     }
1412     src3p += tmp_pitch;
1413     src2p += tmp_pitch;
1414     srcpp += tmp_pitch;
1415     srcp += tmp_pitch;
1416     srcpn += tmp_pitch;
1417     src2n += tmp_pitch;
1418     src3n += tmp_pitch;
1419     dstp += dst_pitch;
1420     for( x = 0; x < width; ++x )
1421     {
1422         dstp[x] = ( src3n[x] * 582 + src2n[x] * 7078 +
1423                     ( srcpp[x] + srcpn[x] ) * 15862 +
1424                     srcp[x] * 26152 + 32768 ) >> 16;
1425     }
1426     src3p += tmp_pitch;
1427     src2p += tmp_pitch;
1428     srcpp += tmp_pitch;
1429     srcp += tmp_pitch;
1430     srcpn += tmp_pitch;
1431     src2n += tmp_pitch;
1432     src3n += tmp_pitch;
1433     dstp += dst_pitch;
1434     for( x = 0; x < width; ++x )
1435     {
1436         dstp[x] = ( src3n[x] * 582 + ( src2p[x] + src2n[x] ) * 3539 +
1437                     ( srcpp[x] + srcpn[x] ) * 15862 +
1438                     srcp[x] * 26152 + 32768 ) >> 16;
1439     }
1440     src3p += src_pitch;
1441     src2p += src_pitch;
1442     srcpp += src_pitch;
1443     srcp += src_pitch;
1444     srcpn += src_pitch;
1445     src2n += src_pitch;
1446     src3n += src_pitch;
1447     dstp += dst_pitch;
1448     for( y = 3; y < height - 3; ++y )
1449     {
1450         for( x = 0; x < width; ++x )
1451         {
1452             dstp[x] = ( ( src3p[x] + src3n[x] ) * 291 +
1453                         ( src2p[x] + src2n[x] ) * 3539 +
1454                         ( srcpp[x] + srcpn[x] ) * 15862 +
1455                         srcp[x] * 26152 + 32768 ) >> 16;
1456         }
1457         src3p += tmp_pitch;
1458         src2p += tmp_pitch;
1459         srcpp += tmp_pitch;
1460         srcp += tmp_pitch;
1461         srcpn += tmp_pitch;
1462         src2n += tmp_pitch;
1463         src3n += tmp_pitch;
1464         dstp += dst_pitch;
1465     }
1466     for( x = 0; x < width; ++x )
1467     {
1468         dstp[x] = ( src3p[x] * 582 + ( src2p[x] + src2n[x] ) *3539 +
1469                     ( srcpp[x] + srcpn[x] ) * 15862 +
1470                     srcp[x] * 26152 + 32768 ) >> 16;
1471     }
1472     src3p += tmp_pitch;
1473     src2p += tmp_pitch;
1474     srcpp += tmp_pitch;
1475     srcp += tmp_pitch;
1476     srcpn += tmp_pitch;
1477     src2n += tmp_pitch;
1478     src3n += tmp_pitch;
1479     dstp += dst_pitch;
1480     for( x = 0; x < width; ++x )
1481     {
1482         dstp[x] = ( src3p[x] * 582 + src2p[x] * 7078 +
1483                     ( srcpp[x] + srcpn[x] ) * 15862 +
1484                      srcp[x] * 26152 + 32768 ) >> 16;
1485     }
1486     src3p += tmp_pitch;
1487     src2p += tmp_pitch;
1488     srcpp += tmp_pitch;
1489     srcp += tmp_pitch;
1490     srcpn += tmp_pitch;
1491     src2n += tmp_pitch;
1492     src3n += tmp_pitch;
1493     dstp += dst_pitch;
1494     for( x = 0; x < width; ++x )
1495     {
1496         dstp[x] = ( src3p[x] * 582   + src2p[x] * 7078 +
1497                     srcpp[x] * 31724 +  srcp[x] * 26152 + 32768 ) >> 16;
1498     }
1499 }
1500 
1501 
/**
 * Tap weights of the 9-tap blur kernel, indexed by distance from the center tap.
 * The full symmetric kernel sums to 65536: 18508 + 2 * ( 14415 + 6809 + 1951 + 339 ).
 */
static const int eedi2_blur_sqrt2_weights[5] = { 18508, 14415, 6809, 1951, 339 };

/**
 * Resolves the sample index for one kernel tap, handling the plane borders
 * @param pos Index of the sample being filtered (the center tap)
 * @param offset Signed distance of the tap from pos
 * @param len Number of valid samples along the filtered axis
 * @return pos + offset when that is inside [0, len); otherwise the tap on the
 *         opposite side (pos - offset), which doubles that tap's weight
 */
static int eedi2_blur_sqrt2_tap_index( int pos, int offset, int len )
{
    int idx = pos + offset;

    if( idx < 0 || idx >= len )
        idx = pos - offset;
    /* Both sides can only be out of range when len < 8; clamp so that such
       small planes never read outside the buffer. */
    if( idx < 0 )
        idx = 0;
    else if( idx >= len )
        idx = len - 1;
    return idx;
}

/**
 * Filters one sample of a row whose kernel overlaps the left or right border
 * @param row Pointer to the first sample of the row
 * @param x Column being filtered
 * @param width Number of valid samples in the row
 * @return The blurred value, rounded and normalized by >> 16
 */
static int eedi2_blur_sqrt2_edge( const int *row, int x, int width )
{
    int acc = row[x] * eedi2_blur_sqrt2_weights[0] + 32768;
    int k;

    for( k = 1; k <= 4; ++k )
    {
        const int lo = eedi2_blur_sqrt2_tap_index( x, -k, width );
        const int hi = eedi2_blur_sqrt2_tap_index( x,  k, width );
        acc += ( row[lo] + row[hi] ) * eedi2_blur_sqrt2_weights[k];
    }
    return acc >> 16;
}

/**
 * Blurs the spatial derivatives of the source field plane
 *
 * The blur is separable: a horizontal pass filters every row of src into tmp
 * (normalized by >> 16), then a vertical pass filters every column of tmp into
 * dst (normalized by >> 18, so the output is additionally divided by 4).
 * A tap that would fall outside the plane is replaced by the tap at the
 * opposite offset from the sample being filtered. Only the first `width`
 * samples of each row are read or written; row padding is never touched.
 *
 * dst may be the same buffer as src, because src is fully consumed before dst
 * is written. tmp must be a distinct buffer.
 *
 * @param src Pointer to the derivative array to filter
 * @param tmp Pointer to a temporary storage for the derivative array while it's being filtered
 * @param dst Pointer to the destination to store the filtered output derivative array
 * @param pitch Stride, in ints, shared by src, tmp and dst
 * @param height Height of the half-height field-sized frame from which the src array derivs were taken
 * @param width Width of the bitmap from which the src array is derived, as opposed to the padded stride in pitch
 */
void eedi2_gaussian_blur_sqrt2( int *src, int *tmp, int *dst, const int pitch, int height, const int width )
{
    int x, y;

    /* Horizontal pass: src -> tmp */
    {
        const int *srcp = src;
        int *dstp = tmp;

        for( y = 0; y < height; ++y )
        {
            x = 0;
            /* Left border: up to four columns whose kernel crosses x = 0 */
            for( ; x < width && x < 4; ++x )
            {
                dstp[x] = eedi2_blur_sqrt2_edge( srcp, x, width );
            }
            /* Interior: all nine taps are inside the row */
            for( ; x < width - 4; ++x )
            {
                dstp[x] = ( ( srcp[x-4] + srcp[x+4] ) * 339 +
                            ( srcp[x-3] + srcp[x+3] ) * 1951 +
                            ( srcp[x-2] + srcp[x+2] ) * 6809 +
                            ( srcp[x-1] + srcp[x+1] ) * 14415 +
                            srcp[x] * 18508 + 32768 ) >> 16;
            }
            /* Right border: columns whose kernel crosses x = width - 1 */
            for( ; x < width; ++x )
            {
                dstp[x] = eedi2_blur_sqrt2_edge( srcp, x, width );
            }
            srcp += pitch;
            dstp += pitch;
        }
    }

    /* Vertical pass: tmp -> dst. The neighbouring rows are resolved once per
       output row, so border rows need no special-cased arithmetic. */
    for( y = 0; y < height; ++y )
    {
        const int *cur = tmp + y * pitch;
        const int *p1  = tmp + eedi2_blur_sqrt2_tap_index( y, -1, height ) * pitch;
        const int *p2  = tmp + eedi2_blur_sqrt2_tap_index( y, -2, height ) * pitch;
        const int *p3  = tmp + eedi2_blur_sqrt2_tap_index( y, -3, height ) * pitch;
        const int *p4  = tmp + eedi2_blur_sqrt2_tap_index( y, -4, height ) * pitch;
        const int *n1  = tmp + eedi2_blur_sqrt2_tap_index( y,  1, height ) * pitch;
        const int *n2  = tmp + eedi2_blur_sqrt2_tap_index( y,  2, height ) * pitch;
        const int *n3  = tmp + eedi2_blur_sqrt2_tap_index( y,  3, height ) * pitch;
        const int *n4  = tmp + eedi2_blur_sqrt2_tap_index( y,  4, height ) * pitch;
        int *dstp = dst + y * pitch;

        for( x = 0; x < width; ++x )
        {
            dstp[x] = ( ( p4[x] + n4[x] ) * 339 +
                        ( p3[x] + n3[x] ) * 1951 +
                        ( p2[x] + n2[x] ) * 6809 +
                        ( p1[x] + n1[x] ) * 14415 +
                        cur[x] * 18508 + 32768 ) >> 18;
        }
    }
}
1721 
/**
 * Stores the halved gradient products for one sample
 * @param gx Horizontal difference at the sample
 * @param gy Vertical difference at the sample
 * @param x2 Receives ( gx * gx ) >> 1
 * @param y2 Receives ( gy * gy ) >> 1
 * @param xy Receives ( gx * gy ) >> 1
 */
static void eedi2_store_derivatives( int gx, int gy, int *x2, int *y2, int *xy )
{
    *x2 = ( gx * gx ) >> 1;
    *y2 = ( gy * gy ) >> 1;
    *xy = ( gx * gy ) >> 1;
}

/**
 * Computes the derivative products for one row
 *
 * The horizontal difference is taken between the right and left neighbours,
 * falling back to the sample itself on the side that has no neighbour. The
 * vertical difference is upper[x] - lower[x].
 *
 * @param upper Row used as the minuend of the vertical difference
 * @param center Row being processed
 * @param lower Row used as the subtrahend of the vertical difference
 * @param width Number of samples in the row
 * @param x2 Output row for the x/x products
 * @param y2 Output row for the y/y products
 * @param xy Output row for the x/y products
 */
static void eedi2_calc_derivatives_row( const uint8_t *upper, const uint8_t *center,
                                        const uint8_t *lower, int width,
                                        int *x2, int *y2, int *xy )
{
    int col;

    /* Leftmost sample: forward difference */
    eedi2_store_derivatives( center[1] - center[0], upper[0] - lower[0],
                             &x2[0], &y2[0], &xy[0] );

    for( col = 1; col < width - 1; ++col )
    {
        eedi2_store_derivatives( center[col+1] - center[col-1],
                                 upper[col] - lower[col],
                                 &x2[col], &y2[col], &xy[col] );
    }

    /* Rightmost sample: backward difference (col is left at the last column) */
    eedi2_store_derivatives( center[col] - center[col-1],
                             upper[col] - lower[col],
                             &x2[col], &y2[col], &xy[col] );
}

/**
 * Finds spatial derivatives for a source field plane
 *
 * For every sample, the horizontal difference Ix and vertical difference Iy
 * are formed from the neighbouring samples (one-sided at the plane borders),
 * and the products Ix*Ix, Ix*Iy and Iy*Iy are stored after a right shift by 1.
 * The three output arrays are addressed with the same stride as the source.
 *
 * @param srcp Pointer to the plane to derive
 * @param src_pitch Stride of srcp, also used as the stride of x2, y2 and xy
 * @param height Height of the half-height field-sized frame
 * @param width Width of srcp bitmap rows, as opposed to the padded stride in src_pitch
 * @param x2 Pointer to the array to store the x/x derivatives
 * @param y2 Pointer to the array to store the y/y derivatives
 * @param xy Pointer to the array to store the x/y derivatives
 */
void eedi2_calc_derivatives( uint8_t *srcp, int src_pitch, int height, int width, int *x2, int *y2, int *xy)
{
    int row;

    /* Top row: there is no row above, so the row itself stands in for it */
    eedi2_calc_derivatives_row( srcp, srcp, srcp + src_pitch, width, x2, y2, xy );

    /* Rows with a neighbour on both sides */
    for( row = 1; row < height - 1; ++row )
    {
        srcp += src_pitch;
        x2 += src_pitch;
        y2 += src_pitch;
        xy += src_pitch;
        eedi2_calc_derivatives_row( srcp - src_pitch, srcp, srcp + src_pitch,
                                    width, x2, y2, xy );
    }

    /* Bottom row: there is no row below, so the row itself stands in for it */
    srcp += src_pitch;
    x2 += src_pitch;
    y2 += src_pitch;
    xy += src_pitch;
    eedi2_calc_derivatives_row( srcp - src_pitch, srcp, srcp, width, x2, y2, xy );
}
1820 
/**
 * Evaluates the corner measure for one sample of the derivative arrays
 *
 * Computes dxx * dyy - dxy * dxy - 0.09 * ( dxx + dyy )^2, i.e. a
 * determinant-minus-weighted-squared-trace response, truncated to int.
 *
 * @param dxx x/x derivative at the sample
 * @param dyy y/y derivative at the sample
 * @param dxy x/y derivative at the sample
 * @return The corner response
 */
static int eedi2_corner_response( int dxx, int dyy, int dxy )
{
    const int det   = dxx * dyy - dxy * dxy;
    const int trace = dxx + dyy;

    return (int)( det - 0.09 * trace * trace );
}

/**
 * Filters junctions and corners for the output image
 *
 * Walks every second line of the output plane, starting at line 8 - field and
 * stopping before line height - 7, over columns 4 .. width - 5. Each output
 * line is paired with two consecutive rows of the derivative arrays (starting
 * at rows 3 and 4, advancing one row per output line visited). Pixels whose
 * mask value is 255 or 128 are left alone. Any other pixel is replaced by the
 * rounded average of the pixels directly above and below it when the corner
 * response of either derivative row exceeds 775.
 *
 * @param x2 Pointer to the x/x derivatives
 * @param y2 Pointer to the y/y derivatives
 * @param xy Pointer to the x/y derivatives
 * @param pitch Stride of the source field plane from which the derivatives were calculated
 * @param mskp Pointer to the edge direction mask
 * @param msk_pitch Stride of mskp
 * @param dstp Pointer to the output image being filtered in place
 * @param dst_pitch Stride of dstp
 * @param height Height of the full-frame output plane
 * @param width Width of dstp bitmap rows, as opposed to the padded stride in dst_pitch
 * @param field Field to filter
 */
void eedi2_post_process_corner( int *x2, int *y2, int *xy, const int pitch, uint8_t * mskp, int msk_pitch, uint8_t * dstp, int dst_pitch, int height, int width, int field )
{
    const int first_line = 8 - field;
    const uint8_t *mask_line = mskp + first_line * msk_pitch;
    uint8_t *out_line = dstp + first_line * dst_pitch;
    const int *xx = x2 + pitch * 3;
    const int *yy = y2 + pitch * 3;
    const int *cross = xy + pitch * 3;
    int line, col;

    for( line = first_line; line < height - 7; line += 2 )
    {
        const uint8_t *out_above = out_line - dst_pitch;
        const uint8_t *out_below = out_line + dst_pitch;
        const int *xx_next = xx + pitch;
        const int *yy_next = yy + pitch;
        const int *cross_next = cross + pitch;

        for( col = 4; col < width - 4; ++col )
        {
            const int mask = mask_line[col];

            if( mask != 255 && mask != 128 )
            {
                const int upper_resp = eedi2_corner_response( xx[col], yy[col], cross[col] );
                const int lower_resp = eedi2_corner_response( xx_next[col], yy_next[col],
                                                              cross_next[col] );

                if( upper_resp > 775 || lower_resp > 775 )
                {
                    out_line[col] = ( out_above[col] + out_below[col] + 1 ) >> 1;
                }
            }
        }

        /* Two output lines correspond to one row of the derivative arrays */
        mask_line += msk_pitch * 2;
        out_line += dst_pitch * 2;
        xx = xx_next;
        yy = yy_next;
        cross = cross_next;
    }
}
1873