1 /*************************************************************************
2  *
3  *     Program: FPX Filtering example and C code.
4  *
5  *     Purpose: This listing contains a description of a calling application,
6  *    and blurring and sharpening C code. The filtering code includes
7  *    a convolution routine adapted from fast convolution code by
8  *    Wolberg and Massalin.
9  *
10  *
11  *
12  *************************************************************************/
13 //  SCCSID      : @(#)filter.cpp  1.2 15:03:03 27 Jan 1997
14 //  ----------------------------------------------------------------------------
15 //  Copyright (c) 1999 Digital Imaging Group, Inc.
16 //  For conditions of distribution and use, see copyright notice
17 //  in Flashpix.h
18 //  ----------------------------------------------------------------------------
19 
20 #include <stdio.h>
21 #include <math.h>
22 #include <stdlib.h>
23 #include <string.h>
24 
25 #include "ptile.h"
26 #include "ptil_fpx.h"
27 
28 #ifndef Memoire_h
29   #include  "b_memory.h"
30 #endif
31 
32 #ifndef AdvancedIVUE_h
33   #include "viewimg.h"
34 #endif
35 
36 #ifndef PHierarchicalImage_h
37   #include "ph_image.h"
38 #endif
39 
40 #ifndef PResolutionLevel_h
41   #include "pr_level.h"
42 #endif
43 
44 #ifndef PResolutionFlashPix_h
45   #include "pres_fpx.h"
46 #endif
47 
48 /* Defines.
49  */
50 #define MAXKERNNUM  50      // Maximum number of kernels which can be defined.
51 #define MAXRESLEV 10      // Maximum number of levels allowed in resolution hierarchy
52 
53 #define NOKERNEL  -2
54 
55 #define RESCALE_MTF_HEIGHT 1000 // height of image to start MTF adjustment
56 
57 
58 
59 /*************************************************************************
60  *
61  *  Program: Includes all the resolution independent filtering routines
62  *    including convolution code.
63  *
64  *  List of Functions:
65  *    applyFilter()
66  *    unsharpMask()
67  *    blurFilter()
68  *    convolve()
69  *    initPackedLut()
70  *    fastconv()
71  *
72  *************************************************************************/
73 /* Macro definitions.
74  */
75 #define CLAMP(A,L,H)  ((A)<=(L) ? (L) : (A)<=(H) ? (A) : (H))
76 #define ABS(A)          ((A) >= 0 ? (A) : -(A))
77 #define Z(k)            ( (k == 0) ? 0 : 1 )
78 #define MASK            0x3FF
79 #define ROUNDD          1
80 #define PACK(A,B,C)     (((A)<<20) + ((B)<<10) + (C))
81 #define INTC(A)          ((long) ((A)*262144+32768) >> 16)
82 
83 const double log_ratio = 1.097610796626;    // log(2.14)/log(2.0)
84 
85 
86 /* Constants.
87  */
88 #define B_AMT   40.0
89 
90 
91 
92 
93 // The algorithm is based upon creating a lut(s) which contain pre-multiplied values. These
94 //  values are based upon the weighting values in the filter kernel. Typically, the filter
95 //  kernel does not change from one tile to the next in the same image/resolution. So. as a
96 //  simple optimization, the most recently generated lut(s) is saved statically. The kernel
97 //  values are also saved. If the filter (i.e., the kernel values) are the same as those
98 //  used for the current lut(s), then the saved lut(s) can be used again. Otherwise, a new
99 //  set of lut values is generated in initPackedLut().
100 //
101 static lutS   gLuts;        // Saved LUTs from a previous filtering operation
102 static double gKernelValue[9];  // A 9-point filter is the largest being used presently
103 static long   gKernelSize;    // Number of values stored in 'gKernelValue'
104 
105 
106 
107  /* -----------------------------------------------------------------------
108  * applyFilter
109  *
110  * Apply image filtering using acutance. This is the main filtering
111  * routine. The image hierarchy, filter parameters amd gaussian kernels
112  * are passed as arguments to the function and the appropriate level is
113  * either sharpened or blurred as specified in the change of acutance. The
114  * output image should be resized to the required output height by the
115  * calling application.
116  *
117  * Return -1 on error.
118  *
119  */
ApplyFilter(FPXBaselineColorSpace colorSpace)120 FPXStatus PTileFlashPix::ApplyFilter(FPXBaselineColorSpace colorSpace)
121 {
122   double    total;
123   filtParmS   FiltP;
124   ViewImage   *imageParam = fatherSubImage->fatherFile->imageParam;
125   float     delta_A;
126   int    h, w, nbTilesWidth, nbTilesHeight;
127   FPXStatus status;
128 
129 
130   /*  Set the following filter parameters:
131     FiltP.capture   --  default  7.0
132     FiltP.prefilter   --  default  10.0 for a good filter.
133               for Livepicture's reference implementation kernels:
134               Kernel: (1/6 1/3 1/3 1/6) use  24.0
135               Kernel: (1/2 1/2)   use  7.0
136     FiltP.printer   --  If the destination output is:
137             xls8600 thermal printer   use  12.0
138             Sun Trinitron 19in        use   6.0
139     FiltP.fraction    --  default 1.0
140     FiltP.height_p    --  height in pixels of the image requested by
141                 the application. eg. height of display
142                 window in pixels.
143     FiltP.height_o    --  height in pixels of the full resolution image
144             in the hierarchy eg. in[0].lines.
145     FiltP.delta_A   --  change in acutance specified by the user
146     FiltP.color_space --  RGB = filter all three channels using same filter.
147               YCC = filter luma channel only.
148    */
149   FiltP.capture   = 7.0;
150   FiltP.prefilter = 10.0;
151   FiltP.printer   = 6.0;
152   FiltP.fraction  = 1.0;
153 
154   FiltP.level = fatherSubImage->identifier;
155 
156   fatherSubImage->GetResolutionSizeInfo (&w, &h, &nbTilesWidth, &nbTilesHeight);
157   FiltP.height_r = ( w < h ? w : h);
158 
159   // We consider that the tile size is the display resolution (otherwise, we won't take that tile...)
160   FiltP.height_p = FiltP.height_r;
161 
162   // Get size of highest resolution image
163   fatherSubImage->fatherFile->firstSubImage->GetResolutionSizeInfo (&w, &h, &nbTilesWidth, &nbTilesHeight);
164   FiltP.height_o = ( w < h ? w : h);
165   imageParam->GetFiltering(&delta_A);
166   FiltP.delta_A = (double)delta_A;
167   FiltP.colorSpace = colorSpace;
168 
169   // Set total system MTF. Resolution level to filter has been set in FiltP.level,
170   // original image height set in FiltP.height_o and picture height at resolution
171   // level is set in FiltP.height_r. Note that all these were set by the calling
172   // application.
173   //
174   total = FiltP.capture / pow(4.0, (double)FiltP.level)
175     + Z(FiltP.level)*FiltP.prefilter/4.0
176     + FiltP.printer*(double)(FiltP.height_r*FiltP.height_r)
177        / (double)(FiltP.height_p*FiltP.height_p)
178     + 5.1e-6 * pow((double)FiltP.height_r/FiltP.fraction, 2.0);
179 
180     // Adjust the total MTF to prevent lower resolution images from being
181     // over blurred or sharpened
182     if (FiltP.height_p < RESCALE_MTF_HEIGHT) {  // apply rescaling
183       double rescale = 0.94 * pow((double)FiltP.height_p/RESCALE_MTF_HEIGHT,log_ratio) + 0.06;
184       total = total*rescale;
185     }
186 
187   FiltP.total = total;
188 
189   if (FiltP.delta_A > 0) {    /* Apply unsharp masking */
190     if ((status = UnsharpMask(&FiltP)) != FPX_OK) {
191 //      fprintf(stderr, "Error in unsharpMask()\n");
192       return status;
193     }
194   } else if (FiltP.delta_A < 0) { /* Apply blur filter */
195     if ((status = BlurFilter(&FiltP))  != FPX_OK) {
196 //      fprintf(stderr, "Error in blurFilter()\n");
197       return status;
198     }
199   }
200   return status;
201 }
202 
203 
204 
205 /* -----------------------------------------------------------------------
206  * unsharpMask
207  *
208  * Apply unsharp masking to input image. The following equation is used:
209  *
210  *  f'(x) = f(x) + beta*(f(x) - f(x)@G(g,x))  @ == convolved with
211  *
212  * where f(x) is the input image, G(g,x) is a Gaussian function of width g,
213  * beta is a scaling factor and f'(x) the sharpened image. beta is chosen as the
214  * value, lying between 0 and 5 which allows the smallest width Gaussian filter
215  * to give the required sharpening defined by the change in acutance.
216  *
217  * Returns: FPX_COLOR_CONVERSION_ERROR
218  *      FPX_MEMORY_ALLOCATION_FAILED
219  */
UnsharpMask(filtParmS * FiltP)220 FPXStatus PTileFlashPix::UnsharpMask(filtParmS *FiltP)
221 {
222   // ====================< Declarations >==========================
223   long  n, k;         // Counters
224   double  g_width, beta;      // selected Gaussian width and boost factor
225   double  delta_A;        // Change in acutance
226   double  maxBeta, total;     // intermediate values
227   long  kernelNum;        // Index of selected Gaussian kernel
228 
229   long  sum, pixVal;      // holders
230   firS  Gauss[MAXKERNNUM];    // Table of available gaussian kernels
231 
232   long  paramSet = 0;     // boolean to determine if a valid kernel was found
233   long  nc;           // Number of components in tile
234   long  NumKernels;       // The number of kernels in the database
235 
236   unsigned char* tempcomp[4];   // Pointer to the image components in temp, i.e RGBA
237   unsigned char* srcComp[4];    // Pointers to components in paddedTile
238   unsigned char* dstComp[4];    // Pointers to components in output tile (tile->pixels)
239 
240   Pixel*  paddedTile = NULL;    // Holds the minimum necessary pixels to filter
241   long  pad;          // number of pixels to pad each edge of the tile
242   FPXStatus status = FPX_OK;    // Return status
243 
244 
245   // Initialise Gaussian filter database.
246   if (GetFilterKernels(Gauss, &NumKernels) == -1)
247     return FPX_COLOR_CONVERSION_ERROR;
248 
249   // Set maximum Gaussian width which has been defined
250   g_width = 0;
251   for (n = 0; n < NumKernels; n++) {
252     if (g_width < (Gauss+n)->width) {
253       g_width = (Gauss+n)->width;
254     }
255   }
256 
257   // Set total system MTF
258   total = FiltP->total;
259 
260   // For all Gaussian widths which give suitable beta's (between 1 and 5)
261   // choose the filter with the smallest width.
262   delta_A = FiltP->delta_A;
263   double  tmpDelta = pow(10.0, delta_A/B_AMT) - 1.0;
264   for (n = 0; n < NumKernels; n++) {
265     beta = tmpDelta / (1.0 - sqrt(total)/sqrt(total + (Gauss+n)->width));
266     if (beta <= 5.0 && beta >= 0) {       // Valid range of beta
267       if (g_width >= (Gauss+n)->width) {
268         g_width = (Gauss+n)->width;
269         kernelNum = n;
270         maxBeta = beta;
271         if (!paramSet) paramSet = 1;
272       }
273     }
274   }
275 
276   if (!paramSet) {          // Couldn't find a suitable Gaussian
277     status = FPX_COLOR_CONVERSION_ERROR;
278     goto RETURN;
279   }
280   beta = maxBeta;
281 
282   // init packed luts
283   if (InitPackedLuts( (Gauss+kernelNum)->kernel, (Gauss+kernelNum)->elements) != 0)
284     return FPX_COLOR_CONVERSION_ERROR;            // Probably an overflow condition
285 
286   // Need to allocate at least 1/2 the filter width on each edge
287   pad = (short)((Gauss+kernelNum)->elements) - 1;
288 
289   if ((status = makePaddedRawPixels( pad, &paddedTile)) != FPX_OK)
290     goto RETURN;
291 
292   // Determine channel #'s in color space
293   // Alpha channel is not filtered
294   switch(FiltP->colorSpace) {
295     case  SPACE_32_BITS_RGB:      // The 24 bits are stored in the LSB part of the long
296     case  SPACE_32_BITS_ARGB:
297     case  SPACE_32_BITS_YCC:      // DAG added following for YCC
298     case  SPACE_32_BITS_AYCC:
299       {
300       srcComp[0] = ((unsigned char*)paddedTile) + 1;
301       srcComp[1] = ((unsigned char*)paddedTile) + 2;
302       srcComp[2] = ((unsigned char*)paddedTile) + 3;
303 
304       tempcomp[0] = ((unsigned char*)rawPixels) + 1;
305       tempcomp[1] = ((unsigned char*)rawPixels) + 2;
306       tempcomp[2] = ((unsigned char*)rawPixels) + 3;
307 
308       dstComp[0] = ((unsigned char*)pixels) + 1;
309       dstComp[1] = ((unsigned char*)pixels) + 2;
310       dstComp[2] = ((unsigned char*)pixels) + 3;
311 
312       nc=3;
313       } break;
314     case  SPACE_32_BITS_RGBA:
315     case  SPACE_32_BITS_YCCA:     // DAG added following for YCC
316       srcComp[0] = ((unsigned char*)paddedTile);
317       srcComp[1] = ((unsigned char*)paddedTile) + 1;
318       srcComp[2] = ((unsigned char*)paddedTile) + 2;
319 
320       tempcomp[0] = ((unsigned char*)rawPixels);
321       tempcomp[1] = ((unsigned char*)rawPixels) + 1;
322       tempcomp[2] = ((unsigned char*)rawPixels) + 2;
323 
324       dstComp[0] = ((unsigned char*)pixels);
325       dstComp[1] = ((unsigned char*)pixels) + 1;
326       dstComp[2] = ((unsigned char*)pixels) + 2;
327       nc=3;
328       break;
329     case  SPACE_32_BITS_M:
330     case  SPACE_32_BITS_AM:
331       srcComp[0]  = ((unsigned char*)paddedTile) + 3;
332       tempcomp[0] = ((unsigned char*)rawPixels) + 3;
333       dstComp[0]  = ((unsigned char*)pixels) + 3;
334       nc=1;
335       break;
336     case  SPACE_32_BITS_MA:
337       srcComp[0]  = ((unsigned char*)paddedTile) + 2;
338       tempcomp[0] = ((unsigned char*)rawPixels) + 2;
339       dstComp[0]  = ((unsigned char*)pixels) + 2;
340       nc=1;
341       break;
342     default:
343       // invalid colorspace
344       status = FPX_COLOR_CONVERSION_ERROR;
345       goto RETURN;
346   }
347 
348   // Sharpen image. Each pass does one of the components
349   // We Convolve() the padded tile into a normal-sized temp tile, then sum the
350   //  convolved output back into the original, raw data and put the result into
351   //  'pixels'
352   for (k = 0; k < nc; k++) {
353     if ((status = Convolve( srcComp[k], pad, &gLuts, dstComp[k])) != FPX_OK)
354       goto RETURN;
355 
356     unsigned char *padP, *tmpP, *dstP;
357     long      x, y,
358             padWid = (width + (2 * pad));     // Width of padded line in Pixels
359 
360     for (y = 0; y < height; y++) {
361       padP = srcComp[k]  +(((y + pad) * padWid) * sizeof( Pixel)) + (pad * sizeof( Pixel));
362       tmpP = tempcomp[k] + ((y * width)         * sizeof( Pixel));
363       dstP = dstComp[k]  + ((y * width)         * sizeof( Pixel));
364 
365       for (x = 0; x < width; x++) {
366         pixVal = (long)*tmpP;
367         sum = pixVal + (long)(beta*(double)(pixVal - (long)*dstP) + 0.5);
368         *dstP = (unsigned char)CLAMP(sum, 0, 255);
369         padP += sizeof( Pixel);
370         tmpP += sizeof( Pixel);
371         dstP += sizeof( Pixel);
372       }
373     }
374   }
375 
376  RETURN:
377   // Added this to make sure all memory is recovered on return
378   // It's easier to have one return point than to maintain
379   // multiple returns
380 
381   if (paddedTile)
382     FastDeleteArray( paddedTile, Pixel);
383   return status;
384 }
385 
386 /* -----------------------------------------------------------------------
387  * blurFilter
388  *
389  *
390  * Apply blurring to input image. The following equation is used:
391  *
392  *  f'(x) = f(x)@((1 - beta)*G(g1, x) + beta*G(g2, x)]
393  *
394  *
395  * where f(x) is the input image, G(g1, x) and G(g2, x) are gaussian functions of
396  * width g1 and g2 respectively, beta is a scaling factor and f'(x) the blurred
397  * image. Beta is chosen as the value, lying between 0 and 1 which allows the
398  * smallest width Gaussian filters to give the required blurring effect as
399  * defined by the change in acutance.
400  *
401  * The Gaussian widths g1 and g2 should be of the order of 1 to 10 magnitudes
402  * apart. For example the following combinations are allowed:
403  *  g1: 0.1 8 20  50
404  *  g2: 10  25  60  150
405  *
406  * Returns: FPX_COLOR_CONVERSION_ERROR
407  *      FPX_MEMORY_ALLOCATION_FAILED
408  *
409  *
410  */
BlurFilter(filtParmS * FiltP)411 FPXStatus PTileFlashPix::BlurFilter(filtParmS *FiltP)
412 {
413   long  n, n1, n2, k;
414   double  g1_width, g2_width, beta, oneMbeta;
415   double  maxBeta, delta_A;
416   double  widthg1, widthg2, rootg1, rootg2, sumwidth;
417   double  total;
418   long  kernelNum1, kernelNum2;
419 
420   firS  Gauss[MAXKERNNUM];
421 
422   double  blurKernel[9];
423   long  blurElements;
424 
425   long  paramSet = 0;
426   long  nc;
427   long  NumKernels;
428 
429   unsigned char* srcComp[4];  // Table of pointers to components
430   unsigned char* dstComp[4];
431     Pixel*  paddedTile = NULL;    // Holds the minimum necessary pixels to filter
432   long  pad;          // number of pixels to pad each edge of the tile
433 
434   FPXStatus status = FPX_OK;    // Return status
435 
436 
437   if (GetFilterKernels( Gauss, &NumKernels) == -1)
438     return FPX_COLOR_CONVERSION_ERROR;
439 
440   /* Set maximum Gaussian width which has been defined */
441   g1_width = 0;
442   for (n = 0; n < NumKernels; n++) {
443     if (g1_width < (Gauss+n)->width) {
444       g1_width = (Gauss+n)->width;
445     }
446   }
447   g2_width = g1_width;
448   sumwidth = g2_width + g1_width;
449 
450   /* Set total system MTF */
451   total = FiltP->total;
452 
453   /* For all Gaussian widths which give suitable beta's (between 0 and 1)
454    * choose the set of filter pairs with the smallest sum of widths.
455    */
456   delta_A = FiltP->delta_A;
457   double  tmpDelta = pow(10.0, delta_A/B_AMT);
458 
459   for (n1 = 0; n1 < NumKernels; n1++) {
460     for (n2 = 0; n2 < NumKernels; n2++) {
461     widthg1 = (Gauss+n1)->width;
462     widthg2 = (Gauss+n2)->width;
463     if (widthg1 != widthg2 && widthg2 > widthg1 && widthg2 < 10.0*widthg1) {
464       rootg1 = sqrt(total + widthg1);
465       rootg2 = sqrt(total + widthg2);
466 
467       beta = (tmpDelta * rootg1 * rootg2 /
468           sqrt(total) - rootg2)/(rootg1 - rootg2);
469 
470       if (beta <= 1.0 && beta >= 0) {   /* Valid range of beta */
471         if (sumwidth > widthg2 + widthg1) {
472           sumwidth = widthg2 + widthg1;
473           g1_width = widthg1;
474           g2_width = widthg2;
475           kernelNum1 = n1;
476           kernelNum2 = n2;
477           maxBeta = beta;
478           if (!paramSet) paramSet = 1;
479         }
480       }
481     }
482      }
483   }
484 
485   if (!paramSet) {
486     // Failed to find a suitable filter, so simply return the data unchanged (in this
487     //  case, copy the data from 'rawPixels' to the 'pixels' buffer.
488     memcpy( pixels, rawPixels, width * height * sizeof(Pixel) );
489     return FPX_OK;
490   }
491 
492   beta = maxBeta;
493   oneMbeta = 1.0 - beta;
494 
495   // Set up blurring kernel and number of elements
496   for (n=0; n < 9; n++) blurKernel[n] = (double)beta*(Gauss+kernelNum2)->kernel[n] +
497     (double)(oneMbeta)*(Gauss+kernelNum1)->kernel[n];
498 
499   if ((Gauss+kernelNum2)->elements > (Gauss+kernelNum1)->elements) {
500     blurElements = (Gauss+kernelNum2)->elements;
501   } else {
502     blurElements = (Gauss+kernelNum1)->elements;
503   }
504 
505   // init packed luts for filter kernel
506   if (InitPackedLuts( blurKernel, blurElements) != 0)
507     return FPX_COLOR_CONVERSION_ERROR;            // Probably an overflow condition
508 
509   pad = (3 * (gLuts.stages)) - 1;
510 
511   if ((status = makePaddedRawPixels( pad, &paddedTile)) != FPX_OK)
512     goto RETURN;
513 
514   // Determine channel #'s in color space
515   // Alpha channel is not filtered
516   switch(FiltP->colorSpace) {
517     case  SPACE_32_BITS_RGB:  // The 24 bits are stored in the LSB part of the long
518     case  SPACE_32_BITS_ARGB:
519     case  SPACE_32_BITS_YCC:  // DAG added following for YCC
520     case  SPACE_32_BITS_AYCC:
521       srcComp[0] = ((unsigned char*)paddedTile) + 1;
522       srcComp[1] = ((unsigned char*)paddedTile) + 2;
523       srcComp[2] = ((unsigned char*)paddedTile) + 3;
524 
525       dstComp[0] = ((unsigned char*)pixels) + 1;
526       dstComp[1] = ((unsigned char*)pixels) + 2;
527       dstComp[2] = ((unsigned char*)pixels) + 3;
528       nc=3;
529       break;
530     case  SPACE_32_BITS_RGBA:
531     case  SPACE_32_BITS_YCCA:   // DAG added following for YCC
532       srcComp[0] = ((unsigned char*)paddedTile);
533       srcComp[1] = ((unsigned char*)paddedTile) + 1;
534       srcComp[2] = ((unsigned char*)paddedTile) + 2;
535 
536       dstComp[0] = ((unsigned char*)pixels);
537       dstComp[1] = ((unsigned char*)pixels) + 1;
538       dstComp[2] = ((unsigned char*)pixels) + 2;
539       nc=3;
540       break;
541     case  SPACE_32_BITS_M:
542     case  SPACE_32_BITS_AM:
543       srcComp[0] = ((unsigned char*)paddedTile) + 3;
544       dstComp[0] = ((unsigned char*)pixels) + 3;
545       nc=1;
546       break;
547     case  SPACE_32_BITS_MA:
548       srcComp[0] = ((unsigned char*)paddedTile) + 2;
549       dstComp[0] = ((unsigned char*)pixels) + 2;
550       nc=1;
551       break;
552     default:
553       // invalid colorspace
554       status = FPX_COLOR_CONVERSION_ERROR;
555       goto RETURN;
556   }
557 
558   // Blur image
559   for (k = 0; k < nc; k++) {
560     if ((status = Convolve( srcComp[k], pad, &gLuts, dstComp[k])) != FPX_OK)
561       goto RETURN;
562   }
563 
564 
565   RETURN:
566   if (paddedTile)
567     FastDeleteArray( paddedTile, Pixel);
568   return status;
569 }
570 
571 
572 
573 
574 /* ======================================================================
575  *
576  *  Program: Fast Convolution
577  *
578  *  adapted from:
579  *  "Fast Convolution with Packed Lookup Tables" Wolberg G. and
580  *  Massalin H., in Heckbert P (editor), "Graphics Gems IV",
581  *  Academic Press, 1994, pages 447-464
582  *
583  *
584  * ======================================================================
585 */
586 
587 /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
588  * convolve:
589  *
590  * Convolve input image I1 with kernel, a (2n-1) -point symmetric filter
591  *  kernel containing n entries: h[i] = kernel[ |i| ] for -n < i < n.
592  *  Output is stored in I2.
593  * It is assumed that the 'in' buffer has been padded on all sides by the amount
594  *  indicated by 'pad'. The 'out' buffer does not contain any padding, thus the
595  *  'in' and 'out' buffers are different sizes.
596  */
Convolve(unsigned char * in,long pad,lutS * luts,unsigned char * out)597 FPXStatus PTileFlashPix::Convolve( unsigned char *in, long pad, lutS *luts, unsigned char *out)
598 {
599   long      x, y, paddedWidth, paddedHeight;
600   unsigned char *src, *dst;
601   long      srcInc, dstInc;
602   unsigned char *temp;
603 
604   paddedWidth  = width  + (2 * pad);
605   paddedHeight = height + (2 * pad);
606 
607   // Allocate the temporary buffer to hold the 1st pass
608   temp = new unsigned char [width * sizeof( Pixel) * paddedHeight];
609   if (temp == NULL)
610     return FPX_MEMORY_ALLOCATION_FAILED;
611 
612   src = in;
613   dst = temp;
614   srcInc = paddedWidth * sizeof( Pixel);
615   dstInc =    width    * sizeof( Pixel);
616 
617   for (y = 0; y < paddedHeight; y++) {          // process all rows
618     Fastconv( src, width, pad, sizeof( Pixel), luts, dst);
619     src += srcInc;
620     dst += dstInc;
621   }
622 
623   src = temp;
624   dst = out;
625   dstInc = width * sizeof( Pixel);
626 
627   for (x = 0; x < width; x++) {             // process all columns
628     Fastconv(src, height, pad, dstInc, luts, dst);
629     src += sizeof( Pixel);
630     dst += sizeof( Pixel);
631   }
632 
633   /* free temporary image */
634   delete [] temp;
635 
636   return FPX_OK;
637 }
638 
639 /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
640  * initPackedLuts:
641  *
642  * Initialize scaled and packed lookup tables in lut.
643  * Permit up to 3 cascaded stages for the following kernel sizes:
644  *  stage 0:   5-point kernel
645  *  stage 1:  11-point kernel
646  *  stage 2:  17-point kernel
647  * lut->lut0 <== packed entries (i*k2, i*k1, .5*i*k0), for i in [0, 255]
648  * lut->lut1 <== packed entries (i*k5, i*k4,    i*k3), for i in [0, 255]
649  * lut->lut2 <== packed entries (i*k8, i*k7,    i*k6), for i in [0, 255]
650  * where k0,...,k8 are taken in sequence from kernel[].
651  *
652  * note that in lut[0], k0 is halved since it corresponds to the center
653  * pixel's kernel value and it appears in both fwd0 and rev0 (see text).
654  *
655  * The lut(s) generated are in the static 'gLuts'. This lut is saved between calls along
656  *  with the kernel used to generate it. If the kernel values passed-in match those used
657  *  for the current lut, then the old lut is used; otherwise, new lut values are generated.
658  */
InitPackedLuts(double * kernel,long n)659 FPXStatus PTileFlashPix::InitPackedLuts(double *kernel, long n)
660 {
661   long  i, k, s, *lut;
662   long  b1, b2, b3;
663   double  k1, k2, k3;
664   double  sum;
665   lutS  *luts = &gLuts;
666 
667   // If the kernel passed-in is the same as that used to generate the current luts,
668   //  then just return.
669   for (i=0; i < n; i++)
670     if (kernel[i] != gKernelValue[i])
671       break;
672   if (i >= n)
673     return FPX_OK;
674 
675 
676 
677   /* enforce flat-field response constraint: sum of kernel values = 1*/
678   sum = kernel[0];
679   for (i=1; i<n; i++) sum += 2*kernel[i];    /* account for symmetry */
680   if (ABS(sum - 1) > 0.001) {
681 //    fprintf(stderr, "Warning: filter sum != 1 (=%f)\n", sum);
682     }
683 
684   /* init bias added to fields to avoid negative numbers (underflow) */
685   luts->bias = 0;
686 
687   /* set up lut stages, 3 kernel values at a time */
688   for (k = s = 0; k < n; s++) {     /* init lut (stage s) */
689     k1 = (k < n) ? kernel[k++] : 0;
690     k2 = (k < n) ? kernel[k++] : 0;
691     k3 = (k < n) ? kernel[k++] : 0;
692     if (k <= 3) k1 *= 0.5;        /* kernel[0]: halve k0  */
693 
694     /* select proper array in lut structure based on stage s */
695     switch (s) {
696       case 0: lut = luts->lut0; break;
697       case 1: lut = luts->lut1; break;
698       case 2: lut = luts->lut2; break;
699     }
700 
701     /* check k1, k2, k3 to avoid overflow in 10-bit fields */
702     if (ABS(k1) + ABS(k2) + ABS(k3) > 1) {
703 //      fprintf(stderr, "|%f|+|%f|+|%f| > 1\n", k1, k2, k3);
704       return FPX_COLOR_CONVERSION_ERROR;
705     }
706 
707     /* compute bias for each field to avoid underflow */
708     b1 = b2 = b3 = 0;
709     if (k1 < 0) b1 = (long)(-k1*1024.0);
710     if (k2 < 0) b2 = (long)(-k2*1024.0);
711     if (k3 < 0) b3 = (long)(-k3*1024.0);
712 
713     /* luts->bias will be subtracted in fastconv() after adding
714      * stages; multiply by 2 because of combined effect of fwd
715      * and rev terms
716      */
717     luts->bias += 2*(b1 + b2 + b3);
718 
719     /* scale and pack kernel values in lut */
720     for (i=0; i < 256; i++) {
721       /*
722        * INTC(A) forms fixed point field:
723        * (A*(1<<18)+(1<<15)) >> 16
724        */
725       lut[i] = PACK(  INTC(i*k3) + b3,
726           INTC(i*k2) + b2 + ROUNDD,
727           INTC(i*k1) + b1 );
728     }
729   }
730   luts->stages = s;
731 
732   // Save the kernel for testing next time the routine is called
733   for (i=0; i < n; i++)
734     gKernelValue[i] = kernel[i];
735   gKernelSize = n;
736 
737   return FPX_OK;
738 }
739 
740 
741 
742 
743 /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
744  * Fast 1D convolver.
745  * Convolve len input samples in src with a symmetric kernel packed in luts,
746  * a lookup table that is created by initPackedLuts() from kernel values.
747  * It is assumed that 'src' has been padded at both ends by 'padLen', so it's
748  *  width is 'len' + (2 * 'padLen'). 'padLen' is based upon the number of stages
749  *  in 'luts', not the kernel size.
750  * It is assumed that 'dst' does not have room for padding.
751  * 'inStride' is the stride/offset from one value to be convolved to the next in
752  *  the input buffer. Note that only one component is convolved by this routine.
753  * 'outStride' is the stride/offset from one value to be convolved to the next in
754  *  the output buffer, 'dst'.
755  */
Fastconv(unsigned char * src,long len,long padLen,long stride,lutS * luts,unsigned char * dst)756 void PTileFlashPix::Fastconv(unsigned char *src, long len, long padLen, long stride,
757             lutS *luts, unsigned char *dst)
758 {
759   long      x, val, bias;
760   long      fwd0, fwd1, fwd2;
761   long      rev0, rev1, rev2;
762   long      *lut0, *lut1, *lut2;
763   unsigned char *p1, *p2, *ip, *op;
764   unsigned char buf[4096];
765 
766 
767   // copy src into buf
768   p1 = src;               // pointer to row (or column) of input
769   p2 = buf;               // pointer to row of padded buffer
770   for (x=0; x< (len + (2 * padLen)); x++) {
771     *p2++ = *p1;
772     p1 += stride;
773   }
774 
775   // Initialize input and output pointers, ip and op, respectively
776   ip = buf;
777   op = dst;
778 
779   // bias was added to lut entries to deal with negative kernel values
780   bias = luts->bias;
781 
782   switch(luts->stages) {
783   case 1: // 5-pt kernel
784     lut0 = luts->lut0;
785 
786     ip += 2;                  // ip[0] is center pixel
787     fwd0 = (lut0[ip[-2]] >> 10) + lut0[ip[-1]];
788     rev0 = (lut0[ip[0]] << 10) + lut0[ip[1]];
789 
790     while(len--) {
791       fwd0 = (fwd0 >> 10) + lut0[ip[0]];
792       rev0 = (rev0 << 10) + lut0[ip[2]];
793       val = ((fwd0 & MASK) + ((rev0 >> 20) & MASK) - bias)
794         >> 2;
795       *op = CLAMP(val, 0, 255);
796       ip++;
797       op += stride;
798     }
799     break;
800   case 2: // 11-pt kernel
801     lut0 = luts->lut0;
802     lut1 = luts->lut1;
803 
804     ip += 5;                // ip[0] is center pixel
805     fwd0 = (lut0[ip[-2]] >> 10) + lut0[ip[-1]];
806     rev0 = (lut0[ip[0]] << 10) + lut0[ip[1]];
807 
808     fwd1 = (lut1[ip[-5]] >> 10) + lut1[ip[-4]];
809     rev1 = (lut1[ip[3]] << 10) + lut1[ip[4]];
810 
811     while(len--) {
812       fwd0 = (fwd0 >> 10) + lut0[ip[0]];
813       rev0 = (rev0 << 10) + lut0[ip[2]];
814 
815       fwd1 = (fwd1 >> 10) + lut1[ip[-3]];
816       rev1 = (rev1 << 10) + lut1[ip[5]];
817 
818       val = ((fwd0 & MASK) + ((rev0 >> 20) & MASK)
819             +(fwd1 & MASK) + ((rev1 >> 20) & MASK) - bias)
820         >> 2;
821       *op = CLAMP(val, 0, 255);
822 
823       ip++;
824       op += stride;
825     }
826     break;
827   case 3: // 17-pt kernel
828     lut0 = luts->lut0;
829     lut1 = luts->lut1;
830     lut2 = luts->lut2;
831 
832     ip += 8;                    // ip[0] is center pixel
833     fwd0 = (lut0[ip[-2]] >> 10) + lut0[ip[-1]];
834     rev0 = (lut0[ip[ 0]] << 10) + lut0[ip[1]];
835 
836     fwd1 = (lut1[ip[-5]] >> 10) + lut1[ip[-4]];
837     rev1 = (lut1[ip[ 3]] << 10) + lut1[ip[4]];
838 
839     fwd2 = (lut2[ip[-8]] >> 10) + lut2[ip[-7]];
840     rev2 = (lut2[ip[ 6]] << 10) + lut2[ip[7]];
841 
842     while(len--) {
843       fwd0 = (fwd0 >> 10) + lut0[ip[0]];
844       rev0 = (rev0 << 10) + lut0[ip[2]];
845 
846       fwd1 = (fwd1 >> 10) + lut1[ip[-3]];
847       rev1 = (rev1 << 10) + lut1[ip[5]];
848 
849       fwd2 = (fwd2 >> 10) + lut2[ip[-6]];
850       rev2 = (rev2 << 10) + lut2[ip[8]];
851 
852       val = ((fwd0 & MASK) + ((rev0 >> 20) & MASK)
853             +(fwd1 & MASK) + ((rev1 >> 20) & MASK)
854             +(fwd2 & MASK) + ((rev2 >> 20) & MASK) - bias)
855         >> 2;
856       *op = CLAMP(val, 0, 255);
857 
858       ip++;
859       op += stride;
860     }
861     break;
862   }
863 }
864 
865 
866 
867 /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
868  * getFilterKernels:
869  *
870  * Gaussian FIR kernel array of structures is defined and set.
871  *
872  * Format of database FIR kernel entry is:
873  *
874  *   g,  n,  k0,  k1,  k2,   k3,... kn-1
875  *
876  * where n is less than or equal to 9. This defines a (2n-1) point
877  * kernel which is symmetric and corresponds to an FIR filter of:
878  *
879  * kn-1, ..., k3, k2, k1, k0, k1, k2, k3, ..., kn-1
880  *
881  * The frequency response of the filter should approximate a Gaussian
882  * function of width g. All the filters defined are normalised. That is:
883  *
884  *  k0 + 2*(k1 + k2 + k3 + ...+ kn-1) = 1.0
885  *
886  * The FirS struct contans: filter width, # elements and cooef[9]
887  *
888  */
GetFilterKernels(firS * Gauss_array,long * NumKernels)889  long PTileFlashPix::GetFilterKernels(firS *Gauss_array, long *NumKernels)
890 {
891   long  i;
892   firS Gauss[] = {
893     { 1, 3, { 0.927947, 0.046292, -0.010266, 0, 0, 0, 0, 0, 0 }},
894     { 2, 3, { 0.863510, 0.084621, -0.016376, 0, 0, 0, 0, 0, 0 }},
895     { 5, 3, { 0.709951, 0.163083, -0.018058, 0, 0, 0, 0, 0, 0 }},
896     { 10, 3, { 0.549109, 0.221605, 0.003841, 0, 0, 0, 0, 0, 0 }},
897     { 15, 3, { 0.456657, 0.240525, 0.031146, 0, 0, 0, 0, 0, 0 }},
898     { 20, 3, { 0.399725, 0.245012, 0.055126, 0, 0, 0, 0, 0, 0 }},
899     { 25, 3, { 0.362202, 0.244322, 0.074576, 0, 0, 0, 0, 0, 0 }},
900     { 30, 3, { 0.336000, 0.241897, 0.090102, 0, 0, 0, 0, 0, 0 }},
901     { 40, 4, { 0.283671, 0.221641, 0.105732, 0.030791, 0, 0, 0, 0, 0 }},
902     { 50, 4, { 0.257185, 0.211101, 0.116785, 0.043522, 0, 0, 0, 0, 0 }},
903     { 60, 4, { 0.238807, 0.202564, 0.123695, 0.054338, 0, 0, 0, 0, 0 }},
904     { 80, 5, { 0.203005, 0.179459, 0.123936, 0.066898, 0.028204, 0, 0, 0, 0 }},
905     { 100, 5, { 0.185341, 0.167947, 0.124891, 0.076274, 0.038218, 0, 0, 0, 0 }},
906     { 120, 5, { 0.173222, 0.159578, 0.124665, 0.082670, 0.046476, 0, 0, 0, 0 }},
907     { 135, 6, { 0.157970, 0.146819, 0.117923, 0.081809, 0.049060, 0.025405, 0, 0, 0 }},
908     { 150, 6, { 0.151509, 0.141843, 0.116457, 0.083798, 0.052894, 0.029252, 0, 0, 0 }},
909     { 165, 7, { 0.141342, 0.133147, 0.111267, 0.082517, 0.054283, 0.031700, 0.016415, 0, 0 }},
910     { 180, 7, { 0.136262, 0.129005, 0.109428, 0.083204, 0.056678, 0.034618, 0.018937, 0, 0 }},
911     { 210, 7, { 0.128108, 0.122244, 0.106156, 0.083945, 0.060403, 0.039592, 0.023606, 0, 0 }},
912     { 240, 7, { 0.121869, 0.116980, 0.103387, 0.084197, 0.063127, 0.043627, 0.027747, 0, 0 }},
913     { 280, 7, { 0.115536, 0.111561, 0.100347, 0.084163, 0.065748, 0.047909, 0.032504, 0, 0 }},
914     { 320, 7, { 0.110736, 0.107404, 0.097889, 0.083935, 0.067621, 0.051271, 0.036512, 0, 0 }},
915     { 360, 7, { 0.106978, 0.104120, 0.095873, 0.083632, 0.069011, 0.053967, 0.039907, 0, 0 }},
916     { 400, 7, { 0.103961, 0.101465, 0.094197, 0.083308, 0.070074, 0.056169, 0.042807, 0, 0 }}
917   };
918 
919   *NumKernels = sizeof(Gauss)/sizeof(Gauss[0]);
920 
921   if (*NumKernels > MAXKERNNUM) {
922 //    fprintf(stderr, "Too many kernels defined.\n");
923     return -1;
924   }
925 
926   for (i = 0; i < *NumKernels; i++)
927     Gauss_array[i] = Gauss[i];
928 
929   return 0;
930 }
931 
932