1 /*====================================================================*
2  -  Copyright (C) 2001 Leptonica.  All rights reserved.
3  -  This software is distributed in the hope that it will be
4  -  useful, but with NO WARRANTY OF ANY KIND.
5  -  No author or distributor accepts responsibility to anyone for the
6  -  consequences of using this software, or for whether it serves any
7  -  particular purpose or works at all, unless he or she says so in
8  -  writing.  Everyone is granted permission to copy, modify and
9  -  redistribute this source code, for commercial or non-commercial
10  -  purposes, with the following restrictions: (1) the origin of this
11  -  source code must not be misrepresented; (2) modified versions must
12  -  be plainly marked as such; and (3) this notice may not be removed
13  -  or altered from any source or modified source distribution.
14  *====================================================================*/
15 
16 
17 /*
18  *  scalelow.c
19  *
20  *         Color (interpolated) scaling: general case
21  *                  void       scaleColorLILow()
22  *
23  *         Grayscale (interpolated) scaling: general case
24  *                  void       scaleGrayLILow()
25  *
26  *         Color (interpolated) scaling: 2x upscaling
27  *                  void       scaleColor2xLILow()
28  *                  void       scaleColor2xLILineLow()
29  *
30  *         Grayscale (interpolated) scaling: 2x upscaling
31  *                  void       scaleGray2xLILow()
32  *                  void       scaleGray2xLILineLow()
33  *
34  *         Grayscale (interpolated) scaling: 4x upscaling
35  *                  void       scaleGray4xLILow()
36  *                  void       scaleGray4xLILineLow()
37  *
38  *         Grayscale and color scaling by closest pixel sampling
39  *                  l_int32    scaleBySamplingLow()
40  *
41  *         Color and grayscale downsampling with (antialias) lowpass filter
42  *                  l_int32    scaleSmoothLow()
43  *                  void       scaleRGBToGray2Low()
44  *
45  *         Color and grayscale downsampling with (antialias) area mapping
46  *                  l_int32    scaleColorAreaMapLow()
47  *                  l_int32    scaleGrayAreaMapLow()
48  *                  l_int32    scaleAreaMapLow2()
49  *
50  *         Binary scaling by closest pixel sampling
51  *                  l_int32    scaleBinaryLow()
52  *
53  *         Scale-to-gray 2x
54  *                  void       scaleToGray2Low()
55  *                  l_uint32  *makeSumTabSG2()
56  *                  l_uint8   *makeValTabSG2()
57  *
58  *         Scale-to-gray 3x
59  *                  void       scaleToGray3Low()
60  *                  l_uint32  *makeSumTabSG3()
61  *                  l_uint8   *makeValTabSG3()
62  *
63  *         Scale-to-gray 4x
64  *                  void       scaleToGray4Low()
65  *                  l_uint32  *makeSumTabSG4()
66  *                  l_uint8   *makeValTabSG4()
67  *
68  *         Scale-to-gray 6x
69  *                  void       scaleToGray6Low()
70  *                  l_uint8   *makeValTabSG6()
71  *
72  *         Scale-to-gray 8x
73  *                  void       scaleToGray8Low()
74  *                  l_uint8   *makeValTabSG8()
75  *
76  *         Scale-to-gray 16x
77  *                  void       scaleToGray16Low()
78  *
79  *         Grayscale mipmap
80  *                  l_int32    scaleMipmapLow()
81  *
82  */
83 
84 #include <stdio.h>
85 #include <stdlib.h>
86 #include <string.h>
87 #include "allheaders.h"
88 
89 #ifndef  NO_CONSOLE_IO
90 #define  DEBUG_OVERFLOW   0
91 #define  DEBUG_UNROLLING  0
92 #endif  /* ~NO_CONSOLE_IO */
93 
94 
95 /*------------------------------------------------------------------*
96  *            General linear interpolated color scaling             *
97  *------------------------------------------------------------------*/
98 /*!
99  *  scaleColorLILow()
100  *
101  *  We choose to divide each pixel into 16 x 16 sub-pixels.
102  *  Linear interpolation is equivalent to finding the
103  *  fractional area (i.e., number of sub-pixels divided
104  *  by 256) associated with each of the four nearest src pixels,
105  *  and weighting each pixel value by this fractional area.
106  *
107  *  P3 speed is about 7 x 10^6 dst pixels/sec/GHz
108  */
109 void
scaleColorLILow(l_uint32 * datad,l_int32 wd,l_int32 hd,l_int32 wpld,l_uint32 * datas,l_int32 ws,l_int32 hs,l_int32 wpls)110 scaleColorLILow(l_uint32  *datad,
111                l_int32    wd,
112                l_int32    hd,
113                l_int32    wpld,
114                l_uint32  *datas,
115                l_int32    ws,
116                l_int32    hs,
117                l_int32    wpls)
118 {
119 l_int32    i, j, wm2, hm2;
120 l_int32    xpm, ypm;  /* location in src image, to 1/16 of a pixel */
121 l_int32    xp, yp, xf, yf;  /* src pixel and pixel fraction coordinates */
122 l_int32    v00r, v01r, v10r, v11r, v00g, v01g, v10g, v11g;
123 l_int32    v00b, v01b, v10b, v11b, area00, area01, area10, area11;
124 l_uint32   pixels1, pixels2, pixels3, pixels4, pixel;
125 l_uint32  *lines, *lined;
126 l_float32  scx, scy;
127 
128         /* (scx, scy) are scaling factors that are applied to the
129          * dest coords to get the corresponding src coords.
130          * We need them because we iterate over dest pixels
131          * and must find the corresponding set of src pixels. */
132     scx = 16. * (l_float32)ws / (l_float32)wd;
133     scy = 16. * (l_float32)hs / (l_float32)hd;
134 
135     wm2 = ws - 2;
136     hm2 = hs - 2;
137 
138         /* Iterate over the destination pixels */
139     for (i = 0; i < hd; i++) {
140         ypm = (l_int32)(scy * (l_float32)i);
141         yp = ypm >> 4;
142         yf = ypm & 0x0f;
143         lined = datad + i * wpld;
144         lines = datas + yp * wpls;
145         for (j = 0; j < wd; j++) {
146             xpm = (l_int32)(scx * (l_float32)j);
147             xp = xpm >> 4;
148             xf = xpm & 0x0f;
149 
150                 /* Do bilinear interpolation.  This is a simple
151                  * generalization of the calculation in scaleGrayLILow().
152                  * Without this, we could simply subsample:
153                  *     *(lined + j) = *(lines + xp);
154                  * which is faster but gives lousy results!  */
155             pixels1 = *(lines + xp);
156 
157             if (xp > wm2 || yp > hm2) {
158                 if (yp > hm2 && xp <= wm2) {  /* pixels near bottom */
159                     pixels2 = *(lines + xp + 1);
160                     pixels3 = pixels1;
161                     pixels4 = pixels2;
162                 }
163                 else if (xp > wm2 && yp <= hm2) {  /* pixels near right side */
164                     pixels2 = pixels1;
165                     pixels3 = *(lines + wpls + xp);
166                     pixels4 = pixels3;
167                 }
168                 else {  /* pixels at LR corner */
169                     pixels4 = pixels3 = pixels2 = pixels1;
170                 }
171             }
172             else {
173                 pixels2 = *(lines + xp + 1);
174                 pixels3 = *(lines + wpls + xp);
175                 pixels4 = *(lines + wpls + xp + 1);
176             }
177 
178             area00 = (16 - xf) * (16 - yf);
179             area10 = xf * (16 - yf);
180             area01 = (16 - xf) * yf;
181             area11 = xf * yf;
182             v00r = area00 * ((pixels1 >> L_RED_SHIFT) & 0xff);
183             v00g = area00 * ((pixels1 >> L_GREEN_SHIFT) & 0xff);
184             v00b = area00 * ((pixels1 >> L_BLUE_SHIFT) & 0xff);
185             v10r = area10 * ((pixels2 >> L_RED_SHIFT) & 0xff);
186             v10g = area10 * ((pixels2 >> L_GREEN_SHIFT) & 0xff);
187             v10b = area10 * ((pixels2 >> L_BLUE_SHIFT) & 0xff);
188             v01r = area01 * ((pixels3 >> L_RED_SHIFT) & 0xff);
189             v01g = area01 * ((pixels3 >> L_GREEN_SHIFT) & 0xff);
190             v01b = area01 * ((pixels3 >> L_BLUE_SHIFT) & 0xff);
191             v11r = area11 * ((pixels4 >> L_RED_SHIFT) & 0xff);
192             v11g = area11 * ((pixels4 >> L_GREEN_SHIFT) & 0xff);
193             v11b = area11 * ((pixels4 >> L_BLUE_SHIFT) & 0xff);
194             pixel = (((v00r + v10r + v01r + v11r + 128) << 16) & 0xff000000) |
195                     (((v00g + v10g + v01g + v11g + 128) << 8) & 0x00ff0000) |
196                     ((v00b + v10b + v01b + v11b + 128) & 0x0000ff00);
197             *(lined + j) = pixel;
198         }
199     }
200 
201     return;
202 }
203 
204 
205 /*------------------------------------------------------------------*
206  *            General linear interpolated gray scaling              *
207  *------------------------------------------------------------------*/
208 /*!
209  *  scaleGrayLILow()
210  *
211  *  We choose to divide each pixel into 16 x 16 sub-pixels.
212  *  Linear interpolation is equivalent to finding the
213  *  fractional area (i.e., number of sub-pixels divided
214  *  by 256) associated with each of the four nearest src pixels,
215  *  and weighting each pixel value by this fractional area.
216  */
217 void
scaleGrayLILow(l_uint32 * datad,l_int32 wd,l_int32 hd,l_int32 wpld,l_uint32 * datas,l_int32 ws,l_int32 hs,l_int32 wpls)218 scaleGrayLILow(l_uint32  *datad,
219                l_int32    wd,
220                l_int32    hd,
221                l_int32    wpld,
222                l_uint32  *datas,
223                l_int32    ws,
224                l_int32    hs,
225                l_int32    wpls)
226 {
227 l_int32    i, j, wm2, hm2;
228 l_int32    xpm, ypm;  /* location in src image, to 1/16 of a pixel */
229 l_int32    xp, yp, xf, yf;  /* src pixel and pixel fraction coordinates */
230 l_int32    v00, v01, v10, v11, v00_val, v01_val, v10_val, v11_val;
231 l_uint8    val;
232 l_uint32  *lines, *lined;
233 l_float32  scx, scy;
234 
235         /* (scx, scy) are scaling factors that are applied to the
236          * dest coords to get the corresponding src coords.
237          * We need them because we iterate over dest pixels
238          * and must find the corresponding set of src pixels. */
239     scx = 16. * (l_float32)ws / (l_float32)wd;
240     scy = 16. * (l_float32)hs / (l_float32)hd;
241 
242     wm2 = ws - 2;
243     hm2 = hs - 2;
244 
245         /* Iterate over the destination pixels */
246     for (i = 0; i < hd; i++) {
247         ypm = (l_int32)(scy * (l_float32)i);
248         yp = ypm >> 4;
249         yf = ypm & 0x0f;
250         lined = datad + i * wpld;
251         lines = datas + yp * wpls;
252         for (j = 0; j < wd; j++) {
253             xpm = (l_int32)(scx * (l_float32)j);
254             xp = xpm >> 4;
255             xf = xpm & 0x0f;
256 
257                 /* Do bilinear interpolation.  Without this, we could
258                  * simply subsample:
259                  *   SET_DATA_BYTE(lined, j, GET_DATA_BYTE(lines, xp));
260                  * which is faster but gives lousy results!  */
261             v00_val = GET_DATA_BYTE(lines, xp);
262             if (xp > wm2 || yp > hm2) {
263                 if (yp > hm2 && xp <= wm2) {  /* pixels near bottom */
264                     v01_val = v00_val;
265                     v10_val = GET_DATA_BYTE(lines, xp + 1);
266                     v11_val = v10_val;
267                 }
268                 else if (xp > wm2 && yp <= hm2) {  /* pixels near right side */
269                     v01_val = GET_DATA_BYTE(lines + wpls, xp);
270                     v10_val = v00_val;
271                     v11_val = v01_val;
272                 }
273                 else {  /* pixels at LR corner */
274                     v10_val = v01_val = v11_val = v00_val;
275                 }
276             }
277             else {
278                 v10_val = GET_DATA_BYTE(lines, xp + 1);
279                 v01_val = GET_DATA_BYTE(lines + wpls, xp);
280                 v11_val = GET_DATA_BYTE(lines + wpls, xp + 1);
281             }
282 
283             v00 = (16 - xf) * (16 - yf) * v00_val;
284             v10 = xf * (16 - yf) * v10_val;
285             v01 = (16 - xf) * yf * v01_val;
286             v11 = xf * yf * v11_val;
287 
288             val = (l_uint8)((v00 + v01 + v10 + v11 + 128) / 256);
289             SET_DATA_BYTE(lined, j, val);
290         }
291     }
292 
293     return;
294 }
295 
296 
297 /*------------------------------------------------------------------*
298  *                2x linear interpolated color scaling              *
299  *------------------------------------------------------------------*/
300 /*!
301  *  scaleColor2xLILow()
302  *
303  *  This is a special case of 2x expansion by linear
304  *  interpolation.  Each src pixel contains 4 dest pixels.
305  *  The 4 dest pixels in src pixel 1 are numbered at
306  *  their UL corners.  The 4 dest pixels in src pixel 1
307  *  are related to that src pixel and its 3 neighboring
308  *  src pixels as follows:
309  *
310  *             1-----2-----|-----|-----|
311  *             |     |     |     |     |
312  *             |     |     |     |     |
313  *  src 1 -->  3-----4-----|     |     |  <-- src 2
314  *             |     |     |     |     |
315  *             |     |     |     |     |
316  *             |-----|-----|-----|-----|
317  *             |     |     |     |     |
318  *             |     |     |     |     |
319  *  src 3 -->  |     |     |     |     |  <-- src 4
320  *             |     |     |     |     |
321  *             |     |     |     |     |
322  *             |-----|-----|-----|-----|
323  *
324  *           dest      src
325  *           ----      ---
326  *           dp1    =  sp1
327  *           dp2    =  (sp1 + sp2) / 2
328  *           dp3    =  (sp1 + sp3) / 2
329  *           dp4    =  (sp1 + sp2 + sp3 + sp4) / 4
330  *
331  *  We iterate over the src pixels, and unroll the calculation
332  *  for each set of 4 dest pixels corresponding to that src
333  *  pixel, caching pixels for the next src pixel whenever possible.
334  *  The method is exactly analogous to the one we use for
335  *  scaleGray2xLILow() and its line version.
336  *
337  *  P3 speed is about 5 x 10^7 dst pixels/sec/GHz
338  */
339 void
scaleColor2xLILow(l_uint32 * datad,l_int32 wpld,l_uint32 * datas,l_int32 ws,l_int32 hs,l_int32 wpls)340 scaleColor2xLILow(l_uint32  *datad,
341                   l_int32    wpld,
342                   l_uint32  *datas,
343                   l_int32    ws,
344                   l_int32    hs,
345                   l_int32    wpls)
346 {
347 l_int32    i, hsm;
348 l_uint32  *lines, *lined;
349 
350     hsm = hs - 1;
351 
352         /* We're taking 2 src and 2 dest lines at a time,
353          * and for each src line, we're computing 2 dest lines.
354          * Call these 2 dest lines:  destline1 and destline2.
355          * The first src line is used for destline 1.
356          * On all but the last src line, both src lines are
357          * used in the linear interpolation for destline2.
358          * On the last src line, both destline1 and destline2
359          * are computed using only that src line (because there
360          * isn't a lower src line). */
361 
362         /* iterate over all but the last src line */
363     for (i = 0; i < hsm; i++) {
364         lines = datas + i * wpls;
365         lined = datad + 2 * i * wpld;
366         scaleColor2xLILineLow(lined, wpld, lines, ws, wpls, 0);
367     }
368 
369         /* last src line */
370     lines = datas + hsm * wpls;
371     lined = datad + 2 * hsm * wpld;
372     scaleColor2xLILineLow(lined, wpld, lines, ws, wpls, 1);
373 
374     return;
375 }
376 
377 
378 /*!
379  *  scaleColor2xLILineLow()
380  *
381  *      Input:  lined   (ptr to top destline, to be made from current src line)
382  *              wpld
383  *              lines   (ptr to current src line)
384  *              ws
385  *              wpls
386  *              lastlineflag  (1 if last src line; 0 otherwise)
387  *      Return: void
388  *
389  *  *** Warning: implicit assumption about RGB component ordering ***
390  */
391 void
scaleColor2xLILineLow(l_uint32 * lined,l_int32 wpld,l_uint32 * lines,l_int32 ws,l_int32 wpls,l_int32 lastlineflag)392 scaleColor2xLILineLow(l_uint32  *lined,
393                       l_int32    wpld,
394                       l_uint32  *lines,
395                       l_int32    ws,
396                       l_int32    wpls,
397                       l_int32    lastlineflag)
398 {
399 l_int32    j, jd, wsm;
400 l_int32    rval1, rval2, rval3, rval4, gval1, gval2, gval3, gval4;
401 l_int32    bval1, bval2, bval3, bval4;
402 l_uint32   pixels1, pixels2, pixels3, pixels4, pixel;
403 l_uint32  *linesp, *linedp;
404 
405     wsm = ws - 1;
406 
407     if (lastlineflag == 0) {
408         linesp = lines + wpls;
409         linedp = lined + wpld;
410         pixels1 = *lines;
411         pixels3 = *linesp;
412 
413             /* initialize with v(2) and v(4) */
414         rval2 = pixels1 >> 24;
415         gval2 = (pixels1 >> 16) & 0xff;
416         bval2 = (pixels1 >> 8) & 0xff;
417         rval4 = pixels3 >> 24;
418         gval4 = (pixels3 >> 16) & 0xff;
419         bval4 = (pixels3 >> 8) & 0xff;
420 
421         for (j = 0, jd = 0; j < wsm; j++, jd += 2) {
422                 /* shift in previous src values */
423             rval1 = rval2;
424             gval1 = gval2;
425             bval1 = bval2;
426             rval3 = rval4;
427             gval3 = gval4;
428             bval3 = bval4;
429                 /* get new src values */
430             pixels2 = *(lines + j + 1);
431             pixels4 = *(linesp + j + 1);
432             rval2 = pixels2 >> 24;
433             gval2 = (pixels2 >> 16) & 0xff;
434             bval2 = (pixels2 >> 8) & 0xff;
435             rval4 = pixels4 >> 24;
436             gval4 = (pixels4 >> 16) & 0xff;
437             bval4 = (pixels4 >> 8) & 0xff;
438                 /* save dest values */
439             pixel = (rval1 << 24 | gval1 << 16 | bval1 << 8);
440             *(lined + jd) = pixel;                               /* pix 1 */
441             pixel = ((((rval1 + rval2) << 23) & 0xff000000) |
442                      (((gval1 + gval2) << 15) & 0x00ff0000) |
443                      (((bval1 + bval2) << 7) & 0x0000ff00));
444             *(lined + jd + 1) = pixel;                           /* pix 2 */
445             pixel = ((((rval1 + rval3) << 23) & 0xff000000) |
446                      (((gval1 + gval3) << 15) & 0x00ff0000) |
447                      (((bval1 + bval3) << 7) & 0x0000ff00));
448             *(linedp + jd) = pixel;                              /* pix 3 */
449             pixel = ((((rval1 + rval2 + rval3 + rval4) << 22) & 0xff000000) |
450                      (((gval1 + gval2 + gval3 + gval4) << 14) & 0x00ff0000) |
451                      (((bval1 + bval2 + bval3 + bval4) << 6) & 0x0000ff00));
452             *(linedp + jd + 1) = pixel;                          /* pix 4 */
453         }
454             /* last src pixel on line */
455         rval1 = rval2;
456         gval1 = gval2;
457         bval1 = bval2;
458         rval3 = rval4;
459         gval3 = gval4;
460         bval3 = bval4;
461         pixel = (rval1 << 24 | gval1 << 16 | bval1 << 8);
462         *(lined + 2 * wsm) = pixel;                        /* pix 1 */
463         *(lined + 2 * wsm + 1) = pixel;                    /* pix 2 */
464         pixel = ((((rval1 + rval3) << 23) & 0xff000000) |
465                  (((gval1 + gval3) << 15) & 0x00ff0000) |
466                  (((bval1 + bval3) << 7) & 0x0000ff00));
467         *(linedp + 2 * wsm) = pixel;                       /* pix 3 */
468         *(linedp + 2 * wsm + 1) = pixel;                   /* pix 4 */
469     }
470     else {   /* last row of src pixels: lastlineflag == 1 */
471         linedp = lined + wpld;
472         pixels2 = *lines;
473         rval2 = pixels2 >> 24;
474         gval2 = (pixels2 >> 16) & 0xff;
475         bval2 = (pixels2 >> 8) & 0xff;
476         for (j = 0, jd = 0; j < wsm; j++, jd += 2) {
477             rval1 = rval2;
478             gval1 = gval2;
479             bval1 = bval2;
480             pixels2 = *(lines + j + 1);
481             rval2 = pixels2 >> 24;
482             gval2 = (pixels2 >> 16) & 0xff;
483             bval2 = (pixels2 >> 8) & 0xff;
484             pixel = (rval1 << 24 | gval1 << 16 | bval1 << 8);
485             *(lined + jd) = pixel;                            /* pix 1 */
486             *(linedp + jd) = pixel;                           /* pix 2 */
487             pixel = ((((rval1 + rval2) << 23) & 0xff000000) |
488                      (((gval1 + gval2) << 15) & 0x00ff0000) |
489                      (((bval1 + bval2) << 7) & 0x0000ff00));
490             *(lined + jd + 1) = pixel;                        /* pix 3 */
491             *(linedp + jd + 1) = pixel;                       /* pix 4 */
492         }
493         rval1 = rval2;
494         gval1 = gval2;
495         bval1 = bval2;
496         pixel = (rval1 << 24 | gval1 << 16 | bval1 << 8);
497         *(lined + 2 * wsm) = pixel;                           /* pix 1 */
498         *(lined + 2 * wsm + 1) = pixel;                       /* pix 2 */
499         *(linedp + 2 * wsm) = pixel;                          /* pix 3 */
500         *(linedp + 2 * wsm + 1) = pixel;                      /* pix 4 */
501     }
502 
503     return;
504 }
505 
506 
507 /*------------------------------------------------------------------*
508  *                2x linear interpolated gray scaling               *
509  *------------------------------------------------------------------*/
510 /*!
511  *  scaleGray2xLILow()
512  *
513  *  This is a special case of 2x expansion by linear
514  *  interpolation.  Each src pixel contains 4 dest pixels.
515  *  The 4 dest pixels in src pixel 1 are numbered at
516  *  their UL corners.  The 4 dest pixels in src pixel 1
517  *  are related to that src pixel and its 3 neighboring
518  *  src pixels as follows:
519  *
520  *             1-----2-----|-----|-----|
521  *             |     |     |     |     |
522  *             |     |     |     |     |
523  *  src 1 -->  3-----4-----|     |     |  <-- src 2
524  *             |     |     |     |     |
525  *             |     |     |     |     |
526  *             |-----|-----|-----|-----|
527  *             |     |     |     |     |
528  *             |     |     |     |     |
529  *  src 3 -->  |     |     |     |     |  <-- src 4
530  *             |     |     |     |     |
531  *             |     |     |     |     |
532  *             |-----|-----|-----|-----|
533  *
534  *           dest      src
535  *           ----      ---
536  *           dp1    =  sp1
537  *           dp2    =  (sp1 + sp2) / 2
538  *           dp3    =  (sp1 + sp3) / 2
539  *           dp4    =  (sp1 + sp2 + sp3 + sp4) / 4
540  *
541  *  We iterate over the src pixels, and unroll the calculation
542  *  for each set of 4 dest pixels corresponding to that src
543  *  pixel, caching pixels for the next src pixel whenever possible.
544  */
545 void
scaleGray2xLILow(l_uint32 * datad,l_int32 wpld,l_uint32 * datas,l_int32 ws,l_int32 hs,l_int32 wpls)546 scaleGray2xLILow(l_uint32  *datad,
547                  l_int32    wpld,
548                  l_uint32  *datas,
549                  l_int32    ws,
550                  l_int32    hs,
551                  l_int32    wpls)
552 {
553 l_int32    i, hsm;
554 l_uint32  *lines, *lined;
555 
556     hsm = hs - 1;
557 
558         /* We're taking 2 src and 2 dest lines at a time,
559          * and for each src line, we're computing 2 dest lines.
560          * Call these 2 dest lines:  destline1 and destline2.
561          * The first src line is used for destline 1.
562          * On all but the last src line, both src lines are
563          * used in the linear interpolation for destline2.
564          * On the last src line, both destline1 and destline2
565          * are computed using only that src line (because there
566          * isn't a lower src line). */
567 
568         /* iterate over all but the last src line */
569     for (i = 0; i < hsm; i++) {
570         lines = datas + i * wpls;
571         lined = datad + 2 * i * wpld;
572         scaleGray2xLILineLow(lined, wpld, lines, ws, wpls, 0);
573     }
574 
575         /* last src line */
576     lines = datas + hsm * wpls;
577     lined = datad + 2 * hsm * wpld;
578     scaleGray2xLILineLow(lined, wpld, lines, ws, wpls, 1);
579 
580     return;
581 }
582 
583 
584 /*!
585  *  scaleGray2xLILineLow()
586  *
587  *      Input:  lined   (ptr to top destline, to be made from current src line)
588  *              wpld
589  *              lines   (ptr to current src line)
590  *              ws
591  *              wpls
592  *              lastlineflag  (1 if last src line; 0 otherwise)
593  *      Return: void
594  */
595 void
scaleGray2xLILineLow(l_uint32 * lined,l_int32 wpld,l_uint32 * lines,l_int32 ws,l_int32 wpls,l_int32 lastlineflag)596 scaleGray2xLILineLow(l_uint32  *lined,
597                      l_int32    wpld,
598                      l_uint32  *lines,
599                      l_int32    ws,
600                      l_int32    wpls,
601                      l_int32    lastlineflag)
602 {
603 l_int32    j, jd, wsm, w;
604 l_int32    sval1, sval2, sval3, sval4;
605 l_uint32  *linesp, *linedp;
606 l_uint32   words, wordsp, wordd, worddp;
607 
608     wsm = ws - 1;
609 
610     if (lastlineflag == 0) {
611         linesp = lines + wpls;
612         linedp = lined + wpld;
613 
614             /* Unroll the loop 4x and work on full words */
615         words = lines[0];
616         wordsp = linesp[0];
617         sval2 = (words >> 24) & 0xff;
618         sval4 = (wordsp >> 24) & 0xff;
619         for (j = 0, jd = 0, w = 0; j + 3 < wsm; j += 4, jd += 8, w++) {
620                 /* At the top of the loop,
621                  * words == lines[w], wordsp == linesp[w]
622                  * and the top bytes of those have been loaded into
623                  * sval2 and sval4. */
624             sval1 = sval2;
625             sval2 = (words >> 16) & 0xff;
626             sval3 = sval4;
627             sval4 = (wordsp >> 16) & 0xff;
628             wordd = (sval1 << 24) | (((sval1 + sval2) >> 1) << 16);
629             worddp = (((sval1 + sval3) >> 1) << 24) |
630                 (((sval1 + sval2 + sval3 + sval4) >> 2) << 16);
631 
632             sval1 = sval2;
633             sval2 = (words >> 8) & 0xff;
634             sval3 = sval4;
635             sval4 = (wordsp >> 8) & 0xff;
636             wordd |= (sval1 << 8) | ((sval1 + sval2) >> 1);
637             worddp |= (((sval1 + sval3) >> 1) << 8) |
638                 ((sval1 + sval2 + sval3 + sval4) >> 2);
639             lined[w * 2] = wordd;
640             linedp[w * 2] = worddp;
641 
642             sval1 = sval2;
643             sval2 = words & 0xff;
644             sval3 = sval4;
645             sval4 = wordsp & 0xff;
646             wordd = (sval1 << 24) |                              /* pix 1 */
647                 (((sval1 + sval2) >> 1) << 16);                  /* pix 2 */
648             worddp = (((sval1 + sval3) >> 1) << 24) |            /* pix 3 */
649                 (((sval1 + sval2 + sval3 + sval4) >> 2) << 16);  /* pix 4 */
650 
651                 /* Load the next word as we need its first byte */
652             words = lines[w + 1];
653             wordsp = linesp[w + 1];
654             sval1 = sval2;
655             sval2 = (words >> 24) & 0xff;
656             sval3 = sval4;
657             sval4 = (wordsp >> 24) & 0xff;
658             wordd |= (sval1 << 8) |                              /* pix 1 */
659                 ((sval1 + sval2) >> 1);                          /* pix 2 */
660             worddp |= (((sval1 + sval3) >> 1) << 8) |            /* pix 3 */
661                 ((sval1 + sval2 + sval3 + sval4) >> 2);          /* pix 4 */
662             lined[w * 2 + 1] = wordd;
663             linedp[w * 2 + 1] = worddp;
664         }
665 
666             /* Finish up the last word */
667         for (; j < wsm; j++, jd += 2) {
668             sval1 = sval2;
669             sval3 = sval4;
670             sval2 = GET_DATA_BYTE(lines, j + 1);
671             sval4 = GET_DATA_BYTE(linesp, j + 1);
672             SET_DATA_BYTE(lined, jd, sval1);                     /* pix 1 */
673             SET_DATA_BYTE(lined, jd + 1, (sval1 + sval2) / 2);   /* pix 2 */
674             SET_DATA_BYTE(linedp, jd, (sval1 + sval3) / 2);      /* pix 3 */
675             SET_DATA_BYTE(linedp, jd + 1,
676                           (sval1 + sval2 + sval3 + sval4) / 4);  /* pix 4 */
677         }
678         sval1 = sval2;
679         sval3 = sval4;
680         SET_DATA_BYTE(lined, 2 * wsm, sval1);                     /* pix 1 */
681         SET_DATA_BYTE(lined, 2 * wsm + 1, sval1);                 /* pix 2 */
682         SET_DATA_BYTE(linedp, 2 * wsm, (sval1 + sval3) / 2);      /* pix 3 */
683         SET_DATA_BYTE(linedp, 2 * wsm + 1, (sval1 + sval3) / 2);  /* pix 4 */
684 
685 #if DEBUG_UNROLLING
686 #define CHECK_BYTE(a, b, c) if (GET_DATA_BYTE(a, b) != c) {\
687      fprintf(stderr, "Error: mismatch at %d, %d vs %d\n", \
688              j, GET_DATA_BYTE(a, b), c); }
689 
690         sval2 = GET_DATA_BYTE(lines, 0);
691         sval4 = GET_DATA_BYTE(linesp, 0);
692         for (j = 0, jd = 0; j < wsm; j++, jd += 2) {
693             sval1 = sval2;
694             sval3 = sval4;
695             sval2 = GET_DATA_BYTE(lines, j + 1);
696             sval4 = GET_DATA_BYTE(linesp, j + 1);
697             CHECK_BYTE(lined, jd, sval1);                     /* pix 1 */
698             CHECK_BYTE(lined, jd + 1, (sval1 + sval2) / 2);   /* pix 2 */
699             CHECK_BYTE(linedp, jd, (sval1 + sval3) / 2);      /* pix 3 */
700             CHECK_BYTE(linedp, jd + 1,
701                           (sval1 + sval2 + sval3 + sval4) / 4);  /* pix 4 */
702         }
703         sval1 = sval2;
704         sval3 = sval4;
705         CHECK_BYTE(lined, 2 * wsm, sval1);                     /* pix 1 */
706         CHECK_BYTE(lined, 2 * wsm + 1, sval1);                 /* pix 2 */
707         CHECK_BYTE(linedp, 2 * wsm, (sval1 + sval3) / 2);      /* pix 3 */
708         CHECK_BYTE(linedp, 2 * wsm + 1, (sval1 + sval3) / 2);  /* pix 4 */
709 #undef CHECK_BYTE
710 #endif
711     }
712     else {   /* last row of src pixels: lastlineflag == 1 */
713         linedp = lined + wpld;
714         sval2 = GET_DATA_BYTE(lines, 0);
715         for (j = 0, jd = 0; j < wsm; j++, jd += 2) {
716             sval1 = sval2;
717             sval2 = GET_DATA_BYTE(lines, j + 1);
718             SET_DATA_BYTE(lined, jd, sval1);                       /* pix 1 */
719             SET_DATA_BYTE(linedp, jd, sval1);                      /* pix 3 */
720             SET_DATA_BYTE(lined, jd + 1, (sval1 + sval2) / 2);     /* pix 2 */
721             SET_DATA_BYTE(linedp, jd + 1, (sval1 + sval2) / 2);    /* pix 4 */
722         }
723         sval1 = sval2;
724         SET_DATA_BYTE(lined, 2 * wsm, sval1);                     /* pix 1 */
725         SET_DATA_BYTE(lined, 2 * wsm + 1, sval1);                 /* pix 2 */
726         SET_DATA_BYTE(linedp, 2 * wsm, sval1);                    /* pix 3 */
727         SET_DATA_BYTE(linedp, 2 * wsm + 1, sval1);                /* pix 4 */
728     }
729 
730     return;
731 }
732 
733 
734 /*------------------------------------------------------------------*
735  *               4x linear interpolated gray scaling                *
736  *------------------------------------------------------------------*/
737 /*!
738  *  scaleGray4xLILow()
739  *
740  *  This is a special case of 4x expansion by linear
741  *  interpolation.  Each src pixel contains 16 dest pixels.
742  *  The 16 dest pixels in src pixel 1 are numbered at
743  *  their UL corners.  The 16 dest pixels in src pixel 1
744  *  are related to that src pixel and its 3 neighboring
745  *  src pixels as follows:
746  *
747  *             1---2---3---4---|---|---|---|---|
748  *             |   |   |   |   |   |   |   |   |
749  *             5---6---7---8---|---|---|---|---|
750  *             |   |   |   |   |   |   |   |   |
751  *  src 1 -->  9---a---b---c---|---|---|---|---|  <-- src 2
752  *             |   |   |   |   |   |   |   |   |
753  *             d---e---f---g---|---|---|---|---|
754  *             |   |   |   |   |   |   |   |   |
755  *             |===|===|===|===|===|===|===|===|
756  *             |   |   |   |   |   |   |   |   |
757  *             |---|---|---|---|---|---|---|---|
758  *             |   |   |   |   |   |   |   |   |
759  *  src 3 -->  |---|---|---|---|---|---|---|---|  <-- src 4
760  *             |   |   |   |   |   |   |   |   |
761  *             |---|---|---|---|---|---|---|---|
762  *             |   |   |   |   |   |   |   |   |
763  *             |---|---|---|---|---|---|---|---|
764  *
765  *           dest      src
766  *           ----      ---
767  *           dp1    =  sp1
768  *           dp2    =  (3 * sp1 + sp2) / 4
769  *           dp3    =  (sp1 + sp2) / 2
770  *           dp4    =  (sp1 + 3 * sp2) / 4
771  *           dp5    =  (3 * sp1 + sp3) / 4
772  *           dp6    =  (9 * sp1 + 3 * sp2 + 3 * sp3 + sp4) / 16
773  *           dp7    =  (3 * sp1 + 3 * sp2 + sp3 + sp4) / 8
774  *           dp8    =  (3 * sp1 + 9 * sp2 + 1 * sp3 + 3 * sp4) / 16
775  *           dp9    =  (sp1 + sp3) / 2
776  *           dp10   =  (3 * sp1 + sp2 + 3 * sp3 + sp4) / 8
777  *           dp11   =  (sp1 + sp2 + sp3 + sp4) / 4
778  *           dp12   =  (sp1 + 3 * sp2 + sp3 + 3 * sp4) / 8
779  *           dp13   =  (sp1 + 3 * sp3) / 4
780  *           dp14   =  (3 * sp1 + sp2 + 9 * sp3 + 3 * sp4) / 16
781  *           dp15   =  (sp1 + sp2 + 3 * sp3 + 3 * sp4) / 8
782  *           dp16   =  (sp1 + 3 * sp2 + 3 * sp3 + 9 * sp4) / 16
783  *
784  *  We iterate over the src pixels, and unroll the calculation
785  *  for each set of 16 dest pixels corresponding to that src
786  *  pixel, caching pixels for the next src pixel whenever possible.
787  */
788 void
scaleGray4xLILow(l_uint32 * datad,l_int32 wpld,l_uint32 * datas,l_int32 ws,l_int32 hs,l_int32 wpls)789 scaleGray4xLILow(l_uint32  *datad,
790                  l_int32    wpld,
791                  l_uint32  *datas,
792                  l_int32    ws,
793                  l_int32    hs,
794                  l_int32    wpls)
795 {
796 l_int32    i, hsm;
797 l_uint32  *lines, *lined;
798 
799     hsm = hs - 1;
800 
801         /* We're taking 2 src and 4 dest lines at a time,
802          * and for each src line, we're computing 4 dest lines.
803          * Call these 4 dest lines:  destline1 - destline4.
804          * The first src line is used for destline 1.
805          * Two src lines are used for all other dest lines,
806          * except for the last 4 dest lines, which are computed
807          * using only the last src line. */
808 
809         /* iterate over all but the last src line */
810     for (i = 0; i < hsm; i++) {
811         lines = datas + i * wpls;
812         lined = datad + 4 * i * wpld;
813         scaleGray4xLILineLow(lined, wpld, lines, ws, wpls, 0);
814     }
815 
816         /* last src line */
817     lines = datas + hsm * wpls;
818     lined = datad + 4 * hsm * wpld;
819     scaleGray4xLILineLow(lined, wpld, lines, ws, wpls, 1);
820 
821     return;
822 }
823 
824 
825 /*!
826  *  scaleGray4xLILineLow()
827  *
828  *      Input:  lined   (ptr to top destline, to be made from current src line)
829  *              wpld
830  *              lines   (ptr to current src line)
831  *              ws
832  *              wpls
833  *              lastlineflag  (1 if last src line; 0 otherwise)
834  *      Return: void
835  */
836 void
scaleGray4xLILineLow(l_uint32 * lined,l_int32 wpld,l_uint32 * lines,l_int32 ws,l_int32 wpls,l_int32 lastlineflag)837 scaleGray4xLILineLow(l_uint32  *lined,
838                      l_int32    wpld,
839                      l_uint32  *lines,
840                      l_int32    ws,
841                      l_int32    wpls,
842                      l_int32    lastlineflag)
843 {
844 l_int32    j, jd, wsm, wsm4;
845 l_int32    s1, s2, s3, s4, s1t, s2t, s3t, s4t;
846 l_uint32  *linesp, *linedp1, *linedp2, *linedp3;
847 
848     wsm = ws - 1;
849     wsm4 = 4 * wsm;
850 
851     if (lastlineflag == 0) {
852         linesp = lines + wpls;
853         linedp1 = lined + wpld;
854         linedp2 = lined + 2 * wpld;
855         linedp3 = lined + 3 * wpld;
856         s2 = GET_DATA_BYTE(lines, 0);
857         s4 = GET_DATA_BYTE(linesp, 0);
858         for (j = 0, jd = 0; j < wsm; j++, jd += 4) {
859             s1 = s2;
860             s3 = s4;
861             s2 = GET_DATA_BYTE(lines, j + 1);
862             s4 = GET_DATA_BYTE(linesp, j + 1);
863             s1t = 3 * s1;
864             s2t = 3 * s2;
865             s3t = 3 * s3;
866             s4t = 3 * s4;
867             SET_DATA_BYTE(lined, jd, s1);                             /* d1 */
868             SET_DATA_BYTE(lined, jd + 1, (s1t + s2) / 4);             /* d2 */
869             SET_DATA_BYTE(lined, jd + 2, (s1 + s2) / 2);              /* d3 */
870             SET_DATA_BYTE(lined, jd + 3, (s1 + s2t) / 4);             /* d4 */
871             SET_DATA_BYTE(linedp1, jd, (s1t + s3) / 4);                /* d5 */
872             SET_DATA_BYTE(linedp1, jd + 1, (9*s1 + s2t + s3t + s4) / 16); /*d6*/
873             SET_DATA_BYTE(linedp1, jd + 2, (s1t + s2t + s3 + s4) / 8); /* d7 */
874             SET_DATA_BYTE(linedp1, jd + 3, (s1t + 9*s2 + s3 + s4t) / 16);/*d8*/
875             SET_DATA_BYTE(linedp2, jd, (s1 + s3) / 2);                /* d9 */
876             SET_DATA_BYTE(linedp2, jd + 1, (s1t + s2 + s3t + s4) / 8);/* d10 */
877             SET_DATA_BYTE(linedp2, jd + 2, (s1 + s2 + s3 + s4) / 4);  /* d11 */
878             SET_DATA_BYTE(linedp2, jd + 3, (s1 + s2t + s3 + s4t) / 8);/* d12 */
879             SET_DATA_BYTE(linedp3, jd, (s1 + s3t) / 4);               /* d13 */
880             SET_DATA_BYTE(linedp3, jd + 1, (s1t + s2 + 9*s3 + s4t) / 16);/*d14*/
881             SET_DATA_BYTE(linedp3, jd + 2, (s1 + s2 + s3t + s4t) / 8); /* d15 */
882             SET_DATA_BYTE(linedp3, jd + 3, (s1 + s2t + s3t + 9*s4) / 16);/*d16*/
883         }
884         s1 = s2;
885         s3 = s4;
886         s1t = 3 * s1;
887         s3t = 3 * s3;
888         SET_DATA_BYTE(lined, wsm4, s1);                               /* d1 */
889         SET_DATA_BYTE(lined, wsm4 + 1, s1);                           /* d2 */
890         SET_DATA_BYTE(lined, wsm4 + 2, s1);                           /* d3 */
891         SET_DATA_BYTE(lined, wsm4 + 3, s1);                           /* d4 */
892         SET_DATA_BYTE(linedp1, wsm4, (s1t + s3) / 4);                 /* d5 */
893         SET_DATA_BYTE(linedp1, wsm4 + 1, (s1t + s3) / 4);             /* d6 */
894         SET_DATA_BYTE(linedp1, wsm4 + 2, (s1t + s3) / 4);             /* d7 */
895         SET_DATA_BYTE(linedp1, wsm4 + 3, (s1t + s3) / 4);             /* d8 */
896         SET_DATA_BYTE(linedp2, wsm4, (s1 + s3) / 2);                  /* d9 */
897         SET_DATA_BYTE(linedp2, wsm4 + 1, (s1 + s3) / 2);              /* d10 */
898         SET_DATA_BYTE(linedp2, wsm4 + 2, (s1 + s3) / 2);              /* d11 */
899         SET_DATA_BYTE(linedp2, wsm4 + 3, (s1 + s3) / 2);              /* d12 */
900         SET_DATA_BYTE(linedp3, wsm4, (s1 + s3t) / 4);                 /* d13 */
901         SET_DATA_BYTE(linedp3, wsm4 + 1, (s1 + s3t) / 4);             /* d14 */
902         SET_DATA_BYTE(linedp3, wsm4 + 2, (s1 + s3t) / 4);             /* d15 */
903         SET_DATA_BYTE(linedp3, wsm4 + 3, (s1 + s3t) / 4);             /* d16 */
904     }
905     else {   /* last row of src pixels: lastlineflag == 1 */
906         linedp1 = lined + wpld;
907         linedp2 = lined + 2 * wpld;
908         linedp3 = lined + 3 * wpld;
909         s2 = GET_DATA_BYTE(lines, 0);
910         for (j = 0, jd = 0; j < wsm; j++, jd += 4) {
911             s1 = s2;
912             s2 = GET_DATA_BYTE(lines, j + 1);
913             s1t = 3 * s1;
914             s2t = 3 * s2;
915             SET_DATA_BYTE(lined, jd, s1);                            /* d1 */
916             SET_DATA_BYTE(lined, jd + 1, (s1t + s2) / 4 );           /* d2 */
917             SET_DATA_BYTE(lined, jd + 2, (s1 + s2) / 2 );            /* d3 */
918             SET_DATA_BYTE(lined, jd + 3, (s1 + s2t) / 4 );           /* d4 */
919             SET_DATA_BYTE(linedp1, jd, s1);                          /* d5 */
920             SET_DATA_BYTE(linedp1, jd + 1, (s1t + s2) / 4 );         /* d6 */
921             SET_DATA_BYTE(linedp1, jd + 2, (s1 + s2) / 2 );          /* d7 */
922             SET_DATA_BYTE(linedp1, jd + 3, (s1 + s2t) / 4 );         /* d8 */
923             SET_DATA_BYTE(linedp2, jd, s1);                          /* d9 */
924             SET_DATA_BYTE(linedp2, jd + 1, (s1t + s2) / 4 );         /* d10 */
925             SET_DATA_BYTE(linedp2, jd + 2, (s1 + s2) / 2 );          /* d11 */
926             SET_DATA_BYTE(linedp2, jd + 3, (s1 + s2t) / 4 );         /* d12 */
927             SET_DATA_BYTE(linedp3, jd, s1);                          /* d13 */
928             SET_DATA_BYTE(linedp3, jd + 1, (s1t + s2) / 4 );         /* d14 */
929             SET_DATA_BYTE(linedp3, jd + 2, (s1 + s2) / 2 );          /* d15 */
930             SET_DATA_BYTE(linedp3, jd + 3, (s1 + s2t) / 4 );         /* d16 */
931         }
932         s1 = s2;
933         SET_DATA_BYTE(lined, wsm4, s1);                              /* d1 */
934         SET_DATA_BYTE(lined, wsm4 + 1, s1);                          /* d2 */
935         SET_DATA_BYTE(lined, wsm4 + 2, s1);                          /* d3 */
936         SET_DATA_BYTE(lined, wsm4 + 3, s1);                          /* d4 */
937         SET_DATA_BYTE(linedp1, wsm4, s1);                            /* d5 */
938         SET_DATA_BYTE(linedp1, wsm4 + 1, s1);                        /* d6 */
939         SET_DATA_BYTE(linedp1, wsm4 + 2, s1);                        /* d7 */
940         SET_DATA_BYTE(linedp1, wsm4 + 3, s1);                        /* d8 */
941         SET_DATA_BYTE(linedp2, wsm4, s1);                            /* d9 */
942         SET_DATA_BYTE(linedp2, wsm4 + 1, s1);                        /* d10 */
943         SET_DATA_BYTE(linedp2, wsm4 + 2, s1);                        /* d11 */
944         SET_DATA_BYTE(linedp2, wsm4 + 3, s1);                        /* d12 */
945         SET_DATA_BYTE(linedp3, wsm4, s1);                            /* d13 */
946         SET_DATA_BYTE(linedp3, wsm4 + 1, s1);                        /* d14 */
947         SET_DATA_BYTE(linedp3, wsm4 + 2, s1);                        /* d15 */
948         SET_DATA_BYTE(linedp3, wsm4 + 3, s1);                        /* d16 */
949     }
950 
951     return;
952 }
953 
954 
955 /*------------------------------------------------------------------*
956  *       Grayscale and color scaling by closest pixel sampling      *
957  *------------------------------------------------------------------*/
958 /*!
959  *  scaleBySamplingLow()
960  *
961  *  Notes:
962  *      (1) The dest must be cleared prior to this operation,
963  *          and we clear it here in the low-level code.
964  *      (2) We reuse dest pixels and dest pixel rows whenever
965  *          possible.  This speeds the upscaling; downscaling
966  *          is done by strict subsampling and is unaffected.
967  *      (3) Because we are sampling and not interpolating, this
968  *          routine works directly, without conversion to full
969  *          RGB color, for 2, 4 or 8 bpp palette color images.
970  */
971 l_int32
scaleBySamplingLow(l_uint32 * datad,l_int32 wd,l_int32 hd,l_int32 wpld,l_uint32 * datas,l_int32 ws,l_int32 hs,l_int32 d,l_int32 wpls)972 scaleBySamplingLow(l_uint32  *datad,
973                    l_int32    wd,
974                    l_int32    hd,
975                    l_int32    wpld,
976                    l_uint32  *datas,
977                    l_int32    ws,
978                    l_int32    hs,
979                    l_int32    d,
980                    l_int32    wpls)
981 {
982 l_int32    i, j, bpld;
983 l_int32    xs, prevxs, sval;
984 l_int32   *srow, *scol;
985 l_uint32   csval;
986 l_uint32  *lines, *prevlines, *lined, *prevlined;
987 l_float32  wratio, hratio;
988 
989     PROCNAME("scaleBySamplingLow");
990 
991         /* clear dest */
992     bpld = 4 * wpld;
993     memset((char *)datad, 0, hd * bpld);
994 
995         /* the source row corresponding to dest row i ==> srow[i]
996          * the source col corresponding to dest col j ==> scol[j]  */
997     if ((srow = (l_int32 *)CALLOC(hd, sizeof(l_int32))) == NULL)
998         return ERROR_INT("srow not made", procName, 1);
999     if ((scol = (l_int32 *)CALLOC(wd, sizeof(l_int32))) == NULL)
1000         return ERROR_INT("scol not made", procName, 1);
1001 
1002     wratio = (l_float32)ws / (l_float32)wd;
1003     hratio = (l_float32)hs / (l_float32)hd;
1004     for (i = 0; i < hd; i++)
1005         srow[i] = L_MIN((l_int32)(hratio * i + 0.5), hs - 1);
1006     for (j = 0; j < wd; j++)
1007         scol[j] = L_MIN((l_int32)(wratio * j + 0.5), ws - 1);
1008 
1009     prevlines = NULL;
1010     for (i = 0; i < hd; i++) {
1011         lines = datas + srow[i] * wpls;
1012         lined = datad + i * wpld;
1013         if (lines != prevlines) {  /* make dest from new source row */
1014             prevxs = -1;
1015             sval = 0;
1016             csval = 0;
1017             switch (d)
1018             {
1019             case 2:
1020                 for (j = 0; j < wd; j++) {
1021                     xs = scol[j];
1022                     if (xs != prevxs) {  /* get dest pix from source col */
1023                         sval = GET_DATA_DIBIT(lines, xs);
1024                         SET_DATA_DIBIT(lined, j, sval);
1025                         prevxs = xs;
1026                     }
1027                     else   /* copy prev dest pix */
1028                         SET_DATA_DIBIT(lined, j, sval);
1029                 }
1030                 break;
1031             case 4:
1032                 for (j = 0; j < wd; j++) {
1033                     xs = scol[j];
1034                     if (xs != prevxs) {  /* get dest pix from source col */
1035                         sval = GET_DATA_QBIT(lines, xs);
1036                         SET_DATA_QBIT(lined, j, sval);
1037                         prevxs = xs;
1038                     }
1039                     else   /* copy prev dest pix */
1040                         SET_DATA_QBIT(lined, j, sval);
1041                 }
1042                 break;
1043             case 8:
1044                 for (j = 0; j < wd; j++) {
1045                     xs = scol[j];
1046                     if (xs != prevxs) {  /* get dest pix from source col */
1047                         sval = GET_DATA_BYTE(lines, xs);
1048                         SET_DATA_BYTE(lined, j, sval);
1049                         prevxs = xs;
1050                     }
1051                     else   /* copy prev dest pix */
1052                         SET_DATA_BYTE(lined, j, sval);
1053                 }
1054                 break;
1055             case 16:
1056                 for (j = 0; j < wd; j++) {
1057                     xs = scol[j];
1058                     if (xs != prevxs) {  /* get dest pix from source col */
1059                         sval = GET_DATA_TWO_BYTES(lines, xs);
1060                         SET_DATA_TWO_BYTES(lined, j, sval);
1061                         prevxs = xs;
1062                     }
1063                     else   /* copy prev dest pix */
1064                         SET_DATA_TWO_BYTES(lined, j, sval);
1065                 }
1066                 break;
1067             case 32:
1068                 for (j = 0; j < wd; j++) {
1069                     xs = scol[j];
1070                     if (xs != prevxs) {  /* get dest pix from source col */
1071                         csval = lines[xs];
1072                         lined[j] = csval;
1073                         prevxs = xs;
1074                     }
1075                     else   /* copy prev dest pix */
1076                         lined[j] = csval;
1077                 }
1078                 break;
1079             default:
1080                 return ERROR_INT("pixel depth not supported", procName, 1);
1081                 break;
1082             }
1083         }
1084         else {  /* lines == prevlines; copy prev dest row */
1085             prevlined = lined - wpld;
1086             memcpy((char *)lined, (char *)prevlined, bpld);
1087         }
1088         prevlines = lines;
1089     }
1090 
1091     FREE(srow);
1092     FREE(scol);
1093 
1094     return 0;
1095 }
1096 
1097 
1098 /*------------------------------------------------------------------*
1099  *    Color and grayscale downsampling with (antialias) smoothing   *
1100  *------------------------------------------------------------------*/
1101 /*!
1102  *  scaleSmoothLow()
1103  *
1104  *  Notes:
1105  *      (1) This function is called on 8 or 32 bpp src and dest images.
1106  *      (2) size is the full width of the lowpass smoothing filter.
1107  *          It is correlated with the reduction ratio, being the
1108  *          nearest integer such that size is approximately equal to hs / hd.
1109  */
1110 l_int32
scaleSmoothLow(l_uint32 * datad,l_int32 wd,l_int32 hd,l_int32 wpld,l_uint32 * datas,l_int32 ws,l_int32 hs,l_int32 d,l_int32 wpls,l_int32 size)1111 scaleSmoothLow(l_uint32  *datad,
1112                l_int32    wd,
1113                l_int32    hd,
1114                l_int32    wpld,
1115                l_uint32  *datas,
1116                l_int32    ws,
1117                l_int32    hs,
1118                l_int32    d,
1119                l_int32    wpls,
1120                l_int32    size)
1121 {
1122 l_int32    i, j, m, n, xstart;
1123 l_int32    val, rval, gval, bval;
1124 l_int32   *srow, *scol;
1125 l_uint32  *lines, *lined, *line, *ppixel;
1126 l_uint32   pixel;
1127 l_float32  wratio, hratio, norm;
1128 
1129     PROCNAME("scaleSmoothLow");
1130 
1131         /* Clear dest */
1132     memset((char *)datad, 0, 4 * wpld * hd);
1133 
1134         /* Each dest pixel at (j,i) is computed as the average
1135            of size^2 corresponding src pixels.
1136            We store the UL corner location of the square of
1137            src pixels that correspond to dest pixel (j,i).
1138            The are labelled by the arrays srow[i] and scol[j]. */
1139     if ((srow = (l_int32 *)CALLOC(hd, sizeof(l_int32))) == NULL)
1140         return ERROR_INT("srow not made", procName, 1);
1141     if ((scol = (l_int32 *)CALLOC(wd, sizeof(l_int32))) == NULL)
1142         return ERROR_INT("scol not made", procName, 1);
1143 
1144     norm = 1. / (l_float32)(size * size);
1145     wratio = (l_float32)ws / (l_float32)wd;
1146     hratio = (l_float32)hs / (l_float32)hd;
1147     for (i = 0; i < hd; i++)
1148         srow[i] = L_MIN((l_int32)(hratio * i), hs - size);
1149     for (j = 0; j < wd; j++)
1150         scol[j] = L_MIN((l_int32)(wratio * j), ws - size);
1151 
1152         /* For each dest pixel, compute average */
1153     if (d == 8) {
1154         for (i = 0; i < hd; i++) {
1155             lines = datas + srow[i] * wpls;
1156             lined = datad + i * wpld;
1157             for (j = 0; j < wd; j++) {
1158                 xstart = scol[j];
1159                 val = 0;
1160                 for (m = 0; m < size; m++) {
1161                     line = lines + m * wpls;
1162                     for (n = 0; n < size; n++) {
1163                         val += GET_DATA_BYTE(line, xstart + n);
1164                     }
1165                 }
1166                 val = (l_int32)((l_float32)val * norm);
1167                 SET_DATA_BYTE(lined, j, val);
1168             }
1169         }
1170     }
1171     else {  /* d == 32 */
1172         for (i = 0; i < hd; i++) {
1173             lines = datas + srow[i] * wpls;
1174             lined = datad + i * wpld;
1175             for (j = 0; j < wd; j++) {
1176                 xstart = scol[j];
1177                 rval = gval = bval = 0;
1178                 for (m = 0; m < size; m++) {
1179                     ppixel = lines + m * wpls + xstart;
1180                     for (n = 0; n < size; n++) {
1181                         pixel = *(ppixel + n);
1182                         rval += (pixel >> L_RED_SHIFT) & 0xff;
1183                         gval += (pixel >> L_GREEN_SHIFT) & 0xff;
1184                         bval += (pixel >> L_BLUE_SHIFT) & 0xff;
1185                     }
1186                 }
1187                 rval = (l_int32)((l_float32)rval * norm);
1188                 gval = (l_int32)((l_float32)gval * norm);
1189                 bval = (l_int32)((l_float32)bval * norm);
1190                 *(lined + j) = rval << L_RED_SHIFT |
1191                                gval << L_GREEN_SHIFT |
1192                                bval << L_BLUE_SHIFT;
1193             }
1194         }
1195     }
1196 
1197     FREE(srow);
1198     FREE(scol);
1199     return 0;
1200 }
1201 
1202 
1203 /*!
1204  *  scaleRGBToGray2Low()
1205  *
1206  *  Note: This function is called with 32 bpp RGB src and 8 bpp,
1207  *        half-resolution dest.  The weights should add to 1.0.
1208  */
1209 void
scaleRGBToGray2Low(l_uint32 * datad,l_int32 wd,l_int32 hd,l_int32 wpld,l_uint32 * datas,l_int32 wpls,l_float32 rwt,l_float32 gwt,l_float32 bwt)1210 scaleRGBToGray2Low(l_uint32  *datad,
1211                    l_int32    wd,
1212                    l_int32    hd,
1213                    l_int32    wpld,
1214                    l_uint32  *datas,
1215                    l_int32    wpls,
1216                    l_float32  rwt,
1217                    l_float32  gwt,
1218                    l_float32  bwt)
1219 {
1220 l_int32    i, j, val, rval, gval, bval;
1221 l_uint32  *lines, *lined;
1222 l_uint32   pixel;
1223 
1224     rwt *= 0.25;
1225     gwt *= 0.25;
1226     bwt *= 0.25;
1227     for (i = 0; i < hd; i++) {
1228         lines = datas + 2 * i * wpls;
1229         lined = datad + i * wpld;
1230         for (j = 0; j < wd; j++) {
1231                 /* Sum each of the color components from 4 src pixels */
1232             pixel = *(lines + 2 * j);
1233             rval = (pixel >> L_RED_SHIFT) & 0xff;
1234             gval = (pixel >> L_GREEN_SHIFT) & 0xff;
1235             bval = (pixel >> L_BLUE_SHIFT) & 0xff;
1236             pixel = *(lines + 2 * j + 1);
1237             rval += (pixel >> L_RED_SHIFT) & 0xff;
1238             gval += (pixel >> L_GREEN_SHIFT) & 0xff;
1239             bval += (pixel >> L_BLUE_SHIFT) & 0xff;
1240             pixel = *(lines + wpls + 2 * j);
1241             rval += (pixel >> L_RED_SHIFT) & 0xff;
1242             gval += (pixel >> L_GREEN_SHIFT) & 0xff;
1243             bval += (pixel >> L_BLUE_SHIFT) & 0xff;
1244             pixel = *(lines + wpls + 2 * j + 1);
1245             rval += (pixel >> L_RED_SHIFT) & 0xff;
1246             gval += (pixel >> L_GREEN_SHIFT) & 0xff;
1247             bval += (pixel >> L_BLUE_SHIFT) & 0xff;
1248                 /* Generate the dest byte as a weighted sum of the averages */
1249             val = (l_int32)(rwt * rval + gwt * gval + bwt * bval);
1250             SET_DATA_BYTE(lined, j, val);
1251         }
1252     }
1253 }
1254 
1255 
1256 /*------------------------------------------------------------------*
1257  *                  General area mapped gray scaling                *
1258  *------------------------------------------------------------------*/
1259 /*!
1260  *  scaleColorAreaMapLow()
1261  *
1262  *  This should only be used for downscaling.
1263  *  We choose to divide each pixel into 16 x 16 sub-pixels.
1264  *  This is much slower than scaleSmoothLow(), but it gives a
1265  *  better representation, esp. for downscaling factors between
1266  *  1.5 and 5.  All src pixels are subdivided into 256 sub-pixels,
1267  *  and are weighted by the number of sub-pixels covered by
1268  *  the dest pixel.  This is about 2x slower than scaleSmoothLow(),
1269  *  but the results are significantly better on small text.
1270  */
1271 void
scaleColorAreaMapLow(l_uint32 * datad,l_int32 wd,l_int32 hd,l_int32 wpld,l_uint32 * datas,l_int32 ws,l_int32 hs,l_int32 wpls)1272 scaleColorAreaMapLow(l_uint32  *datad,
1273                     l_int32    wd,
1274                     l_int32    hd,
1275                     l_int32    wpld,
1276                     l_uint32  *datas,
1277                     l_int32    ws,
1278                     l_int32    hs,
1279                     l_int32    wpls)
1280 {
1281 l_int32    i, j, k, m, wm2, hm2;
1282 l_int32    area00, area10, area01, area11, areal, arear, areat, areab;
1283 l_int32    xu, yu;  /* UL corner in src image, to 1/16 of a pixel */
1284 l_int32    xl, yl;  /* LR corner in src image, to 1/16 of a pixel */
1285 l_int32    xup, yup, xuf, yuf;  /* UL src pixel: integer and fraction */
1286 l_int32    xlp, ylp, xlf, ylf;  /* LR src pixel: integer and fraction */
1287 l_int32    delx, dely, area;
1288 l_int32    v00r, v00g, v00b;  /* contrib. from UL src pixel */
1289 l_int32    v01r, v01g, v01b;  /* contrib. from LL src pixel */
1290 l_int32    v10r, v10g, v10b;  /* contrib from UR src pixel */
1291 l_int32    v11r, v11g, v11b;  /* contrib from LR src pixel */
1292 l_int32    vinr, ving, vinb;  /* contrib from all full interior src pixels */
1293 l_int32    vmidr, vmidg, vmidb;  /* contrib from side parts */
1294 l_int32    rval, gval, bval;
1295 l_uint32   pixel00, pixel10, pixel01, pixel11, pixel;
1296 l_uint32  *lines, *lined;
1297 l_float32  scx, scy;
1298 
1299         /* (scx, scy) are scaling factors that are applied to the
1300          * dest coords to get the corresponding src coords.
1301          * We need them because we iterate over dest pixels
1302          * and must find the corresponding set of src pixels. */
1303     scx = 16. * (l_float32)ws / (l_float32)wd;
1304     scy = 16. * (l_float32)hs / (l_float32)hd;
1305 
1306     wm2 = ws - 2;
1307     hm2 = hs - 2;
1308 
1309         /* Iterate over the destination pixels */
1310     for (i = 0; i < hd; i++) {
1311         yu = (l_int32)(scy * i);
1312         yl = (l_int32)(scy * (i + 1.0));
1313         yup = yu >> 4;
1314         yuf = yu & 0x0f;
1315         ylp = yl >> 4;
1316         ylf = yl & 0x0f;
1317         dely = ylp - yup;
1318         lined = datad + i * wpld;
1319         lines = datas + yup * wpls;
1320         for (j = 0; j < wd; j++) {
1321             xu = (l_int32)(scx * j);
1322             xl = (l_int32)(scx * (j + 1.0));
1323             xup = xu >> 4;
1324             xuf = xu & 0x0f;
1325             xlp = xl >> 4;
1326             xlf = xl & 0x0f;
1327             delx = xlp - xup;
1328 
1329                 /* If near the edge, just use a src pixel value */
1330             if (xlp > wm2 || ylp > hm2) {
1331                 *(lined + j) = *(lines + xup);
1332                 continue;
1333             }
1334 
1335                 /* Area summed over, in subpixels.  This varies
1336                  * due to the quantization, so we can't simply take
1337                  * the area to be a constant: area = scx * scy. */
1338             area = ((16 - xuf) + 16 * (delx - 1) + xlf) *
1339                    ((16 - yuf) + 16 * (dely - 1) + ylf);
1340 
1341                 /* Do area map summation */
1342             pixel00 = *(lines + xup);
1343             pixel10 = *(lines + xlp);
1344             pixel01 = *(lines + dely * wpls +  xup);
1345             pixel11 = *(lines + dely * wpls +  xlp);
1346             area00 = (16 - xuf) * (16 - yuf);
1347             area10 = xlf * (16 - yuf);
1348             area01 = (16 - xuf) * ylf;
1349             area11 = xlf * ylf;
1350             v00r = area00 * ((pixel00 >> L_RED_SHIFT) & 0xff);
1351             v00g = area00 * ((pixel00 >> L_GREEN_SHIFT) & 0xff);
1352             v00b = area00 * ((pixel00 >> L_BLUE_SHIFT) & 0xff);
1353             v10r = area10 * ((pixel10 >> L_RED_SHIFT) & 0xff);
1354             v10g = area10 * ((pixel10 >> L_GREEN_SHIFT) & 0xff);
1355             v10b = area10 * ((pixel10 >> L_BLUE_SHIFT) & 0xff);
1356             v01r = area01 * ((pixel01 >> L_RED_SHIFT) & 0xff);
1357             v01g = area01 * ((pixel01 >> L_GREEN_SHIFT) & 0xff);
1358             v01b = area01 * ((pixel01 >> L_BLUE_SHIFT) & 0xff);
1359             v11r = area11 * ((pixel11 >> L_RED_SHIFT) & 0xff);
1360             v11g = area11 * ((pixel11 >> L_GREEN_SHIFT) & 0xff);
1361             v11b = area11 * ((pixel11 >> L_BLUE_SHIFT) & 0xff);
1362             vinr = ving = vinb = 0;
1363             for (k = 1; k < dely; k++) {  /* for full src pixels */
1364                 for (m = 1; m < delx; m++) {
1365                     pixel = *(lines + k * wpls + xup + m);
1366                     vinr += 256 * ((pixel >> L_RED_SHIFT) & 0xff);
1367                     ving += 256 * ((pixel >> L_GREEN_SHIFT) & 0xff);
1368                     vinb += 256 * ((pixel >> L_BLUE_SHIFT) & 0xff);
1369                 }
1370             }
1371             vmidr = vmidg = vmidb = 0;
1372             areal = (16 - xuf) * 16;
1373             arear = xlf * 16;
1374             areat = 16 * (16 - yuf);
1375             areab = 16 * ylf;
1376             for (k = 1; k < dely; k++) {  /* for left side */
1377                 pixel = *(lines + k * wpls + xup);
1378                 vmidr += areal * ((pixel >> L_RED_SHIFT) & 0xff);
1379                 vmidg += areal * ((pixel >> L_GREEN_SHIFT) & 0xff);
1380                 vmidb += areal * ((pixel >> L_BLUE_SHIFT) & 0xff);
1381             }
1382             for (k = 1; k < dely; k++) {  /* for right side */
1383                 pixel = *(lines + k * wpls + xlp);
1384                 vmidr += arear * ((pixel >> L_RED_SHIFT) & 0xff);
1385                 vmidg += arear * ((pixel >> L_GREEN_SHIFT) & 0xff);
1386                 vmidb += arear * ((pixel >> L_BLUE_SHIFT) & 0xff);
1387             }
1388             for (m = 1; m < delx; m++) {  /* for top side */
1389                 pixel = *(lines + xup + m);
1390                 vmidr += areat * ((pixel >> L_RED_SHIFT) & 0xff);
1391                 vmidg += areat * ((pixel >> L_GREEN_SHIFT) & 0xff);
1392                 vmidb += areat * ((pixel >> L_BLUE_SHIFT) & 0xff);
1393             }
1394             for (m = 1; m < delx; m++) {  /* for bottom side */
1395                 pixel = *(lines + dely * wpls + xup + m);
1396                 vmidr += areab * ((pixel >> L_RED_SHIFT) & 0xff);
1397                 vmidg += areab * ((pixel >> L_GREEN_SHIFT) & 0xff);
1398                 vmidb += areab * ((pixel >> L_BLUE_SHIFT) & 0xff);
1399             }
1400 
1401                 /* Sum all the contributions */
1402             rval = (v00r + v01r + v10r + v11r + vinr + vmidr + 128) / area;
1403             gval = (v00g + v01g + v10g + v11g + ving + vmidg + 128) / area;
1404             bval = (v00b + v01b + v10b + v11b + vinb + vmidb + 128) / area;
1405 #if  DEBUG_OVERFLOW
1406             if (rval > 255) fprintf(stderr, "rval ovfl: %d\n", rval);
1407             if (rval > 255) fprintf(stderr, "gval ovfl: %d\n", gval);
1408             if (rval > 255) fprintf(stderr, "bval ovfl: %d\n", bval);
1409 #endif  /* DEBUG_OVERFLOW */
1410             composeRGBPixel(rval, gval, bval, lined + j);
1411         }
1412     }
1413 
1414     return;
1415 }
1416 
1417 
1418 /*!
1419  *  scaleGrayAreaMapLow()
1420  *
1421  *  This should only be used for downscaling.
1422  *  We choose to divide each pixel into 16 x 16 sub-pixels.
1423  *  This is about 2x slower than scaleSmoothLow(), but the results
1424  *  are significantly better on small text, esp. for downscaling
1425  *  factors between 1.5 and 5.  All src pixels are subdivided
1426  *  into 256 sub-pixels, and are weighted by the number of
1427  *  sub-pixels covered by the dest pixel.
1428  */
1429 void
scaleGrayAreaMapLow(l_uint32 * datad,l_int32 wd,l_int32 hd,l_int32 wpld,l_uint32 * datas,l_int32 ws,l_int32 hs,l_int32 wpls)1430 scaleGrayAreaMapLow(l_uint32  *datad,
1431                     l_int32    wd,
1432                     l_int32    hd,
1433                     l_int32    wpld,
1434                     l_uint32  *datas,
1435                     l_int32    ws,
1436                     l_int32    hs,
1437                     l_int32    wpls)
1438 {
1439 l_int32    i, j, k, m, wm2, hm2;
1440 l_int32    xu, yu;  /* UL corner in src image, to 1/16 of a pixel */
1441 l_int32    xl, yl;  /* LR corner in src image, to 1/16 of a pixel */
1442 l_int32    xup, yup, xuf, yuf;  /* UL src pixel: integer and fraction */
1443 l_int32    xlp, ylp, xlf, ylf;  /* LR src pixel: integer and fraction */
1444 l_int32    delx, dely, area;
1445 l_int32    v00;  /* contrib. from UL src pixel */
1446 l_int32    v01;  /* contrib. from LL src pixel */
1447 l_int32    v10;  /* contrib from UR src pixel */
1448 l_int32    v11;  /* contrib from LR src pixel */
1449 l_int32    vin;  /* contrib from all full interior src pixels */
1450 l_int32    vmid;  /* contrib from side parts that are full in 1 direction */
1451 l_int32    val;
1452 l_uint32  *lines, *lined;
1453 l_float32  scx, scy;
1454 
1455         /* (scx, scy) are scaling factors that are applied to the
1456          * dest coords to get the corresponding src coords.
1457          * We need them because we iterate over dest pixels
1458          * and must find the corresponding set of src pixels. */
1459     scx = 16. * (l_float32)ws / (l_float32)wd;
1460     scy = 16. * (l_float32)hs / (l_float32)hd;
1461 
1462     wm2 = ws - 2;
1463     hm2 = hs - 2;
1464 
1465         /* Iterate over the destination pixels */
1466     for (i = 0; i < hd; i++) {
1467         yu = (l_int32)(scy * i);
1468         yl = (l_int32)(scy * (i + 1.0));
1469         yup = yu >> 4;
1470         yuf = yu & 0x0f;
1471         ylp = yl >> 4;
1472         ylf = yl & 0x0f;
1473         dely = ylp - yup;
1474         lined = datad + i * wpld;
1475         lines = datas + yup * wpls;
1476         for (j = 0; j < wd; j++) {
1477             xu = (l_int32)(scx * j);
1478             xl = (l_int32)(scx * (j + 1.0));
1479             xup = xu >> 4;
1480             xuf = xu & 0x0f;
1481             xlp = xl >> 4;
1482             xlf = xl & 0x0f;
1483             delx = xlp - xup;
1484 
1485                 /* If near the edge, just use a src pixel value */
1486             if (xlp > wm2 || ylp > hm2) {
1487                 SET_DATA_BYTE(lined, j, GET_DATA_BYTE(lines, xup));
1488                 continue;
1489             }
1490 
1491                 /* Area summed over, in subpixels.  This varies
1492                  * due to the quantization, so we can't simply take
1493                  * the area to be a constant: area = scx * scy. */
1494             area = ((16 - xuf) + 16 * (delx - 1) + xlf) *
1495                    ((16 - yuf) + 16 * (dely - 1) + ylf);
1496 
1497                 /* Do area map summation */
1498             v00 = (16 - xuf) * (16 - yuf) * GET_DATA_BYTE(lines, xup);
1499             v10 = xlf * (16 - yuf) * GET_DATA_BYTE(lines, xlp);
1500             v01 = (16 - xuf) * ylf * GET_DATA_BYTE(lines + dely * wpls, xup);
1501             v11 = xlf * ylf * GET_DATA_BYTE(lines + dely * wpls, xlp);
1502             for (vin = 0, k = 1; k < dely; k++) {  /* for full src pixels */
1503                  for (m = 1; m < delx; m++) {
1504                      vin += 256 * GET_DATA_BYTE(lines + k * wpls, xup + m);
1505                  }
1506             }
1507             for (vmid = 0, k = 1; k < dely; k++)  /* for left side */
1508                 vmid += (16 - xuf) * 16 * GET_DATA_BYTE(lines + k * wpls, xup);
1509             for (k = 1; k < dely; k++)  /* for right side */
1510                 vmid += xlf * 16 * GET_DATA_BYTE(lines + k * wpls, xlp);
1511             for (m = 1; m < delx; m++)  /* for top side */
1512                 vmid += 16 * (16 - yuf) * GET_DATA_BYTE(lines, xup + m);
1513             for (m = 1; m < delx; m++)  /* for bottom side */
1514                 vmid += 16 * ylf * GET_DATA_BYTE(lines + dely * wpls, xup + m);
1515             val = (v00 + v01 + v10 + v11 + vin + vmid + 128) / area;
1516 #if  DEBUG_OVERFLOW
1517             if (val > 255) fprintf(stderr, "val overflow: %d\n", val);
1518 #endif  /* DEBUG_OVERFLOW */
1519             SET_DATA_BYTE(lined, j, val);
1520         }
1521     }
1522 
1523     return;
1524 }
1525 
1526 
1527 /*------------------------------------------------------------------*
1528  *                     2x area mapped downscaling                   *
1529  *------------------------------------------------------------------*/
1530 /*!
1531  *  scaleAreaMapLow2()
1532  *
1533  *  Note: This function is called with either 8 bpp gray or 32 bpp RGB.
1534  *        The result is a 2x reduced dest.
1535  */
1536 void
scaleAreaMapLow2(l_uint32 * datad,l_int32 wd,l_int32 hd,l_int32 wpld,l_uint32 * datas,l_int32 d,l_int32 wpls)1537 scaleAreaMapLow2(l_uint32  *datad,
1538                  l_int32    wd,
1539                  l_int32    hd,
1540                  l_int32    wpld,
1541                  l_uint32  *datas,
1542                  l_int32    d,
1543                  l_int32    wpls)
1544 {
1545 l_int32    i, j, val, rval, gval, bval;
1546 l_uint32  *lines, *lined;
1547 l_uint32   pixel;
1548 
1549     if (d == 8) {
1550         for (i = 0; i < hd; i++) {
1551             lines = datas + 2 * i * wpls;
1552             lined = datad + i * wpld;
1553             for (j = 0; j < wd; j++) {
1554                     /* Average each dest pixel using 4 src pixels */
1555                 val = GET_DATA_BYTE(lines, 2 * j);
1556                 val += GET_DATA_BYTE(lines, 2 * j + 1);
1557                 val += GET_DATA_BYTE(lines + wpls, 2 * j);
1558                 val += GET_DATA_BYTE(lines + wpls, 2 * j + 1);
1559                 val >>= 2;
1560                 SET_DATA_BYTE(lined, j, val);
1561             }
1562         }
1563     }
1564     else {  /* d == 32 */
1565         for (i = 0; i < hd; i++) {
1566             lines = datas + 2 * i * wpls;
1567             lined = datad + i * wpld;
1568             for (j = 0; j < wd; j++) {
1569                     /* Average each of the color components from 4 src pixels */
1570                 pixel = *(lines + 2 * j);
1571                 rval = (pixel >> L_RED_SHIFT) & 0xff;
1572                 gval = (pixel >> L_GREEN_SHIFT) & 0xff;
1573                 bval = (pixel >> L_BLUE_SHIFT) & 0xff;
1574                 pixel = *(lines + 2 * j + 1);
1575                 rval += (pixel >> L_RED_SHIFT) & 0xff;
1576                 gval += (pixel >> L_GREEN_SHIFT) & 0xff;
1577                 bval += (pixel >> L_BLUE_SHIFT) & 0xff;
1578                 pixel = *(lines + wpls + 2 * j);
1579                 rval += (pixel >> L_RED_SHIFT) & 0xff;
1580                 gval += (pixel >> L_GREEN_SHIFT) & 0xff;
1581                 bval += (pixel >> L_BLUE_SHIFT) & 0xff;
1582                 pixel = *(lines + wpls + 2 * j + 1);
1583                 rval += (pixel >> L_RED_SHIFT) & 0xff;
1584                 gval += (pixel >> L_GREEN_SHIFT) & 0xff;
1585                 bval += (pixel >> L_BLUE_SHIFT) & 0xff;
1586                 composeRGBPixel(rval >> 2, gval >> 2, bval >> 2, &pixel);
1587                 *(lined + j) = pixel;
1588             }
1589         }
1590     }
1591     return;
1592 }
1593 
1594 
1595 /*------------------------------------------------------------------*
1596  *              Binary scaling by closest pixel sampling            *
1597  *------------------------------------------------------------------*/
1598 /*
1599  *  scaleBinaryLow()
1600  *
1601  *  Notes:
1602  *      (1) The dest must be cleared prior to this operation,
1603  *          and we clear it here in the low-level code.
1604  *      (2) We reuse dest pixels and dest pixel rows whenever
1605  *          possible for upscaling; downscaling is done by
1606  *          strict subsampling.
1607  */
1608 l_int32
scaleBinaryLow(l_uint32 * datad,l_int32 wd,l_int32 hd,l_int32 wpld,l_uint32 * datas,l_int32 ws,l_int32 hs,l_int32 wpls)1609 scaleBinaryLow(l_uint32  *datad,
1610                l_int32    wd,
1611                l_int32    hd,
1612                l_int32    wpld,
1613                l_uint32  *datas,
1614                l_int32    ws,
1615                l_int32    hs,
1616                l_int32    wpls)
1617 {
1618 l_int32    i, j, bpld;
1619 l_int32    xs, prevxs, sval;
1620 l_int32   *srow, *scol;
1621 l_uint32  *lines, *prevlines, *lined, *prevlined;
1622 l_float32  wratio, hratio;
1623 
1624     PROCNAME("scaleBinaryLow");
1625 
1626         /* clear dest */
1627     bpld = 4 * wpld;
1628     memset((char *)datad, 0, hd * bpld);
1629 
1630         /* The source row corresponding to dest row i ==> srow[i]
1631          * The source col corresponding to dest col j ==> scol[j]  */
1632     if ((srow = (l_int32 *)CALLOC(hd, sizeof(l_int32))) == NULL)
1633         return ERROR_INT("srow not made", procName, 1);
1634     if ((scol = (l_int32 *)CALLOC(wd, sizeof(l_int32))) == NULL)
1635         return ERROR_INT("scol not made", procName, 1);
1636 
1637     wratio = (l_float32)ws / (l_float32)wd;
1638     hratio = (l_float32)hs / (l_float32)hd;
1639     for (i = 0; i < hd; i++)
1640         srow[i] = L_MIN((l_int32)(hratio * i + 0.5), hs - 1);
1641     for (j = 0; j < wd; j++)
1642         scol[j] = L_MIN((l_int32)(wratio * j + 0.5), ws - 1);
1643 
1644     prevlines = NULL;
1645     prevxs = -1;
1646     sval = 0;
1647     for (i = 0; i < hd; i++) {
1648         lines = datas + srow[i] * wpls;
1649         lined = datad + i * wpld;
1650         if (lines != prevlines) {  /* make dest from new source row */
1651             for (j = 0; j < wd; j++) {
1652                 xs = scol[j];
1653                 if (xs != prevxs) {  /* get dest pix from source col */
1654                     if ((sval = GET_DATA_BIT(lines, xs)))
1655                         SET_DATA_BIT(lined, j);
1656                     prevxs = xs;
1657                 }
1658                 else {  /* copy prev dest pix, if set */
1659                     if (sval)
1660                         SET_DATA_BIT(lined, j);
1661                 }
1662             }
1663         }
1664         else {  /* lines == prevlines; copy prev dest row */
1665             prevlined = lined - wpld;
1666             memcpy((char *)lined, (char *)prevlined, bpld);
1667         }
1668         prevlines = lines;
1669     }
1670 
1671     FREE(srow);
1672     FREE(scol);
1673 
1674     return 0;
1675 }
1676 
1677 
1678 /*------------------------------------------------------------------*
1679  *                         Scale-to-gray 2x                         *
1680  *------------------------------------------------------------------*/
1681 /*!
1682  *  scaleToGray2Low()
1683  *
1684  *      Input:  usual image variables
1685  *              sumtab  (made from makeSumTabSG2())
1686  *              valtab  (made from makeValTabSG2())
1687  *      Return: 0 if OK; 1 on error.
1688  *
1689  *  The output is processed in sets of 4 output bytes on a row,
1690  *  corresponding to 4 2x2 bit-blocks in the input image.
1691  *  Two lookup tables are used.  The first, sumtab, gets the
1692  *  sum of ON pixels in 4 sets of two adjacent bits,
1693  *  storing the result in 4 adjacent bytes.  After sums from
1694  *  two rows have been added, the second table, valtab,
1695  *  converts from the sum of ON pixels in the 2x2 block to
1696  *  an 8 bpp grayscale value between 0 (for 4 bits ON)
1697  *  and 255 (for 0 bits ON).
1698  */
1699 void
scaleToGray2Low(l_uint32 * datad,l_int32 wd,l_int32 hd,l_int32 wpld,l_uint32 * datas,l_int32 wpls,l_uint32 * sumtab,l_uint8 * valtab)1700 scaleToGray2Low(l_uint32  *datad,
1701                 l_int32    wd,
1702                 l_int32    hd,
1703                 l_int32    wpld,
1704                 l_uint32  *datas,
1705                 l_int32    wpls,
1706                 l_uint32  *sumtab,
1707                 l_uint8   *valtab)
1708 {
1709 l_int32    i, j, l, k, m, wd4, extra;
1710 l_uint32   sbyte1, sbyte2, sum;
1711 l_uint32  *lines, *lined;
1712 
1713         /* i indexes the dest lines
1714          * l indexes the source lines
1715          * j indexes the dest bytes
1716          * k indexes the source bytes
1717          * We take two bytes from the source (in 2 lines of 8 pixels
1718          * each) and convert them into four 8 bpp bytes of the dest. */
1719     wd4 = wd & 0xfffffffc;
1720     extra = wd - wd4;
1721     for (i = 0, l = 0; i < hd; i++, l += 2) {
1722         lines = datas + l * wpls;
1723         lined = datad + i * wpld;
1724         for (j = 0, k = 0; j < wd4; j += 4, k++) {
1725             sbyte1 = GET_DATA_BYTE(lines, k);
1726             sbyte2 = GET_DATA_BYTE(lines + wpls, k);
1727             sum = sumtab[sbyte1] + sumtab[sbyte2];
1728             SET_DATA_BYTE(lined, j, valtab[sum >> 24]);
1729             SET_DATA_BYTE(lined, j + 1, valtab[(sum >> 16) & 0xff]);
1730             SET_DATA_BYTE(lined, j + 2, valtab[(sum >> 8) & 0xff]);
1731             SET_DATA_BYTE(lined, j + 3, valtab[sum & 0xff]);
1732         }
1733         if (extra > 0) {
1734             sbyte1 = GET_DATA_BYTE(lines, k);
1735             sbyte2 = GET_DATA_BYTE(lines + wpls, k);
1736             sum = sumtab[sbyte1] + sumtab[sbyte2];
1737             for (m = 0; m < extra; m++) {
1738                 SET_DATA_BYTE(lined, j + m,
1739                               valtab[((sum >> (24 - 8 * m)) & 0xff)]);
1740             }
1741         }
1742 
1743     }
1744 
1745     return;
1746 }
1747 
1748 
1749 /*!
1750  *  makeSumTabSG2()
1751  *
1752  *  Returns a table of 256 l_uint32s, giving the four output
1753  *  8-bit grayscale sums corresponding to 8 input bits of a binary
1754  *  image, for a 2x scale-to-gray op.  The sums from two
1755  *  adjacent scanlines are then added and transformed to
1756  *  output four 8 bpp pixel values, using makeValTabSG2().
1757  */
1758 l_uint32  *
makeSumTabSG2(void)1759 makeSumTabSG2(void)
1760 {
1761 l_int32    i;
1762 l_int32    sum[] = {0, 1, 1, 2};
1763 l_uint32  *tab;
1764 
1765     PROCNAME("makeSumTabSG2");
1766 
1767     if ((tab = (l_uint32 *)CALLOC(256, sizeof(l_uint32))) == NULL)
1768         return (l_uint32 *)ERROR_PTR("calloc fail for tab", procName, NULL);
1769 
1770         /* Pack the four sums separately in four bytes */
1771     for (i = 0; i < 256; i++) {
1772         tab[i] = (sum[i & 0x3] | sum[(i >> 2) & 0x3] << 8 |
1773                   sum[(i >> 4) & 0x3] << 16 | sum[(i >> 6) & 0x3] << 24);
1774     }
1775 
1776     return tab;
1777 }
1778 
1779 
1780 /*!
1781  *  makeValTabSG2()
1782  *
1783  *  Returns an 8 bit value for the sum of ON pixels
1784  *  in a 2x2 square, according to
1785  *
1786  *         val = 255 - (255 * sum)/4
1787  *
1788  *  where sum is in set {0,1,2,3,4}
1789  */
1790 l_uint8 *
makeValTabSG2(void)1791 makeValTabSG2(void)
1792 {
1793 l_int32   i;
1794 l_uint8  *tab;
1795 
1796     PROCNAME("makeValTabSG2");
1797 
1798     if ((tab = (l_uint8 *)CALLOC(5, sizeof(l_uint8))) == NULL)
1799         return (l_uint8 *)ERROR_PTR("calloc fail for tab", procName, NULL);
1800 
1801     for (i = 0; i < 5; i++)
1802         tab[i] = 255 - (i * 255) / 4;
1803 
1804     return tab;
1805 }
1806 
1807 
1808 /*------------------------------------------------------------------*
1809  *                         Scale-to-gray 3x                         *
1810  *------------------------------------------------------------------*/
1811 /*!
1812  *  scaleToGray3Low()
1813  *
1814  *      Input:  usual image variables
1815  *              sumtab  (made from makeSumTabSG3())
1816  *              valtab  (made from makeValTabSG3())
1817  *      Return: 0 if OK; 1 on error
1818  *
1819  *  Each set of 8 3x3 bit-blocks in the source image, which
1820  *  consist of 72 pixels arranged 24 pixels wide by 3 scanlines,
1821  *  is converted to a row of 8 8-bit pixels in the dest image.
1822  *  These 72 pixels of the input image are runs of 24 pixels
1823  *  in three adjacent scanlines.  Each run of 24 pixels is
1824  *  stored in the 24 LSbits of a 32-bit word.  We use 2 LUTs.
1825  *  The first, sumtab, takes 6 of these bits and stores
1826  *  sum, taken 3 bits at a time, in two bytes.  (See
1827  *  makeSumTabSG3).  This is done for each of the 3 scanlines,
1828  *  and the results are added.  We now have the sum of ON pixels
1829  *  in the first two 3x3 blocks in two bytes.  The valtab LUT
1830  *  then converts these values (which go from 0 to 9) to
1831  *  grayscale values between between 255 and 0.  (See makeValTabSG3).
1832  *  This process is repeated for each of the other 3 sets of
1833  *  6x3 input pixels, giving 8 output pixels in total.
1834  *
1835  *  Note: because the input image is processed in groups of
1836  *        24 x 3 pixels, the process clips the input height to
1837  *        (h - h % 3) and the input width to (w - w % 24).
1838  */
1839 void
scaleToGray3Low(l_uint32 * datad,l_int32 wd,l_int32 hd,l_int32 wpld,l_uint32 * datas,l_int32 wpls,l_uint32 * sumtab,l_uint8 * valtab)1840 scaleToGray3Low(l_uint32  *datad,
1841                 l_int32    wd,
1842                 l_int32    hd,
1843                 l_int32    wpld,
1844                 l_uint32  *datas,
1845                 l_int32    wpls,
1846                 l_uint32  *sumtab,
1847                 l_uint8   *valtab)
1848 {
1849 l_int32    i, j, l, k;
1850 l_uint32   threebytes1, threebytes2, threebytes3, sum;
1851 l_uint32  *lines, *lined;
1852 
1853         /* i indexes the dest lines
1854          * l indexes the source lines
1855          * j indexes the dest bytes
1856          * k indexes the source bytes
1857          * We take 9 bytes from the source (72 binary pixels
1858          * in three lines of 24 pixels each) and convert it
1859          * into 8 bytes of the dest (8 8bpp pixels in one line)   */
1860     for (i = 0, l = 0; i < hd; i++, l += 3) {
1861         lines = datas + l * wpls;
1862         lined = datad + i * wpld;
1863         for (j = 0, k = 0; j < wd; j += 8, k += 3) {
1864             threebytes1 = (GET_DATA_BYTE(lines, k) << 16) |
1865                           (GET_DATA_BYTE(lines, k + 1) << 8) |
1866                           GET_DATA_BYTE(lines, k + 2);
1867             threebytes2 = (GET_DATA_BYTE(lines + wpls, k) << 16) |
1868                           (GET_DATA_BYTE(lines + wpls, k + 1) << 8) |
1869                           GET_DATA_BYTE(lines + wpls, k + 2);
1870             threebytes3 = (GET_DATA_BYTE(lines + 2 * wpls, k) << 16) |
1871                           (GET_DATA_BYTE(lines + 2 * wpls, k + 1) << 8) |
1872                           GET_DATA_BYTE(lines + 2 * wpls, k + 2);
1873 
1874             sum = sumtab[(threebytes1 >> 18)] +
1875                   sumtab[(threebytes2 >> 18)] +
1876                   sumtab[(threebytes3 >> 18)];
1877             SET_DATA_BYTE(lined, j, valtab[GET_DATA_BYTE(&sum, 2)]);
1878             SET_DATA_BYTE(lined, j + 1, valtab[GET_DATA_BYTE(&sum, 3)]);
1879 
1880             sum = sumtab[((threebytes1 >> 12) & 0x3f)] +
1881                   sumtab[((threebytes2 >> 12) & 0x3f)] +
1882                   sumtab[((threebytes3 >> 12) & 0x3f)];
1883             SET_DATA_BYTE(lined, j + 2, valtab[GET_DATA_BYTE(&sum, 2)]);
1884             SET_DATA_BYTE(lined, j + 3, valtab[GET_DATA_BYTE(&sum, 3)]);
1885 
1886             sum = sumtab[((threebytes1 >> 6) & 0x3f)] +
1887                   sumtab[((threebytes2 >> 6) & 0x3f)] +
1888                   sumtab[((threebytes3 >> 6) & 0x3f)];
1889             SET_DATA_BYTE(lined, j + 4, valtab[GET_DATA_BYTE(&sum, 2)]);
1890             SET_DATA_BYTE(lined, j + 5, valtab[GET_DATA_BYTE(&sum, 3)]);
1891 
1892             sum = sumtab[(threebytes1 & 0x3f)] +
1893                   sumtab[(threebytes2 & 0x3f)] +
1894                   sumtab[(threebytes3 & 0x3f)];
1895             SET_DATA_BYTE(lined, j + 6, valtab[GET_DATA_BYTE(&sum, 2)]);
1896             SET_DATA_BYTE(lined, j + 7, valtab[GET_DATA_BYTE(&sum, 3)]);
1897         }
1898     }
1899 
1900     return;
1901 }
1902 
1903 
1904 
1905 /*!
1906  *  makeSumTabSG3()
1907  *
1908  *  Returns a table of 64 l_uint32s, giving the two output
1909  *  8-bit grayscale sums corresponding to 6 input bits of a binary
1910  *  image, for a 3x scale-to-gray op.  In practice, this would
1911  *  be used three times (on adjacent scanlines), and the sums would
1912  *  be added and then transformed to output 8 bpp pixel values,
1913  *  using makeValTabSG3().
1914  */
1915 l_uint32  *
makeSumTabSG3(void)1916 makeSumTabSG3(void)
1917 {
1918 l_int32    i;
1919 l_int32    sum[] = {0, 1, 1, 2, 1, 2, 2, 3};
1920 l_uint32  *tab;
1921 
1922     PROCNAME("makeSumTabSG3");
1923 
1924     if ((tab = (l_uint32 *)CALLOC(64, sizeof(l_uint32))) == NULL)
1925         return (l_uint32 *)ERROR_PTR("calloc fail for tab", procName, NULL);
1926 
1927         /* Pack the two sums separately in two bytes */
1928     for (i = 0; i < 64; i++) {
1929         tab[i] = (sum[i & 0x07]) | (sum[(i >> 3) & 0x07] << 8);
1930     }
1931 
1932     return tab;
1933 }
1934 
1935 
1936 /*!
1937  *  makeValTabSG3()
1938  *
1939  *  Returns an 8 bit value for the sum of ON pixels
1940  *  in a 3x3 square, according to
1941  *      val = 255 - (255 * sum)/9
1942  *  where sum is in set {0, ... ,9}
1943  */
1944 l_uint8 *
makeValTabSG3(void)1945 makeValTabSG3(void)
1946 {
1947 l_int32   i;
1948 l_uint8  *tab;
1949 
1950     PROCNAME("makeValTabSG3");
1951 
1952     if ((tab = (l_uint8 *)CALLOC(10, sizeof(l_uint8))) == NULL)
1953         return (l_uint8 *)ERROR_PTR("calloc fail for tab", procName, NULL);
1954 
1955     for (i = 0; i < 10; i++)
1956         tab[i] = 0xff - (i * 255) / 9;
1957 
1958     return tab;
1959 }
1960 
1961 
1962 /*------------------------------------------------------------------*
1963  *                         Scale-to-gray 4x                         *
1964  *------------------------------------------------------------------*/
1965 /*!
1966  *  scaleToGray4Low()
1967  *
1968  *      Input:  usual image variables
1969  *              sumtab  (made from makeSumTabSG4())
1970  *              valtab  (made from makeValTabSG4())
1971  *      Return: 0 if OK; 1 on error.
1972  *
1973  *  The output is processed in sets of 2 output bytes on a row,
1974  *  corresponding to 2 4x4 bit-blocks in the input image.
1975  *  Two lookup tables are used.  The first, sumtab, gets the
1976  *  sum of ON pixels in two sets of four adjacent bits,
1977  *  storing the result in 2 adjacent bytes.  After sums from
1978  *  four rows have been added, the second table, valtab,
1979  *  converts from the sum of ON pixels in the 4x4 block to
1980  *  an 8 bpp grayscale value between 0 (for 16 bits ON)
1981  *  and 255 (for 0 bits ON).
1982  */
1983 void
scaleToGray4Low(l_uint32 * datad,l_int32 wd,l_int32 hd,l_int32 wpld,l_uint32 * datas,l_int32 wpls,l_uint32 * sumtab,l_uint8 * valtab)1984 scaleToGray4Low(l_uint32  *datad,
1985                 l_int32    wd,
1986                 l_int32    hd,
1987                 l_int32    wpld,
1988                 l_uint32  *datas,
1989                 l_int32    wpls,
1990                 l_uint32  *sumtab,
1991                 l_uint8   *valtab)
1992 {
1993 l_int32    i, j, l, k;
1994 l_uint32   sbyte1, sbyte2, sbyte3, sbyte4, sum;
1995 l_uint32  *lines, *lined;
1996 
1997         /* i indexes the dest lines
1998          * l indexes the source lines
1999          * j indexes the dest bytes
2000          * k indexes the source bytes
2001          * We take four bytes from the source (in 4 lines of 8 pixels
2002          * each) and convert it into two 8 bpp bytes of the dest. */
2003     for (i = 0, l = 0; i < hd; i++, l += 4) {
2004         lines = datas + l * wpls;
2005         lined = datad + i * wpld;
2006         for (j = 0, k = 0; j < wd; j += 2, k++) {
2007             sbyte1 = GET_DATA_BYTE(lines, k);
2008             sbyte2 = GET_DATA_BYTE(lines + wpls, k);
2009             sbyte3 = GET_DATA_BYTE(lines + 2 * wpls, k);
2010             sbyte4 = GET_DATA_BYTE(lines + 3 * wpls, k);
2011             sum = sumtab[sbyte1] + sumtab[sbyte2] +
2012                   sumtab[sbyte3] + sumtab[sbyte4];
2013             SET_DATA_BYTE(lined, j, valtab[GET_DATA_BYTE(&sum, 2)]);
2014             SET_DATA_BYTE(lined, j + 1, valtab[GET_DATA_BYTE(&sum, 3)]);
2015         }
2016     }
2017 
2018     return;
2019 }
2020 
2021 
2022 /*!
2023  *  makeSumTabSG4()
2024  *
2025  *  Returns a table of 256 l_uint32s, giving the two output
2026  *  8-bit grayscale sums corresponding to 8 input bits of a binary
2027  *  image, for a 4x scale-to-gray op.  The sums from four
2028  *  adjacent scanlines are then added and transformed to
2029  *  output 8 bpp pixel values, using makeValTabSG4().
2030  */
2031 l_uint32  *
makeSumTabSG4(void)2032 makeSumTabSG4(void)
2033 {
2034 l_int32    i;
2035 l_int32    sum[] = {0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4};
2036 l_uint32  *tab;
2037 
2038     PROCNAME("makeSumTabSG4");
2039 
2040     if ((tab = (l_uint32 *)CALLOC(256, sizeof(l_uint32))) == NULL)
2041         return (l_uint32 *)ERROR_PTR("calloc fail for tab", procName, NULL);
2042 
2043         /* Pack the two sums separately in two bytes */
2044     for (i = 0; i < 256; i++) {
2045         tab[i] = (sum[i & 0xf]) | (sum[(i >> 4) & 0xf] << 8);
2046     }
2047 
2048     return tab;
2049 }
2050 
2051 
2052 /*!
2053  *  makeValTabSG4()
2054  *
2055  *  Returns an 8 bit value for the sum of ON pixels
2056  *  in a 4x4 square, according to
2057  *
2058  *         val = 255 - (255 * sum)/16
2059  *
2060  *  where sum is in set {0, ... ,16}
2061  */
2062 l_uint8 *
makeValTabSG4(void)2063 makeValTabSG4(void)
2064 {
2065 l_int32   i;
2066 l_uint8  *tab;
2067 
2068     PROCNAME("makeValTabSG4");
2069 
2070     if ((tab = (l_uint8 *)CALLOC(17, sizeof(l_uint8))) == NULL)
2071         return (l_uint8 *)ERROR_PTR("calloc fail for tab", procName, NULL);
2072 
2073     for (i = 0; i < 17; i++)
2074         tab[i] = 0xff - (i * 255) / 16;
2075 
2076     return tab;
2077 }
2078 
2079 
2080 /*------------------------------------------------------------------*
2081  *                         Scale-to-gray 6x                         *
2082  *------------------------------------------------------------------*/
2083 /*!
2084  *  scaleToGray6Low()
2085  *
2086  *      Input:  usual image variables
2087  *              tab8  (made from makePixelSumTab8())
2088  *              valtab  (made from makeValTabSG6())
2089  *      Return: 0 if OK; 1 on error
2090  *
2091  *  Each set of 4 6x6 bit-blocks in the source image, which
2092  *  consist of 144 pixels arranged 24 pixels wide by 6 scanlines,
2093  *  is converted to a row of 4 8-bit pixels in the dest image.
2094  *  These 144 pixels of the input image are runs of 24 pixels
2095  *  in six adjacent scanlines.  Each run of 24 pixels is
2096  *  stored in the 24 LSbits of a 32-bit word.  We use 2 LUTs.
2097  *  The first, tab8, takes 6 of these bits and stores
2098  *  sum in one byte.  This is done for each of the 6 scanlines,
2099  *  and the results are added.
2100  *  We now have the sum of ON pixels in the first 6x6 block.  The
2101  *  valtab LUT then converts these values (which go from 0 to 36) to
2102  *  grayscale values between between 255 and 0.  (See makeValTabSG6).
2103  *  This process is repeated for each of the other 3 sets of
2104  *  6x6 input pixels, giving 4 output pixels in total.
2105  *
2106  *  Note: because the input image is processed in groups of
2107  *        24 x 6 pixels, the process clips the input height to
2108  *        (h - h % 6) and the input width to (w - w % 24).
2109  *
2110  */
2111 void
scaleToGray6Low(l_uint32 * datad,l_int32 wd,l_int32 hd,l_int32 wpld,l_uint32 * datas,l_int32 wpls,l_int32 * tab8,l_uint8 * valtab)2112 scaleToGray6Low(l_uint32  *datad,
2113                 l_int32    wd,
2114                 l_int32    hd,
2115                 l_int32    wpld,
2116                 l_uint32  *datas,
2117                 l_int32    wpls,
2118                 l_int32   *tab8,
2119                 l_uint8   *valtab)
2120 {
2121 l_int32    i, j, l, k;
2122 l_uint32   threebytes1, threebytes2, threebytes3;
2123 l_uint32   threebytes4, threebytes5, threebytes6, sum;
2124 l_uint32  *lines, *lined;
2125 
2126         /* i indexes the dest lines
2127          * l indexes the source lines
2128          * j indexes the dest bytes
2129          * k indexes the source bytes
2130          * We take 18 bytes from the source (144 binary pixels
2131          * in six lines of 24 pixels each) and convert it
2132          * into 4 bytes of the dest (four 8 bpp pixels in one line)   */
2133     for (i = 0, l = 0; i < hd; i++, l += 6) {
2134         lines = datas + l * wpls;
2135         lined = datad + i * wpld;
2136         for (j = 0, k = 0; j < wd; j += 4, k += 3) {
2137                 /* First grab the 18 bytes, 3 at a time, and put each set
2138                  * of 3 bytes into the LS bytes of a 32-bit word. */
2139             threebytes1 = (GET_DATA_BYTE(lines, k) << 16) |
2140                           (GET_DATA_BYTE(lines, k + 1) << 8) |
2141                           GET_DATA_BYTE(lines, k + 2);
2142             threebytes2 = (GET_DATA_BYTE(lines + wpls, k) << 16) |
2143                           (GET_DATA_BYTE(lines + wpls, k + 1) << 8) |
2144                           GET_DATA_BYTE(lines + wpls, k + 2);
2145             threebytes3 = (GET_DATA_BYTE(lines + 2 * wpls, k) << 16) |
2146                           (GET_DATA_BYTE(lines + 2 * wpls, k + 1) << 8) |
2147                           GET_DATA_BYTE(lines + 2 * wpls, k + 2);
2148             threebytes4 = (GET_DATA_BYTE(lines + 3 * wpls, k) << 16) |
2149                           (GET_DATA_BYTE(lines + 3 * wpls, k + 1) << 8) |
2150                           GET_DATA_BYTE(lines + 3 * wpls, k + 2);
2151             threebytes5 = (GET_DATA_BYTE(lines + 4 * wpls, k) << 16) |
2152                           (GET_DATA_BYTE(lines + 4 * wpls, k + 1) << 8) |
2153                           GET_DATA_BYTE(lines + 4 * wpls, k + 2);
2154             threebytes6 = (GET_DATA_BYTE(lines + 5 * wpls, k) << 16) |
2155                           (GET_DATA_BYTE(lines + 5 * wpls, k + 1) << 8) |
2156                           GET_DATA_BYTE(lines + 5 * wpls, k + 2);
2157 
2158                 /* Sum first set of 36 bits and convert to 0-255 */
2159             sum = tab8[(threebytes1 >> 18)] +
2160                   tab8[(threebytes2 >> 18)] +
2161                   tab8[(threebytes3 >> 18)] +
2162                   tab8[(threebytes4 >> 18)] +
2163                   tab8[(threebytes5 >> 18)] +
2164                    tab8[(threebytes6 >> 18)];
2165             SET_DATA_BYTE(lined, j, valtab[GET_DATA_BYTE(&sum, 3)]);
2166 
2167                 /* Ditto for second set */
2168             sum = tab8[((threebytes1 >> 12) & 0x3f)] +
2169                   tab8[((threebytes2 >> 12) & 0x3f)] +
2170                   tab8[((threebytes3 >> 12) & 0x3f)] +
2171                   tab8[((threebytes4 >> 12) & 0x3f)] +
2172                   tab8[((threebytes5 >> 12) & 0x3f)] +
2173                   tab8[((threebytes6 >> 12) & 0x3f)];
2174             SET_DATA_BYTE(lined, j + 1, valtab[GET_DATA_BYTE(&sum, 3)]);
2175 
2176             sum = tab8[((threebytes1 >> 6) & 0x3f)] +
2177                   tab8[((threebytes2 >> 6) & 0x3f)] +
2178                   tab8[((threebytes3 >> 6) & 0x3f)] +
2179                   tab8[((threebytes4 >> 6) & 0x3f)] +
2180                   tab8[((threebytes5 >> 6) & 0x3f)] +
2181                   tab8[((threebytes6 >> 6) & 0x3f)];
2182             SET_DATA_BYTE(lined, j + 2, valtab[GET_DATA_BYTE(&sum, 3)]);
2183 
2184             sum = tab8[(threebytes1 & 0x3f)] +
2185                   tab8[(threebytes2 & 0x3f)] +
2186                   tab8[(threebytes3 & 0x3f)] +
2187                   tab8[(threebytes4 & 0x3f)] +
2188                   tab8[(threebytes5 & 0x3f)] +
2189                   tab8[(threebytes6 & 0x3f)];
2190             SET_DATA_BYTE(lined, j + 3, valtab[GET_DATA_BYTE(&sum, 3)]);
2191         }
2192     }
2193 
2194     return;
2195 }
2196 
2197 
2198 /*!
2199  *  makeValTabSG6()
2200  *
2201  *  Returns an 8 bit value for the sum of ON pixels
2202  *  in a 6x6 square, according to
2203  *      val = 255 - (255 * sum)/36
2204  *  where sum is in set {0, ... ,36}
2205  */
2206 l_uint8 *
makeValTabSG6(void)2207 makeValTabSG6(void)
2208 {
2209 l_int32   i;
2210 l_uint8  *tab;
2211 
2212     PROCNAME("makeValTabSG6");
2213 
2214     if ((tab = (l_uint8 *)CALLOC(37, sizeof(l_uint8))) == NULL)
2215         return (l_uint8 *)ERROR_PTR("calloc fail for tab", procName, NULL);
2216 
2217     for (i = 0; i < 37; i++)
2218         tab[i] = 0xff - (i * 255) / 36;
2219 
2220     return tab;
2221 }
2222 
2223 
2224 /*------------------------------------------------------------------*
2225  *                         Scale-to-gray 8x                         *
2226  *------------------------------------------------------------------*/
2227 /*!
2228  *  scaleToGray8Low()
2229  *
2230  *      Input:  usual image variables
2231  *              tab8  (made from makePixelSumTab8())
2232  *              valtab  (made from makeValTabSG8())
2233  *      Return: 0 if OK; 1 on error.
2234  *
2235  *  The output is processed one dest byte at a time,
2236  *  corresponding to 8 rows of src bytes in the input image.
2237  *  Two lookup tables are used.  The first, tab8, gets the
2238  *  sum of ON pixels in a byte.  After sums from 8 rows have
2239  *  been added, the second table, valtab, converts from this
2240  *  value (which is between 0 and 64) to an 8 bpp grayscale
2241  *  value between 0 (for all 64 bits ON) and 255 (for 0 bits ON).
2242  */
2243 void
scaleToGray8Low(l_uint32 * datad,l_int32 wd,l_int32 hd,l_int32 wpld,l_uint32 * datas,l_int32 wpls,l_int32 * tab8,l_uint8 * valtab)2244 scaleToGray8Low(l_uint32  *datad,
2245                 l_int32    wd,
2246                 l_int32    hd,
2247                 l_int32    wpld,
2248                 l_uint32  *datas,
2249                 l_int32    wpls,
2250                 l_int32   *tab8,
2251                 l_uint8   *valtab)
2252 {
2253 l_int32    i, j, k;
2254 l_int32    sbyte0, sbyte1, sbyte2, sbyte3, sbyte4, sbyte5, sbyte6, sbyte7, sum;
2255 l_uint32  *lines, *lined;
2256 
2257         /* i indexes the dest lines
2258          * k indexes the source lines
2259          * j indexes the src and dest bytes
2260          * We take 8 bytes from the source (in 8 lines of 8 pixels
2261          * each) and convert it into one 8 bpp byte of the dest. */
2262     for (i = 0, k = 0; i < hd; i++, k += 8) {
2263         lines = datas + k * wpls;
2264         lined = datad + i * wpld;
2265         for (j = 0; j < wd; j++) {
2266             sbyte0 = GET_DATA_BYTE(lines, j);
2267             sbyte1 = GET_DATA_BYTE(lines + wpls, j);
2268             sbyte2 = GET_DATA_BYTE(lines + 2 * wpls, j);
2269             sbyte3 = GET_DATA_BYTE(lines + 3 * wpls, j);
2270             sbyte4 = GET_DATA_BYTE(lines + 4 * wpls, j);
2271             sbyte5 = GET_DATA_BYTE(lines + 5 * wpls, j);
2272             sbyte6 = GET_DATA_BYTE(lines + 6 * wpls, j);
2273             sbyte7 = GET_DATA_BYTE(lines + 7 * wpls, j);
2274             sum = tab8[sbyte0] + tab8[sbyte1] +
2275                   tab8[sbyte2] + tab8[sbyte3] +
2276                   tab8[sbyte4] + tab8[sbyte5] +
2277                   tab8[sbyte6] + tab8[sbyte7];
2278             SET_DATA_BYTE(lined, j, valtab[sum]);
2279         }
2280     }
2281 
2282     return;
2283 }
2284 
2285 
2286 /*!
2287  *  makeValTabSG8()
2288  *
2289  *  Returns an 8 bit value for the sum of ON pixels
2290  *  in an 8x8 square, according to
2291  *      val = 255 - (255 * sum)/64
2292  *  where sum is in set {0, ... ,64}
2293  */
2294 l_uint8 *
makeValTabSG8(void)2295 makeValTabSG8(void)
2296 {
2297 l_int32   i;
2298 l_uint8  *tab;
2299 
2300     PROCNAME("makeValTabSG8");
2301 
2302     if ((tab = (l_uint8 *)CALLOC(65, sizeof(l_uint8))) == NULL)
2303         return (l_uint8 *)ERROR_PTR("calloc fail for tab", procName, NULL);
2304 
2305     for (i = 0; i < 65; i++)
2306         tab[i] = 0xff - (i * 255) / 64;
2307 
2308     return tab;
2309 }
2310 
2311 
2312 /*------------------------------------------------------------------*
2313  *                         Scale-to-gray 16x                        *
2314  *------------------------------------------------------------------*/
2315 /*!
2316  *  scaleToGray16Low()
2317  *
2318  *      Input:  usual image variables
2319  *              tab8  (made from makePixelSumTab8())
2320  *      Return: 0 if OK; 1 on error.
2321  *
2322  *  The output is processed one dest byte at a time, corresponding
2323  *  to 16 rows consisting each of 2 src bytes in the input image.
2324  *  This uses one lookup table, tab8, which gives the sum of
2325  *  ON pixels in a byte.  After summing for all ON pixels in the
2326  *  32 src bytes, which is between 0 and 256, this is converted
2327  *  to an 8 bpp grayscale value between 0 (for 255 or 256 bits ON)
2328  *  and 255 (for 0 bits ON).
2329  */
2330 void
scaleToGray16Low(l_uint32 * datad,l_int32 wd,l_int32 hd,l_int32 wpld,l_uint32 * datas,l_int32 wpls,l_int32 * tab8)2331 scaleToGray16Low(l_uint32  *datad,
2332                   l_int32    wd,
2333                  l_int32    hd,
2334                  l_int32    wpld,
2335                  l_uint32  *datas,
2336                  l_int32    wpls,
2337                  l_int32   *tab8)
2338 {
2339 l_int32    i, j, k, m;
2340 l_int32    sum;
2341 l_uint32  *lines, *lined;
2342 
2343         /* i indexes the dest lines
2344          * k indexes the source lines
2345          * j indexes the dest bytes
2346          * m indexes the src bytes
2347          * We take 32 bytes from the source (in 16 lines of 16 pixels
2348          * each) and convert it into one 8 bpp byte of the dest. */
2349     for (i = 0, k = 0; i < hd; i++, k += 16) {
2350         lines = datas + k * wpls;
2351         lined = datad + i * wpld;
2352         for (j = 0; j < wd; j++) {
2353             m = 2 * j;
2354             sum = tab8[GET_DATA_BYTE(lines, m)];
2355             sum += tab8[GET_DATA_BYTE(lines, m + 1)];
2356             sum += tab8[GET_DATA_BYTE(lines + wpls, m)];
2357             sum += tab8[GET_DATA_BYTE(lines + wpls, m + 1)];
2358             sum += tab8[GET_DATA_BYTE(lines + 2 * wpls, m)];
2359             sum += tab8[GET_DATA_BYTE(lines + 2 * wpls, m + 1)];
2360             sum += tab8[GET_DATA_BYTE(lines + 3 * wpls, m)];
2361             sum += tab8[GET_DATA_BYTE(lines + 3 * wpls, m + 1)];
2362             sum += tab8[GET_DATA_BYTE(lines + 4 * wpls, m)];
2363             sum += tab8[GET_DATA_BYTE(lines + 4 * wpls, m + 1)];
2364             sum += tab8[GET_DATA_BYTE(lines + 5 * wpls, m)];
2365             sum += tab8[GET_DATA_BYTE(lines + 5 * wpls, m + 1)];
2366             sum += tab8[GET_DATA_BYTE(lines + 6 * wpls, m)];
2367             sum += tab8[GET_DATA_BYTE(lines + 6 * wpls, m + 1)];
2368             sum += tab8[GET_DATA_BYTE(lines + 7 * wpls, m)];
2369             sum += tab8[GET_DATA_BYTE(lines + 7 * wpls, m + 1)];
2370             sum += tab8[GET_DATA_BYTE(lines + 8 * wpls, m)];
2371             sum += tab8[GET_DATA_BYTE(lines + 8 * wpls, m + 1)];
2372             sum += tab8[GET_DATA_BYTE(lines + 9 * wpls, m)];
2373             sum += tab8[GET_DATA_BYTE(lines + 9 * wpls, m + 1)];
2374             sum += tab8[GET_DATA_BYTE(lines + 10 * wpls, m)];
2375             sum += tab8[GET_DATA_BYTE(lines + 10 * wpls, m + 1)];
2376             sum += tab8[GET_DATA_BYTE(lines + 11 * wpls, m)];
2377             sum += tab8[GET_DATA_BYTE(lines + 11 * wpls, m + 1)];
2378             sum += tab8[GET_DATA_BYTE(lines + 12 * wpls, m)];
2379             sum += tab8[GET_DATA_BYTE(lines + 12 * wpls, m + 1)];
2380             sum += tab8[GET_DATA_BYTE(lines + 13 * wpls, m)];
2381             sum += tab8[GET_DATA_BYTE(lines + 13 * wpls, m + 1)];
2382             sum += tab8[GET_DATA_BYTE(lines + 14 * wpls, m)];
2383             sum += tab8[GET_DATA_BYTE(lines + 14 * wpls, m + 1)];
2384             sum += tab8[GET_DATA_BYTE(lines + 15 * wpls, m)];
2385             sum += tab8[GET_DATA_BYTE(lines + 15 * wpls, m + 1)];
2386             sum = L_MIN(sum, 255);
2387             SET_DATA_BYTE(lined, j, 255 - sum);
2388         }
2389     }
2390 
2391     return;
2392 }
2393 
2394 
2395 
2396 /*------------------------------------------------------------------*
2397  *                         Grayscale mipmap                         *
2398  *------------------------------------------------------------------*/
2399 /*!
2400  *  scaleMipmapLow()
2401  *
2402  *  See notes in scale.c for pixScaleToGrayMipmap().  This function
2403  *  is here for pedagogical reasons.  It gives poor results on document
2404  *  images because of aliasing.
2405  */
2406 l_int32
scaleMipmapLow(l_uint32 * datad,l_int32 wd,l_int32 hd,l_int32 wpld,l_uint32 * datas1,l_int32 wpls1,l_uint32 * datas2,l_int32 wpls2,l_float32 red)2407 scaleMipmapLow(l_uint32  *datad,
2408                l_int32    wd,
2409                l_int32    hd,
2410                l_int32    wpld,
2411                l_uint32  *datas1,
2412                l_int32    wpls1,
2413                l_uint32  *datas2,
2414                l_int32    wpls2,
2415                l_float32  red)
2416 {
2417 l_int32    i, j, val1, val2, val, row2, col2;
2418 l_int32   *srow, *scol;
2419 l_uint32  *lines1, *lines2, *lined;
2420 l_float32  ratio, w1, w2;
2421 
2422     PROCNAME("scaleMipmapLow");
2423 
2424         /* Clear dest */
2425     memset((char *)datad, 0, 4 * wpld * hd);
2426 
2427         /* Each dest pixel at (j,i) is computed by interpolating
2428            between the two src images at the corresponding location.
2429            We store the UL corner locations of the square of
2430            src pixels in thelower-resolution image that correspond
2431            to dest pixel (j,i).  The are labelled by the arrays
2432            srow[i], scol[j].  The UL corner locations of the higher
2433            resolution src pixels are obtained from these arrays
2434            by multiplying by 2. */
2435     if ((srow = (l_int32 *)CALLOC(hd, sizeof(l_int32))) == NULL)
2436         return ERROR_INT("srow not made", procName, 1);
2437     if ((scol = (l_int32 *)CALLOC(wd, sizeof(l_int32))) == NULL)
2438         return ERROR_INT("scol not made", procName, 1);
2439     ratio = 1. / (2. * red);  /* 0.5 for red = 1, 1 for red = 0.5 */
2440     for (i = 0; i < hd; i++)
2441         srow[i] = (l_int32)(ratio * i);
2442     for (j = 0; j < wd; j++)
2443         scol[j] = (l_int32)(ratio * j);
2444 
2445         /* Get weights for linear interpolation: these are the
2446          * 'distances' of the dest image plane from the two
2447          * src image planes. */
2448     w1 = 2. * red - 1.;   /* w1 --> 1 as red --> 1 */
2449     w2 = 1. - w1;
2450 
2451         /* For each dest pixel, compute linear interpolation */
2452     for (i = 0; i < hd; i++) {
2453         row2 = srow[i];
2454         lines1 = datas1 + 2 * row2 * wpls1;
2455         lines2 = datas2 + row2 * wpls2;
2456         lined = datad + i * wpld;
2457         for (j = 0; j < wd; j++) {
2458             col2 = scol[j];
2459             val1 = GET_DATA_BYTE(lines1, 2 * col2);
2460             val2 = GET_DATA_BYTE(lines2, col2);
2461             val = (l_int32)(w1 * val1 + w2 * val2);
2462             SET_DATA_BYTE(lined, j, val);
2463         }
2464     }
2465 
2466     FREE(srow);
2467     FREE(scol);
2468     return 0;
2469 }
2470