1 /*====================================================================*
2 - Copyright (C) 2001 Leptonica. All rights reserved.
3 - This software is distributed in the hope that it will be
4 - useful, but with NO WARRANTY OF ANY KIND.
5 - No author or distributor accepts responsibility to anyone for the
6 - consequences of using this software, or for whether it serves any
7 - particular purpose or works at all, unless he or she says so in
8 - writing. Everyone is granted permission to copy, modify and
9 - redistribute this source code, for commercial or non-commercial
10 - purposes, with the following restrictions: (1) the origin of this
11 - source code must not be misrepresented; (2) modified versions must
12 - be plainly marked as such; and (3) this notice may not be removed
13 - or altered from any source or modified source distribution.
14 *====================================================================*/
15
16
17 /*
18 * scalelow.c
19 *
20 * Color (interpolated) scaling: general case
21 * void scaleColorLILow()
22 *
23 * Grayscale (interpolated) scaling: general case
24 * void scaleGrayLILow()
25 *
26 * Color (interpolated) scaling: 2x upscaling
27 * void scaleColor2xLILow()
28 * void scaleColor2xLILineLow()
29 *
30 * Grayscale (interpolated) scaling: 2x upscaling
31 * void scaleGray2xLILow()
32 * void scaleGray2xLILineLow()
33 *
34 * Grayscale (interpolated) scaling: 4x upscaling
35 * void scaleGray4xLILow()
36 * void scaleGray4xLILineLow()
37 *
38 * Grayscale and color scaling by closest pixel sampling
39 * l_int32 scaleBySamplingLow()
40 *
41 * Color and grayscale downsampling with (antialias) lowpass filter
42 * l_int32 scaleSmoothLow()
43 * void scaleRGBToGray2Low()
44 *
45 * Color and grayscale downsampling with (antialias) area mapping
46 * l_int32 scaleColorAreaMapLow()
47 * l_int32 scaleGrayAreaMapLow()
48 * l_int32 scaleAreaMapLow2()
49 *
50 * Binary scaling by closest pixel sampling
51 * l_int32 scaleBinaryLow()
52 *
53 * Scale-to-gray 2x
54 * void scaleToGray2Low()
55 * l_uint32 *makeSumTabSG2()
56 * l_uint8 *makeValTabSG2()
57 *
58 * Scale-to-gray 3x
59 * void scaleToGray3Low()
60 * l_uint32 *makeSumTabSG3()
61 * l_uint8 *makeValTabSG3()
62 *
63 * Scale-to-gray 4x
64 * void scaleToGray4Low()
65 * l_uint32 *makeSumTabSG4()
66 * l_uint8 *makeValTabSG4()
67 *
68 * Scale-to-gray 6x
69 * void scaleToGray6Low()
70 * l_uint8 *makeValTabSG6()
71 *
72 * Scale-to-gray 8x
73 * void scaleToGray8Low()
74 * l_uint8 *makeValTabSG8()
75 *
76 * Scale-to-gray 16x
77 * void scaleToGray16Low()
78 *
79 * Grayscale mipmap
80 * l_int32 scaleMipmapLow()
81 *
82 */
83
84 #include <stdio.h>
85 #include <stdlib.h>
86 #include <string.h>
87 #include "allheaders.h"
88
89 #ifndef NO_CONSOLE_IO
90 #define DEBUG_OVERFLOW 0
91 #define DEBUG_UNROLLING 0
92 #endif /* ~NO_CONSOLE_IO */
93
94
95 /*------------------------------------------------------------------*
96 * General linear interpolated color scaling *
97 *------------------------------------------------------------------*/
98 /*!
99 * scaleColorLILow()
100 *
101 * We choose to divide each pixel into 16 x 16 sub-pixels.
102 * Linear interpolation is equivalent to finding the
103 * fractional area (i.e., number of sub-pixels divided
104 * by 256) associated with each of the four nearest src pixels,
105 * and weighting each pixel value by this fractional area.
106 *
107 * P3 speed is about 7 x 10^6 dst pixels/sec/GHz
108 */
109 void
scaleColorLILow(l_uint32 * datad,l_int32 wd,l_int32 hd,l_int32 wpld,l_uint32 * datas,l_int32 ws,l_int32 hs,l_int32 wpls)110 scaleColorLILow(l_uint32 *datad,
111 l_int32 wd,
112 l_int32 hd,
113 l_int32 wpld,
114 l_uint32 *datas,
115 l_int32 ws,
116 l_int32 hs,
117 l_int32 wpls)
118 {
119 l_int32 i, j, wm2, hm2;
120 l_int32 xpm, ypm; /* location in src image, to 1/16 of a pixel */
121 l_int32 xp, yp, xf, yf; /* src pixel and pixel fraction coordinates */
122 l_int32 v00r, v01r, v10r, v11r, v00g, v01g, v10g, v11g;
123 l_int32 v00b, v01b, v10b, v11b, area00, area01, area10, area11;
124 l_uint32 pixels1, pixels2, pixels3, pixels4, pixel;
125 l_uint32 *lines, *lined;
126 l_float32 scx, scy;
127
128 /* (scx, scy) are scaling factors that are applied to the
129 * dest coords to get the corresponding src coords.
130 * We need them because we iterate over dest pixels
131 * and must find the corresponding set of src pixels. */
132 scx = 16. * (l_float32)ws / (l_float32)wd;
133 scy = 16. * (l_float32)hs / (l_float32)hd;
134
135 wm2 = ws - 2;
136 hm2 = hs - 2;
137
138 /* Iterate over the destination pixels */
139 for (i = 0; i < hd; i++) {
140 ypm = (l_int32)(scy * (l_float32)i);
141 yp = ypm >> 4;
142 yf = ypm & 0x0f;
143 lined = datad + i * wpld;
144 lines = datas + yp * wpls;
145 for (j = 0; j < wd; j++) {
146 xpm = (l_int32)(scx * (l_float32)j);
147 xp = xpm >> 4;
148 xf = xpm & 0x0f;
149
150 /* Do bilinear interpolation. This is a simple
151 * generalization of the calculation in scaleGrayLILow().
152 * Without this, we could simply subsample:
153 * *(lined + j) = *(lines + xp);
154 * which is faster but gives lousy results! */
155 pixels1 = *(lines + xp);
156
157 if (xp > wm2 || yp > hm2) {
158 if (yp > hm2 && xp <= wm2) { /* pixels near bottom */
159 pixels2 = *(lines + xp + 1);
160 pixels3 = pixels1;
161 pixels4 = pixels2;
162 }
163 else if (xp > wm2 && yp <= hm2) { /* pixels near right side */
164 pixels2 = pixels1;
165 pixels3 = *(lines + wpls + xp);
166 pixels4 = pixels3;
167 }
168 else { /* pixels at LR corner */
169 pixels4 = pixels3 = pixels2 = pixels1;
170 }
171 }
172 else {
173 pixels2 = *(lines + xp + 1);
174 pixels3 = *(lines + wpls + xp);
175 pixels4 = *(lines + wpls + xp + 1);
176 }
177
178 area00 = (16 - xf) * (16 - yf);
179 area10 = xf * (16 - yf);
180 area01 = (16 - xf) * yf;
181 area11 = xf * yf;
182 v00r = area00 * ((pixels1 >> L_RED_SHIFT) & 0xff);
183 v00g = area00 * ((pixels1 >> L_GREEN_SHIFT) & 0xff);
184 v00b = area00 * ((pixels1 >> L_BLUE_SHIFT) & 0xff);
185 v10r = area10 * ((pixels2 >> L_RED_SHIFT) & 0xff);
186 v10g = area10 * ((pixels2 >> L_GREEN_SHIFT) & 0xff);
187 v10b = area10 * ((pixels2 >> L_BLUE_SHIFT) & 0xff);
188 v01r = area01 * ((pixels3 >> L_RED_SHIFT) & 0xff);
189 v01g = area01 * ((pixels3 >> L_GREEN_SHIFT) & 0xff);
190 v01b = area01 * ((pixels3 >> L_BLUE_SHIFT) & 0xff);
191 v11r = area11 * ((pixels4 >> L_RED_SHIFT) & 0xff);
192 v11g = area11 * ((pixels4 >> L_GREEN_SHIFT) & 0xff);
193 v11b = area11 * ((pixels4 >> L_BLUE_SHIFT) & 0xff);
194 pixel = (((v00r + v10r + v01r + v11r + 128) << 16) & 0xff000000) |
195 (((v00g + v10g + v01g + v11g + 128) << 8) & 0x00ff0000) |
196 ((v00b + v10b + v01b + v11b + 128) & 0x0000ff00);
197 *(lined + j) = pixel;
198 }
199 }
200
201 return;
202 }
203
204
205 /*------------------------------------------------------------------*
206 * General linear interpolated gray scaling *
207 *------------------------------------------------------------------*/
208 /*!
209 * scaleGrayLILow()
210 *
211 * We choose to divide each pixel into 16 x 16 sub-pixels.
212 * Linear interpolation is equivalent to finding the
213 * fractional area (i.e., number of sub-pixels divided
214 * by 256) associated with each of the four nearest src pixels,
215 * and weighting each pixel value by this fractional area.
216 */
217 void
scaleGrayLILow(l_uint32 * datad,l_int32 wd,l_int32 hd,l_int32 wpld,l_uint32 * datas,l_int32 ws,l_int32 hs,l_int32 wpls)218 scaleGrayLILow(l_uint32 *datad,
219 l_int32 wd,
220 l_int32 hd,
221 l_int32 wpld,
222 l_uint32 *datas,
223 l_int32 ws,
224 l_int32 hs,
225 l_int32 wpls)
226 {
227 l_int32 i, j, wm2, hm2;
228 l_int32 xpm, ypm; /* location in src image, to 1/16 of a pixel */
229 l_int32 xp, yp, xf, yf; /* src pixel and pixel fraction coordinates */
230 l_int32 v00, v01, v10, v11, v00_val, v01_val, v10_val, v11_val;
231 l_uint8 val;
232 l_uint32 *lines, *lined;
233 l_float32 scx, scy;
234
235 /* (scx, scy) are scaling factors that are applied to the
236 * dest coords to get the corresponding src coords.
237 * We need them because we iterate over dest pixels
238 * and must find the corresponding set of src pixels. */
239 scx = 16. * (l_float32)ws / (l_float32)wd;
240 scy = 16. * (l_float32)hs / (l_float32)hd;
241
242 wm2 = ws - 2;
243 hm2 = hs - 2;
244
245 /* Iterate over the destination pixels */
246 for (i = 0; i < hd; i++) {
247 ypm = (l_int32)(scy * (l_float32)i);
248 yp = ypm >> 4;
249 yf = ypm & 0x0f;
250 lined = datad + i * wpld;
251 lines = datas + yp * wpls;
252 for (j = 0; j < wd; j++) {
253 xpm = (l_int32)(scx * (l_float32)j);
254 xp = xpm >> 4;
255 xf = xpm & 0x0f;
256
257 /* Do bilinear interpolation. Without this, we could
258 * simply subsample:
259 * SET_DATA_BYTE(lined, j, GET_DATA_BYTE(lines, xp));
260 * which is faster but gives lousy results! */
261 v00_val = GET_DATA_BYTE(lines, xp);
262 if (xp > wm2 || yp > hm2) {
263 if (yp > hm2 && xp <= wm2) { /* pixels near bottom */
264 v01_val = v00_val;
265 v10_val = GET_DATA_BYTE(lines, xp + 1);
266 v11_val = v10_val;
267 }
268 else if (xp > wm2 && yp <= hm2) { /* pixels near right side */
269 v01_val = GET_DATA_BYTE(lines + wpls, xp);
270 v10_val = v00_val;
271 v11_val = v01_val;
272 }
273 else { /* pixels at LR corner */
274 v10_val = v01_val = v11_val = v00_val;
275 }
276 }
277 else {
278 v10_val = GET_DATA_BYTE(lines, xp + 1);
279 v01_val = GET_DATA_BYTE(lines + wpls, xp);
280 v11_val = GET_DATA_BYTE(lines + wpls, xp + 1);
281 }
282
283 v00 = (16 - xf) * (16 - yf) * v00_val;
284 v10 = xf * (16 - yf) * v10_val;
285 v01 = (16 - xf) * yf * v01_val;
286 v11 = xf * yf * v11_val;
287
288 val = (l_uint8)((v00 + v01 + v10 + v11 + 128) / 256);
289 SET_DATA_BYTE(lined, j, val);
290 }
291 }
292
293 return;
294 }
295
296
297 /*------------------------------------------------------------------*
298 * 2x linear interpolated color scaling *
299 *------------------------------------------------------------------*/
300 /*!
301 * scaleColor2xLILow()
302 *
303 * This is a special case of 2x expansion by linear
304 * interpolation. Each src pixel contains 4 dest pixels.
305 * The 4 dest pixels in src pixel 1 are numbered at
306 * their UL corners. The 4 dest pixels in src pixel 1
307 * are related to that src pixel and its 3 neighboring
308 * src pixels as follows:
309 *
310 * 1-----2-----|-----|-----|
311 * | | | | |
312 * | | | | |
313 * src 1 --> 3-----4-----| | | <-- src 2
314 * | | | | |
315 * | | | | |
316 * |-----|-----|-----|-----|
317 * | | | | |
318 * | | | | |
319 * src 3 --> | | | | | <-- src 4
320 * | | | | |
321 * | | | | |
322 * |-----|-----|-----|-----|
323 *
324 * dest src
325 * ---- ---
326 * dp1 = sp1
327 * dp2 = (sp1 + sp2) / 2
328 * dp3 = (sp1 + sp3) / 2
329 * dp4 = (sp1 + sp2 + sp3 + sp4) / 4
330 *
331 * We iterate over the src pixels, and unroll the calculation
332 * for each set of 4 dest pixels corresponding to that src
333 * pixel, caching pixels for the next src pixel whenever possible.
334 * The method is exactly analogous to the one we use for
335 * scaleGray2xLILow() and its line version.
336 *
337 * P3 speed is about 5 x 10^7 dst pixels/sec/GHz
338 */
339 void
scaleColor2xLILow(l_uint32 * datad,l_int32 wpld,l_uint32 * datas,l_int32 ws,l_int32 hs,l_int32 wpls)340 scaleColor2xLILow(l_uint32 *datad,
341 l_int32 wpld,
342 l_uint32 *datas,
343 l_int32 ws,
344 l_int32 hs,
345 l_int32 wpls)
346 {
347 l_int32 i, hsm;
348 l_uint32 *lines, *lined;
349
350 hsm = hs - 1;
351
352 /* We're taking 2 src and 2 dest lines at a time,
353 * and for each src line, we're computing 2 dest lines.
354 * Call these 2 dest lines: destline1 and destline2.
355 * The first src line is used for destline 1.
356 * On all but the last src line, both src lines are
357 * used in the linear interpolation for destline2.
358 * On the last src line, both destline1 and destline2
359 * are computed using only that src line (because there
360 * isn't a lower src line). */
361
362 /* iterate over all but the last src line */
363 for (i = 0; i < hsm; i++) {
364 lines = datas + i * wpls;
365 lined = datad + 2 * i * wpld;
366 scaleColor2xLILineLow(lined, wpld, lines, ws, wpls, 0);
367 }
368
369 /* last src line */
370 lines = datas + hsm * wpls;
371 lined = datad + 2 * hsm * wpld;
372 scaleColor2xLILineLow(lined, wpld, lines, ws, wpls, 1);
373
374 return;
375 }
376
377
378 /*!
379 * scaleColor2xLILineLow()
380 *
381 * Input: lined (ptr to top destline, to be made from current src line)
382 * wpld
383 * lines (ptr to current src line)
384 * ws
385 * wpls
386 * lastlineflag (1 if last src line; 0 otherwise)
387 * Return: void
388 *
389 * *** Warning: implicit assumption about RGB component ordering ***
390 */
391 void
scaleColor2xLILineLow(l_uint32 * lined,l_int32 wpld,l_uint32 * lines,l_int32 ws,l_int32 wpls,l_int32 lastlineflag)392 scaleColor2xLILineLow(l_uint32 *lined,
393 l_int32 wpld,
394 l_uint32 *lines,
395 l_int32 ws,
396 l_int32 wpls,
397 l_int32 lastlineflag)
398 {
399 l_int32 j, jd, wsm;
400 l_int32 rval1, rval2, rval3, rval4, gval1, gval2, gval3, gval4;
401 l_int32 bval1, bval2, bval3, bval4;
402 l_uint32 pixels1, pixels2, pixels3, pixels4, pixel;
403 l_uint32 *linesp, *linedp;
404
405 wsm = ws - 1;
406
407 if (lastlineflag == 0) {
408 linesp = lines + wpls;
409 linedp = lined + wpld;
410 pixels1 = *lines;
411 pixels3 = *linesp;
412
413 /* initialize with v(2) and v(4) */
414 rval2 = pixels1 >> 24;
415 gval2 = (pixels1 >> 16) & 0xff;
416 bval2 = (pixels1 >> 8) & 0xff;
417 rval4 = pixels3 >> 24;
418 gval4 = (pixels3 >> 16) & 0xff;
419 bval4 = (pixels3 >> 8) & 0xff;
420
421 for (j = 0, jd = 0; j < wsm; j++, jd += 2) {
422 /* shift in previous src values */
423 rval1 = rval2;
424 gval1 = gval2;
425 bval1 = bval2;
426 rval3 = rval4;
427 gval3 = gval4;
428 bval3 = bval4;
429 /* get new src values */
430 pixels2 = *(lines + j + 1);
431 pixels4 = *(linesp + j + 1);
432 rval2 = pixels2 >> 24;
433 gval2 = (pixels2 >> 16) & 0xff;
434 bval2 = (pixels2 >> 8) & 0xff;
435 rval4 = pixels4 >> 24;
436 gval4 = (pixels4 >> 16) & 0xff;
437 bval4 = (pixels4 >> 8) & 0xff;
438 /* save dest values */
439 pixel = (rval1 << 24 | gval1 << 16 | bval1 << 8);
440 *(lined + jd) = pixel; /* pix 1 */
441 pixel = ((((rval1 + rval2) << 23) & 0xff000000) |
442 (((gval1 + gval2) << 15) & 0x00ff0000) |
443 (((bval1 + bval2) << 7) & 0x0000ff00));
444 *(lined + jd + 1) = pixel; /* pix 2 */
445 pixel = ((((rval1 + rval3) << 23) & 0xff000000) |
446 (((gval1 + gval3) << 15) & 0x00ff0000) |
447 (((bval1 + bval3) << 7) & 0x0000ff00));
448 *(linedp + jd) = pixel; /* pix 3 */
449 pixel = ((((rval1 + rval2 + rval3 + rval4) << 22) & 0xff000000) |
450 (((gval1 + gval2 + gval3 + gval4) << 14) & 0x00ff0000) |
451 (((bval1 + bval2 + bval3 + bval4) << 6) & 0x0000ff00));
452 *(linedp + jd + 1) = pixel; /* pix 4 */
453 }
454 /* last src pixel on line */
455 rval1 = rval2;
456 gval1 = gval2;
457 bval1 = bval2;
458 rval3 = rval4;
459 gval3 = gval4;
460 bval3 = bval4;
461 pixel = (rval1 << 24 | gval1 << 16 | bval1 << 8);
462 *(lined + 2 * wsm) = pixel; /* pix 1 */
463 *(lined + 2 * wsm + 1) = pixel; /* pix 2 */
464 pixel = ((((rval1 + rval3) << 23) & 0xff000000) |
465 (((gval1 + gval3) << 15) & 0x00ff0000) |
466 (((bval1 + bval3) << 7) & 0x0000ff00));
467 *(linedp + 2 * wsm) = pixel; /* pix 3 */
468 *(linedp + 2 * wsm + 1) = pixel; /* pix 4 */
469 }
470 else { /* last row of src pixels: lastlineflag == 1 */
471 linedp = lined + wpld;
472 pixels2 = *lines;
473 rval2 = pixels2 >> 24;
474 gval2 = (pixels2 >> 16) & 0xff;
475 bval2 = (pixels2 >> 8) & 0xff;
476 for (j = 0, jd = 0; j < wsm; j++, jd += 2) {
477 rval1 = rval2;
478 gval1 = gval2;
479 bval1 = bval2;
480 pixels2 = *(lines + j + 1);
481 rval2 = pixels2 >> 24;
482 gval2 = (pixels2 >> 16) & 0xff;
483 bval2 = (pixels2 >> 8) & 0xff;
484 pixel = (rval1 << 24 | gval1 << 16 | bval1 << 8);
485 *(lined + jd) = pixel; /* pix 1 */
486 *(linedp + jd) = pixel; /* pix 2 */
487 pixel = ((((rval1 + rval2) << 23) & 0xff000000) |
488 (((gval1 + gval2) << 15) & 0x00ff0000) |
489 (((bval1 + bval2) << 7) & 0x0000ff00));
490 *(lined + jd + 1) = pixel; /* pix 3 */
491 *(linedp + jd + 1) = pixel; /* pix 4 */
492 }
493 rval1 = rval2;
494 gval1 = gval2;
495 bval1 = bval2;
496 pixel = (rval1 << 24 | gval1 << 16 | bval1 << 8);
497 *(lined + 2 * wsm) = pixel; /* pix 1 */
498 *(lined + 2 * wsm + 1) = pixel; /* pix 2 */
499 *(linedp + 2 * wsm) = pixel; /* pix 3 */
500 *(linedp + 2 * wsm + 1) = pixel; /* pix 4 */
501 }
502
503 return;
504 }
505
506
507 /*------------------------------------------------------------------*
508 * 2x linear interpolated gray scaling *
509 *------------------------------------------------------------------*/
510 /*!
511 * scaleGray2xLILow()
512 *
513 * This is a special case of 2x expansion by linear
514 * interpolation. Each src pixel contains 4 dest pixels.
515 * The 4 dest pixels in src pixel 1 are numbered at
516 * their UL corners. The 4 dest pixels in src pixel 1
517 * are related to that src pixel and its 3 neighboring
518 * src pixels as follows:
519 *
520 * 1-----2-----|-----|-----|
521 * | | | | |
522 * | | | | |
523 * src 1 --> 3-----4-----| | | <-- src 2
524 * | | | | |
525 * | | | | |
526 * |-----|-----|-----|-----|
527 * | | | | |
528 * | | | | |
529 * src 3 --> | | | | | <-- src 4
530 * | | | | |
531 * | | | | |
532 * |-----|-----|-----|-----|
533 *
534 * dest src
535 * ---- ---
536 * dp1 = sp1
537 * dp2 = (sp1 + sp2) / 2
538 * dp3 = (sp1 + sp3) / 2
539 * dp4 = (sp1 + sp2 + sp3 + sp4) / 4
540 *
541 * We iterate over the src pixels, and unroll the calculation
542 * for each set of 4 dest pixels corresponding to that src
543 * pixel, caching pixels for the next src pixel whenever possible.
544 */
545 void
scaleGray2xLILow(l_uint32 * datad,l_int32 wpld,l_uint32 * datas,l_int32 ws,l_int32 hs,l_int32 wpls)546 scaleGray2xLILow(l_uint32 *datad,
547 l_int32 wpld,
548 l_uint32 *datas,
549 l_int32 ws,
550 l_int32 hs,
551 l_int32 wpls)
552 {
553 l_int32 i, hsm;
554 l_uint32 *lines, *lined;
555
556 hsm = hs - 1;
557
558 /* We're taking 2 src and 2 dest lines at a time,
559 * and for each src line, we're computing 2 dest lines.
560 * Call these 2 dest lines: destline1 and destline2.
561 * The first src line is used for destline 1.
562 * On all but the last src line, both src lines are
563 * used in the linear interpolation for destline2.
564 * On the last src line, both destline1 and destline2
565 * are computed using only that src line (because there
566 * isn't a lower src line). */
567
568 /* iterate over all but the last src line */
569 for (i = 0; i < hsm; i++) {
570 lines = datas + i * wpls;
571 lined = datad + 2 * i * wpld;
572 scaleGray2xLILineLow(lined, wpld, lines, ws, wpls, 0);
573 }
574
575 /* last src line */
576 lines = datas + hsm * wpls;
577 lined = datad + 2 * hsm * wpld;
578 scaleGray2xLILineLow(lined, wpld, lines, ws, wpls, 1);
579
580 return;
581 }
582
583
584 /*!
585 * scaleGray2xLILineLow()
586 *
587 * Input: lined (ptr to top destline, to be made from current src line)
588 * wpld
589 * lines (ptr to current src line)
590 * ws
591 * wpls
592 * lastlineflag (1 if last src line; 0 otherwise)
593 * Return: void
594 */
595 void
scaleGray2xLILineLow(l_uint32 * lined,l_int32 wpld,l_uint32 * lines,l_int32 ws,l_int32 wpls,l_int32 lastlineflag)596 scaleGray2xLILineLow(l_uint32 *lined,
597 l_int32 wpld,
598 l_uint32 *lines,
599 l_int32 ws,
600 l_int32 wpls,
601 l_int32 lastlineflag)
602 {
603 l_int32 j, jd, wsm, w;
604 l_int32 sval1, sval2, sval3, sval4;
605 l_uint32 *linesp, *linedp;
606 l_uint32 words, wordsp, wordd, worddp;
607
608 wsm = ws - 1;
609
610 if (lastlineflag == 0) {
611 linesp = lines + wpls;
612 linedp = lined + wpld;
613
614 /* Unroll the loop 4x and work on full words */
615 words = lines[0];
616 wordsp = linesp[0];
617 sval2 = (words >> 24) & 0xff;
618 sval4 = (wordsp >> 24) & 0xff;
619 for (j = 0, jd = 0, w = 0; j + 3 < wsm; j += 4, jd += 8, w++) {
620 /* At the top of the loop,
621 * words == lines[w], wordsp == linesp[w]
622 * and the top bytes of those have been loaded into
623 * sval2 and sval4. */
624 sval1 = sval2;
625 sval2 = (words >> 16) & 0xff;
626 sval3 = sval4;
627 sval4 = (wordsp >> 16) & 0xff;
628 wordd = (sval1 << 24) | (((sval1 + sval2) >> 1) << 16);
629 worddp = (((sval1 + sval3) >> 1) << 24) |
630 (((sval1 + sval2 + sval3 + sval4) >> 2) << 16);
631
632 sval1 = sval2;
633 sval2 = (words >> 8) & 0xff;
634 sval3 = sval4;
635 sval4 = (wordsp >> 8) & 0xff;
636 wordd |= (sval1 << 8) | ((sval1 + sval2) >> 1);
637 worddp |= (((sval1 + sval3) >> 1) << 8) |
638 ((sval1 + sval2 + sval3 + sval4) >> 2);
639 lined[w * 2] = wordd;
640 linedp[w * 2] = worddp;
641
642 sval1 = sval2;
643 sval2 = words & 0xff;
644 sval3 = sval4;
645 sval4 = wordsp & 0xff;
646 wordd = (sval1 << 24) | /* pix 1 */
647 (((sval1 + sval2) >> 1) << 16); /* pix 2 */
648 worddp = (((sval1 + sval3) >> 1) << 24) | /* pix 3 */
649 (((sval1 + sval2 + sval3 + sval4) >> 2) << 16); /* pix 4 */
650
651 /* Load the next word as we need its first byte */
652 words = lines[w + 1];
653 wordsp = linesp[w + 1];
654 sval1 = sval2;
655 sval2 = (words >> 24) & 0xff;
656 sval3 = sval4;
657 sval4 = (wordsp >> 24) & 0xff;
658 wordd |= (sval1 << 8) | /* pix 1 */
659 ((sval1 + sval2) >> 1); /* pix 2 */
660 worddp |= (((sval1 + sval3) >> 1) << 8) | /* pix 3 */
661 ((sval1 + sval2 + sval3 + sval4) >> 2); /* pix 4 */
662 lined[w * 2 + 1] = wordd;
663 linedp[w * 2 + 1] = worddp;
664 }
665
666 /* Finish up the last word */
667 for (; j < wsm; j++, jd += 2) {
668 sval1 = sval2;
669 sval3 = sval4;
670 sval2 = GET_DATA_BYTE(lines, j + 1);
671 sval4 = GET_DATA_BYTE(linesp, j + 1);
672 SET_DATA_BYTE(lined, jd, sval1); /* pix 1 */
673 SET_DATA_BYTE(lined, jd + 1, (sval1 + sval2) / 2); /* pix 2 */
674 SET_DATA_BYTE(linedp, jd, (sval1 + sval3) / 2); /* pix 3 */
675 SET_DATA_BYTE(linedp, jd + 1,
676 (sval1 + sval2 + sval3 + sval4) / 4); /* pix 4 */
677 }
678 sval1 = sval2;
679 sval3 = sval4;
680 SET_DATA_BYTE(lined, 2 * wsm, sval1); /* pix 1 */
681 SET_DATA_BYTE(lined, 2 * wsm + 1, sval1); /* pix 2 */
682 SET_DATA_BYTE(linedp, 2 * wsm, (sval1 + sval3) / 2); /* pix 3 */
683 SET_DATA_BYTE(linedp, 2 * wsm + 1, (sval1 + sval3) / 2); /* pix 4 */
684
685 #if DEBUG_UNROLLING
686 #define CHECK_BYTE(a, b, c) if (GET_DATA_BYTE(a, b) != c) {\
687 fprintf(stderr, "Error: mismatch at %d, %d vs %d\n", \
688 j, GET_DATA_BYTE(a, b), c); }
689
690 sval2 = GET_DATA_BYTE(lines, 0);
691 sval4 = GET_DATA_BYTE(linesp, 0);
692 for (j = 0, jd = 0; j < wsm; j++, jd += 2) {
693 sval1 = sval2;
694 sval3 = sval4;
695 sval2 = GET_DATA_BYTE(lines, j + 1);
696 sval4 = GET_DATA_BYTE(linesp, j + 1);
697 CHECK_BYTE(lined, jd, sval1); /* pix 1 */
698 CHECK_BYTE(lined, jd + 1, (sval1 + sval2) / 2); /* pix 2 */
699 CHECK_BYTE(linedp, jd, (sval1 + sval3) / 2); /* pix 3 */
700 CHECK_BYTE(linedp, jd + 1,
701 (sval1 + sval2 + sval3 + sval4) / 4); /* pix 4 */
702 }
703 sval1 = sval2;
704 sval3 = sval4;
705 CHECK_BYTE(lined, 2 * wsm, sval1); /* pix 1 */
706 CHECK_BYTE(lined, 2 * wsm + 1, sval1); /* pix 2 */
707 CHECK_BYTE(linedp, 2 * wsm, (sval1 + sval3) / 2); /* pix 3 */
708 CHECK_BYTE(linedp, 2 * wsm + 1, (sval1 + sval3) / 2); /* pix 4 */
709 #undef CHECK_BYTE
710 #endif
711 }
712 else { /* last row of src pixels: lastlineflag == 1 */
713 linedp = lined + wpld;
714 sval2 = GET_DATA_BYTE(lines, 0);
715 for (j = 0, jd = 0; j < wsm; j++, jd += 2) {
716 sval1 = sval2;
717 sval2 = GET_DATA_BYTE(lines, j + 1);
718 SET_DATA_BYTE(lined, jd, sval1); /* pix 1 */
719 SET_DATA_BYTE(linedp, jd, sval1); /* pix 3 */
720 SET_DATA_BYTE(lined, jd + 1, (sval1 + sval2) / 2); /* pix 2 */
721 SET_DATA_BYTE(linedp, jd + 1, (sval1 + sval2) / 2); /* pix 4 */
722 }
723 sval1 = sval2;
724 SET_DATA_BYTE(lined, 2 * wsm, sval1); /* pix 1 */
725 SET_DATA_BYTE(lined, 2 * wsm + 1, sval1); /* pix 2 */
726 SET_DATA_BYTE(linedp, 2 * wsm, sval1); /* pix 3 */
727 SET_DATA_BYTE(linedp, 2 * wsm + 1, sval1); /* pix 4 */
728 }
729
730 return;
731 }
732
733
734 /*------------------------------------------------------------------*
735 * 4x linear interpolated gray scaling *
736 *------------------------------------------------------------------*/
737 /*!
738 * scaleGray4xLILow()
739 *
740 * This is a special case of 4x expansion by linear
741 * interpolation. Each src pixel contains 16 dest pixels.
742 * The 16 dest pixels in src pixel 1 are numbered at
743 * their UL corners. The 16 dest pixels in src pixel 1
744 * are related to that src pixel and its 3 neighboring
745 * src pixels as follows:
746 *
747 * 1---2---3---4---|---|---|---|---|
748 * | | | | | | | | |
749 * 5---6---7---8---|---|---|---|---|
750 * | | | | | | | | |
751 * src 1 --> 9---a---b---c---|---|---|---|---| <-- src 2
752 * | | | | | | | | |
753 * d---e---f---g---|---|---|---|---|
754 * | | | | | | | | |
755 * |===|===|===|===|===|===|===|===|
756 * | | | | | | | | |
757 * |---|---|---|---|---|---|---|---|
758 * | | | | | | | | |
759 * src 3 --> |---|---|---|---|---|---|---|---| <-- src 4
760 * | | | | | | | | |
761 * |---|---|---|---|---|---|---|---|
762 * | | | | | | | | |
763 * |---|---|---|---|---|---|---|---|
764 *
765 * dest src
766 * ---- ---
767 * dp1 = sp1
768 * dp2 = (3 * sp1 + sp2) / 4
769 * dp3 = (sp1 + sp2) / 2
770 * dp4 = (sp1 + 3 * sp2) / 4
771 * dp5 = (3 * sp1 + sp3) / 4
772 * dp6 = (9 * sp1 + 3 * sp2 + 3 * sp3 + sp4) / 16
773 * dp7 = (3 * sp1 + 3 * sp2 + sp3 + sp4) / 8
774 * dp8 = (3 * sp1 + 9 * sp2 + 1 * sp3 + 3 * sp4) / 16
775 * dp9 = (sp1 + sp3) / 2
776 * dp10 = (3 * sp1 + sp2 + 3 * sp3 + sp4) / 8
777 * dp11 = (sp1 + sp2 + sp3 + sp4) / 4
778 * dp12 = (sp1 + 3 * sp2 + sp3 + 3 * sp4) / 8
779 * dp13 = (sp1 + 3 * sp3) / 4
780 * dp14 = (3 * sp1 + sp2 + 9 * sp3 + 3 * sp4) / 16
781 * dp15 = (sp1 + sp2 + 3 * sp3 + 3 * sp4) / 8
782 * dp16 = (sp1 + 3 * sp2 + 3 * sp3 + 9 * sp4) / 16
783 *
784 * We iterate over the src pixels, and unroll the calculation
785 * for each set of 16 dest pixels corresponding to that src
786 * pixel, caching pixels for the next src pixel whenever possible.
787 */
788 void
scaleGray4xLILow(l_uint32 * datad,l_int32 wpld,l_uint32 * datas,l_int32 ws,l_int32 hs,l_int32 wpls)789 scaleGray4xLILow(l_uint32 *datad,
790 l_int32 wpld,
791 l_uint32 *datas,
792 l_int32 ws,
793 l_int32 hs,
794 l_int32 wpls)
795 {
796 l_int32 i, hsm;
797 l_uint32 *lines, *lined;
798
799 hsm = hs - 1;
800
801 /* We're taking 2 src and 4 dest lines at a time,
802 * and for each src line, we're computing 4 dest lines.
803 * Call these 4 dest lines: destline1 - destline4.
804 * The first src line is used for destline 1.
805 * Two src lines are used for all other dest lines,
806 * except for the last 4 dest lines, which are computed
807 * using only the last src line. */
808
809 /* iterate over all but the last src line */
810 for (i = 0; i < hsm; i++) {
811 lines = datas + i * wpls;
812 lined = datad + 4 * i * wpld;
813 scaleGray4xLILineLow(lined, wpld, lines, ws, wpls, 0);
814 }
815
816 /* last src line */
817 lines = datas + hsm * wpls;
818 lined = datad + 4 * hsm * wpld;
819 scaleGray4xLILineLow(lined, wpld, lines, ws, wpls, 1);
820
821 return;
822 }
823
824
825 /*!
826 * scaleGray4xLILineLow()
827 *
828 * Input: lined (ptr to top destline, to be made from current src line)
829 * wpld
830 * lines (ptr to current src line)
831 * ws
832 * wpls
833 * lastlineflag (1 if last src line; 0 otherwise)
834 * Return: void
835 */
836 void
scaleGray4xLILineLow(l_uint32 * lined,l_int32 wpld,l_uint32 * lines,l_int32 ws,l_int32 wpls,l_int32 lastlineflag)837 scaleGray4xLILineLow(l_uint32 *lined,
838 l_int32 wpld,
839 l_uint32 *lines,
840 l_int32 ws,
841 l_int32 wpls,
842 l_int32 lastlineflag)
843 {
844 l_int32 j, jd, wsm, wsm4;
845 l_int32 s1, s2, s3, s4, s1t, s2t, s3t, s4t;
846 l_uint32 *linesp, *linedp1, *linedp2, *linedp3;
847
848 wsm = ws - 1;
849 wsm4 = 4 * wsm;
850
851 if (lastlineflag == 0) {
852 linesp = lines + wpls;
853 linedp1 = lined + wpld;
854 linedp2 = lined + 2 * wpld;
855 linedp3 = lined + 3 * wpld;
856 s2 = GET_DATA_BYTE(lines, 0);
857 s4 = GET_DATA_BYTE(linesp, 0);
858 for (j = 0, jd = 0; j < wsm; j++, jd += 4) {
859 s1 = s2;
860 s3 = s4;
861 s2 = GET_DATA_BYTE(lines, j + 1);
862 s4 = GET_DATA_BYTE(linesp, j + 1);
863 s1t = 3 * s1;
864 s2t = 3 * s2;
865 s3t = 3 * s3;
866 s4t = 3 * s4;
867 SET_DATA_BYTE(lined, jd, s1); /* d1 */
868 SET_DATA_BYTE(lined, jd + 1, (s1t + s2) / 4); /* d2 */
869 SET_DATA_BYTE(lined, jd + 2, (s1 + s2) / 2); /* d3 */
870 SET_DATA_BYTE(lined, jd + 3, (s1 + s2t) / 4); /* d4 */
871 SET_DATA_BYTE(linedp1, jd, (s1t + s3) / 4); /* d5 */
872 SET_DATA_BYTE(linedp1, jd + 1, (9*s1 + s2t + s3t + s4) / 16); /*d6*/
873 SET_DATA_BYTE(linedp1, jd + 2, (s1t + s2t + s3 + s4) / 8); /* d7 */
874 SET_DATA_BYTE(linedp1, jd + 3, (s1t + 9*s2 + s3 + s4t) / 16);/*d8*/
875 SET_DATA_BYTE(linedp2, jd, (s1 + s3) / 2); /* d9 */
876 SET_DATA_BYTE(linedp2, jd + 1, (s1t + s2 + s3t + s4) / 8);/* d10 */
877 SET_DATA_BYTE(linedp2, jd + 2, (s1 + s2 + s3 + s4) / 4); /* d11 */
878 SET_DATA_BYTE(linedp2, jd + 3, (s1 + s2t + s3 + s4t) / 8);/* d12 */
879 SET_DATA_BYTE(linedp3, jd, (s1 + s3t) / 4); /* d13 */
880 SET_DATA_BYTE(linedp3, jd + 1, (s1t + s2 + 9*s3 + s4t) / 16);/*d14*/
881 SET_DATA_BYTE(linedp3, jd + 2, (s1 + s2 + s3t + s4t) / 8); /* d15 */
882 SET_DATA_BYTE(linedp3, jd + 3, (s1 + s2t + s3t + 9*s4) / 16);/*d16*/
883 }
884 s1 = s2;
885 s3 = s4;
886 s1t = 3 * s1;
887 s3t = 3 * s3;
888 SET_DATA_BYTE(lined, wsm4, s1); /* d1 */
889 SET_DATA_BYTE(lined, wsm4 + 1, s1); /* d2 */
890 SET_DATA_BYTE(lined, wsm4 + 2, s1); /* d3 */
891 SET_DATA_BYTE(lined, wsm4 + 3, s1); /* d4 */
892 SET_DATA_BYTE(linedp1, wsm4, (s1t + s3) / 4); /* d5 */
893 SET_DATA_BYTE(linedp1, wsm4 + 1, (s1t + s3) / 4); /* d6 */
894 SET_DATA_BYTE(linedp1, wsm4 + 2, (s1t + s3) / 4); /* d7 */
895 SET_DATA_BYTE(linedp1, wsm4 + 3, (s1t + s3) / 4); /* d8 */
896 SET_DATA_BYTE(linedp2, wsm4, (s1 + s3) / 2); /* d9 */
897 SET_DATA_BYTE(linedp2, wsm4 + 1, (s1 + s3) / 2); /* d10 */
898 SET_DATA_BYTE(linedp2, wsm4 + 2, (s1 + s3) / 2); /* d11 */
899 SET_DATA_BYTE(linedp2, wsm4 + 3, (s1 + s3) / 2); /* d12 */
900 SET_DATA_BYTE(linedp3, wsm4, (s1 + s3t) / 4); /* d13 */
901 SET_DATA_BYTE(linedp3, wsm4 + 1, (s1 + s3t) / 4); /* d14 */
902 SET_DATA_BYTE(linedp3, wsm4 + 2, (s1 + s3t) / 4); /* d15 */
903 SET_DATA_BYTE(linedp3, wsm4 + 3, (s1 + s3t) / 4); /* d16 */
904 }
905 else { /* last row of src pixels: lastlineflag == 1 */
906 linedp1 = lined + wpld;
907 linedp2 = lined + 2 * wpld;
908 linedp3 = lined + 3 * wpld;
909 s2 = GET_DATA_BYTE(lines, 0);
910 for (j = 0, jd = 0; j < wsm; j++, jd += 4) {
911 s1 = s2;
912 s2 = GET_DATA_BYTE(lines, j + 1);
913 s1t = 3 * s1;
914 s2t = 3 * s2;
915 SET_DATA_BYTE(lined, jd, s1); /* d1 */
916 SET_DATA_BYTE(lined, jd + 1, (s1t + s2) / 4 ); /* d2 */
917 SET_DATA_BYTE(lined, jd + 2, (s1 + s2) / 2 ); /* d3 */
918 SET_DATA_BYTE(lined, jd + 3, (s1 + s2t) / 4 ); /* d4 */
919 SET_DATA_BYTE(linedp1, jd, s1); /* d5 */
920 SET_DATA_BYTE(linedp1, jd + 1, (s1t + s2) / 4 ); /* d6 */
921 SET_DATA_BYTE(linedp1, jd + 2, (s1 + s2) / 2 ); /* d7 */
922 SET_DATA_BYTE(linedp1, jd + 3, (s1 + s2t) / 4 ); /* d8 */
923 SET_DATA_BYTE(linedp2, jd, s1); /* d9 */
924 SET_DATA_BYTE(linedp2, jd + 1, (s1t + s2) / 4 ); /* d10 */
925 SET_DATA_BYTE(linedp2, jd + 2, (s1 + s2) / 2 ); /* d11 */
926 SET_DATA_BYTE(linedp2, jd + 3, (s1 + s2t) / 4 ); /* d12 */
927 SET_DATA_BYTE(linedp3, jd, s1); /* d13 */
928 SET_DATA_BYTE(linedp3, jd + 1, (s1t + s2) / 4 ); /* d14 */
929 SET_DATA_BYTE(linedp3, jd + 2, (s1 + s2) / 2 ); /* d15 */
930 SET_DATA_BYTE(linedp3, jd + 3, (s1 + s2t) / 4 ); /* d16 */
931 }
932 s1 = s2;
933 SET_DATA_BYTE(lined, wsm4, s1); /* d1 */
934 SET_DATA_BYTE(lined, wsm4 + 1, s1); /* d2 */
935 SET_DATA_BYTE(lined, wsm4 + 2, s1); /* d3 */
936 SET_DATA_BYTE(lined, wsm4 + 3, s1); /* d4 */
937 SET_DATA_BYTE(linedp1, wsm4, s1); /* d5 */
938 SET_DATA_BYTE(linedp1, wsm4 + 1, s1); /* d6 */
939 SET_DATA_BYTE(linedp1, wsm4 + 2, s1); /* d7 */
940 SET_DATA_BYTE(linedp1, wsm4 + 3, s1); /* d8 */
941 SET_DATA_BYTE(linedp2, wsm4, s1); /* d9 */
942 SET_DATA_BYTE(linedp2, wsm4 + 1, s1); /* d10 */
943 SET_DATA_BYTE(linedp2, wsm4 + 2, s1); /* d11 */
944 SET_DATA_BYTE(linedp2, wsm4 + 3, s1); /* d12 */
945 SET_DATA_BYTE(linedp3, wsm4, s1); /* d13 */
946 SET_DATA_BYTE(linedp3, wsm4 + 1, s1); /* d14 */
947 SET_DATA_BYTE(linedp3, wsm4 + 2, s1); /* d15 */
948 SET_DATA_BYTE(linedp3, wsm4 + 3, s1); /* d16 */
949 }
950
951 return;
952 }
953
954
955 /*------------------------------------------------------------------*
956 * Grayscale and color scaling by closest pixel sampling *
957 *------------------------------------------------------------------*/
958 /*!
959 * scaleBySamplingLow()
960 *
961 * Notes:
962 * (1) The dest must be cleared prior to this operation,
963 * and we clear it here in the low-level code.
964 * (2) We reuse dest pixels and dest pixel rows whenever
965 * possible. This speeds the upscaling; downscaling
966 * is done by strict subsampling and is unaffected.
967 * (3) Because we are sampling and not interpolating, this
968 * routine works directly, without conversion to full
969 * RGB color, for 2, 4 or 8 bpp palette color images.
970 */
971 l_int32
scaleBySamplingLow(l_uint32 * datad,l_int32 wd,l_int32 hd,l_int32 wpld,l_uint32 * datas,l_int32 ws,l_int32 hs,l_int32 d,l_int32 wpls)972 scaleBySamplingLow(l_uint32 *datad,
973 l_int32 wd,
974 l_int32 hd,
975 l_int32 wpld,
976 l_uint32 *datas,
977 l_int32 ws,
978 l_int32 hs,
979 l_int32 d,
980 l_int32 wpls)
981 {
982 l_int32 i, j, bpld;
983 l_int32 xs, prevxs, sval;
984 l_int32 *srow, *scol;
985 l_uint32 csval;
986 l_uint32 *lines, *prevlines, *lined, *prevlined;
987 l_float32 wratio, hratio;
988
989 PROCNAME("scaleBySamplingLow");
990
991 /* clear dest */
992 bpld = 4 * wpld;
993 memset((char *)datad, 0, hd * bpld);
994
995 /* the source row corresponding to dest row i ==> srow[i]
996 * the source col corresponding to dest col j ==> scol[j] */
997 if ((srow = (l_int32 *)CALLOC(hd, sizeof(l_int32))) == NULL)
998 return ERROR_INT("srow not made", procName, 1);
999 if ((scol = (l_int32 *)CALLOC(wd, sizeof(l_int32))) == NULL)
1000 return ERROR_INT("scol not made", procName, 1);
1001
1002 wratio = (l_float32)ws / (l_float32)wd;
1003 hratio = (l_float32)hs / (l_float32)hd;
1004 for (i = 0; i < hd; i++)
1005 srow[i] = L_MIN((l_int32)(hratio * i + 0.5), hs - 1);
1006 for (j = 0; j < wd; j++)
1007 scol[j] = L_MIN((l_int32)(wratio * j + 0.5), ws - 1);
1008
1009 prevlines = NULL;
1010 for (i = 0; i < hd; i++) {
1011 lines = datas + srow[i] * wpls;
1012 lined = datad + i * wpld;
1013 if (lines != prevlines) { /* make dest from new source row */
1014 prevxs = -1;
1015 sval = 0;
1016 csval = 0;
1017 switch (d)
1018 {
1019 case 2:
1020 for (j = 0; j < wd; j++) {
1021 xs = scol[j];
1022 if (xs != prevxs) { /* get dest pix from source col */
1023 sval = GET_DATA_DIBIT(lines, xs);
1024 SET_DATA_DIBIT(lined, j, sval);
1025 prevxs = xs;
1026 }
1027 else /* copy prev dest pix */
1028 SET_DATA_DIBIT(lined, j, sval);
1029 }
1030 break;
1031 case 4:
1032 for (j = 0; j < wd; j++) {
1033 xs = scol[j];
1034 if (xs != prevxs) { /* get dest pix from source col */
1035 sval = GET_DATA_QBIT(lines, xs);
1036 SET_DATA_QBIT(lined, j, sval);
1037 prevxs = xs;
1038 }
1039 else /* copy prev dest pix */
1040 SET_DATA_QBIT(lined, j, sval);
1041 }
1042 break;
1043 case 8:
1044 for (j = 0; j < wd; j++) {
1045 xs = scol[j];
1046 if (xs != prevxs) { /* get dest pix from source col */
1047 sval = GET_DATA_BYTE(lines, xs);
1048 SET_DATA_BYTE(lined, j, sval);
1049 prevxs = xs;
1050 }
1051 else /* copy prev dest pix */
1052 SET_DATA_BYTE(lined, j, sval);
1053 }
1054 break;
1055 case 16:
1056 for (j = 0; j < wd; j++) {
1057 xs = scol[j];
1058 if (xs != prevxs) { /* get dest pix from source col */
1059 sval = GET_DATA_TWO_BYTES(lines, xs);
1060 SET_DATA_TWO_BYTES(lined, j, sval);
1061 prevxs = xs;
1062 }
1063 else /* copy prev dest pix */
1064 SET_DATA_TWO_BYTES(lined, j, sval);
1065 }
1066 break;
1067 case 32:
1068 for (j = 0; j < wd; j++) {
1069 xs = scol[j];
1070 if (xs != prevxs) { /* get dest pix from source col */
1071 csval = lines[xs];
1072 lined[j] = csval;
1073 prevxs = xs;
1074 }
1075 else /* copy prev dest pix */
1076 lined[j] = csval;
1077 }
1078 break;
1079 default:
1080 return ERROR_INT("pixel depth not supported", procName, 1);
1081 break;
1082 }
1083 }
1084 else { /* lines == prevlines; copy prev dest row */
1085 prevlined = lined - wpld;
1086 memcpy((char *)lined, (char *)prevlined, bpld);
1087 }
1088 prevlines = lines;
1089 }
1090
1091 FREE(srow);
1092 FREE(scol);
1093
1094 return 0;
1095 }
1096
1097
1098 /*------------------------------------------------------------------*
1099 * Color and grayscale downsampling with (antialias) smoothing *
1100 *------------------------------------------------------------------*/
1101 /*!
1102 * scaleSmoothLow()
1103 *
1104 * Notes:
1105 * (1) This function is called on 8 or 32 bpp src and dest images.
1106 * (2) size is the full width of the lowpass smoothing filter.
1107 * It is correlated with the reduction ratio, being the
1108 * nearest integer such that size is approximately equal to hs / hd.
1109 */
1110 l_int32
scaleSmoothLow(l_uint32 * datad,l_int32 wd,l_int32 hd,l_int32 wpld,l_uint32 * datas,l_int32 ws,l_int32 hs,l_int32 d,l_int32 wpls,l_int32 size)1111 scaleSmoothLow(l_uint32 *datad,
1112 l_int32 wd,
1113 l_int32 hd,
1114 l_int32 wpld,
1115 l_uint32 *datas,
1116 l_int32 ws,
1117 l_int32 hs,
1118 l_int32 d,
1119 l_int32 wpls,
1120 l_int32 size)
1121 {
1122 l_int32 i, j, m, n, xstart;
1123 l_int32 val, rval, gval, bval;
1124 l_int32 *srow, *scol;
1125 l_uint32 *lines, *lined, *line, *ppixel;
1126 l_uint32 pixel;
1127 l_float32 wratio, hratio, norm;
1128
1129 PROCNAME("scaleSmoothLow");
1130
1131 /* Clear dest */
1132 memset((char *)datad, 0, 4 * wpld * hd);
1133
1134 /* Each dest pixel at (j,i) is computed as the average
1135 of size^2 corresponding src pixels.
1136 We store the UL corner location of the square of
1137 src pixels that correspond to dest pixel (j,i).
1138 The are labelled by the arrays srow[i] and scol[j]. */
1139 if ((srow = (l_int32 *)CALLOC(hd, sizeof(l_int32))) == NULL)
1140 return ERROR_INT("srow not made", procName, 1);
1141 if ((scol = (l_int32 *)CALLOC(wd, sizeof(l_int32))) == NULL)
1142 return ERROR_INT("scol not made", procName, 1);
1143
1144 norm = 1. / (l_float32)(size * size);
1145 wratio = (l_float32)ws / (l_float32)wd;
1146 hratio = (l_float32)hs / (l_float32)hd;
1147 for (i = 0; i < hd; i++)
1148 srow[i] = L_MIN((l_int32)(hratio * i), hs - size);
1149 for (j = 0; j < wd; j++)
1150 scol[j] = L_MIN((l_int32)(wratio * j), ws - size);
1151
1152 /* For each dest pixel, compute average */
1153 if (d == 8) {
1154 for (i = 0; i < hd; i++) {
1155 lines = datas + srow[i] * wpls;
1156 lined = datad + i * wpld;
1157 for (j = 0; j < wd; j++) {
1158 xstart = scol[j];
1159 val = 0;
1160 for (m = 0; m < size; m++) {
1161 line = lines + m * wpls;
1162 for (n = 0; n < size; n++) {
1163 val += GET_DATA_BYTE(line, xstart + n);
1164 }
1165 }
1166 val = (l_int32)((l_float32)val * norm);
1167 SET_DATA_BYTE(lined, j, val);
1168 }
1169 }
1170 }
1171 else { /* d == 32 */
1172 for (i = 0; i < hd; i++) {
1173 lines = datas + srow[i] * wpls;
1174 lined = datad + i * wpld;
1175 for (j = 0; j < wd; j++) {
1176 xstart = scol[j];
1177 rval = gval = bval = 0;
1178 for (m = 0; m < size; m++) {
1179 ppixel = lines + m * wpls + xstart;
1180 for (n = 0; n < size; n++) {
1181 pixel = *(ppixel + n);
1182 rval += (pixel >> L_RED_SHIFT) & 0xff;
1183 gval += (pixel >> L_GREEN_SHIFT) & 0xff;
1184 bval += (pixel >> L_BLUE_SHIFT) & 0xff;
1185 }
1186 }
1187 rval = (l_int32)((l_float32)rval * norm);
1188 gval = (l_int32)((l_float32)gval * norm);
1189 bval = (l_int32)((l_float32)bval * norm);
1190 *(lined + j) = rval << L_RED_SHIFT |
1191 gval << L_GREEN_SHIFT |
1192 bval << L_BLUE_SHIFT;
1193 }
1194 }
1195 }
1196
1197 FREE(srow);
1198 FREE(scol);
1199 return 0;
1200 }
1201
1202
1203 /*!
1204 * scaleRGBToGray2Low()
1205 *
1206 * Note: This function is called with 32 bpp RGB src and 8 bpp,
1207 * half-resolution dest. The weights should add to 1.0.
1208 */
1209 void
scaleRGBToGray2Low(l_uint32 * datad,l_int32 wd,l_int32 hd,l_int32 wpld,l_uint32 * datas,l_int32 wpls,l_float32 rwt,l_float32 gwt,l_float32 bwt)1210 scaleRGBToGray2Low(l_uint32 *datad,
1211 l_int32 wd,
1212 l_int32 hd,
1213 l_int32 wpld,
1214 l_uint32 *datas,
1215 l_int32 wpls,
1216 l_float32 rwt,
1217 l_float32 gwt,
1218 l_float32 bwt)
1219 {
1220 l_int32 i, j, val, rval, gval, bval;
1221 l_uint32 *lines, *lined;
1222 l_uint32 pixel;
1223
1224 rwt *= 0.25;
1225 gwt *= 0.25;
1226 bwt *= 0.25;
1227 for (i = 0; i < hd; i++) {
1228 lines = datas + 2 * i * wpls;
1229 lined = datad + i * wpld;
1230 for (j = 0; j < wd; j++) {
1231 /* Sum each of the color components from 4 src pixels */
1232 pixel = *(lines + 2 * j);
1233 rval = (pixel >> L_RED_SHIFT) & 0xff;
1234 gval = (pixel >> L_GREEN_SHIFT) & 0xff;
1235 bval = (pixel >> L_BLUE_SHIFT) & 0xff;
1236 pixel = *(lines + 2 * j + 1);
1237 rval += (pixel >> L_RED_SHIFT) & 0xff;
1238 gval += (pixel >> L_GREEN_SHIFT) & 0xff;
1239 bval += (pixel >> L_BLUE_SHIFT) & 0xff;
1240 pixel = *(lines + wpls + 2 * j);
1241 rval += (pixel >> L_RED_SHIFT) & 0xff;
1242 gval += (pixel >> L_GREEN_SHIFT) & 0xff;
1243 bval += (pixel >> L_BLUE_SHIFT) & 0xff;
1244 pixel = *(lines + wpls + 2 * j + 1);
1245 rval += (pixel >> L_RED_SHIFT) & 0xff;
1246 gval += (pixel >> L_GREEN_SHIFT) & 0xff;
1247 bval += (pixel >> L_BLUE_SHIFT) & 0xff;
1248 /* Generate the dest byte as a weighted sum of the averages */
1249 val = (l_int32)(rwt * rval + gwt * gval + bwt * bval);
1250 SET_DATA_BYTE(lined, j, val);
1251 }
1252 }
1253 }
1254
1255
1256 /*------------------------------------------------------------------*
1257 * General area mapped gray scaling *
1258 *------------------------------------------------------------------*/
1259 /*!
1260 * scaleColorAreaMapLow()
1261 *
1262 * This should only be used for downscaling.
1263 * We choose to divide each pixel into 16 x 16 sub-pixels.
1264 * This is much slower than scaleSmoothLow(), but it gives a
1265 * better representation, esp. for downscaling factors between
1266 * 1.5 and 5. All src pixels are subdivided into 256 sub-pixels,
1267 * and are weighted by the number of sub-pixels covered by
1268 * the dest pixel. This is about 2x slower than scaleSmoothLow(),
1269 * but the results are significantly better on small text.
1270 */
1271 void
scaleColorAreaMapLow(l_uint32 * datad,l_int32 wd,l_int32 hd,l_int32 wpld,l_uint32 * datas,l_int32 ws,l_int32 hs,l_int32 wpls)1272 scaleColorAreaMapLow(l_uint32 *datad,
1273 l_int32 wd,
1274 l_int32 hd,
1275 l_int32 wpld,
1276 l_uint32 *datas,
1277 l_int32 ws,
1278 l_int32 hs,
1279 l_int32 wpls)
1280 {
1281 l_int32 i, j, k, m, wm2, hm2;
1282 l_int32 area00, area10, area01, area11, areal, arear, areat, areab;
1283 l_int32 xu, yu; /* UL corner in src image, to 1/16 of a pixel */
1284 l_int32 xl, yl; /* LR corner in src image, to 1/16 of a pixel */
1285 l_int32 xup, yup, xuf, yuf; /* UL src pixel: integer and fraction */
1286 l_int32 xlp, ylp, xlf, ylf; /* LR src pixel: integer and fraction */
1287 l_int32 delx, dely, area;
1288 l_int32 v00r, v00g, v00b; /* contrib. from UL src pixel */
1289 l_int32 v01r, v01g, v01b; /* contrib. from LL src pixel */
1290 l_int32 v10r, v10g, v10b; /* contrib from UR src pixel */
1291 l_int32 v11r, v11g, v11b; /* contrib from LR src pixel */
1292 l_int32 vinr, ving, vinb; /* contrib from all full interior src pixels */
1293 l_int32 vmidr, vmidg, vmidb; /* contrib from side parts */
1294 l_int32 rval, gval, bval;
1295 l_uint32 pixel00, pixel10, pixel01, pixel11, pixel;
1296 l_uint32 *lines, *lined;
1297 l_float32 scx, scy;
1298
1299 /* (scx, scy) are scaling factors that are applied to the
1300 * dest coords to get the corresponding src coords.
1301 * We need them because we iterate over dest pixels
1302 * and must find the corresponding set of src pixels. */
1303 scx = 16. * (l_float32)ws / (l_float32)wd;
1304 scy = 16. * (l_float32)hs / (l_float32)hd;
1305
1306 wm2 = ws - 2;
1307 hm2 = hs - 2;
1308
1309 /* Iterate over the destination pixels */
1310 for (i = 0; i < hd; i++) {
1311 yu = (l_int32)(scy * i);
1312 yl = (l_int32)(scy * (i + 1.0));
1313 yup = yu >> 4;
1314 yuf = yu & 0x0f;
1315 ylp = yl >> 4;
1316 ylf = yl & 0x0f;
1317 dely = ylp - yup;
1318 lined = datad + i * wpld;
1319 lines = datas + yup * wpls;
1320 for (j = 0; j < wd; j++) {
1321 xu = (l_int32)(scx * j);
1322 xl = (l_int32)(scx * (j + 1.0));
1323 xup = xu >> 4;
1324 xuf = xu & 0x0f;
1325 xlp = xl >> 4;
1326 xlf = xl & 0x0f;
1327 delx = xlp - xup;
1328
1329 /* If near the edge, just use a src pixel value */
1330 if (xlp > wm2 || ylp > hm2) {
1331 *(lined + j) = *(lines + xup);
1332 continue;
1333 }
1334
1335 /* Area summed over, in subpixels. This varies
1336 * due to the quantization, so we can't simply take
1337 * the area to be a constant: area = scx * scy. */
1338 area = ((16 - xuf) + 16 * (delx - 1) + xlf) *
1339 ((16 - yuf) + 16 * (dely - 1) + ylf);
1340
1341 /* Do area map summation */
1342 pixel00 = *(lines + xup);
1343 pixel10 = *(lines + xlp);
1344 pixel01 = *(lines + dely * wpls + xup);
1345 pixel11 = *(lines + dely * wpls + xlp);
1346 area00 = (16 - xuf) * (16 - yuf);
1347 area10 = xlf * (16 - yuf);
1348 area01 = (16 - xuf) * ylf;
1349 area11 = xlf * ylf;
1350 v00r = area00 * ((pixel00 >> L_RED_SHIFT) & 0xff);
1351 v00g = area00 * ((pixel00 >> L_GREEN_SHIFT) & 0xff);
1352 v00b = area00 * ((pixel00 >> L_BLUE_SHIFT) & 0xff);
1353 v10r = area10 * ((pixel10 >> L_RED_SHIFT) & 0xff);
1354 v10g = area10 * ((pixel10 >> L_GREEN_SHIFT) & 0xff);
1355 v10b = area10 * ((pixel10 >> L_BLUE_SHIFT) & 0xff);
1356 v01r = area01 * ((pixel01 >> L_RED_SHIFT) & 0xff);
1357 v01g = area01 * ((pixel01 >> L_GREEN_SHIFT) & 0xff);
1358 v01b = area01 * ((pixel01 >> L_BLUE_SHIFT) & 0xff);
1359 v11r = area11 * ((pixel11 >> L_RED_SHIFT) & 0xff);
1360 v11g = area11 * ((pixel11 >> L_GREEN_SHIFT) & 0xff);
1361 v11b = area11 * ((pixel11 >> L_BLUE_SHIFT) & 0xff);
1362 vinr = ving = vinb = 0;
1363 for (k = 1; k < dely; k++) { /* for full src pixels */
1364 for (m = 1; m < delx; m++) {
1365 pixel = *(lines + k * wpls + xup + m);
1366 vinr += 256 * ((pixel >> L_RED_SHIFT) & 0xff);
1367 ving += 256 * ((pixel >> L_GREEN_SHIFT) & 0xff);
1368 vinb += 256 * ((pixel >> L_BLUE_SHIFT) & 0xff);
1369 }
1370 }
1371 vmidr = vmidg = vmidb = 0;
1372 areal = (16 - xuf) * 16;
1373 arear = xlf * 16;
1374 areat = 16 * (16 - yuf);
1375 areab = 16 * ylf;
1376 for (k = 1; k < dely; k++) { /* for left side */
1377 pixel = *(lines + k * wpls + xup);
1378 vmidr += areal * ((pixel >> L_RED_SHIFT) & 0xff);
1379 vmidg += areal * ((pixel >> L_GREEN_SHIFT) & 0xff);
1380 vmidb += areal * ((pixel >> L_BLUE_SHIFT) & 0xff);
1381 }
1382 for (k = 1; k < dely; k++) { /* for right side */
1383 pixel = *(lines + k * wpls + xlp);
1384 vmidr += arear * ((pixel >> L_RED_SHIFT) & 0xff);
1385 vmidg += arear * ((pixel >> L_GREEN_SHIFT) & 0xff);
1386 vmidb += arear * ((pixel >> L_BLUE_SHIFT) & 0xff);
1387 }
1388 for (m = 1; m < delx; m++) { /* for top side */
1389 pixel = *(lines + xup + m);
1390 vmidr += areat * ((pixel >> L_RED_SHIFT) & 0xff);
1391 vmidg += areat * ((pixel >> L_GREEN_SHIFT) & 0xff);
1392 vmidb += areat * ((pixel >> L_BLUE_SHIFT) & 0xff);
1393 }
1394 for (m = 1; m < delx; m++) { /* for bottom side */
1395 pixel = *(lines + dely * wpls + xup + m);
1396 vmidr += areab * ((pixel >> L_RED_SHIFT) & 0xff);
1397 vmidg += areab * ((pixel >> L_GREEN_SHIFT) & 0xff);
1398 vmidb += areab * ((pixel >> L_BLUE_SHIFT) & 0xff);
1399 }
1400
1401 /* Sum all the contributions */
1402 rval = (v00r + v01r + v10r + v11r + vinr + vmidr + 128) / area;
1403 gval = (v00g + v01g + v10g + v11g + ving + vmidg + 128) / area;
1404 bval = (v00b + v01b + v10b + v11b + vinb + vmidb + 128) / area;
1405 #if DEBUG_OVERFLOW
1406 if (rval > 255) fprintf(stderr, "rval ovfl: %d\n", rval);
1407 if (rval > 255) fprintf(stderr, "gval ovfl: %d\n", gval);
1408 if (rval > 255) fprintf(stderr, "bval ovfl: %d\n", bval);
1409 #endif /* DEBUG_OVERFLOW */
1410 composeRGBPixel(rval, gval, bval, lined + j);
1411 }
1412 }
1413
1414 return;
1415 }
1416
1417
1418 /*!
1419 * scaleGrayAreaMapLow()
1420 *
1421 * This should only be used for downscaling.
1422 * We choose to divide each pixel into 16 x 16 sub-pixels.
1423 * This is about 2x slower than scaleSmoothLow(), but the results
1424 * are significantly better on small text, esp. for downscaling
1425 * factors between 1.5 and 5. All src pixels are subdivided
1426 * into 256 sub-pixels, and are weighted by the number of
1427 * sub-pixels covered by the dest pixel.
1428 */
1429 void
scaleGrayAreaMapLow(l_uint32 * datad,l_int32 wd,l_int32 hd,l_int32 wpld,l_uint32 * datas,l_int32 ws,l_int32 hs,l_int32 wpls)1430 scaleGrayAreaMapLow(l_uint32 *datad,
1431 l_int32 wd,
1432 l_int32 hd,
1433 l_int32 wpld,
1434 l_uint32 *datas,
1435 l_int32 ws,
1436 l_int32 hs,
1437 l_int32 wpls)
1438 {
1439 l_int32 i, j, k, m, wm2, hm2;
1440 l_int32 xu, yu; /* UL corner in src image, to 1/16 of a pixel */
1441 l_int32 xl, yl; /* LR corner in src image, to 1/16 of a pixel */
1442 l_int32 xup, yup, xuf, yuf; /* UL src pixel: integer and fraction */
1443 l_int32 xlp, ylp, xlf, ylf; /* LR src pixel: integer and fraction */
1444 l_int32 delx, dely, area;
1445 l_int32 v00; /* contrib. from UL src pixel */
1446 l_int32 v01; /* contrib. from LL src pixel */
1447 l_int32 v10; /* contrib from UR src pixel */
1448 l_int32 v11; /* contrib from LR src pixel */
1449 l_int32 vin; /* contrib from all full interior src pixels */
1450 l_int32 vmid; /* contrib from side parts that are full in 1 direction */
1451 l_int32 val;
1452 l_uint32 *lines, *lined;
1453 l_float32 scx, scy;
1454
1455 /* (scx, scy) are scaling factors that are applied to the
1456 * dest coords to get the corresponding src coords.
1457 * We need them because we iterate over dest pixels
1458 * and must find the corresponding set of src pixels. */
1459 scx = 16. * (l_float32)ws / (l_float32)wd;
1460 scy = 16. * (l_float32)hs / (l_float32)hd;
1461
1462 wm2 = ws - 2;
1463 hm2 = hs - 2;
1464
1465 /* Iterate over the destination pixels */
1466 for (i = 0; i < hd; i++) {
1467 yu = (l_int32)(scy * i);
1468 yl = (l_int32)(scy * (i + 1.0));
1469 yup = yu >> 4;
1470 yuf = yu & 0x0f;
1471 ylp = yl >> 4;
1472 ylf = yl & 0x0f;
1473 dely = ylp - yup;
1474 lined = datad + i * wpld;
1475 lines = datas + yup * wpls;
1476 for (j = 0; j < wd; j++) {
1477 xu = (l_int32)(scx * j);
1478 xl = (l_int32)(scx * (j + 1.0));
1479 xup = xu >> 4;
1480 xuf = xu & 0x0f;
1481 xlp = xl >> 4;
1482 xlf = xl & 0x0f;
1483 delx = xlp - xup;
1484
1485 /* If near the edge, just use a src pixel value */
1486 if (xlp > wm2 || ylp > hm2) {
1487 SET_DATA_BYTE(lined, j, GET_DATA_BYTE(lines, xup));
1488 continue;
1489 }
1490
1491 /* Area summed over, in subpixels. This varies
1492 * due to the quantization, so we can't simply take
1493 * the area to be a constant: area = scx * scy. */
1494 area = ((16 - xuf) + 16 * (delx - 1) + xlf) *
1495 ((16 - yuf) + 16 * (dely - 1) + ylf);
1496
1497 /* Do area map summation */
1498 v00 = (16 - xuf) * (16 - yuf) * GET_DATA_BYTE(lines, xup);
1499 v10 = xlf * (16 - yuf) * GET_DATA_BYTE(lines, xlp);
1500 v01 = (16 - xuf) * ylf * GET_DATA_BYTE(lines + dely * wpls, xup);
1501 v11 = xlf * ylf * GET_DATA_BYTE(lines + dely * wpls, xlp);
1502 for (vin = 0, k = 1; k < dely; k++) { /* for full src pixels */
1503 for (m = 1; m < delx; m++) {
1504 vin += 256 * GET_DATA_BYTE(lines + k * wpls, xup + m);
1505 }
1506 }
1507 for (vmid = 0, k = 1; k < dely; k++) /* for left side */
1508 vmid += (16 - xuf) * 16 * GET_DATA_BYTE(lines + k * wpls, xup);
1509 for (k = 1; k < dely; k++) /* for right side */
1510 vmid += xlf * 16 * GET_DATA_BYTE(lines + k * wpls, xlp);
1511 for (m = 1; m < delx; m++) /* for top side */
1512 vmid += 16 * (16 - yuf) * GET_DATA_BYTE(lines, xup + m);
1513 for (m = 1; m < delx; m++) /* for bottom side */
1514 vmid += 16 * ylf * GET_DATA_BYTE(lines + dely * wpls, xup + m);
1515 val = (v00 + v01 + v10 + v11 + vin + vmid + 128) / area;
1516 #if DEBUG_OVERFLOW
1517 if (val > 255) fprintf(stderr, "val overflow: %d\n", val);
1518 #endif /* DEBUG_OVERFLOW */
1519 SET_DATA_BYTE(lined, j, val);
1520 }
1521 }
1522
1523 return;
1524 }
1525
1526
1527 /*------------------------------------------------------------------*
1528 * 2x area mapped downscaling *
1529 *------------------------------------------------------------------*/
1530 /*!
1531 * scaleAreaMapLow2()
1532 *
1533 * Note: This function is called with either 8 bpp gray or 32 bpp RGB.
1534 * The result is a 2x reduced dest.
1535 */
1536 void
scaleAreaMapLow2(l_uint32 * datad,l_int32 wd,l_int32 hd,l_int32 wpld,l_uint32 * datas,l_int32 d,l_int32 wpls)1537 scaleAreaMapLow2(l_uint32 *datad,
1538 l_int32 wd,
1539 l_int32 hd,
1540 l_int32 wpld,
1541 l_uint32 *datas,
1542 l_int32 d,
1543 l_int32 wpls)
1544 {
1545 l_int32 i, j, val, rval, gval, bval;
1546 l_uint32 *lines, *lined;
1547 l_uint32 pixel;
1548
1549 if (d == 8) {
1550 for (i = 0; i < hd; i++) {
1551 lines = datas + 2 * i * wpls;
1552 lined = datad + i * wpld;
1553 for (j = 0; j < wd; j++) {
1554 /* Average each dest pixel using 4 src pixels */
1555 val = GET_DATA_BYTE(lines, 2 * j);
1556 val += GET_DATA_BYTE(lines, 2 * j + 1);
1557 val += GET_DATA_BYTE(lines + wpls, 2 * j);
1558 val += GET_DATA_BYTE(lines + wpls, 2 * j + 1);
1559 val >>= 2;
1560 SET_DATA_BYTE(lined, j, val);
1561 }
1562 }
1563 }
1564 else { /* d == 32 */
1565 for (i = 0; i < hd; i++) {
1566 lines = datas + 2 * i * wpls;
1567 lined = datad + i * wpld;
1568 for (j = 0; j < wd; j++) {
1569 /* Average each of the color components from 4 src pixels */
1570 pixel = *(lines + 2 * j);
1571 rval = (pixel >> L_RED_SHIFT) & 0xff;
1572 gval = (pixel >> L_GREEN_SHIFT) & 0xff;
1573 bval = (pixel >> L_BLUE_SHIFT) & 0xff;
1574 pixel = *(lines + 2 * j + 1);
1575 rval += (pixel >> L_RED_SHIFT) & 0xff;
1576 gval += (pixel >> L_GREEN_SHIFT) & 0xff;
1577 bval += (pixel >> L_BLUE_SHIFT) & 0xff;
1578 pixel = *(lines + wpls + 2 * j);
1579 rval += (pixel >> L_RED_SHIFT) & 0xff;
1580 gval += (pixel >> L_GREEN_SHIFT) & 0xff;
1581 bval += (pixel >> L_BLUE_SHIFT) & 0xff;
1582 pixel = *(lines + wpls + 2 * j + 1);
1583 rval += (pixel >> L_RED_SHIFT) & 0xff;
1584 gval += (pixel >> L_GREEN_SHIFT) & 0xff;
1585 bval += (pixel >> L_BLUE_SHIFT) & 0xff;
1586 composeRGBPixel(rval >> 2, gval >> 2, bval >> 2, &pixel);
1587 *(lined + j) = pixel;
1588 }
1589 }
1590 }
1591 return;
1592 }
1593
1594
1595 /*------------------------------------------------------------------*
1596 * Binary scaling by closest pixel sampling *
1597 *------------------------------------------------------------------*/
1598 /*
1599 * scaleBinaryLow()
1600 *
1601 * Notes:
1602 * (1) The dest must be cleared prior to this operation,
1603 * and we clear it here in the low-level code.
1604 * (2) We reuse dest pixels and dest pixel rows whenever
1605 * possible for upscaling; downscaling is done by
1606 * strict subsampling.
1607 */
1608 l_int32
scaleBinaryLow(l_uint32 * datad,l_int32 wd,l_int32 hd,l_int32 wpld,l_uint32 * datas,l_int32 ws,l_int32 hs,l_int32 wpls)1609 scaleBinaryLow(l_uint32 *datad,
1610 l_int32 wd,
1611 l_int32 hd,
1612 l_int32 wpld,
1613 l_uint32 *datas,
1614 l_int32 ws,
1615 l_int32 hs,
1616 l_int32 wpls)
1617 {
1618 l_int32 i, j, bpld;
1619 l_int32 xs, prevxs, sval;
1620 l_int32 *srow, *scol;
1621 l_uint32 *lines, *prevlines, *lined, *prevlined;
1622 l_float32 wratio, hratio;
1623
1624 PROCNAME("scaleBinaryLow");
1625
1626 /* clear dest */
1627 bpld = 4 * wpld;
1628 memset((char *)datad, 0, hd * bpld);
1629
1630 /* The source row corresponding to dest row i ==> srow[i]
1631 * The source col corresponding to dest col j ==> scol[j] */
1632 if ((srow = (l_int32 *)CALLOC(hd, sizeof(l_int32))) == NULL)
1633 return ERROR_INT("srow not made", procName, 1);
1634 if ((scol = (l_int32 *)CALLOC(wd, sizeof(l_int32))) == NULL)
1635 return ERROR_INT("scol not made", procName, 1);
1636
1637 wratio = (l_float32)ws / (l_float32)wd;
1638 hratio = (l_float32)hs / (l_float32)hd;
1639 for (i = 0; i < hd; i++)
1640 srow[i] = L_MIN((l_int32)(hratio * i + 0.5), hs - 1);
1641 for (j = 0; j < wd; j++)
1642 scol[j] = L_MIN((l_int32)(wratio * j + 0.5), ws - 1);
1643
1644 prevlines = NULL;
1645 prevxs = -1;
1646 sval = 0;
1647 for (i = 0; i < hd; i++) {
1648 lines = datas + srow[i] * wpls;
1649 lined = datad + i * wpld;
1650 if (lines != prevlines) { /* make dest from new source row */
1651 for (j = 0; j < wd; j++) {
1652 xs = scol[j];
1653 if (xs != prevxs) { /* get dest pix from source col */
1654 if ((sval = GET_DATA_BIT(lines, xs)))
1655 SET_DATA_BIT(lined, j);
1656 prevxs = xs;
1657 }
1658 else { /* copy prev dest pix, if set */
1659 if (sval)
1660 SET_DATA_BIT(lined, j);
1661 }
1662 }
1663 }
1664 else { /* lines == prevlines; copy prev dest row */
1665 prevlined = lined - wpld;
1666 memcpy((char *)lined, (char *)prevlined, bpld);
1667 }
1668 prevlines = lines;
1669 }
1670
1671 FREE(srow);
1672 FREE(scol);
1673
1674 return 0;
1675 }
1676
1677
1678 /*------------------------------------------------------------------*
1679 * Scale-to-gray 2x *
1680 *------------------------------------------------------------------*/
1681 /*!
1682 * scaleToGray2Low()
1683 *
1684 * Input: usual image variables
1685 * sumtab (made from makeSumTabSG2())
1686 * valtab (made from makeValTabSG2())
1687 * Return: 0 if OK; 1 on error.
1688 *
1689 * The output is processed in sets of 4 output bytes on a row,
1690 * corresponding to 4 2x2 bit-blocks in the input image.
1691 * Two lookup tables are used. The first, sumtab, gets the
1692 * sum of ON pixels in 4 sets of two adjacent bits,
1693 * storing the result in 4 adjacent bytes. After sums from
1694 * two rows have been added, the second table, valtab,
1695 * converts from the sum of ON pixels in the 2x2 block to
1696 * an 8 bpp grayscale value between 0 (for 4 bits ON)
1697 * and 255 (for 0 bits ON).
1698 */
1699 void
scaleToGray2Low(l_uint32 * datad,l_int32 wd,l_int32 hd,l_int32 wpld,l_uint32 * datas,l_int32 wpls,l_uint32 * sumtab,l_uint8 * valtab)1700 scaleToGray2Low(l_uint32 *datad,
1701 l_int32 wd,
1702 l_int32 hd,
1703 l_int32 wpld,
1704 l_uint32 *datas,
1705 l_int32 wpls,
1706 l_uint32 *sumtab,
1707 l_uint8 *valtab)
1708 {
1709 l_int32 i, j, l, k, m, wd4, extra;
1710 l_uint32 sbyte1, sbyte2, sum;
1711 l_uint32 *lines, *lined;
1712
1713 /* i indexes the dest lines
1714 * l indexes the source lines
1715 * j indexes the dest bytes
1716 * k indexes the source bytes
1717 * We take two bytes from the source (in 2 lines of 8 pixels
1718 * each) and convert them into four 8 bpp bytes of the dest. */
1719 wd4 = wd & 0xfffffffc;
1720 extra = wd - wd4;
1721 for (i = 0, l = 0; i < hd; i++, l += 2) {
1722 lines = datas + l * wpls;
1723 lined = datad + i * wpld;
1724 for (j = 0, k = 0; j < wd4; j += 4, k++) {
1725 sbyte1 = GET_DATA_BYTE(lines, k);
1726 sbyte2 = GET_DATA_BYTE(lines + wpls, k);
1727 sum = sumtab[sbyte1] + sumtab[sbyte2];
1728 SET_DATA_BYTE(lined, j, valtab[sum >> 24]);
1729 SET_DATA_BYTE(lined, j + 1, valtab[(sum >> 16) & 0xff]);
1730 SET_DATA_BYTE(lined, j + 2, valtab[(sum >> 8) & 0xff]);
1731 SET_DATA_BYTE(lined, j + 3, valtab[sum & 0xff]);
1732 }
1733 if (extra > 0) {
1734 sbyte1 = GET_DATA_BYTE(lines, k);
1735 sbyte2 = GET_DATA_BYTE(lines + wpls, k);
1736 sum = sumtab[sbyte1] + sumtab[sbyte2];
1737 for (m = 0; m < extra; m++) {
1738 SET_DATA_BYTE(lined, j + m,
1739 valtab[((sum >> (24 - 8 * m)) & 0xff)]);
1740 }
1741 }
1742
1743 }
1744
1745 return;
1746 }
1747
1748
1749 /*!
1750 * makeSumTabSG2()
1751 *
1752 * Returns a table of 256 l_uint32s, giving the four output
1753 * 8-bit grayscale sums corresponding to 8 input bits of a binary
1754 * image, for a 2x scale-to-gray op. The sums from two
1755 * adjacent scanlines are then added and transformed to
1756 * output four 8 bpp pixel values, using makeValTabSG2().
1757 */
1758 l_uint32 *
makeSumTabSG2(void)1759 makeSumTabSG2(void)
1760 {
1761 l_int32 i;
1762 l_int32 sum[] = {0, 1, 1, 2};
1763 l_uint32 *tab;
1764
1765 PROCNAME("makeSumTabSG2");
1766
1767 if ((tab = (l_uint32 *)CALLOC(256, sizeof(l_uint32))) == NULL)
1768 return (l_uint32 *)ERROR_PTR("calloc fail for tab", procName, NULL);
1769
1770 /* Pack the four sums separately in four bytes */
1771 for (i = 0; i < 256; i++) {
1772 tab[i] = (sum[i & 0x3] | sum[(i >> 2) & 0x3] << 8 |
1773 sum[(i >> 4) & 0x3] << 16 | sum[(i >> 6) & 0x3] << 24);
1774 }
1775
1776 return tab;
1777 }
1778
1779
1780 /*!
1781 * makeValTabSG2()
1782 *
1783 * Returns an 8 bit value for the sum of ON pixels
1784 * in a 2x2 square, according to
1785 *
1786 * val = 255 - (255 * sum)/4
1787 *
1788 * where sum is in set {0,1,2,3,4}
1789 */
1790 l_uint8 *
makeValTabSG2(void)1791 makeValTabSG2(void)
1792 {
1793 l_int32 i;
1794 l_uint8 *tab;
1795
1796 PROCNAME("makeValTabSG2");
1797
1798 if ((tab = (l_uint8 *)CALLOC(5, sizeof(l_uint8))) == NULL)
1799 return (l_uint8 *)ERROR_PTR("calloc fail for tab", procName, NULL);
1800
1801 for (i = 0; i < 5; i++)
1802 tab[i] = 255 - (i * 255) / 4;
1803
1804 return tab;
1805 }
1806
1807
1808 /*------------------------------------------------------------------*
1809 * Scale-to-gray 3x *
1810 *------------------------------------------------------------------*/
1811 /*!
1812 * scaleToGray3Low()
1813 *
1814 * Input: usual image variables
1815 * sumtab (made from makeSumTabSG3())
1816 * valtab (made from makeValTabSG3())
1817 * Return: 0 if OK; 1 on error
1818 *
1819 * Each set of 8 3x3 bit-blocks in the source image, which
1820 * consist of 72 pixels arranged 24 pixels wide by 3 scanlines,
1821 * is converted to a row of 8 8-bit pixels in the dest image.
1822 * These 72 pixels of the input image are runs of 24 pixels
1823 * in three adjacent scanlines. Each run of 24 pixels is
1824 * stored in the 24 LSbits of a 32-bit word. We use 2 LUTs.
1825 * The first, sumtab, takes 6 of these bits and stores
1826 * sum, taken 3 bits at a time, in two bytes. (See
1827 * makeSumTabSG3). This is done for each of the 3 scanlines,
1828 * and the results are added. We now have the sum of ON pixels
1829 * in the first two 3x3 blocks in two bytes. The valtab LUT
1830 * then converts these values (which go from 0 to 9) to
1831 * grayscale values between between 255 and 0. (See makeValTabSG3).
1832 * This process is repeated for each of the other 3 sets of
1833 * 6x3 input pixels, giving 8 output pixels in total.
1834 *
1835 * Note: because the input image is processed in groups of
1836 * 24 x 3 pixels, the process clips the input height to
1837 * (h - h % 3) and the input width to (w - w % 24).
1838 */
1839 void
scaleToGray3Low(l_uint32 * datad,l_int32 wd,l_int32 hd,l_int32 wpld,l_uint32 * datas,l_int32 wpls,l_uint32 * sumtab,l_uint8 * valtab)1840 scaleToGray3Low(l_uint32 *datad,
1841 l_int32 wd,
1842 l_int32 hd,
1843 l_int32 wpld,
1844 l_uint32 *datas,
1845 l_int32 wpls,
1846 l_uint32 *sumtab,
1847 l_uint8 *valtab)
1848 {
1849 l_int32 i, j, l, k;
1850 l_uint32 threebytes1, threebytes2, threebytes3, sum;
1851 l_uint32 *lines, *lined;
1852
1853 /* i indexes the dest lines
1854 * l indexes the source lines
1855 * j indexes the dest bytes
1856 * k indexes the source bytes
1857 * We take 9 bytes from the source (72 binary pixels
1858 * in three lines of 24 pixels each) and convert it
1859 * into 8 bytes of the dest (8 8bpp pixels in one line) */
1860 for (i = 0, l = 0; i < hd; i++, l += 3) {
1861 lines = datas + l * wpls;
1862 lined = datad + i * wpld;
1863 for (j = 0, k = 0; j < wd; j += 8, k += 3) {
1864 threebytes1 = (GET_DATA_BYTE(lines, k) << 16) |
1865 (GET_DATA_BYTE(lines, k + 1) << 8) |
1866 GET_DATA_BYTE(lines, k + 2);
1867 threebytes2 = (GET_DATA_BYTE(lines + wpls, k) << 16) |
1868 (GET_DATA_BYTE(lines + wpls, k + 1) << 8) |
1869 GET_DATA_BYTE(lines + wpls, k + 2);
1870 threebytes3 = (GET_DATA_BYTE(lines + 2 * wpls, k) << 16) |
1871 (GET_DATA_BYTE(lines + 2 * wpls, k + 1) << 8) |
1872 GET_DATA_BYTE(lines + 2 * wpls, k + 2);
1873
1874 sum = sumtab[(threebytes1 >> 18)] +
1875 sumtab[(threebytes2 >> 18)] +
1876 sumtab[(threebytes3 >> 18)];
1877 SET_DATA_BYTE(lined, j, valtab[GET_DATA_BYTE(&sum, 2)]);
1878 SET_DATA_BYTE(lined, j + 1, valtab[GET_DATA_BYTE(&sum, 3)]);
1879
1880 sum = sumtab[((threebytes1 >> 12) & 0x3f)] +
1881 sumtab[((threebytes2 >> 12) & 0x3f)] +
1882 sumtab[((threebytes3 >> 12) & 0x3f)];
1883 SET_DATA_BYTE(lined, j + 2, valtab[GET_DATA_BYTE(&sum, 2)]);
1884 SET_DATA_BYTE(lined, j + 3, valtab[GET_DATA_BYTE(&sum, 3)]);
1885
1886 sum = sumtab[((threebytes1 >> 6) & 0x3f)] +
1887 sumtab[((threebytes2 >> 6) & 0x3f)] +
1888 sumtab[((threebytes3 >> 6) & 0x3f)];
1889 SET_DATA_BYTE(lined, j + 4, valtab[GET_DATA_BYTE(&sum, 2)]);
1890 SET_DATA_BYTE(lined, j + 5, valtab[GET_DATA_BYTE(&sum, 3)]);
1891
1892 sum = sumtab[(threebytes1 & 0x3f)] +
1893 sumtab[(threebytes2 & 0x3f)] +
1894 sumtab[(threebytes3 & 0x3f)];
1895 SET_DATA_BYTE(lined, j + 6, valtab[GET_DATA_BYTE(&sum, 2)]);
1896 SET_DATA_BYTE(lined, j + 7, valtab[GET_DATA_BYTE(&sum, 3)]);
1897 }
1898 }
1899
1900 return;
1901 }
1902
1903
1904
1905 /*!
1906 * makeSumTabSG3()
1907 *
1908 * Returns a table of 64 l_uint32s, giving the two output
1909 * 8-bit grayscale sums corresponding to 6 input bits of a binary
1910 * image, for a 3x scale-to-gray op. In practice, this would
1911 * be used three times (on adjacent scanlines), and the sums would
1912 * be added and then transformed to output 8 bpp pixel values,
1913 * using makeValTabSG3().
1914 */
1915 l_uint32 *
makeSumTabSG3(void)1916 makeSumTabSG3(void)
1917 {
1918 l_int32 i;
1919 l_int32 sum[] = {0, 1, 1, 2, 1, 2, 2, 3};
1920 l_uint32 *tab;
1921
1922 PROCNAME("makeSumTabSG3");
1923
1924 if ((tab = (l_uint32 *)CALLOC(64, sizeof(l_uint32))) == NULL)
1925 return (l_uint32 *)ERROR_PTR("calloc fail for tab", procName, NULL);
1926
1927 /* Pack the two sums separately in two bytes */
1928 for (i = 0; i < 64; i++) {
1929 tab[i] = (sum[i & 0x07]) | (sum[(i >> 3) & 0x07] << 8);
1930 }
1931
1932 return tab;
1933 }
1934
1935
1936 /*!
1937 * makeValTabSG3()
1938 *
1939 * Returns an 8 bit value for the sum of ON pixels
1940 * in a 3x3 square, according to
1941 * val = 255 - (255 * sum)/9
1942 * where sum is in set {0, ... ,9}
1943 */
1944 l_uint8 *
makeValTabSG3(void)1945 makeValTabSG3(void)
1946 {
1947 l_int32 i;
1948 l_uint8 *tab;
1949
1950 PROCNAME("makeValTabSG3");
1951
1952 if ((tab = (l_uint8 *)CALLOC(10, sizeof(l_uint8))) == NULL)
1953 return (l_uint8 *)ERROR_PTR("calloc fail for tab", procName, NULL);
1954
1955 for (i = 0; i < 10; i++)
1956 tab[i] = 0xff - (i * 255) / 9;
1957
1958 return tab;
1959 }
1960
1961
1962 /*------------------------------------------------------------------*
1963 * Scale-to-gray 4x *
1964 *------------------------------------------------------------------*/
1965 /*!
1966 * scaleToGray4Low()
1967 *
1968 * Input: usual image variables
1969 * sumtab (made from makeSumTabSG4())
1970 * valtab (made from makeValTabSG4())
1971 * Return: 0 if OK; 1 on error.
1972 *
1973 * The output is processed in sets of 2 output bytes on a row,
1974 * corresponding to 2 4x4 bit-blocks in the input image.
1975 * Two lookup tables are used. The first, sumtab, gets the
1976 * sum of ON pixels in two sets of four adjacent bits,
1977 * storing the result in 2 adjacent bytes. After sums from
1978 * four rows have been added, the second table, valtab,
1979 * converts from the sum of ON pixels in the 4x4 block to
1980 * an 8 bpp grayscale value between 0 (for 16 bits ON)
1981 * and 255 (for 0 bits ON).
1982 */
1983 void
scaleToGray4Low(l_uint32 * datad,l_int32 wd,l_int32 hd,l_int32 wpld,l_uint32 * datas,l_int32 wpls,l_uint32 * sumtab,l_uint8 * valtab)1984 scaleToGray4Low(l_uint32 *datad,
1985 l_int32 wd,
1986 l_int32 hd,
1987 l_int32 wpld,
1988 l_uint32 *datas,
1989 l_int32 wpls,
1990 l_uint32 *sumtab,
1991 l_uint8 *valtab)
1992 {
1993 l_int32 i, j, l, k;
1994 l_uint32 sbyte1, sbyte2, sbyte3, sbyte4, sum;
1995 l_uint32 *lines, *lined;
1996
1997 /* i indexes the dest lines
1998 * l indexes the source lines
1999 * j indexes the dest bytes
2000 * k indexes the source bytes
2001 * We take four bytes from the source (in 4 lines of 8 pixels
2002 * each) and convert it into two 8 bpp bytes of the dest. */
2003 for (i = 0, l = 0; i < hd; i++, l += 4) {
2004 lines = datas + l * wpls;
2005 lined = datad + i * wpld;
2006 for (j = 0, k = 0; j < wd; j += 2, k++) {
2007 sbyte1 = GET_DATA_BYTE(lines, k);
2008 sbyte2 = GET_DATA_BYTE(lines + wpls, k);
2009 sbyte3 = GET_DATA_BYTE(lines + 2 * wpls, k);
2010 sbyte4 = GET_DATA_BYTE(lines + 3 * wpls, k);
2011 sum = sumtab[sbyte1] + sumtab[sbyte2] +
2012 sumtab[sbyte3] + sumtab[sbyte4];
2013 SET_DATA_BYTE(lined, j, valtab[GET_DATA_BYTE(&sum, 2)]);
2014 SET_DATA_BYTE(lined, j + 1, valtab[GET_DATA_BYTE(&sum, 3)]);
2015 }
2016 }
2017
2018 return;
2019 }
2020
2021
2022 /*!
2023 * makeSumTabSG4()
2024 *
2025 * Returns a table of 256 l_uint32s, giving the two output
2026 * 8-bit grayscale sums corresponding to 8 input bits of a binary
2027 * image, for a 4x scale-to-gray op. The sums from four
2028 * adjacent scanlines are then added and transformed to
2029 * output 8 bpp pixel values, using makeValTabSG4().
2030 */
2031 l_uint32 *
makeSumTabSG4(void)2032 makeSumTabSG4(void)
2033 {
2034 l_int32 i;
2035 l_int32 sum[] = {0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4};
2036 l_uint32 *tab;
2037
2038 PROCNAME("makeSumTabSG4");
2039
2040 if ((tab = (l_uint32 *)CALLOC(256, sizeof(l_uint32))) == NULL)
2041 return (l_uint32 *)ERROR_PTR("calloc fail for tab", procName, NULL);
2042
2043 /* Pack the two sums separately in two bytes */
2044 for (i = 0; i < 256; i++) {
2045 tab[i] = (sum[i & 0xf]) | (sum[(i >> 4) & 0xf] << 8);
2046 }
2047
2048 return tab;
2049 }
2050
2051
2052 /*!
2053 * makeValTabSG4()
2054 *
2055 * Returns an 8 bit value for the sum of ON pixels
2056 * in a 4x4 square, according to
2057 *
2058 * val = 255 - (255 * sum)/16
2059 *
2060 * where sum is in set {0, ... ,16}
2061 */
2062 l_uint8 *
makeValTabSG4(void)2063 makeValTabSG4(void)
2064 {
2065 l_int32 i;
2066 l_uint8 *tab;
2067
2068 PROCNAME("makeValTabSG4");
2069
2070 if ((tab = (l_uint8 *)CALLOC(17, sizeof(l_uint8))) == NULL)
2071 return (l_uint8 *)ERROR_PTR("calloc fail for tab", procName, NULL);
2072
2073 for (i = 0; i < 17; i++)
2074 tab[i] = 0xff - (i * 255) / 16;
2075
2076 return tab;
2077 }
2078
2079
2080 /*------------------------------------------------------------------*
2081 * Scale-to-gray 6x *
2082 *------------------------------------------------------------------*/
2083 /*!
2084 * scaleToGray6Low()
2085 *
2086 * Input: usual image variables
2087 * tab8 (made from makePixelSumTab8())
2088 * valtab (made from makeValTabSG6())
2089 * Return: 0 if OK; 1 on error
2090 *
2091 * Each set of 4 6x6 bit-blocks in the source image, which
2092 * consist of 144 pixels arranged 24 pixels wide by 6 scanlines,
2093 * is converted to a row of 4 8-bit pixels in the dest image.
2094 * These 144 pixels of the input image are runs of 24 pixels
2095 * in six adjacent scanlines. Each run of 24 pixels is
2096 * stored in the 24 LSbits of a 32-bit word. We use 2 LUTs.
2097 * The first, tab8, takes 6 of these bits and stores
2098 * sum in one byte. This is done for each of the 6 scanlines,
2099 * and the results are added.
2100 * We now have the sum of ON pixels in the first 6x6 block. The
2101 * valtab LUT then converts these values (which go from 0 to 36) to
2102 * grayscale values between between 255 and 0. (See makeValTabSG6).
2103 * This process is repeated for each of the other 3 sets of
2104 * 6x6 input pixels, giving 4 output pixels in total.
2105 *
2106 * Note: because the input image is processed in groups of
2107 * 24 x 6 pixels, the process clips the input height to
2108 * (h - h % 6) and the input width to (w - w % 24).
2109 *
2110 */
2111 void
scaleToGray6Low(l_uint32 * datad,l_int32 wd,l_int32 hd,l_int32 wpld,l_uint32 * datas,l_int32 wpls,l_int32 * tab8,l_uint8 * valtab)2112 scaleToGray6Low(l_uint32 *datad,
2113 l_int32 wd,
2114 l_int32 hd,
2115 l_int32 wpld,
2116 l_uint32 *datas,
2117 l_int32 wpls,
2118 l_int32 *tab8,
2119 l_uint8 *valtab)
2120 {
2121 l_int32 i, j, l, k;
2122 l_uint32 threebytes1, threebytes2, threebytes3;
2123 l_uint32 threebytes4, threebytes5, threebytes6, sum;
2124 l_uint32 *lines, *lined;
2125
2126 /* i indexes the dest lines
2127 * l indexes the source lines
2128 * j indexes the dest bytes
2129 * k indexes the source bytes
2130 * We take 18 bytes from the source (144 binary pixels
2131 * in six lines of 24 pixels each) and convert it
2132 * into 4 bytes of the dest (four 8 bpp pixels in one line) */
2133 for (i = 0, l = 0; i < hd; i++, l += 6) {
2134 lines = datas + l * wpls;
2135 lined = datad + i * wpld;
2136 for (j = 0, k = 0; j < wd; j += 4, k += 3) {
2137 /* First grab the 18 bytes, 3 at a time, and put each set
2138 * of 3 bytes into the LS bytes of a 32-bit word. */
2139 threebytes1 = (GET_DATA_BYTE(lines, k) << 16) |
2140 (GET_DATA_BYTE(lines, k + 1) << 8) |
2141 GET_DATA_BYTE(lines, k + 2);
2142 threebytes2 = (GET_DATA_BYTE(lines + wpls, k) << 16) |
2143 (GET_DATA_BYTE(lines + wpls, k + 1) << 8) |
2144 GET_DATA_BYTE(lines + wpls, k + 2);
2145 threebytes3 = (GET_DATA_BYTE(lines + 2 * wpls, k) << 16) |
2146 (GET_DATA_BYTE(lines + 2 * wpls, k + 1) << 8) |
2147 GET_DATA_BYTE(lines + 2 * wpls, k + 2);
2148 threebytes4 = (GET_DATA_BYTE(lines + 3 * wpls, k) << 16) |
2149 (GET_DATA_BYTE(lines + 3 * wpls, k + 1) << 8) |
2150 GET_DATA_BYTE(lines + 3 * wpls, k + 2);
2151 threebytes5 = (GET_DATA_BYTE(lines + 4 * wpls, k) << 16) |
2152 (GET_DATA_BYTE(lines + 4 * wpls, k + 1) << 8) |
2153 GET_DATA_BYTE(lines + 4 * wpls, k + 2);
2154 threebytes6 = (GET_DATA_BYTE(lines + 5 * wpls, k) << 16) |
2155 (GET_DATA_BYTE(lines + 5 * wpls, k + 1) << 8) |
2156 GET_DATA_BYTE(lines + 5 * wpls, k + 2);
2157
2158 /* Sum first set of 36 bits and convert to 0-255 */
2159 sum = tab8[(threebytes1 >> 18)] +
2160 tab8[(threebytes2 >> 18)] +
2161 tab8[(threebytes3 >> 18)] +
2162 tab8[(threebytes4 >> 18)] +
2163 tab8[(threebytes5 >> 18)] +
2164 tab8[(threebytes6 >> 18)];
2165 SET_DATA_BYTE(lined, j, valtab[GET_DATA_BYTE(&sum, 3)]);
2166
2167 /* Ditto for second set */
2168 sum = tab8[((threebytes1 >> 12) & 0x3f)] +
2169 tab8[((threebytes2 >> 12) & 0x3f)] +
2170 tab8[((threebytes3 >> 12) & 0x3f)] +
2171 tab8[((threebytes4 >> 12) & 0x3f)] +
2172 tab8[((threebytes5 >> 12) & 0x3f)] +
2173 tab8[((threebytes6 >> 12) & 0x3f)];
2174 SET_DATA_BYTE(lined, j + 1, valtab[GET_DATA_BYTE(&sum, 3)]);
2175
2176 sum = tab8[((threebytes1 >> 6) & 0x3f)] +
2177 tab8[((threebytes2 >> 6) & 0x3f)] +
2178 tab8[((threebytes3 >> 6) & 0x3f)] +
2179 tab8[((threebytes4 >> 6) & 0x3f)] +
2180 tab8[((threebytes5 >> 6) & 0x3f)] +
2181 tab8[((threebytes6 >> 6) & 0x3f)];
2182 SET_DATA_BYTE(lined, j + 2, valtab[GET_DATA_BYTE(&sum, 3)]);
2183
2184 sum = tab8[(threebytes1 & 0x3f)] +
2185 tab8[(threebytes2 & 0x3f)] +
2186 tab8[(threebytes3 & 0x3f)] +
2187 tab8[(threebytes4 & 0x3f)] +
2188 tab8[(threebytes5 & 0x3f)] +
2189 tab8[(threebytes6 & 0x3f)];
2190 SET_DATA_BYTE(lined, j + 3, valtab[GET_DATA_BYTE(&sum, 3)]);
2191 }
2192 }
2193
2194 return;
2195 }
2196
2197
2198 /*!
2199 * makeValTabSG6()
2200 *
2201 * Returns an 8 bit value for the sum of ON pixels
2202 * in a 6x6 square, according to
2203 * val = 255 - (255 * sum)/36
2204 * where sum is in set {0, ... ,36}
2205 */
2206 l_uint8 *
makeValTabSG6(void)2207 makeValTabSG6(void)
2208 {
2209 l_int32 i;
2210 l_uint8 *tab;
2211
2212 PROCNAME("makeValTabSG6");
2213
2214 if ((tab = (l_uint8 *)CALLOC(37, sizeof(l_uint8))) == NULL)
2215 return (l_uint8 *)ERROR_PTR("calloc fail for tab", procName, NULL);
2216
2217 for (i = 0; i < 37; i++)
2218 tab[i] = 0xff - (i * 255) / 36;
2219
2220 return tab;
2221 }
2222
2223
2224 /*------------------------------------------------------------------*
2225 * Scale-to-gray 8x *
2226 *------------------------------------------------------------------*/
2227 /*!
2228 * scaleToGray8Low()
2229 *
2230 * Input: usual image variables
2231 * tab8 (made from makePixelSumTab8())
2232 * valtab (made from makeValTabSG8())
2233 * Return: 0 if OK; 1 on error.
2234 *
2235 * The output is processed one dest byte at a time,
2236 * corresponding to 8 rows of src bytes in the input image.
2237 * Two lookup tables are used. The first, tab8, gets the
2238 * sum of ON pixels in a byte. After sums from 8 rows have
2239 * been added, the second table, valtab, converts from this
2240 * value (which is between 0 and 64) to an 8 bpp grayscale
2241 * value between 0 (for all 64 bits ON) and 255 (for 0 bits ON).
2242 */
2243 void
scaleToGray8Low(l_uint32 * datad,l_int32 wd,l_int32 hd,l_int32 wpld,l_uint32 * datas,l_int32 wpls,l_int32 * tab8,l_uint8 * valtab)2244 scaleToGray8Low(l_uint32 *datad,
2245 l_int32 wd,
2246 l_int32 hd,
2247 l_int32 wpld,
2248 l_uint32 *datas,
2249 l_int32 wpls,
2250 l_int32 *tab8,
2251 l_uint8 *valtab)
2252 {
2253 l_int32 i, j, k;
2254 l_int32 sbyte0, sbyte1, sbyte2, sbyte3, sbyte4, sbyte5, sbyte6, sbyte7, sum;
2255 l_uint32 *lines, *lined;
2256
2257 /* i indexes the dest lines
2258 * k indexes the source lines
2259 * j indexes the src and dest bytes
2260 * We take 8 bytes from the source (in 8 lines of 8 pixels
2261 * each) and convert it into one 8 bpp byte of the dest. */
2262 for (i = 0, k = 0; i < hd; i++, k += 8) {
2263 lines = datas + k * wpls;
2264 lined = datad + i * wpld;
2265 for (j = 0; j < wd; j++) {
2266 sbyte0 = GET_DATA_BYTE(lines, j);
2267 sbyte1 = GET_DATA_BYTE(lines + wpls, j);
2268 sbyte2 = GET_DATA_BYTE(lines + 2 * wpls, j);
2269 sbyte3 = GET_DATA_BYTE(lines + 3 * wpls, j);
2270 sbyte4 = GET_DATA_BYTE(lines + 4 * wpls, j);
2271 sbyte5 = GET_DATA_BYTE(lines + 5 * wpls, j);
2272 sbyte6 = GET_DATA_BYTE(lines + 6 * wpls, j);
2273 sbyte7 = GET_DATA_BYTE(lines + 7 * wpls, j);
2274 sum = tab8[sbyte0] + tab8[sbyte1] +
2275 tab8[sbyte2] + tab8[sbyte3] +
2276 tab8[sbyte4] + tab8[sbyte5] +
2277 tab8[sbyte6] + tab8[sbyte7];
2278 SET_DATA_BYTE(lined, j, valtab[sum]);
2279 }
2280 }
2281
2282 return;
2283 }
2284
2285
2286 /*!
2287 * makeValTabSG8()
2288 *
2289 * Returns an 8 bit value for the sum of ON pixels
2290 * in an 8x8 square, according to
2291 * val = 255 - (255 * sum)/64
2292 * where sum is in set {0, ... ,64}
2293 */
2294 l_uint8 *
makeValTabSG8(void)2295 makeValTabSG8(void)
2296 {
2297 l_int32 i;
2298 l_uint8 *tab;
2299
2300 PROCNAME("makeValTabSG8");
2301
2302 if ((tab = (l_uint8 *)CALLOC(65, sizeof(l_uint8))) == NULL)
2303 return (l_uint8 *)ERROR_PTR("calloc fail for tab", procName, NULL);
2304
2305 for (i = 0; i < 65; i++)
2306 tab[i] = 0xff - (i * 255) / 64;
2307
2308 return tab;
2309 }
2310
2311
2312 /*------------------------------------------------------------------*
2313 * Scale-to-gray 16x *
2314 *------------------------------------------------------------------*/
2315 /*!
2316 * scaleToGray16Low()
2317 *
2318 * Input: usual image variables
2319 * tab8 (made from makePixelSumTab8())
2320 * Return: 0 if OK; 1 on error.
2321 *
2322 * The output is processed one dest byte at a time, corresponding
2323 * to 16 rows consisting each of 2 src bytes in the input image.
2324 * This uses one lookup table, tab8, which gives the sum of
2325 * ON pixels in a byte. After summing for all ON pixels in the
2326 * 32 src bytes, which is between 0 and 256, this is converted
2327 * to an 8 bpp grayscale value between 0 (for 255 or 256 bits ON)
2328 * and 255 (for 0 bits ON).
2329 */
2330 void
scaleToGray16Low(l_uint32 * datad,l_int32 wd,l_int32 hd,l_int32 wpld,l_uint32 * datas,l_int32 wpls,l_int32 * tab8)2331 scaleToGray16Low(l_uint32 *datad,
2332 l_int32 wd,
2333 l_int32 hd,
2334 l_int32 wpld,
2335 l_uint32 *datas,
2336 l_int32 wpls,
2337 l_int32 *tab8)
2338 {
2339 l_int32 i, j, k, m;
2340 l_int32 sum;
2341 l_uint32 *lines, *lined;
2342
2343 /* i indexes the dest lines
2344 * k indexes the source lines
2345 * j indexes the dest bytes
2346 * m indexes the src bytes
2347 * We take 32 bytes from the source (in 16 lines of 16 pixels
2348 * each) and convert it into one 8 bpp byte of the dest. */
2349 for (i = 0, k = 0; i < hd; i++, k += 16) {
2350 lines = datas + k * wpls;
2351 lined = datad + i * wpld;
2352 for (j = 0; j < wd; j++) {
2353 m = 2 * j;
2354 sum = tab8[GET_DATA_BYTE(lines, m)];
2355 sum += tab8[GET_DATA_BYTE(lines, m + 1)];
2356 sum += tab8[GET_DATA_BYTE(lines + wpls, m)];
2357 sum += tab8[GET_DATA_BYTE(lines + wpls, m + 1)];
2358 sum += tab8[GET_DATA_BYTE(lines + 2 * wpls, m)];
2359 sum += tab8[GET_DATA_BYTE(lines + 2 * wpls, m + 1)];
2360 sum += tab8[GET_DATA_BYTE(lines + 3 * wpls, m)];
2361 sum += tab8[GET_DATA_BYTE(lines + 3 * wpls, m + 1)];
2362 sum += tab8[GET_DATA_BYTE(lines + 4 * wpls, m)];
2363 sum += tab8[GET_DATA_BYTE(lines + 4 * wpls, m + 1)];
2364 sum += tab8[GET_DATA_BYTE(lines + 5 * wpls, m)];
2365 sum += tab8[GET_DATA_BYTE(lines + 5 * wpls, m + 1)];
2366 sum += tab8[GET_DATA_BYTE(lines + 6 * wpls, m)];
2367 sum += tab8[GET_DATA_BYTE(lines + 6 * wpls, m + 1)];
2368 sum += tab8[GET_DATA_BYTE(lines + 7 * wpls, m)];
2369 sum += tab8[GET_DATA_BYTE(lines + 7 * wpls, m + 1)];
2370 sum += tab8[GET_DATA_BYTE(lines + 8 * wpls, m)];
2371 sum += tab8[GET_DATA_BYTE(lines + 8 * wpls, m + 1)];
2372 sum += tab8[GET_DATA_BYTE(lines + 9 * wpls, m)];
2373 sum += tab8[GET_DATA_BYTE(lines + 9 * wpls, m + 1)];
2374 sum += tab8[GET_DATA_BYTE(lines + 10 * wpls, m)];
2375 sum += tab8[GET_DATA_BYTE(lines + 10 * wpls, m + 1)];
2376 sum += tab8[GET_DATA_BYTE(lines + 11 * wpls, m)];
2377 sum += tab8[GET_DATA_BYTE(lines + 11 * wpls, m + 1)];
2378 sum += tab8[GET_DATA_BYTE(lines + 12 * wpls, m)];
2379 sum += tab8[GET_DATA_BYTE(lines + 12 * wpls, m + 1)];
2380 sum += tab8[GET_DATA_BYTE(lines + 13 * wpls, m)];
2381 sum += tab8[GET_DATA_BYTE(lines + 13 * wpls, m + 1)];
2382 sum += tab8[GET_DATA_BYTE(lines + 14 * wpls, m)];
2383 sum += tab8[GET_DATA_BYTE(lines + 14 * wpls, m + 1)];
2384 sum += tab8[GET_DATA_BYTE(lines + 15 * wpls, m)];
2385 sum += tab8[GET_DATA_BYTE(lines + 15 * wpls, m + 1)];
2386 sum = L_MIN(sum, 255);
2387 SET_DATA_BYTE(lined, j, 255 - sum);
2388 }
2389 }
2390
2391 return;
2392 }
2393
2394
2395
2396 /*------------------------------------------------------------------*
2397 * Grayscale mipmap *
2398 *------------------------------------------------------------------*/
2399 /*!
2400 * scaleMipmapLow()
2401 *
2402 * See notes in scale.c for pixScaleToGrayMipmap(). This function
2403 * is here for pedagogical reasons. It gives poor results on document
2404 * images because of aliasing.
2405 */
2406 l_int32
scaleMipmapLow(l_uint32 * datad,l_int32 wd,l_int32 hd,l_int32 wpld,l_uint32 * datas1,l_int32 wpls1,l_uint32 * datas2,l_int32 wpls2,l_float32 red)2407 scaleMipmapLow(l_uint32 *datad,
2408 l_int32 wd,
2409 l_int32 hd,
2410 l_int32 wpld,
2411 l_uint32 *datas1,
2412 l_int32 wpls1,
2413 l_uint32 *datas2,
2414 l_int32 wpls2,
2415 l_float32 red)
2416 {
2417 l_int32 i, j, val1, val2, val, row2, col2;
2418 l_int32 *srow, *scol;
2419 l_uint32 *lines1, *lines2, *lined;
2420 l_float32 ratio, w1, w2;
2421
2422 PROCNAME("scaleMipmapLow");
2423
2424 /* Clear dest */
2425 memset((char *)datad, 0, 4 * wpld * hd);
2426
2427 /* Each dest pixel at (j,i) is computed by interpolating
2428 between the two src images at the corresponding location.
2429 We store the UL corner locations of the square of
2430 src pixels in thelower-resolution image that correspond
2431 to dest pixel (j,i). The are labelled by the arrays
2432 srow[i], scol[j]. The UL corner locations of the higher
2433 resolution src pixels are obtained from these arrays
2434 by multiplying by 2. */
2435 if ((srow = (l_int32 *)CALLOC(hd, sizeof(l_int32))) == NULL)
2436 return ERROR_INT("srow not made", procName, 1);
2437 if ((scol = (l_int32 *)CALLOC(wd, sizeof(l_int32))) == NULL)
2438 return ERROR_INT("scol not made", procName, 1);
2439 ratio = 1. / (2. * red); /* 0.5 for red = 1, 1 for red = 0.5 */
2440 for (i = 0; i < hd; i++)
2441 srow[i] = (l_int32)(ratio * i);
2442 for (j = 0; j < wd; j++)
2443 scol[j] = (l_int32)(ratio * j);
2444
2445 /* Get weights for linear interpolation: these are the
2446 * 'distances' of the dest image plane from the two
2447 * src image planes. */
2448 w1 = 2. * red - 1.; /* w1 --> 1 as red --> 1 */
2449 w2 = 1. - w1;
2450
2451 /* For each dest pixel, compute linear interpolation */
2452 for (i = 0; i < hd; i++) {
2453 row2 = srow[i];
2454 lines1 = datas1 + 2 * row2 * wpls1;
2455 lines2 = datas2 + row2 * wpls2;
2456 lined = datad + i * wpld;
2457 for (j = 0; j < wd; j++) {
2458 col2 = scol[j];
2459 val1 = GET_DATA_BYTE(lines1, 2 * col2);
2460 val2 = GET_DATA_BYTE(lines2, col2);
2461 val = (l_int32)(w1 * val1 + w2 * val2);
2462 SET_DATA_BYTE(lined, j, val);
2463 }
2464 }
2465
2466 FREE(srow);
2467 FREE(scol);
2468 return 0;
2469 }
2470