1 /*====================================================================*
2 - Copyright (C) 2001 Leptonica. All rights reserved.
3 - This software is distributed in the hope that it will be
4 - useful, but with NO WARRANTY OF ANY KIND.
5 - No author or distributor accepts responsibility to anyone for the
6 - consequences of using this software, or for whether it serves any
7 - particular purpose or works at all, unless he or she says so in
8 - writing. Everyone is granted permission to copy, modify and
9 - redistribute this source code, for commercial or non-commercial
10 - purposes, with the following restrictions: (1) the origin of this
11 - source code must not be misrepresented; (2) modified versions must
12 - be plainly marked as such; and (3) this notice may not be removed
13 - or altered from any source or modified source distribution.
14 *====================================================================*/
15
16 /*
17 * baseline.c
18 *
19 * Locate text baselines in an image
20 * NUMA *pixFindBaselines()
21 *
22 * Projective transform to remove local skew
23 * PIX *pixDeskewLocal()
24 *
25 * Determine local skew
26 * l_int32 pixGetLocalSkewTransform()
27 * NUMA *pixGetLocalSkewAngles()
28 *
29 * We have two apparently different functions here:
30 * - finding baselines
31 * - finding a projective transform to remove keystone warping
32 * The function pixGetLocalSkewAngles() returns an array of angles,
33 * one for each raster line, and the baselines of the text lines
34 * should intersect the left edge of the image with that angle.
35 */
36
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <math.h>
40 #include "allheaders.h"
41
42 #ifndef NO_CONSOLE_IO
43 #define DEBUG_PLOT 0
44 #endif /* NO_CONSOLE_IO */
45
46 /* Min to travel after finding max before abandoning peak */
47 static const l_int32 MIN_DIST_IN_PEAK = 35;
48
49 /* Thresholds for peaks and zeros, relative to the max peak */
50 static const l_int32 PEAK_THRESHOLD_RATIO = 20;
51 static const l_int32 ZERO_THRESHOLD_RATIO = 100;
52
53 /* Default values for determining local skew */
54 static const l_int32 DEFAULT_SLICES = 10;
55 static const l_int32 DEFAULT_SWEEP_REDUCTION = 2;
56 static const l_int32 DEFAULT_BS_REDUCTION = 1;
57 static const l_float32 DEFAULT_SWEEP_RANGE = 5.; /* degrees */
58 static const l_float32 DEFAULT_SWEEP_DELTA = 1.; /* degrees */
59 static const l_float32 DEFAULT_MINBS_DELTA = 0.01; /* degrees */
60
61 /* Overlap slice fraction added to top and bottom of each slice */
62 static const l_float32 OVERLAP_FRACTION = 0.5;
63
64 /* Minimum allowed confidence (ratio) for accepting a value */
65 static const l_float32 MIN_ALLOWED_CONFIDENCE = 3.0;
66
67
68 /*---------------------------------------------------------------------*
69 * Locate text baselines in an image *
70 *---------------------------------------------------------------------*/
71 /*!
72 * pixFindBaselines()
73 *
74 * Input: pixs (1 bpp)
75 * &pta (<optional return> pairs of pts corresponding to
76 * approx. ends of each text line)
77 * debug (usually 0; set to 1 for debugging output)
78 * Return: na (of baseline y values), or null on error
79 *
80 * Notes:
81 * (1) Input binary image must have text lines already aligned
82 * horizontally. This can be done by either rotating the
83 * image with pixDeskew(), or, if a projective transform
84 * is required, by doing pixDeskewLocal() first.
85 * (2) Input null for &pta if you don't want this returned.
86 * The pta will come in pairs of points (left and right end
87 * of each baseline).
88 * (3) Caution: this will not work properly on text with multiple
89 * columns, where the lines are not aligned between columns.
90 * If there are multiple columns, they should be extracted
91 * separately before finding the baselines.
92 * (4) This function constructs different types of output
93 * for baselines; namely, a set of raster line values and
94 * a set of end points of each baseline.
95 * (5) This function was designed to handle short and long text lines
96 * without using dangerous thresholds on the peak heights. It does
97 * this by combining the differential signal with a morphological
98 * analysis of the locations of the text lines. One can also
99 * combine this data to normalize the peak heights, by weighting
100 * the differential signal in the region of each baseline
101 * by the inverse of the width of the text line found there.
102 * (6) There are various debug sections that can be turned on
103 * with the debug flag.
104 */
105 NUMA *
pixFindBaselines(PIX * pixs,PTA ** ppta,l_int32 debug)106 pixFindBaselines(PIX *pixs,
107 PTA **ppta,
108 l_int32 debug)
109 {
110 l_int32 w, h, i, j, nbox, val1, val2, ndiff, bx, by, bw, bh;
111 l_int32 imaxloc, peakthresh, zerothresh, inpeak;
112 l_int32 mintosearch, max, maxloc, nloc, locval;
113 l_int32 *array;
114 l_float32 maxval;
115 BOXA *boxa1, *boxa2, *boxa3;
116 GPLOT *gplot;
117 NUMA *nasum, *nadiff, *naloc, *naval;
118 PIX *pixt1, *pixt2;
119 PTA *pta;
120
121 PROCNAME("pixFindBaselines");
122
123 if (!pixs)
124 return (NUMA *)ERROR_PTR("pixs not defined", procName, NULL);
125 pta = NULL;
126 if (ppta) {
127 pta = ptaCreate(0);
128 *ppta = pta;
129 }
130
131 /* Close up the text characters, removing noise */
132 pixt1 = pixMorphSequence(pixs, "c25.1 + e3.1", 0);
133
134 /* Save the difference of adjacent row sums.
135 * The high positive-going peaks are the baselines */
136 if ((nasum = pixCountPixelsByRow(pixt1, NULL)) == NULL)
137 return (NUMA *)ERROR_PTR("nasum not made", procName, NULL);
138 w = pixGetWidth(pixs);
139 h = pixGetHeight(pixs);
140 nadiff = numaCreate(h);
141 numaGetIValue(nasum, 0, &val2);
142 for (i = 0; i < h - 1; i++) {
143 val1 = val2;
144 numaGetIValue(nasum, i + 1, &val2);
145 numaAddNumber(nadiff, val1 - val2);
146 }
147
148 if (debug) /* show the difference signal */
149 gplotSimple1(nadiff, GPLOT_X11, "junkdiff", "difference");
150
151 /* Use the zeroes of the profile to locate each baseline. */
152 array = numaGetIArray(nadiff);
153 ndiff = numaGetCount(nadiff);
154 numaGetMax(nadiff, &maxval, &imaxloc);
155 /* Use this to begin locating a new peak: */
156 peakthresh = (l_int32)maxval / PEAK_THRESHOLD_RATIO;
157 /* Use this to begin a region between peaks: */
158 zerothresh = (l_int32)maxval / ZERO_THRESHOLD_RATIO;
159 naloc = numaCreate(0);
160 naval = numaCreate(0);
161 inpeak = FALSE;
162 for (i = 0; i < ndiff; i++) {
163 if (inpeak == FALSE) {
164 if (array[i] > peakthresh) { /* transition to in-peak */
165 inpeak = TRUE;
166 mintosearch = i + MIN_DIST_IN_PEAK; /* accept no zeros
167 * between i and mintosearch */
168 max = array[i];
169 maxloc = i;
170 }
171 }
172 else { /* inpeak == TRUE; look for max */
173 if (array[i] > max) {
174 max = array[i];
175 maxloc = i;
176 mintosearch = i + MIN_DIST_IN_PEAK;
177 }
178 else if (i > mintosearch && array[i] <= zerothresh) { /* leave */
179 inpeak = FALSE;
180 numaAddNumber(naval, max);
181 numaAddNumber(naloc, maxloc);
182 }
183 }
184 }
185
186 /* If array[ndiff-1] is max, eg. no descenders, baseline at bottom */
187 if (inpeak) {
188 numaAddNumber(naval, max);
189 numaAddNumber(naloc, maxloc);
190 }
191 FREE(array);
192
193 if (debug) { /* show the raster locations for the peaks */
194 gplot = gplotCreate("junkloc", GPLOT_X11, "Peak locations",
195 "rasterline", "height");
196 gplotAddPlot(gplot, naloc, naval, GPLOT_POINTS, "locs");
197 gplotMakeOutput(gplot);
198 gplotDestroy(&gplot);
199 }
200
201 /* Generate an approximate profile of text line width.
202 * First, filter the boxes of text, where there may be
203 * more than one box for a given textline. */
204 pixt2 = pixMorphSequence(pixt1, "r11 + c25.1 + o7.1 +c1.3", 0);
205 boxa1 = pixConnComp(pixt2, NULL, 4);
206 boxa2 = boxaTransform(boxa1, 0, 0, 4., 4.);
207 boxa3 = boxaSort(boxa2, L_SORT_BY_Y, L_SORT_INCREASING, NULL);
208
209 /* Then find the baseline segments */
210 if (pta) {
211 nloc = numaGetCount(naloc);
212 nbox = boxaGetCount(boxa3);
213 for (i = 0; i < nbox; i++) {
214 boxaGetBoxGeometry(boxa3, i, &bx, &by, &bw, &bh);
215 for (j = 0; j < nloc; j++) {
216 numaGetIValue(naloc, j, &locval);
217 if (L_ABS(locval - (by + bh)) > 25)
218 continue;
219 ptaAddPt(pta, bx, locval);
220 ptaAddPt(pta, bx + bw, locval);
221 break;
222 }
223 }
224 }
225
226 if (debug) { /* display baselines */
227 PIX *pixd;
228 l_int32 npts, x1, y1, x2, y2;
229 if (pta) {
230 pixd = pixConvertTo32(pixs);
231 npts = ptaGetCount(pta);
232 for (i = 0; i < npts; i += 2) {
233 ptaGetIPt(pta, i, &x1, &y1);
234 ptaGetIPt(pta, i + 1, &x2, &y2);
235 pixRenderLineArb(pixd, x1, y1, x2, y2, 1, 255, 0, 0);
236 }
237 pixDisplay(pixd, 200, 200);
238 pixWrite("junkbaselines", pixd, IFF_PNG);
239 pixDestroy(&pixd);
240 }
241 }
242
243 boxaDestroy(&boxa1);
244 boxaDestroy(&boxa2);
245 boxaDestroy(&boxa3);
246 pixDestroy(&pixt1);
247 pixDestroy(&pixt2);
248 numaDestroy(&nasum);
249 numaDestroy(&nadiff);
250 numaDestroy(&naval);
251 return naloc;
252 }
253
254
255 /*---------------------------------------------------------------------*
256 * Projective transform to remove local skew *
257 *---------------------------------------------------------------------*/
258 /*!
259 * pixDeskewLocal()
260 *
261 * Input: pixs
262 * nslices (the number of horizontal overlapping slices; must
263 * be larger than 1 and not exceed 20; use 0 for default)
264 * redsweep (sweep reduction factor: 1, 2, 4 or 8;
265 * use 0 for default value)
266 * redsearch (search reduction factor: 1, 2, 4 or 8, and
267 * not larger than redsweep; use 0 for default value)
268 * sweeprange (half the full range, assumed about 0; in degrees;
269 * use 0.0 for default value)
270 * sweepdelta (angle increment of sweep; in degrees;
271 * use 0.0 for default value)
272 * minbsdelta (min binary search increment angle; in degrees;
273 * use 0.0 for default value)
274 * Return: pixd, or null on error
275 *
276 * Notes:
277 * (1) This function allows deskew of a page whose skew changes
278 * approximately linearly with vertical position. It uses
279 * a projective tranform that in effect does a differential
280 * shear about the LHS of the page, and makes all text lines
281 * horizontal.
282 * (2) The origin of the keystoning can be either a cheap document
283 * feeder that rotates the page as it is passed through, or a
284 * camera image taken from either the left or right side
285 * of the vertical.
286 * (3) The image transformation is a projective warping,
287 * not a rotation. Apart from this function, the text lines
288 * must be properly aligned vertically with respect to each
289 * other. This can be done by pre-processing the page; e.g.,
290 * by rotating or horizontally shearing it.
291 * Typically, this can be achieved by vertically aligning
292 * the page edge.
293 */
294 PIX *
pixDeskewLocal(PIX * pixs,l_int32 nslices,l_int32 redsweep,l_int32 redsearch,l_float32 sweeprange,l_float32 sweepdelta,l_float32 minbsdelta)295 pixDeskewLocal(PIX *pixs,
296 l_int32 nslices,
297 l_int32 redsweep,
298 l_int32 redsearch,
299 l_float32 sweeprange,
300 l_float32 sweepdelta,
301 l_float32 minbsdelta)
302 {
303 l_int32 ret;
304 PIX *pixd;
305 PTA *ptas, *ptad;
306
307 PROCNAME("pixDeskewLocal");
308
309 if (!pixs)
310 return (PIX *)ERROR_PTR("pixs not defined", procName, NULL);
311
312 /* Skew array gives skew angle (deg) as fctn of raster line
313 * where it intersects the LHS of the image */
314 ret = pixGetLocalSkewTransform(pixs, nslices, redsweep, redsearch,
315 sweeprange, sweepdelta, minbsdelta,
316 &ptas, &ptad);
317 if (ret != 0)
318 return (PIX *)ERROR_PTR("transform pts not found", procName, NULL);
319
320 /* Use a projective transform */
321 pixd = pixProjectiveSampledPta(pixs, ptad, ptas, L_BRING_IN_WHITE);
322
323 ptaDestroy(&ptas);
324 ptaDestroy(&ptad);
325 return pixd;
326 }
327
328
329 /*---------------------------------------------------------------------*
330 * Determine the local skew *
331 *---------------------------------------------------------------------*/
332 /*!
333 * pixGetLocalSkewTransform()
334 *
335 * Input: pixs
336 * nslices (the number of horizontal overlapping slices; must
337 * be larger than 1 and not exceed 20; use 0 for default)
338 * redsweep (sweep reduction factor: 1, 2, 4 or 8;
339 * use 0 for default value)
340 * redsearch (search reduction factor: 1, 2, 4 or 8, and
341 * not larger than redsweep; use 0 for default value)
342 * sweeprange (half the full range, assumed about 0; in degrees;
343 * use 0.0 for default value)
344 * sweepdelta (angle increment of sweep; in degrees;
345 * use 0.0 for default value)
346 * minbsdelta (min binary search increment angle; in degrees;
347 * use 0.0 for default value)
348 * &ptas (<return> 4 points in the source)
349 * &ptad (<return> the corresponding 4 pts in the dest)
350 * Return: 0 if OK, 1 on error
351 *
352 * Notes:
353 * (1) This generates two pairs of points in the src, each pair
354 * corresponding to a pair of points that would lie along
355 * the same raster line in a transformed (dewarped) image.
356 * (2) The sets of 4 src and 4 dest points returned by this function
357 * can then be used, in a projective or bilinear transform,
358 * to remove keystoning in the src.
359 */
360 l_int32
pixGetLocalSkewTransform(PIX * pixs,l_int32 nslices,l_int32 redsweep,l_int32 redsearch,l_float32 sweeprange,l_float32 sweepdelta,l_float32 minbsdelta,PTA ** pptas,PTA ** pptad)361 pixGetLocalSkewTransform(PIX *pixs,
362 l_int32 nslices,
363 l_int32 redsweep,
364 l_int32 redsearch,
365 l_float32 sweeprange,
366 l_float32 sweepdelta,
367 l_float32 minbsdelta,
368 PTA **pptas,
369 PTA **pptad)
370 {
371 l_int32 w, h, i;
372 l_float32 deg2rad, angr, angd, dely;
373 NUMA *naskew;
374 PTA *ptas, *ptad;
375
376 PROCNAME("pixGetLocalSkewTransform");
377
378 if (!pptas || !pptad)
379 return ERROR_INT("&ptas and &ptad not defined", procName, 1);
380 *pptas = *pptad = NULL;
381 if (!pixs)
382 return ERROR_INT("pixs not defined", procName, 1);
383 if (nslices < 2 || nslices > 20)
384 nslices = DEFAULT_SLICES;
385 if (redsweep < 1 || redsweep > 8)
386 redsweep = DEFAULT_SWEEP_REDUCTION;
387 if (redsearch < 1 || redsearch > redsweep)
388 redsearch = DEFAULT_BS_REDUCTION;
389 if (sweeprange == 0.0)
390 sweeprange = DEFAULT_SWEEP_RANGE;
391 if (sweepdelta == 0.0)
392 sweepdelta = DEFAULT_SWEEP_DELTA;
393 if (minbsdelta == 0.0)
394 minbsdelta = DEFAULT_MINBS_DELTA;
395
396 naskew = pixGetLocalSkewAngles(pixs, nslices, redsweep, redsearch,
397 sweeprange, sweepdelta, minbsdelta,
398 NULL, NULL);
399 if (!naskew)
400 return ERROR_INT("naskew not made", procName, 1);
401
402 deg2rad = 3.14159265 / 180.;
403 w = pixGetWidth(pixs);
404 h = pixGetHeight(pixs);
405 ptas = ptaCreate(4);
406 ptad = ptaCreate(4);
407 *pptas = ptas;
408 *pptad = ptad;
409
410 /* Find i for skew line that intersects LHS at i and RHS at h / 20 */
411 for (i = 0; i < h; i++) {
412 numaGetFValue(naskew, i, &angd);
413 angr = angd * deg2rad;
414 dely = w * tan(angr);
415 if (i - dely > 0.05 * h)
416 break;
417 }
418 ptaAddPt(ptas, 0, i);
419 ptaAddPt(ptas, w - 1, i - dely);
420 ptaAddPt(ptad, 0, i);
421 ptaAddPt(ptad, w - 1, i);
422
423 /* Find i for skew line that intersects LHS at i and RHS at 19h / 20 */
424 for (i = h - 1; i > 0; i--) {
425 numaGetFValue(naskew, i, &angd);
426 angr = angd * deg2rad;
427 dely = w * tan(angr);
428 if (i - dely < 0.95 * h)
429 break;
430 }
431 ptaAddPt(ptas, 0, i);
432 ptaAddPt(ptas, w - 1, i - dely);
433 ptaAddPt(ptad, 0, i);
434 ptaAddPt(ptad, w - 1, i);
435
436 numaDestroy(&naskew);
437 return 0;
438 }
439
440
441 /*!
442 * pixGetLocalSkewAngles()
443 *
444 * Input: pixs
445 * nslices (the number of horizontal overlapping slices; must
446 * be larger than 1 and not exceed 20; use 0 for default)
447 * redsweep (sweep reduction factor: 1, 2, 4 or 8;
448 * use 0 for default value)
449 * redsearch (search reduction factor: 1, 2, 4 or 8, and
450 * not larger than redsweep; use 0 for default value)
451 * sweeprange (half the full range, assumed about 0; in degrees;
452 * use 0.0 for default value)
453 * sweepdelta (angle increment of sweep; in degrees;
454 * use 0.0 for default value)
455 * minbsdelta (min binary search increment angle; in degrees;
456 * use 0.0 for default value)
457 * &a (<optional return> slope of skew as fctn of y)
458 * &b (<optional return> intercept at y=0 of skew as fctn of y)
459 * Return: naskew, or null on error
460 *
461 * Notes:
462 * (1) The local skew is measured in a set of overlapping strips.
463 * We then do a least square linear fit parameters to get
464 * the slope and intercept parameters a and b in
465 * skew-angle = a * y + b (degrees)
466 * for the local skew as a function of raster line y.
467 * This is then used to make naskew, which can be interpreted
468 * as the computed skew angle (in degrees) at the left edge
469 * of each raster line.
470 * (2) naskew can then be used to find the baselines of text, because
471 * each text line has a baseline that should intersect
472 * the left edge of the image with the angle given by this
473 * array, evaluated at the raster line of intersection.
474 */
475 NUMA *
pixGetLocalSkewAngles(PIX * pixs,l_int32 nslices,l_int32 redsweep,l_int32 redsearch,l_float32 sweeprange,l_float32 sweepdelta,l_float32 minbsdelta,l_float32 * pa,l_float32 * pb)476 pixGetLocalSkewAngles(PIX *pixs,
477 l_int32 nslices,
478 l_int32 redsweep,
479 l_int32 redsearch,
480 l_float32 sweeprange,
481 l_float32 sweepdelta,
482 l_float32 minbsdelta,
483 l_float32 *pa,
484 l_float32 *pb)
485 {
486 l_int32 w, h, hs, i, ystart, yend, ovlap, npts;
487 l_float32 angle, conf, ycenter, a, b;
488 BOX *box;
489 NUMA *naskew;
490 PIX *pix;
491 PTA *pta;
492
493 PROCNAME("pixGetLocalSkewAngles");
494
495 if (!pixs)
496 return (NUMA *)ERROR_PTR("pixs not defined", procName, NULL);
497 if (nslices < 2 || nslices > 20)
498 nslices = DEFAULT_SLICES;
499 if (redsweep < 1 || redsweep > 8)
500 redsweep = DEFAULT_SWEEP_REDUCTION;
501 if (redsearch < 1 || redsearch > redsweep)
502 redsearch = DEFAULT_BS_REDUCTION;
503 if (sweeprange == 0.0)
504 sweeprange = DEFAULT_SWEEP_RANGE;
505 if (sweepdelta == 0.0)
506 sweepdelta = DEFAULT_SWEEP_DELTA;
507 if (minbsdelta == 0.0)
508 minbsdelta = DEFAULT_MINBS_DELTA;
509
510 h = pixGetHeight(pixs);
511 w = pixGetWidth(pixs);
512 hs = h / nslices;
513 ovlap = (l_int32)(OVERLAP_FRACTION * hs);
514 pta = ptaCreate(nslices);
515 for (i = 0; i < nslices; i++) {
516 ystart = L_MAX(0, hs * i - ovlap);
517 yend = L_MIN(h - 1, hs * (i + 1) + ovlap);
518 ycenter = (ystart + yend) / 2;
519 box = boxCreate(0, ystart, w, yend - ystart + 1);
520 pix = pixClipRectangle(pixs, box, NULL);
521 pixFindSkewSweepAndSearch(pix, &angle, &conf, redsweep, redsearch,
522 sweeprange, sweepdelta, minbsdelta);
523 if (conf > MIN_ALLOWED_CONFIDENCE)
524 ptaAddPt(pta, ycenter, angle);
525 pixDestroy(&pix);
526 boxDestroy(&box);
527 }
528 /* ptaWriteStream(stderr, pta, 0); */
529
530 /* Do linear least squares fit */
531 if ((npts = ptaGetCount(pta)) < 2) {
532 ptaDestroy(&pta);
533 return (NUMA *)ERROR_PTR("can't fit skew", procName, NULL);
534 }
535 ptaGetLinearLSF(pta, &a, &b, NULL);
536 if (pa) *pa = a;
537 if (pb) *pb = b;
538
539 /* Make skew angle array as function of raster line */
540 naskew = numaCreate(h);
541 for (i = 0; i < h; i++) {
542 angle = a * i + b;
543 numaAddNumber(naskew, angle);
544 }
545
546 #if DEBUG_PLOT
547 { NUMA *nax, *nay;
548 GPLOT *gplot;
549 ptaGetArrays(pta, &nax, &nay);
550 gplot = gplotCreate("junkskew", GPLOT_X11, "skew as fctn of y",
551 "y (in raster lines from top)", "angle (in degrees)");
552 gplotAddPlot(gplot, NULL, naskew, GPLOT_POINTS, "linear lsf");
553 gplotAddPlot(gplot, nax, nay, GPLOT_POINTS, "actual data pts");
554 gplotMakeOutput(gplot);
555 gplotDestroy(&gplot);
556 numaDestroy(&nax);
557 numaDestroy(&nay);
558 }
559 #endif /* DEBUG_PLOT */
560
561 ptaDestroy(&pta);
562 return naskew;
563 }
564
565
566