1 /*====================================================================*
2  -  Copyright (C) 2001 Leptonica.  All rights reserved.
3  -  This software is distributed in the hope that it will be
4  -  useful, but with NO WARRANTY OF ANY KIND.
5  -  No author or distributor accepts responsibility to anyone for the
6  -  consequences of using this software, or for whether it serves any
7  -  particular purpose or works at all, unless he or she says so in
8  -  writing.  Everyone is granted permission to copy, modify and
9  -  redistribute this source code, for commercial or non-commercial
10  -  purposes, with the following restrictions: (1) the origin of this
11  -  source code must not be misrepresented; (2) modified versions must
12  -  be plainly marked as such; and (3) this notice may not be removed
13  -  or altered from any source or modified source distribution.
14  *====================================================================*/
15 
16 /*
17  *  baseline.c
18  *
19  *      Locate text baselines in an image
20  *           NUMA     *pixFindBaselines()
21  *
22  *      Projective transform to remove local skew
23  *           PIX      *pixDeskewLocal()
24  *
25  *      Determine local skew
26  *           l_int32   pixGetLocalSkewTransform()
27  *           NUMA     *pixGetLocalSkewAngles()
28  *
29  *  We have two apparently different functions here:
30  *    - finding baselines
31  *    - finding a projective transform to remove keystone warping
32  *  The function pixGetLocalSkewAngles() returns an array of angles,
33  *  one for each raster line, and the baselines of the text lines
34  *  should intersect the left edge of the image with that angle.
35  */
36 
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <math.h>
40 #include "allheaders.h"
41 
42 #ifndef  NO_CONSOLE_IO
43 #define  DEBUG_PLOT          0
44 #endif  /* NO_CONSOLE_IO */
45 
46     /* Min to travel after finding max before abandoning peak */
47 static const l_int32  MIN_DIST_IN_PEAK = 35;
48 
49     /* Thresholds for peaks and zeros, relative to the max peak */
50 static const l_int32  PEAK_THRESHOLD_RATIO = 20;
51 static const l_int32  ZERO_THRESHOLD_RATIO = 100;
52 
53     /* Default values for determining local skew */
54 static const l_int32  DEFAULT_SLICES = 10;
55 static const l_int32  DEFAULT_SWEEP_REDUCTION = 2;
56 static const l_int32  DEFAULT_BS_REDUCTION = 1;
57 static const l_float32  DEFAULT_SWEEP_RANGE = 5.;   /* degrees */
58 static const l_float32  DEFAULT_SWEEP_DELTA = 1.;   /* degrees */
59 static const l_float32  DEFAULT_MINBS_DELTA = 0.01;   /* degrees */
60 
61     /* Overlap slice fraction added to top and bottom of each slice */
62 static const l_float32  OVERLAP_FRACTION = 0.5;
63 
64     /* Minimum allowed confidence (ratio) for accepting a value */
65 static const l_float32  MIN_ALLOWED_CONFIDENCE = 3.0;
66 
67 
68 /*---------------------------------------------------------------------*
69  *                    Locate text baselines in an image                *
70  *---------------------------------------------------------------------*/
71 /*!
72  *  pixFindBaselines()
73  *
74  *      Input:  pixs (1 bpp)
75  *              &pta (<optional return> pairs of pts corresponding to
76  *                    approx. ends of each text line)
77  *              debug (usually 0; set to 1 for debugging output)
78  *      Return: na (of baseline y values), or null on error
79  *
80  *  Notes:
81  *      (1) Input binary image must have text lines already aligned
82  *          horizontally.  This can be done by either rotating the
83  *          image with pixDeskew(), or, if a projective transform
84  *          is required, by doing pixDeskewLocal() first.
85  *      (2) Input null for &pta if you don't want this returned.
86  *          The pta will come in pairs of points (left and right end
87  *          of each baseline).
88  *      (3) Caution: this will not work properly on text with multiple
89  *          columns, where the lines are not aligned between columns.
90  *          If there are multiple columns, they should be extracted
91  *          separately before finding the baselines.
92  *      (4) This function constructs different types of output
93  *          for baselines; namely, a set of raster line values and
94  *          a set of end points of each baseline.
95  *      (5) This function was designed to handle short and long text lines
96  *          without using dangerous thresholds on the peak heights.  It does
97  *          this by combining the differential signal with a morphological
98  *          analysis of the locations of the text lines.  One can also
99  *          combine this data to normalize the peak heights, by weighting
100  *          the differential signal in the region of each baseline
101  *          by the inverse of the width of the text line found there.
102  *      (6) There are various debug sections that can be turned on
103  *          with the debug flag.
104  */
105 NUMA *
pixFindBaselines(PIX * pixs,PTA ** ppta,l_int32 debug)106 pixFindBaselines(PIX     *pixs,
107                  PTA    **ppta,
108                  l_int32  debug)
109 {
110 l_int32    w, h, i, j, nbox, val1, val2, ndiff, bx, by, bw, bh;
111 l_int32    imaxloc, peakthresh, zerothresh, inpeak;
112 l_int32    mintosearch, max, maxloc, nloc, locval;
113 l_int32   *array;
114 l_float32  maxval;
115 BOXA      *boxa1, *boxa2, *boxa3;
116 GPLOT     *gplot;
117 NUMA      *nasum, *nadiff, *naloc, *naval;
118 PIX       *pixt1, *pixt2;
119 PTA       *pta;
120 
121     PROCNAME("pixFindBaselines");
122 
123     if (!pixs)
124         return (NUMA *)ERROR_PTR("pixs not defined", procName, NULL);
125     pta = NULL;
126     if (ppta) {
127         pta = ptaCreate(0);
128         *ppta = pta;
129     }
130 
131         /* Close up the text characters, removing noise */
132     pixt1 = pixMorphSequence(pixs, "c25.1 + e3.1", 0);
133 
134         /* Save the difference of adjacent row sums.
135          * The high positive-going peaks are the baselines */
136     if ((nasum = pixCountPixelsByRow(pixt1, NULL)) == NULL)
137         return (NUMA *)ERROR_PTR("nasum not made", procName, NULL);
138     w = pixGetWidth(pixs);
139     h = pixGetHeight(pixs);
140     nadiff = numaCreate(h);
141     numaGetIValue(nasum, 0, &val2);
142     for (i = 0; i < h - 1; i++) {
143         val1 = val2;
144         numaGetIValue(nasum, i + 1, &val2);
145         numaAddNumber(nadiff, val1 - val2);
146     }
147 
148     if (debug)  /* show the difference signal */
149         gplotSimple1(nadiff, GPLOT_X11, "junkdiff", "difference");
150 
151         /* Use the zeroes of the profile to locate each baseline. */
152     array = numaGetIArray(nadiff);
153     ndiff = numaGetCount(nadiff);
154     numaGetMax(nadiff, &maxval, &imaxloc);
155         /* Use this to begin locating a new peak: */
156     peakthresh = (l_int32)maxval / PEAK_THRESHOLD_RATIO;
157         /* Use this to begin a region between peaks: */
158     zerothresh = (l_int32)maxval / ZERO_THRESHOLD_RATIO;
159     naloc = numaCreate(0);
160     naval = numaCreate(0);
161     inpeak = FALSE;
162     for (i = 0; i < ndiff; i++) {
163         if (inpeak == FALSE) {
164             if (array[i] > peakthresh) {  /* transition to in-peak */
165                 inpeak = TRUE;
166                 mintosearch = i + MIN_DIST_IN_PEAK; /* accept no zeros
167                                                * between i and mintosearch */
168                 max = array[i];
169                 maxloc = i;
170             }
171         }
172         else {  /* inpeak == TRUE; look for max */
173             if (array[i] > max) {
174                 max = array[i];
175                 maxloc = i;
176                 mintosearch = i + MIN_DIST_IN_PEAK;
177             }
178             else if (i > mintosearch && array[i] <= zerothresh) {  /* leave */
179                 inpeak = FALSE;
180                 numaAddNumber(naval, max);
181                 numaAddNumber(naloc, maxloc);
182             }
183         }
184     }
185 
186         /* If array[ndiff-1] is max, eg. no descenders, baseline at bottom */
187     if (inpeak) {
188         numaAddNumber(naval, max);
189         numaAddNumber(naloc, maxloc);
190     }
191     FREE(array);
192 
193     if (debug) {  /* show the raster locations for the peaks */
194         gplot = gplotCreate("junkloc", GPLOT_X11, "Peak locations",
195                             "rasterline", "height");
196         gplotAddPlot(gplot, naloc, naval, GPLOT_POINTS, "locs");
197         gplotMakeOutput(gplot);
198         gplotDestroy(&gplot);
199     }
200 
201         /* Generate an approximate profile of text line width.
202          * First, filter the boxes of text, where there may be
203          * more than one box for a given textline. */
204     pixt2 = pixMorphSequence(pixt1, "r11 + c25.1 + o7.1 +c1.3", 0);
205     boxa1 = pixConnComp(pixt2, NULL, 4);
206     boxa2 = boxaTransform(boxa1, 0, 0, 4., 4.);
207     boxa3 = boxaSort(boxa2, L_SORT_BY_Y, L_SORT_INCREASING, NULL);
208 
209         /* Then find the baseline segments */
210     if (pta) {
211       nloc = numaGetCount(naloc);
212       nbox = boxaGetCount(boxa3);
213       for (i = 0; i < nbox; i++) {
214           boxaGetBoxGeometry(boxa3, i, &bx, &by, &bw, &bh);
215           for (j = 0; j < nloc; j++) {
216               numaGetIValue(naloc, j, &locval);
217               if (L_ABS(locval - (by + bh)) > 25)
218                   continue;
219               ptaAddPt(pta, bx, locval);
220               ptaAddPt(pta, bx + bw, locval);
221               break;
222           }
223       }
224     }
225 
226     if (debug) {  /* display baselines */
227         PIX     *pixd;
228         l_int32  npts, x1, y1, x2, y2;
229         if (pta) {
230             pixd = pixConvertTo32(pixs);
231             npts = ptaGetCount(pta);
232             for (i = 0; i < npts; i += 2) {
233                 ptaGetIPt(pta, i, &x1, &y1);
234                 ptaGetIPt(pta, i + 1, &x2, &y2);
235                 pixRenderLineArb(pixd, x1, y1, x2, y2, 1, 255, 0, 0);
236             }
237             pixDisplay(pixd, 200, 200);
238             pixWrite("junkbaselines", pixd, IFF_PNG);
239             pixDestroy(&pixd);
240         }
241     }
242 
243     boxaDestroy(&boxa1);
244     boxaDestroy(&boxa2);
245     boxaDestroy(&boxa3);
246     pixDestroy(&pixt1);
247     pixDestroy(&pixt2);
248     numaDestroy(&nasum);
249     numaDestroy(&nadiff);
250     numaDestroy(&naval);
251     return naloc;
252 }
253 
254 
255 /*---------------------------------------------------------------------*
256  *               Projective transform to remove local skew             *
257  *---------------------------------------------------------------------*/
258 /*!
259  *  pixDeskewLocal()
260  *
261  *      Input:  pixs
262  *              nslices  (the number of horizontal overlapping slices; must
263  *                  be larger than 1 and not exceed 20; use 0 for default)
264  *              redsweep (sweep reduction factor: 1, 2, 4 or 8;
265  *                        use 0 for default value)
266  *              redsearch (search reduction factor: 1, 2, 4 or 8, and
267  *                         not larger than redsweep; use 0 for default value)
268  *              sweeprange (half the full range, assumed about 0; in degrees;
269  *                          use 0.0 for default value)
270  *              sweepdelta (angle increment of sweep; in degrees;
271  *                          use 0.0 for default value)
272  *              minbsdelta (min binary search increment angle; in degrees;
273  *                          use 0.0 for default value)
274  *      Return: pixd, or null on error
275  *
276  *  Notes:
277  *      (1) This function allows deskew of a page whose skew changes
278  *          approximately linearly with vertical position.  It uses
279  *          a projective tranform that in effect does a differential
280  *          shear about the LHS of the page, and makes all text lines
281  *          horizontal.
282  *      (2) The origin of the keystoning can be either a cheap document
283  *          feeder that rotates the page as it is passed through, or a
284  *          camera image taken from either the left or right side
285  *          of the vertical.
286  *      (3) The image transformation is a projective warping,
287  *          not a rotation.  Apart from this function, the text lines
288  *          must be properly aligned vertically with respect to each
289  *          other.  This can be done by pre-processing the page; e.g.,
290  *          by rotating or horizontally shearing it.
291  *          Typically, this can be achieved by vertically aligning
292  *          the page edge.
293  */
294 PIX *
pixDeskewLocal(PIX * pixs,l_int32 nslices,l_int32 redsweep,l_int32 redsearch,l_float32 sweeprange,l_float32 sweepdelta,l_float32 minbsdelta)295 pixDeskewLocal(PIX       *pixs,
296                l_int32    nslices,
297                l_int32    redsweep,
298                l_int32    redsearch,
299                l_float32  sweeprange,
300                l_float32  sweepdelta,
301                l_float32  minbsdelta)
302 {
303 l_int32    ret;
304 PIX       *pixd;
305 PTA       *ptas, *ptad;
306 
307     PROCNAME("pixDeskewLocal");
308 
309     if (!pixs)
310         return (PIX *)ERROR_PTR("pixs not defined", procName, NULL);
311 
312         /* Skew array gives skew angle (deg) as fctn of raster line
313          * where it intersects the LHS of the image */
314     ret = pixGetLocalSkewTransform(pixs, nslices, redsweep, redsearch,
315                                    sweeprange, sweepdelta, minbsdelta,
316                                    &ptas, &ptad);
317     if (ret != 0)
318         return (PIX *)ERROR_PTR("transform pts not found", procName, NULL);
319 
320         /* Use a projective transform */
321     pixd = pixProjectiveSampledPta(pixs, ptad, ptas, L_BRING_IN_WHITE);
322 
323     ptaDestroy(&ptas);
324     ptaDestroy(&ptad);
325     return pixd;
326 }
327 
328 
329 /*---------------------------------------------------------------------*
330  *                       Determine the local skew                      *
331  *---------------------------------------------------------------------*/
332 /*!
333  *  pixGetLocalSkewTransform()
334  *
335  *      Input:  pixs
336  *              nslices  (the number of horizontal overlapping slices; must
337  *                  be larger than 1 and not exceed 20; use 0 for default)
338  *              redsweep (sweep reduction factor: 1, 2, 4 or 8;
339  *                        use 0 for default value)
340  *              redsearch (search reduction factor: 1, 2, 4 or 8, and
341  *                         not larger than redsweep; use 0 for default value)
342  *              sweeprange (half the full range, assumed about 0; in degrees;
343  *                          use 0.0 for default value)
344  *              sweepdelta (angle increment of sweep; in degrees;
345  *                          use 0.0 for default value)
346  *              minbsdelta (min binary search increment angle; in degrees;
347  *                          use 0.0 for default value)
348  *              &ptas  (<return> 4 points in the source)
349  *              &ptad  (<return> the corresponding 4 pts in the dest)
350  *      Return: 0 if OK, 1 on error
351  *
352  *  Notes:
353  *      (1) This generates two pairs of points in the src, each pair
354  *          corresponding to a pair of points that would lie along
355  *          the same raster line in a transformed (dewarped) image.
356  *      (2) The sets of 4 src and 4 dest points returned by this function
357  *          can then be used, in a projective or bilinear transform,
358  *          to remove keystoning in the src.
359  */
360 l_int32
pixGetLocalSkewTransform(PIX * pixs,l_int32 nslices,l_int32 redsweep,l_int32 redsearch,l_float32 sweeprange,l_float32 sweepdelta,l_float32 minbsdelta,PTA ** pptas,PTA ** pptad)361 pixGetLocalSkewTransform(PIX       *pixs,
362                          l_int32    nslices,
363                          l_int32    redsweep,
364                          l_int32    redsearch,
365                          l_float32  sweeprange,
366                          l_float32  sweepdelta,
367                          l_float32  minbsdelta,
368                          PTA      **pptas,
369                          PTA      **pptad)
370 {
371 l_int32    w, h, i;
372 l_float32  deg2rad, angr, angd, dely;
373 NUMA      *naskew;
374 PTA       *ptas, *ptad;
375 
376     PROCNAME("pixGetLocalSkewTransform");
377 
378     if (!pptas || !pptad)
379         return ERROR_INT("&ptas and &ptad not defined", procName, 1);
380     *pptas = *pptad = NULL;
381     if (!pixs)
382         return ERROR_INT("pixs not defined", procName, 1);
383     if (nslices < 2 || nslices > 20)
384         nslices = DEFAULT_SLICES;
385     if (redsweep < 1 || redsweep > 8)
386         redsweep = DEFAULT_SWEEP_REDUCTION;
387     if (redsearch < 1 || redsearch > redsweep)
388         redsearch = DEFAULT_BS_REDUCTION;
389     if (sweeprange == 0.0)
390         sweeprange = DEFAULT_SWEEP_RANGE;
391     if (sweepdelta == 0.0)
392         sweepdelta = DEFAULT_SWEEP_DELTA;
393     if (minbsdelta == 0.0)
394         minbsdelta = DEFAULT_MINBS_DELTA;
395 
396     naskew = pixGetLocalSkewAngles(pixs, nslices, redsweep, redsearch,
397                                    sweeprange, sweepdelta, minbsdelta,
398                                    NULL, NULL);
399     if (!naskew)
400         return ERROR_INT("naskew not made", procName, 1);
401 
402     deg2rad = 3.14159265 / 180.;
403     w = pixGetWidth(pixs);
404     h = pixGetHeight(pixs);
405     ptas = ptaCreate(4);
406     ptad = ptaCreate(4);
407     *pptas = ptas;
408     *pptad = ptad;
409 
410         /* Find i for skew line that intersects LHS at i and RHS at h / 20 */
411     for (i = 0; i < h; i++) {
412         numaGetFValue(naskew, i, &angd);
413         angr = angd * deg2rad;
414         dely = w * tan(angr);
415         if (i - dely > 0.05 * h)
416             break;
417     }
418     ptaAddPt(ptas, 0, i);
419     ptaAddPt(ptas, w - 1, i - dely);
420     ptaAddPt(ptad, 0, i);
421     ptaAddPt(ptad, w - 1, i);
422 
423         /* Find i for skew line that intersects LHS at i and RHS at 19h / 20 */
424     for (i = h - 1; i > 0; i--) {
425         numaGetFValue(naskew, i, &angd);
426         angr = angd * deg2rad;
427         dely = w * tan(angr);
428         if (i - dely < 0.95 * h)
429             break;
430     }
431     ptaAddPt(ptas, 0, i);
432     ptaAddPt(ptas, w - 1, i - dely);
433     ptaAddPt(ptad, 0, i);
434     ptaAddPt(ptad, w - 1, i);
435 
436     numaDestroy(&naskew);
437     return 0;
438 }
439 
440 
441 /*!
442  *  pixGetLocalSkewAngles()
443  *
444  *      Input:  pixs
445  *              nslices  (the number of horizontal overlapping slices; must
446  *                  be larger than 1 and not exceed 20; use 0 for default)
447  *              redsweep (sweep reduction factor: 1, 2, 4 or 8;
448  *                        use 0 for default value)
449  *              redsearch (search reduction factor: 1, 2, 4 or 8, and
450  *                         not larger than redsweep; use 0 for default value)
451  *              sweeprange (half the full range, assumed about 0; in degrees;
452  *                          use 0.0 for default value)
453  *              sweepdelta (angle increment of sweep; in degrees;
454  *                          use 0.0 for default value)
455  *              minbsdelta (min binary search increment angle; in degrees;
456  *                          use 0.0 for default value)
457  *              &a (<optional return> slope of skew as fctn of y)
458  *              &b (<optional return> intercept at y=0 of skew as fctn of y)
459  *      Return: naskew, or null on error
460  *
461  *  Notes:
462  *      (1) The local skew is measured in a set of overlapping strips.
463  *          We then do a least square linear fit parameters to get
464  *          the slope and intercept parameters a and b in
465  *              skew-angle = a * y + b  (degrees)
466  *          for the local skew as a function of raster line y.
467  *          This is then used to make naskew, which can be interpreted
468  *          as the computed skew angle (in degrees) at the left edge
469  *          of each raster line.
470  *      (2) naskew can then be used to find the baselines of text, because
471  *          each text line has a baseline that should intersect
472  *          the left edge of the image with the angle given by this
473  *          array, evaluated at the raster line of intersection.
474  */
475 NUMA *
pixGetLocalSkewAngles(PIX * pixs,l_int32 nslices,l_int32 redsweep,l_int32 redsearch,l_float32 sweeprange,l_float32 sweepdelta,l_float32 minbsdelta,l_float32 * pa,l_float32 * pb)476 pixGetLocalSkewAngles(PIX        *pixs,
477                       l_int32     nslices,
478                       l_int32     redsweep,
479                       l_int32     redsearch,
480                       l_float32   sweeprange,
481                       l_float32   sweepdelta,
482                       l_float32   minbsdelta,
483                       l_float32  *pa,
484                       l_float32  *pb)
485 {
486 l_int32    w, h, hs, i, ystart, yend, ovlap, npts;
487 l_float32  angle, conf, ycenter, a, b;
488 BOX       *box;
489 NUMA      *naskew;
490 PIX       *pix;
491 PTA       *pta;
492 
493     PROCNAME("pixGetLocalSkewAngles");
494 
495     if (!pixs)
496         return (NUMA *)ERROR_PTR("pixs not defined", procName, NULL);
497     if (nslices < 2 || nslices > 20)
498         nslices = DEFAULT_SLICES;
499     if (redsweep < 1 || redsweep > 8)
500         redsweep = DEFAULT_SWEEP_REDUCTION;
501     if (redsearch < 1 || redsearch > redsweep)
502         redsearch = DEFAULT_BS_REDUCTION;
503     if (sweeprange == 0.0)
504         sweeprange = DEFAULT_SWEEP_RANGE;
505     if (sweepdelta == 0.0)
506         sweepdelta = DEFAULT_SWEEP_DELTA;
507     if (minbsdelta == 0.0)
508         minbsdelta = DEFAULT_MINBS_DELTA;
509 
510     h = pixGetHeight(pixs);
511     w = pixGetWidth(pixs);
512     hs = h / nslices;
513     ovlap = (l_int32)(OVERLAP_FRACTION * hs);
514     pta = ptaCreate(nslices);
515     for (i = 0; i < nslices; i++) {
516         ystart = L_MAX(0, hs * i - ovlap);
517         yend = L_MIN(h - 1, hs * (i + 1) + ovlap);
518         ycenter = (ystart + yend) / 2;
519         box = boxCreate(0, ystart, w, yend - ystart + 1);
520         pix = pixClipRectangle(pixs, box, NULL);
521         pixFindSkewSweepAndSearch(pix, &angle, &conf, redsweep, redsearch,
522                                   sweeprange, sweepdelta, minbsdelta);
523         if (conf > MIN_ALLOWED_CONFIDENCE)
524             ptaAddPt(pta, ycenter, angle);
525         pixDestroy(&pix);
526         boxDestroy(&box);
527     }
528 /*    ptaWriteStream(stderr, pta, 0); */
529 
530         /* Do linear least squares fit */
531     if ((npts = ptaGetCount(pta)) < 2) {
532         ptaDestroy(&pta);
533         return (NUMA *)ERROR_PTR("can't fit skew", procName, NULL);
534     }
535     ptaGetLinearLSF(pta, &a, &b, NULL);
536     if (pa) *pa = a;
537     if (pb) *pb = b;
538 
539         /* Make skew angle array as function of raster line */
540     naskew = numaCreate(h);
541     for (i = 0; i < h; i++) {
542         angle = a * i + b;
543         numaAddNumber(naskew, angle);
544     }
545 
546 #if  DEBUG_PLOT
547 { NUMA   *nax, *nay;
548   GPLOT  *gplot;
549     ptaGetArrays(pta, &nax, &nay);
550     gplot = gplotCreate("junkskew", GPLOT_X11, "skew as fctn of y",
551                         "y (in raster lines from top)", "angle (in degrees)");
552     gplotAddPlot(gplot, NULL, naskew, GPLOT_POINTS, "linear lsf");
553     gplotAddPlot(gplot, nax, nay, GPLOT_POINTS, "actual data pts");
554     gplotMakeOutput(gplot);
555     gplotDestroy(&gplot);
556     numaDestroy(&nax);
557     numaDestroy(&nay);
558 }
559 #endif  /* DEBUG_PLOT */
560 
561     ptaDestroy(&pta);
562     return naskew;
563 }
564 
565 
566