1 /*====================================================================*
2  -  Copyright (C) 2001 Leptonica.  All rights reserved.
3  -
4  -  Redistribution and use in source and binary forms, with or without
5  -  modification, are permitted provided that the following conditions
6  -  are met:
7  -  1. Redistributions of source code must retain the above copyright
8  -     notice, this list of conditions and the following disclaimer.
9  -  2. Redistributions in binary form must reproduce the above
10  -     copyright notice, this list of conditions and the following
11  -     disclaimer in the documentation and/or other materials
12  -     provided with the distribution.
13  -
14  -  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15  -  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16  -  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17  -  A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL ANY
18  -  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19  -  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20  -  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21  -  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22  -  OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23  -  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24  -  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  *====================================================================*/
26 
27 /*
28  * jbclass.c
29  *
30  *     These are functions for unsupervised classification of
31  *     collections of connected components -- either characters or
32  *     words -- in binary images.  They can be used as image
33  *     processing steps in jbig2 compression.
34  *
35  *     Initialization
36  *
37  *         JBCLASSER         *jbRankHausInit()      [rank hausdorff encoder]
38  *         JBCLASSER         *jbCorrelationInit()   [correlation encoder]
39  *         JBCLASSER         *jbCorrelationInitWithoutComponents()  [ditto]
40  *         static JBCLASSER  *jbCorrelationInitInternal()
41  *
42  *     Classify the pages
43  *
44  *         l_int32     jbAddPages()
45  *         l_int32     jbAddPage()
46  *         l_int32     jbAddPageComponents()
47  *
48  *     Rank hausdorff classifier
49  *
50  *         l_int32     jbClassifyRankHaus()
51  *         l_int32     pixHaustest()
52  *         l_int32     pixRankHaustest()
53  *
54  *     Binary correlation classifier
55  *
56  *         l_int32     jbClassifyCorrelation()
57  *
58  *     Determine the image components we start with
59  *
60  *         l_int32     jbGetComponents()
61  *         l_int32     pixWordMaskByDilation()
62  *         l_int32     pixWordBoxesByDilation()
63  *
64  *     Build grayscale composites (templates)
65  *
66  *         PIXA       *jbAccumulateComposites
67  *         PIXA       *jbTemplatesFromComposites
68  *
69  *     Utility functions for Classer
70  *
71  *         JBCLASSER  *jbClasserCreate()
72  *         void        jbClasserDestroy()
73  *
74  *     Utility functions for Data
75  *
76  *         JBDATA     *jbDataSave()
77  *         void        jbDataDestroy()
78  *         l_int32     jbDataWrite()
79  *         JBDATA     *jbDataRead()
80  *         PIXA       *jbDataRender()
81  *         l_int32     jbGetULCorners()
82  *         l_int32     jbGetLLCorners()
83  *
84  *     Static helpers
85  *
86  *         static JBFINDCTX *findSimilarSizedTemplatesInit()
87  *         static l_int32    findSimilarSizedTemplatesNext()
88  *         static void       findSimilarSizedTemplatesDestroy()
89  *         static l_int32    finalPositioningForAlignment()
90  *
91  *     Note: this is NOT an implementation of the JPEG jbig2
92  *     proposed standard encoder, the specifications for which
93  *     can be found at http://www.jpeg.org/jbigpt2.html.
94  *     (See below for a full implementation.)
95  *     It is an implementation of the lower-level part of an encoder that:
96  *
97  *        (1) identifies connected components that are going to be used
98  *        (2) puts them in similarity classes (this is an unsupervised
99  *            classifier), and
100  *        (3) stores the result in a simple file format (2 files,
101  *            one for templates and one for page/coordinate/template-index
102  *            quartets).
103  *
104  *     An actual implementation of the official jbig2 encoder could
105  *     start with parts (1) and (2), and would then compress the quartets
106  *     according to the standards requirements (e.g., Huffman or
107  *     arithmetic coding of coordinate differences and image templates).
108  *
109  *     The low-level part of the encoder provided here has the
110  *     following useful features:
111  *
112  *         ~ It is accurate in the identification of templates
113  *           and classes because it uses a windowed hausdorff
114  *           distance metric.
115  *         ~ It is accurate in the placement of the connected
116  *           components, doing a two step process of first aligning
117  *           the the centroids of the template with those of each instance,
118  *           and then making a further correction of up to +- 1 pixel
119  *           in each direction to best align the templates.
120  *         ~ It is fast because it uses a morphologically based
121  *           matching algorithm to implement the hausdorff criterion,
122  *           and it selects the patterns that are possible matches
123  *           based on their size.
124  *
125  *     We provide two different matching functions, one using Hausdorff
126  *     distance and one using a simple image correlation.
127  *     The Hausdorff method sometimes produces better results for the
128  *     same number of classes, because it gives a relatively small
129  *     effective weight to foreground pixels near the boundary,
130  *     and a relatively  large weight to foreground pixels that are
131  *     not near the boundary.  By effectively ignoring these boundary
132  *     pixels, Hausdorff weighting corresponds better to the expected
133  *     probabilities of the pixel values in a scanned image, where the
134  *     variations in instances of the same printed character are much
135  *     more likely to be in pixels near the boundary.  By contrast,
136  *     the correlation method gives equal weight to all foreground pixels.
137  *
138  *     For best results, use the correlation method.  Correlation takes
139  *     the number of fg pixels in the AND of instance and template,
140  *     divided by the product of the number of fg pixels in instance
141  *     and template.  It compares this with a threshold that, in
142  *     general, depends on the fractional coverage of the template.
143  *     For heavy text, the threshold is raised above that for light
144  *     text,  By using both these parameters (basic threshold and
145  *     adjustment factor for text weight), one has more flexibility
146  *     and can arrive at the fewest substitution errors, although
147  *     this comes at the price of more templates.
148  *
149  *     The strict Hausdorff scoring is not a rank weighting, because a
150  *     single pixel beyond the given distance will cause a match
151  *     failure.  A rank Hausdorff is more robust to non-boundary noise,
152  *     but it is also more susceptible to confusing components that
153  *     should be in different classes.  For implementing a jbig2
154  *     application for visually lossless binary image compression,
155  *     you have two choices:
156  *
157  *        (1) use a 3x3 structuring element (size = 3) and a strict
158  *            Hausdorff comparison (rank = 1.0 in the rank Hausdorff
159  *            function).  This will result in a minimal number of classes,
160  *            but confusion of small characters, such as italic and
161  *            non-italic lower-case 'o', can still occur.
162  *        (2) use the correlation method with a threshold of 0.85
163  *            and a weighting factor of about 0.7.  This will result in
164  *            a larger number of classes, but should not be confused
165  *            either by similar small characters or by extremely
166  *            thick sans serif characters, such as in prog/cootoots.png.
167  *
168  *     As mentioned above, if visual substitution errors must be
169  *     avoided, you should use the correlation method.
170  *
171  *     We provide executables that show how to do the encoding:
172  *         prog/jbrankhaus.c
173  *         prog/jbcorrelation.c
174  *
175  *     The basic flow for correlation classification goes as follows,
176  *     where specific choices have been made for parameters (Hausdorff
177  *     is the same except for initialization):
178  *
179  *             // Initialize and save data in the classer
180  *         JBCLASSER *classer =
181  *             jbCorrelationInit(JB_CONN_COMPS, 0, 0, 0.8, 0.7);
182  *         SARRAY *safiles = getSortedPathnamesInDirectory(directory,
183  *                                                         NULL, 0, 0);
184  *         jbAddPages(classer, safiles);
185  *
186  *             // Save the data in a data structure for serialization,
187  *             // and write it into two files.
188  *         JBDATA *data = jbDataSave(classer);
189  *         jbDataWrite(rootname, data);
190  *
191  *             // Reconstruct (render) the pages from the encoded data.
192  *         PIXA *pixa = jbDataRender(data, FALSE);
193  *
194  *     Adam Langley has built a jbig2 standards-compliant encoder, the
195  *     first one to appear in open source.  You can get this encoder at:
196  *          http://www.imperialviolet.org/jbig2.html
197  *
198  *     It uses arithmetic encoding throughout.  It encodes binary images
199  *     losslessly with a single arithmetic coding over the full image.
200  *     It also does both lossy and lossless encoding from connected
201  *     components, using leptonica to generate the templates representing
202  *     each cluster.
203  */
204 
205 #include <string.h>
206 #include <math.h>
207 #include "allheaders.h"
208 
209 static const l_int32  L_BUF_SIZE = 512;
210 
211     /* For jbClassifyRankHaus(): size of border added around
212      * pix of each c.c., to allow further processing.  This
213      * should be at least the sum of the MAX_DIFF_HEIGHT
214      * (or MAX_DIFF_WIDTH) and one-half the size of the Sel  */
215 static const l_int32  JB_ADDED_PIXELS = 6;
216 
217     /* For pixHaustest(), pixRankHaustest() and pixCorrelationScore():
218      * choose these to be 2 or greater */
219 static const l_int32  MAX_DIFF_WIDTH = 2;  /* use at least 2 */
220 static const l_int32  MAX_DIFF_HEIGHT = 2;  /* use at least 2 */
221 
222     /* In initialization, you have the option to discard components
223      * (cc, characters or words) that have either width or height larger
224      * than a given size.  This is convenient for jbDataSave(), because
225      * the components are placed onto a regular lattice with cell
226      * dimension equal to the maximum component size.  The default
227      * values are given here.  If you want to save all components,
228      * use a sufficiently large set of dimensions. */
229 static const l_int32  MAX_CONN_COMP_WIDTH = 350;  /* default max cc width */
230 static const l_int32  MAX_CHAR_COMP_WIDTH = 350;  /* default max char width */
231 static const l_int32  MAX_WORD_COMP_WIDTH = 1000;  /* default max word width */
232 static const l_int32  MAX_COMP_HEIGHT = 120;  /* default max component height */
233 
234     /* This stores the state of a state machine which fetches
235      * similar sized templates */
236 struct JbFindTemplatesState
237 {
238     JBCLASSER       *classer;    /* classer                               */
239     l_int32          w;          /* desired width                         */
240     l_int32          h;          /* desired height                        */
241     l_int32          i;          /* index into two_by_two step array      */
242     L_DNA           *dna;        /* current number array                  */
243     l_int32          n;          /* current element of dna                */
244 };
245 typedef struct JbFindTemplatesState JBFINDCTX;
246 
247     /* Static initialization function */
248 static JBCLASSER * jbCorrelationInitInternal(l_int32 components,
249                        l_int32 maxwidth, l_int32 maxheight, l_float32 thresh,
250                        l_float32 weightfactor, l_int32 keep_components);
251 
252     /* Static helper functions */
253 static JBFINDCTX * findSimilarSizedTemplatesInit(JBCLASSER *classer, PIX *pixs);
254 static l_int32 findSimilarSizedTemplatesNext(JBFINDCTX *context);
255 static void findSimilarSizedTemplatesDestroy(JBFINDCTX **pcontext);
256 static l_int32 finalPositioningForAlignment(PIX *pixs, l_int32 x, l_int32 y,
257                              l_int32 idelx, l_int32 idely, PIX *pixt,
258                              l_int32 *sumtab, l_int32 *pdx, l_int32 *pdy);
259 
260 #ifndef NO_CONSOLE_IO
261 #define  DEBUG_CORRELATION_SCORE   0
262 #endif  /* ~NO_CONSOLE_IO */
263 
264 
265 /*----------------------------------------------------------------------*
266  *                            Initialization                            *
267  *----------------------------------------------------------------------*/
268 /*!
269  * \brief   jbRankHausInit()
270  *
271  * \param[in]    components JB_CONN_COMPS, JB_CHARACTERS, JB_WORDS
272  * \param[in]    maxwidth of component; use 0 for default
273  * \param[in]    maxheight of component; use 0 for default
274  * \param[in]    size  of square structuring element; 2, representing
275  *                     2x2 sel, is necessary for reasonable accuracy of
276  *                     small components; combine this with rank ~ 0.97
277  *                     to avoid undue class expansion
278  * \param[in]    rank rank val of match, each way; in [0.5 - 1.0];
279  *                    when using size = 2, 0.97 is a reasonable value
280  * \return  jbclasser if OK; NULL on error
281  */
282 JBCLASSER *
jbRankHausInit(l_int32 components,l_int32 maxwidth,l_int32 maxheight,l_int32 size,l_float32 rank)283 jbRankHausInit(l_int32    components,
284                l_int32    maxwidth,
285                l_int32    maxheight,
286                l_int32    size,
287                l_float32  rank)
288 {
289 JBCLASSER  *classer;
290 
291     PROCNAME("jbRankHausInit");
292 
293     if (components != JB_CONN_COMPS && components != JB_CHARACTERS &&
294         components != JB_WORDS)
295         return (JBCLASSER *)ERROR_PTR("invalid components", procName, NULL);
296     if (size < 1 || size > 10)
297         return (JBCLASSER *)ERROR_PTR("size not reasonable", procName, NULL);
298     if (rank < 0.5 || rank > 1.0)
299         return (JBCLASSER *)ERROR_PTR("rank not in [0.5-1.0]", procName, NULL);
300     if (maxwidth == 0) {
301         if (components == JB_CONN_COMPS)
302             maxwidth = MAX_CONN_COMP_WIDTH;
303         else if (components == JB_CHARACTERS)
304             maxwidth = MAX_CHAR_COMP_WIDTH;
305         else  /* JB_WORDS */
306             maxwidth = MAX_WORD_COMP_WIDTH;
307     }
308     if (maxheight == 0)
309         maxheight = MAX_COMP_HEIGHT;
310 
311     if ((classer = jbClasserCreate(JB_RANKHAUS, components)) == NULL)
312         return (JBCLASSER *)ERROR_PTR("classer not made", procName, NULL);
313     classer->maxwidth = maxwidth;
314     classer->maxheight = maxheight;
315     classer->sizehaus = size;
316     classer->rankhaus = rank;
317     classer->dahash = l_dnaHashCreate(5507, 4);  /* 5507 is prime */
318     classer->keep_pixaa = 1;  /* keep all components in pixaa */
319     return classer;
320 }
321 
322 
323 /*!
324  * \brief   jbCorrelationInit()
325  *
326  * \param[in]    components JB_CONN_COMPS, JB_CHARACTERS, JB_WORDS
327  * \param[in]    maxwidth of component; use 0 for default
328  * \param[in]    maxheight of component; use 0 for default
329  * \param[in]    thresh value for correlation score: in [0.4 - 0.98]
330  * \param[in]    weightfactor corrects thresh for thick characters [0.0 - 1.0]
331  * \return  jbclasser if OK; NULL on error
332  *
333  * <pre>
334  * Notes:
335  *      (1) For scanned text, suggested input values are:
336  *            thresh ~ [0.8 - 0.85]
337  *            weightfactor ~ [0.5 - 0.6]
338  *      (2) For electronically generated fonts (e.g., rasterized pdf),
339  *          a very high thresh (e.g., 0.95) will not cause a significant
340  *          increase in the number of classes.
341  * </pre>
342  */
343 JBCLASSER *
jbCorrelationInit(l_int32 components,l_int32 maxwidth,l_int32 maxheight,l_float32 thresh,l_float32 weightfactor)344 jbCorrelationInit(l_int32    components,
345                   l_int32    maxwidth,
346                   l_int32    maxheight,
347                   l_float32  thresh,
348                   l_float32  weightfactor)
349 {
350     return jbCorrelationInitInternal(components, maxwidth, maxheight, thresh,
351                                      weightfactor, 1);
352 }
353 
354 /*!
355  * \brief   jbCorrelationInitWithoutComponents()
356  *
357  * \param[in]    components JB_CONN_COMPS, JB_CHARACTERS, JB_WORDS
358  * \param[in]    maxwidth of component; use 0 for default
359  * \param[in]    maxheight of component; use 0 for default
360  * \param[in]    thresh value for correlation score: in [0.4 - 0.98]
361  * \param[in]    weightfactor corrects thresh for thick characters [0.0 - 1.0]
362  * \return  jbclasser if OK; NULL on error
363  *
364  * <pre>
365  * Notes:
366  *      Acts the same as jbCorrelationInit(), but the resulting
367  *      object doesn't keep a list of all the components.
368  * </pre>
369  */
370 JBCLASSER *
jbCorrelationInitWithoutComponents(l_int32 components,l_int32 maxwidth,l_int32 maxheight,l_float32 thresh,l_float32 weightfactor)371 jbCorrelationInitWithoutComponents(l_int32    components,
372                                    l_int32    maxwidth,
373                                    l_int32    maxheight,
374                                    l_float32  thresh,
375                                    l_float32  weightfactor)
376 {
377     return jbCorrelationInitInternal(components, maxwidth, maxheight, thresh,
378                                      weightfactor, 0);
379 }
380 
381 
382 static JBCLASSER *
jbCorrelationInitInternal(l_int32 components,l_int32 maxwidth,l_int32 maxheight,l_float32 thresh,l_float32 weightfactor,l_int32 keep_components)383 jbCorrelationInitInternal(l_int32    components,
384                           l_int32    maxwidth,
385                           l_int32    maxheight,
386                           l_float32  thresh,
387                           l_float32  weightfactor,
388                           l_int32    keep_components)
389 {
390 JBCLASSER  *classer;
391 
392     PROCNAME("jbCorrelationInitInternal");
393 
394     if (components != JB_CONN_COMPS && components != JB_CHARACTERS &&
395         components != JB_WORDS)
396         return (JBCLASSER *)ERROR_PTR("invalid components", procName, NULL);
397     if (thresh < 0.4 || thresh > 0.98)
398         return (JBCLASSER *)ERROR_PTR("thresh not in range [0.4 - 0.98]",
399                 procName, NULL);
400     if (weightfactor < 0.0 || weightfactor > 1.0)
401         return (JBCLASSER *)ERROR_PTR("weightfactor not in range [0.0 - 1.0]",
402                 procName, NULL);
403     if (maxwidth == 0) {
404         if (components == JB_CONN_COMPS)
405             maxwidth = MAX_CONN_COMP_WIDTH;
406         else if (components == JB_CHARACTERS)
407             maxwidth = MAX_CHAR_COMP_WIDTH;
408         else  /* JB_WORDS */
409             maxwidth = MAX_WORD_COMP_WIDTH;
410     }
411     if (maxheight == 0)
412         maxheight = MAX_COMP_HEIGHT;
413 
414 
415     if ((classer = jbClasserCreate(JB_CORRELATION, components)) == NULL)
416         return (JBCLASSER *)ERROR_PTR("classer not made", procName, NULL);
417     classer->maxwidth = maxwidth;
418     classer->maxheight = maxheight;
419     classer->thresh = thresh;
420     classer->weightfactor = weightfactor;
421     classer->dahash = l_dnaHashCreate(5507, 4);  /* 5507 is prime */
422     classer->keep_pixaa = keep_components;
423     return classer;
424 }
425 
426 
427 /*----------------------------------------------------------------------*
428  *                       Classify the pages                             *
429  *----------------------------------------------------------------------*/
430 /*!
431  * \brief   jbAddPages()
432  *
433  * \param[in]    jbclasser
434  * \param[in]    safiles of page image file names
435  * \return  0 if OK; 1 on error
436  *
437  * <pre>
438  * Notes:
439  *      (1) jbclasser makes a copy of the array of file names.
440  *      (2) The caller is still responsible for destroying the input array.
441  * </pre>
442  */
443 l_int32
jbAddPages(JBCLASSER * classer,SARRAY * safiles)444 jbAddPages(JBCLASSER  *classer,
445            SARRAY     *safiles)
446 {
447 l_int32  i, nfiles;
448 char    *fname;
449 PIX     *pix;
450 
451     PROCNAME("jbAddPages");
452 
453     if (!classer)
454         return ERROR_INT("classer not defined", procName, 1);
455     if (!safiles)
456         return ERROR_INT("safiles not defined", procName, 1);
457 
458     classer->safiles = sarrayCopy(safiles);
459     nfiles = sarrayGetCount(safiles);
460     for (i = 0; i < nfiles; i++) {
461         fname = sarrayGetString(safiles, i, L_NOCOPY);
462         if ((pix = pixRead(fname)) == NULL) {
463             L_WARNING("image file %d not read\n", procName, i);
464             continue;
465         }
466         if (pixGetDepth(pix) != 1) {
467             L_WARNING("image file %d not 1 bpp\n", procName, i);
468             continue;
469         }
470         jbAddPage(classer, pix);
471         pixDestroy(&pix);
472     }
473 
474     return 0;
475 }
476 
477 
478 /*!
479  * \brief   jbAddPage()
480  *
481  * \param[in]    jbclasser
482  * \param[in]    pixs of input page
483  * \return  0 if OK; 1 on error
484  */
485 l_int32
jbAddPage(JBCLASSER * classer,PIX * pixs)486 jbAddPage(JBCLASSER  *classer,
487           PIX        *pixs)
488 {
489 BOXA  *boxas;
490 PIXA  *pixas;
491 
492     PROCNAME("jbAddPage");
493 
494     if (!classer)
495         return ERROR_INT("classer not defined", procName, 1);
496     if (!pixs || pixGetDepth(pixs) != 1)
497         return ERROR_INT("pixs not defined or not 1 bpp", procName, 1);
498 
499     classer->w = pixGetWidth(pixs);
500     classer->h = pixGetHeight(pixs);
501 
502         /* Get the appropriate components and their bounding boxes */
503     if (jbGetComponents(pixs, classer->components, classer->maxwidth,
504                         classer->maxheight, &boxas, &pixas)) {
505         return ERROR_INT("components not made", procName, 1);
506     }
507 
508     jbAddPageComponents(classer, pixs, boxas, pixas);
509     boxaDestroy(&boxas);
510     pixaDestroy(&pixas);
511     return 0;
512 }
513 
514 
515 /*!
516  * \brief   jbAddPageComponents()
517  *
518  * \param[in]    jbclasser
519  * \param[in]    pixs of input page
520  * \param[in]    boxas b.b. of components for this page
521  * \param[in]    pixas components for this page
522  * \return  0 if OK; 1 on error
523  *
524  * <pre>
525  * Notes:
526  *      (1) If there are no components on the page, we don't require input
527  *          of empty boxas or pixas, although that's the typical situation.
528  * </pre>
529  */
530 l_int32
jbAddPageComponents(JBCLASSER * classer,PIX * pixs,BOXA * boxas,PIXA * pixas)531 jbAddPageComponents(JBCLASSER  *classer,
532                     PIX        *pixs,
533                     BOXA       *boxas,
534                     PIXA       *pixas)
535 {
536 l_int32  n;
537 
538     PROCNAME("jbAddPageComponents");
539 
540     if (!classer)
541         return ERROR_INT("classer not defined", procName, 1);
542     if (!pixs)
543         return ERROR_INT("pix not defined", procName, 1);
544 
545         /* Test for no components on the current page.  Always update the
546          * number of pages processed, even if nothing is on it. */
547     if (!boxas || !pixas || (boxaGetCount(boxas) == 0)) {
548         classer->npages++;
549         return 0;
550     }
551 
552         /* Get classes.  For hausdorff, it uses a specified size of
553          * structuring element and specified rank.  For correlation,
554          * it uses a specified threshold. */
555     if (classer->method == JB_RANKHAUS) {
556         if (jbClassifyRankHaus(classer, boxas, pixas))
557             return ERROR_INT("rankhaus classification failed", procName, 1);
558     } else {  /* classer->method == JB_CORRELATION */
559         if (jbClassifyCorrelation(classer, boxas, pixas))
560             return ERROR_INT("correlation classification failed", procName, 1);
561     }
562 
563         /* Find the global UL corners, adjusted for each instance so
564          * that the class template and instance will have their
565          * centroids in the same place.  Then the template can be
566          * used to replace the instance. */
567     if (jbGetULCorners(classer, pixs, boxas))
568         return ERROR_INT("UL corners not found", procName, 1);
569 
570         /* Update total component counts and number of pages processed. */
571     n = boxaGetCount(boxas);
572     classer->baseindex += n;
573     numaAddNumber(classer->nacomps, n);
574     classer->npages++;
575     return 0;
576 }
577 
578 
579 /*----------------------------------------------------------------------*
580  *         Classification using windowed rank hausdorff metric          *
581  *----------------------------------------------------------------------*/
582 /*!
583  * \brief   jbClassifyRankHaus()
584  *
585  * \param[in]    jbclasser
586  * \param[in]    boxa of new components for classification
587  * \param[in]    pixas of new components for classification
588  * \return  0 if OK; 1 on error
589  */
590 l_int32
jbClassifyRankHaus(JBCLASSER * classer,BOXA * boxa,PIXA * pixas)591 jbClassifyRankHaus(JBCLASSER  *classer,
592                    BOXA       *boxa,
593                    PIXA       *pixas)
594 {
595 l_int32     n, nt, i, wt, ht, iclass, size, found, testval;
596 l_int32     npages, area1, area3;
597 l_int32    *tab8;
598 l_float32   rank, x1, y1, x2, y2;
599 BOX        *box;
600 NUMA       *naclass, *napage;
601 NUMA       *nafg;   /* fg area of all instances */
602 NUMA       *nafgt;  /* fg area of all templates */
603 JBFINDCTX  *findcontext;
604 L_DNAHASH  *dahash;
605 PIX        *pix, *pix1, *pix2, *pix3, *pix4;
606 PIXA       *pixa, *pixa1, *pixa2, *pixat, *pixatd;
607 PIXAA      *pixaa;
608 PTA        *pta, *ptac, *ptact;
609 SEL        *sel;
610 
611     PROCNAME("jbClassifyRankHaus");
612 
613     if (!classer)
614         return ERROR_INT("classer not found", procName, 1);
615     if (!boxa)
616         return ERROR_INT("boxa not found", procName, 1);
617     if (!pixas)
618         return ERROR_INT("pixas not found", procName, 1);
619 
620     npages = classer->npages;
621     size = classer->sizehaus;
622     sel = selCreateBrick(size, size, size / 2, size / 2, SEL_HIT);
623 
624         /* Generate the bordered pixa, with and without dilation.
625          * pixa1 and pixa2 contain all the input components. */
626     n = pixaGetCount(pixas);
627     pixa1 = pixaCreate(n);
628     pixa2 = pixaCreate(n);
629     for (i = 0; i < n; i++) {
630         pix = pixaGetPix(pixas, i, L_CLONE);
631         pix1 = pixAddBorderGeneral(pix, JB_ADDED_PIXELS, JB_ADDED_PIXELS,
632                     JB_ADDED_PIXELS, JB_ADDED_PIXELS, 0);
633         pix2 = pixDilate(NULL, pix1, sel);
634         pixaAddPix(pixa1, pix1, L_INSERT);   /* un-dilated */
635         pixaAddPix(pixa2, pix2, L_INSERT);   /* dilated */
636         pixDestroy(&pix);
637     }
638 
639         /* Get the centroids of all the bordered images.
640          * These are relative to the UL corner of each (bordered) pix.  */
641     pta = pixaCentroids(pixa1);  /* centroids for this page; use here */
642     ptac = classer->ptac;  /* holds centroids of components up to this page */
643     ptaJoin(ptac, pta, 0, -1);  /* save centroids of all components */
644     ptact = classer->ptact;  /* holds centroids of templates */
645 
646         /* Use these to save the class and page of each component. */
647     naclass = classer->naclass;
648     napage = classer->napage;
649 
650         /* Store the unbordered pix in a pixaa, in a hierarchical
651          * set of arrays.  There is one pixa for each class,
652          * and the pix in each pixa are all the instances found
653          * of that class.  This is actually more than one would need
654          * for a jbig2 encoder, but there are two reasons to keep
655          * them around: (1) the set of instances for each class
656          * can be used to make an improved binary (or, better,
657          * a grayscale) template, rather than simply using the first
658          * one in the set; (2) we can investigate the failures
659          * of the classifier.  This pixaa grows as we process
660          * successive pages. */
661     pixaa = classer->pixaa;
662 
663         /* arrays to store class exemplars (templates) */
664     pixat = classer->pixat;   /* un-dilated */
665     pixatd = classer->pixatd;   /* dilated */
666 
667         /* Fill up the pixaa tree with the template exemplars as
668          * the first pix in each pixa.  As we add each pix,
669          * we also add the associated box to the pixa.
670          * We also keep track of the centroid of each pix,
671          * and use the difference between centroids (of the
672          * pix with the exemplar we are checking it with)
673          * to align the two when checking that the Hausdorff
674          * distance does not exceed a threshold.
675          * The threshold is set by the Sel used for dilating.
676          * For example, a 3x3 brick, sel_3, corresponds to a
677          * Hausdorff distance of 1.  In general, for an NxN brick,
678          * with N odd, corresponds to a Hausdorff distance of (N - 1)/2.
679          * It turns out that we actually need to use a sel of size 2x2
680          * to avoid small bad components when there is a halftone image
681          * from which components can be chosen.
682          * The larger the Sel you use, the fewer the number of classes,
683          * and the greater the likelihood of putting semantically
684          * different objects in the same class.  For simplicity,
685          * we do this separately for the case of rank == 1.0 (exact
686          * match within the Hausdorff distance) and rank < 1.0.  */
687     rank = classer->rankhaus;
688     dahash = classer->dahash;
689     if (rank == 1.0) {
690         for (i = 0; i < n; i++) {
691             pix1 = pixaGetPix(pixa1, i, L_CLONE);
692             pix2 = pixaGetPix(pixa2, i, L_CLONE);
693             ptaGetPt(pta, i, &x1, &y1);
694             nt = pixaGetCount(pixat);  /* number of templates */
695             found = FALSE;
696             findcontext = findSimilarSizedTemplatesInit(classer, pix1);
697             while ((iclass = findSimilarSizedTemplatesNext(findcontext)) > -1) {
698                     /* Find score for this template */
699                 pix3 = pixaGetPix(pixat, iclass, L_CLONE);
700                 pix4 = pixaGetPix(pixatd, iclass, L_CLONE);
701                 ptaGetPt(ptact, iclass, &x2, &y2);
702                 testval = pixHaustest(pix1, pix2, pix3, pix4, x1 - x2, y1 - y2,
703                                       MAX_DIFF_WIDTH, MAX_DIFF_HEIGHT);
704                 pixDestroy(&pix3);
705                 pixDestroy(&pix4);
706                 if (testval == 1) {
707                     found = TRUE;
708                     numaAddNumber(naclass, iclass);
709                     numaAddNumber(napage, npages);
710                     if (classer->keep_pixaa) {
711                         pixa = pixaaGetPixa(pixaa, iclass, L_CLONE);
712                         pix = pixaGetPix(pixas, i, L_CLONE);
713                         pixaAddPix(pixa, pix, L_INSERT);
714                         box = boxaGetBox(boxa, i, L_CLONE);
715                         pixaAddBox(pixa, box, L_INSERT);
716                         pixaDestroy(&pixa);
717                     }
718                     break;
719                 }
720             }
721             findSimilarSizedTemplatesDestroy(&findcontext);
722             if (found == FALSE) {  /* new class */
723                 numaAddNumber(naclass, nt);
724                 numaAddNumber(napage, npages);
725                 pixa = pixaCreate(0);
726                 pix = pixaGetPix(pixas, i, L_CLONE);  /* unbordered instance */
727                 pixaAddPix(pixa, pix, L_INSERT);
728                 wt = pixGetWidth(pix);
729                 ht = pixGetHeight(pix);
730                 l_dnaHashAdd(dahash, ht * wt, nt);
731                 box = boxaGetBox(boxa, i, L_CLONE);
732                 pixaAddBox(pixa, box, L_INSERT);
733                 pixaaAddPixa(pixaa, pixa, L_INSERT);  /* unbordered instance */
734                 ptaAddPt(ptact, x1, y1);
735                 pixaAddPix(pixat, pix1, L_INSERT);  /* bordered template */
736                 pixaAddPix(pixatd, pix2, L_INSERT);  /* bordered dil template */
737             } else {  /* don't save them */
738                 pixDestroy(&pix1);
739                 pixDestroy(&pix2);
740             }
741         }
742     } else {  /* rank < 1.0 */
743         if ((nafg = pixaCountPixels(pixas)) == NULL)  /* areas for this page */
744             return ERROR_INT("nafg not made", procName, 1);
745         nafgt = classer->nafgt;
746         tab8 = makePixelSumTab8();
747         for (i = 0; i < n; i++) {   /* all instances on this page */
748             pix1 = pixaGetPix(pixa1, i, L_CLONE);
749             numaGetIValue(nafg, i, &area1);
750             pix2 = pixaGetPix(pixa2, i, L_CLONE);
751             ptaGetPt(pta, i, &x1, &y1);   /* use pta for this page */
752             nt = pixaGetCount(pixat);  /* number of templates */
753             found = FALSE;
754             findcontext = findSimilarSizedTemplatesInit(classer, pix1);
755             while ((iclass = findSimilarSizedTemplatesNext(findcontext)) > -1) {
756                     /* Find score for this template */
757                 pix3 = pixaGetPix(pixat, iclass, L_CLONE);
758                 numaGetIValue(nafgt, iclass, &area3);
759                 pix4 = pixaGetPix(pixatd, iclass, L_CLONE);
760                 ptaGetPt(ptact, iclass, &x2, &y2);
761                 testval = pixRankHaustest(pix1, pix2, pix3, pix4,
762                                           x1 - x2, y1 - y2,
763                                           MAX_DIFF_WIDTH, MAX_DIFF_HEIGHT,
764                                           area1, area3, rank, tab8);
765                 pixDestroy(&pix3);
766                 pixDestroy(&pix4);
767                 if (testval == 1) {  /* greedy match; take the first */
768                     found = TRUE;
769                     numaAddNumber(naclass, iclass);
770                     numaAddNumber(napage, npages);
771                     if (classer->keep_pixaa) {
772                         pixa = pixaaGetPixa(pixaa, iclass, L_CLONE);
773                         pix = pixaGetPix(pixas, i, L_CLONE);
774                         pixaAddPix(pixa, pix, L_INSERT);
775                         box = boxaGetBox(boxa, i, L_CLONE);
776                         pixaAddBox(pixa, box, L_INSERT);
777                         pixaDestroy(&pixa);
778                     }
779                     break;
780                 }
781             }
782             findSimilarSizedTemplatesDestroy(&findcontext);
783             if (found == FALSE) {  /* new class */
784                 numaAddNumber(naclass, nt);
785                 numaAddNumber(napage, npages);
786                 pixa = pixaCreate(0);
787                 pix = pixaGetPix(pixas, i, L_CLONE);  /* unbordered instance */
788                 pixaAddPix(pixa, pix, L_INSERT);
789                 wt = pixGetWidth(pix);
790                 ht = pixGetHeight(pix);
791                 l_dnaHashAdd(dahash, ht * wt, nt);
792                 box = boxaGetBox(boxa, i, L_CLONE);
793                 pixaAddBox(pixa, box, L_INSERT);
794                 pixaaAddPixa(pixaa, pixa, L_INSERT);  /* unbordered instance */
795                 ptaAddPt(ptact, x1, y1);
796                 pixaAddPix(pixat, pix1, L_INSERT);  /* bordered template */
797                 pixaAddPix(pixatd, pix2, L_INSERT);  /* ditto */
798                 numaAddNumber(nafgt, area1);
799             } else {  /* don't save them */
800                 pixDestroy(&pix1);
801                 pixDestroy(&pix2);
802             }
803         }
804         LEPT_FREE(tab8);
805         numaDestroy(&nafg);
806     }
807     classer->nclass = pixaGetCount(pixat);
808 
809     ptaDestroy(&pta);
810     pixaDestroy(&pixa1);
811     pixaDestroy(&pixa2);
812     selDestroy(&sel);
813     return 0;
814 }
815 
816 
817 /*!
818  * \brief   pixHaustest()
819  *
820  * \param[in]    pix1   new pix, not dilated
821  * \param[in]    pix2   new pix, dilated
822  * \param[in]    pix3   exemplar pix, not dilated
823  * \param[in]    pix4   exemplar pix, dilated
824  * \param[in]    delx   x comp of centroid difference
825  * \param[in]    dely   y comp of centroid difference
826  * \param[in]    maxdiffw max width difference of pix1 and pix2
827  * \param[in]    maxdiffh max height difference of pix1 and pix2
828  * \return  0 FALSE) if no match, 1 (TRUE if the new
829  *              pix is in the same class as the exemplar.
830  *
831  * <pre>
832  * Notes:
833  *  We check first that the two pix are roughly
834  *  the same size.  Only if they meet that criterion do
835  *  we compare the bitmaps.  The Hausdorff is a 2-way
836  *  check.  The centroid difference is used to align the two
837  *  images to the nearest integer for each of the checks.
838  *  These check that the dilated image of one contains
839  *  ALL the pixels of the undilated image of the other.
840  *  Checks are done in both direction.  A single pixel not
841  *  contained in either direction results in failure of the test.
842  * </pre>
843  */
844 l_int32
pixHaustest(PIX * pix1,PIX * pix2,PIX * pix3,PIX * pix4,l_float32 delx,l_float32 dely,l_int32 maxdiffw,l_int32 maxdiffh)845 pixHaustest(PIX       *pix1,
846             PIX       *pix2,
847             PIX       *pix3,
848             PIX       *pix4,
849             l_float32  delx,   /* x(1) - x(3) */
850             l_float32  dely,   /* y(1) - y(3) */
851             l_int32    maxdiffw,
852             l_int32    maxdiffh)
853 {
854 l_int32  wi, hi, wt, ht, delw, delh, idelx, idely, boolmatch;
855 PIX     *pixt;
856 
857         /* Eliminate possible matches based on size difference */
858     wi = pixGetWidth(pix1);
859     hi = pixGetHeight(pix1);
860     wt = pixGetWidth(pix3);
861     ht = pixGetHeight(pix3);
862     delw = L_ABS(wi - wt);
863     if (delw > maxdiffw)
864         return FALSE;
865     delh = L_ABS(hi - ht);
866     if (delh > maxdiffh)
867         return FALSE;
868 
869         /* Round difference in centroid location to nearest integer;
870          * use this as a shift when doing the matching. */
871     if (delx >= 0)
872         idelx = (l_int32)(delx + 0.5);
873     else
874         idelx = (l_int32)(delx - 0.5);
875     if (dely >= 0)
876         idely = (l_int32)(dely + 0.5);
877     else
878         idely = (l_int32)(dely - 0.5);
879 
880         /*  Do 1-direction hausdorff, checking that every pixel in pix1
881          *  is within a dilation distance of some pixel in pix3.  Namely,
882          *  that pix4 entirely covers pix1:
883          *       pixt = pixSubtract(NULL, pix1, pix4), including shift
884          *  where pixt has no ON pixels.  */
885     pixt = pixCreateTemplate(pix1);
886     pixRasterop(pixt, 0, 0, wi, hi, PIX_SRC, pix1, 0, 0);
887     pixRasterop(pixt, idelx, idely, wi, hi, PIX_DST & PIX_NOT(PIX_SRC),
888                 pix4, 0, 0);
889     pixZero(pixt, &boolmatch);
890     if (boolmatch == 0) {
891         pixDestroy(&pixt);
892         return FALSE;
893     }
894 
895         /*  Do 1-direction hausdorff, checking that every pixel in pix3
896          *  is within a dilation distance of some pixel in pix1.  Namely,
897          *  that pix2 entirely covers pix3:
898          *      pixSubtract(pixt, pix3, pix2), including shift
899          *  where pixt has no ON pixels. */
900     pixRasterop(pixt, idelx, idely, wt, ht, PIX_SRC, pix3, 0, 0);
901     pixRasterop(pixt, 0, 0, wt, ht, PIX_DST & PIX_NOT(PIX_SRC), pix2, 0, 0);
902     pixZero(pixt, &boolmatch);
903     pixDestroy(&pixt);
904     return boolmatch;
905 }
906 
907 
908 /*!
909  * \brief   pixRankHaustest()
910  *
911  * \param[in]    pix1   new pix, not dilated
912  * \param[in]    pix2   new pix, dilated
913  * \param[in]    pix3   exemplar pix, not dilated
914  * \param[in]    pix4   exemplar pix, dilated
915  * \param[in]    delx   x comp of centroid difference
916  * \param[in]    dely   y comp of centroid difference
917  * \param[in]    maxdiffw max width difference of pix1 and pix2
918  * \param[in]    maxdiffh max height difference of pix1 and pix2
919  * \param[in]    area1  fg pixels in pix1
920  * \param[in]    area3  fg pixels in pix3
921  * \param[in]    rank   rank value of test, each way
922  * \param[in]    tab8   table of pixel sums for byte
923  * \return  0 FALSE) if no match, 1 (TRUE if the new
924  *                 pix is in the same class as the exemplar.
925  *
926  * <pre>
927  * Notes:
928  *  We check first that the two pix are roughly
929  *  the same size.  Only if they meet that criterion do
930  *  we compare the bitmaps.  We convert the rank value to
931  *  a number of pixels by multiplying the rank fraction by the number
932  *  of pixels in the undilated image.  The Hausdorff is a 2-way
933  *  check.  The centroid difference is used to align the two
934  *  images to the nearest integer for each of the checks.
935  *  The rank hausdorff checks that the dilated image of one
936  *  contains the rank fraction of the pixels of the undilated
937  *  image of the other.   Checks are done in both direction.
938  *  Failure of the test in either direction results in failure
939  *  of the test.
940  * </pre>
941  */
942 l_int32
pixRankHaustest(PIX * pix1,PIX * pix2,PIX * pix3,PIX * pix4,l_float32 delx,l_float32 dely,l_int32 maxdiffw,l_int32 maxdiffh,l_int32 area1,l_int32 area3,l_float32 rank,l_int32 * tab8)943 pixRankHaustest(PIX       *pix1,
944                 PIX       *pix2,
945                 PIX       *pix3,
946                 PIX       *pix4,
947                 l_float32  delx,   /* x(1) - x(3) */
948                 l_float32  dely,   /* y(1) - y(3) */
949                 l_int32    maxdiffw,
950                 l_int32    maxdiffh,
951                 l_int32    area1,
952                 l_int32    area3,
953                 l_float32  rank,
954                 l_int32   *tab8)
955 {
956 l_int32  wi, hi, wt, ht, delw, delh, idelx, idely, boolmatch;
957 l_int32  thresh1, thresh3;
958 PIX     *pixt;
959 
960         /* Eliminate possible matches based on size difference */
961     wi = pixGetWidth(pix1);
962     hi = pixGetHeight(pix1);
963     wt = pixGetWidth(pix3);
964     ht = pixGetHeight(pix3);
965     delw = L_ABS(wi - wt);
966     if (delw > maxdiffw)
967         return FALSE;
968     delh = L_ABS(hi - ht);
969     if (delh > maxdiffh)
970         return FALSE;
971 
972         /* Upper bounds in remaining pixels for allowable match */
973     thresh1 = (l_int32)(area1 * (1. - rank) + 0.5);
974     thresh3 = (l_int32)(area3 * (1. - rank) + 0.5);
975 
976         /* Round difference in centroid location to nearest integer;
977          * use this as a shift when doing the matching. */
978     if (delx >= 0)
979         idelx = (l_int32)(delx + 0.5);
980     else
981         idelx = (l_int32)(delx - 0.5);
982     if (dely >= 0)
983         idely = (l_int32)(dely + 0.5);
984     else
985         idely = (l_int32)(dely - 0.5);
986 
987         /*  Do 1-direction hausdorff, checking that every pixel in pix1
988          *  is within a dilation distance of some pixel in pix3.  Namely,
989          *  that pix4 entirely covers pix1:
990          *       pixt = pixSubtract(NULL, pix1, pix4), including shift
991          *  where pixt has no ON pixels.  */
992     pixt = pixCreateTemplate(pix1);
993     pixRasterop(pixt, 0, 0, wi, hi, PIX_SRC, pix1, 0, 0);
994     pixRasterop(pixt, idelx, idely, wi, hi, PIX_DST & PIX_NOT(PIX_SRC),
995                 pix4, 0, 0);
996     pixThresholdPixelSum(pixt, thresh1, &boolmatch, tab8);
997     if (boolmatch == 1) { /* above thresh1 */
998         pixDestroy(&pixt);
999         return FALSE;
1000     }
1001 
1002         /*  Do 1-direction hausdorff, checking that every pixel in pix3
1003          *  is within a dilation distance of some pixel in pix1.  Namely,
1004          *  that pix2 entirely covers pix3:
1005          *      pixSubtract(pixt, pix3, pix2), including shift
1006          *  where pixt has no ON pixels. */
1007     pixRasterop(pixt, idelx, idely, wt, ht, PIX_SRC, pix3, 0, 0);
1008     pixRasterop(pixt, 0, 0, wt, ht, PIX_DST & PIX_NOT(PIX_SRC), pix2, 0, 0);
1009     pixThresholdPixelSum(pixt, thresh3, &boolmatch, tab8);
1010     pixDestroy(&pixt);
1011     if (boolmatch == 1)  /* above thresh3 */
1012         return FALSE;
1013     else
1014         return TRUE;
1015 }
1016 
1017 
1018 /*----------------------------------------------------------------------*
1019  *            Classification using windowed correlation score           *
1020  *----------------------------------------------------------------------*/
1021 /*!
1022  * \brief   jbClassifyCorrelation()
1023  *
1024  * \param[in]    jbclasser
1025  * \param[in]    boxa of new components for classification
1026  * \param[in]    pixas of new components for classification
1027  * \return  0 if OK; 1 on error
1028  */
1029 l_int32
jbClassifyCorrelation(JBCLASSER * classer,BOXA * boxa,PIXA * pixas)1030 jbClassifyCorrelation(JBCLASSER  *classer,
1031                       BOXA       *boxa,
1032                       PIXA       *pixas)
1033 {
1034 l_int32     n, nt, i, iclass, wt, ht, found, area, area1, area2, npages,
1035             overthreshold;
1036 l_int32    *sumtab, *centtab;
1037 l_uint32   *row, word;
1038 l_float32   x1, y1, x2, y2, xsum, ysum;
1039 l_float32   thresh, weight, threshold;
1040 BOX        *box;
1041 NUMA       *naclass, *napage;
1042 NUMA       *nafgt;   /* fg area of all templates */
1043 NUMA       *naarea;   /* w * h area of all templates */
1044 JBFINDCTX  *findcontext;
1045 L_DNAHASH  *dahash;
1046 PIX        *pix, *pix1, *pix2;
1047 PIXA       *pixa, *pixa1, *pixat;
1048 PIXAA      *pixaa;
1049 PTA        *pta, *ptac, *ptact;
1050 l_int32    *pixcts;  /* pixel counts of each pixa */
1051 l_int32   **pixrowcts;  /* row-by-row pixel counts of each pixa */
1052 l_int32     x, y, rowcount, downcount, wpl;
1053 l_uint8     byte;
1054 
1055     PROCNAME("jbClassifyCorrelation");
1056 
1057     if (!classer)
1058         return ERROR_INT("classer not found", procName, 1);
1059     if (!boxa)
1060         return ERROR_INT("boxa not found", procName, 1);
1061     if (!pixas)
1062         return ERROR_INT("pixas not found", procName, 1);
1063 
1064     npages = classer->npages;
1065 
1066         /* Generate the bordered pixa, which contains all the the
1067          * input components.  This will not be saved.   */
1068     if ((n = pixaGetCount(pixas)) == 0) {
1069         L_WARNING("pixas is empty\n", procName);
1070         return 0;
1071     }
1072     pixa1 = pixaCreate(n);
1073     for (i = 0; i < n; i++) {
1074         pix = pixaGetPix(pixas, i, L_CLONE);
1075         pix1 = pixAddBorderGeneral(pix, JB_ADDED_PIXELS, JB_ADDED_PIXELS,
1076                     JB_ADDED_PIXELS, JB_ADDED_PIXELS, 0);
1077         pixaAddPix(pixa1, pix1, L_INSERT);
1078         pixDestroy(&pix);
1079     }
1080 
1081         /* Use these to save the class and page of each component. */
1082     naclass = classer->naclass;
1083     napage = classer->napage;
1084 
1085         /* Get the number of fg pixels in each component.  */
1086     nafgt = classer->nafgt;    /* holds fg areas of the templates */
1087     sumtab = makePixelSumTab8();
1088 
1089     pixcts = (l_int32 *)LEPT_CALLOC(n, sizeof(*pixcts));
1090     pixrowcts = (l_int32 **)LEPT_CALLOC(n, sizeof(*pixrowcts));
1091     centtab = makePixelCentroidTab8();
1092 
1093         /* Count the "1" pixels in each row of the pix in pixa1; this
1094          * allows pixCorrelationScoreThresholded to abort early if a match
1095          * is impossible.  This loop merges three calculations: the total
1096          * number of "1" pixels, the number of "1" pixels in each row, and
1097          * the centroid.  The centroids are relative to the UL corner of
1098          * each (bordered) pix.  The pixrowcts[i][y] are the total number
1099          * of fg pixels in pixa[i] below row y. */
1100     pta = ptaCreate(n);
1101     for (i = 0; i < n; i++) {
1102         pix = pixaGetPix(pixa1, i, L_CLONE);
1103         pixrowcts[i] = (l_int32 *)LEPT_CALLOC(pixGetHeight(pix),
1104                                          sizeof(**pixrowcts));
1105         xsum = 0;
1106         ysum = 0;
1107         wpl = pixGetWpl(pix);
1108         row = pixGetData(pix) + (pixGetHeight(pix) - 1) * wpl;
1109         downcount = 0;
1110         for (y = pixGetHeight(pix) - 1; y >= 0; y--, row -= wpl) {
1111             pixrowcts[i][y] = downcount;
1112             rowcount = 0;
1113             for (x = 0; x < wpl; x++) {
1114                 word = row[x];
1115                 byte = word & 0xff;
1116                 rowcount += sumtab[byte];
1117                 xsum += centtab[byte] + (x * 32 + 24) * sumtab[byte];
1118                 byte = (word >> 8) & 0xff;
1119                 rowcount += sumtab[byte];
1120                 xsum += centtab[byte] + (x * 32 + 16) * sumtab[byte];
1121                 byte = (word >> 16) & 0xff;
1122                 rowcount += sumtab[byte];
1123                 xsum += centtab[byte] + (x * 32 + 8) * sumtab[byte];
1124                 byte = (word >> 24) & 0xff;
1125                 rowcount += sumtab[byte];
1126                 xsum += centtab[byte] + x * 32 * sumtab[byte];
1127             }
1128             downcount += rowcount;
1129             ysum += rowcount * y;
1130         }
1131         pixcts[i] = downcount;
1132         if (downcount > 0) {
1133             ptaAddPt(pta,
1134                  xsum / (l_float32)downcount, ysum / (l_float32)downcount);
1135         } else {  /* no pixels; shouldn't happen */
1136             L_ERROR("downcount == 0 !\n", procName);
1137             ptaAddPt(pta, pixGetWidth(pix) / 2, pixGetHeight(pix) / 2);
1138         }
1139         pixDestroy(&pix);
1140     }
1141 
1142     ptac = classer->ptac;  /* holds centroids of components up to this page */
1143     ptaJoin(ptac, pta, 0, -1);  /* save centroids of all components */
1144     ptact = classer->ptact;  /* holds centroids of templates */
1145 
1146     /* Store the unbordered pix in a pixaa, in a hierarchical
1147      * set of arrays.  There is one pixa for each class,
1148      * and the pix in each pixa are all the instances found
1149      * of that class.  This is actually more than one would need
1150      * for a jbig2 encoder, but there are two reasons to keep
1151      * them around: (1) the set of instances for each class
1152      * can be used to make an improved binary (or, better,
1153      * a grayscale) template, rather than simply using the first
1154      * one in the set; (2) we can investigate the failures
1155      * of the classifier.  This pixaa grows as we process
1156      * successive pages. */
1157     pixaa = classer->pixaa;
1158 
1159         /* Array to store class exemplars */
1160     pixat = classer->pixat;
1161 
1162         /* Fill up the pixaa tree with the template exemplars as
1163          * the first pix in each pixa.  As we add each pix,
1164          * we also add the associated box to the pixa.
1165          * We also keep track of the centroid of each pix,
1166          * and use the difference between centroids (of the
1167          * pix with the exemplar we are checking it with)
1168          * to align the two when checking that the correlation
1169          * score exceeds a threshold.  The correlation score
1170          * is given by the square of the area of the AND
1171          * between aligned instance and template, divided by
1172          * the product of areas of each image.  For identical
1173          * template and instance, the score is 1.0.
1174          * If the threshold is too small, non-equivalent instances
1175          * will be placed in the same class; if too large, there will
1176          * be an unnecessary division of classes representing the
1177          * same character.  The weightfactor adds in some of the
1178          * difference (1.0 - thresh), depending on the heaviness
1179          * of the template (measured as the fraction of fg pixels). */
1180     thresh = classer->thresh;
1181     weight = classer->weightfactor;
1182     naarea = classer->naarea;
1183     dahash = classer->dahash;
1184     for (i = 0; i < n; i++) {
1185         pix1 = pixaGetPix(pixa1, i, L_CLONE);
1186         area1 = pixcts[i];
1187         ptaGetPt(pta, i, &x1, &y1);  /* centroid for this instance */
1188         nt = pixaGetCount(pixat);
1189         found = FALSE;
1190         findcontext = findSimilarSizedTemplatesInit(classer, pix1);
1191         while ( (iclass = findSimilarSizedTemplatesNext(findcontext)) > -1) {
1192                 /* Get the template */
1193             pix2 = pixaGetPix(pixat, iclass, L_CLONE);
1194             numaGetIValue(nafgt, iclass, &area2);
1195             ptaGetPt(ptact, iclass, &x2, &y2);  /* template centroid */
1196 
1197                 /* Find threshold for this template */
1198             if (weight > 0.0) {
1199                 numaGetIValue(naarea, iclass, &area);
1200                 threshold = thresh + (1. - thresh) * weight * area2 / area;
1201             } else {
1202                 threshold = thresh;
1203             }
1204 
1205                 /* Find score for this template */
1206             overthreshold = pixCorrelationScoreThresholded(pix1, pix2,
1207                                          area1, area2, x1 - x2, y1 - y2,
1208                                          MAX_DIFF_WIDTH, MAX_DIFF_HEIGHT,
1209                                          sumtab, pixrowcts[i], threshold);
1210 #if DEBUG_CORRELATION_SCORE
1211             {
1212                 l_float32 score, testscore;
1213                 l_int32 count, testcount;
1214                 pixCorrelationScore(pix1, pix2, area1, area2, x1 - x2, y1 - y2,
1215                                     MAX_DIFF_WIDTH, MAX_DIFF_HEIGHT,
1216                                     sumtab, &score);
1217 
1218                 pixCorrelationScoreSimple(pix1, pix2, area1, area2,
1219                                           x1 - x2, y1 - y2, MAX_DIFF_WIDTH,
1220                                           MAX_DIFF_HEIGHT, sumtab, &testscore);
1221                 count = (l_int32)rint(sqrt(score * area1 * area2));
1222                 testcount = (l_int32)rint(sqrt(testscore * area1 * area2));
1223                 if ((score >= threshold) != (testscore >= threshold)) {
1224                     fprintf(stderr, "Correlation score mismatch: "
1225                             "%d(%g,%d) vs %d(%g,%d) (%g)\n",
1226                             count, score, score >= threshold,
1227                             testcount, testscore, testscore >= threshold,
1228                             score - testscore);
1229                 }
1230 
1231                 if ((score >= threshold) != overthreshold) {
1232                     fprintf(stderr, "Mismatch between correlation/threshold "
1233                             "comparison: %g(%g,%d) >= %g(%g) vs %s\n",
1234                             score, score*area1*area2, count, threshold,
1235                             threshold*area1*area2,
1236                             (overthreshold ? "true" : "false"));
1237                 }
1238             }
1239 #endif  /* DEBUG_CORRELATION_SCORE */
1240             pixDestroy(&pix2);
1241 
1242             if (overthreshold) {  /* greedy match */
1243                 found = TRUE;
1244                 numaAddNumber(naclass, iclass);
1245                 numaAddNumber(napage, npages);
1246                 if (classer->keep_pixaa) {
1247                         /* We are keeping a record of all components */
1248                     pixa = pixaaGetPixa(pixaa, iclass, L_CLONE);
1249                     pix = pixaGetPix(pixas, i, L_CLONE);
1250                     pixaAddPix(pixa, pix, L_INSERT);
1251                     box = boxaGetBox(boxa, i, L_CLONE);
1252                     pixaAddBox(pixa, box, L_INSERT);
1253                     pixaDestroy(&pixa);
1254                 }
1255                 break;
1256             }
1257         }
1258         findSimilarSizedTemplatesDestroy(&findcontext);
1259         if (found == FALSE) {  /* new class */
1260             numaAddNumber(naclass, nt);
1261             numaAddNumber(napage, npages);
1262             pixa = pixaCreate(0);
1263             pix = pixaGetPix(pixas, i, L_CLONE);  /* unbordered instance */
1264             pixaAddPix(pixa, pix, L_INSERT);
1265             wt = pixGetWidth(pix);
1266             ht = pixGetHeight(pix);
1267             l_dnaHashAdd(dahash, ht * wt, nt);
1268             box = boxaGetBox(boxa, i, L_CLONE);
1269             pixaAddBox(pixa, box, L_INSERT);
1270             pixaaAddPixa(pixaa, pixa, L_INSERT);  /* unbordered instance */
1271             ptaAddPt(ptact, x1, y1);
1272             numaAddNumber(nafgt, area1);
1273             pixaAddPix(pixat, pix1, L_INSERT);   /* bordered template */
1274             area = (pixGetWidth(pix1) - 2 * JB_ADDED_PIXELS) *
1275                    (pixGetHeight(pix1) - 2 * JB_ADDED_PIXELS);
1276             numaAddNumber(naarea, area);
1277         } else {  /* don't save it */
1278             pixDestroy(&pix1);
1279         }
1280     }
1281     classer->nclass = pixaGetCount(pixat);
1282 
1283     LEPT_FREE(pixcts);
1284     LEPT_FREE(centtab);
1285     for (i = 0; i < n; i++) {
1286         LEPT_FREE(pixrowcts[i]);
1287     }
1288     LEPT_FREE(pixrowcts);
1289 
1290     LEPT_FREE(sumtab);
1291     ptaDestroy(&pta);
1292     pixaDestroy(&pixa1);
1293     return 0;
1294 }
1295 
1296 
1297 /*----------------------------------------------------------------------*
1298  *             Determine the image components we start with             *
1299  *----------------------------------------------------------------------*/
1300 /*!
1301  * \brief   jbGetComponents()
1302  *
1303  * \param[in]    pixs 1 bpp
1304  * \param[in]    components JB_CONN_COMPS, JB_CHARACTERS, JB_WORDS
1305  * \param[in]    maxwidth, maxheight of saved components; larger are discarded
1306  * \param[out]   ppboxa b.b. of component items
1307  * \param[out]   pppixa component items
1308  * \return  0 if OK, 1 on error
1309  */
1310 l_int32
jbGetComponents(PIX * pixs,l_int32 components,l_int32 maxwidth,l_int32 maxheight,BOXA ** pboxad,PIXA ** ppixad)1311 jbGetComponents(PIX     *pixs,
1312                 l_int32  components,
1313                 l_int32  maxwidth,
1314                 l_int32  maxheight,
1315                 BOXA   **pboxad,
1316                 PIXA   **ppixad)
1317 {
1318 l_int32    empty, res, redfactor;
1319 BOXA      *boxa;
1320 PIX       *pix1, *pix2, *pix3;
1321 PIXA      *pixa, *pixat;
1322 
1323     PROCNAME("jbGetComponents");
1324 
1325     if (!pboxad)
1326         return ERROR_INT("&boxad not defined", procName, 1);
1327     *pboxad = NULL;
1328     if (!ppixad)
1329         return ERROR_INT("&pixad not defined", procName, 1);
1330     *ppixad = NULL;
1331     if (!pixs)
1332         return ERROR_INT("pixs not defined", procName, 1);
1333     if (components != JB_CONN_COMPS && components != JB_CHARACTERS &&
1334         components != JB_WORDS)
1335         return ERROR_INT("invalid components", procName, 1);
1336 
1337     pixZero(pixs, &empty);
1338     if (empty) {
1339         *pboxad = boxaCreate(0);
1340         *ppixad = pixaCreate(0);
1341         return 0;
1342     }
1343 
1344         /* If required, preprocess input pixs.  The method for both
1345          * characters and words is to generate a connected component
1346          * mask over the units that we want to aggregrate, which are,
1347          * in general, sets of related connected components in pixs.
1348          * For characters, we want to include the dots with
1349          * 'i', 'j' and '!', so we do a small vertical closing to
1350          * generate the mask.  For words, we make a mask over all
1351          * characters in each word.  This is a bit more tricky, because
1352          * the spacing between words is difficult to predict a priori,
1353          * and words can be typeset with variable spacing that can
1354          * in some cases be barely larger than the space between
1355          * characters.  The first step is to generate the mask and
1356          * identify each of its connected components.  */
1357     if (components == JB_CONN_COMPS) {  /* no preprocessing */
1358         boxa = pixConnComp(pixs, &pixa, 8);
1359     } else if (components == JB_CHARACTERS) {
1360         pix1 = pixMorphSequence(pixs, "c1.6", 0);
1361         boxa = pixConnComp(pix1, &pixat, 8);
1362         pixa = pixaClipToPix(pixat, pixs);
1363         pixDestroy(&pix1);
1364         pixaDestroy(&pixat);
1365     } else {  /* components == JB_WORDS */
1366 
1367             /* Do the operations at about 150 ppi resolution.
1368              * It is much faster at 75 ppi, but the results are
1369              * more accurate at 150 ppi.  This will segment the
1370              * words in body text.  It can be expected that relatively
1371              * infrequent words in a larger font will be split. */
1372         res = pixGetXRes(pixs);
1373         if (res <= 200) {
1374             redfactor = 1;
1375             pix1 = pixClone(pixs);
1376         } else if (res <= 400) {
1377             redfactor = 2;
1378             pix1 = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0);
1379         } else {
1380             redfactor = 4;
1381             pix1 = pixReduceRankBinaryCascade(pixs, 1, 1, 0, 0);
1382         }
1383 
1384             /* Estimate the word mask, at approximately 150 ppi.
1385              * This has both very large and very small components left in. */
1386         pixWordMaskByDilation(pix1, &pix2, NULL, NULL);
1387 
1388             /* Expand the optimally dilated word mask to full res. */
1389         pix3 = pixExpandReplicate(pix2, redfactor);
1390 
1391             /* Pull out the pixels in pixs corresponding to the mask
1392              * components in pix3.  Note that above we used threshold
1393              * levels in the reduction of 1 to insure that the resulting
1394              * mask fully covers the input pixs.  The downside of using
1395              * a threshold of 1 is that very close characters from adjacent
1396              * lines can be joined.  But with a level of 2 or greater,
1397              * it is necessary to use a seedfill, followed by a pixOr():
1398              *       pixt4 = pixSeedfillBinary(NULL, pix3, pixs, 8);
1399              *       pixOr(pix3, pix3, pixt4);
1400              * to insure that the mask coverage is complete over pixs.  */
1401         boxa = pixConnComp(pix3, &pixat, 4);
1402         pixa = pixaClipToPix(pixat, pixs);
1403         pixaDestroy(&pixat);
1404         pixDestroy(&pix1);
1405         pixDestroy(&pix2);
1406         pixDestroy(&pix3);
1407     }
1408 
1409         /* Remove large components, and save the results.  */
1410     *ppixad = pixaSelectBySize(pixa, maxwidth, maxheight, L_SELECT_IF_BOTH,
1411                                L_SELECT_IF_LTE, NULL);
1412     *pboxad = boxaSelectBySize(boxa, maxwidth, maxheight, L_SELECT_IF_BOTH,
1413                                L_SELECT_IF_LTE, NULL);
1414     pixaDestroy(&pixa);
1415     boxaDestroy(&boxa);
1416 
1417     return 0;
1418 }
1419 
1420 
1421 /*!
1422  * \brief   pixWordMaskByDilation()
1423  *
1424  * \param[in]    pixs               1 bpp; typ. at 75 to 150 ppi
1425  * \param[out]   pmask [optional]   dilated word mask
1426  * \param[out]   psize [optional]   size of good horizontal dilation
1427  * \param[out]   pixadb [optional]  debug: pixa of intermediate steps
1428  * \return  0 if OK, 1 on error
1429  *
1430  * <pre>
1431  * Notes:
1432  *      (1) This gives an estimate of the word masks.  See
1433  *          pixWordBoxesByDilation() for further filtering of the word boxes.
1434  *      (2) The resolution should be between 75 and 150 ppi, and the optimal
1435  *          dilation will be between 3 and 10.
1436  *      (3) A good size for dilating to get word masks is optionally returned.
1437  *      (4) Typically, the number of c.c. reduced with each successive
1438  *          dilation (stored in nadiff) decreases quickly to a minimum
1439  *          (where the characters in a word are joined), and then
1440  *          increases again as the smaller number of words are joined.
1441  *          For the typical case, you can then look for this minimum
1442  *          and dilate to get the word mask.  However, there are many
1443  *          cases where the function is not so simple. For example, if the
1444  *          pix has been upscaled 2x, the nadiff function oscillates, with
1445  *          every other value being zero!  And for some images it tails
1446  *          off without a clear minimum to indicate where to break.
1447  *          So a more simple and robust method is to find the dilation
1448  *          where the initial number of c.c. has been reduced by some
1449  *          fraction (we use a 70% reduction).
1450  * </pre>
1451  */
1452 l_int32
pixWordMaskByDilation(PIX * pixs,PIX ** ppixm,l_int32 * psize,PIXA * pixadb)1453 pixWordMaskByDilation(PIX      *pixs,
1454                       PIX     **ppixm,
1455                       l_int32  *psize,
1456                       PIXA     *pixadb)
1457 {
1458 l_int32   i, n, ndil, maxdiff, diff, ibest;
1459 l_int32   start, stop, check, count, total, xres;
1460 l_int32   ncc[13];  /* max dilation + 1 */
1461 l_int32  *diffa;
1462 BOXA     *boxa;
1463 NUMA     *nacc, *nadiff;
1464 PIX      *pix1, *pix2;
1465 
1466     PROCNAME("pixWordMaskByDilation");
1467 
1468     if (ppixm) *ppixm = NULL;
1469     if (psize) *psize = 0;
1470     if (!pixs || pixGetDepth(pixs) != 1)
1471         return ERROR_INT("pixs undefined or not 1 bpp", procName, 1);
1472     if (!ppixm && !psize)
1473         return ERROR_INT("no output requested", procName, 1);
1474 
1475         /* Find a good dilation to create the word mask, by successively
1476          * increasing dilation size and counting the connected components. */
1477     pix1 = pixCopy(NULL, pixs);
1478     ndil = 12;  /* appropriate for 75 to 150 ppi */
1479     nacc = numaCreate(ndil + 1);
1480     nadiff = numaCreate(ndil + 1);
1481     stop = FALSE;
1482     for (i = 0; i <= ndil; i++) {
1483         if (i == 0)  /* first one not dilated */
1484             pix2 = pixCopy(NULL, pix1);
1485         else  /* successive dilation by sel_2h */
1486             pix2 = pixMorphSequence(pix1, "d2.1", 0);
1487         boxa = pixConnCompBB(pix2, 4);
1488         ncc[i] = boxaGetCount(boxa);
1489         numaAddNumber(nacc, ncc[i]);
1490         if (i == 0) total = ncc[0];
1491         if (i > 0) {
1492             diff = ncc[i - 1] - ncc[i];
1493             numaAddNumber(nadiff, diff);
1494         }
1495         pixDestroy(&pix1);
1496         pix1 = pix2;
1497         boxaDestroy(&boxa);
1498     }
1499     pixDestroy(&pix1);
1500 
1501         /* Find the dilation at which the c.c. count has reduced
1502          * to 30% of thie initial value.  Although 30% seems high,
1503          * it seems better to use this but add one to ibest.  */
1504     diffa = numaGetIArray(nadiff);
1505     n = numaGetCount(nadiff);
1506     maxdiff = 0;
1507     start = 0;
1508     check = TRUE;
1509     ibest = 2;
1510     for (i = 1; i < n; i++) {
1511         numaGetIValue(nacc, i, &count);
1512         if (check && count < 0.3 * total) {
1513             ibest = i + 1;
1514             check = FALSE;
1515         }
1516         diff = diffa[i];
1517         if (diff > maxdiff) {
1518             maxdiff = diff;
1519             start = i;
1520         }
1521     }
1522     LEPT_FREE(diffa);
1523 
1524         /* Add small compensation for higher resolution */
1525     xres = pixGetXRes(pixs);
1526     if (xres == 0) xres = 150;
1527     if (xres > 110) ibest++;
1528     if (ibest < 2) {
1529         L_INFO("setting ibest to minimum allowed value of 2\n", procName);
1530         ibest = 2;
1531     }
1532 
1533     if (pixadb) {
1534         lept_mkdir("lept/jb");
1535         {GPLOT *gplot;
1536          NUMA  *naseq;
1537          PIX   *pix3, *pix4;
1538             L_INFO("Best dilation: %d\n", procName, L_MAX(3, ibest + 1));
1539             naseq = numaMakeSequence(1, 1, numaGetCount(nacc));
1540             gplot = gplotCreate("/tmp/lept/jb/numcc", GPLOT_PNG,
1541                                 "Number of cc vs. horizontal dilation",
1542                                 "Sel horiz", "Number of cc");
1543             gplotAddPlot(gplot, naseq, nacc, GPLOT_LINES, "");
1544             gplotMakeOutput(gplot);
1545             gplotDestroy(&gplot);
1546             pix3 = pixRead("/tmp/lept/jb/numcc.png");
1547             pixaAddPix(pixadb, pix3, L_INSERT);
1548             numaDestroy(&naseq);
1549             naseq = numaMakeSequence(1, 1, numaGetCount(nadiff));
1550             gplot = gplotCreate("/tmp/lept/jb/diffcc", GPLOT_PNG,
1551                                 "Diff count of cc vs. horizontal dilation",
1552                                 "Sel horiz", "Diff in cc");
1553             gplotAddPlot(gplot, naseq, nadiff, GPLOT_LINES, "");
1554             gplotMakeOutput(gplot);
1555             gplotDestroy(&gplot);
1556             pix3 = pixRead("/tmp/lept/jb/diffcc.png");
1557             pixaAddPix(pixadb, pix3, L_INSERT);
1558             numaDestroy(&naseq);
1559             pix3 = pixCloseBrick(NULL, pixs, ibest + 1, 1);
1560             pix4 = pixScaleToSize(pix3, 600, 0);
1561             pixaAddPix(pixadb, pix4, L_INSERT);
1562             pixDestroy(&pix3);
1563         }
1564     }
1565 
1566     if (psize) *psize = ibest + 1;
1567     if (ppixm)
1568         *ppixm = pixCloseBrick(NULL, pixs, ibest + 1, 1);
1569 
1570     numaDestroy(&nacc);
1571     numaDestroy(&nadiff);
1572     return 0;
1573 }
1574 
1575 
1576 /*!
1577  * \brief   pixWordBoxesByDilation()
1578  *
1579  * \param[in]    pixs                  1 bpp; typ. 75 - 200 ppi
1580  * \param[in]    minwidth, minheight   saved components; smaller are discarded
1581  * \param[in]    maxwidth, maxheight   saved components; larger are discarded
1582  * \param[out]   pboxa                 of dilated word mask
1583  * \param[out]   psize [optional]      size of good horizontal dilation
1584  * \param[out]   pixadb [optional]     debug: pixa of intermediate steps
1585  * \return  0 if OK, 1 on error
1586  *
1587  * <pre>
1588  * Notes:
1589  *      (1) Returns a pruned set of word boxes.
1590  *      (2) See pixWordMaskByDilation().
1591  * </pre>
1592  */
1593 l_int32
pixWordBoxesByDilation(PIX * pixs,l_int32 minwidth,l_int32 minheight,l_int32 maxwidth,l_int32 maxheight,BOXA ** pboxa,l_int32 * psize,PIXA * pixadb)1594 pixWordBoxesByDilation(PIX      *pixs,
1595                        l_int32   minwidth,
1596                        l_int32   minheight,
1597                        l_int32   maxwidth,
1598                        l_int32   maxheight,
1599                        BOXA    **pboxa,
1600                        l_int32  *psize,
1601                        PIXA     *pixadb)
1602 {
1603 BOXA  *boxa1, *boxa2;
1604 PIX   *pix1, *pix2;
1605 
1606     PROCNAME("pixWordBoxesByDilation");
1607 
1608     if (psize) *psize = 0;
1609     if (!pixs || pixGetDepth(pixs) != 1)
1610         return ERROR_INT("pixs undefined or not 1 bpp", procName, 1);
1611     if (!pboxa)
1612         return ERROR_INT("&boxa not defined", procName, 1);
1613     *pboxa = NULL;
1614 
1615         /* Make a first estimate of the word mask */
1616     if (pixWordMaskByDilation(pixs, &pix1, psize, pixadb))
1617         return ERROR_INT("pixWordMaskByDilation() failed", procName, 1);
1618 
1619         /* Prune the word mask.  Get the bounding boxes of the words.
1620          * Remove the small ones, which can be due to punctuation
1621          * that was not joined to a word.  Also remove the large ones,
1622          * which are not likely to be words. */
1623     boxa1 = pixConnComp(pix1, NULL, 8);
1624     boxa2 = boxaSelectBySize(boxa1, minwidth, minheight, L_SELECT_IF_BOTH,
1625                              L_SELECT_IF_GTE, NULL);
1626     *pboxa = boxaSelectBySize(boxa2, maxwidth, maxheight, L_SELECT_IF_BOTH,
1627                              L_SELECT_IF_LTE, NULL);
1628     if (pixadb) {
1629         pix2 = pixCopy(NULL, pixs);
1630         pixRenderBoxaArb(pix2, boxa1, 2, 255, 0, 0);
1631         pixaAddPix(pixadb, pix2, L_INSERT);
1632         pix2 = pixCopy(NULL, pixs);
1633         pixRenderBoxaArb(pix2, boxa2, 2, 0, 255, 0);
1634         pixaAddPix(pixadb, pix2, L_INSERT);
1635     }
1636     boxaDestroy(&boxa1);
1637     boxaDestroy(&boxa2);
1638     pixDestroy(&pix1);
1639     return 0;
1640 }
1641 
1642 
1643 /*----------------------------------------------------------------------*
1644  *                 Build grayscale composites (templates)               *
1645  *----------------------------------------------------------------------*/
1646 /*!
1647  * \brief   jbAccumulateComposites()
1648  *
1649  * \param[in]    pixaa one pixa for each class
1650  * \param[out]   ppna number of samples used to build each composite
1651  * \param[out]   pptat centroids of bordered composites
1652  * \return  pixad accumulated sum of samples in each class,
1653  *                     or NULL on error
1654  *
1655  */
1656 PIXA *
jbAccumulateComposites(PIXAA * pixaa,NUMA ** pna,PTA ** pptat)1657 jbAccumulateComposites(PIXAA  *pixaa,
1658                        NUMA  **pna,
1659                        PTA   **pptat)
1660 {
1661 l_int32    n, nt, i, j, d, minw, maxw, minh, maxh, xdiff, ydiff;
1662 l_float32  x, y, xave, yave;
1663 NUMA      *na;
1664 PIX       *pix, *pixt1, *pixt2, *pixsum;
1665 PIXA      *pixa, *pixad;
1666 PTA       *ptat, *pta;
1667 
1668     PROCNAME("jbAccumulateComposites");
1669 
1670     if (!pptat)
1671         return (PIXA *)ERROR_PTR("&ptat not defined", procName, NULL);
1672     *pptat = NULL;
1673     if (!pna)
1674         return (PIXA *)ERROR_PTR("&na not defined", procName, NULL);
1675     *pna = NULL;
1676     if (!pixaa)
1677         return (PIXA *)ERROR_PTR("pixaa not defined", procName, NULL);
1678 
1679     n = pixaaGetCount(pixaa, NULL);
1680     if ((ptat = ptaCreate(n)) == NULL)
1681         return (PIXA *)ERROR_PTR("ptat not made", procName, NULL);
1682     *pptat = ptat;
1683     pixad = pixaCreate(n);
1684     na = numaCreate(n);
1685     *pna = na;
1686 
1687     for (i = 0; i < n; i++) {
1688         pixa = pixaaGetPixa(pixaa, i, L_CLONE);
1689         nt = pixaGetCount(pixa);
1690         numaAddNumber(na, nt);
1691         if (nt == 0) {
1692             L_WARNING("empty pixa found!\n", procName);
1693             pixaDestroy(&pixa);
1694             continue;
1695         }
1696         pixaSizeRange(pixa, &minw, &minh, &maxw, &maxh);
1697         pix = pixaGetPix(pixa, 0, L_CLONE);
1698         d = pixGetDepth(pix);
1699         pixDestroy(&pix);
1700         pixt1 = pixCreate(maxw, maxh, d);
1701         pixsum = pixInitAccumulate(maxw, maxh, 0);
1702         pta = pixaCentroids(pixa);
1703 
1704             /* Find the average value of the centroids ... */
1705         xave = yave = 0;
1706         for (j = 0; j < nt; j++) {
1707             ptaGetPt(pta, j, &x, &y);
1708             xave += x;
1709             yave += y;
1710         }
1711         xave = xave / (l_float32)nt;
1712         yave = yave / (l_float32)nt;
1713 
1714             /* and place all centroids at their average value */
1715         for (j = 0; j < nt; j++) {
1716             pixt2 = pixaGetPix(pixa, j, L_CLONE);
1717             ptaGetPt(pta, j, &x, &y);
1718             xdiff = (l_int32)(x - xave);
1719             ydiff = (l_int32)(y - yave);
1720             pixClearAll(pixt1);
1721             pixRasterop(pixt1, xdiff, ydiff, maxw, maxh, PIX_SRC,
1722                         pixt2, 0, 0);
1723             pixAccumulate(pixsum, pixt1, L_ARITH_ADD);
1724             pixDestroy(&pixt2);
1725         }
1726         pixaAddPix(pixad, pixsum, L_INSERT);
1727         ptaAddPt(ptat, xave, yave);
1728 
1729         pixaDestroy(&pixa);
1730         pixDestroy(&pixt1);
1731         ptaDestroy(&pta);
1732     }
1733 
1734     return pixad;
1735 }
1736 
1737 
1738 /*!
1739  * \brief   jbTemplatesFromComposites()
1740  *
1741  * \param[in]    pixac one pix of composites for each class
1742  * \param[in]    na number of samples used for each class composite
1743  * \return  pixad 8 bpp templates for each class, or NULL on error
1744  *
1745  */
1746 PIXA *
jbTemplatesFromComposites(PIXA * pixac,NUMA * na)1747 jbTemplatesFromComposites(PIXA  *pixac,
1748                           NUMA  *na)
1749 {
1750 l_int32    n, i;
1751 l_float32  nt;  /* number of samples in the composite; always an integer */
1752 l_float32  factor;
1753 PIX       *pixsum;   /* accumulated composite */
1754 PIX       *pixd;
1755 PIXA      *pixad;
1756 
1757     PROCNAME("jbTemplatesFromComposites");
1758 
1759     if (!pixac)
1760         return (PIXA *)ERROR_PTR("pixac not defined", procName, NULL);
1761     if (!na)
1762         return (PIXA *)ERROR_PTR("na not defined", procName, NULL);
1763 
1764     n = pixaGetCount(pixac);
1765     pixad = pixaCreate(n);
1766     for (i = 0; i < n; i++) {
1767         pixsum = pixaGetPix(pixac, i, L_COPY);  /* changed internally */
1768         numaGetFValue(na, i, &nt);
1769         factor = 255. / nt;
1770         pixMultConstAccumulate(pixsum, factor, 0);  /* changes pixsum */
1771         pixd = pixFinalAccumulate(pixsum, 0, 8);
1772         pixaAddPix(pixad, pixd, L_INSERT);
1773         pixDestroy(&pixsum);
1774     }
1775 
1776     return pixad;
1777 }
1778 
1779 
1780 
1781 /*----------------------------------------------------------------------*
1782  *                       jbig2 utility routines                         *
1783  *----------------------------------------------------------------------*/
1784 /*!
1785  * \brief   jbClasserCreate()
1786  *
1787  * \param[in]    method JB_RANKHAUS, JB_CORRELATION
1788  * \param[in]    components JB_CONN_COMPS, JB_CHARACTERS, JB_WORDS
1789  * \return  jbclasser, or NULL on error
1790  */
1791 JBCLASSER *
jbClasserCreate(l_int32 method,l_int32 components)1792 jbClasserCreate(l_int32  method,
1793                 l_int32  components)
1794 {
1795 JBCLASSER  *classer;
1796 
1797     PROCNAME("jbClasserCreate");
1798 
1799     if (method != JB_RANKHAUS && method != JB_CORRELATION)
1800         return (JBCLASSER *)ERROR_PTR("invalid method", procName, NULL);
1801     if (components != JB_CONN_COMPS && components != JB_CHARACTERS &&
1802         components != JB_WORDS)
1803         return (JBCLASSER *)ERROR_PTR("invalid component", procName, NULL);
1804 
1805     classer = (JBCLASSER *)LEPT_CALLOC(1, sizeof(JBCLASSER));
1806     classer->method = method;
1807     classer->components = components;
1808     classer->nacomps = numaCreate(0);
1809     classer->pixaa = pixaaCreate(0);
1810     classer->pixat = pixaCreate(0);
1811     classer->pixatd = pixaCreate(0);
1812     classer->nafgt = numaCreate(0);
1813     classer->naarea = numaCreate(0);
1814     classer->ptac = ptaCreate(0);
1815     classer->ptact = ptaCreate(0);
1816     classer->naclass = numaCreate(0);
1817     classer->napage = numaCreate(0);
1818     classer->ptaul = ptaCreate(0);
1819     return classer;
1820 }
1821 
1822 
1823 /*
1824  *  jbClasserDestroy()
1825  *
1826  *      Input: &classer (<inout> to be nulled)
1827  *      Return: void
1828  */
1829 void
jbClasserDestroy(JBCLASSER ** pclasser)1830 jbClasserDestroy(JBCLASSER  **pclasser)
1831 {
1832 JBCLASSER  *classer;
1833 
1834     if (!pclasser)
1835         return;
1836     if ((classer = *pclasser) == NULL)
1837         return;
1838 
1839     sarrayDestroy(&classer->safiles);
1840     numaDestroy(&classer->nacomps);
1841     pixaaDestroy(&classer->pixaa);
1842     pixaDestroy(&classer->pixat);
1843     pixaDestroy(&classer->pixatd);
1844     l_dnaHashDestroy(&classer->dahash);
1845     numaDestroy(&classer->nafgt);
1846     numaDestroy(&classer->naarea);
1847     ptaDestroy(&classer->ptac);
1848     ptaDestroy(&classer->ptact);
1849     numaDestroy(&classer->naclass);
1850     numaDestroy(&classer->napage);
1851     ptaDestroy(&classer->ptaul);
1852     ptaDestroy(&classer->ptall);
1853     LEPT_FREE(classer);
1854     *pclasser = NULL;
1855     return;
1856 }
1857 
1858 
1859 /*!
1860  * \brief   jbDataSave()
1861  *
1862  * \param[in]    jbclasser
1863  * \param[in]    latticew, latticeh cell size used to store each
1864  *                  connected component in the composite
1865  * \return  jbdata, or NULL on error
1866  *
1867  * <pre>
1868  * Notes:
1869  *      (1) This routine stores the jbig2-type data required for
1870  *          generating a lossy jbig2 version of the image.
1871  *          It can be losslessly written to (and read from) two files.
1872  *      (2) It generates and stores the mosaic of templates.
1873  *      (3) It clones the Numa and Pta arrays, so these must all
1874  *          be destroyed by the caller.
1875  *      (4) Input 0 to use the default values for latticew and/or latticeh,
1876  * </pre>
1877  */
1878 JBDATA *
jbDataSave(JBCLASSER * classer)1879 jbDataSave(JBCLASSER  *classer)
1880 {
1881 l_int32  maxw, maxh;
1882 JBDATA  *data;
1883 PIX     *pix;
1884 
1885     PROCNAME("jbDataSave");
1886 
1887     if (!classer)
1888         return (JBDATA *)ERROR_PTR("classer not defined", procName, NULL);
1889 
1890         /* Write the templates into an array. */
1891     pixaSizeRange(classer->pixat, NULL, NULL, &maxw, &maxh);
1892     pix = pixaDisplayOnLattice(classer->pixat, maxw + 1, maxh + 1,
1893                                NULL, NULL);
1894     if (!pix)
1895         return (JBDATA *)ERROR_PTR("data not made", procName, NULL);
1896 
1897     data = (JBDATA *)LEPT_CALLOC(1, sizeof(JBDATA));
1898     data->pix = pix;
1899     data->npages = classer->npages;
1900     data->w = classer->w;
1901     data->h = classer->h;
1902     data->nclass = classer->nclass;
1903     data->latticew = maxw + 1;
1904     data->latticeh = maxh + 1;
1905     data->naclass = numaClone(classer->naclass);
1906     data->napage = numaClone(classer->napage);
1907     data->ptaul = ptaClone(classer->ptaul);
1908     return data;
1909 }
1910 
1911 
1912 /*
1913  *  jbDataDestroy()
1914  *
1915  *      Input: &data (<inout> to be nulled)
1916  *      Return: void
1917  */
1918 void
jbDataDestroy(JBDATA ** pdata)1919 jbDataDestroy(JBDATA  **pdata)
1920 {
1921 JBDATA  *data;
1922 
1923     if (!pdata)
1924         return;
1925     if ((data = *pdata) == NULL)
1926         return;
1927 
1928     pixDestroy(&data->pix);
1929     numaDestroy(&data->naclass);
1930     numaDestroy(&data->napage);
1931     ptaDestroy(&data->ptaul);
1932     LEPT_FREE(data);
1933     *pdata = NULL;
1934     return;
1935 }
1936 
1937 
1938 /*!
1939  * \brief   jbDataWrite()
1940  *
1941  * \param[in]    rootname for output files; everything but the extension
1942  * \param[in]    jbdata
1943  * \return  0 if OK, 1 on error
1944  *
1945  * <pre>
1946  * Notes:
1947  *      (1) Serialization function that writes data in jbdata to file.
1948  * </pre>
1949  */
1950 l_int32
jbDataWrite(const char * rootout,JBDATA * jbdata)1951 jbDataWrite(const char  *rootout,
1952             JBDATA      *jbdata)
1953 {
1954 char     buf[L_BUF_SIZE];
1955 l_int32  w, h, nclass, npages, cellw, cellh, ncomp, i, x, y, iclass, ipage;
1956 NUMA    *naclass, *napage;
1957 PTA     *ptaul;
1958 PIX     *pixt;
1959 FILE    *fp;
1960 
1961     PROCNAME("jbDataWrite");
1962 
1963     if (!rootout)
1964         return ERROR_INT("no rootout", procName, 1);
1965     if (!jbdata)
1966         return ERROR_INT("no jbdata", procName, 1);
1967 
1968     npages = jbdata->npages;
1969     w = jbdata->w;
1970     h = jbdata->h;
1971     pixt = jbdata->pix;
1972     nclass = jbdata->nclass;
1973     cellw = jbdata->latticew;
1974     cellh = jbdata->latticeh;
1975     naclass = jbdata->naclass;
1976     napage = jbdata->napage;
1977     ptaul = jbdata->ptaul;
1978 
1979     snprintf(buf, L_BUF_SIZE, "%s%s", rootout, JB_TEMPLATE_EXT);
1980     pixWrite(buf, pixt, IFF_PNG);
1981 
1982     snprintf(buf, L_BUF_SIZE, "%s%s", rootout, JB_DATA_EXT);
1983     if ((fp = fopenWriteStream(buf, "wb")) == NULL)
1984         return ERROR_INT("stream not opened", procName, 1);
1985     ncomp = ptaGetCount(ptaul);
1986     fprintf(fp, "jb data file\n");
1987     fprintf(fp, "num pages = %d\n", npages);
1988     fprintf(fp, "page size: w = %d, h = %d\n", w, h);
1989     fprintf(fp, "num components = %d\n", ncomp);
1990     fprintf(fp, "num classes = %d\n", nclass);
1991     fprintf(fp, "template lattice size: w = %d, h = %d\n", cellw, cellh);
1992     for (i = 0; i < ncomp; i++) {
1993         numaGetIValue(napage, i, &ipage);
1994         numaGetIValue(naclass, i, &iclass);
1995         ptaGetIPt(ptaul, i, &x, &y);
1996         fprintf(fp, "%d %d %d %d\n", ipage, iclass, x, y);
1997     }
1998     fclose(fp);
1999 
2000     return 0;
2001 }
2002 
2003 
2004 /*!
2005  * \brief   jbDataRead()
2006  *
2007  * \param[in]    rootname for template and data files
2008  * \return  jbdata, or NULL on error
2009  */
2010 JBDATA *
jbDataRead(const char * rootname)2011 jbDataRead(const char  *rootname)
2012 {
2013 char      fname[L_BUF_SIZE];
2014 char     *linestr;
2015 l_uint8  *data;
2016 l_int32   nsa, i, w, h, cellw, cellh, x, y, iclass, ipage;
2017 l_int32   npages, nclass, ncomp, ninit;
2018 size_t    size;
2019 JBDATA   *jbdata;
2020 NUMA     *naclass, *napage;
2021 PIX      *pixs;
2022 PTA      *ptaul;
2023 SARRAY   *sa;
2024 
2025     PROCNAME("jbDataRead");
2026 
2027     if (!rootname)
2028         return (JBDATA *)ERROR_PTR("rootname not defined", procName, NULL);
2029 
2030     snprintf(fname, L_BUF_SIZE, "%s%s", rootname, JB_TEMPLATE_EXT);
2031     if ((pixs = pixRead(fname)) == NULL)
2032         return (JBDATA *)ERROR_PTR("pix not read", procName, NULL);
2033 
2034     snprintf(fname, L_BUF_SIZE, "%s%s", rootname, JB_DATA_EXT);
2035     if ((data = l_binaryRead(fname, &size)) == NULL) {
2036         pixDestroy(&pixs);
2037         return (JBDATA *)ERROR_PTR("data not read", procName, NULL);
2038     }
2039 
2040     if ((sa = sarrayCreateLinesFromString((char *)data, 0)) == NULL) {
2041         pixDestroy(&pixs);
2042         LEPT_FREE(data);
2043         return (JBDATA *)ERROR_PTR("sa not made", procName, NULL);
2044     }
2045     nsa = sarrayGetCount(sa);   /* number of cc + 6 */
2046     linestr = sarrayGetString(sa, 0, L_NOCOPY);
2047     if (strcmp(linestr, "jb data file") != 0) {
2048         pixDestroy(&pixs);
2049         LEPT_FREE(data);
2050         sarrayDestroy(&sa);
2051         return (JBDATA *)ERROR_PTR("invalid jb data file", procName, NULL);
2052     }
2053     linestr = sarrayGetString(sa, 1, L_NOCOPY);
2054     sscanf(linestr, "num pages = %d", &npages);
2055     linestr = sarrayGetString(sa, 2, L_NOCOPY);
2056     sscanf(linestr, "page size: w = %d, h = %d", &w, &h);
2057     linestr = sarrayGetString(sa, 3, L_NOCOPY);
2058     sscanf(linestr, "num components = %d", &ncomp);
2059     linestr = sarrayGetString(sa, 4, L_NOCOPY);
2060     sscanf(linestr, "num classes = %d\n", &nclass);
2061     linestr = sarrayGetString(sa, 5, L_NOCOPY);
2062     sscanf(linestr, "template lattice size: w = %d, h = %d\n", &cellw, &cellh);
2063 
2064 #if 1
2065     fprintf(stderr, "num pages = %d\n", npages);
2066     fprintf(stderr, "page size: w = %d, h = %d\n", w, h);
2067     fprintf(stderr, "num components = %d\n", ncomp);
2068     fprintf(stderr, "num classes = %d\n", nclass);
2069     fprintf(stderr, "template lattice size: w = %d, h = %d\n", cellw, cellh);
2070 #endif
2071 
2072     ninit = ncomp;
2073     if (ncomp > 1000000) {  /* fuzz protection */
2074         L_WARNING("ncomp > 1M\n", procName);
2075         ninit = 1000000;
2076     }
2077     naclass = numaCreate(ninit);
2078     napage = numaCreate(ninit);
2079     ptaul = ptaCreate(ninit);
2080     for (i = 6; i < nsa; i++) {
2081         linestr = sarrayGetString(sa, i, L_NOCOPY);
2082         sscanf(linestr, "%d %d %d %d\n", &ipage, &iclass, &x, &y);
2083         numaAddNumber(napage, ipage);
2084         numaAddNumber(naclass, iclass);
2085         ptaAddPt(ptaul, x, y);
2086     }
2087 
2088     jbdata = (JBDATA *)LEPT_CALLOC(1, sizeof(JBDATA));
2089     jbdata->pix = pixs;
2090     jbdata->npages = npages;
2091     jbdata->w = w;
2092     jbdata->h = h;
2093     jbdata->nclass = nclass;
2094     jbdata->latticew = cellw;
2095     jbdata->latticeh = cellh;
2096     jbdata->naclass = naclass;
2097     jbdata->napage = napage;
2098     jbdata->ptaul = ptaul;
2099 
2100     LEPT_FREE(data);
2101     sarrayDestroy(&sa);
2102     return jbdata;
2103 }
2104 
2105 
2106 /*!
2107  * \brief   jbDataRender()
2108  *
2109  * \param[in]    jbdata
2110  * \param[in]    debugflag if TRUE, writes into 2 bpp pix and adds
2111  *                         component outlines in color
2112  * \return  pixa reconstruction of original images, using templates or
2113  *              NULL on error
2114  */
2115 PIXA *
jbDataRender(JBDATA * data,l_int32 debugflag)2116 jbDataRender(JBDATA  *data,
2117              l_int32  debugflag)
2118 {
2119 l_int32   i, w, h, cellw, cellh, x, y, iclass, ipage;
2120 l_int32   npages, nclass, ncomp, wp, hp;
2121 BOX      *box;
2122 NUMA     *naclass, *napage;
2123 PIX      *pixt, *pixt2, *pix, *pixd;
2124 PIXA     *pixat;   /* pixa of templates */
2125 PIXA     *pixad;   /* pixa of output images */
2126 PIXCMAP  *cmap;
2127 PTA      *ptaul;
2128 
2129     PROCNAME("jbDataRender");
2130 
2131     if (!data)
2132         return (PIXA *)ERROR_PTR("data not defined", procName, NULL);
2133 
2134     npages = data->npages;
2135     w = data->w;
2136     h = data->h;
2137     pixt = data->pix;
2138     nclass = data->nclass;
2139     cellw = data->latticew;
2140     cellh = data->latticeh;
2141     naclass = data->naclass;
2142     napage = data->napage;
2143     ptaul = data->ptaul;
2144     ncomp = numaGetCount(naclass);
2145 
2146         /* Reconstruct the original set of images from the templates
2147          * and the data associated with each component.  First,
2148          * generate the output pixa as a set of empty pix. */
2149     if ((pixad = pixaCreate(npages)) == NULL)
2150         return (PIXA *)ERROR_PTR("pixad not made", procName, NULL);
2151     for (i = 0; i < npages; i++) {
2152         if (debugflag == FALSE) {
2153             pix = pixCreate(w, h, 1);
2154         } else {
2155             pix = pixCreate(w, h, 2);
2156             cmap = pixcmapCreate(2);
2157             pixcmapAddColor(cmap, 255, 255, 255);
2158             pixcmapAddColor(cmap, 0, 0, 0);
2159             pixcmapAddColor(cmap, 255, 0, 0);  /* for box outlines */
2160             pixSetColormap(pix, cmap);
2161         }
2162         pixaAddPix(pixad, pix, L_INSERT);
2163     }
2164 
2165         /* Put the class templates into a pixa. */
2166     if ((pixat = pixaCreateFromPix(pixt, nclass, cellw, cellh)) == NULL) {
2167         pixaDestroy(&pixad);
2168         return (PIXA *)ERROR_PTR("pixat not made", procName, NULL);
2169     }
2170 
2171         /* Place each component in the right location on its page. */
2172     for (i = 0; i < ncomp; i++) {
2173         numaGetIValue(napage, i, &ipage);
2174         numaGetIValue(naclass, i, &iclass);
2175         pix = pixaGetPix(pixat, iclass, L_CLONE);  /* the template */
2176         wp = pixGetWidth(pix);
2177         hp = pixGetHeight(pix);
2178         ptaGetIPt(ptaul, i, &x, &y);
2179         pixd = pixaGetPix(pixad, ipage, L_CLONE);   /* the output page */
2180         if (debugflag == FALSE) {
2181             pixRasterop(pixd, x, y, wp, hp, PIX_SRC | PIX_DST, pix, 0, 0);
2182         } else {
2183             pixt2 = pixConvert1To2Cmap(pix);
2184             pixRasterop(pixd, x, y, wp, hp, PIX_SRC | PIX_DST, pixt2, 0, 0);
2185             box = boxCreate(x, y, wp, hp);
2186             pixRenderBoxArb(pixd, box, 1, 255, 0, 0);
2187             pixDestroy(&pixt2);
2188             boxDestroy(&box);
2189         }
2190         pixDestroy(&pix);   /* the clone only */
2191         pixDestroy(&pixd);  /* the clone only */
2192     }
2193 
2194     pixaDestroy(&pixat);
2195     return pixad;
2196 }
2197 
2198 
2199 /*!
2200  * \brief   jbGetULCorners()
2201  *
2202  * \param[in]    jbclasser
2203  * \param[in]    pixs full res image
2204  * \param[in]    boxa of c.c. bounding rectangles for this page
2205  * \return  0 if OK, 1 on error
2206  *
2207  * <pre>
2208  * Notes:
2209  *      (1) This computes the ptaul field, which has the global UL corners,
2210  *          adjusted for each specific component, so that each component
2211  *          can be replaced by the template for its class and have the
2212  *          centroid in the template in the same position as the
2213  *          centroid of the original connected component.  It is important
2214  *          that this be done properly to avoid a wavy baseline in the
2215  *          result.
2216  *      (2) The array fields ptac and ptact give the centroids of
2217  *          those components relative to the UL corner of each component.
2218  *          Here, we compute the difference in each component, round to
2219  *          nearest integer, and correct the box->x and box->y by
2220  *          the appropriate integral difference.
2221  *      (3) The templates and stored instances are all bordered.
2222  * </pre>
2223  */
2224 l_int32
jbGetULCorners(JBCLASSER * classer,PIX * pixs,BOXA * boxa)2225 jbGetULCorners(JBCLASSER  *classer,
2226                PIX        *pixs,
2227                BOXA       *boxa)
2228 {
2229 l_int32    i, baseindex, index, n, iclass, idelx, idely, x, y, dx, dy;
2230 l_int32   *sumtab;
2231 l_float32  x1, x2, y1, y2, delx, dely;
2232 BOX       *box;
2233 NUMA      *naclass;
2234 PIX       *pixt;
2235 PTA       *ptac, *ptact, *ptaul;
2236 
2237     PROCNAME("jbGetULCorners");
2238 
2239     if (!classer)
2240         return ERROR_INT("classer not defined", procName, 1);
2241     if (!pixs)
2242         return ERROR_INT("pixs not defined", procName, 1);
2243     if (!boxa)
2244         return ERROR_INT("boxa not defined", procName, 1);
2245 
2246     n = boxaGetCount(boxa);
2247     ptaul = classer->ptaul;
2248     naclass = classer->naclass;
2249     ptac = classer->ptac;
2250     ptact = classer->ptact;
2251     baseindex = classer->baseindex;  /* num components before this page */
2252     sumtab = makePixelSumTab8();
2253     for (i = 0; i < n; i++) {
2254         index = baseindex + i;
2255         ptaGetPt(ptac, index, &x1, &y1);
2256         numaGetIValue(naclass, index, &iclass);
2257         ptaGetPt(ptact, iclass, &x2, &y2);
2258         delx = x2 - x1;
2259         dely = y2 - y1;
2260         if (delx >= 0)
2261             idelx = (l_int32)(delx + 0.5);
2262         else
2263             idelx = (l_int32)(delx - 0.5);
2264         if (dely >= 0)
2265             idely = (l_int32)(dely + 0.5);
2266         else
2267             idely = (l_int32)(dely - 0.5);
2268         if ((box = boxaGetBox(boxa, i, L_CLONE)) == NULL) {
2269             LEPT_FREE(sumtab);
2270             return ERROR_INT("box not found", procName, 1);
2271         }
2272         boxGetGeometry(box, &x, &y, NULL, NULL);
2273 
2274             /* Get final increments dx and dy for best alignment */
2275         pixt = pixaGetPix(classer->pixat, iclass, L_CLONE);
2276         finalPositioningForAlignment(pixs, x, y, idelx, idely,
2277                                      pixt, sumtab, &dx, &dy);
2278 /*        if (i % 20 == 0)
2279             fprintf(stderr, "dx = %d, dy = %d\n", dx, dy); */
2280         ptaAddPt(ptaul, x - idelx + dx, y - idely + dy);
2281         boxDestroy(&box);
2282         pixDestroy(&pixt);
2283     }
2284 
2285     LEPT_FREE(sumtab);
2286     return 0;
2287 }
2288 
2289 
2290 /*!
2291  * \brief   jbGetLLCorners()
2292  *
2293  * \param[in]    jbclasser
2294  * \return  0 if OK, 1 on error
2295  *
2296  * <pre>
2297  * Notes:
2298  *      (1) This computes the ptall field, which has the global LL corners,
2299  *          adjusted for each specific component, so that each component
2300  *          can be replaced by the template for its class and have the
2301  *          centroid in the template in the same position as the
2302  *          centroid of the original connected component. It is important
2303  *          that this be done properly to avoid a wavy baseline in the result.
2304  *      (2) It is computed here from the corresponding UL corners, where
2305  *          the input templates and stored instances are all bordered.
2306  *          This should be done after all pages have been processed.
2307  *      (3) For proper substitution, the templates whose LL corners are
2308  *          placed in these locations must be UN-bordered.
2309  *          This is available for a realistic jbig2 encoder, which would
2310  *          (1) encode each template without a border, and (2) encode
2311  *          the position using the LL corner (rather than the UL
2312  *          corner) because the difference between y-values
2313  *          of successive instances is typically close to zero.
2314  * </pre>
2315  */
2316 l_int32
jbGetLLCorners(JBCLASSER * classer)2317 jbGetLLCorners(JBCLASSER  *classer)
2318 {
2319 l_int32    i, iclass, n, x1, y1, h;
2320 NUMA      *naclass;
2321 PIX       *pix;
2322 PIXA      *pixat;
2323 PTA       *ptaul, *ptall;
2324 
2325     PROCNAME("jbGetLLCorners");
2326 
2327     if (!classer)
2328         return ERROR_INT("classer not defined", procName, 1);
2329 
2330     ptaul = classer->ptaul;
2331     naclass = classer->naclass;
2332     pixat = classer->pixat;
2333 
2334     ptaDestroy(&classer->ptall);
2335     n = ptaGetCount(ptaul);
2336     ptall = ptaCreate(n);
2337     classer->ptall = ptall;
2338 
2339         /* If the templates were bordered, we would add h - 1 to the UL
2340          * corner y-value.  However, because the templates to be used
2341          * here have their borders removed, and the borders are
2342          * JB_ADDED_PIXELS on each side, we add h - 1 - 2 * JB_ADDED_PIXELS
2343          * to the UL corner y-value.  */
2344     for (i = 0; i < n; i++) {
2345         ptaGetIPt(ptaul, i, &x1, &y1);
2346         numaGetIValue(naclass, i, &iclass);
2347         pix = pixaGetPix(pixat, iclass, L_CLONE);
2348         h = pixGetHeight(pix);
2349         ptaAddPt(ptall, x1, y1 + h - 1 - 2 * JB_ADDED_PIXELS);
2350         pixDestroy(&pix);
2351     }
2352 
2353     return 0;
2354 }
2355 
2356 
2357 /*----------------------------------------------------------------------*
2358  *                              Static helpers                          *
2359  *----------------------------------------------------------------------*/
2360 /* When looking for similar matches we check templates whose size is +/- 2 in
2361  * each direction. This involves 25 possible sizes. This array contains the
2362  * offsets for each of those positions in a spiral pattern. There are 25 pairs
2363  * of numbers in this array: even positions are x values. */
2364 static int two_by_two_walk[50] = {
2365   0, 0,
2366   0, 1,
2367   -1, 0,
2368   0, -1,
2369   1, 0,
2370   -1, 1,
2371   1, 1,
2372   -1, -1,
2373   1, -1,
2374   0, -2,
2375   2, 0,
2376   0, 2,
2377   -2, 0,
2378   -1, -2,
2379   1, -2,
2380   2, -1,
2381   2, 1,
2382   1, 2,
2383   -1, 2,
2384   -2, 1,
2385   -2, -1,
2386   -2, -2,
2387   2, -2,
2388   2, 2,
2389   -2, 2};
2390 
2391 
2392 /*!
2393  * \brief   findSimilarSizedTemplatesInit()
2394  *
2395  * \param[in]    classer
2396  * \param[in]    pixs instance to be matched
2397  * \return  Allocated context to be used with findSimilar*
2398  */
2399 static JBFINDCTX *
findSimilarSizedTemplatesInit(JBCLASSER * classer,PIX * pixs)2400 findSimilarSizedTemplatesInit(JBCLASSER  *classer,
2401                               PIX        *pixs)
2402 {
2403 JBFINDCTX  *state;
2404 
2405     state = (JBFINDCTX *)LEPT_CALLOC(1, sizeof(JBFINDCTX));
2406     state->w = pixGetWidth(pixs) - 2 * JB_ADDED_PIXELS;
2407     state->h = pixGetHeight(pixs) - 2 * JB_ADDED_PIXELS;
2408     state->classer = classer;
2409     return state;
2410 }
2411 
2412 
2413 static void
findSimilarSizedTemplatesDestroy(JBFINDCTX ** pstate)2414 findSimilarSizedTemplatesDestroy(JBFINDCTX  **pstate)
2415 {
2416 JBFINDCTX  *state;
2417 
2418     PROCNAME("findSimilarSizedTemplatesDestroy");
2419 
2420     if (pstate == NULL) {
2421         L_WARNING("ptr address is null\n", procName);
2422         return;
2423     }
2424     if ((state = *pstate) == NULL)
2425         return;
2426 
2427     l_dnaDestroy(&state->dna);
2428     LEPT_FREE(state);
2429     *pstate = NULL;
2430     return;
2431 }
2432 
2433 
2434 /*!
2435  * \brief   findSimilarSizedTemplatesNext()
2436  *
2437  * \param[in]    state from findSimilarSizedTemplatesInit
2438  * \return  next template number, or -1 when finished
2439  *
2440  *  We have a dna hash table that maps template area to a list of template
2441  *  numbers with that area.  We wish to find similar sized templates,
2442  *  so we first look for templates with the same width and height, and
2443  *  then with width + 1, etc.  This walk is guided by the
2444  *  two_by_two_walk array, above.
2445  *
2446  *  We don't want to have to collect the whole list of templates first,
2447  *  because we hope to find a well-matching template quickly.  So we
2448  *  keep the context for this walk in an explictit state structure,
2449  *  and this function acts like a generator.
2450  */
2451 static l_int32
findSimilarSizedTemplatesNext(JBFINDCTX * state)2452 findSimilarSizedTemplatesNext(JBFINDCTX  *state)
2453 {
2454 l_int32  desiredh, desiredw, size, templ;
2455 PIX     *pixt;
2456 
2457     while(1) {  /* Continue the walk over step 'i' */
2458         if (state->i >= 25) {  /* all done; didn't find a good match */
2459             return -1;
2460         }
2461 
2462         desiredw = state->w + two_by_two_walk[2 * state->i];
2463         desiredh = state->h + two_by_two_walk[2 * state->i + 1];
2464         if (desiredh < 1 || desiredw < 1) {  /* invalid size */
2465             state->i++;
2466             continue;
2467         }
2468 
2469         if (!state->dna) {
2470                 /* We have yet to start walking the array for the step 'i' */
2471             state->dna = l_dnaHashGetDna(state->classer->dahash,
2472                                          desiredh * desiredw, L_CLONE);
2473             if (!state->dna) {  /* nothing there */
2474                 state->i++;
2475                 continue;
2476             }
2477 
2478             state->n = 0;  /* OK, we got a dna. */
2479         }
2480 
2481             /* Continue working on this dna */
2482         size = l_dnaGetCount(state->dna);
2483         for ( ; state->n < size; ) {
2484             templ = (l_int32)(state->dna->array[state->n++] + 0.5);
2485             pixt = pixaGetPix(state->classer->pixat, templ, L_CLONE);
2486             if (pixGetWidth(pixt) - 2 * JB_ADDED_PIXELS == desiredw &&
2487                 pixGetHeight(pixt) - 2 * JB_ADDED_PIXELS == desiredh) {
2488                 pixDestroy(&pixt);
2489                 return templ;
2490             }
2491             pixDestroy(&pixt);
2492         }
2493 
2494             /* Exhausted the dna (no match found); take another step and
2495              * try again. */
2496         state->i++;
2497         l_dnaDestroy(&state->dna);
2498         continue;
2499     }
2500 }
2501 
2502 
2503 /*!
2504  * \brief   finalPositioningForAlignment()
2505  *
2506  * \param[in]    pixs input page image
2507  * \param[in]    x, y location of UL corner of bb of component in pixs
2508  * \param[in]    idelx, idely compensation to match centroids of component
2509  *                            and template
2510  * \param[in]    pixt template, with JB_ADDED_PIXELS of padding on all sides
2511  * \param[in]    sumtab for summing fg pixels in an image
2512  * \param[in]    &dx, &dy return delta on position for best match; each
2513  *                        one is in the set {-1, 0, 1}
2514  * \return  0 if OK, 1 on error
2515  *
2516  */
2517 static l_int32
finalPositioningForAlignment(PIX * pixs,l_int32 x,l_int32 y,l_int32 idelx,l_int32 idely,PIX * pixt,l_int32 * sumtab,l_int32 * pdx,l_int32 * pdy)2518 finalPositioningForAlignment(PIX      *pixs,
2519                              l_int32   x,
2520                              l_int32   y,
2521                              l_int32   idelx,
2522                              l_int32   idely,
2523                              PIX      *pixt,
2524                              l_int32  *sumtab,
2525                              l_int32  *pdx,
2526                              l_int32  *pdy)
2527 {
2528 l_int32  w, h, i, j, minx, miny, count, mincount;
2529 PIX     *pixi;  /* clipped from source pixs */
2530 PIX     *pixr;  /* temporary storage */
2531 BOX     *box;
2532 
2533     PROCNAME("finalPositioningForAlignment");
2534 
2535     if (!pixs)
2536         return ERROR_INT("pixs not defined", procName, 1);
2537     if (!pixt)
2538         return ERROR_INT("pixt not defined", procName, 1);
2539     if (!pdx || !pdy)
2540         return ERROR_INT("&dx and &dy not both defined", procName, 1);
2541     if (!sumtab)
2542         return ERROR_INT("sumtab not defined", procName, 1);
2543     *pdx = *pdy = 0;
2544 
2545         /* Use JB_ADDED_PIXELS pixels padding on each side */
2546     pixGetDimensions(pixt, &w, &h, NULL);
2547     box = boxCreate(x - idelx - JB_ADDED_PIXELS,
2548                     y - idely - JB_ADDED_PIXELS, w, h);
2549     pixi = pixClipRectangle(pixs, box, NULL);
2550     boxDestroy(&box);
2551     if (!pixi)
2552         return ERROR_INT("pixi not made", procName, 1);
2553 
2554     pixr = pixCreate(pixGetWidth(pixi), pixGetHeight(pixi), 1);
2555     mincount = 0x7fffffff;
2556     for (i = -1; i <= 1; i++) {
2557         for (j = -1; j <= 1; j++) {
2558             pixCopy(pixr, pixi);
2559             pixRasterop(pixr, j, i, w, h, PIX_SRC ^ PIX_DST, pixt, 0, 0);
2560             pixCountPixels(pixr, &count, sumtab);
2561             if (count < mincount) {
2562                 minx = j;
2563                 miny = i;
2564                 mincount = count;
2565             }
2566         }
2567     }
2568     pixDestroy(&pixi);
2569     pixDestroy(&pixr);
2570 
2571     *pdx = minx;
2572     *pdy = miny;
2573     return 0;
2574 }
2575