1 /*====================================================================*
2 - Copyright (C) 2001 Leptonica. All rights reserved.
3 -
4 - Redistribution and use in source and binary forms, with or without
5 - modification, are permitted provided that the following conditions
6 - are met:
7 - 1. Redistributions of source code must retain the above copyright
8 - notice, this list of conditions and the following disclaimer.
9 - 2. Redistributions in binary form must reproduce the above
10 - copyright notice, this list of conditions and the following
11 - disclaimer in the documentation and/or other materials
12 - provided with the distribution.
13 -
14 - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15 - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16 - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17 - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
18 - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 *====================================================================*/
26
27 /*
28 * jbclass.c
29 *
30 * These are functions for unsupervised classification of
31 * collections of connected components -- either characters or
32 * words -- in binary images. They can be used as image
33 * processing steps in jbig2 compression.
34 *
35 * Initialization
36 *
37 * JBCLASSER *jbRankHausInit() [rank hausdorff encoder]
38 * JBCLASSER *jbCorrelationInit() [correlation encoder]
39 * JBCLASSER *jbCorrelationInitWithoutComponents() [ditto]
40 * static JBCLASSER *jbCorrelationInitInternal()
41 *
42 * Classify the pages
43 *
44 * l_int32 jbAddPages()
45 * l_int32 jbAddPage()
46 * l_int32 jbAddPageComponents()
47 *
48 * Rank hausdorff classifier
49 *
50 * l_int32 jbClassifyRankHaus()
51 * l_int32 pixHaustest()
52 * l_int32 pixRankHaustest()
53 *
54 * Binary correlation classifier
55 *
56 * l_int32 jbClassifyCorrelation()
57 *
58 * Determine the image components we start with
59 *
60 * l_int32 jbGetComponents()
61 * l_int32 pixWordMaskByDilation()
62 * l_int32 pixWordBoxesByDilation()
63 *
64 * Build grayscale composites (templates)
65 *
66 * PIXA *jbAccumulateComposites
67 * PIXA *jbTemplatesFromComposites
68 *
69 * Utility functions for Classer
70 *
71 * JBCLASSER *jbClasserCreate()
72 * void jbClasserDestroy()
73 *
74 * Utility functions for Data
75 *
76 * JBDATA *jbDataSave()
77 * void jbDataDestroy()
78 * l_int32 jbDataWrite()
79 * JBDATA *jbDataRead()
80 * PIXA *jbDataRender()
81 * l_int32 jbGetULCorners()
82 * l_int32 jbGetLLCorners()
83 *
84 * Static helpers
85 *
86 * static JBFINDCTX *findSimilarSizedTemplatesInit()
87 * static l_int32 findSimilarSizedTemplatesNext()
88 * static void findSimilarSizedTemplatesDestroy()
89 * static l_int32 finalPositioningForAlignment()
90 *
91 * Note: this is NOT an implementation of the JPEG jbig2
92 * proposed standard encoder, the specifications for which
93 * can be found at http://www.jpeg.org/jbigpt2.html.
94 * (See below for a full implementation.)
95 * It is an implementation of the lower-level part of an encoder that:
96 *
97 * (1) identifies connected components that are going to be used
98 * (2) puts them in similarity classes (this is an unsupervised
99 * classifier), and
100 * (3) stores the result in a simple file format (2 files,
101 * one for templates and one for page/coordinate/template-index
102 * quartets).
103 *
104 * An actual implementation of the official jbig2 encoder could
105 * start with parts (1) and (2), and would then compress the quartets
106 * according to the standards requirements (e.g., Huffman or
107 * arithmetic coding of coordinate differences and image templates).
108 *
109 * The low-level part of the encoder provided here has the
110 * following useful features:
111 *
112 * ~ It is accurate in the identification of templates
113 * and classes because it uses a windowed hausdorff
114 * distance metric.
115 * ~ It is accurate in the placement of the connected
116 * components, doing a two step process of first aligning
117 * the the centroids of the template with those of each instance,
118 * and then making a further correction of up to +- 1 pixel
119 * in each direction to best align the templates.
120 * ~ It is fast because it uses a morphologically based
121 * matching algorithm to implement the hausdorff criterion,
122 * and it selects the patterns that are possible matches
123 * based on their size.
124 *
125 * We provide two different matching functions, one using Hausdorff
126 * distance and one using a simple image correlation.
127 * The Hausdorff method sometimes produces better results for the
128 * same number of classes, because it gives a relatively small
129 * effective weight to foreground pixels near the boundary,
130 * and a relatively large weight to foreground pixels that are
131 * not near the boundary. By effectively ignoring these boundary
132 * pixels, Hausdorff weighting corresponds better to the expected
133 * probabilities of the pixel values in a scanned image, where the
134 * variations in instances of the same printed character are much
135 * more likely to be in pixels near the boundary. By contrast,
136 * the correlation method gives equal weight to all foreground pixels.
137 *
138 * For best results, use the correlation method. Correlation takes
139 * the number of fg pixels in the AND of instance and template,
140 * divided by the product of the number of fg pixels in instance
141 * and template. It compares this with a threshold that, in
142 * general, depends on the fractional coverage of the template.
143 * For heavy text, the threshold is raised above that for light
144 * text, By using both these parameters (basic threshold and
145 * adjustment factor for text weight), one has more flexibility
146 * and can arrive at the fewest substitution errors, although
147 * this comes at the price of more templates.
148 *
149 * The strict Hausdorff scoring is not a rank weighting, because a
150 * single pixel beyond the given distance will cause a match
151 * failure. A rank Hausdorff is more robust to non-boundary noise,
152 * but it is also more susceptible to confusing components that
153 * should be in different classes. For implementing a jbig2
154 * application for visually lossless binary image compression,
155 * you have two choices:
156 *
157 * (1) use a 3x3 structuring element (size = 3) and a strict
158 * Hausdorff comparison (rank = 1.0 in the rank Hausdorff
159 * function). This will result in a minimal number of classes,
160 * but confusion of small characters, such as italic and
161 * non-italic lower-case 'o', can still occur.
162 * (2) use the correlation method with a threshold of 0.85
163 * and a weighting factor of about 0.7. This will result in
164 * a larger number of classes, but should not be confused
165 * either by similar small characters or by extremely
166 * thick sans serif characters, such as in prog/cootoots.png.
167 *
168 * As mentioned above, if visual substitution errors must be
169 * avoided, you should use the correlation method.
170 *
171 * We provide executables that show how to do the encoding:
172 * prog/jbrankhaus.c
173 * prog/jbcorrelation.c
174 *
175 * The basic flow for correlation classification goes as follows,
176 * where specific choices have been made for parameters (Hausdorff
177 * is the same except for initialization):
178 *
179 * // Initialize and save data in the classer
180 * JBCLASSER *classer =
181 * jbCorrelationInit(JB_CONN_COMPS, 0, 0, 0.8, 0.7);
182 * SARRAY *safiles = getSortedPathnamesInDirectory(directory,
183 * NULL, 0, 0);
184 * jbAddPages(classer, safiles);
185 *
186 * // Save the data in a data structure for serialization,
187 * // and write it into two files.
188 * JBDATA *data = jbDataSave(classer);
189 * jbDataWrite(rootname, data);
190 *
191 * // Reconstruct (render) the pages from the encoded data.
192 * PIXA *pixa = jbDataRender(data, FALSE);
193 *
194 * Adam Langley has built a jbig2 standards-compliant encoder, the
195 * first one to appear in open source. You can get this encoder at:
196 * http://www.imperialviolet.org/jbig2.html
197 *
198 * It uses arithmetic encoding throughout. It encodes binary images
199 * losslessly with a single arithmetic coding over the full image.
200 * It also does both lossy and lossless encoding from connected
201 * components, using leptonica to generate the templates representing
202 * each cluster.
203 */
204
205 #include <string.h>
206 #include <math.h>
207 #include "allheaders.h"
208
209 static const l_int32 L_BUF_SIZE = 512;
210
211 /* For jbClassifyRankHaus(): size of border added around
212 * pix of each c.c., to allow further processing. This
213 * should be at least the sum of the MAX_DIFF_HEIGHT
214 * (or MAX_DIFF_WIDTH) and one-half the size of the Sel */
215 static const l_int32 JB_ADDED_PIXELS = 6;
216
217 /* For pixHaustest(), pixRankHaustest() and pixCorrelationScore():
218 * choose these to be 2 or greater */
219 static const l_int32 MAX_DIFF_WIDTH = 2; /* use at least 2 */
220 static const l_int32 MAX_DIFF_HEIGHT = 2; /* use at least 2 */
221
222 /* In initialization, you have the option to discard components
223 * (cc, characters or words) that have either width or height larger
224 * than a given size. This is convenient for jbDataSave(), because
225 * the components are placed onto a regular lattice with cell
226 * dimension equal to the maximum component size. The default
227 * values are given here. If you want to save all components,
228 * use a sufficiently large set of dimensions. */
229 static const l_int32 MAX_CONN_COMP_WIDTH = 350; /* default max cc width */
230 static const l_int32 MAX_CHAR_COMP_WIDTH = 350; /* default max char width */
231 static const l_int32 MAX_WORD_COMP_WIDTH = 1000; /* default max word width */
232 static const l_int32 MAX_COMP_HEIGHT = 120; /* default max component height */
233
234 /* This stores the state of a state machine which fetches
235 * similar sized templates */
236 struct JbFindTemplatesState
237 {
238 JBCLASSER *classer; /* classer */
239 l_int32 w; /* desired width */
240 l_int32 h; /* desired height */
241 l_int32 i; /* index into two_by_two step array */
242 L_DNA *dna; /* current number array */
243 l_int32 n; /* current element of dna */
244 };
245 typedef struct JbFindTemplatesState JBFINDCTX;
246
247 /* Static initialization function */
248 static JBCLASSER * jbCorrelationInitInternal(l_int32 components,
249 l_int32 maxwidth, l_int32 maxheight, l_float32 thresh,
250 l_float32 weightfactor, l_int32 keep_components);
251
252 /* Static helper functions */
253 static JBFINDCTX * findSimilarSizedTemplatesInit(JBCLASSER *classer, PIX *pixs);
254 static l_int32 findSimilarSizedTemplatesNext(JBFINDCTX *context);
255 static void findSimilarSizedTemplatesDestroy(JBFINDCTX **pcontext);
256 static l_int32 finalPositioningForAlignment(PIX *pixs, l_int32 x, l_int32 y,
257 l_int32 idelx, l_int32 idely, PIX *pixt,
258 l_int32 *sumtab, l_int32 *pdx, l_int32 *pdy);
259
260 #ifndef NO_CONSOLE_IO
261 #define DEBUG_CORRELATION_SCORE 0
262 #endif /* ~NO_CONSOLE_IO */
263
264
265 /*----------------------------------------------------------------------*
266 * Initialization *
267 *----------------------------------------------------------------------*/
268 /*!
269 * \brief jbRankHausInit()
270 *
271 * \param[in] components JB_CONN_COMPS, JB_CHARACTERS, JB_WORDS
272 * \param[in] maxwidth of component; use 0 for default
273 * \param[in] maxheight of component; use 0 for default
274 * \param[in] size of square structuring element; 2, representing
275 * 2x2 sel, is necessary for reasonable accuracy of
276 * small components; combine this with rank ~ 0.97
277 * to avoid undue class expansion
278 * \param[in] rank rank val of match, each way; in [0.5 - 1.0];
279 * when using size = 2, 0.97 is a reasonable value
280 * \return jbclasser if OK; NULL on error
281 */
282 JBCLASSER *
jbRankHausInit(l_int32 components,l_int32 maxwidth,l_int32 maxheight,l_int32 size,l_float32 rank)283 jbRankHausInit(l_int32 components,
284 l_int32 maxwidth,
285 l_int32 maxheight,
286 l_int32 size,
287 l_float32 rank)
288 {
289 JBCLASSER *classer;
290
291 PROCNAME("jbRankHausInit");
292
293 if (components != JB_CONN_COMPS && components != JB_CHARACTERS &&
294 components != JB_WORDS)
295 return (JBCLASSER *)ERROR_PTR("invalid components", procName, NULL);
296 if (size < 1 || size > 10)
297 return (JBCLASSER *)ERROR_PTR("size not reasonable", procName, NULL);
298 if (rank < 0.5 || rank > 1.0)
299 return (JBCLASSER *)ERROR_PTR("rank not in [0.5-1.0]", procName, NULL);
300 if (maxwidth == 0) {
301 if (components == JB_CONN_COMPS)
302 maxwidth = MAX_CONN_COMP_WIDTH;
303 else if (components == JB_CHARACTERS)
304 maxwidth = MAX_CHAR_COMP_WIDTH;
305 else /* JB_WORDS */
306 maxwidth = MAX_WORD_COMP_WIDTH;
307 }
308 if (maxheight == 0)
309 maxheight = MAX_COMP_HEIGHT;
310
311 if ((classer = jbClasserCreate(JB_RANKHAUS, components)) == NULL)
312 return (JBCLASSER *)ERROR_PTR("classer not made", procName, NULL);
313 classer->maxwidth = maxwidth;
314 classer->maxheight = maxheight;
315 classer->sizehaus = size;
316 classer->rankhaus = rank;
317 classer->dahash = l_dnaHashCreate(5507, 4); /* 5507 is prime */
318 classer->keep_pixaa = 1; /* keep all components in pixaa */
319 return classer;
320 }
321
322
323 /*!
324 * \brief jbCorrelationInit()
325 *
326 * \param[in] components JB_CONN_COMPS, JB_CHARACTERS, JB_WORDS
327 * \param[in] maxwidth of component; use 0 for default
328 * \param[in] maxheight of component; use 0 for default
329 * \param[in] thresh value for correlation score: in [0.4 - 0.98]
330 * \param[in] weightfactor corrects thresh for thick characters [0.0 - 1.0]
331 * \return jbclasser if OK; NULL on error
332 *
333 * <pre>
334 * Notes:
335 * (1) For scanned text, suggested input values are:
336 * thresh ~ [0.8 - 0.85]
337 * weightfactor ~ [0.5 - 0.6]
338 * (2) For electronically generated fonts (e.g., rasterized pdf),
339 * a very high thresh (e.g., 0.95) will not cause a significant
340 * increase in the number of classes.
341 * </pre>
342 */
343 JBCLASSER *
jbCorrelationInit(l_int32 components,l_int32 maxwidth,l_int32 maxheight,l_float32 thresh,l_float32 weightfactor)344 jbCorrelationInit(l_int32 components,
345 l_int32 maxwidth,
346 l_int32 maxheight,
347 l_float32 thresh,
348 l_float32 weightfactor)
349 {
350 return jbCorrelationInitInternal(components, maxwidth, maxheight, thresh,
351 weightfactor, 1);
352 }
353
354 /*!
355 * \brief jbCorrelationInitWithoutComponents()
356 *
357 * \param[in] components JB_CONN_COMPS, JB_CHARACTERS, JB_WORDS
358 * \param[in] maxwidth of component; use 0 for default
359 * \param[in] maxheight of component; use 0 for default
360 * \param[in] thresh value for correlation score: in [0.4 - 0.98]
361 * \param[in] weightfactor corrects thresh for thick characters [0.0 - 1.0]
362 * \return jbclasser if OK; NULL on error
363 *
364 * <pre>
365 * Notes:
366 * Acts the same as jbCorrelationInit(), but the resulting
367 * object doesn't keep a list of all the components.
368 * </pre>
369 */
370 JBCLASSER *
jbCorrelationInitWithoutComponents(l_int32 components,l_int32 maxwidth,l_int32 maxheight,l_float32 thresh,l_float32 weightfactor)371 jbCorrelationInitWithoutComponents(l_int32 components,
372 l_int32 maxwidth,
373 l_int32 maxheight,
374 l_float32 thresh,
375 l_float32 weightfactor)
376 {
377 return jbCorrelationInitInternal(components, maxwidth, maxheight, thresh,
378 weightfactor, 0);
379 }
380
381
382 static JBCLASSER *
jbCorrelationInitInternal(l_int32 components,l_int32 maxwidth,l_int32 maxheight,l_float32 thresh,l_float32 weightfactor,l_int32 keep_components)383 jbCorrelationInitInternal(l_int32 components,
384 l_int32 maxwidth,
385 l_int32 maxheight,
386 l_float32 thresh,
387 l_float32 weightfactor,
388 l_int32 keep_components)
389 {
390 JBCLASSER *classer;
391
392 PROCNAME("jbCorrelationInitInternal");
393
394 if (components != JB_CONN_COMPS && components != JB_CHARACTERS &&
395 components != JB_WORDS)
396 return (JBCLASSER *)ERROR_PTR("invalid components", procName, NULL);
397 if (thresh < 0.4 || thresh > 0.98)
398 return (JBCLASSER *)ERROR_PTR("thresh not in range [0.4 - 0.98]",
399 procName, NULL);
400 if (weightfactor < 0.0 || weightfactor > 1.0)
401 return (JBCLASSER *)ERROR_PTR("weightfactor not in range [0.0 - 1.0]",
402 procName, NULL);
403 if (maxwidth == 0) {
404 if (components == JB_CONN_COMPS)
405 maxwidth = MAX_CONN_COMP_WIDTH;
406 else if (components == JB_CHARACTERS)
407 maxwidth = MAX_CHAR_COMP_WIDTH;
408 else /* JB_WORDS */
409 maxwidth = MAX_WORD_COMP_WIDTH;
410 }
411 if (maxheight == 0)
412 maxheight = MAX_COMP_HEIGHT;
413
414
415 if ((classer = jbClasserCreate(JB_CORRELATION, components)) == NULL)
416 return (JBCLASSER *)ERROR_PTR("classer not made", procName, NULL);
417 classer->maxwidth = maxwidth;
418 classer->maxheight = maxheight;
419 classer->thresh = thresh;
420 classer->weightfactor = weightfactor;
421 classer->dahash = l_dnaHashCreate(5507, 4); /* 5507 is prime */
422 classer->keep_pixaa = keep_components;
423 return classer;
424 }
425
426
427 /*----------------------------------------------------------------------*
428 * Classify the pages *
429 *----------------------------------------------------------------------*/
430 /*!
431 * \brief jbAddPages()
432 *
433 * \param[in] jbclasser
434 * \param[in] safiles of page image file names
435 * \return 0 if OK; 1 on error
436 *
437 * <pre>
438 * Notes:
439 * (1) jbclasser makes a copy of the array of file names.
440 * (2) The caller is still responsible for destroying the input array.
441 * </pre>
442 */
443 l_int32
jbAddPages(JBCLASSER * classer,SARRAY * safiles)444 jbAddPages(JBCLASSER *classer,
445 SARRAY *safiles)
446 {
447 l_int32 i, nfiles;
448 char *fname;
449 PIX *pix;
450
451 PROCNAME("jbAddPages");
452
453 if (!classer)
454 return ERROR_INT("classer not defined", procName, 1);
455 if (!safiles)
456 return ERROR_INT("safiles not defined", procName, 1);
457
458 classer->safiles = sarrayCopy(safiles);
459 nfiles = sarrayGetCount(safiles);
460 for (i = 0; i < nfiles; i++) {
461 fname = sarrayGetString(safiles, i, L_NOCOPY);
462 if ((pix = pixRead(fname)) == NULL) {
463 L_WARNING("image file %d not read\n", procName, i);
464 continue;
465 }
466 if (pixGetDepth(pix) != 1) {
467 L_WARNING("image file %d not 1 bpp\n", procName, i);
468 continue;
469 }
470 jbAddPage(classer, pix);
471 pixDestroy(&pix);
472 }
473
474 return 0;
475 }
476
477
478 /*!
479 * \brief jbAddPage()
480 *
481 * \param[in] jbclasser
482 * \param[in] pixs of input page
483 * \return 0 if OK; 1 on error
484 */
485 l_int32
jbAddPage(JBCLASSER * classer,PIX * pixs)486 jbAddPage(JBCLASSER *classer,
487 PIX *pixs)
488 {
489 BOXA *boxas;
490 PIXA *pixas;
491
492 PROCNAME("jbAddPage");
493
494 if (!classer)
495 return ERROR_INT("classer not defined", procName, 1);
496 if (!pixs || pixGetDepth(pixs) != 1)
497 return ERROR_INT("pixs not defined or not 1 bpp", procName, 1);
498
499 classer->w = pixGetWidth(pixs);
500 classer->h = pixGetHeight(pixs);
501
502 /* Get the appropriate components and their bounding boxes */
503 if (jbGetComponents(pixs, classer->components, classer->maxwidth,
504 classer->maxheight, &boxas, &pixas)) {
505 return ERROR_INT("components not made", procName, 1);
506 }
507
508 jbAddPageComponents(classer, pixs, boxas, pixas);
509 boxaDestroy(&boxas);
510 pixaDestroy(&pixas);
511 return 0;
512 }
513
514
515 /*!
516 * \brief jbAddPageComponents()
517 *
518 * \param[in] jbclasser
519 * \param[in] pixs of input page
520 * \param[in] boxas b.b. of components for this page
521 * \param[in] pixas components for this page
522 * \return 0 if OK; 1 on error
523 *
524 * <pre>
525 * Notes:
526 * (1) If there are no components on the page, we don't require input
527 * of empty boxas or pixas, although that's the typical situation.
528 * </pre>
529 */
530 l_int32
jbAddPageComponents(JBCLASSER * classer,PIX * pixs,BOXA * boxas,PIXA * pixas)531 jbAddPageComponents(JBCLASSER *classer,
532 PIX *pixs,
533 BOXA *boxas,
534 PIXA *pixas)
535 {
536 l_int32 n;
537
538 PROCNAME("jbAddPageComponents");
539
540 if (!classer)
541 return ERROR_INT("classer not defined", procName, 1);
542 if (!pixs)
543 return ERROR_INT("pix not defined", procName, 1);
544
545 /* Test for no components on the current page. Always update the
546 * number of pages processed, even if nothing is on it. */
547 if (!boxas || !pixas || (boxaGetCount(boxas) == 0)) {
548 classer->npages++;
549 return 0;
550 }
551
552 /* Get classes. For hausdorff, it uses a specified size of
553 * structuring element and specified rank. For correlation,
554 * it uses a specified threshold. */
555 if (classer->method == JB_RANKHAUS) {
556 if (jbClassifyRankHaus(classer, boxas, pixas))
557 return ERROR_INT("rankhaus classification failed", procName, 1);
558 } else { /* classer->method == JB_CORRELATION */
559 if (jbClassifyCorrelation(classer, boxas, pixas))
560 return ERROR_INT("correlation classification failed", procName, 1);
561 }
562
563 /* Find the global UL corners, adjusted for each instance so
564 * that the class template and instance will have their
565 * centroids in the same place. Then the template can be
566 * used to replace the instance. */
567 if (jbGetULCorners(classer, pixs, boxas))
568 return ERROR_INT("UL corners not found", procName, 1);
569
570 /* Update total component counts and number of pages processed. */
571 n = boxaGetCount(boxas);
572 classer->baseindex += n;
573 numaAddNumber(classer->nacomps, n);
574 classer->npages++;
575 return 0;
576 }
577
578
579 /*----------------------------------------------------------------------*
580 * Classification using windowed rank hausdorff metric *
581 *----------------------------------------------------------------------*/
582 /*!
583 * \brief jbClassifyRankHaus()
584 *
585 * \param[in] jbclasser
586 * \param[in] boxa of new components for classification
587 * \param[in] pixas of new components for classification
588 * \return 0 if OK; 1 on error
589 */
590 l_int32
jbClassifyRankHaus(JBCLASSER * classer,BOXA * boxa,PIXA * pixas)591 jbClassifyRankHaus(JBCLASSER *classer,
592 BOXA *boxa,
593 PIXA *pixas)
594 {
595 l_int32 n, nt, i, wt, ht, iclass, size, found, testval;
596 l_int32 npages, area1, area3;
597 l_int32 *tab8;
598 l_float32 rank, x1, y1, x2, y2;
599 BOX *box;
600 NUMA *naclass, *napage;
601 NUMA *nafg; /* fg area of all instances */
602 NUMA *nafgt; /* fg area of all templates */
603 JBFINDCTX *findcontext;
604 L_DNAHASH *dahash;
605 PIX *pix, *pix1, *pix2, *pix3, *pix4;
606 PIXA *pixa, *pixa1, *pixa2, *pixat, *pixatd;
607 PIXAA *pixaa;
608 PTA *pta, *ptac, *ptact;
609 SEL *sel;
610
611 PROCNAME("jbClassifyRankHaus");
612
613 if (!classer)
614 return ERROR_INT("classer not found", procName, 1);
615 if (!boxa)
616 return ERROR_INT("boxa not found", procName, 1);
617 if (!pixas)
618 return ERROR_INT("pixas not found", procName, 1);
619
620 npages = classer->npages;
621 size = classer->sizehaus;
622 sel = selCreateBrick(size, size, size / 2, size / 2, SEL_HIT);
623
624 /* Generate the bordered pixa, with and without dilation.
625 * pixa1 and pixa2 contain all the input components. */
626 n = pixaGetCount(pixas);
627 pixa1 = pixaCreate(n);
628 pixa2 = pixaCreate(n);
629 for (i = 0; i < n; i++) {
630 pix = pixaGetPix(pixas, i, L_CLONE);
631 pix1 = pixAddBorderGeneral(pix, JB_ADDED_PIXELS, JB_ADDED_PIXELS,
632 JB_ADDED_PIXELS, JB_ADDED_PIXELS, 0);
633 pix2 = pixDilate(NULL, pix1, sel);
634 pixaAddPix(pixa1, pix1, L_INSERT); /* un-dilated */
635 pixaAddPix(pixa2, pix2, L_INSERT); /* dilated */
636 pixDestroy(&pix);
637 }
638
639 /* Get the centroids of all the bordered images.
640 * These are relative to the UL corner of each (bordered) pix. */
641 pta = pixaCentroids(pixa1); /* centroids for this page; use here */
642 ptac = classer->ptac; /* holds centroids of components up to this page */
643 ptaJoin(ptac, pta, 0, -1); /* save centroids of all components */
644 ptact = classer->ptact; /* holds centroids of templates */
645
646 /* Use these to save the class and page of each component. */
647 naclass = classer->naclass;
648 napage = classer->napage;
649
650 /* Store the unbordered pix in a pixaa, in a hierarchical
651 * set of arrays. There is one pixa for each class,
652 * and the pix in each pixa are all the instances found
653 * of that class. This is actually more than one would need
654 * for a jbig2 encoder, but there are two reasons to keep
655 * them around: (1) the set of instances for each class
656 * can be used to make an improved binary (or, better,
657 * a grayscale) template, rather than simply using the first
658 * one in the set; (2) we can investigate the failures
659 * of the classifier. This pixaa grows as we process
660 * successive pages. */
661 pixaa = classer->pixaa;
662
663 /* arrays to store class exemplars (templates) */
664 pixat = classer->pixat; /* un-dilated */
665 pixatd = classer->pixatd; /* dilated */
666
667 /* Fill up the pixaa tree with the template exemplars as
668 * the first pix in each pixa. As we add each pix,
669 * we also add the associated box to the pixa.
670 * We also keep track of the centroid of each pix,
671 * and use the difference between centroids (of the
672 * pix with the exemplar we are checking it with)
673 * to align the two when checking that the Hausdorff
674 * distance does not exceed a threshold.
675 * The threshold is set by the Sel used for dilating.
676 * For example, a 3x3 brick, sel_3, corresponds to a
677 * Hausdorff distance of 1. In general, for an NxN brick,
678 * with N odd, corresponds to a Hausdorff distance of (N - 1)/2.
679 * It turns out that we actually need to use a sel of size 2x2
680 * to avoid small bad components when there is a halftone image
681 * from which components can be chosen.
682 * The larger the Sel you use, the fewer the number of classes,
683 * and the greater the likelihood of putting semantically
684 * different objects in the same class. For simplicity,
685 * we do this separately for the case of rank == 1.0 (exact
686 * match within the Hausdorff distance) and rank < 1.0. */
687 rank = classer->rankhaus;
688 dahash = classer->dahash;
689 if (rank == 1.0) {
690 for (i = 0; i < n; i++) {
691 pix1 = pixaGetPix(pixa1, i, L_CLONE);
692 pix2 = pixaGetPix(pixa2, i, L_CLONE);
693 ptaGetPt(pta, i, &x1, &y1);
694 nt = pixaGetCount(pixat); /* number of templates */
695 found = FALSE;
696 findcontext = findSimilarSizedTemplatesInit(classer, pix1);
697 while ((iclass = findSimilarSizedTemplatesNext(findcontext)) > -1) {
698 /* Find score for this template */
699 pix3 = pixaGetPix(pixat, iclass, L_CLONE);
700 pix4 = pixaGetPix(pixatd, iclass, L_CLONE);
701 ptaGetPt(ptact, iclass, &x2, &y2);
702 testval = pixHaustest(pix1, pix2, pix3, pix4, x1 - x2, y1 - y2,
703 MAX_DIFF_WIDTH, MAX_DIFF_HEIGHT);
704 pixDestroy(&pix3);
705 pixDestroy(&pix4);
706 if (testval == 1) {
707 found = TRUE;
708 numaAddNumber(naclass, iclass);
709 numaAddNumber(napage, npages);
710 if (classer->keep_pixaa) {
711 pixa = pixaaGetPixa(pixaa, iclass, L_CLONE);
712 pix = pixaGetPix(pixas, i, L_CLONE);
713 pixaAddPix(pixa, pix, L_INSERT);
714 box = boxaGetBox(boxa, i, L_CLONE);
715 pixaAddBox(pixa, box, L_INSERT);
716 pixaDestroy(&pixa);
717 }
718 break;
719 }
720 }
721 findSimilarSizedTemplatesDestroy(&findcontext);
722 if (found == FALSE) { /* new class */
723 numaAddNumber(naclass, nt);
724 numaAddNumber(napage, npages);
725 pixa = pixaCreate(0);
726 pix = pixaGetPix(pixas, i, L_CLONE); /* unbordered instance */
727 pixaAddPix(pixa, pix, L_INSERT);
728 wt = pixGetWidth(pix);
729 ht = pixGetHeight(pix);
730 l_dnaHashAdd(dahash, ht * wt, nt);
731 box = boxaGetBox(boxa, i, L_CLONE);
732 pixaAddBox(pixa, box, L_INSERT);
733 pixaaAddPixa(pixaa, pixa, L_INSERT); /* unbordered instance */
734 ptaAddPt(ptact, x1, y1);
735 pixaAddPix(pixat, pix1, L_INSERT); /* bordered template */
736 pixaAddPix(pixatd, pix2, L_INSERT); /* bordered dil template */
737 } else { /* don't save them */
738 pixDestroy(&pix1);
739 pixDestroy(&pix2);
740 }
741 }
742 } else { /* rank < 1.0 */
743 if ((nafg = pixaCountPixels(pixas)) == NULL) /* areas for this page */
744 return ERROR_INT("nafg not made", procName, 1);
745 nafgt = classer->nafgt;
746 tab8 = makePixelSumTab8();
747 for (i = 0; i < n; i++) { /* all instances on this page */
748 pix1 = pixaGetPix(pixa1, i, L_CLONE);
749 numaGetIValue(nafg, i, &area1);
750 pix2 = pixaGetPix(pixa2, i, L_CLONE);
751 ptaGetPt(pta, i, &x1, &y1); /* use pta for this page */
752 nt = pixaGetCount(pixat); /* number of templates */
753 found = FALSE;
754 findcontext = findSimilarSizedTemplatesInit(classer, pix1);
755 while ((iclass = findSimilarSizedTemplatesNext(findcontext)) > -1) {
756 /* Find score for this template */
757 pix3 = pixaGetPix(pixat, iclass, L_CLONE);
758 numaGetIValue(nafgt, iclass, &area3);
759 pix4 = pixaGetPix(pixatd, iclass, L_CLONE);
760 ptaGetPt(ptact, iclass, &x2, &y2);
761 testval = pixRankHaustest(pix1, pix2, pix3, pix4,
762 x1 - x2, y1 - y2,
763 MAX_DIFF_WIDTH, MAX_DIFF_HEIGHT,
764 area1, area3, rank, tab8);
765 pixDestroy(&pix3);
766 pixDestroy(&pix4);
767 if (testval == 1) { /* greedy match; take the first */
768 found = TRUE;
769 numaAddNumber(naclass, iclass);
770 numaAddNumber(napage, npages);
771 if (classer->keep_pixaa) {
772 pixa = pixaaGetPixa(pixaa, iclass, L_CLONE);
773 pix = pixaGetPix(pixas, i, L_CLONE);
774 pixaAddPix(pixa, pix, L_INSERT);
775 box = boxaGetBox(boxa, i, L_CLONE);
776 pixaAddBox(pixa, box, L_INSERT);
777 pixaDestroy(&pixa);
778 }
779 break;
780 }
781 }
782 findSimilarSizedTemplatesDestroy(&findcontext);
783 if (found == FALSE) { /* new class */
784 numaAddNumber(naclass, nt);
785 numaAddNumber(napage, npages);
786 pixa = pixaCreate(0);
787 pix = pixaGetPix(pixas, i, L_CLONE); /* unbordered instance */
788 pixaAddPix(pixa, pix, L_INSERT);
789 wt = pixGetWidth(pix);
790 ht = pixGetHeight(pix);
791 l_dnaHashAdd(dahash, ht * wt, nt);
792 box = boxaGetBox(boxa, i, L_CLONE);
793 pixaAddBox(pixa, box, L_INSERT);
794 pixaaAddPixa(pixaa, pixa, L_INSERT); /* unbordered instance */
795 ptaAddPt(ptact, x1, y1);
796 pixaAddPix(pixat, pix1, L_INSERT); /* bordered template */
797 pixaAddPix(pixatd, pix2, L_INSERT); /* ditto */
798 numaAddNumber(nafgt, area1);
799 } else { /* don't save them */
800 pixDestroy(&pix1);
801 pixDestroy(&pix2);
802 }
803 }
804 LEPT_FREE(tab8);
805 numaDestroy(&nafg);
806 }
807 classer->nclass = pixaGetCount(pixat);
808
809 ptaDestroy(&pta);
810 pixaDestroy(&pixa1);
811 pixaDestroy(&pixa2);
812 selDestroy(&sel);
813 return 0;
814 }
815
816
817 /*!
818 * \brief pixHaustest()
819 *
820 * \param[in] pix1 new pix, not dilated
821 * \param[in] pix2 new pix, dilated
822 * \param[in] pix3 exemplar pix, not dilated
823 * \param[in] pix4 exemplar pix, dilated
824 * \param[in] delx x comp of centroid difference
825 * \param[in] dely y comp of centroid difference
826 * \param[in] maxdiffw max width difference of pix1 and pix2
827 * \param[in] maxdiffh max height difference of pix1 and pix2
828 * \return 0 FALSE) if no match, 1 (TRUE if the new
829 * pix is in the same class as the exemplar.
830 *
831 * <pre>
832 * Notes:
833 * We check first that the two pix are roughly
834 * the same size. Only if they meet that criterion do
835 * we compare the bitmaps. The Hausdorff is a 2-way
836 * check. The centroid difference is used to align the two
837 * images to the nearest integer for each of the checks.
838 * These check that the dilated image of one contains
839 * ALL the pixels of the undilated image of the other.
840 * Checks are done in both direction. A single pixel not
841 * contained in either direction results in failure of the test.
842 * </pre>
843 */
844 l_int32
pixHaustest(PIX * pix1,PIX * pix2,PIX * pix3,PIX * pix4,l_float32 delx,l_float32 dely,l_int32 maxdiffw,l_int32 maxdiffh)845 pixHaustest(PIX *pix1,
846 PIX *pix2,
847 PIX *pix3,
848 PIX *pix4,
849 l_float32 delx, /* x(1) - x(3) */
850 l_float32 dely, /* y(1) - y(3) */
851 l_int32 maxdiffw,
852 l_int32 maxdiffh)
853 {
854 l_int32 wi, hi, wt, ht, delw, delh, idelx, idely, boolmatch;
855 PIX *pixt;
856
857 /* Eliminate possible matches based on size difference */
858 wi = pixGetWidth(pix1);
859 hi = pixGetHeight(pix1);
860 wt = pixGetWidth(pix3);
861 ht = pixGetHeight(pix3);
862 delw = L_ABS(wi - wt);
863 if (delw > maxdiffw)
864 return FALSE;
865 delh = L_ABS(hi - ht);
866 if (delh > maxdiffh)
867 return FALSE;
868
869 /* Round difference in centroid location to nearest integer;
870 * use this as a shift when doing the matching. */
871 if (delx >= 0)
872 idelx = (l_int32)(delx + 0.5);
873 else
874 idelx = (l_int32)(delx - 0.5);
875 if (dely >= 0)
876 idely = (l_int32)(dely + 0.5);
877 else
878 idely = (l_int32)(dely - 0.5);
879
880 /* Do 1-direction hausdorff, checking that every pixel in pix1
881 * is within a dilation distance of some pixel in pix3. Namely,
882 * that pix4 entirely covers pix1:
883 * pixt = pixSubtract(NULL, pix1, pix4), including shift
884 * where pixt has no ON pixels. */
885 pixt = pixCreateTemplate(pix1);
886 pixRasterop(pixt, 0, 0, wi, hi, PIX_SRC, pix1, 0, 0);
887 pixRasterop(pixt, idelx, idely, wi, hi, PIX_DST & PIX_NOT(PIX_SRC),
888 pix4, 0, 0);
889 pixZero(pixt, &boolmatch);
890 if (boolmatch == 0) {
891 pixDestroy(&pixt);
892 return FALSE;
893 }
894
895 /* Do 1-direction hausdorff, checking that every pixel in pix3
896 * is within a dilation distance of some pixel in pix1. Namely,
897 * that pix2 entirely covers pix3:
898 * pixSubtract(pixt, pix3, pix2), including shift
899 * where pixt has no ON pixels. */
900 pixRasterop(pixt, idelx, idely, wt, ht, PIX_SRC, pix3, 0, 0);
901 pixRasterop(pixt, 0, 0, wt, ht, PIX_DST & PIX_NOT(PIX_SRC), pix2, 0, 0);
902 pixZero(pixt, &boolmatch);
903 pixDestroy(&pixt);
904 return boolmatch;
905 }
906
907
908 /*!
909 * \brief pixRankHaustest()
910 *
911 * \param[in] pix1 new pix, not dilated
912 * \param[in] pix2 new pix, dilated
913 * \param[in] pix3 exemplar pix, not dilated
914 * \param[in] pix4 exemplar pix, dilated
915 * \param[in] delx x comp of centroid difference
916 * \param[in] dely y comp of centroid difference
917 * \param[in] maxdiffw max width difference of pix1 and pix2
918 * \param[in] maxdiffh max height difference of pix1 and pix2
919 * \param[in] area1 fg pixels in pix1
920 * \param[in] area3 fg pixels in pix3
921 * \param[in] rank rank value of test, each way
922 * \param[in] tab8 table of pixel sums for byte
923 * \return 0 FALSE) if no match, 1 (TRUE if the new
924 * pix is in the same class as the exemplar.
925 *
926 * <pre>
927 * Notes:
928 * We check first that the two pix are roughly
929 * the same size. Only if they meet that criterion do
930 * we compare the bitmaps. We convert the rank value to
931 * a number of pixels by multiplying the rank fraction by the number
932 * of pixels in the undilated image. The Hausdorff is a 2-way
933 * check. The centroid difference is used to align the two
934 * images to the nearest integer for each of the checks.
935 * The rank hausdorff checks that the dilated image of one
936 * contains the rank fraction of the pixels of the undilated
937 * image of the other. Checks are done in both direction.
938 * Failure of the test in either direction results in failure
939 * of the test.
940 * </pre>
941 */
942 l_int32
pixRankHaustest(PIX * pix1,PIX * pix2,PIX * pix3,PIX * pix4,l_float32 delx,l_float32 dely,l_int32 maxdiffw,l_int32 maxdiffh,l_int32 area1,l_int32 area3,l_float32 rank,l_int32 * tab8)943 pixRankHaustest(PIX *pix1,
944 PIX *pix2,
945 PIX *pix3,
946 PIX *pix4,
947 l_float32 delx, /* x(1) - x(3) */
948 l_float32 dely, /* y(1) - y(3) */
949 l_int32 maxdiffw,
950 l_int32 maxdiffh,
951 l_int32 area1,
952 l_int32 area3,
953 l_float32 rank,
954 l_int32 *tab8)
955 {
956 l_int32 wi, hi, wt, ht, delw, delh, idelx, idely, boolmatch;
957 l_int32 thresh1, thresh3;
958 PIX *pixt;
959
960 /* Eliminate possible matches based on size difference */
961 wi = pixGetWidth(pix1);
962 hi = pixGetHeight(pix1);
963 wt = pixGetWidth(pix3);
964 ht = pixGetHeight(pix3);
965 delw = L_ABS(wi - wt);
966 if (delw > maxdiffw)
967 return FALSE;
968 delh = L_ABS(hi - ht);
969 if (delh > maxdiffh)
970 return FALSE;
971
972 /* Upper bounds in remaining pixels for allowable match */
973 thresh1 = (l_int32)(area1 * (1. - rank) + 0.5);
974 thresh3 = (l_int32)(area3 * (1. - rank) + 0.5);
975
976 /* Round difference in centroid location to nearest integer;
977 * use this as a shift when doing the matching. */
978 if (delx >= 0)
979 idelx = (l_int32)(delx + 0.5);
980 else
981 idelx = (l_int32)(delx - 0.5);
982 if (dely >= 0)
983 idely = (l_int32)(dely + 0.5);
984 else
985 idely = (l_int32)(dely - 0.5);
986
987 /* Do 1-direction hausdorff, checking that every pixel in pix1
988 * is within a dilation distance of some pixel in pix3. Namely,
989 * that pix4 entirely covers pix1:
990 * pixt = pixSubtract(NULL, pix1, pix4), including shift
991 * where pixt has no ON pixels. */
992 pixt = pixCreateTemplate(pix1);
993 pixRasterop(pixt, 0, 0, wi, hi, PIX_SRC, pix1, 0, 0);
994 pixRasterop(pixt, idelx, idely, wi, hi, PIX_DST & PIX_NOT(PIX_SRC),
995 pix4, 0, 0);
996 pixThresholdPixelSum(pixt, thresh1, &boolmatch, tab8);
997 if (boolmatch == 1) { /* above thresh1 */
998 pixDestroy(&pixt);
999 return FALSE;
1000 }
1001
1002 /* Do 1-direction hausdorff, checking that every pixel in pix3
1003 * is within a dilation distance of some pixel in pix1. Namely,
1004 * that pix2 entirely covers pix3:
1005 * pixSubtract(pixt, pix3, pix2), including shift
1006 * where pixt has no ON pixels. */
1007 pixRasterop(pixt, idelx, idely, wt, ht, PIX_SRC, pix3, 0, 0);
1008 pixRasterop(pixt, 0, 0, wt, ht, PIX_DST & PIX_NOT(PIX_SRC), pix2, 0, 0);
1009 pixThresholdPixelSum(pixt, thresh3, &boolmatch, tab8);
1010 pixDestroy(&pixt);
1011 if (boolmatch == 1) /* above thresh3 */
1012 return FALSE;
1013 else
1014 return TRUE;
1015 }
1016
1017
1018 /*----------------------------------------------------------------------*
1019 * Classification using windowed correlation score *
1020 *----------------------------------------------------------------------*/
1021 /*!
1022 * \brief jbClassifyCorrelation()
1023 *
1024 * \param[in] jbclasser
1025 * \param[in] boxa of new components for classification
1026 * \param[in] pixas of new components for classification
1027 * \return 0 if OK; 1 on error
1028 */
1029 l_int32
jbClassifyCorrelation(JBCLASSER * classer,BOXA * boxa,PIXA * pixas)1030 jbClassifyCorrelation(JBCLASSER *classer,
1031 BOXA *boxa,
1032 PIXA *pixas)
1033 {
1034 l_int32 n, nt, i, iclass, wt, ht, found, area, area1, area2, npages,
1035 overthreshold;
1036 l_int32 *sumtab, *centtab;
1037 l_uint32 *row, word;
1038 l_float32 x1, y1, x2, y2, xsum, ysum;
1039 l_float32 thresh, weight, threshold;
1040 BOX *box;
1041 NUMA *naclass, *napage;
1042 NUMA *nafgt; /* fg area of all templates */
1043 NUMA *naarea; /* w * h area of all templates */
1044 JBFINDCTX *findcontext;
1045 L_DNAHASH *dahash;
1046 PIX *pix, *pix1, *pix2;
1047 PIXA *pixa, *pixa1, *pixat;
1048 PIXAA *pixaa;
1049 PTA *pta, *ptac, *ptact;
1050 l_int32 *pixcts; /* pixel counts of each pixa */
1051 l_int32 **pixrowcts; /* row-by-row pixel counts of each pixa */
1052 l_int32 x, y, rowcount, downcount, wpl;
1053 l_uint8 byte;
1054
1055 PROCNAME("jbClassifyCorrelation");
1056
1057 if (!classer)
1058 return ERROR_INT("classer not found", procName, 1);
1059 if (!boxa)
1060 return ERROR_INT("boxa not found", procName, 1);
1061 if (!pixas)
1062 return ERROR_INT("pixas not found", procName, 1);
1063
1064 npages = classer->npages;
1065
1066 /* Generate the bordered pixa, which contains all the the
1067 * input components. This will not be saved. */
1068 if ((n = pixaGetCount(pixas)) == 0) {
1069 L_WARNING("pixas is empty\n", procName);
1070 return 0;
1071 }
1072 pixa1 = pixaCreate(n);
1073 for (i = 0; i < n; i++) {
1074 pix = pixaGetPix(pixas, i, L_CLONE);
1075 pix1 = pixAddBorderGeneral(pix, JB_ADDED_PIXELS, JB_ADDED_PIXELS,
1076 JB_ADDED_PIXELS, JB_ADDED_PIXELS, 0);
1077 pixaAddPix(pixa1, pix1, L_INSERT);
1078 pixDestroy(&pix);
1079 }
1080
1081 /* Use these to save the class and page of each component. */
1082 naclass = classer->naclass;
1083 napage = classer->napage;
1084
1085 /* Get the number of fg pixels in each component. */
1086 nafgt = classer->nafgt; /* holds fg areas of the templates */
1087 sumtab = makePixelSumTab8();
1088
1089 pixcts = (l_int32 *)LEPT_CALLOC(n, sizeof(*pixcts));
1090 pixrowcts = (l_int32 **)LEPT_CALLOC(n, sizeof(*pixrowcts));
1091 centtab = makePixelCentroidTab8();
1092
1093 /* Count the "1" pixels in each row of the pix in pixa1; this
1094 * allows pixCorrelationScoreThresholded to abort early if a match
1095 * is impossible. This loop merges three calculations: the total
1096 * number of "1" pixels, the number of "1" pixels in each row, and
1097 * the centroid. The centroids are relative to the UL corner of
1098 * each (bordered) pix. The pixrowcts[i][y] are the total number
1099 * of fg pixels in pixa[i] below row y. */
1100 pta = ptaCreate(n);
1101 for (i = 0; i < n; i++) {
1102 pix = pixaGetPix(pixa1, i, L_CLONE);
1103 pixrowcts[i] = (l_int32 *)LEPT_CALLOC(pixGetHeight(pix),
1104 sizeof(**pixrowcts));
1105 xsum = 0;
1106 ysum = 0;
1107 wpl = pixGetWpl(pix);
1108 row = pixGetData(pix) + (pixGetHeight(pix) - 1) * wpl;
1109 downcount = 0;
1110 for (y = pixGetHeight(pix) - 1; y >= 0; y--, row -= wpl) {
1111 pixrowcts[i][y] = downcount;
1112 rowcount = 0;
1113 for (x = 0; x < wpl; x++) {
1114 word = row[x];
1115 byte = word & 0xff;
1116 rowcount += sumtab[byte];
1117 xsum += centtab[byte] + (x * 32 + 24) * sumtab[byte];
1118 byte = (word >> 8) & 0xff;
1119 rowcount += sumtab[byte];
1120 xsum += centtab[byte] + (x * 32 + 16) * sumtab[byte];
1121 byte = (word >> 16) & 0xff;
1122 rowcount += sumtab[byte];
1123 xsum += centtab[byte] + (x * 32 + 8) * sumtab[byte];
1124 byte = (word >> 24) & 0xff;
1125 rowcount += sumtab[byte];
1126 xsum += centtab[byte] + x * 32 * sumtab[byte];
1127 }
1128 downcount += rowcount;
1129 ysum += rowcount * y;
1130 }
1131 pixcts[i] = downcount;
1132 if (downcount > 0) {
1133 ptaAddPt(pta,
1134 xsum / (l_float32)downcount, ysum / (l_float32)downcount);
1135 } else { /* no pixels; shouldn't happen */
1136 L_ERROR("downcount == 0 !\n", procName);
1137 ptaAddPt(pta, pixGetWidth(pix) / 2, pixGetHeight(pix) / 2);
1138 }
1139 pixDestroy(&pix);
1140 }
1141
1142 ptac = classer->ptac; /* holds centroids of components up to this page */
1143 ptaJoin(ptac, pta, 0, -1); /* save centroids of all components */
1144 ptact = classer->ptact; /* holds centroids of templates */
1145
1146 /* Store the unbordered pix in a pixaa, in a hierarchical
1147 * set of arrays. There is one pixa for each class,
1148 * and the pix in each pixa are all the instances found
1149 * of that class. This is actually more than one would need
1150 * for a jbig2 encoder, but there are two reasons to keep
1151 * them around: (1) the set of instances for each class
1152 * can be used to make an improved binary (or, better,
1153 * a grayscale) template, rather than simply using the first
1154 * one in the set; (2) we can investigate the failures
1155 * of the classifier. This pixaa grows as we process
1156 * successive pages. */
1157 pixaa = classer->pixaa;
1158
1159 /* Array to store class exemplars */
1160 pixat = classer->pixat;
1161
1162 /* Fill up the pixaa tree with the template exemplars as
1163 * the first pix in each pixa. As we add each pix,
1164 * we also add the associated box to the pixa.
1165 * We also keep track of the centroid of each pix,
1166 * and use the difference between centroids (of the
1167 * pix with the exemplar we are checking it with)
1168 * to align the two when checking that the correlation
1169 * score exceeds a threshold. The correlation score
1170 * is given by the square of the area of the AND
1171 * between aligned instance and template, divided by
1172 * the product of areas of each image. For identical
1173 * template and instance, the score is 1.0.
1174 * If the threshold is too small, non-equivalent instances
1175 * will be placed in the same class; if too large, there will
1176 * be an unnecessary division of classes representing the
1177 * same character. The weightfactor adds in some of the
1178 * difference (1.0 - thresh), depending on the heaviness
1179 * of the template (measured as the fraction of fg pixels). */
1180 thresh = classer->thresh;
1181 weight = classer->weightfactor;
1182 naarea = classer->naarea;
1183 dahash = classer->dahash;
1184 for (i = 0; i < n; i++) {
1185 pix1 = pixaGetPix(pixa1, i, L_CLONE);
1186 area1 = pixcts[i];
1187 ptaGetPt(pta, i, &x1, &y1); /* centroid for this instance */
1188 nt = pixaGetCount(pixat);
1189 found = FALSE;
1190 findcontext = findSimilarSizedTemplatesInit(classer, pix1);
1191 while ( (iclass = findSimilarSizedTemplatesNext(findcontext)) > -1) {
1192 /* Get the template */
1193 pix2 = pixaGetPix(pixat, iclass, L_CLONE);
1194 numaGetIValue(nafgt, iclass, &area2);
1195 ptaGetPt(ptact, iclass, &x2, &y2); /* template centroid */
1196
1197 /* Find threshold for this template */
1198 if (weight > 0.0) {
1199 numaGetIValue(naarea, iclass, &area);
1200 threshold = thresh + (1. - thresh) * weight * area2 / area;
1201 } else {
1202 threshold = thresh;
1203 }
1204
1205 /* Find score for this template */
1206 overthreshold = pixCorrelationScoreThresholded(pix1, pix2,
1207 area1, area2, x1 - x2, y1 - y2,
1208 MAX_DIFF_WIDTH, MAX_DIFF_HEIGHT,
1209 sumtab, pixrowcts[i], threshold);
1210 #if DEBUG_CORRELATION_SCORE
1211 {
1212 l_float32 score, testscore;
1213 l_int32 count, testcount;
1214 pixCorrelationScore(pix1, pix2, area1, area2, x1 - x2, y1 - y2,
1215 MAX_DIFF_WIDTH, MAX_DIFF_HEIGHT,
1216 sumtab, &score);
1217
1218 pixCorrelationScoreSimple(pix1, pix2, area1, area2,
1219 x1 - x2, y1 - y2, MAX_DIFF_WIDTH,
1220 MAX_DIFF_HEIGHT, sumtab, &testscore);
1221 count = (l_int32)rint(sqrt(score * area1 * area2));
1222 testcount = (l_int32)rint(sqrt(testscore * area1 * area2));
1223 if ((score >= threshold) != (testscore >= threshold)) {
1224 fprintf(stderr, "Correlation score mismatch: "
1225 "%d(%g,%d) vs %d(%g,%d) (%g)\n",
1226 count, score, score >= threshold,
1227 testcount, testscore, testscore >= threshold,
1228 score - testscore);
1229 }
1230
1231 if ((score >= threshold) != overthreshold) {
1232 fprintf(stderr, "Mismatch between correlation/threshold "
1233 "comparison: %g(%g,%d) >= %g(%g) vs %s\n",
1234 score, score*area1*area2, count, threshold,
1235 threshold*area1*area2,
1236 (overthreshold ? "true" : "false"));
1237 }
1238 }
1239 #endif /* DEBUG_CORRELATION_SCORE */
1240 pixDestroy(&pix2);
1241
1242 if (overthreshold) { /* greedy match */
1243 found = TRUE;
1244 numaAddNumber(naclass, iclass);
1245 numaAddNumber(napage, npages);
1246 if (classer->keep_pixaa) {
1247 /* We are keeping a record of all components */
1248 pixa = pixaaGetPixa(pixaa, iclass, L_CLONE);
1249 pix = pixaGetPix(pixas, i, L_CLONE);
1250 pixaAddPix(pixa, pix, L_INSERT);
1251 box = boxaGetBox(boxa, i, L_CLONE);
1252 pixaAddBox(pixa, box, L_INSERT);
1253 pixaDestroy(&pixa);
1254 }
1255 break;
1256 }
1257 }
1258 findSimilarSizedTemplatesDestroy(&findcontext);
1259 if (found == FALSE) { /* new class */
1260 numaAddNumber(naclass, nt);
1261 numaAddNumber(napage, npages);
1262 pixa = pixaCreate(0);
1263 pix = pixaGetPix(pixas, i, L_CLONE); /* unbordered instance */
1264 pixaAddPix(pixa, pix, L_INSERT);
1265 wt = pixGetWidth(pix);
1266 ht = pixGetHeight(pix);
1267 l_dnaHashAdd(dahash, ht * wt, nt);
1268 box = boxaGetBox(boxa, i, L_CLONE);
1269 pixaAddBox(pixa, box, L_INSERT);
1270 pixaaAddPixa(pixaa, pixa, L_INSERT); /* unbordered instance */
1271 ptaAddPt(ptact, x1, y1);
1272 numaAddNumber(nafgt, area1);
1273 pixaAddPix(pixat, pix1, L_INSERT); /* bordered template */
1274 area = (pixGetWidth(pix1) - 2 * JB_ADDED_PIXELS) *
1275 (pixGetHeight(pix1) - 2 * JB_ADDED_PIXELS);
1276 numaAddNumber(naarea, area);
1277 } else { /* don't save it */
1278 pixDestroy(&pix1);
1279 }
1280 }
1281 classer->nclass = pixaGetCount(pixat);
1282
1283 LEPT_FREE(pixcts);
1284 LEPT_FREE(centtab);
1285 for (i = 0; i < n; i++) {
1286 LEPT_FREE(pixrowcts[i]);
1287 }
1288 LEPT_FREE(pixrowcts);
1289
1290 LEPT_FREE(sumtab);
1291 ptaDestroy(&pta);
1292 pixaDestroy(&pixa1);
1293 return 0;
1294 }
1295
1296
1297 /*----------------------------------------------------------------------*
1298 * Determine the image components we start with *
1299 *----------------------------------------------------------------------*/
1300 /*!
1301 * \brief jbGetComponents()
1302 *
1303 * \param[in] pixs 1 bpp
1304 * \param[in] components JB_CONN_COMPS, JB_CHARACTERS, JB_WORDS
1305 * \param[in] maxwidth, maxheight of saved components; larger are discarded
1306 * \param[out] ppboxa b.b. of component items
1307 * \param[out] pppixa component items
1308 * \return 0 if OK, 1 on error
1309 */
1310 l_int32
jbGetComponents(PIX * pixs,l_int32 components,l_int32 maxwidth,l_int32 maxheight,BOXA ** pboxad,PIXA ** ppixad)1311 jbGetComponents(PIX *pixs,
1312 l_int32 components,
1313 l_int32 maxwidth,
1314 l_int32 maxheight,
1315 BOXA **pboxad,
1316 PIXA **ppixad)
1317 {
1318 l_int32 empty, res, redfactor;
1319 BOXA *boxa;
1320 PIX *pix1, *pix2, *pix3;
1321 PIXA *pixa, *pixat;
1322
1323 PROCNAME("jbGetComponents");
1324
1325 if (!pboxad)
1326 return ERROR_INT("&boxad not defined", procName, 1);
1327 *pboxad = NULL;
1328 if (!ppixad)
1329 return ERROR_INT("&pixad not defined", procName, 1);
1330 *ppixad = NULL;
1331 if (!pixs)
1332 return ERROR_INT("pixs not defined", procName, 1);
1333 if (components != JB_CONN_COMPS && components != JB_CHARACTERS &&
1334 components != JB_WORDS)
1335 return ERROR_INT("invalid components", procName, 1);
1336
1337 pixZero(pixs, &empty);
1338 if (empty) {
1339 *pboxad = boxaCreate(0);
1340 *ppixad = pixaCreate(0);
1341 return 0;
1342 }
1343
1344 /* If required, preprocess input pixs. The method for both
1345 * characters and words is to generate a connected component
1346 * mask over the units that we want to aggregrate, which are,
1347 * in general, sets of related connected components in pixs.
1348 * For characters, we want to include the dots with
1349 * 'i', 'j' and '!', so we do a small vertical closing to
1350 * generate the mask. For words, we make a mask over all
1351 * characters in each word. This is a bit more tricky, because
1352 * the spacing between words is difficult to predict a priori,
1353 * and words can be typeset with variable spacing that can
1354 * in some cases be barely larger than the space between
1355 * characters. The first step is to generate the mask and
1356 * identify each of its connected components. */
1357 if (components == JB_CONN_COMPS) { /* no preprocessing */
1358 boxa = pixConnComp(pixs, &pixa, 8);
1359 } else if (components == JB_CHARACTERS) {
1360 pix1 = pixMorphSequence(pixs, "c1.6", 0);
1361 boxa = pixConnComp(pix1, &pixat, 8);
1362 pixa = pixaClipToPix(pixat, pixs);
1363 pixDestroy(&pix1);
1364 pixaDestroy(&pixat);
1365 } else { /* components == JB_WORDS */
1366
1367 /* Do the operations at about 150 ppi resolution.
1368 * It is much faster at 75 ppi, but the results are
1369 * more accurate at 150 ppi. This will segment the
1370 * words in body text. It can be expected that relatively
1371 * infrequent words in a larger font will be split. */
1372 res = pixGetXRes(pixs);
1373 if (res <= 200) {
1374 redfactor = 1;
1375 pix1 = pixClone(pixs);
1376 } else if (res <= 400) {
1377 redfactor = 2;
1378 pix1 = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0);
1379 } else {
1380 redfactor = 4;
1381 pix1 = pixReduceRankBinaryCascade(pixs, 1, 1, 0, 0);
1382 }
1383
1384 /* Estimate the word mask, at approximately 150 ppi.
1385 * This has both very large and very small components left in. */
1386 pixWordMaskByDilation(pix1, &pix2, NULL, NULL);
1387
1388 /* Expand the optimally dilated word mask to full res. */
1389 pix3 = pixExpandReplicate(pix2, redfactor);
1390
1391 /* Pull out the pixels in pixs corresponding to the mask
1392 * components in pix3. Note that above we used threshold
1393 * levels in the reduction of 1 to insure that the resulting
1394 * mask fully covers the input pixs. The downside of using
1395 * a threshold of 1 is that very close characters from adjacent
1396 * lines can be joined. But with a level of 2 or greater,
1397 * it is necessary to use a seedfill, followed by a pixOr():
1398 * pixt4 = pixSeedfillBinary(NULL, pix3, pixs, 8);
1399 * pixOr(pix3, pix3, pixt4);
1400 * to insure that the mask coverage is complete over pixs. */
1401 boxa = pixConnComp(pix3, &pixat, 4);
1402 pixa = pixaClipToPix(pixat, pixs);
1403 pixaDestroy(&pixat);
1404 pixDestroy(&pix1);
1405 pixDestroy(&pix2);
1406 pixDestroy(&pix3);
1407 }
1408
1409 /* Remove large components, and save the results. */
1410 *ppixad = pixaSelectBySize(pixa, maxwidth, maxheight, L_SELECT_IF_BOTH,
1411 L_SELECT_IF_LTE, NULL);
1412 *pboxad = boxaSelectBySize(boxa, maxwidth, maxheight, L_SELECT_IF_BOTH,
1413 L_SELECT_IF_LTE, NULL);
1414 pixaDestroy(&pixa);
1415 boxaDestroy(&boxa);
1416
1417 return 0;
1418 }
1419
1420
1421 /*!
1422 * \brief pixWordMaskByDilation()
1423 *
1424 * \param[in] pixs 1 bpp; typ. at 75 to 150 ppi
1425 * \param[out] pmask [optional] dilated word mask
1426 * \param[out] psize [optional] size of good horizontal dilation
1427 * \param[out] pixadb [optional] debug: pixa of intermediate steps
1428 * \return 0 if OK, 1 on error
1429 *
1430 * <pre>
1431 * Notes:
1432 * (1) This gives an estimate of the word masks. See
1433 * pixWordBoxesByDilation() for further filtering of the word boxes.
1434 * (2) The resolution should be between 75 and 150 ppi, and the optimal
1435 * dilation will be between 3 and 10.
1436 * (3) A good size for dilating to get word masks is optionally returned.
1437 * (4) Typically, the number of c.c. reduced with each successive
1438 * dilation (stored in nadiff) decreases quickly to a minimum
1439 * (where the characters in a word are joined), and then
1440 * increases again as the smaller number of words are joined.
1441 * For the typical case, you can then look for this minimum
1442 * and dilate to get the word mask. However, there are many
1443 * cases where the function is not so simple. For example, if the
1444 * pix has been upscaled 2x, the nadiff function oscillates, with
1445 * every other value being zero! And for some images it tails
1446 * off without a clear minimum to indicate where to break.
1447 * So a more simple and robust method is to find the dilation
1448 * where the initial number of c.c. has been reduced by some
1449 * fraction (we use a 70% reduction).
1450 * </pre>
1451 */
1452 l_int32
pixWordMaskByDilation(PIX * pixs,PIX ** ppixm,l_int32 * psize,PIXA * pixadb)1453 pixWordMaskByDilation(PIX *pixs,
1454 PIX **ppixm,
1455 l_int32 *psize,
1456 PIXA *pixadb)
1457 {
1458 l_int32 i, n, ndil, maxdiff, diff, ibest;
1459 l_int32 start, stop, check, count, total, xres;
1460 l_int32 ncc[13]; /* max dilation + 1 */
1461 l_int32 *diffa;
1462 BOXA *boxa;
1463 NUMA *nacc, *nadiff;
1464 PIX *pix1, *pix2;
1465
1466 PROCNAME("pixWordMaskByDilation");
1467
1468 if (ppixm) *ppixm = NULL;
1469 if (psize) *psize = 0;
1470 if (!pixs || pixGetDepth(pixs) != 1)
1471 return ERROR_INT("pixs undefined or not 1 bpp", procName, 1);
1472 if (!ppixm && !psize)
1473 return ERROR_INT("no output requested", procName, 1);
1474
1475 /* Find a good dilation to create the word mask, by successively
1476 * increasing dilation size and counting the connected components. */
1477 pix1 = pixCopy(NULL, pixs);
1478 ndil = 12; /* appropriate for 75 to 150 ppi */
1479 nacc = numaCreate(ndil + 1);
1480 nadiff = numaCreate(ndil + 1);
1481 stop = FALSE;
1482 for (i = 0; i <= ndil; i++) {
1483 if (i == 0) /* first one not dilated */
1484 pix2 = pixCopy(NULL, pix1);
1485 else /* successive dilation by sel_2h */
1486 pix2 = pixMorphSequence(pix1, "d2.1", 0);
1487 boxa = pixConnCompBB(pix2, 4);
1488 ncc[i] = boxaGetCount(boxa);
1489 numaAddNumber(nacc, ncc[i]);
1490 if (i == 0) total = ncc[0];
1491 if (i > 0) {
1492 diff = ncc[i - 1] - ncc[i];
1493 numaAddNumber(nadiff, diff);
1494 }
1495 pixDestroy(&pix1);
1496 pix1 = pix2;
1497 boxaDestroy(&boxa);
1498 }
1499 pixDestroy(&pix1);
1500
1501 /* Find the dilation at which the c.c. count has reduced
1502 * to 30% of thie initial value. Although 30% seems high,
1503 * it seems better to use this but add one to ibest. */
1504 diffa = numaGetIArray(nadiff);
1505 n = numaGetCount(nadiff);
1506 maxdiff = 0;
1507 start = 0;
1508 check = TRUE;
1509 ibest = 2;
1510 for (i = 1; i < n; i++) {
1511 numaGetIValue(nacc, i, &count);
1512 if (check && count < 0.3 * total) {
1513 ibest = i + 1;
1514 check = FALSE;
1515 }
1516 diff = diffa[i];
1517 if (diff > maxdiff) {
1518 maxdiff = diff;
1519 start = i;
1520 }
1521 }
1522 LEPT_FREE(diffa);
1523
1524 /* Add small compensation for higher resolution */
1525 xres = pixGetXRes(pixs);
1526 if (xres == 0) xres = 150;
1527 if (xres > 110) ibest++;
1528 if (ibest < 2) {
1529 L_INFO("setting ibest to minimum allowed value of 2\n", procName);
1530 ibest = 2;
1531 }
1532
1533 if (pixadb) {
1534 lept_mkdir("lept/jb");
1535 {GPLOT *gplot;
1536 NUMA *naseq;
1537 PIX *pix3, *pix4;
1538 L_INFO("Best dilation: %d\n", procName, L_MAX(3, ibest + 1));
1539 naseq = numaMakeSequence(1, 1, numaGetCount(nacc));
1540 gplot = gplotCreate("/tmp/lept/jb/numcc", GPLOT_PNG,
1541 "Number of cc vs. horizontal dilation",
1542 "Sel horiz", "Number of cc");
1543 gplotAddPlot(gplot, naseq, nacc, GPLOT_LINES, "");
1544 gplotMakeOutput(gplot);
1545 gplotDestroy(&gplot);
1546 pix3 = pixRead("/tmp/lept/jb/numcc.png");
1547 pixaAddPix(pixadb, pix3, L_INSERT);
1548 numaDestroy(&naseq);
1549 naseq = numaMakeSequence(1, 1, numaGetCount(nadiff));
1550 gplot = gplotCreate("/tmp/lept/jb/diffcc", GPLOT_PNG,
1551 "Diff count of cc vs. horizontal dilation",
1552 "Sel horiz", "Diff in cc");
1553 gplotAddPlot(gplot, naseq, nadiff, GPLOT_LINES, "");
1554 gplotMakeOutput(gplot);
1555 gplotDestroy(&gplot);
1556 pix3 = pixRead("/tmp/lept/jb/diffcc.png");
1557 pixaAddPix(pixadb, pix3, L_INSERT);
1558 numaDestroy(&naseq);
1559 pix3 = pixCloseBrick(NULL, pixs, ibest + 1, 1);
1560 pix4 = pixScaleToSize(pix3, 600, 0);
1561 pixaAddPix(pixadb, pix4, L_INSERT);
1562 pixDestroy(&pix3);
1563 }
1564 }
1565
1566 if (psize) *psize = ibest + 1;
1567 if (ppixm)
1568 *ppixm = pixCloseBrick(NULL, pixs, ibest + 1, 1);
1569
1570 numaDestroy(&nacc);
1571 numaDestroy(&nadiff);
1572 return 0;
1573 }
1574
1575
1576 /*!
1577 * \brief pixWordBoxesByDilation()
1578 *
1579 * \param[in] pixs 1 bpp; typ. 75 - 200 ppi
1580 * \param[in] minwidth, minheight saved components; smaller are discarded
1581 * \param[in] maxwidth, maxheight saved components; larger are discarded
1582 * \param[out] pboxa of dilated word mask
1583 * \param[out] psize [optional] size of good horizontal dilation
1584 * \param[out] pixadb [optional] debug: pixa of intermediate steps
1585 * \return 0 if OK, 1 on error
1586 *
1587 * <pre>
1588 * Notes:
1589 * (1) Returns a pruned set of word boxes.
1590 * (2) See pixWordMaskByDilation().
1591 * </pre>
1592 */
1593 l_int32
pixWordBoxesByDilation(PIX * pixs,l_int32 minwidth,l_int32 minheight,l_int32 maxwidth,l_int32 maxheight,BOXA ** pboxa,l_int32 * psize,PIXA * pixadb)1594 pixWordBoxesByDilation(PIX *pixs,
1595 l_int32 minwidth,
1596 l_int32 minheight,
1597 l_int32 maxwidth,
1598 l_int32 maxheight,
1599 BOXA **pboxa,
1600 l_int32 *psize,
1601 PIXA *pixadb)
1602 {
1603 BOXA *boxa1, *boxa2;
1604 PIX *pix1, *pix2;
1605
1606 PROCNAME("pixWordBoxesByDilation");
1607
1608 if (psize) *psize = 0;
1609 if (!pixs || pixGetDepth(pixs) != 1)
1610 return ERROR_INT("pixs undefined or not 1 bpp", procName, 1);
1611 if (!pboxa)
1612 return ERROR_INT("&boxa not defined", procName, 1);
1613 *pboxa = NULL;
1614
1615 /* Make a first estimate of the word mask */
1616 if (pixWordMaskByDilation(pixs, &pix1, psize, pixadb))
1617 return ERROR_INT("pixWordMaskByDilation() failed", procName, 1);
1618
1619 /* Prune the word mask. Get the bounding boxes of the words.
1620 * Remove the small ones, which can be due to punctuation
1621 * that was not joined to a word. Also remove the large ones,
1622 * which are not likely to be words. */
1623 boxa1 = pixConnComp(pix1, NULL, 8);
1624 boxa2 = boxaSelectBySize(boxa1, minwidth, minheight, L_SELECT_IF_BOTH,
1625 L_SELECT_IF_GTE, NULL);
1626 *pboxa = boxaSelectBySize(boxa2, maxwidth, maxheight, L_SELECT_IF_BOTH,
1627 L_SELECT_IF_LTE, NULL);
1628 if (pixadb) {
1629 pix2 = pixCopy(NULL, pixs);
1630 pixRenderBoxaArb(pix2, boxa1, 2, 255, 0, 0);
1631 pixaAddPix(pixadb, pix2, L_INSERT);
1632 pix2 = pixCopy(NULL, pixs);
1633 pixRenderBoxaArb(pix2, boxa2, 2, 0, 255, 0);
1634 pixaAddPix(pixadb, pix2, L_INSERT);
1635 }
1636 boxaDestroy(&boxa1);
1637 boxaDestroy(&boxa2);
1638 pixDestroy(&pix1);
1639 return 0;
1640 }
1641
1642
1643 /*----------------------------------------------------------------------*
1644 * Build grayscale composites (templates) *
1645 *----------------------------------------------------------------------*/
1646 /*!
1647 * \brief jbAccumulateComposites()
1648 *
1649 * \param[in] pixaa one pixa for each class
1650 * \param[out] ppna number of samples used to build each composite
1651 * \param[out] pptat centroids of bordered composites
1652 * \return pixad accumulated sum of samples in each class,
1653 * or NULL on error
1654 *
1655 */
1656 PIXA *
jbAccumulateComposites(PIXAA * pixaa,NUMA ** pna,PTA ** pptat)1657 jbAccumulateComposites(PIXAA *pixaa,
1658 NUMA **pna,
1659 PTA **pptat)
1660 {
1661 l_int32 n, nt, i, j, d, minw, maxw, minh, maxh, xdiff, ydiff;
1662 l_float32 x, y, xave, yave;
1663 NUMA *na;
1664 PIX *pix, *pixt1, *pixt2, *pixsum;
1665 PIXA *pixa, *pixad;
1666 PTA *ptat, *pta;
1667
1668 PROCNAME("jbAccumulateComposites");
1669
1670 if (!pptat)
1671 return (PIXA *)ERROR_PTR("&ptat not defined", procName, NULL);
1672 *pptat = NULL;
1673 if (!pna)
1674 return (PIXA *)ERROR_PTR("&na not defined", procName, NULL);
1675 *pna = NULL;
1676 if (!pixaa)
1677 return (PIXA *)ERROR_PTR("pixaa not defined", procName, NULL);
1678
1679 n = pixaaGetCount(pixaa, NULL);
1680 if ((ptat = ptaCreate(n)) == NULL)
1681 return (PIXA *)ERROR_PTR("ptat not made", procName, NULL);
1682 *pptat = ptat;
1683 pixad = pixaCreate(n);
1684 na = numaCreate(n);
1685 *pna = na;
1686
1687 for (i = 0; i < n; i++) {
1688 pixa = pixaaGetPixa(pixaa, i, L_CLONE);
1689 nt = pixaGetCount(pixa);
1690 numaAddNumber(na, nt);
1691 if (nt == 0) {
1692 L_WARNING("empty pixa found!\n", procName);
1693 pixaDestroy(&pixa);
1694 continue;
1695 }
1696 pixaSizeRange(pixa, &minw, &minh, &maxw, &maxh);
1697 pix = pixaGetPix(pixa, 0, L_CLONE);
1698 d = pixGetDepth(pix);
1699 pixDestroy(&pix);
1700 pixt1 = pixCreate(maxw, maxh, d);
1701 pixsum = pixInitAccumulate(maxw, maxh, 0);
1702 pta = pixaCentroids(pixa);
1703
1704 /* Find the average value of the centroids ... */
1705 xave = yave = 0;
1706 for (j = 0; j < nt; j++) {
1707 ptaGetPt(pta, j, &x, &y);
1708 xave += x;
1709 yave += y;
1710 }
1711 xave = xave / (l_float32)nt;
1712 yave = yave / (l_float32)nt;
1713
1714 /* and place all centroids at their average value */
1715 for (j = 0; j < nt; j++) {
1716 pixt2 = pixaGetPix(pixa, j, L_CLONE);
1717 ptaGetPt(pta, j, &x, &y);
1718 xdiff = (l_int32)(x - xave);
1719 ydiff = (l_int32)(y - yave);
1720 pixClearAll(pixt1);
1721 pixRasterop(pixt1, xdiff, ydiff, maxw, maxh, PIX_SRC,
1722 pixt2, 0, 0);
1723 pixAccumulate(pixsum, pixt1, L_ARITH_ADD);
1724 pixDestroy(&pixt2);
1725 }
1726 pixaAddPix(pixad, pixsum, L_INSERT);
1727 ptaAddPt(ptat, xave, yave);
1728
1729 pixaDestroy(&pixa);
1730 pixDestroy(&pixt1);
1731 ptaDestroy(&pta);
1732 }
1733
1734 return pixad;
1735 }
1736
1737
1738 /*!
1739 * \brief jbTemplatesFromComposites()
1740 *
1741 * \param[in] pixac one pix of composites for each class
1742 * \param[in] na number of samples used for each class composite
1743 * \return pixad 8 bpp templates for each class, or NULL on error
1744 *
1745 */
1746 PIXA *
jbTemplatesFromComposites(PIXA * pixac,NUMA * na)1747 jbTemplatesFromComposites(PIXA *pixac,
1748 NUMA *na)
1749 {
1750 l_int32 n, i;
1751 l_float32 nt; /* number of samples in the composite; always an integer */
1752 l_float32 factor;
1753 PIX *pixsum; /* accumulated composite */
1754 PIX *pixd;
1755 PIXA *pixad;
1756
1757 PROCNAME("jbTemplatesFromComposites");
1758
1759 if (!pixac)
1760 return (PIXA *)ERROR_PTR("pixac not defined", procName, NULL);
1761 if (!na)
1762 return (PIXA *)ERROR_PTR("na not defined", procName, NULL);
1763
1764 n = pixaGetCount(pixac);
1765 pixad = pixaCreate(n);
1766 for (i = 0; i < n; i++) {
1767 pixsum = pixaGetPix(pixac, i, L_COPY); /* changed internally */
1768 numaGetFValue(na, i, &nt);
1769 factor = 255. / nt;
1770 pixMultConstAccumulate(pixsum, factor, 0); /* changes pixsum */
1771 pixd = pixFinalAccumulate(pixsum, 0, 8);
1772 pixaAddPix(pixad, pixd, L_INSERT);
1773 pixDestroy(&pixsum);
1774 }
1775
1776 return pixad;
1777 }
1778
1779
1780
1781 /*----------------------------------------------------------------------*
1782 * jbig2 utility routines *
1783 *----------------------------------------------------------------------*/
1784 /*!
1785 * \brief jbClasserCreate()
1786 *
1787 * \param[in] method JB_RANKHAUS, JB_CORRELATION
1788 * \param[in] components JB_CONN_COMPS, JB_CHARACTERS, JB_WORDS
1789 * \return jbclasser, or NULL on error
1790 */
1791 JBCLASSER *
jbClasserCreate(l_int32 method,l_int32 components)1792 jbClasserCreate(l_int32 method,
1793 l_int32 components)
1794 {
1795 JBCLASSER *classer;
1796
1797 PROCNAME("jbClasserCreate");
1798
1799 if (method != JB_RANKHAUS && method != JB_CORRELATION)
1800 return (JBCLASSER *)ERROR_PTR("invalid method", procName, NULL);
1801 if (components != JB_CONN_COMPS && components != JB_CHARACTERS &&
1802 components != JB_WORDS)
1803 return (JBCLASSER *)ERROR_PTR("invalid component", procName, NULL);
1804
1805 classer = (JBCLASSER *)LEPT_CALLOC(1, sizeof(JBCLASSER));
1806 classer->method = method;
1807 classer->components = components;
1808 classer->nacomps = numaCreate(0);
1809 classer->pixaa = pixaaCreate(0);
1810 classer->pixat = pixaCreate(0);
1811 classer->pixatd = pixaCreate(0);
1812 classer->nafgt = numaCreate(0);
1813 classer->naarea = numaCreate(0);
1814 classer->ptac = ptaCreate(0);
1815 classer->ptact = ptaCreate(0);
1816 classer->naclass = numaCreate(0);
1817 classer->napage = numaCreate(0);
1818 classer->ptaul = ptaCreate(0);
1819 return classer;
1820 }
1821
1822
1823 /*
1824 * jbClasserDestroy()
1825 *
1826 * Input: &classer (<inout> to be nulled)
1827 * Return: void
1828 */
1829 void
jbClasserDestroy(JBCLASSER ** pclasser)1830 jbClasserDestroy(JBCLASSER **pclasser)
1831 {
1832 JBCLASSER *classer;
1833
1834 if (!pclasser)
1835 return;
1836 if ((classer = *pclasser) == NULL)
1837 return;
1838
1839 sarrayDestroy(&classer->safiles);
1840 numaDestroy(&classer->nacomps);
1841 pixaaDestroy(&classer->pixaa);
1842 pixaDestroy(&classer->pixat);
1843 pixaDestroy(&classer->pixatd);
1844 l_dnaHashDestroy(&classer->dahash);
1845 numaDestroy(&classer->nafgt);
1846 numaDestroy(&classer->naarea);
1847 ptaDestroy(&classer->ptac);
1848 ptaDestroy(&classer->ptact);
1849 numaDestroy(&classer->naclass);
1850 numaDestroy(&classer->napage);
1851 ptaDestroy(&classer->ptaul);
1852 ptaDestroy(&classer->ptall);
1853 LEPT_FREE(classer);
1854 *pclasser = NULL;
1855 return;
1856 }
1857
1858
1859 /*!
1860 * \brief jbDataSave()
1861 *
1862 * \param[in] jbclasser
1863 * \param[in] latticew, latticeh cell size used to store each
1864 * connected component in the composite
1865 * \return jbdata, or NULL on error
1866 *
1867 * <pre>
1868 * Notes:
1869 * (1) This routine stores the jbig2-type data required for
1870 * generating a lossy jbig2 version of the image.
1871 * It can be losslessly written to (and read from) two files.
1872 * (2) It generates and stores the mosaic of templates.
1873 * (3) It clones the Numa and Pta arrays, so these must all
1874 * be destroyed by the caller.
1875 * (4) Input 0 to use the default values for latticew and/or latticeh,
1876 * </pre>
1877 */
1878 JBDATA *
jbDataSave(JBCLASSER * classer)1879 jbDataSave(JBCLASSER *classer)
1880 {
1881 l_int32 maxw, maxh;
1882 JBDATA *data;
1883 PIX *pix;
1884
1885 PROCNAME("jbDataSave");
1886
1887 if (!classer)
1888 return (JBDATA *)ERROR_PTR("classer not defined", procName, NULL);
1889
1890 /* Write the templates into an array. */
1891 pixaSizeRange(classer->pixat, NULL, NULL, &maxw, &maxh);
1892 pix = pixaDisplayOnLattice(classer->pixat, maxw + 1, maxh + 1,
1893 NULL, NULL);
1894 if (!pix)
1895 return (JBDATA *)ERROR_PTR("data not made", procName, NULL);
1896
1897 data = (JBDATA *)LEPT_CALLOC(1, sizeof(JBDATA));
1898 data->pix = pix;
1899 data->npages = classer->npages;
1900 data->w = classer->w;
1901 data->h = classer->h;
1902 data->nclass = classer->nclass;
1903 data->latticew = maxw + 1;
1904 data->latticeh = maxh + 1;
1905 data->naclass = numaClone(classer->naclass);
1906 data->napage = numaClone(classer->napage);
1907 data->ptaul = ptaClone(classer->ptaul);
1908 return data;
1909 }
1910
1911
1912 /*
1913 * jbDataDestroy()
1914 *
1915 * Input: &data (<inout> to be nulled)
1916 * Return: void
1917 */
1918 void
jbDataDestroy(JBDATA ** pdata)1919 jbDataDestroy(JBDATA **pdata)
1920 {
1921 JBDATA *data;
1922
1923 if (!pdata)
1924 return;
1925 if ((data = *pdata) == NULL)
1926 return;
1927
1928 pixDestroy(&data->pix);
1929 numaDestroy(&data->naclass);
1930 numaDestroy(&data->napage);
1931 ptaDestroy(&data->ptaul);
1932 LEPT_FREE(data);
1933 *pdata = NULL;
1934 return;
1935 }
1936
1937
1938 /*!
1939 * \brief jbDataWrite()
1940 *
1941 * \param[in] rootname for output files; everything but the extension
1942 * \param[in] jbdata
1943 * \return 0 if OK, 1 on error
1944 *
1945 * <pre>
1946 * Notes:
1947 * (1) Serialization function that writes data in jbdata to file.
1948 * </pre>
1949 */
1950 l_int32
jbDataWrite(const char * rootout,JBDATA * jbdata)1951 jbDataWrite(const char *rootout,
1952 JBDATA *jbdata)
1953 {
1954 char buf[L_BUF_SIZE];
1955 l_int32 w, h, nclass, npages, cellw, cellh, ncomp, i, x, y, iclass, ipage;
1956 NUMA *naclass, *napage;
1957 PTA *ptaul;
1958 PIX *pixt;
1959 FILE *fp;
1960
1961 PROCNAME("jbDataWrite");
1962
1963 if (!rootout)
1964 return ERROR_INT("no rootout", procName, 1);
1965 if (!jbdata)
1966 return ERROR_INT("no jbdata", procName, 1);
1967
1968 npages = jbdata->npages;
1969 w = jbdata->w;
1970 h = jbdata->h;
1971 pixt = jbdata->pix;
1972 nclass = jbdata->nclass;
1973 cellw = jbdata->latticew;
1974 cellh = jbdata->latticeh;
1975 naclass = jbdata->naclass;
1976 napage = jbdata->napage;
1977 ptaul = jbdata->ptaul;
1978
1979 snprintf(buf, L_BUF_SIZE, "%s%s", rootout, JB_TEMPLATE_EXT);
1980 pixWrite(buf, pixt, IFF_PNG);
1981
1982 snprintf(buf, L_BUF_SIZE, "%s%s", rootout, JB_DATA_EXT);
1983 if ((fp = fopenWriteStream(buf, "wb")) == NULL)
1984 return ERROR_INT("stream not opened", procName, 1);
1985 ncomp = ptaGetCount(ptaul);
1986 fprintf(fp, "jb data file\n");
1987 fprintf(fp, "num pages = %d\n", npages);
1988 fprintf(fp, "page size: w = %d, h = %d\n", w, h);
1989 fprintf(fp, "num components = %d\n", ncomp);
1990 fprintf(fp, "num classes = %d\n", nclass);
1991 fprintf(fp, "template lattice size: w = %d, h = %d\n", cellw, cellh);
1992 for (i = 0; i < ncomp; i++) {
1993 numaGetIValue(napage, i, &ipage);
1994 numaGetIValue(naclass, i, &iclass);
1995 ptaGetIPt(ptaul, i, &x, &y);
1996 fprintf(fp, "%d %d %d %d\n", ipage, iclass, x, y);
1997 }
1998 fclose(fp);
1999
2000 return 0;
2001 }
2002
2003
2004 /*!
2005 * \brief jbDataRead()
2006 *
2007 * \param[in] rootname for template and data files
2008 * \return jbdata, or NULL on error
2009 */
2010 JBDATA *
jbDataRead(const char * rootname)2011 jbDataRead(const char *rootname)
2012 {
2013 char fname[L_BUF_SIZE];
2014 char *linestr;
2015 l_uint8 *data;
2016 l_int32 nsa, i, w, h, cellw, cellh, x, y, iclass, ipage;
2017 l_int32 npages, nclass, ncomp, ninit;
2018 size_t size;
2019 JBDATA *jbdata;
2020 NUMA *naclass, *napage;
2021 PIX *pixs;
2022 PTA *ptaul;
2023 SARRAY *sa;
2024
2025 PROCNAME("jbDataRead");
2026
2027 if (!rootname)
2028 return (JBDATA *)ERROR_PTR("rootname not defined", procName, NULL);
2029
2030 snprintf(fname, L_BUF_SIZE, "%s%s", rootname, JB_TEMPLATE_EXT);
2031 if ((pixs = pixRead(fname)) == NULL)
2032 return (JBDATA *)ERROR_PTR("pix not read", procName, NULL);
2033
2034 snprintf(fname, L_BUF_SIZE, "%s%s", rootname, JB_DATA_EXT);
2035 if ((data = l_binaryRead(fname, &size)) == NULL) {
2036 pixDestroy(&pixs);
2037 return (JBDATA *)ERROR_PTR("data not read", procName, NULL);
2038 }
2039
2040 if ((sa = sarrayCreateLinesFromString((char *)data, 0)) == NULL) {
2041 pixDestroy(&pixs);
2042 LEPT_FREE(data);
2043 return (JBDATA *)ERROR_PTR("sa not made", procName, NULL);
2044 }
2045 nsa = sarrayGetCount(sa); /* number of cc + 6 */
2046 linestr = sarrayGetString(sa, 0, L_NOCOPY);
2047 if (strcmp(linestr, "jb data file") != 0) {
2048 pixDestroy(&pixs);
2049 LEPT_FREE(data);
2050 sarrayDestroy(&sa);
2051 return (JBDATA *)ERROR_PTR("invalid jb data file", procName, NULL);
2052 }
2053 linestr = sarrayGetString(sa, 1, L_NOCOPY);
2054 sscanf(linestr, "num pages = %d", &npages);
2055 linestr = sarrayGetString(sa, 2, L_NOCOPY);
2056 sscanf(linestr, "page size: w = %d, h = %d", &w, &h);
2057 linestr = sarrayGetString(sa, 3, L_NOCOPY);
2058 sscanf(linestr, "num components = %d", &ncomp);
2059 linestr = sarrayGetString(sa, 4, L_NOCOPY);
2060 sscanf(linestr, "num classes = %d\n", &nclass);
2061 linestr = sarrayGetString(sa, 5, L_NOCOPY);
2062 sscanf(linestr, "template lattice size: w = %d, h = %d\n", &cellw, &cellh);
2063
2064 #if 1
2065 fprintf(stderr, "num pages = %d\n", npages);
2066 fprintf(stderr, "page size: w = %d, h = %d\n", w, h);
2067 fprintf(stderr, "num components = %d\n", ncomp);
2068 fprintf(stderr, "num classes = %d\n", nclass);
2069 fprintf(stderr, "template lattice size: w = %d, h = %d\n", cellw, cellh);
2070 #endif
2071
2072 ninit = ncomp;
2073 if (ncomp > 1000000) { /* fuzz protection */
2074 L_WARNING("ncomp > 1M\n", procName);
2075 ninit = 1000000;
2076 }
2077 naclass = numaCreate(ninit);
2078 napage = numaCreate(ninit);
2079 ptaul = ptaCreate(ninit);
2080 for (i = 6; i < nsa; i++) {
2081 linestr = sarrayGetString(sa, i, L_NOCOPY);
2082 sscanf(linestr, "%d %d %d %d\n", &ipage, &iclass, &x, &y);
2083 numaAddNumber(napage, ipage);
2084 numaAddNumber(naclass, iclass);
2085 ptaAddPt(ptaul, x, y);
2086 }
2087
2088 jbdata = (JBDATA *)LEPT_CALLOC(1, sizeof(JBDATA));
2089 jbdata->pix = pixs;
2090 jbdata->npages = npages;
2091 jbdata->w = w;
2092 jbdata->h = h;
2093 jbdata->nclass = nclass;
2094 jbdata->latticew = cellw;
2095 jbdata->latticeh = cellh;
2096 jbdata->naclass = naclass;
2097 jbdata->napage = napage;
2098 jbdata->ptaul = ptaul;
2099
2100 LEPT_FREE(data);
2101 sarrayDestroy(&sa);
2102 return jbdata;
2103 }
2104
2105
2106 /*!
2107 * \brief jbDataRender()
2108 *
2109 * \param[in] jbdata
2110 * \param[in] debugflag if TRUE, writes into 2 bpp pix and adds
2111 * component outlines in color
2112 * \return pixa reconstruction of original images, using templates or
2113 * NULL on error
2114 */
2115 PIXA *
jbDataRender(JBDATA * data,l_int32 debugflag)2116 jbDataRender(JBDATA *data,
2117 l_int32 debugflag)
2118 {
2119 l_int32 i, w, h, cellw, cellh, x, y, iclass, ipage;
2120 l_int32 npages, nclass, ncomp, wp, hp;
2121 BOX *box;
2122 NUMA *naclass, *napage;
2123 PIX *pixt, *pixt2, *pix, *pixd;
2124 PIXA *pixat; /* pixa of templates */
2125 PIXA *pixad; /* pixa of output images */
2126 PIXCMAP *cmap;
2127 PTA *ptaul;
2128
2129 PROCNAME("jbDataRender");
2130
2131 if (!data)
2132 return (PIXA *)ERROR_PTR("data not defined", procName, NULL);
2133
2134 npages = data->npages;
2135 w = data->w;
2136 h = data->h;
2137 pixt = data->pix;
2138 nclass = data->nclass;
2139 cellw = data->latticew;
2140 cellh = data->latticeh;
2141 naclass = data->naclass;
2142 napage = data->napage;
2143 ptaul = data->ptaul;
2144 ncomp = numaGetCount(naclass);
2145
2146 /* Reconstruct the original set of images from the templates
2147 * and the data associated with each component. First,
2148 * generate the output pixa as a set of empty pix. */
2149 if ((pixad = pixaCreate(npages)) == NULL)
2150 return (PIXA *)ERROR_PTR("pixad not made", procName, NULL);
2151 for (i = 0; i < npages; i++) {
2152 if (debugflag == FALSE) {
2153 pix = pixCreate(w, h, 1);
2154 } else {
2155 pix = pixCreate(w, h, 2);
2156 cmap = pixcmapCreate(2);
2157 pixcmapAddColor(cmap, 255, 255, 255);
2158 pixcmapAddColor(cmap, 0, 0, 0);
2159 pixcmapAddColor(cmap, 255, 0, 0); /* for box outlines */
2160 pixSetColormap(pix, cmap);
2161 }
2162 pixaAddPix(pixad, pix, L_INSERT);
2163 }
2164
2165 /* Put the class templates into a pixa. */
2166 if ((pixat = pixaCreateFromPix(pixt, nclass, cellw, cellh)) == NULL) {
2167 pixaDestroy(&pixad);
2168 return (PIXA *)ERROR_PTR("pixat not made", procName, NULL);
2169 }
2170
2171 /* Place each component in the right location on its page. */
2172 for (i = 0; i < ncomp; i++) {
2173 numaGetIValue(napage, i, &ipage);
2174 numaGetIValue(naclass, i, &iclass);
2175 pix = pixaGetPix(pixat, iclass, L_CLONE); /* the template */
2176 wp = pixGetWidth(pix);
2177 hp = pixGetHeight(pix);
2178 ptaGetIPt(ptaul, i, &x, &y);
2179 pixd = pixaGetPix(pixad, ipage, L_CLONE); /* the output page */
2180 if (debugflag == FALSE) {
2181 pixRasterop(pixd, x, y, wp, hp, PIX_SRC | PIX_DST, pix, 0, 0);
2182 } else {
2183 pixt2 = pixConvert1To2Cmap(pix);
2184 pixRasterop(pixd, x, y, wp, hp, PIX_SRC | PIX_DST, pixt2, 0, 0);
2185 box = boxCreate(x, y, wp, hp);
2186 pixRenderBoxArb(pixd, box, 1, 255, 0, 0);
2187 pixDestroy(&pixt2);
2188 boxDestroy(&box);
2189 }
2190 pixDestroy(&pix); /* the clone only */
2191 pixDestroy(&pixd); /* the clone only */
2192 }
2193
2194 pixaDestroy(&pixat);
2195 return pixad;
2196 }
2197
2198
2199 /*!
2200 * \brief jbGetULCorners()
2201 *
2202 * \param[in] jbclasser
2203 * \param[in] pixs full res image
2204 * \param[in] boxa of c.c. bounding rectangles for this page
2205 * \return 0 if OK, 1 on error
2206 *
2207 * <pre>
2208 * Notes:
2209 * (1) This computes the ptaul field, which has the global UL corners,
2210 * adjusted for each specific component, so that each component
2211 * can be replaced by the template for its class and have the
2212 * centroid in the template in the same position as the
2213 * centroid of the original connected component. It is important
2214 * that this be done properly to avoid a wavy baseline in the
2215 * result.
2216 * (2) The array fields ptac and ptact give the centroids of
2217 * those components relative to the UL corner of each component.
2218 * Here, we compute the difference in each component, round to
2219 * nearest integer, and correct the box->x and box->y by
2220 * the appropriate integral difference.
2221 * (3) The templates and stored instances are all bordered.
2222 * </pre>
2223 */
2224 l_int32
jbGetULCorners(JBCLASSER * classer,PIX * pixs,BOXA * boxa)2225 jbGetULCorners(JBCLASSER *classer,
2226 PIX *pixs,
2227 BOXA *boxa)
2228 {
2229 l_int32 i, baseindex, index, n, iclass, idelx, idely, x, y, dx, dy;
2230 l_int32 *sumtab;
2231 l_float32 x1, x2, y1, y2, delx, dely;
2232 BOX *box;
2233 NUMA *naclass;
2234 PIX *pixt;
2235 PTA *ptac, *ptact, *ptaul;
2236
2237 PROCNAME("jbGetULCorners");
2238
2239 if (!classer)
2240 return ERROR_INT("classer not defined", procName, 1);
2241 if (!pixs)
2242 return ERROR_INT("pixs not defined", procName, 1);
2243 if (!boxa)
2244 return ERROR_INT("boxa not defined", procName, 1);
2245
2246 n = boxaGetCount(boxa);
2247 ptaul = classer->ptaul;
2248 naclass = classer->naclass;
2249 ptac = classer->ptac;
2250 ptact = classer->ptact;
2251 baseindex = classer->baseindex; /* num components before this page */
2252 sumtab = makePixelSumTab8();
2253 for (i = 0; i < n; i++) {
2254 index = baseindex + i;
2255 ptaGetPt(ptac, index, &x1, &y1);
2256 numaGetIValue(naclass, index, &iclass);
2257 ptaGetPt(ptact, iclass, &x2, &y2);
2258 delx = x2 - x1;
2259 dely = y2 - y1;
2260 if (delx >= 0)
2261 idelx = (l_int32)(delx + 0.5);
2262 else
2263 idelx = (l_int32)(delx - 0.5);
2264 if (dely >= 0)
2265 idely = (l_int32)(dely + 0.5);
2266 else
2267 idely = (l_int32)(dely - 0.5);
2268 if ((box = boxaGetBox(boxa, i, L_CLONE)) == NULL) {
2269 LEPT_FREE(sumtab);
2270 return ERROR_INT("box not found", procName, 1);
2271 }
2272 boxGetGeometry(box, &x, &y, NULL, NULL);
2273
2274 /* Get final increments dx and dy for best alignment */
2275 pixt = pixaGetPix(classer->pixat, iclass, L_CLONE);
2276 finalPositioningForAlignment(pixs, x, y, idelx, idely,
2277 pixt, sumtab, &dx, &dy);
2278 /* if (i % 20 == 0)
2279 fprintf(stderr, "dx = %d, dy = %d\n", dx, dy); */
2280 ptaAddPt(ptaul, x - idelx + dx, y - idely + dy);
2281 boxDestroy(&box);
2282 pixDestroy(&pixt);
2283 }
2284
2285 LEPT_FREE(sumtab);
2286 return 0;
2287 }
2288
2289
2290 /*!
2291 * \brief jbGetLLCorners()
2292 *
2293 * \param[in] jbclasser
2294 * \return 0 if OK, 1 on error
2295 *
2296 * <pre>
2297 * Notes:
2298 * (1) This computes the ptall field, which has the global LL corners,
2299 * adjusted for each specific component, so that each component
2300 * can be replaced by the template for its class and have the
2301 * centroid in the template in the same position as the
2302 * centroid of the original connected component. It is important
2303 * that this be done properly to avoid a wavy baseline in the result.
2304 * (2) It is computed here from the corresponding UL corners, where
2305 * the input templates and stored instances are all bordered.
2306 * This should be done after all pages have been processed.
2307 * (3) For proper substitution, the templates whose LL corners are
2308 * placed in these locations must be UN-bordered.
2309 * This is available for a realistic jbig2 encoder, which would
2310 * (1) encode each template without a border, and (2) encode
2311 * the position using the LL corner (rather than the UL
2312 * corner) because the difference between y-values
2313 * of successive instances is typically close to zero.
2314 * </pre>
2315 */
2316 l_int32
jbGetLLCorners(JBCLASSER * classer)2317 jbGetLLCorners(JBCLASSER *classer)
2318 {
2319 l_int32 i, iclass, n, x1, y1, h;
2320 NUMA *naclass;
2321 PIX *pix;
2322 PIXA *pixat;
2323 PTA *ptaul, *ptall;
2324
2325 PROCNAME("jbGetLLCorners");
2326
2327 if (!classer)
2328 return ERROR_INT("classer not defined", procName, 1);
2329
2330 ptaul = classer->ptaul;
2331 naclass = classer->naclass;
2332 pixat = classer->pixat;
2333
2334 ptaDestroy(&classer->ptall);
2335 n = ptaGetCount(ptaul);
2336 ptall = ptaCreate(n);
2337 classer->ptall = ptall;
2338
2339 /* If the templates were bordered, we would add h - 1 to the UL
2340 * corner y-value. However, because the templates to be used
2341 * here have their borders removed, and the borders are
2342 * JB_ADDED_PIXELS on each side, we add h - 1 - 2 * JB_ADDED_PIXELS
2343 * to the UL corner y-value. */
2344 for (i = 0; i < n; i++) {
2345 ptaGetIPt(ptaul, i, &x1, &y1);
2346 numaGetIValue(naclass, i, &iclass);
2347 pix = pixaGetPix(pixat, iclass, L_CLONE);
2348 h = pixGetHeight(pix);
2349 ptaAddPt(ptall, x1, y1 + h - 1 - 2 * JB_ADDED_PIXELS);
2350 pixDestroy(&pix);
2351 }
2352
2353 return 0;
2354 }
2355
2356
2357 /*----------------------------------------------------------------------*
2358 * Static helpers *
2359 *----------------------------------------------------------------------*/
2360 /* When looking for similar matches we check templates whose size is +/- 2 in
2361 * each direction. This involves 25 possible sizes. This array contains the
2362 * offsets for each of those positions in a spiral pattern. There are 25 pairs
2363 * of numbers in this array: even positions are x values. */
2364 static int two_by_two_walk[50] = {
2365 0, 0,
2366 0, 1,
2367 -1, 0,
2368 0, -1,
2369 1, 0,
2370 -1, 1,
2371 1, 1,
2372 -1, -1,
2373 1, -1,
2374 0, -2,
2375 2, 0,
2376 0, 2,
2377 -2, 0,
2378 -1, -2,
2379 1, -2,
2380 2, -1,
2381 2, 1,
2382 1, 2,
2383 -1, 2,
2384 -2, 1,
2385 -2, -1,
2386 -2, -2,
2387 2, -2,
2388 2, 2,
2389 -2, 2};
2390
2391
2392 /*!
2393 * \brief findSimilarSizedTemplatesInit()
2394 *
2395 * \param[in] classer
2396 * \param[in] pixs instance to be matched
2397 * \return Allocated context to be used with findSimilar*
2398 */
2399 static JBFINDCTX *
findSimilarSizedTemplatesInit(JBCLASSER * classer,PIX * pixs)2400 findSimilarSizedTemplatesInit(JBCLASSER *classer,
2401 PIX *pixs)
2402 {
2403 JBFINDCTX *state;
2404
2405 state = (JBFINDCTX *)LEPT_CALLOC(1, sizeof(JBFINDCTX));
2406 state->w = pixGetWidth(pixs) - 2 * JB_ADDED_PIXELS;
2407 state->h = pixGetHeight(pixs) - 2 * JB_ADDED_PIXELS;
2408 state->classer = classer;
2409 return state;
2410 }
2411
2412
2413 static void
findSimilarSizedTemplatesDestroy(JBFINDCTX ** pstate)2414 findSimilarSizedTemplatesDestroy(JBFINDCTX **pstate)
2415 {
2416 JBFINDCTX *state;
2417
2418 PROCNAME("findSimilarSizedTemplatesDestroy");
2419
2420 if (pstate == NULL) {
2421 L_WARNING("ptr address is null\n", procName);
2422 return;
2423 }
2424 if ((state = *pstate) == NULL)
2425 return;
2426
2427 l_dnaDestroy(&state->dna);
2428 LEPT_FREE(state);
2429 *pstate = NULL;
2430 return;
2431 }
2432
2433
2434 /*!
2435 * \brief findSimilarSizedTemplatesNext()
2436 *
2437 * \param[in] state from findSimilarSizedTemplatesInit
2438 * \return next template number, or -1 when finished
2439 *
2440 * We have a dna hash table that maps template area to a list of template
2441 * numbers with that area. We wish to find similar sized templates,
2442 * so we first look for templates with the same width and height, and
2443 * then with width + 1, etc. This walk is guided by the
2444 * two_by_two_walk array, above.
2445 *
2446 * We don't want to have to collect the whole list of templates first,
2447 * because we hope to find a well-matching template quickly. So we
2448 * keep the context for this walk in an explictit state structure,
2449 * and this function acts like a generator.
2450 */
2451 static l_int32
findSimilarSizedTemplatesNext(JBFINDCTX * state)2452 findSimilarSizedTemplatesNext(JBFINDCTX *state)
2453 {
2454 l_int32 desiredh, desiredw, size, templ;
2455 PIX *pixt;
2456
2457 while(1) { /* Continue the walk over step 'i' */
2458 if (state->i >= 25) { /* all done; didn't find a good match */
2459 return -1;
2460 }
2461
2462 desiredw = state->w + two_by_two_walk[2 * state->i];
2463 desiredh = state->h + two_by_two_walk[2 * state->i + 1];
2464 if (desiredh < 1 || desiredw < 1) { /* invalid size */
2465 state->i++;
2466 continue;
2467 }
2468
2469 if (!state->dna) {
2470 /* We have yet to start walking the array for the step 'i' */
2471 state->dna = l_dnaHashGetDna(state->classer->dahash,
2472 desiredh * desiredw, L_CLONE);
2473 if (!state->dna) { /* nothing there */
2474 state->i++;
2475 continue;
2476 }
2477
2478 state->n = 0; /* OK, we got a dna. */
2479 }
2480
2481 /* Continue working on this dna */
2482 size = l_dnaGetCount(state->dna);
2483 for ( ; state->n < size; ) {
2484 templ = (l_int32)(state->dna->array[state->n++] + 0.5);
2485 pixt = pixaGetPix(state->classer->pixat, templ, L_CLONE);
2486 if (pixGetWidth(pixt) - 2 * JB_ADDED_PIXELS == desiredw &&
2487 pixGetHeight(pixt) - 2 * JB_ADDED_PIXELS == desiredh) {
2488 pixDestroy(&pixt);
2489 return templ;
2490 }
2491 pixDestroy(&pixt);
2492 }
2493
2494 /* Exhausted the dna (no match found); take another step and
2495 * try again. */
2496 state->i++;
2497 l_dnaDestroy(&state->dna);
2498 continue;
2499 }
2500 }
2501
2502
2503 /*!
2504 * \brief finalPositioningForAlignment()
2505 *
2506 * \param[in] pixs input page image
2507 * \param[in] x, y location of UL corner of bb of component in pixs
2508 * \param[in] idelx, idely compensation to match centroids of component
2509 * and template
2510 * \param[in] pixt template, with JB_ADDED_PIXELS of padding on all sides
2511 * \param[in] sumtab for summing fg pixels in an image
2512 * \param[in] &dx, &dy return delta on position for best match; each
2513 * one is in the set {-1, 0, 1}
2514 * \return 0 if OK, 1 on error
2515 *
2516 */
2517 static l_int32
finalPositioningForAlignment(PIX * pixs,l_int32 x,l_int32 y,l_int32 idelx,l_int32 idely,PIX * pixt,l_int32 * sumtab,l_int32 * pdx,l_int32 * pdy)2518 finalPositioningForAlignment(PIX *pixs,
2519 l_int32 x,
2520 l_int32 y,
2521 l_int32 idelx,
2522 l_int32 idely,
2523 PIX *pixt,
2524 l_int32 *sumtab,
2525 l_int32 *pdx,
2526 l_int32 *pdy)
2527 {
2528 l_int32 w, h, i, j, minx, miny, count, mincount;
2529 PIX *pixi; /* clipped from source pixs */
2530 PIX *pixr; /* temporary storage */
2531 BOX *box;
2532
2533 PROCNAME("finalPositioningForAlignment");
2534
2535 if (!pixs)
2536 return ERROR_INT("pixs not defined", procName, 1);
2537 if (!pixt)
2538 return ERROR_INT("pixt not defined", procName, 1);
2539 if (!pdx || !pdy)
2540 return ERROR_INT("&dx and &dy not both defined", procName, 1);
2541 if (!sumtab)
2542 return ERROR_INT("sumtab not defined", procName, 1);
2543 *pdx = *pdy = 0;
2544
2545 /* Use JB_ADDED_PIXELS pixels padding on each side */
2546 pixGetDimensions(pixt, &w, &h, NULL);
2547 box = boxCreate(x - idelx - JB_ADDED_PIXELS,
2548 y - idely - JB_ADDED_PIXELS, w, h);
2549 pixi = pixClipRectangle(pixs, box, NULL);
2550 boxDestroy(&box);
2551 if (!pixi)
2552 return ERROR_INT("pixi not made", procName, 1);
2553
2554 pixr = pixCreate(pixGetWidth(pixi), pixGetHeight(pixi), 1);
2555 mincount = 0x7fffffff;
2556 for (i = -1; i <= 1; i++) {
2557 for (j = -1; j <= 1; j++) {
2558 pixCopy(pixr, pixi);
2559 pixRasterop(pixr, j, i, w, h, PIX_SRC ^ PIX_DST, pixt, 0, 0);
2560 pixCountPixels(pixr, &count, sumtab);
2561 if (count < mincount) {
2562 minx = j;
2563 miny = i;
2564 mincount = count;
2565 }
2566 }
2567 }
2568 pixDestroy(&pixi);
2569 pixDestroy(&pixr);
2570
2571 *pdx = minx;
2572 *pdy = miny;
2573 return 0;
2574 }
2575