1 /*====================================================================*
2 - Copyright (C) 2001 Leptonica. All rights reserved.
3 -
4 - Redistribution and use in source and binary forms, with or without
5 - modification, are permitted provided that the following conditions
6 - are met:
7 - 1. Redistributions of source code must retain the above copyright
8 - notice, this list of conditions and the following disclaimer.
9 - 2. Redistributions in binary form must reproduce the above
10 - copyright notice, this list of conditions and the following
11 - disclaimer in the documentation and/or other materials
12 - provided with the distribution.
13 -
14 - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15 - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16 - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17 - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
18 - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 *====================================================================*/
26
27 /*!
28 * \file recogident.c
29 * <pre>
30 *
31 * Top-level identification
32 * l_int32 recogIdentifyMultiple()
33 *
34 * Segmentation and noise removal
35 * l_int32 recogSplitIntoCharacters()
36 *
37 * Greedy character splitting
38 * l_int32 recogCorrelationBestRow()
39 * l_int32 recogCorrelationBestChar()
40 * static l_int32 pixCorrelationBestShift()
41 *
42 * Low-level identification of single characters
43 * l_int32 recogIdentifyPixa()
44 * l_int32 recogIdentifyPix()
45 * l_int32 recogSkipIdentify()
46 *
47 * Operations for handling identification results
48 * static L_RCHA *rchaCreate()
49 * l_int32 *rchaDestroy()
50 * static L_RCH *rchCreate()
51 * l_int32 *rchDestroy()
52 * l_int32 rchaExtract()
53 * l_int32 rchExtract()
54 * static l_int32 transferRchToRcha()
55 *
 *      Preprocessing and filtering
 *         l_int32             recogProcessToIdentify()
 *         static PIX         *recogPreSplittingFilter()
 *         static l_int32      recogSplittingFilter()
60 *
61 * Postprocessing
62 * SARRAY *recogExtractNumbers()
63 * PIX *showExtractNumbers()
64 *
65 * Static debug helper
66 * static void l_showIndicatorSplitValues()
67 *
68 * See recogbasic.c for examples of training a recognizer, which is
69 * required before it can be used for identification.
70 *
71 * The character splitter repeatedly does a greedy correlation with each
72 * averaged unscaled template, at all pixel locations along the text to
73 * be identified. The vertical alignment is between the template
74 * centroid and the (moving) windowed centroid, including a delta of
75 * 1 pixel above and below. The best match then removes part of the
76 * input image, leaving 1 or 2 pieces, which, after filtering,
77 * are put in a queue. The process ends when the queue is empty.
78 * The filtering is based on the size and aspect ratio of the
79 * remaining pieces; the intent is to remove anything that is
80 * unlikely to be text, such as small pieces and line graphics.
81 *
82 * After splitting, the selected segments are identified using
83 * the input parameters that were initially specified for the
84 * recognizer. Unlike the splitter, which uses the averaged
85 * templates from the unscaled input, the recognizer can use
86 * either all training examples or averaged templates, and these
87 * can be either scaled or unscaled. These choices are specified
88 * when the recognizer is constructed.
89 * </pre>
90 */
91
92 #include <string.h>
93 #include "allheaders.h"
94
95 /* There are two methods for splitting characters: DID and greedy.
96 * The default method is DID. */
97 #define SPLIT_WITH_DID 1
98
99 /* Padding on pix1: added before correlations and removed from result */
100 static const l_int32 LeftRightPadding = 32;
101
102 /* Parameters for filtering and sorting connected components in splitter */
103 static const l_float32 MinFillFactor = 0.10;
104 static const l_int32 DefaultMinHeight = 15; /* min unscaled height */
105 static const l_int32 MinOverlap1 = 6; /* in pass 1 of boxaSort2d() */
106 static const l_int32 MinOverlap2 = 6; /* in pass 2 of boxaSort2d() */
107 static const l_int32 MinHeightPass1 = 5; /* min height to start pass 1 */
108
109
110 static l_int32 pixCorrelationBestShift(PIX *pix1, PIX *pix2, NUMA *nasum1,
111 NUMA *namoment1, l_int32 area2,
112 l_int32 ycent2, l_int32 maxyshift,
113 l_int32 *tab8, l_int32 *pdelx,
114 l_int32 *pdely, l_float32 *pscore,
115 l_int32 debugflag );
116 static L_RCH *rchCreate(l_int32 index, l_float32 score, char *text,
117 l_int32 sample, l_int32 xloc, l_int32 yloc,
118 l_int32 width);
119 static L_RCHA *rchaCreate();
120 static l_int32 transferRchToRcha(L_RCH *rch, L_RCHA *rcha);
121 static PIX *recogPreSplittingFilter(L_RECOG *recog, PIX *pixs, l_int32 minh,
122 l_float32 minaf, l_int32 debug);
123 static l_int32 recogSplittingFilter(L_RECOG *recog, PIX *pixs, l_int32 min,
124 l_float32 minaf, l_int32 *premove,
125 l_int32 debug);
126 static void l_showIndicatorSplitValues(NUMA *na1, NUMA *na2, NUMA *na3,
127 NUMA *na4, NUMA *na5, NUMA *na6);
128
129 /*------------------------------------------------------------------------*
130 * Identification
131 *------------------------------------------------------------------------*/
132 /*!
133 * \brief recogIdentifyMultiple()
134 *
135 * \param[in] recog with training finished
136 * \param[in] pixs containing typically a small number of characters
137 * \param[in] minh remove shorter components; use 0 for default
138 * \param[in] skipsplit 1 to skip the splitting step
139 * \param[out] pboxa [optional] locations of identified components
140 * \param[out] ppixa [optional] images of identified components
141 * \param[out] ppixdb [optional] debug pix: inputs and best fits
142 * \param[in] debugsplit 1 returns pix split debugging images
143 * \return 0 if OK; 1 if nothing is found; 2 for other errors.
144 *
145 * <pre>
146 * Notes:
147 * (1) This filters the input pixa and calls recogIdentifyPixa()
148 * (2) Splitting is relatively slow, because it tries to match all
149 * character templates to all locations. This step can be skipped.
150 * (3) An attempt is made to order the (optionally) returned images
151 * and boxes in 2-dimensional sorted order. These can then
152 * be used to aggregate identified characters into numbers or words.
153 * One typically wants the pixa, which contains a boxa of the
154 * extracted subimages.
155 * </pre>
156 */
157 l_int32
recogIdentifyMultiple(L_RECOG * recog,PIX * pixs,l_int32 minh,l_int32 skipsplit,BOXA ** pboxa,PIXA ** ppixa,PIX ** ppixdb,l_int32 debugsplit)158 recogIdentifyMultiple(L_RECOG *recog,
159 PIX *pixs,
160 l_int32 minh,
161 l_int32 skipsplit,
162 BOXA **pboxa,
163 PIXA **ppixa,
164 PIX **ppixdb,
165 l_int32 debugsplit)
166 {
167 l_int32 n;
168 BOXA *boxa;
169 PIX *pixb;
170 PIXA *pixa;
171
172 PROCNAME("recogIdentifyMultiple");
173
174 if (pboxa) *pboxa = NULL;
175 if (ppixa) *ppixa = NULL;
176 if (ppixdb) *ppixdb = NULL;
177 if (!recog)
178 return ERROR_INT("recog not defined", procName, 2);
179 if (!recog->train_done)
180 return ERROR_INT("training not finished", procName, 2);
181 if (!pixs)
182 return ERROR_INT("pixs not defined", procName, 2);
183
184 /* Binarize if necessary */
185 if (pixGetDepth(pixs) > 1)
186 pixb = pixConvertTo1(pixs, recog->threshold);
187 else
188 pixb = pixClone(pixs);
189
190 /* Noise removal and splitting of touching characters */
191 recogSplitIntoCharacters(recog, pixb, minh, skipsplit, &boxa, &pixa,
192 debugsplit);
193 pixDestroy(&pixb);
194 if (!pixa || (n = pixaGetCount(pixa)) == 0) {
195 pixaDestroy(&pixa);
196 boxaDestroy(&boxa);
197 L_WARNING("nothing found\n", procName);
198 return 1;
199 }
200
201 recogIdentifyPixa(recog, pixa, ppixdb);
202 if (pboxa)
203 *pboxa = boxa;
204 else
205 boxaDestroy(&boxa);
206 if (ppixa)
207 *ppixa = pixa;
208 else
209 pixaDestroy(&pixa);
210 return 0;
211 }
212
213
214 /*------------------------------------------------------------------------*
215 * Segmentation and noise removal *
216 *------------------------------------------------------------------------*/
217 /*!
218 * \brief recogSplitIntoCharacters()
219 *
220 * \param[in] recog
221 * \param[in] pixs 1 bpp, contains only mostly deskewed text
222 * \param[in] minh remove shorter components; use 0 for default
223 * \param[in] skipsplit 1 to skip the splitting step
224 * \param[out] pboxa character bounding boxes
225 * \param[out] ppixa character images
226 * \param[in] debug 1 for results written to pixadb_split
227 * \return 0 if OK, 1 on error or if no components are returned
228 *
229 * <pre>
230 * Notes:
231 * (1) This can be given an image that has an arbitrary number
232 * of text characters. It optionally splits connected
233 * components based on document image decoding in recogDecode().
234 * The returned pixa includes the boxes from which the
235 * (possibly split) components are extracted.
236 * (2) After noise filtering, the resulting components are put in
237 * row-major (2D) order, and the smaller of overlapping
238 * components are removed if they satisfy conditions of
239 * relative size and fractional overlap.
240 * (3) Note that the spliting function uses unscaled templates
241 * and does not bother returning the class results and scores.
242 * Thes are more accurately found later using the scaled templates.
243 * </pre>
244 */
245 l_int32
recogSplitIntoCharacters(L_RECOG * recog,PIX * pixs,l_int32 minh,l_int32 skipsplit,BOXA ** pboxa,PIXA ** ppixa,l_int32 debug)246 recogSplitIntoCharacters(L_RECOG *recog,
247 PIX *pixs,
248 l_int32 minh,
249 l_int32 skipsplit,
250 BOXA **pboxa,
251 PIXA **ppixa,
252 l_int32 debug)
253 {
254 static l_int32 ind = 0;
255 char buf[32];
256 l_int32 i, xoff, yoff, empty, maxw, bw, ncomp, scaling;
257 BOX *box;
258 BOXA *boxa1, *boxa2, *boxa3, *boxa4, *boxad;
259 BOXAA *baa;
260 PIX *pix, *pix1, *pix2, *pix3;
261 PIXA *pixa;
262
263 PROCNAME("recogSplitIntoCharacters");
264
265 lept_mkdir("lept/recog");
266
267 if (pboxa) *pboxa = NULL;
268 if (ppixa) *ppixa = NULL;
269 if (!pboxa || !ppixa)
270 return ERROR_INT("&boxa and &pixa not defined", procName, 1);
271 if (!recog)
272 return ERROR_INT("recog not defined", procName, 1);
273 if (!recog->train_done)
274 return ERROR_INT("training not finished", procName, 1);
275 if (!pixs || pixGetDepth(pixs) != 1)
276 return ERROR_INT("pixs not defined or not 1 bpp", procName, 1);
277 if (minh <= 0) minh = DefaultMinHeight;
278 pixZero(pixs, &empty);
279 if (empty) return 1;
280
281 /* Small vertical close for consolidation. Don't do a horizontal
282 * closing, because it might join separate characters. */
283 pix1 = pixMorphSequence(pixs, "c1.3", 0);
284
285 /* Carefully filter out noise */
286 pix2 = recogPreSplittingFilter(recog, pix1, minh, MinFillFactor, debug);
287 pixDestroy(&pix1);
288
289 /* Get the 8-connected components to be split/identified */
290 boxa1 = pixConnComp(pix2, NULL, 8);
291 pixDestroy(&pix2);
292 ncomp = boxaGetCount(boxa1);
293 if (ncomp == 0) {
294 boxaDestroy(&boxa1);
295 L_WARNING("all components removed\n", procName);
296 return 1;
297 }
298
299 /* Save everything and split the large components */
300 boxa2 = boxaCreate(ncomp);
301 maxw = recog->maxwidth_u + 5;
302 scaling = (recog->scalew > 0 || recog->scaleh > 0) ? TRUE : FALSE;
303 pixa = (debug) ? pixaCreate(ncomp) : NULL;
304 for (i = 0; i < ncomp; i++) {
305 box = boxaGetBox(boxa1, i, L_CLONE);
306 boxGetGeometry(box, &xoff, &yoff, &bw, NULL);
307 /* Treat as one character if it is small, if the images
308 * have been scaled, or if splitting is not to be run. */
309 if (bw <= maxw || scaling || skipsplit) {
310 boxaAddBox(boxa2, box, L_INSERT);
311 } else {
312 pix = pixClipRectangle(pixs, box, NULL);
313 #if SPLIT_WITH_DID
314 if (!debug) {
315 boxa3 = recogDecode(recog, pix, 2, NULL);
316 } else {
317 boxa3 = recogDecode(recog, pix, 2, &pix2);
318 pixaAddPix(pixa, pix2, L_INSERT);
319 }
320 #else /* use greedy splitting */
321 recogCorrelationBestRow(recog, pix, &boxa3, NULL, NULL,
322 NULL, debug);
323 if (debug) {
324 pix2 = pixConvertTo32(pix);
325 pixRenderBoxaArb(pix2, boxa3, 2, 255, 0, 0);
326 pixaAddPix(pixa, pix2, L_INSERT);
327 }
328 #endif /* SPLIT_WITH_DID */
329 pixDestroy(&pix);
330 boxDestroy(&box);
331 if (!boxa3) {
332 L_ERROR("boxa3 not found for component %d\n", procName, i);
333 } else {
334 boxa4 = boxaTransform(boxa3, xoff, yoff, 1.0, 1.0);
335 boxaJoin(boxa2, boxa4, 0, -1);
336 boxaDestroy(&boxa3);
337 boxaDestroy(&boxa4);
338 }
339 }
340 }
341 boxaDestroy(&boxa1);
342 if (pixa) { /* debug */
343 pix3 = pixaDisplayTiledInColumns(pixa, 1, 1.0, 20, 2);
344 snprintf(buf, sizeof(buf), "/tmp/lept/recog/decode-%d.png", ind++);
345 pixWrite(buf, pix3, IFF_PNG);
346 pixaDestroy(&pixa);
347 pixDestroy(&pix3);
348 }
349
350 /* Do a 2D sort on the bounding boxes, and flatten the result to 1D.
351 * For the 2D sort, to add a box to an existing boxa, we require
352 * specified minimum vertical overlaps for the first two passes
353 * of the 2D sort. In pass 1, only components with sufficient
354 * height can start a new boxa. */
355 baa = boxaSort2d(boxa2, NULL, MinOverlap1, MinOverlap2, MinHeightPass1);
356 boxa3 = boxaaFlattenToBoxa(baa, NULL, L_CLONE);
357 boxaaDestroy(&baa);
358 boxaDestroy(&boxa2);
359
360 /* Remove smaller components of overlapping pairs.
361 * We only remove the small component if the overlap is
362 * at least half its area and if its area is no more
363 * than 30% of the area of the large component. Because the
364 * components are in a flattened 2D sort, we don't need to
365 * look far ahead in the array to find all overlapping boxes;
366 * 10 boxes is plenty. */
367 boxad = boxaHandleOverlaps(boxa3, L_COMBINE, 10, 0.5, 0.3, NULL);
368 boxaDestroy(&boxa3);
369
370 /* Extract and save the image pieces from the input image. */
371 *ppixa = pixClipRectangles(pixs, boxad);
372 *pboxa = boxad;
373 return 0;
374 }
375
376
377 /*------------------------------------------------------------------------*
378 * Greedy character splitting *
379 *------------------------------------------------------------------------*/
380 /*!
381 * \brief recogCorrelationBestRow()
382 *
383 * \param[in] recog with LUT's pre-computed
384 * \param[in] pixs typically of multiple touching characters, 1 bpp
385 * \param[out] pboxa bounding boxs of best fit character
386 * \param[out] pnascore [optional] correlation scores
387 * \param[out] pnaindex [optional] indices of classes
388 * \param[out] psachar [optional] array of character strings
389 * \param[in] debug 1 for results written to pixadb_split
390 * \return 0 if OK, 1 on error
391 *
392 * <pre>
393 * Notes:
394 * (1) Supervises character matching for (in general) a c.c with
395 * multiple touching characters. Finds the best match greedily.
396 * Rejects small parts that are left over after splitting.
397 * (2) Matching is to the average, and without character scaling.
398 * </pre>
399 */
400 l_int32
recogCorrelationBestRow(L_RECOG * recog,PIX * pixs,BOXA ** pboxa,NUMA ** pnascore,NUMA ** pnaindex,SARRAY ** psachar,l_int32 debug)401 recogCorrelationBestRow(L_RECOG *recog,
402 PIX *pixs,
403 BOXA **pboxa,
404 NUMA **pnascore,
405 NUMA **pnaindex,
406 SARRAY **psachar,
407 l_int32 debug)
408 {
409 char *charstr;
410 l_int32 index, remove, w, h, bx, bw, bxc, bwc, w1, w2, w3;
411 l_float32 score;
412 BOX *box, *boxc, *boxtrans, *boxl, *boxr, *boxlt, *boxrt;
413 BOXA *boxat;
414 NUMA *nascoret, *naindext, *nasort;
415 PIX *pixb, *pixc, *pixl, *pixr, *pixdb, *pixd;
416 PIXA *pixar, *pixadb;
417 SARRAY *sachart;
418
419 l_int32 iter;
420
421 PROCNAME("recogCorrelationBestRow");
422
423 if (pnascore) *pnascore = NULL;
424 if (pnaindex) *pnaindex = NULL;
425 if (psachar) *psachar = NULL;
426 if (!pboxa)
427 return ERROR_INT("&boxa not defined", procName, 1);
428 *pboxa = NULL;
429 if (!recog)
430 return ERROR_INT("recog not defined", procName, 1);
431 if (!pixs || pixGetDepth(pixs) != 1)
432 return ERROR_INT("pixs not defined or not 1 bpp", procName, 1);
433 if (pixGetWidth(pixs) < recog->minwidth_u - 4)
434 return ERROR_INT("pixs too narrow", procName, 1);
435 if (!recog->train_done)
436 return ERROR_INT("training not finished", procName, 1);
437
438 /* Binarize and crop to foreground if necessary */
439 pixb = recogProcessToIdentify(recog, pixs, 0);
440
441 /* Initialize the arrays */
442 boxat = boxaCreate(4);
443 nascoret = numaCreate(4);
444 naindext = numaCreate(4);
445 sachart = sarrayCreate(4);
446 pixadb = (debug) ? pixaCreate(4) : NULL;
447
448 /* Initialize the images remaining to be processed with the input.
449 * These are stored in pixar, which is used here as a queue,
450 * on which we only put image fragments that are large enough to
451 * contain at least one character. */
452 pixar = pixaCreate(1);
453 pixGetDimensions(pixb, &w, &h, NULL);
454 box = boxCreate(0, 0, w, h);
455 pixaAddPix(pixar, pixb, L_INSERT);
456 pixaAddBox(pixar, box, L_INSERT);
457
458 /* Successively split on the best match until nothing is left.
459 * To be safe, we limit the search to 10 characters. */
460 for (iter = 0; iter < 11; iter++) {
461 if (pixaGetCount(pixar) == 0)
462 break;
463 if (iter == 10) {
464 L_WARNING("more than 10 chars; ending search\n", procName);
465 break;
466 }
467
468 /* Pop one from the queue */
469 pixaRemovePixAndSave(pixar, 0, &pixc, &boxc);
470 boxGetGeometry(boxc, &bxc, NULL, &bwc, NULL);
471
472 /* This is a single component; if noise, remove it */
473 recogSplittingFilter(recog, pixc, 0, MinFillFactor, &remove, debug);
474 if (debug)
475 fprintf(stderr, "iter = %d, removed = %d\n", iter, remove);
476 if (remove) {
477 pixDestroy(&pixc);
478 boxDestroy(&boxc);
479 continue;
480 }
481
482 /* Find the best character match */
483 if (debug) {
484 recogCorrelationBestChar(recog, pixc, &box, &score,
485 &index, &charstr, &pixdb);
486 pixaAddPix(pixadb, pixdb, L_INSERT);
487 } else {
488 recogCorrelationBestChar(recog, pixc, &box, &score,
489 &index, &charstr, NULL);
490 }
491
492 /* Find the box in original coordinates, and append
493 * the results to the arrays. */
494 boxtrans = boxTransform(box, bxc, 0, 1.0, 1.0);
495 boxaAddBox(boxat, boxtrans, L_INSERT);
496 numaAddNumber(nascoret, score);
497 numaAddNumber(naindext, index);
498 sarrayAddString(sachart, charstr, L_INSERT);
499
500 /* Split the current pixc into three regions and save
501 * each region if it is large enough. */
502 boxGetGeometry(box, &bx, NULL, &bw, NULL);
503 w1 = bx;
504 w2 = bw;
505 w3 = bwc - bx - bw;
506 if (debug)
507 fprintf(stderr, " w1 = %d, w2 = %d, w3 = %d\n", w1, w2, w3);
508 if (w1 < recog->minwidth_u - 4) {
509 if (debug) L_INFO("discarding width %d on left\n", procName, w1);
510 } else { /* extract and save left region */
511 boxl = boxCreate(0, 0, bx + 1, h);
512 pixl = pixClipRectangle(pixc, boxl, NULL);
513 boxlt = boxTransform(boxl, bxc, 0, 1.0, 1.0);
514 pixaAddPix(pixar, pixl, L_INSERT);
515 pixaAddBox(pixar, boxlt, L_INSERT);
516 boxDestroy(&boxl);
517 }
518 if (w3 < recog->minwidth_u - 4) {
519 if (debug) L_INFO("discarding width %d on right\n", procName, w3);
520 } else { /* extract and save left region */
521 boxr = boxCreate(bx + bw - 1, 0, w3 + 1, h);
522 pixr = pixClipRectangle(pixc, boxr, NULL);
523 boxrt = boxTransform(boxr, bxc, 0, 1.0, 1.0);
524 pixaAddPix(pixar, pixr, L_INSERT);
525 pixaAddBox(pixar, boxrt, L_INSERT);
526 boxDestroy(&boxr);
527 }
528 pixDestroy(&pixc);
529 boxDestroy(&box);
530 boxDestroy(&boxc);
531 }
532 pixaDestroy(&pixar);
533
534
535 /* Sort the output results by left-to-right in the boxa */
536 *pboxa = boxaSort(boxat, L_SORT_BY_X, L_SORT_INCREASING, &nasort);
537 if (pnascore)
538 *pnascore = numaSortByIndex(nascoret, nasort);
539 if (pnaindex)
540 *pnaindex = numaSortByIndex(naindext, nasort);
541 if (psachar)
542 *psachar = sarraySortByIndex(sachart, nasort);
543 numaDestroy(&nasort);
544 boxaDestroy(&boxat);
545 numaDestroy(&nascoret);
546 numaDestroy(&naindext);
547 sarrayDestroy(&sachart);
548
549 /* Final debug output */
550 if (debug) {
551 pixd = pixaDisplayTiledInRows(pixadb, 32, 2000, 1.0, 0, 15, 2);
552 pixDisplay(pixd, 400, 400);
553 pixaAddPix(recog->pixadb_split, pixd, L_INSERT);
554 pixaDestroy(&pixadb);
555 }
556 return 0;
557 }
558
559
560 /*!
561 * \brief recogCorrelationBestChar()
562 *
563 * \param[in] recog with LUT's pre-computed
564 * \param[in] pixs can be of multiple touching characters, 1 bpp
565 * \param[out] pbox bounding box of best fit character
566 * \param[out] pscore correlation score
567 * \param[out] pindex [optional] index of class
568 * \param[out] pcharstr [optional] character string of class
569 * \param[out] ppixdb [optional] debug pix showing input and best fit
570 * \return 0 if OK, 1 on error
571 *
572 * <pre>
573 * Notes:
574 * (1) Basic matching character splitter. Finds the best match among
575 * all templates to some region of the image. This can result
576 * in splitting the image into two parts. This is "image decoding"
577 * without dynamic programming, because we don't use a setwidth
578 * and compute the best matching score for the entire image.
579 * (2) Matching is to the average templates, without character scaling.
580 * </pre>
581 */
582 l_int32
recogCorrelationBestChar(L_RECOG * recog,PIX * pixs,BOX ** pbox,l_float32 * pscore,l_int32 * pindex,char ** pcharstr,PIX ** ppixdb)583 recogCorrelationBestChar(L_RECOG *recog,
584 PIX *pixs,
585 BOX **pbox,
586 l_float32 *pscore,
587 l_int32 *pindex,
588 char **pcharstr,
589 PIX **ppixdb)
590 {
591 l_int32 i, n, w1, h1, w2, area2, ycent2, delx, dely;
592 l_int32 bestdelx, bestdely, bestindex;
593 l_float32 score, bestscore;
594 BOX *box;
595 BOXA *boxa;
596 NUMA *nasum, *namoment;
597 PIX *pix1, *pix2;
598
599 PROCNAME("recogCorrelationBestChar");
600
601 if (pindex) *pindex = 0;
602 if (pcharstr) *pcharstr = NULL;
603 if (ppixdb) *ppixdb = NULL;
604 if (pbox) *pbox = NULL;
605 if (pscore) *pscore = 0.0;
606 if (!pbox || !pscore)
607 return ERROR_INT("&box and &score not both defined", procName, 1);
608 if (!recog)
609 return ERROR_INT("recog not defined", procName, 1);
610 if (!pixs || pixGetDepth(pixs) != 1)
611 return ERROR_INT("pixs not defined or not 1 bpp", procName, 1);
612 if (!recog->train_done)
613 return ERROR_INT("training not finished", procName, 1);
614
615 /* Binarize and crop to foreground if necessary. Add padding
616 * to both the left and right side; this is compensated for
617 * when reporting the bounding box of the best matched character. */
618 pix1 = recogProcessToIdentify(recog, pixs, LeftRightPadding);
619 pixGetDimensions(pix1, &w1, &h1, NULL);
620
621 /* Compute vertical sum and moment arrays */
622 nasum = pixCountPixelsByColumn(pix1);
623 namoment = pixGetMomentByColumn(pix1, 1);
624
625 /* Do shifted correlation against all averaged templates. */
626 n = recog->setsize;
627 boxa = boxaCreate(n); /* location of best fits for each character */
628 bestscore = 0.0;
629 bestindex = bestdelx = bestdely = 0;
630 for (i = 0; i < n; i++) {
631 pix2 = pixaGetPix(recog->pixa_u, i, L_CLONE);
632 w2 = pixGetWidth(pix2);
633 /* Note that the slightly expended w1 is typically larger
634 * than w2 (the template). */
635 if (w1 >= w2) {
636 numaGetIValue(recog->nasum_u, i, &area2);
637 ptaGetIPt(recog->pta_u, i, NULL, &ycent2);
638 pixCorrelationBestShift(pix1, pix2, nasum, namoment, area2, ycent2,
639 recog->maxyshift, recog->sumtab, &delx,
640 &dely, &score, 1);
641 if (ppixdb) {
642 fprintf(stderr,
643 "Best match template %d: (x,y) = (%d,%d), score = %5.3f\n",
644 i, delx, dely, score);
645 }
646 /* Compensate for padding */
647 box = boxCreate(delx - LeftRightPadding, 0, w2, h1);
648 if (score > bestscore) {
649 bestscore = score;
650 bestdelx = delx - LeftRightPadding;
651 bestdely = dely;
652 bestindex = i;
653 }
654 } else {
655 box = boxCreate(0, 0, 1, 1); /* placeholder */
656 if (ppixdb)
657 fprintf(stderr, "Component too thin: w1 = %d, w2 = %d\n",
658 w1, w2);
659 }
660 boxaAddBox(boxa, box, L_INSERT);
661 pixDestroy(&pix2);
662 }
663
664 *pscore = bestscore;
665 *pbox = boxaGetBox(boxa, bestindex, L_COPY);
666 if (pindex) *pindex = bestindex;
667 if (pcharstr)
668 recogGetClassString(recog, bestindex, pcharstr);
669
670 if (ppixdb) {
671 L_INFO("Best match: class %d; shifts (%d, %d)\n",
672 procName, bestindex, bestdelx, bestdely);
673 pix2 = pixaGetPix(recog->pixa_u, bestindex, L_CLONE);
674 *ppixdb = recogShowMatch(recog, pix1, pix2, NULL, -1, 0.0);
675 pixDestroy(&pix2);
676 }
677
678 pixDestroy(&pix1);
679 boxaDestroy(&boxa);
680 numaDestroy(&nasum);
681 numaDestroy(&namoment);
682 return 0;
683 }
684
685
686 /*!
687 * \brief pixCorrelationBestShift()
688 *
689 * \param[in] pix1 1 bpp, the unknown image; typically larger
690 * \param[in] pix2 1 bpp, the matching template image)
691 * \param[in] nasum1 vertical column pixel sums for pix1
692 * \param[in] namoment1 vertical column first moment of pixels for pix1
693 * \param[in] area2 number of on pixels in pix2
694 * \param[in] ycent2 y component of centroid of pix2
695 * \param[in] maxyshift max y shift of pix2 around the location where
696 * the centroids of pix2 and a windowed part of pix1
697 * are vertically aligned
698 * \param[in] tab8 [optional] sum tab for ON pixels in byte; can be NULL
699 * \param[out] pdelx [optional] best x shift of pix2 relative to pix1
700 * [out] pdely ([optional] best y shift of pix2 relative to pix1
701 * [out] pscore ([optional] maximum score found; can be NULL
702 * \param[in] debugflag <= 0 to skip; positive to generate output.
703 * The integer is used to label the debug image.
704 * \return 0 if OK, 1 on error
705 *
706 * <pre>
707 * Notes:
708 * (1) This maximizes the correlation score between two 1 bpp images,
709 * one of which is typically wider. In a typical example,
710 * pix1 is a bitmap of 2 or more touching characters and pix2 is
711 * a single character template. This finds the location of pix2
712 * that gives the largest correlation.
713 * (2) The windowed area of fg pixels and windowed first moment
714 * in the y direction are computed from the input sum and moment
715 * column arrays, %nasum1 and %namoment1
716 * (3) This is a brute force operation. We compute the correlation
717 * at every x shift for which pix2 fits entirely within pix1,
718 * and where the centroid of pix2 is aligned, within +-maxyshift,
719 * with the centroid of a window of pix1 of the same width.
720 * The correlation is taken over the full height of pix1.
721 * This can be made more efficient.
722 * </pre>
723 */
724 static l_int32
pixCorrelationBestShift(PIX * pix1,PIX * pix2,NUMA * nasum1,NUMA * namoment1,l_int32 area2,l_int32 ycent2,l_int32 maxyshift,l_int32 * tab8,l_int32 * pdelx,l_int32 * pdely,l_float32 * pscore,l_int32 debugflag)725 pixCorrelationBestShift(PIX *pix1,
726 PIX *pix2,
727 NUMA *nasum1,
728 NUMA *namoment1,
729 l_int32 area2,
730 l_int32 ycent2,
731 l_int32 maxyshift,
732 l_int32 *tab8,
733 l_int32 *pdelx,
734 l_int32 *pdely,
735 l_float32 *pscore,
736 l_int32 debugflag)
737 {
738 l_int32 w1, w2, h1, h2, i, j, nx, shifty, delx, dely;
739 l_int32 sum, moment, count;
740 l_int32 *tab, *area1, *arraysum, *arraymoment;
741 l_float32 maxscore, score;
742 l_float32 *ycent1;
743 FPIX *fpix;
744 PIX *pixt, *pixt1, *pixt2;
745
746 PROCNAME("pixCorrelationBestShift");
747
748 if (pdelx) *pdelx = 0;
749 if (pdely) *pdely = 0;
750 if (pscore) *pscore = 0.0;
751 if (!pix1 || pixGetDepth(pix1) != 1)
752 return ERROR_INT("pix1 not defined or not 1 bpp", procName, 1);
753 if (!pix2 || pixGetDepth(pix2) != 1)
754 return ERROR_INT("pix2 not defined or not 1 bpp", procName, 1);
755 if (!nasum1 || !namoment1)
756 return ERROR_INT("nasum1 and namoment1 not both defined", procName, 1);
757 if (area2 <= 0 || ycent2 <= 0)
758 return ERROR_INT("area2 and ycent2 must be > 0", procName, 1);
759
760 /* If pix1 (the unknown image) is narrower than pix2,
761 * don't bother to try the match. pix1 is already padded with
762 * 2 pixels on each side. */
763 pixGetDimensions(pix1, &w1, &h1, NULL);
764 pixGetDimensions(pix2, &w2, &h2, NULL);
765 if (w1 < w2) {
766 if (debugflag > 0) {
767 L_INFO("skipping match with w1 = %d and w2 = %d\n",
768 procName, w1, w2);
769 }
770 return 0;
771 }
772 nx = w1 - w2 + 1;
773
774 if (debugflag > 0)
775 fpix = fpixCreate(nx, 2 * maxyshift + 1);
776 if (!tab8)
777 tab = makePixelSumTab8();
778 else
779 tab = tab8;
780
781 /* Set up the arrays for area1 and ycent1. We have to do this
782 * for each template (pix2) because the window width is w2. */
783 area1 = (l_int32 *)LEPT_CALLOC(nx, sizeof(l_int32));
784 ycent1 = (l_float32 *)LEPT_CALLOC(nx, sizeof(l_int32));
785 arraysum = numaGetIArray(nasum1);
786 arraymoment = numaGetIArray(namoment1);
787 for (i = 0, sum = 0, moment = 0; i < w2; i++) {
788 sum += arraysum[i];
789 moment += arraymoment[i];
790 }
791 for (i = 0; i < nx - 1; i++) {
792 area1[i] = sum;
793 ycent1[i] = (sum == 0) ? ycent2 : (l_float32)moment / (l_float32)sum;
794 sum += arraysum[w2 + i] - arraysum[i];
795 moment += arraymoment[w2 + i] - arraymoment[i];
796 }
797 area1[nx - 1] = sum;
798 ycent1[nx - 1] = (sum == 0) ? ycent2 : (l_float32)moment / (l_float32)sum;
799
800 /* Find the best match location for pix2. At each location,
801 * to insure that pixels are ON only within the intersection of
802 * pix and the shifted pix2:
803 * (1) Start with pixt cleared and equal in size to pix1.
804 * (2) Blit the shifted pix2 onto pixt. Then all ON pixels
805 * are within the intersection of pix1 and the shifted pix2.
806 * (3) AND pix1 with pixt. */
807 pixt = pixCreate(w2, h1, 1);
808 maxscore = 0;
809 delx = 0;
810 dely = 0; /* amount to shift pix2 relative to pix1 to get alignment */
811 for (i = 0; i < nx; i++) {
812 shifty = (l_int32)(ycent1[i] - ycent2 + 0.5);
813 for (j = -maxyshift; j <= maxyshift; j++) {
814 pixClearAll(pixt);
815 pixRasterop(pixt, 0, shifty + j, w2, h2, PIX_SRC, pix2, 0, 0);
816 pixRasterop(pixt, 0, 0, w2, h1, PIX_SRC & PIX_DST, pix1, i, 0);
817 pixCountPixels(pixt, &count, tab);
818 score = (l_float32)count * (l_float32)count /
819 ((l_float32)area1[i] * (l_float32)area2);
820 if (score > maxscore) {
821 maxscore = score;
822 delx = i;
823 dely = shifty + j;
824 }
825
826 if (debugflag > 0)
827 fpixSetPixel(fpix, i, maxyshift + j, 1000.0 * score);
828 }
829 }
830
831 if (debugflag > 0) {
832 lept_mkdir("lept/recog");
833 char buf[128];
834 pixt1 = fpixDisplayMaxDynamicRange(fpix);
835 pixt2 = pixExpandReplicate(pixt1, 5);
836 snprintf(buf, sizeof(buf), "/tmp/lept/recog/junkbs_%d.png", debugflag);
837 pixWrite(buf, pixt2, IFF_PNG);
838 pixDestroy(&pixt1);
839 pixDestroy(&pixt2);
840 fpixDestroy(&fpix);
841 }
842
843 if (pdelx) *pdelx = delx;
844 if (pdely) *pdely = dely;
845 if (pscore) *pscore = maxscore;
846 if (!tab8) LEPT_FREE(tab);
847 LEPT_FREE(area1);
848 LEPT_FREE(ycent1);
849 LEPT_FREE(arraysum);
850 LEPT_FREE(arraymoment);
851 pixDestroy(&pixt);
852 return 0;
853 }
854
855
856 /*------------------------------------------------------------------------*
857 * Low-level identification *
858 *------------------------------------------------------------------------*/
859 /*!
860 * \brief recogIdentifyPixa()
861 *
862 * \param[in] recog
863 * \param[in] pixa of 1 bpp images to match
864 * \param[out] ppixdb [optional] pix showing inputs and best fits
865 * \return 0 if OK, 1 on error
866 *
867 * <pre>
868 * Notes:
 *      (1) This should be called by recogIdentifyMultiple(), which
 *          binarizes and splits characters before sending %pixa here.
871 * (2) This calls recogIdentifyPix(), which does the same operation
872 * on each pix in %pixa, and optionally returns the arrays
873 * of results (scores, class index and character string)
874 * for the best correlation match.
875 * </pre>
876 */
877 l_int32
recogIdentifyPixa(L_RECOG * recog,PIXA * pixa,PIX ** ppixdb)878 recogIdentifyPixa(L_RECOG *recog,
879 PIXA *pixa,
880 PIX **ppixdb)
881 {
882 char *text;
883 l_int32 i, n, fail, index, depth;
884 l_float32 score;
885 PIX *pix1, *pix2, *pix3;
886 PIXA *pixa1;
887 L_RCH *rch;
888
889 PROCNAME("recogIdentifyPixa");
890
891 if (ppixdb) *ppixdb = NULL;
892 if (!recog)
893 return ERROR_INT("recog not defined", procName, 1);
894 if (!pixa)
895 return ERROR_INT("pixa not defined", procName, 1);
896
897 /* Run the recognizer on the set of images. This writes
898 * the text string into each pix in pixa. */
899 n = pixaGetCount(pixa);
900 rchaDestroy(&recog->rcha);
901 recog->rcha = rchaCreate();
902 pixa1 = (ppixdb) ? pixaCreate(n) : NULL;
903 depth = 1;
904 for (i = 0; i < n; i++) {
905 pix1 = pixaGetPix(pixa, i, L_CLONE);
906 pix2 = NULL;
907 fail = FALSE;
908 if (!ppixdb)
909 fail = recogIdentifyPix(recog, pix1, NULL);
910 else
911 fail = recogIdentifyPix(recog, pix1, &pix2);
912 if (fail)
913 recogSkipIdentify(recog);
914 if ((rch = recog->rch) == NULL) {
915 L_ERROR("rch not found for char %d\n", procName, i);
916 pixDestroy(&pix1);
917 pixDestroy(&pix2);
918 continue;
919 }
920 rchExtract(rch, NULL, NULL, &text, NULL, NULL, NULL, NULL);
921 pixSetText(pix1, text);
922 LEPT_FREE(text);
923 if (ppixdb) {
924 rchExtract(rch, &index, &score, NULL, NULL, NULL, NULL, NULL);
925 pix3 = recogShowMatch(recog, pix2, NULL, NULL, index, score);
926 if (i == 0) depth = pixGetDepth(pix3);
927 pixaAddPix(pixa1, pix3, L_INSERT);
928 pixDestroy(&pix2);
929 }
930 transferRchToRcha(rch, recog->rcha);
931 pixDestroy(&pix1);
932 }
933
934 /* Package the images for debug */
935 if (ppixdb) {
936 *ppixdb = pixaDisplayTiledInRows(pixa1, depth, 2500, 1.0, 0, 20, 1);
937 pixaDestroy(&pixa1);
938 }
939
940 return 0;
941 }
942
943
944 /*!
945 * \brief recogIdentifyPix()
946 *
947 * \param[in] recog with LUT's pre-computed
948 * \param[in] pixs of a single character, 1 bpp
949 * \param[out] ppixdb [optional] debug pix showing input and best fit
950 * \return 0 if OK, 1 on error
951 *
952 * <pre>
953 * Notes:
954 * (1) Basic recognition function for a single character.
955 * (2) If templ_use == L_USE_ALL_TEMPLATES, which is the default
956 * situation, matching is attempted to every bitmap in the recog,
957 * and the identify of the best match is returned.
958 * (3) For finding outliers, templ_use == L_USE_AVERAGE_TEMPLATES, and
959 * matching is only attemplted to the averaged bitmaps. For this
960 * case, the index of the bestsample is meaningless (0 is returned
961 * if requested).
962 * (4) The score is related to the confidence (probability of correct
963 * identification), in that a higher score is correlated with
964 * a higher probability. However, the actual relation between
965 * the correlation (score) and the probability is not known;
966 * we call this a "score" because "confidence" can be misinterpreted
967 * as an actual probability.
968 * </pre>
969 */
970 l_int32
recogIdentifyPix(L_RECOG * recog,PIX * pixs,PIX ** ppixdb)971 recogIdentifyPix(L_RECOG *recog,
972 PIX *pixs,
973 PIX **ppixdb)
974 {
975 char *text;
976 l_int32 i, j, n, bestindex, bestsample, area1, area2;
977 l_int32 shiftx, shifty, bestdelx, bestdely, bestwidth, maxyshift;
978 l_float32 x1, y1, x2, y2, delx, dely, score, maxscore;
979 NUMA *numa;
980 PIX *pix0, *pix1, *pix2;
981 PIXA *pixa;
982 PTA *pta;
983
984 PROCNAME("recogIdentifyPix");
985
986 if (ppixdb) *ppixdb = NULL;
987 if (!recog)
988 return ERROR_INT("recog not defined", procName, 1);
989 if (!pixs || pixGetDepth(pixs) != 1)
990 return ERROR_INT("pixs not defined or not 1 bpp", procName, 1);
991
992 /* Do the averaging if required and not yet done. */
993 if (recog->templ_use == L_USE_AVERAGE_TEMPLATES && !recog->ave_done) {
994 recogAverageSamples(&recog, 0);
995 if (!recog)
996 return ERROR_INT("averaging failed", procName, 1);
997 }
998
999 /* Binarize and crop to foreground if necessary */
1000 if ((pix0 = recogProcessToIdentify(recog, pixs, 0)) == NULL)
1001 return ERROR_INT("no fg pixels in pix0", procName, 1);
1002
1003 /* Optionally scale and/or convert to fixed stroke width */
1004 pix1 = recogModifyTemplate(recog, pix0);
1005 pixDestroy(&pix0);
1006 if (!pix1)
1007 return ERROR_INT("no fg pixels in pix1", procName, 1);
1008
1009 /* Do correlation at all positions within +-maxyshift of
1010 * the nominal centroid alignment. */
1011 pixCountPixels(pix1, &area1, recog->sumtab);
1012 pixCentroid(pix1, recog->centtab, recog->sumtab, &x1, &y1);
1013 bestindex = bestsample = bestdelx = bestdely = bestwidth = 0;
1014 maxscore = 0.0;
1015 maxyshift = recog->maxyshift;
1016 if (recog->templ_use == L_USE_AVERAGE_TEMPLATES) {
1017 for (i = 0; i < recog->setsize; i++) {
1018 numaGetIValue(recog->nasum, i, &area2);
1019 if (area2 == 0) continue; /* no template available */
1020 pix2 = pixaGetPix(recog->pixa, i, L_CLONE);
1021 ptaGetPt(recog->pta, i, &x2, &y2);
1022 delx = x1 - x2;
1023 dely = y1 - y2;
1024 for (shifty = -maxyshift; shifty <= maxyshift; shifty++) {
1025 for (shiftx = -maxyshift; shiftx <= maxyshift; shiftx++) {
1026 pixCorrelationScoreSimple(pix1, pix2, area1, area2,
1027 delx + shiftx, dely + shifty,
1028 5, 5, recog->sumtab, &score);
1029 if (score > maxscore) {
1030 bestindex = i;
1031 bestdelx = delx + shiftx;
1032 bestdely = dely + shifty;
1033 maxscore = score;
1034 }
1035 }
1036 }
1037 pixDestroy(&pix2);
1038 }
1039 } else { /* use all the samples */
1040 for (i = 0; i < recog->setsize; i++) {
1041 pixa = pixaaGetPixa(recog->pixaa, i, L_CLONE);
1042 n = pixaGetCount(pixa);
1043 if (n == 0) {
1044 pixaDestroy(&pixa);
1045 continue;
1046 }
1047 numa = numaaGetNuma(recog->naasum, i, L_CLONE);
1048 pta = ptaaGetPta(recog->ptaa, i, L_CLONE);
1049 for (j = 0; j < n; j++) {
1050 pix2 = pixaGetPix(pixa, j, L_CLONE);
1051 numaGetIValue(numa, j, &area2);
1052 ptaGetPt(pta, j, &x2, &y2);
1053 delx = x1 - x2;
1054 dely = y1 - y2;
1055 for (shifty = -maxyshift; shifty <= maxyshift; shifty++) {
1056 for (shiftx = -maxyshift; shiftx <= maxyshift; shiftx++) {
1057 pixCorrelationScoreSimple(pix1, pix2, area1, area2,
1058 delx + shiftx, dely + shifty,
1059 5, 5, recog->sumtab, &score);
1060 if (score > maxscore) {
1061 bestindex = i;
1062 bestsample = j;
1063 bestdelx = delx + shiftx;
1064 bestdely = dely + shifty;
1065 maxscore = score;
1066 bestwidth = pixGetWidth(pix2);
1067 }
1068 }
1069 }
1070 pixDestroy(&pix2);
1071 }
1072 pixaDestroy(&pixa);
1073 numaDestroy(&numa);
1074 ptaDestroy(&pta);
1075 }
1076 }
1077
1078 /* Package up the results */
1079 recogGetClassString(recog, bestindex, &text);
1080 rchDestroy(&recog->rch);
1081 recog->rch = rchCreate(bestindex, maxscore, text, bestsample,
1082 bestdelx, bestdely, bestwidth);
1083
1084 if (ppixdb) {
1085 if (recog->templ_use == L_USE_AVERAGE_TEMPLATES) {
1086 L_INFO("Best match: str %s; class %d; sh (%d, %d); score %5.3f\n",
1087 procName, text, bestindex, bestdelx, bestdely, maxscore);
1088 pix2 = pixaGetPix(recog->pixa, bestindex, L_CLONE);
1089 } else { /* L_USE_ALL_TEMPLATES */
1090 L_INFO("Best match: str %s; sample %d in class %d; score %5.3f\n",
1091 procName, text, bestsample, bestindex, maxscore);
1092 if (maxyshift > 0 && (L_ABS(bestdelx) > 0 || L_ABS(bestdely) > 0)) {
1093 L_INFO(" Best shift: (%d, %d)\n",
1094 procName, bestdelx, bestdely);
1095 }
1096 pix2 = pixaaGetPix(recog->pixaa, bestindex, bestsample, L_CLONE);
1097 }
1098 *ppixdb = recogShowMatch(recog, pix1, pix2, NULL, -1, 0.0);
1099 pixDestroy(&pix2);
1100 }
1101
1102 pixDestroy(&pix1);
1103 return 0;
1104 }
1105
1106
1107 /*!
1108 * \brief recogSkipIdentify()
1109 *
1110 * \param[in] recog
1111 * \return 0 if OK, 1 on error
1112 *
1113 * <pre>
1114 * Notes:
1115 * (1) This just writes a "dummy" result with 0 score and empty
1116 * string id into the rch.
1117 * </pre>
1118 */
1119 l_int32
recogSkipIdentify(L_RECOG * recog)1120 recogSkipIdentify(L_RECOG *recog)
1121 {
1122 PROCNAME("recogSkipIdentify");
1123
1124 if (!recog)
1125 return ERROR_INT("recog not defined", procName, 1);
1126
1127 /* Package up placeholder results */
1128 rchDestroy(&recog->rch);
1129 recog->rch = rchCreate(0, 0.0, stringNew(""), 0, 0, 0, 0);
1130 return 0;
1131 }
1132
1133
1134 /*------------------------------------------------------------------------*
1135 * Operations for handling identification results *
1136 *------------------------------------------------------------------------*/
1137 /*!
1138 * \brief rchaCreate()
1139 *
1140 * Return: 0 if OK, 1 on error
1141 *
1142 * Notes:
1143 * (1) Be sure to destroy any existing rcha before assigning this.
1144 */
1145 static L_RCHA *
rchaCreate()1146 rchaCreate()
1147 {
1148 L_RCHA *rcha;
1149
1150 rcha = (L_RCHA *)LEPT_CALLOC(1, sizeof(L_RCHA));
1151 rcha->naindex = numaCreate(0);
1152 rcha->nascore = numaCreate(0);
1153 rcha->satext = sarrayCreate(0);
1154 rcha->nasample = numaCreate(0);
1155 rcha->naxloc = numaCreate(0);
1156 rcha->nayloc = numaCreate(0);
1157 rcha->nawidth = numaCreate(0);
1158 return rcha;
1159 }
1160
1161
1162 /*!
1163 * \brief rchaDestroy()
1164 *
1165 * \param[in,out] prcha to be nulled
1166 */
1167 void
rchaDestroy(L_RCHA ** prcha)1168 rchaDestroy(L_RCHA **prcha)
1169 {
1170 L_RCHA *rcha;
1171
1172 PROCNAME("rchaDestroy");
1173
1174 if (prcha == NULL) {
1175 L_WARNING("&rcha is null!\n", procName);
1176 return;
1177 }
1178 if ((rcha = *prcha) == NULL)
1179 return;
1180
1181 numaDestroy(&rcha->naindex);
1182 numaDestroy(&rcha->nascore);
1183 sarrayDestroy(&rcha->satext);
1184 numaDestroy(&rcha->nasample);
1185 numaDestroy(&rcha->naxloc);
1186 numaDestroy(&rcha->nayloc);
1187 numaDestroy(&rcha->nawidth);
1188 LEPT_FREE(rcha);
1189 *prcha = NULL;
1190 return;
1191 }
1192
1193
1194 /*!
1195 * \brief rchCreate()
1196 *
1197 * \param[in] index index of best template
1198 * \param[in] score correlation score of best template
1199 * \param[in] text character string of best template
1200 * \param[in] sample index of best sample; -1 if averages are used
1201 * \param[in] xloc x-location of template: delx + shiftx
1202 * \param[in] yloc y-location of template: dely + shifty
1203 * \param[in] width width of best template
1204 * \return 0 if OK, 1 on error
1205 *
1206 * <pre>
1207 * Notes:
1208 * (1) Be sure to destroy any existing rch before assigning this.
1209 * (2) This stores the text string, not a copy of it, so the
1210 * caller must not destroy the string.
1211 * </pre>
1212 */
1213 static L_RCH *
rchCreate(l_int32 index,l_float32 score,char * text,l_int32 sample,l_int32 xloc,l_int32 yloc,l_int32 width)1214 rchCreate(l_int32 index,
1215 l_float32 score,
1216 char *text,
1217 l_int32 sample,
1218 l_int32 xloc,
1219 l_int32 yloc,
1220 l_int32 width)
1221 {
1222 L_RCH *rch;
1223
1224 rch = (L_RCH *)LEPT_CALLOC(1, sizeof(L_RCH));
1225 rch->index = index;
1226 rch->score = score;
1227 rch->text = text;
1228 rch->sample = sample;
1229 rch->xloc = xloc;
1230 rch->yloc = yloc;
1231 rch->width = width;
1232 return rch;
1233 }
1234
1235
1236 /*!
1237 * \brief rchDestroy()
1238 *
1239 * \param[in,out] prch to be nulled
1240 */
1241 void
rchDestroy(L_RCH ** prch)1242 rchDestroy(L_RCH **prch)
1243 {
1244 L_RCH *rch;
1245
1246 PROCNAME("rchDestroy");
1247
1248 if (prch == NULL) {
1249 L_WARNING("&rch is null!\n", procName);
1250 return;
1251 }
1252 if ((rch = *prch) == NULL)
1253 return;
1254 LEPT_FREE(rch->text);
1255 LEPT_FREE(rch);
1256 *prch = NULL;
1257 return;
1258 }
1259
1260
1261 /*!
1262 * \brief rchaExtract()
1263 *
1264 * \param[in] rcha
1265 * \param[out] pnaindex [optional] indices of best templates
1266 * \param[out] pnascore [optional] correl scores of best templates
1267 * \param[out] psatext [optional] character strings of best templates
1268 * \param[out] pnasample [optional] indices of best samples
1269 * \param[out] pnaxloc [optional] x-locations of templates
1270 * \param[out] pnayloc [optional] y-locations of templates
1271 * \param[out] pnawidth [optional] widths of best templates
1272 * \return 0 if OK, 1 on error
1273 *
1274 * <pre>
1275 * Notes:
1276 * (1) This returns clones of the number and string arrays. They must
1277 * be destroyed by the caller.
1278 * </pre>
1279 */
1280 l_int32
rchaExtract(L_RCHA * rcha,NUMA ** pnaindex,NUMA ** pnascore,SARRAY ** psatext,NUMA ** pnasample,NUMA ** pnaxloc,NUMA ** pnayloc,NUMA ** pnawidth)1281 rchaExtract(L_RCHA *rcha,
1282 NUMA **pnaindex,
1283 NUMA **pnascore,
1284 SARRAY **psatext,
1285 NUMA **pnasample,
1286 NUMA **pnaxloc,
1287 NUMA **pnayloc,
1288 NUMA **pnawidth)
1289 {
1290 PROCNAME("rchaExtract");
1291
1292 if (pnaindex) *pnaindex = NULL;
1293 if (pnascore) *pnascore = NULL;
1294 if (psatext) *psatext = NULL;
1295 if (pnasample) *pnasample = NULL;
1296 if (pnaxloc) *pnaxloc = NULL;
1297 if (pnayloc) *pnayloc = NULL;
1298 if (pnawidth) *pnawidth = NULL;
1299 if (!rcha)
1300 return ERROR_INT("rcha not defined", procName, 1);
1301
1302 if (pnaindex) *pnaindex = numaClone(rcha->naindex);
1303 if (pnascore) *pnascore = numaClone(rcha->nascore);
1304 if (psatext) *psatext = sarrayClone(rcha->satext);
1305 if (pnasample) *pnasample = numaClone(rcha->nasample);
1306 if (pnaxloc) *pnaxloc = numaClone(rcha->naxloc);
1307 if (pnayloc) *pnayloc = numaClone(rcha->nayloc);
1308 if (pnawidth) *pnawidth = numaClone(rcha->nawidth);
1309 return 0;
1310 }
1311
1312
1313 /*!
1314 * \brief rchExtract()
1315 *
1316 * \param[in] rch
1317 * \param[out] pindex [optional] index of best template
1318 * \param[out] pscore [optional] correlation score of best template
1319 * \param[out] ptext [optional] character string of best template
1320 * \param[out] psample [optional] index of best sample
1321 * \param[out] pxloc [optional] x-location of template
1322 * \param[out] pyloc [optional] y-location of template
1323 * \param[out] pwidth [optional] width of best template
1324 * \return 0 if OK, 1 on error
1325 */
1326 l_int32
rchExtract(L_RCH * rch,l_int32 * pindex,l_float32 * pscore,char ** ptext,l_int32 * psample,l_int32 * pxloc,l_int32 * pyloc,l_int32 * pwidth)1327 rchExtract(L_RCH *rch,
1328 l_int32 *pindex,
1329 l_float32 *pscore,
1330 char **ptext,
1331 l_int32 *psample,
1332 l_int32 *pxloc,
1333 l_int32 *pyloc,
1334 l_int32 *pwidth)
1335 {
1336 PROCNAME("rchExtract");
1337
1338 if (pindex) *pindex = 0;
1339 if (pscore) *pscore = 0.0;
1340 if (ptext) *ptext = NULL;
1341 if (psample) *psample = 0;
1342 if (pxloc) *pxloc = 0;
1343 if (pyloc) *pyloc = 0;
1344 if (pwidth) *pwidth = 0;
1345 if (!rch)
1346 return ERROR_INT("rch not defined", procName, 1);
1347
1348 if (pindex) *pindex = rch->index;
1349 if (pscore) *pscore = rch->score;
1350 if (ptext) *ptext = stringNew(rch->text); /* new string: owned by caller */
1351 if (psample) *psample = rch->sample;
1352 if (pxloc) *pxloc = rch->xloc;
1353 if (pyloc) *pyloc = rch->yloc;
1354 if (pwidth) *pwidth = rch->width;
1355 return 0;
1356 }
1357
1358
1359 /*!
1360 * \brief transferRchToRcha()
1361 *
1362 * \param[in] rch source of data
1363 * \param[in] rcha append to arrays in this destination
1364 * \return 0 if OK, 1 on error
1365 *
1366 * <pre>
1367 * Notes:
1368 * (1) This is used to transfer the results of a single character
1369 * identification to an rcha array for the array of characters.
1370 * </pre>
1371 */
1372 static l_int32
transferRchToRcha(L_RCH * rch,L_RCHA * rcha)1373 transferRchToRcha(L_RCH *rch,
1374 L_RCHA *rcha)
1375 {
1376
1377 PROCNAME("transferRchToRcha");
1378
1379 if (!rch)
1380 return ERROR_INT("rch not defined", procName, 1);
1381 if (!rcha)
1382 return ERROR_INT("rcha not defined", procName, 1);
1383
1384 numaAddNumber(rcha->naindex, rch->index);
1385 numaAddNumber(rcha->nascore, rch->score);
1386 sarrayAddString(rcha->satext, rch->text, L_COPY);
1387 numaAddNumber(rcha->nasample, rch->sample);
1388 numaAddNumber(rcha->naxloc, rch->xloc);
1389 numaAddNumber(rcha->nayloc, rch->yloc);
1390 numaAddNumber(rcha->nawidth, rch->width);
1391 return 0;
1392 }
1393
1394
1395 /*------------------------------------------------------------------------*
1396 * Preprocessing and filtering *
1397 *------------------------------------------------------------------------*/
1398 /*!
1399 * \brief recogProcessToIdentify()
1400 *
1401 * \param[in] recog with LUT's pre-computed
1402 * \param[in] pixs typ. single character, possibly d > 1 and uncropped
1403 * \param[in] pad extra pixels added to left and right sides
1404 * \return pixd 1 bpp, clipped to foreground, or NULL if there
1405 * are no fg pixels or on error.
1406 *
1407 * <pre>
1408 * Notes:
1409 * (1) This is a lightweight operation to insure that the input
1410 * image is 1 bpp, properly cropped, and padded on each side.
1411 * If bpp > 1, the image is thresholded.
1412 * </pre>
1413 */
1414 PIX *
recogProcessToIdentify(L_RECOG * recog,PIX * pixs,l_int32 pad)1415 recogProcessToIdentify(L_RECOG *recog,
1416 PIX *pixs,
1417 l_int32 pad)
1418 {
1419 l_int32 canclip;
1420 PIX *pix1, *pix2, *pixd;
1421
1422 PROCNAME("recogProcessToIdentify");
1423
1424 if (!recog)
1425 return (PIX *)ERROR_PTR("recog not defined", procName, NULL);
1426 if (!pixs)
1427 return (PIX *)ERROR_PTR("pixs not defined", procName, NULL);
1428
1429 if (pixGetDepth(pixs) != 1)
1430 pix1 = pixThresholdToBinary(pixs, recog->threshold);
1431 else
1432 pix1 = pixClone(pixs);
1433 pixTestClipToForeground(pix1, &canclip);
1434 if (canclip)
1435 pixClipToForeground(pix1, &pix2, NULL);
1436 else
1437 pix2 = pixClone(pix1);
1438 pixDestroy(&pix1);
1439 if (!pix2)
1440 return (PIX *)ERROR_PTR("no foreground pixels", procName, NULL);
1441
1442 pixd = pixAddBorderGeneral(pix2, pad, pad, 0, 0, 0);
1443 pixDestroy(&pix2);
1444 return pixd;
1445 }
1446
1447
1448 /*!
1449 * \brief recogPreSplittingFilter()
1450 *
1451 * \param[in] recog
1452 * \param[in] pixs 1 bpp, many connected components
1453 * \param[in] minh minimum height of components to be retained
1454 * \param[in] minaf minimum area fraction (|fg|/(w*h)) to be retained
1455 * \param[in] debug 1 to output indicator arrays
1456 * \return pixd with filtered components removed or NULL on error
1457 */
1458 static PIX *
recogPreSplittingFilter(L_RECOG * recog,PIX * pixs,l_int32 minh,l_float32 minaf,l_int32 debug)1459 recogPreSplittingFilter(L_RECOG *recog,
1460 PIX *pixs,
1461 l_int32 minh,
1462 l_float32 minaf,
1463 l_int32 debug)
1464 {
1465 l_int32 scaling, minsplitw, maxsplith, maxasp;
1466 BOXA *boxas;
1467 NUMA *naw, *nah, *na1, *na1c, *na2, *na3, *na4, *na5, *na6, *na7;
1468 PIX *pixd;
1469 PIXA *pixas;
1470
1471 PROCNAME("recogPreSplittingFilter");
1472
1473 if (!recog)
1474 return (PIX *)ERROR_PTR("recog not defined", procName, NULL);
1475 if (!pixs)
1476 return (PIX *)ERROR_PTR("pixs not defined", procName, NULL);
1477
1478 /* If there is scaling, do not remove components based on the
1479 * values of min_splitw and max_splith. */
1480 scaling = (recog->scalew > 0 || recog->scaleh > 0) ? TRUE : FALSE;
1481 minsplitw = (scaling) ? 1 : recog->min_splitw - 3;
1482 maxsplith = (scaling) ? 150 : recog->max_splith;
1483 maxasp = recog->max_wh_ratio;
1484
1485 /* Generate an indicator array of connected components to remove:
1486 * short stuff
1487 * tall stuff
1488 * components with large width/height ratio
1489 * components with small area fill fraction */
1490 boxas = pixConnComp(pixs, &pixas, 8);
1491 pixaFindDimensions(pixas, &naw, &nah);
1492 na1 = numaMakeThresholdIndicator(naw, minsplitw, L_SELECT_IF_LT);
1493 na1c = numaCopy(na1);
1494 na2 = numaMakeThresholdIndicator(nah, minh, L_SELECT_IF_LT);
1495 na3 = numaMakeThresholdIndicator(nah, maxsplith, L_SELECT_IF_GT);
1496 na4 = pixaFindWidthHeightRatio(pixas);
1497 na5 = numaMakeThresholdIndicator(na4, maxasp, L_SELECT_IF_GT);
1498 na6 = pixaFindAreaFraction(pixas);
1499 na7 = numaMakeThresholdIndicator(na6, minaf, L_SELECT_IF_LT);
1500 numaLogicalOp(na1, na1, na2, L_UNION);
1501 numaLogicalOp(na1, na1, na3, L_UNION);
1502 numaLogicalOp(na1, na1, na5, L_UNION);
1503 numaLogicalOp(na1, na1, na7, L_UNION);
1504 pixd = pixCopy(NULL, pixs);
1505 pixRemoveWithIndicator(pixd, pixas, na1);
1506 if (debug)
1507 l_showIndicatorSplitValues(na1c, na2, na3, na5, na7, na1);
1508 numaDestroy(&naw);
1509 numaDestroy(&nah);
1510 numaDestroy(&na1);
1511 numaDestroy(&na1c);
1512 numaDestroy(&na2);
1513 numaDestroy(&na3);
1514 numaDestroy(&na4);
1515 numaDestroy(&na5);
1516 numaDestroy(&na6);
1517 numaDestroy(&na7);
1518 boxaDestroy(&boxas);
1519 pixaDestroy(&pixas);
1520 return pixd;
1521 }
1522
1523
1524 /*!
1525 * \brief recogSplittingFilter()
1526 *
1527 * \param[in] recog
1528 * \param[in] pixs 1 bpp, single connected component
1529 * \param[in] minh minimum height of component; 0 for default
1530 * \param[in] minaf minimum area fraction (|fg|/(w*h)) to be retained
1531 * \param[out] premove 0 to save, 1 to remove
1532 * \param[in] debug 1 to output indicator arrays
1533 * \return 0 if OK, 1 on error
1534 */
1535 static l_int32
recogSplittingFilter(L_RECOG * recog,PIX * pixs,l_int32 minh,l_float32 minaf,l_int32 * premove,l_int32 debug)1536 recogSplittingFilter(L_RECOG *recog,
1537 PIX *pixs,
1538 l_int32 minh,
1539 l_float32 minaf,
1540 l_int32 *premove,
1541 l_int32 debug)
1542 {
1543 l_int32 w, h;
1544 l_float32 aspratio, fract;
1545
1546 PROCNAME("recogSplittingFilter");
1547
1548 if (!premove)
1549 return ERROR_INT("&remove not defined", procName, 1);
1550 *premove = 0;
1551 if (!recog)
1552 return ERROR_INT("recog not defined", procName, 1);
1553 if (!pixs)
1554 return ERROR_INT("pixs not defined", procName, 1);
1555 if (minh <= 0) minh = DefaultMinHeight;
1556
1557 /* Remove from further consideration:
1558 * small stuff
1559 * components with large width/height ratio
1560 * components with small area fill fraction */
1561 pixGetDimensions(pixs, &w, &h, NULL);
1562 if (w < recog->min_splitw) {
1563 if (debug) L_INFO("w = %d < %d\n", procName, w, recog->min_splitw);
1564 *premove = 1;
1565 return 0;
1566 }
1567 if (h < minh) {
1568 if (debug) L_INFO("h = %d < %d\n", procName, h, minh);
1569 *premove = 1;
1570 return 0;
1571 }
1572 aspratio = (l_float32)w / (l_float32)h;
1573 if (aspratio > recog->max_wh_ratio) {
1574 if (debug) L_INFO("w/h = %5.3f too large\n", procName, aspratio);
1575 *premove = 1;
1576 return 0;
1577 }
1578 pixFindAreaFraction(pixs, recog->sumtab, &fract);
1579 if (fract < minaf) {
1580 if (debug) L_INFO("area fill fract %5.3f < %5.3f\n",
1581 procName, fract, minaf);
1582 *premove = 1;
1583 return 0;
1584 }
1585
1586 return 0;
1587 }
1588
1589
1590 /*------------------------------------------------------------------------*
1591 * Postprocessing *
1592 *------------------------------------------------------------------------*/
1593 /*!
1594 * \brief recogExtractNumbers()
1595 *
1596 * \param[in] recog
1597 * \param[in] boxas location of components
1598 * \param[in] scorethresh min score for which we accept a component
1599 * \param[in] spacethresh max horizontal distance allowed between digits,
1600 * use -1 for default
1601 * \param[out] pbaa [optional] bounding boxes of identified numbers
1602 * \param[out] pnaa [optional] scores of identified digits
1603 * \return sa of identified numbers, or NULL on error
1604 *
1605 * <pre>
1606 * Notes:
1607 * (1) This extracts digit data after recogaIdentifyMultiple() or
1608 * lower-level identification has taken place.
1609 * (2) Each string in the returned sa contains a sequence of ascii
1610 * digits in a number.
1611 * (3) The horizontal distance between boxes (limited by %spacethresh)
1612 * is the negative of the horizontal overlap.
1613 * (4) Components with a score less than %scorethresh, which may
1614 * be hyphens or other small characters, will signal the
1615 * end of the current sequence of digits in the number. A typical
1616 * value for %scorethresh is 0.60.
1617 * (5) We allow two digits to be combined if these conditions apply:
1618 * (a) the first is to the left of the second
1619 * (b) the second has a horizontal separation less than %spacethresh
1620 * (c) the vertical overlap >= 0 (vertical separation < 0)
1621 * (d) both have a score that exceeds %scorethresh
1622 * (6) Each numa in the optionally returned naa contains the digit
1623 * scores of a number. Each boxa in the optionally returned baa
1624 * contains the bounding boxes of the digits in the number.
1625 * </pre>
1626 */
1627 SARRAY *
recogExtractNumbers(L_RECOG * recog,BOXA * boxas,l_float32 scorethresh,l_int32 spacethresh,BOXAA ** pbaa,NUMAA ** pnaa)1628 recogExtractNumbers(L_RECOG *recog,
1629 BOXA *boxas,
1630 l_float32 scorethresh,
1631 l_int32 spacethresh,
1632 BOXAA **pbaa,
1633 NUMAA **pnaa)
1634 {
1635 char *str, *text;
1636 l_int32 i, n, x1, x2, h_sep, v_sep;
1637 l_float32 score;
1638 BOX *box, *prebox;
1639 BOXA *ba;
1640 BOXAA *baa;
1641 NUMA *nascore, *na;
1642 NUMAA *naa;
1643 SARRAY *satext, *sa, *saout;
1644
1645 PROCNAME("recogExtractNumbers");
1646
1647 if (pbaa) *pbaa = NULL;
1648 if (pnaa) *pnaa = NULL;
1649 if (!recog || !recog->rcha)
1650 return (SARRAY *)ERROR_PTR("recog and rcha not both defined",
1651 procName, NULL);
1652 if (!boxas)
1653 return (SARRAY *)ERROR_PTR("boxas not defined", procName, NULL);
1654
1655 if (spacethresh < 0)
1656 spacethresh = L_MAX(recog->maxheight_u, 20);
1657 rchaExtract(recog->rcha, NULL, &nascore, &satext, NULL, NULL, NULL, NULL);
1658 if (!nascore || !satext) {
1659 numaDestroy(&nascore);
1660 sarrayDestroy(&satext);
1661 return (SARRAY *)ERROR_PTR("nascore and satext not both returned",
1662 procName, NULL);
1663 }
1664
1665 saout = sarrayCreate(0);
1666 naa = numaaCreate(0);
1667 baa = boxaaCreate(0);
1668 prebox = NULL;
1669 n = numaGetCount(nascore);
1670 for (i = 0; i < n; i++) {
1671 numaGetFValue(nascore, i, &score);
1672 text = sarrayGetString(satext, i, L_NOCOPY);
1673 if (prebox == NULL) { /* no current run */
1674 if (score < scorethresh) {
1675 continue;
1676 } else { /* start a number run */
1677 sa = sarrayCreate(0);
1678 ba = boxaCreate(0);
1679 na = numaCreate(0);
1680 sarrayAddString(sa, text, L_COPY);
1681 prebox = boxaGetBox(boxas, i, L_CLONE);
1682 boxaAddBox(ba, prebox, L_COPY);
1683 numaAddNumber(na, score);
1684 }
1685 } else { /* in a current number run */
1686 box = boxaGetBox(boxas, i, L_CLONE);
1687 boxGetGeometry(prebox, &x1, NULL, NULL, NULL);
1688 boxGetGeometry(box, &x2, NULL, NULL, NULL);
1689 boxSeparationDistance(box, prebox, &h_sep, &v_sep);
1690 boxDestroy(&prebox);
1691 if (x1 < x2 && h_sep <= spacethresh &&
1692 v_sep < 0 && score >= scorethresh) { /* add to number */
1693 sarrayAddString(sa, text, L_COPY);
1694 boxaAddBox(ba, box, L_COPY);
1695 numaAddNumber(na, score);
1696 prebox = box;
1697 } else { /* save the completed number */
1698 str = sarrayToString(sa, 0);
1699 sarrayAddString(saout, str, L_INSERT);
1700 sarrayDestroy(&sa);
1701 boxaaAddBoxa(baa, ba, L_INSERT);
1702 numaaAddNuma(naa, na, L_INSERT);
1703 boxDestroy(&box);
1704 if (score >= scorethresh) { /* start a new number */
1705 i--;
1706 continue;
1707 }
1708 }
1709 }
1710 }
1711
1712 if (prebox) { /* save the last number */
1713 str = sarrayToString(sa, 0);
1714 sarrayAddString(saout, str, L_INSERT);
1715 boxaaAddBoxa(baa, ba, L_INSERT);
1716 numaaAddNuma(naa, na, L_INSERT);
1717 sarrayDestroy(&sa);
1718 boxDestroy(&prebox);
1719 }
1720
1721 numaDestroy(&nascore);
1722 sarrayDestroy(&satext);
1723 if (sarrayGetCount(saout) == 0) {
1724 sarrayDestroy(&saout);
1725 boxaaDestroy(&baa);
1726 numaaDestroy(&naa);
1727 L_INFO("saout has no identified text\n", procName);
1728 return NULL;
1729 }
1730
1731 if (pbaa)
1732 *pbaa = baa;
1733 else
1734 boxaaDestroy(&baa);
1735 if (pnaa)
1736 *pnaa = naa;
1737 else
1738 numaaDestroy(&naa);
1739 return saout;
1740 }
1741
1742 /*!
1743 * \brief showExtractNumbers()
1744 *
1745 * \param[in] pixs input 1 bpp image
1746 * \param[in] sa recognized text strings
1747 * \param[in] baa boxa array for location of characters in each string
1748 * \param[in] naa numa array for scores of characters in each string
1749 * \param[out] ppixdb [optional] input pixs with identified chars outlined
1750 * \return pixa of identified strings with text and scores, or NULL on error
1751 *
1752 * <pre>
1753 * Notes:
1754 * (1) This is a debugging routine on digit identification; e.g.:
1755 * recogIdentifyMultiple(recog, pixs, 0, 1, &boxa, NULL, NULL, 0);
1756 * sa = recogExtractNumbers(recog, boxa, 0.8, -1, &baa, &naa);
1757 * pixa = showExtractNumbers(pixs, sa, baa, naa, NULL);
1758 * </pre>
1759 */
1760 PIXA *
showExtractNumbers(PIX * pixs,SARRAY * sa,BOXAA * baa,NUMAA * naa,PIX ** ppixdb)1761 showExtractNumbers(PIX *pixs,
1762 SARRAY *sa,
1763 BOXAA *baa,
1764 NUMAA *naa,
1765 PIX **ppixdb)
1766 {
1767 char buf[128];
1768 char *textstr, *scorestr;
1769 l_int32 i, j, n, nchar, len;
1770 l_float32 score;
1771 L_BMF *bmf;
1772 BOX *box1, *box2;
1773 BOXA *ba;
1774 NUMA *na;
1775 PIX *pix1, *pix2, *pix3, *pix4;
1776 PIXA *pixa;
1777
1778 PROCNAME("showExtractNumbers");
1779
1780 if (ppixdb) *ppixdb = NULL;
1781 if (!pixs)
1782 return (PIXA *)ERROR_PTR("pixs not defined", procName, NULL);
1783 if (!sa)
1784 return (PIXA *)ERROR_PTR("sa not defined", procName, NULL);
1785 if (!baa)
1786 return (PIXA *)ERROR_PTR("baa not defined", procName, NULL);
1787 if (!naa)
1788 return (PIXA *)ERROR_PTR("naa not defined", procName, NULL);
1789
1790 n = sarrayGetCount(sa);
1791 pixa = pixaCreate(n);
1792 bmf = bmfCreate(NULL, 6);
1793 if (ppixdb) *ppixdb = pixConvertTo8(pixs, 1);
1794 for (i = 0; i < n; i++) {
1795 textstr = sarrayGetString(sa, i, L_NOCOPY);
1796 ba = boxaaGetBoxa(baa, i, L_CLONE);
1797 na = numaaGetNuma(naa, i, L_CLONE);
1798 boxaGetExtent(ba, NULL, NULL, &box1);
1799 box2 = boxAdjustSides(NULL, box1, -5, 5, -5, 5);
1800 if (ppixdb) pixRenderBoxArb(*ppixdb, box2, 3, 255, 0, 0);
1801 pix1 = pixClipRectangle(pixs, box1, NULL);
1802 len = strlen(textstr) + 1;
1803 pix2 = pixAddBlackOrWhiteBorder(pix1, 14 * len, 14 * len,
1804 5, 3, L_SET_WHITE);
1805 pix3 = pixConvertTo8(pix2, 1);
1806 nchar = numaGetCount(na);
1807 scorestr = NULL;
1808 for (j = 0; j < nchar; j++) {
1809 numaGetFValue(na, j, &score);
1810 snprintf(buf, sizeof(buf), "%d", (l_int32)(100 * score));
1811 stringJoinIP(&scorestr, buf);
1812 if (j < nchar - 1) stringJoinIP(&scorestr, ",");
1813 }
1814 snprintf(buf, sizeof(buf), "%s: %s\n", textstr, scorestr);
1815 pix4 = pixAddTextlines(pix3, bmf, buf, 0xff000000, L_ADD_BELOW);
1816 pixaAddPix(pixa, pix4, L_INSERT);
1817 boxDestroy(&box1);
1818 boxDestroy(&box2);
1819 pixDestroy(&pix1);
1820 pixDestroy(&pix2);
1821 pixDestroy(&pix3);
1822 boxaDestroy(&ba);
1823 numaDestroy(&na);
1824 LEPT_FREE(scorestr);
1825 }
1826
1827 bmfDestroy(&bmf);
1828 return pixa;
1829 }
1830
1831
1832 /*------------------------------------------------------------------------*
1833 * Static debug helper *
1834 *------------------------------------------------------------------------*/
1835 /*!
1836 * \brief l_showIndicatorSplitValues()
1837 *
1838 * \param[in] na1, na2, na3, na4, na5, na6 6 indicator array
1839 *
1840 * <pre>
1841 * Notes:
1842 * (1) The values indicate that specific criteria has been met
1843 * for component removal by pre-splitting filter..
1844 * The 'result' line shows which components have been removed.
1845 * </pre>
1846 */
1847 static void
l_showIndicatorSplitValues(NUMA * na1,NUMA * na2,NUMA * na3,NUMA * na4,NUMA * na5,NUMA * na6)1848 l_showIndicatorSplitValues(NUMA *na1,
1849 NUMA *na2,
1850 NUMA *na3,
1851 NUMA *na4,
1852 NUMA *na5,
1853 NUMA *na6)
1854 {
1855 l_int32 i, n;
1856
1857 n = numaGetCount(na1);
1858 fprintf(stderr, "================================================\n");
1859 fprintf(stderr, "lt minw: ");
1860 for (i = 0; i < n; i++)
1861 fprintf(stderr, "%4d ", (l_int32)na1->array[i]);
1862 fprintf(stderr, "\nlt minh: ");
1863 for (i = 0; i < n; i++)
1864 fprintf(stderr, "%4d ", (l_int32)na2->array[i]);
1865 fprintf(stderr, "\ngt maxh: ");
1866 for (i = 0; i < n; i++)
1867 fprintf(stderr, "%4d ", (l_int32)na3->array[i]);
1868 fprintf(stderr, "\ngt maxasp: ");
1869 for (i = 0; i < n; i++)
1870 fprintf(stderr, "%4d ", (l_int32)na4->array[i]);
1871 fprintf(stderr, "\nlt minaf: ");
1872 for (i = 0; i < n; i++)
1873 fprintf(stderr, "%4d ", (l_int32)na5->array[i]);
1874 fprintf(stderr, "\n------------------------------------------------");
1875 fprintf(stderr, "\nresult: ");
1876 for (i = 0; i < n; i++)
1877 fprintf(stderr, "%4d ", (l_int32)na6->array[i]);
1878 fprintf(stderr, "\n================================================\n");
1879 }
1880