1 /*====================================================================*
2  -  Copyright (C) 2001 Leptonica.  All rights reserved.
3  -
4  -  Redistribution and use in source and binary forms, with or without
5  -  modification, are permitted provided that the following conditions
6  -  are met:
7  -  1. Redistributions of source code must retain the above copyright
8  -     notice, this list of conditions and the following disclaimer.
9  -  2. Redistributions in binary form must reproduce the above
10  -     copyright notice, this list of conditions and the following
11  -     disclaimer in the documentation and/or other materials
12  -     provided with the distribution.
13  -
14  -  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15  -  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16  -  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17  -  A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL ANY
18  -  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19  -  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20  -  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21  -  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22  -  OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23  -  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24  -  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  *====================================================================*/
26 
27 /*!
28  * \file recogident.c
29  * <pre>
30  *
31  *      Top-level identification
32  *         l_int32             recogIdentifyMultiple()
33  *
34  *      Segmentation and noise removal
35  *         l_int32             recogSplitIntoCharacters()
36  *
37  *      Greedy character splitting
38  *         l_int32             recogCorrelationBestRow()
39  *         l_int32             recogCorrelationBestChar()
40  *         static l_int32      pixCorrelationBestShift()
41  *
42  *      Low-level identification of single characters
43  *         l_int32             recogIdentifyPixa()
44  *         l_int32             recogIdentifyPix()
45  *         l_int32             recogSkipIdentify()
46  *
47  *      Operations for handling identification results
48  *         static L_RCHA      *rchaCreate()
49  *         l_int32            *rchaDestroy()
50  *         static L_RCH       *rchCreate()
51  *         l_int32            *rchDestroy()
52  *         l_int32             rchaExtract()
53  *         l_int32             rchExtract()
54  *         static l_int32      transferRchToRcha()
55  *
56  *      Preprocessing and filtering
57  *         l_int32             recogProcessToIdentify()
58  *         static PIX         *recogPreSplittingFilter()
59  *         static l_int32      recogSplittingFilter()
60  *
61  *      Postprocessing
62  *         SARRAY             *recogExtractNumbers()
63  *         PIX                *showExtractNumbers()
64  *
65  *      Static debug helper
66  *         static void         l_showIndicatorSplitValues()
67  *
68  *  See recogbasic.c for examples of training a recognizer, which is
69  *  required before it can be used for identification.
70  *
71  *  The character splitter repeatedly does a greedy correlation with each
72  *  averaged unscaled template, at all pixel locations along the text to
73  *  be identified.  The vertical alignment is between the template
74  *  centroid and the (moving) windowed centroid, including a delta of
75  *  1 pixel above and below.  The best match then removes part of the
76  *  input image, leaving 1 or 2 pieces, which, after filtering,
77  *  are put in a queue.  The process ends when the queue is empty.
78  *  The filtering is based on the size and aspect ratio of the
79  *  remaining pieces; the intent is to remove anything that is
80  *  unlikely to be text, such as small pieces and line graphics.
81  *
82  *  After splitting, the selected segments are identified using
83  *  the input parameters that were initially specified for the
84  *  recognizer.  Unlike the splitter, which uses the averaged
85  *  templates from the unscaled input, the recognizer can use
86  *  either all training examples or averaged templates, and these
87  *  can be either scaled or unscaled.  These choices are specified
88  *  when the recognizer is constructed.
89  * </pre>
90  */
91 
92 #include <string.h>
93 #include "allheaders.h"
94 
95     /* There are two methods for splitting characters: DID and greedy.
96      * The default method is DID.  */
97 #define  SPLIT_WITH_DID   1
98 
99     /* Padding on pix1: added before correlations and removed from result */
100 static const l_int32    LeftRightPadding = 32;
101 
102     /* Parameters for filtering and sorting connected components in splitter */
103 static const l_float32  MinFillFactor = 0.10;
104 static const l_int32  DefaultMinHeight = 15;  /* min unscaled height */
105 static const l_int32  MinOverlap1 = 6;  /* in pass 1 of boxaSort2d() */
106 static const l_int32  MinOverlap2 = 6;  /* in pass 2 of boxaSort2d() */
107 static const l_int32  MinHeightPass1 = 5;  /* min height to start pass 1 */
108 
109 
110 static l_int32 pixCorrelationBestShift(PIX *pix1, PIX *pix2, NUMA *nasum1,
111                                        NUMA *namoment1, l_int32 area2,
112                                        l_int32 ycent2, l_int32 maxyshift,
113                                        l_int32 *tab8, l_int32 *pdelx,
114                                        l_int32 *pdely, l_float32 *pscore,
115                                        l_int32 debugflag );
116 static L_RCH *rchCreate(l_int32 index, l_float32 score, char *text,
117                         l_int32 sample, l_int32 xloc, l_int32 yloc,
118                         l_int32 width);
119 static L_RCHA *rchaCreate();
120 static l_int32 transferRchToRcha(L_RCH *rch, L_RCHA *rcha);
121 static PIX *recogPreSplittingFilter(L_RECOG *recog, PIX *pixs, l_int32 minh,
122                                     l_float32 minaf, l_int32 debug);
123 static l_int32 recogSplittingFilter(L_RECOG *recog, PIX *pixs, l_int32 min,
124                                     l_float32 minaf, l_int32 *premove,
125                                     l_int32 debug);
126 static void l_showIndicatorSplitValues(NUMA *na1, NUMA *na2, NUMA *na3,
127                                        NUMA *na4, NUMA *na5, NUMA *na6);
128 
129 /*------------------------------------------------------------------------*
130  *                             Identification
131  *------------------------------------------------------------------------*/
132 /*!
133  * \brief   recogIdentifyMultiple()
134  *
135  * \param[in]    recog      with training finished
136  * \param[in]    pixs       containing typically a small number of characters
137  * \param[in]    minh       remove shorter components; use 0 for default
138  * \param[in]    skipsplit  1 to skip the splitting step
139  * \param[out]   pboxa [optional] locations of identified components
140  * \param[out]   ppixa [optional] images of identified components
141  * \param[out]   ppixdb [optional] debug pix: inputs and best fits
142  * \param[in]    debugsplit 1 returns pix split debugging images
143  * \return  0 if OK; 1 if nothing is found; 2 for other errors.
144  *
145  * <pre>
146  * Notes:
147  *      (1) This filters the input pixa and calls recogIdentifyPixa()
148  *      (2) Splitting is relatively slow, because it tries to match all
149  *          character templates to all locations.  This step can be skipped.
150  *      (3) An attempt is made to order the (optionally) returned images
151  *          and boxes in 2-dimensional sorted order.  These can then
152  *          be used to aggregate identified characters into numbers or words.
153  *          One typically wants the pixa, which contains a boxa of the
154  *          extracted subimages.
155  * </pre>
156  */
157 l_int32
recogIdentifyMultiple(L_RECOG * recog,PIX * pixs,l_int32 minh,l_int32 skipsplit,BOXA ** pboxa,PIXA ** ppixa,PIX ** ppixdb,l_int32 debugsplit)158 recogIdentifyMultiple(L_RECOG  *recog,
159                       PIX      *pixs,
160                       l_int32   minh,
161                       l_int32   skipsplit,
162                       BOXA    **pboxa,
163                       PIXA    **ppixa,
164                       PIX     **ppixdb,
165                       l_int32   debugsplit)
166 {
167 l_int32  n;
168 BOXA    *boxa;
169 PIX     *pixb;
170 PIXA    *pixa;
171 
172     PROCNAME("recogIdentifyMultiple");
173 
174     if (pboxa) *pboxa = NULL;
175     if (ppixa) *ppixa = NULL;
176     if (ppixdb) *ppixdb = NULL;
177     if (!recog)
178         return ERROR_INT("recog not defined", procName, 2);
179     if (!recog->train_done)
180         return ERROR_INT("training not finished", procName, 2);
181     if (!pixs)
182         return ERROR_INT("pixs not defined", procName, 2);
183 
184         /* Binarize if necessary */
185     if (pixGetDepth(pixs) > 1)
186         pixb = pixConvertTo1(pixs, recog->threshold);
187     else
188         pixb = pixClone(pixs);
189 
190         /* Noise removal and splitting of touching characters */
191     recogSplitIntoCharacters(recog, pixb, minh, skipsplit, &boxa, &pixa,
192                              debugsplit);
193     pixDestroy(&pixb);
194     if (!pixa || (n = pixaGetCount(pixa)) == 0) {
195         pixaDestroy(&pixa);
196         boxaDestroy(&boxa);
197         L_WARNING("nothing found\n", procName);
198         return 1;
199     }
200 
201     recogIdentifyPixa(recog, pixa, ppixdb);
202     if (pboxa)
203         *pboxa = boxa;
204     else
205         boxaDestroy(&boxa);
206     if (ppixa)
207         *ppixa = pixa;
208     else
209         pixaDestroy(&pixa);
210     return 0;
211 }
212 
213 
214 /*------------------------------------------------------------------------*
215  *                     Segmentation and noise removal                     *
216  *------------------------------------------------------------------------*/
217 /*!
218  * \brief   recogSplitIntoCharacters()
219  *
220  * \param[in]    recog
221  * \param[in]    pixs      1 bpp, contains only mostly deskewed text
222  * \param[in]    minh      remove shorter components; use 0 for default
223  * \param[in]    skipsplit 1 to skip the splitting step
224  * \param[out]   pboxa     character bounding boxes
225  * \param[out]   ppixa     character images
226  * \param[in]    debug     1 for results written to pixadb_split
227  * \return  0 if OK, 1 on error or if no components are returned
228  *
229  * <pre>
230  * Notes:
231  *      (1) This can be given an image that has an arbitrary number
232  *          of text characters.  It optionally splits connected
233  *          components based on document image decoding in recogDecode().
234  *          The returned pixa includes the boxes from which the
235  *          (possibly split) components are extracted.
236  *      (2) After noise filtering, the resulting components are put in
237  *          row-major (2D) order, and the smaller of overlapping
238  *          components are removed if they satisfy conditions of
239  *          relative size and fractional overlap.
240  *      (3) Note that the spliting function uses unscaled templates
241  *          and does not bother returning the class results and scores.
242  *          Thes are more accurately found later using the scaled templates.
243  * </pre>
244  */
245 l_int32
recogSplitIntoCharacters(L_RECOG * recog,PIX * pixs,l_int32 minh,l_int32 skipsplit,BOXA ** pboxa,PIXA ** ppixa,l_int32 debug)246 recogSplitIntoCharacters(L_RECOG  *recog,
247                          PIX      *pixs,
248                          l_int32   minh,
249                          l_int32   skipsplit,
250                          BOXA    **pboxa,
251                          PIXA    **ppixa,
252                          l_int32   debug)
253 {
254 static l_int32  ind = 0;
255 char     buf[32];
256 l_int32  i, xoff, yoff, empty, maxw, bw, ncomp, scaling;
257 BOX     *box;
258 BOXA    *boxa1, *boxa2, *boxa3, *boxa4, *boxad;
259 BOXAA   *baa;
260 PIX     *pix, *pix1, *pix2, *pix3;
261 PIXA    *pixa;
262 
263     PROCNAME("recogSplitIntoCharacters");
264 
265     lept_mkdir("lept/recog");
266 
267     if (pboxa) *pboxa = NULL;
268     if (ppixa) *ppixa = NULL;
269     if (!pboxa || !ppixa)
270         return ERROR_INT("&boxa and &pixa not defined", procName, 1);
271     if (!recog)
272         return ERROR_INT("recog not defined", procName, 1);
273     if (!recog->train_done)
274         return ERROR_INT("training not finished", procName, 1);
275     if (!pixs || pixGetDepth(pixs) != 1)
276         return ERROR_INT("pixs not defined or not 1 bpp", procName, 1);
277     if (minh <= 0) minh = DefaultMinHeight;
278     pixZero(pixs, &empty);
279     if (empty) return 1;
280 
281         /* Small vertical close for consolidation.  Don't do a horizontal
282          * closing, because it might join separate characters. */
283     pix1 = pixMorphSequence(pixs, "c1.3", 0);
284 
285         /* Carefully filter out noise */
286     pix2 = recogPreSplittingFilter(recog, pix1, minh, MinFillFactor, debug);
287     pixDestroy(&pix1);
288 
289         /* Get the 8-connected components to be split/identified */
290     boxa1 = pixConnComp(pix2, NULL, 8);
291     pixDestroy(&pix2);
292     ncomp = boxaGetCount(boxa1);
293     if (ncomp == 0) {
294         boxaDestroy(&boxa1);
295         L_WARNING("all components removed\n", procName);
296         return 1;
297     }
298 
299         /* Save everything and split the large components */
300     boxa2 = boxaCreate(ncomp);
301     maxw = recog->maxwidth_u + 5;
302     scaling = (recog->scalew > 0 || recog->scaleh > 0) ? TRUE : FALSE;
303     pixa = (debug) ? pixaCreate(ncomp) : NULL;
304     for (i = 0; i < ncomp; i++) {
305         box = boxaGetBox(boxa1, i, L_CLONE);
306         boxGetGeometry(box, &xoff, &yoff, &bw, NULL);
307             /* Treat as one character if it is small, if the images
308              * have been scaled, or if splitting is not to be run. */
309         if (bw <= maxw || scaling || skipsplit) {
310             boxaAddBox(boxa2, box, L_INSERT);
311         } else {
312             pix = pixClipRectangle(pixs, box, NULL);
313 #if SPLIT_WITH_DID
314             if (!debug) {
315                 boxa3 = recogDecode(recog, pix, 2, NULL);
316             } else {
317                 boxa3 = recogDecode(recog, pix, 2, &pix2);
318                 pixaAddPix(pixa, pix2, L_INSERT);
319             }
320 #else  /* use greedy splitting */
321             recogCorrelationBestRow(recog, pix, &boxa3, NULL, NULL,
322                                     NULL, debug);
323             if (debug) {
324                 pix2 = pixConvertTo32(pix);
325                 pixRenderBoxaArb(pix2, boxa3, 2, 255, 0, 0);
326                 pixaAddPix(pixa, pix2, L_INSERT);
327             }
328 #endif  /* SPLIT_WITH_DID */
329             pixDestroy(&pix);
330             boxDestroy(&box);
331             if (!boxa3) {
332                 L_ERROR("boxa3 not found for component %d\n", procName, i);
333             } else {
334                 boxa4 = boxaTransform(boxa3, xoff, yoff, 1.0, 1.0);
335                 boxaJoin(boxa2, boxa4, 0, -1);
336                 boxaDestroy(&boxa3);
337                 boxaDestroy(&boxa4);
338             }
339         }
340     }
341     boxaDestroy(&boxa1);
342     if (pixa) {  /* debug */
343         pix3 = pixaDisplayTiledInColumns(pixa, 1, 1.0, 20, 2);
344         snprintf(buf, sizeof(buf), "/tmp/lept/recog/decode-%d.png", ind++);
345         pixWrite(buf, pix3, IFF_PNG);
346         pixaDestroy(&pixa);
347         pixDestroy(&pix3);
348     }
349 
350         /* Do a 2D sort on the bounding boxes, and flatten the result to 1D.
351          * For the 2D sort, to add a box to an existing boxa, we require
352          * specified minimum vertical overlaps for the first two passes
353          * of the 2D sort.  In pass 1, only components with sufficient
354          * height can start a new boxa. */
355     baa = boxaSort2d(boxa2, NULL, MinOverlap1, MinOverlap2, MinHeightPass1);
356     boxa3 = boxaaFlattenToBoxa(baa, NULL, L_CLONE);
357     boxaaDestroy(&baa);
358     boxaDestroy(&boxa2);
359 
360         /* Remove smaller components of overlapping pairs.
361          * We only remove the small component if the overlap is
362          * at least half its area and if its area is no more
363          * than 30% of the area of the large component.  Because the
364          * components are in a flattened 2D sort, we don't need to
365          * look far ahead in the array to find all overlapping boxes;
366          * 10 boxes is plenty. */
367     boxad = boxaHandleOverlaps(boxa3, L_COMBINE, 10, 0.5, 0.3, NULL);
368     boxaDestroy(&boxa3);
369 
370         /* Extract and save the image pieces from the input image. */
371     *ppixa = pixClipRectangles(pixs, boxad);
372     *pboxa = boxad;
373     return 0;
374 }
375 
376 
377 /*------------------------------------------------------------------------*
378  *                       Greedy character splitting                       *
379  *------------------------------------------------------------------------*/
380 /*!
381  * \brief   recogCorrelationBestRow()
382  *
383  * \param[in]    recog with LUT's pre-computed
384  * \param[in]    pixs typically of multiple touching characters, 1 bpp
385  * \param[out]   pboxa bounding boxs of best fit character
386  * \param[out]   pnascore [optional] correlation scores
387  * \param[out]   pnaindex [optional] indices of classes
388  * \param[out]   psachar [optional] array of character strings
389  * \param[in]    debug 1 for results written to pixadb_split
390  * \return  0 if OK, 1 on error
391  *
392  * <pre>
393  * Notes:
394  *      (1) Supervises character matching for (in general) a c.c with
395  *          multiple touching characters.  Finds the best match greedily.
396  *          Rejects small parts that are left over after splitting.
397  *      (2) Matching is to the average, and without character scaling.
398  * </pre>
399  */
400 l_int32
recogCorrelationBestRow(L_RECOG * recog,PIX * pixs,BOXA ** pboxa,NUMA ** pnascore,NUMA ** pnaindex,SARRAY ** psachar,l_int32 debug)401 recogCorrelationBestRow(L_RECOG  *recog,
402                         PIX      *pixs,
403                         BOXA    **pboxa,
404                         NUMA    **pnascore,
405                         NUMA    **pnaindex,
406                         SARRAY  **psachar,
407                         l_int32   debug)
408 {
409 char      *charstr;
410 l_int32    index, remove, w, h, bx, bw, bxc, bwc, w1, w2, w3;
411 l_float32  score;
412 BOX       *box, *boxc, *boxtrans, *boxl, *boxr, *boxlt, *boxrt;
413 BOXA      *boxat;
414 NUMA      *nascoret, *naindext, *nasort;
415 PIX       *pixb, *pixc, *pixl, *pixr, *pixdb, *pixd;
416 PIXA      *pixar, *pixadb;
417 SARRAY    *sachart;
418 
419 l_int32    iter;
420 
421     PROCNAME("recogCorrelationBestRow");
422 
423     if (pnascore) *pnascore = NULL;
424     if (pnaindex) *pnaindex = NULL;
425     if (psachar) *psachar = NULL;
426     if (!pboxa)
427         return ERROR_INT("&boxa not defined", procName, 1);
428     *pboxa = NULL;
429     if (!recog)
430         return ERROR_INT("recog not defined", procName, 1);
431     if (!pixs || pixGetDepth(pixs) != 1)
432         return ERROR_INT("pixs not defined or not 1 bpp", procName, 1);
433     if (pixGetWidth(pixs) < recog->minwidth_u - 4)
434         return ERROR_INT("pixs too narrow", procName, 1);
435     if (!recog->train_done)
436         return ERROR_INT("training not finished", procName, 1);
437 
438         /* Binarize and crop to foreground if necessary */
439     pixb = recogProcessToIdentify(recog, pixs, 0);
440 
441         /* Initialize the arrays */
442     boxat = boxaCreate(4);
443     nascoret = numaCreate(4);
444     naindext = numaCreate(4);
445     sachart = sarrayCreate(4);
446     pixadb = (debug) ? pixaCreate(4) : NULL;
447 
448         /* Initialize the images remaining to be processed with the input.
449          * These are stored in pixar, which is used here as a queue,
450          * on which we only put image fragments that are large enough to
451          * contain at least one character.  */
452     pixar = pixaCreate(1);
453     pixGetDimensions(pixb, &w, &h, NULL);
454     box = boxCreate(0, 0, w, h);
455     pixaAddPix(pixar, pixb, L_INSERT);
456     pixaAddBox(pixar, box, L_INSERT);
457 
458         /* Successively split on the best match until nothing is left.
459          * To be safe, we limit the search to 10 characters. */
460     for (iter = 0; iter < 11; iter++) {
461         if (pixaGetCount(pixar) == 0)
462             break;
463         if (iter == 10) {
464             L_WARNING("more than 10 chars; ending search\n", procName);
465             break;
466         }
467 
468             /* Pop one from the queue */
469         pixaRemovePixAndSave(pixar, 0, &pixc, &boxc);
470         boxGetGeometry(boxc, &bxc, NULL, &bwc, NULL);
471 
472             /* This is a single component; if noise, remove it */
473         recogSplittingFilter(recog, pixc, 0, MinFillFactor, &remove, debug);
474         if (debug)
475             fprintf(stderr, "iter = %d, removed = %d\n", iter, remove);
476         if (remove) {
477             pixDestroy(&pixc);
478             boxDestroy(&boxc);
479             continue;
480         }
481 
482             /* Find the best character match */
483         if (debug) {
484             recogCorrelationBestChar(recog, pixc, &box, &score,
485                                      &index, &charstr, &pixdb);
486             pixaAddPix(pixadb, pixdb, L_INSERT);
487         } else {
488             recogCorrelationBestChar(recog, pixc, &box, &score,
489                                      &index, &charstr, NULL);
490         }
491 
492             /* Find the box in original coordinates, and append
493              * the results to the arrays. */
494         boxtrans = boxTransform(box, bxc, 0, 1.0, 1.0);
495         boxaAddBox(boxat, boxtrans, L_INSERT);
496         numaAddNumber(nascoret, score);
497         numaAddNumber(naindext, index);
498         sarrayAddString(sachart, charstr, L_INSERT);
499 
500             /* Split the current pixc into three regions and save
501              * each region if it is large enough. */
502         boxGetGeometry(box, &bx, NULL, &bw, NULL);
503         w1 = bx;
504         w2 = bw;
505         w3 = bwc - bx - bw;
506         if (debug)
507             fprintf(stderr, " w1 = %d, w2 = %d, w3 = %d\n", w1, w2, w3);
508         if (w1 < recog->minwidth_u - 4) {
509             if (debug) L_INFO("discarding width %d on left\n", procName, w1);
510         } else {  /* extract and save left region */
511             boxl = boxCreate(0, 0, bx + 1, h);
512             pixl = pixClipRectangle(pixc, boxl, NULL);
513             boxlt = boxTransform(boxl, bxc, 0, 1.0, 1.0);
514             pixaAddPix(pixar, pixl, L_INSERT);
515             pixaAddBox(pixar, boxlt, L_INSERT);
516             boxDestroy(&boxl);
517         }
518         if (w3 < recog->minwidth_u - 4) {
519             if (debug) L_INFO("discarding width %d on right\n", procName, w3);
520         } else {  /* extract and save left region */
521             boxr = boxCreate(bx + bw - 1, 0, w3 + 1, h);
522             pixr = pixClipRectangle(pixc, boxr, NULL);
523             boxrt = boxTransform(boxr, bxc, 0, 1.0, 1.0);
524             pixaAddPix(pixar, pixr, L_INSERT);
525             pixaAddBox(pixar, boxrt, L_INSERT);
526             boxDestroy(&boxr);
527         }
528         pixDestroy(&pixc);
529         boxDestroy(&box);
530         boxDestroy(&boxc);
531     }
532     pixaDestroy(&pixar);
533 
534 
535         /* Sort the output results by left-to-right in the boxa */
536     *pboxa = boxaSort(boxat, L_SORT_BY_X, L_SORT_INCREASING, &nasort);
537     if (pnascore)
538         *pnascore = numaSortByIndex(nascoret, nasort);
539     if (pnaindex)
540         *pnaindex = numaSortByIndex(naindext, nasort);
541     if (psachar)
542         *psachar = sarraySortByIndex(sachart, nasort);
543     numaDestroy(&nasort);
544     boxaDestroy(&boxat);
545     numaDestroy(&nascoret);
546     numaDestroy(&naindext);
547     sarrayDestroy(&sachart);
548 
549         /* Final debug output */
550     if (debug) {
551         pixd = pixaDisplayTiledInRows(pixadb, 32, 2000, 1.0, 0, 15, 2);
552         pixDisplay(pixd, 400, 400);
553         pixaAddPix(recog->pixadb_split, pixd, L_INSERT);
554         pixaDestroy(&pixadb);
555     }
556     return 0;
557 }
558 
559 
560 /*!
561  * \brief   recogCorrelationBestChar()
562  *
563  * \param[in]    recog with LUT's pre-computed
564  * \param[in]    pixs can be of multiple touching characters, 1 bpp
565  * \param[out]   pbox bounding box of best fit character
566  * \param[out]   pscore correlation score
567  * \param[out]   pindex [optional] index of class
568  * \param[out]   pcharstr [optional] character string of class
569  * \param[out]   ppixdb [optional] debug pix showing input and best fit
570  * \return  0 if OK, 1 on error
571  *
572  * <pre>
573  * Notes:
574  *      (1) Basic matching character splitter.  Finds the best match among
575  *          all templates to some region of the image.  This can result
576  *          in splitting the image into two parts.  This is "image decoding"
577  *          without dynamic programming, because we don't use a setwidth
578  *          and compute the best matching score for the entire image.
579  *      (2) Matching is to the average templates, without character scaling.
580  * </pre>
581  */
582 l_int32
recogCorrelationBestChar(L_RECOG * recog,PIX * pixs,BOX ** pbox,l_float32 * pscore,l_int32 * pindex,char ** pcharstr,PIX ** ppixdb)583 recogCorrelationBestChar(L_RECOG    *recog,
584                          PIX        *pixs,
585                          BOX       **pbox,
586                          l_float32  *pscore,
587                          l_int32    *pindex,
588                          char      **pcharstr,
589                          PIX       **ppixdb)
590 {
591 l_int32    i, n, w1, h1, w2, area2, ycent2, delx, dely;
592 l_int32    bestdelx, bestdely, bestindex;
593 l_float32  score, bestscore;
594 BOX       *box;
595 BOXA      *boxa;
596 NUMA      *nasum, *namoment;
597 PIX       *pix1, *pix2;
598 
599     PROCNAME("recogCorrelationBestChar");
600 
601     if (pindex) *pindex = 0;
602     if (pcharstr) *pcharstr = NULL;
603     if (ppixdb) *ppixdb = NULL;
604     if (pbox) *pbox = NULL;
605     if (pscore) *pscore = 0.0;
606     if (!pbox || !pscore)
607         return ERROR_INT("&box and &score not both defined", procName, 1);
608     if (!recog)
609         return ERROR_INT("recog not defined", procName, 1);
610     if (!pixs || pixGetDepth(pixs) != 1)
611         return ERROR_INT("pixs not defined or not 1 bpp", procName, 1);
612     if (!recog->train_done)
613         return ERROR_INT("training not finished", procName, 1);
614 
615         /* Binarize and crop to foreground if necessary.  Add padding
616          * to both the left and right side; this is compensated for
617          * when reporting the bounding box of the best matched character. */
618     pix1 = recogProcessToIdentify(recog, pixs, LeftRightPadding);
619     pixGetDimensions(pix1, &w1, &h1, NULL);
620 
621         /* Compute vertical sum and moment arrays */
622     nasum = pixCountPixelsByColumn(pix1);
623     namoment = pixGetMomentByColumn(pix1, 1);
624 
625         /* Do shifted correlation against all averaged templates. */
626     n = recog->setsize;
627     boxa = boxaCreate(n);  /* location of best fits for each character */
628     bestscore = 0.0;
629     bestindex = bestdelx = bestdely = 0;
630     for (i = 0; i < n; i++) {
631         pix2 = pixaGetPix(recog->pixa_u, i, L_CLONE);
632         w2 = pixGetWidth(pix2);
633             /* Note that the slightly expended w1 is typically larger
634              * than w2 (the template). */
635         if (w1 >= w2) {
636             numaGetIValue(recog->nasum_u, i, &area2);
637             ptaGetIPt(recog->pta_u, i, NULL, &ycent2);
638             pixCorrelationBestShift(pix1, pix2, nasum, namoment, area2, ycent2,
639                                     recog->maxyshift, recog->sumtab, &delx,
640                                     &dely, &score, 1);
641             if (ppixdb) {
642                 fprintf(stderr,
643                     "Best match template %d: (x,y) = (%d,%d), score = %5.3f\n",
644                     i, delx, dely, score);
645             }
646                   /* Compensate for padding */
647             box = boxCreate(delx - LeftRightPadding, 0, w2, h1);
648             if (score > bestscore) {
649                 bestscore = score;
650                 bestdelx = delx - LeftRightPadding;
651                 bestdely = dely;
652                 bestindex = i;
653             }
654         } else {
655             box = boxCreate(0, 0, 1, 1);  /* placeholder */
656             if (ppixdb)
657                 fprintf(stderr, "Component too thin: w1 = %d, w2 = %d\n",
658                         w1, w2);
659         }
660         boxaAddBox(boxa, box, L_INSERT);
661         pixDestroy(&pix2);
662     }
663 
664     *pscore = bestscore;
665     *pbox = boxaGetBox(boxa, bestindex, L_COPY);
666     if (pindex) *pindex = bestindex;
667     if (pcharstr)
668         recogGetClassString(recog, bestindex, pcharstr);
669 
670     if (ppixdb) {
671         L_INFO("Best match: class %d; shifts (%d, %d)\n",
672                procName, bestindex, bestdelx, bestdely);
673         pix2 = pixaGetPix(recog->pixa_u, bestindex, L_CLONE);
674         *ppixdb = recogShowMatch(recog, pix1, pix2, NULL, -1, 0.0);
675         pixDestroy(&pix2);
676     }
677 
678     pixDestroy(&pix1);
679     boxaDestroy(&boxa);
680     numaDestroy(&nasum);
681     numaDestroy(&namoment);
682     return 0;
683 }
684 
685 
686 /*!
687  * \brief   pixCorrelationBestShift()
688  *
689  * \param[in]    pix1   1 bpp, the unknown image; typically larger
690  * \param[in]    pix2   1 bpp, the matching template image
691  * \param[in]    nasum1 vertical column pixel sums for pix1
692  * \param[in]    namoment1  vertical column first moment of pixels for pix1
693  * \param[in]    area2  number of on pixels in pix2
694  * \param[in]    ycent2  y component of centroid of pix2
695  * \param[in]    maxyshift  max y shift of pix2 around the location where
696  *                          the centroids of pix2 and a windowed part of pix1
697  *                          are vertically aligned
698  * \param[in]    tab8 [optional] sum tab for ON pixels in byte; can be NULL
699  * \param[out]   pdelx [optional] best x shift of pix2 relative to pix1
700  * \param[out]   pdely [optional] best y shift of pix2 relative to pix1
701  * \param[out]   pscore [optional] maximum score found; can be NULL
702  * \param[in]    debugflag <= 0 to skip; positive to generate output.
703  *                         The integer is used to label the debug image.
704  * \return  0 if OK, 1 on error
705  *
706  * <pre>
707  * Notes:
708  *      (1) This maximizes the correlation score between two 1 bpp images,
709  *          one of which is typically wider.  In a typical example,
710  *          pix1 is a bitmap of 2 or more touching characters and pix2 is
711  *          a single character template.  This finds the location of pix2
712  *          that gives the largest correlation.
713  *      (2) The windowed area of fg pixels and windowed first moment
714  *          in the y direction are computed from the input sum and moment
715  *          column arrays, %nasum1 and %namoment1
716  *      (3) This is a brute force operation.  We compute the correlation
717  *          at every x shift for which pix2 fits entirely within pix1,
718  *          and where the centroid of pix2 is aligned, within +-maxyshift,
719  *          with the centroid of a window of pix1 of the same width.
720  *          The correlation is taken over the full height of pix1.
721  *          This can be made more efficient.
722  * </pre>
723  */
724 static l_int32
pixCorrelationBestShift(PIX * pix1,PIX * pix2,NUMA * nasum1,NUMA * namoment1,l_int32 area2,l_int32 ycent2,l_int32 maxyshift,l_int32 * tab8,l_int32 * pdelx,l_int32 * pdely,l_float32 * pscore,l_int32 debugflag)725 pixCorrelationBestShift(PIX        *pix1,
726                         PIX        *pix2,
727                         NUMA       *nasum1,
728                         NUMA       *namoment1,
729                         l_int32     area2,
730                         l_int32     ycent2,
731                         l_int32     maxyshift,
732                         l_int32    *tab8,
733                         l_int32    *pdelx,
734                         l_int32    *pdely,
735                         l_float32  *pscore,
736                         l_int32     debugflag)
737 {
738 l_int32     w1, w2, h1, h2, i, j, nx, shifty, delx, dely;
739 l_int32     sum, moment, count;
740 l_int32    *tab, *area1, *arraysum, *arraymoment;
741 l_float32   maxscore, score;
742 l_float32  *ycent1;
743 FPIX       *fpix;
744 PIX        *pixt, *pixt1, *pixt2;
745 
746     PROCNAME("pixCorrelationBestShift");
747 
748     if (pdelx) *pdelx = 0;
749     if (pdely) *pdely = 0;
750     if (pscore) *pscore = 0.0;
751     if (!pix1 || pixGetDepth(pix1) != 1)
752         return ERROR_INT("pix1 not defined or not 1 bpp", procName, 1);
753     if (!pix2 || pixGetDepth(pix2) != 1)
754         return ERROR_INT("pix2 not defined or not 1 bpp", procName, 1);
755     if (!nasum1 || !namoment1)
756         return ERROR_INT("nasum1 and namoment1 not both defined", procName, 1);
757     if (area2 <= 0 || ycent2 <= 0)
758         return ERROR_INT("area2 and ycent2 must be > 0", procName, 1);
759 
760        /* If pix1 (the unknown image) is narrower than pix2,
761         * don't bother to try the match.  pix1 is already padded with
762         * 2 pixels on each side. */
763     pixGetDimensions(pix1, &w1, &h1, NULL);
764     pixGetDimensions(pix2, &w2, &h2, NULL);
765     if (w1 < w2) {
766         if (debugflag > 0) {
767             L_INFO("skipping match with w1 = %d and w2 = %d\n",
768                    procName, w1, w2);
769         }
770         return 0;
771     }
772     nx = w1 - w2 + 1;
773 
774     if (debugflag > 0)
775         fpix = fpixCreate(nx, 2 * maxyshift + 1);
776     if (!tab8)
777         tab = makePixelSumTab8();
778     else
779         tab = tab8;
780 
781         /* Set up the arrays for area1 and ycent1.  We have to do this
782          * for each template (pix2) because the window width is w2. */
783     area1 = (l_int32 *)LEPT_CALLOC(nx, sizeof(l_int32));
784     ycent1 = (l_float32 *)LEPT_CALLOC(nx, sizeof(l_int32));
785     arraysum = numaGetIArray(nasum1);
786     arraymoment = numaGetIArray(namoment1);
787     for (i = 0, sum = 0, moment = 0; i < w2; i++) {
788         sum += arraysum[i];
789         moment += arraymoment[i];
790     }
791     for (i = 0; i < nx - 1; i++) {
792         area1[i] = sum;
793         ycent1[i] = (sum == 0) ? ycent2 : (l_float32)moment / (l_float32)sum;
794         sum += arraysum[w2 + i] - arraysum[i];
795         moment += arraymoment[w2 + i] - arraymoment[i];
796     }
797     area1[nx - 1] = sum;
798     ycent1[nx - 1] = (sum == 0) ? ycent2 : (l_float32)moment / (l_float32)sum;
799 
800         /* Find the best match location for pix2.  At each location,
801          * to insure that pixels are ON only within the intersection of
802          * pix and the shifted pix2:
803          *  (1) Start with pixt cleared and equal in size to pix1.
804          *  (2) Blit the shifted pix2 onto pixt.  Then all ON pixels
805          *      are within the intersection of pix1 and the shifted pix2.
806          *  (3) AND pix1 with pixt. */
807     pixt = pixCreate(w2, h1, 1);
808     maxscore = 0;
809     delx = 0;
810     dely = 0;  /* amount to shift pix2 relative to pix1 to get alignment */
811     for (i = 0; i < nx; i++) {
812         shifty = (l_int32)(ycent1[i] - ycent2 + 0.5);
813         for (j = -maxyshift; j <= maxyshift; j++) {
814             pixClearAll(pixt);
815             pixRasterop(pixt, 0, shifty + j, w2, h2, PIX_SRC, pix2, 0, 0);
816             pixRasterop(pixt, 0, 0, w2, h1, PIX_SRC & PIX_DST, pix1, i, 0);
817             pixCountPixels(pixt, &count, tab);
818             score = (l_float32)count * (l_float32)count /
819                     ((l_float32)area1[i] * (l_float32)area2);
820             if (score > maxscore) {
821                 maxscore = score;
822                 delx = i;
823                 dely = shifty + j;
824             }
825 
826             if (debugflag > 0)
827                 fpixSetPixel(fpix, i, maxyshift + j, 1000.0 * score);
828         }
829     }
830 
831     if (debugflag > 0) {
832         lept_mkdir("lept/recog");
833         char  buf[128];
834         pixt1 = fpixDisplayMaxDynamicRange(fpix);
835         pixt2 = pixExpandReplicate(pixt1, 5);
836         snprintf(buf, sizeof(buf), "/tmp/lept/recog/junkbs_%d.png", debugflag);
837         pixWrite(buf, pixt2, IFF_PNG);
838         pixDestroy(&pixt1);
839         pixDestroy(&pixt2);
840         fpixDestroy(&fpix);
841     }
842 
843     if (pdelx) *pdelx = delx;
844     if (pdely) *pdely = dely;
845     if (pscore) *pscore = maxscore;
846     if (!tab8) LEPT_FREE(tab);
847     LEPT_FREE(area1);
848     LEPT_FREE(ycent1);
849     LEPT_FREE(arraysum);
850     LEPT_FREE(arraymoment);
851     pixDestroy(&pixt);
852     return 0;
853 }
854 
855 
856 /*------------------------------------------------------------------------*
857  *                          Low-level identification                      *
858  *------------------------------------------------------------------------*/
859 /*!
860  * \brief   recogIdentifyPixa()
861  *
862  * \param[in]    recog
863  * \param[in]    pixa of 1 bpp images to match
864  * \param[out]   ppixdb [optional] pix showing inputs and best fits
865  * \return  0 if OK, 1 on error
866  *
867  * <pre>
868  * Notes:
869  *      (1) This should be called by recogIdentifyMuliple(), which
870  *          binarizes and splits characters before sending %pixa here.
871  *      (2) This calls recogIdentifyPix(), which does the same operation
872  *          on each pix in %pixa, and optionally returns the arrays
873  *          of results (scores, class index and character string)
874  *          for the best correlation match.
875  * </pre>
876  */
877 l_int32
recogIdentifyPixa(L_RECOG * recog,PIXA * pixa,PIX ** ppixdb)878 recogIdentifyPixa(L_RECOG  *recog,
879                   PIXA     *pixa,
880                   PIX     **ppixdb)
881 {
882 char      *text;
883 l_int32    i, n, fail, index, depth;
884 l_float32  score;
885 PIX       *pix1, *pix2, *pix3;
886 PIXA      *pixa1;
887 L_RCH     *rch;
888 
889     PROCNAME("recogIdentifyPixa");
890 
891     if (ppixdb) *ppixdb = NULL;
892     if (!recog)
893         return ERROR_INT("recog not defined", procName, 1);
894     if (!pixa)
895         return ERROR_INT("pixa not defined", procName, 1);
896 
897         /* Run the recognizer on the set of images.  This writes
898          * the text string into each pix in pixa. */
899     n = pixaGetCount(pixa);
900     rchaDestroy(&recog->rcha);
901     recog->rcha = rchaCreate();
902     pixa1 = (ppixdb) ? pixaCreate(n) : NULL;
903     depth = 1;
904     for (i = 0; i < n; i++) {
905         pix1 = pixaGetPix(pixa, i, L_CLONE);
906         pix2 = NULL;
907         fail = FALSE;
908         if (!ppixdb)
909             fail = recogIdentifyPix(recog, pix1, NULL);
910         else
911             fail = recogIdentifyPix(recog, pix1, &pix2);
912         if (fail)
913             recogSkipIdentify(recog);
914         if ((rch = recog->rch) == NULL) {
915             L_ERROR("rch not found for char %d\n", procName, i);
916             pixDestroy(&pix1);
917             pixDestroy(&pix2);
918             continue;
919         }
920         rchExtract(rch, NULL, NULL, &text, NULL, NULL, NULL, NULL);
921         pixSetText(pix1, text);
922         LEPT_FREE(text);
923         if (ppixdb) {
924             rchExtract(rch, &index, &score, NULL, NULL, NULL, NULL, NULL);
925             pix3 = recogShowMatch(recog, pix2, NULL, NULL, index, score);
926             if (i == 0) depth = pixGetDepth(pix3);
927             pixaAddPix(pixa1, pix3, L_INSERT);
928             pixDestroy(&pix2);
929         }
930         transferRchToRcha(rch, recog->rcha);
931         pixDestroy(&pix1);
932     }
933 
934         /* Package the images for debug */
935     if (ppixdb) {
936         *ppixdb = pixaDisplayTiledInRows(pixa1, depth, 2500, 1.0, 0, 20, 1);
937         pixaDestroy(&pixa1);
938     }
939 
940     return 0;
941 }
942 
943 
944 /*!
945  * \brief   recogIdentifyPix()
946  *
947  * \param[in]    recog with LUT's pre-computed
948  * \param[in]    pixs of a single character, 1 bpp
949  * \param[out]   ppixdb [optional] debug pix showing input and best fit
950  * \return  0 if OK, 1 on error
951  *
952  * <pre>
953  * Notes:
954  *      (1) Basic recognition function for a single character.
955  *      (2) If templ_use == L_USE_ALL_TEMPLATES, which is the default
956  *          situation, matching is attempted to every bitmap in the recog,
957  *          and the identify of the best match is returned.
958  *      (3) For finding outliers, templ_use == L_USE_AVERAGE_TEMPLATES, and
959  *          matching is only attemplted to the averaged bitmaps.  For this
960  *          case, the index of the bestsample is meaningless (0 is returned
961  *          if requested).
962  *      (4) The score is related to the confidence (probability of correct
963  *          identification), in that a higher score is correlated with
964  *          a higher probability.  However, the actual relation between
965  *          the correlation (score) and the probability is not known;
966  *          we call this a "score" because "confidence" can be misinterpreted
967  *          as an actual probability.
968  * </pre>
969  */
970 l_int32
recogIdentifyPix(L_RECOG * recog,PIX * pixs,PIX ** ppixdb)971 recogIdentifyPix(L_RECOG  *recog,
972                  PIX      *pixs,
973                  PIX     **ppixdb)
974 {
975 char      *text;
976 l_int32    i, j, n, bestindex, bestsample, area1, area2;
977 l_int32    shiftx, shifty, bestdelx, bestdely, bestwidth, maxyshift;
978 l_float32  x1, y1, x2, y2, delx, dely, score, maxscore;
979 NUMA      *numa;
980 PIX       *pix0, *pix1, *pix2;
981 PIXA      *pixa;
982 PTA       *pta;
983 
984     PROCNAME("recogIdentifyPix");
985 
986     if (ppixdb) *ppixdb = NULL;
987     if (!recog)
988         return ERROR_INT("recog not defined", procName, 1);
989     if (!pixs || pixGetDepth(pixs) != 1)
990         return ERROR_INT("pixs not defined or not 1 bpp", procName, 1);
991 
992         /* Do the averaging if required and not yet done. */
993     if (recog->templ_use == L_USE_AVERAGE_TEMPLATES && !recog->ave_done) {
994         recogAverageSamples(&recog, 0);
995         if (!recog)
996             return ERROR_INT("averaging failed", procName, 1);
997     }
998 
999         /* Binarize and crop to foreground if necessary */
1000     if ((pix0 = recogProcessToIdentify(recog, pixs, 0)) == NULL)
1001         return ERROR_INT("no fg pixels in pix0", procName, 1);
1002 
1003         /* Optionally scale and/or convert to fixed stroke width */
1004     pix1 = recogModifyTemplate(recog, pix0);
1005     pixDestroy(&pix0);
1006     if (!pix1)
1007         return ERROR_INT("no fg pixels in pix1", procName, 1);
1008 
1009         /* Do correlation at all positions within +-maxyshift of
1010          * the nominal centroid alignment. */
1011     pixCountPixels(pix1, &area1, recog->sumtab);
1012     pixCentroid(pix1, recog->centtab, recog->sumtab, &x1, &y1);
1013     bestindex = bestsample = bestdelx = bestdely = bestwidth = 0;
1014     maxscore = 0.0;
1015     maxyshift = recog->maxyshift;
1016     if (recog->templ_use == L_USE_AVERAGE_TEMPLATES) {
1017         for (i = 0; i < recog->setsize; i++) {
1018             numaGetIValue(recog->nasum, i, &area2);
1019             if (area2 == 0) continue;  /* no template available */
1020             pix2 = pixaGetPix(recog->pixa, i, L_CLONE);
1021             ptaGetPt(recog->pta, i, &x2, &y2);
1022             delx = x1 - x2;
1023             dely = y1 - y2;
1024             for (shifty = -maxyshift; shifty <= maxyshift; shifty++) {
1025                 for (shiftx = -maxyshift; shiftx <= maxyshift; shiftx++) {
1026                     pixCorrelationScoreSimple(pix1, pix2, area1, area2,
1027                                               delx + shiftx, dely + shifty,
1028                                               5, 5, recog->sumtab, &score);
1029                     if (score > maxscore) {
1030                         bestindex = i;
1031                         bestdelx = delx + shiftx;
1032                         bestdely = dely + shifty;
1033                         maxscore = score;
1034                     }
1035                 }
1036             }
1037             pixDestroy(&pix2);
1038         }
1039     } else {  /* use all the samples */
1040         for (i = 0; i < recog->setsize; i++) {
1041             pixa = pixaaGetPixa(recog->pixaa, i, L_CLONE);
1042             n = pixaGetCount(pixa);
1043             if (n == 0) {
1044                 pixaDestroy(&pixa);
1045                 continue;
1046             }
1047             numa = numaaGetNuma(recog->naasum, i, L_CLONE);
1048             pta = ptaaGetPta(recog->ptaa, i, L_CLONE);
1049             for (j = 0; j < n; j++) {
1050                 pix2 = pixaGetPix(pixa, j, L_CLONE);
1051                 numaGetIValue(numa, j, &area2);
1052                 ptaGetPt(pta, j, &x2, &y2);
1053                 delx = x1 - x2;
1054                 dely = y1 - y2;
1055                 for (shifty = -maxyshift; shifty <= maxyshift; shifty++) {
1056                     for (shiftx = -maxyshift; shiftx <= maxyshift; shiftx++) {
1057                         pixCorrelationScoreSimple(pix1, pix2, area1, area2,
1058                                                   delx + shiftx, dely + shifty,
1059                                                   5, 5, recog->sumtab, &score);
1060                         if (score > maxscore) {
1061                             bestindex = i;
1062                             bestsample = j;
1063                             bestdelx = delx + shiftx;
1064                             bestdely = dely + shifty;
1065                             maxscore = score;
1066                             bestwidth = pixGetWidth(pix2);
1067                         }
1068                     }
1069                 }
1070                 pixDestroy(&pix2);
1071             }
1072             pixaDestroy(&pixa);
1073             numaDestroy(&numa);
1074             ptaDestroy(&pta);
1075         }
1076     }
1077 
1078         /* Package up the results */
1079     recogGetClassString(recog, bestindex, &text);
1080     rchDestroy(&recog->rch);
1081     recog->rch = rchCreate(bestindex, maxscore, text, bestsample,
1082                            bestdelx, bestdely, bestwidth);
1083 
1084     if (ppixdb) {
1085         if (recog->templ_use == L_USE_AVERAGE_TEMPLATES) {
1086             L_INFO("Best match: str %s; class %d; sh (%d, %d); score %5.3f\n",
1087                    procName, text, bestindex, bestdelx, bestdely, maxscore);
1088             pix2 = pixaGetPix(recog->pixa, bestindex, L_CLONE);
1089         } else {  /* L_USE_ALL_TEMPLATES */
1090             L_INFO("Best match: str %s; sample %d in class %d; score %5.3f\n",
1091                    procName, text, bestsample, bestindex, maxscore);
1092             if (maxyshift > 0 && (L_ABS(bestdelx) > 0 || L_ABS(bestdely) > 0)) {
1093                 L_INFO("  Best shift: (%d, %d)\n",
1094                        procName, bestdelx, bestdely);
1095             }
1096             pix2 = pixaaGetPix(recog->pixaa, bestindex, bestsample, L_CLONE);
1097         }
1098         *ppixdb = recogShowMatch(recog, pix1, pix2, NULL, -1, 0.0);
1099         pixDestroy(&pix2);
1100     }
1101 
1102     pixDestroy(&pix1);
1103     return 0;
1104 }
1105 
1106 
1107 /*!
1108  * \brief   recogSkipIdentify()
1109  *
1110  * \param[in]    recog
1111  * \return  0 if OK, 1 on error
1112  *
1113  * <pre>
1114  * Notes:
1115  *      (1) This just writes a "dummy" result with 0 score and empty
1116  *          string id into the rch.
1117  * </pre>
1118  */
1119 l_int32
recogSkipIdentify(L_RECOG * recog)1120 recogSkipIdentify(L_RECOG  *recog)
1121 {
1122     PROCNAME("recogSkipIdentify");
1123 
1124     if (!recog)
1125         return ERROR_INT("recog not defined", procName, 1);
1126 
1127         /* Package up placeholder results */
1128     rchDestroy(&recog->rch);
1129     recog->rch = rchCreate(0, 0.0, stringNew(""), 0, 0, 0, 0);
1130     return 0;
1131 }
1132 
1133 
1134 /*------------------------------------------------------------------------*
1135  *             Operations for handling identification results             *
1136  *------------------------------------------------------------------------*/
1137 /*!
1138  * \brief   rchaCreate()
1139  *
1140  *      Return: 0 if OK, 1 on error
1141  *
1142  *  Notes:
1143  *      (1) Be sure to destroy any existing rcha before assigning this.
1144  */
1145 static L_RCHA *
rchaCreate()1146 rchaCreate()
1147 {
1148 L_RCHA  *rcha;
1149 
1150     rcha = (L_RCHA *)LEPT_CALLOC(1, sizeof(L_RCHA));
1151     rcha->naindex = numaCreate(0);
1152     rcha->nascore = numaCreate(0);
1153     rcha->satext = sarrayCreate(0);
1154     rcha->nasample = numaCreate(0);
1155     rcha->naxloc = numaCreate(0);
1156     rcha->nayloc = numaCreate(0);
1157     rcha->nawidth = numaCreate(0);
1158     return rcha;
1159 }
1160 
1161 
1162 /*!
1163  * \brief   rchaDestroy()
1164  *
1165  * \param[in,out]  prcha to be nulled
1166  */
1167 void
rchaDestroy(L_RCHA ** prcha)1168 rchaDestroy(L_RCHA  **prcha)
1169 {
1170 L_RCHA  *rcha;
1171 
1172     PROCNAME("rchaDestroy");
1173 
1174     if (prcha == NULL) {
1175         L_WARNING("&rcha is null!\n", procName);
1176         return;
1177     }
1178     if ((rcha = *prcha) == NULL)
1179         return;
1180 
1181     numaDestroy(&rcha->naindex);
1182     numaDestroy(&rcha->nascore);
1183     sarrayDestroy(&rcha->satext);
1184     numaDestroy(&rcha->nasample);
1185     numaDestroy(&rcha->naxloc);
1186     numaDestroy(&rcha->nayloc);
1187     numaDestroy(&rcha->nawidth);
1188     LEPT_FREE(rcha);
1189     *prcha = NULL;
1190     return;
1191 }
1192 
1193 
1194 /*!
1195  * \brief   rchCreate()
1196  *
1197  * \param[in]    index index of best template
1198  * \param[in]    score correlation score of best template
1199  * \param[in]    text character string of best template
1200  * \param[in]    sample index of best sample; -1 if averages are used
1201  * \param[in]    xloc x-location of template: delx + shiftx
1202  * \param[in]    yloc y-location of template: dely + shifty
1203  * \param[in]    width width of best template
1204  * \return  0 if OK, 1 on error
1205  *
1206  * <pre>
1207  * Notes:
1208  *      (1) Be sure to destroy any existing rch before assigning this.
1209  *      (2) This stores the text string, not a copy of it, so the
1210  *          caller must not destroy the string.
1211  * </pre>
1212  */
1213 static L_RCH *
rchCreate(l_int32 index,l_float32 score,char * text,l_int32 sample,l_int32 xloc,l_int32 yloc,l_int32 width)1214 rchCreate(l_int32    index,
1215           l_float32  score,
1216           char      *text,
1217           l_int32    sample,
1218           l_int32    xloc,
1219           l_int32    yloc,
1220           l_int32    width)
1221 {
1222 L_RCH  *rch;
1223 
1224     rch = (L_RCH *)LEPT_CALLOC(1, sizeof(L_RCH));
1225     rch->index = index;
1226     rch->score = score;
1227     rch->text = text;
1228     rch->sample = sample;
1229     rch->xloc = xloc;
1230     rch->yloc = yloc;
1231     rch->width = width;
1232     return rch;
1233 }
1234 
1235 
1236 /*!
1237  * \brief   rchDestroy()
1238  *
1239  * \param[in,out] prch to be nulled
1240  */
1241 void
rchDestroy(L_RCH ** prch)1242 rchDestroy(L_RCH  **prch)
1243 {
1244 L_RCH  *rch;
1245 
1246     PROCNAME("rchDestroy");
1247 
1248     if (prch == NULL) {
1249         L_WARNING("&rch is null!\n", procName);
1250         return;
1251     }
1252     if ((rch = *prch) == NULL)
1253         return;
1254     LEPT_FREE(rch->text);
1255     LEPT_FREE(rch);
1256     *prch = NULL;
1257     return;
1258 }
1259 
1260 
1261 /*!
1262  * \brief   rchaExtract()
1263  *
1264  * \param[in]    rcha
1265  * \param[out]   pnaindex [optional] indices of best templates
1266  * \param[out]   pnascore [optional] correl scores of best templates
1267  * \param[out]   psatext [optional] character strings of best templates
1268  * \param[out]   pnasample [optional] indices of best samples
1269  * \param[out]   pnaxloc [optional] x-locations of templates
1270  * \param[out]   pnayloc [optional] y-locations of templates
1271  * \param[out]   pnawidth [optional] widths of best templates
1272  * \return  0 if OK, 1 on error
1273  *
1274  * <pre>
1275  * Notes:
1276  *      (1) This returns clones of the number and string arrays.  They must
1277  *          be destroyed by the caller.
1278  * </pre>
1279  */
1280 l_int32
rchaExtract(L_RCHA * rcha,NUMA ** pnaindex,NUMA ** pnascore,SARRAY ** psatext,NUMA ** pnasample,NUMA ** pnaxloc,NUMA ** pnayloc,NUMA ** pnawidth)1281 rchaExtract(L_RCHA   *rcha,
1282             NUMA    **pnaindex,
1283             NUMA    **pnascore,
1284             SARRAY  **psatext,
1285             NUMA    **pnasample,
1286             NUMA    **pnaxloc,
1287             NUMA    **pnayloc,
1288             NUMA    **pnawidth)
1289 {
1290     PROCNAME("rchaExtract");
1291 
1292     if (pnaindex) *pnaindex = NULL;
1293     if (pnascore) *pnascore = NULL;
1294     if (psatext) *psatext = NULL;
1295     if (pnasample) *pnasample = NULL;
1296     if (pnaxloc) *pnaxloc = NULL;
1297     if (pnayloc) *pnayloc = NULL;
1298     if (pnawidth) *pnawidth = NULL;
1299     if (!rcha)
1300         return ERROR_INT("rcha not defined", procName, 1);
1301 
1302     if (pnaindex) *pnaindex = numaClone(rcha->naindex);
1303     if (pnascore) *pnascore = numaClone(rcha->nascore);
1304     if (psatext) *psatext = sarrayClone(rcha->satext);
1305     if (pnasample) *pnasample = numaClone(rcha->nasample);
1306     if (pnaxloc) *pnaxloc = numaClone(rcha->naxloc);
1307     if (pnayloc) *pnayloc = numaClone(rcha->nayloc);
1308     if (pnawidth) *pnawidth = numaClone(rcha->nawidth);
1309     return 0;
1310 }
1311 
1312 
1313 /*!
1314  * \brief   rchExtract()
1315  *
1316  * \param[in]    rch
1317  * \param[out]   pindex [optional] index of best template
1318  * \param[out]   pscore [optional] correlation score of best template
1319  * \param[out]   ptext [optional] character string of best template
1320  * \param[out]   psample [optional] index of best sample
1321  * \param[out]   pxloc [optional] x-location of template
1322  * \param[out]   pyloc [optional] y-location of template
1323  * \param[out]   pwidth [optional] width of best template
1324  * \return  0 if OK, 1 on error
1325  */
1326 l_int32
rchExtract(L_RCH * rch,l_int32 * pindex,l_float32 * pscore,char ** ptext,l_int32 * psample,l_int32 * pxloc,l_int32 * pyloc,l_int32 * pwidth)1327 rchExtract(L_RCH      *rch,
1328            l_int32    *pindex,
1329            l_float32  *pscore,
1330            char      **ptext,
1331            l_int32    *psample,
1332            l_int32    *pxloc,
1333            l_int32    *pyloc,
1334            l_int32    *pwidth)
1335 {
1336     PROCNAME("rchExtract");
1337 
1338     if (pindex) *pindex = 0;
1339     if (pscore) *pscore = 0.0;
1340     if (ptext) *ptext = NULL;
1341     if (psample) *psample = 0;
1342     if (pxloc) *pxloc = 0;
1343     if (pyloc) *pyloc = 0;
1344     if (pwidth) *pwidth = 0;
1345     if (!rch)
1346         return ERROR_INT("rch not defined", procName, 1);
1347 
1348     if (pindex) *pindex = rch->index;
1349     if (pscore) *pscore = rch->score;
1350     if (ptext) *ptext = stringNew(rch->text);  /* new string: owned by caller */
1351     if (psample) *psample = rch->sample;
1352     if (pxloc) *pxloc = rch->xloc;
1353     if (pyloc) *pyloc = rch->yloc;
1354     if (pwidth) *pwidth = rch->width;
1355     return 0;
1356 }
1357 
1358 
1359 /*!
1360  * \brief   transferRchToRcha()
1361  *
1362  * \param[in]    rch source of data
1363  * \param[in]    rcha append to arrays in this destination
1364  * \return  0 if OK, 1 on error
1365  *
1366  * <pre>
1367  * Notes:
1368  *      (1) This is used to transfer the results of a single character
1369  *          identification to an rcha array for the array of characters.
1370  * </pre>
1371  */
1372 static l_int32
transferRchToRcha(L_RCH * rch,L_RCHA * rcha)1373 transferRchToRcha(L_RCH   *rch,
1374                   L_RCHA  *rcha)
1375 {
1376 
1377     PROCNAME("transferRchToRcha");
1378 
1379     if (!rch)
1380         return ERROR_INT("rch not defined", procName, 1);
1381     if (!rcha)
1382         return ERROR_INT("rcha not defined", procName, 1);
1383 
1384     numaAddNumber(rcha->naindex, rch->index);
1385     numaAddNumber(rcha->nascore, rch->score);
1386     sarrayAddString(rcha->satext, rch->text, L_COPY);
1387     numaAddNumber(rcha->nasample, rch->sample);
1388     numaAddNumber(rcha->naxloc, rch->xloc);
1389     numaAddNumber(rcha->nayloc, rch->yloc);
1390     numaAddNumber(rcha->nawidth, rch->width);
1391     return 0;
1392 }
1393 
1394 
1395 /*------------------------------------------------------------------------*
1396  *                        Preprocessing and filtering                     *
1397  *------------------------------------------------------------------------*/
1398 /*!
1399  * \brief   recogProcessToIdentify()
1400  *
1401  * \param[in]    recog with LUT's pre-computed
1402  * \param[in]    pixs typ. single character, possibly d > 1 and uncropped
1403  * \param[in]    pad extra pixels added to left and right sides
1404  * \return  pixd 1 bpp, clipped to foreground, or NULL if there
1405  *                    are no fg pixels or on error.
1406  *
1407  * <pre>
1408  * Notes:
1409  *      (1) This is a lightweight operation to insure that the input
1410  *          image is 1 bpp, properly cropped, and padded on each side.
1411  *          If bpp > 1, the image is thresholded.
1412  * </pre>
1413  */
1414 PIX *
recogProcessToIdentify(L_RECOG * recog,PIX * pixs,l_int32 pad)1415 recogProcessToIdentify(L_RECOG  *recog,
1416                        PIX      *pixs,
1417                        l_int32   pad)
1418 {
1419 l_int32  canclip;
1420 PIX     *pix1, *pix2, *pixd;
1421 
1422     PROCNAME("recogProcessToIdentify");
1423 
1424     if (!recog)
1425         return (PIX *)ERROR_PTR("recog not defined", procName, NULL);
1426     if (!pixs)
1427         return (PIX *)ERROR_PTR("pixs not defined", procName, NULL);
1428 
1429     if (pixGetDepth(pixs) != 1)
1430         pix1 = pixThresholdToBinary(pixs, recog->threshold);
1431     else
1432         pix1 = pixClone(pixs);
1433     pixTestClipToForeground(pix1, &canclip);
1434     if (canclip)
1435         pixClipToForeground(pix1, &pix2, NULL);
1436     else
1437         pix2 = pixClone(pix1);
1438     pixDestroy(&pix1);
1439     if (!pix2)
1440         return (PIX *)ERROR_PTR("no foreground pixels", procName, NULL);
1441 
1442     pixd = pixAddBorderGeneral(pix2, pad, pad, 0, 0, 0);
1443     pixDestroy(&pix2);
1444     return pixd;
1445 }
1446 
1447 
1448 /*!
1449  * \brief   recogPreSplittingFilter()
1450  *
1451  * \param[in]    recog
1452  * \param[in]    pixs     1 bpp, many connected components
1453  * \param[in]    minh     minimum height of components to be retained
1454  * \param[in]    minaf    minimum area fraction (|fg|/(w*h)) to be retained
1455  * \param[in]    debug    1 to output indicator arrays
1456  * \return  pixd with filtered components removed or NULL on error
1457  */
1458 static PIX *
recogPreSplittingFilter(L_RECOG * recog,PIX * pixs,l_int32 minh,l_float32 minaf,l_int32 debug)1459 recogPreSplittingFilter(L_RECOG   *recog,
1460                         PIX       *pixs,
1461                         l_int32    minh,
1462                         l_float32  minaf,
1463                         l_int32    debug)
1464 {
1465 l_int32  scaling, minsplitw, maxsplith, maxasp;
1466 BOXA    *boxas;
1467 NUMA    *naw, *nah, *na1, *na1c, *na2, *na3, *na4, *na5, *na6, *na7;
1468 PIX     *pixd;
1469 PIXA    *pixas;
1470 
1471     PROCNAME("recogPreSplittingFilter");
1472 
1473     if (!recog)
1474         return (PIX *)ERROR_PTR("recog not defined", procName, NULL);
1475     if (!pixs)
1476         return (PIX *)ERROR_PTR("pixs not defined", procName, NULL);
1477 
1478         /* If there is scaling, do not remove components based on the
1479          * values of min_splitw and max_splith. */
1480     scaling = (recog->scalew > 0 || recog->scaleh > 0) ? TRUE : FALSE;
1481     minsplitw = (scaling) ? 1 : recog->min_splitw - 3;
1482     maxsplith = (scaling) ? 150 : recog->max_splith;
1483     maxasp = recog->max_wh_ratio;
1484 
1485         /* Generate an indicator array of connected components to remove:
1486          *    short stuff
1487          *    tall stuff
1488          *    components with large width/height ratio
1489          *    components with small area fill fraction  */
1490     boxas = pixConnComp(pixs, &pixas, 8);
1491     pixaFindDimensions(pixas, &naw, &nah);
1492     na1 = numaMakeThresholdIndicator(naw, minsplitw, L_SELECT_IF_LT);
1493     na1c = numaCopy(na1);
1494     na2 = numaMakeThresholdIndicator(nah, minh, L_SELECT_IF_LT);
1495     na3 = numaMakeThresholdIndicator(nah, maxsplith, L_SELECT_IF_GT);
1496     na4 = pixaFindWidthHeightRatio(pixas);
1497     na5 = numaMakeThresholdIndicator(na4, maxasp, L_SELECT_IF_GT);
1498     na6 = pixaFindAreaFraction(pixas);
1499     na7 = numaMakeThresholdIndicator(na6, minaf, L_SELECT_IF_LT);
1500     numaLogicalOp(na1, na1, na2, L_UNION);
1501     numaLogicalOp(na1, na1, na3, L_UNION);
1502     numaLogicalOp(na1, na1, na5, L_UNION);
1503     numaLogicalOp(na1, na1, na7, L_UNION);
1504     pixd = pixCopy(NULL, pixs);
1505     pixRemoveWithIndicator(pixd, pixas, na1);
1506     if (debug)
1507         l_showIndicatorSplitValues(na1c, na2, na3, na5, na7, na1);
1508     numaDestroy(&naw);
1509     numaDestroy(&nah);
1510     numaDestroy(&na1);
1511     numaDestroy(&na1c);
1512     numaDestroy(&na2);
1513     numaDestroy(&na3);
1514     numaDestroy(&na4);
1515     numaDestroy(&na5);
1516     numaDestroy(&na6);
1517     numaDestroy(&na7);
1518     boxaDestroy(&boxas);
1519     pixaDestroy(&pixas);
1520     return pixd;
1521 }
1522 
1523 
1524 /*!
1525  * \brief   recogSplittingFilter()
1526  *
1527  * \param[in]    recog
1528  * \param[in]    pixs     1 bpp, single connected component
1529  * \param[in]    minh     minimum height of component; 0 for default
1530  * \param[in]    minaf    minimum area fraction (|fg|/(w*h)) to be retained
1531  * \param[out]   premove  0 to save, 1 to remove
1532  * \param[in]    debug    1 to output indicator arrays
1533  * \return  0 if OK, 1 on error
1534  */
1535 static l_int32
recogSplittingFilter(L_RECOG * recog,PIX * pixs,l_int32 minh,l_float32 minaf,l_int32 * premove,l_int32 debug)1536 recogSplittingFilter(L_RECOG   *recog,
1537                      PIX       *pixs,
1538                      l_int32    minh,
1539                      l_float32  minaf,
1540                      l_int32   *premove,
1541                      l_int32    debug)
1542 {
1543 l_int32    w, h;
1544 l_float32  aspratio, fract;
1545 
1546     PROCNAME("recogSplittingFilter");
1547 
1548     if (!premove)
1549         return ERROR_INT("&remove not defined", procName, 1);
1550     *premove = 0;
1551     if (!recog)
1552         return ERROR_INT("recog not defined", procName, 1);
1553     if (!pixs)
1554         return ERROR_INT("pixs not defined", procName, 1);
1555     if (minh <= 0) minh = DefaultMinHeight;
1556 
1557         /* Remove from further consideration:
1558          *    small stuff
1559          *    components with large width/height ratio
1560          *    components with small area fill fraction */
1561     pixGetDimensions(pixs, &w, &h, NULL);
1562     if (w < recog->min_splitw) {
1563         if (debug) L_INFO("w = %d < %d\n", procName, w, recog->min_splitw);
1564         *premove = 1;
1565         return 0;
1566     }
1567     if (h < minh) {
1568         if (debug) L_INFO("h = %d < %d\n", procName, h, minh);
1569         *premove = 1;
1570         return 0;
1571     }
1572     aspratio = (l_float32)w / (l_float32)h;
1573     if (aspratio > recog->max_wh_ratio) {
1574         if (debug) L_INFO("w/h = %5.3f too large\n", procName, aspratio);
1575         *premove = 1;
1576         return 0;
1577     }
1578     pixFindAreaFraction(pixs, recog->sumtab, &fract);
1579     if (fract < minaf) {
1580         if (debug) L_INFO("area fill fract %5.3f < %5.3f\n",
1581                           procName, fract, minaf);
1582         *premove = 1;
1583         return 0;
1584     }
1585 
1586     return 0;
1587 }
1588 
1589 
1590 /*------------------------------------------------------------------------*
1591  *                              Postprocessing                            *
1592  *------------------------------------------------------------------------*/
1593 /*!
1594  * \brief   recogExtractNumbers()
1595  *
1596  * \param[in]    recog
1597  * \param[in]    boxas location of components
1598  * \param[in]    scorethresh min score for which we accept a component
1599  * \param[in]    spacethresh max horizontal distance allowed between digits,
1600  *                           use -1 for default
1601  * \param[out]   pbaa [optional] bounding boxes of identified numbers
1602  * \param[out]   pnaa [optional] scores of identified digits
1603  * \return  sa of identified numbers, or NULL on error
1604  *
1605  * <pre>
1606  * Notes:
1607  *      (1) This extracts digit data after recogaIdentifyMultiple() or
1608  *          lower-level identification has taken place.
1609  *      (2) Each string in the returned sa contains a sequence of ascii
1610  *          digits in a number.
1611  *      (3) The horizontal distance between boxes (limited by %spacethresh)
1612  *          is the negative of the horizontal overlap.
1613  *      (4) Components with a score less than %scorethresh, which may
1614  *          be hyphens or other small characters, will signal the
1615  *          end of the current sequence of digits in the number.  A typical
1616  *          value for %scorethresh is 0.60.
1617  *      (5) We allow two digits to be combined if these conditions apply:
1618  *            (a) the first is to the left of the second
1619  *            (b) the second has a horizontal separation less than %spacethresh
1620  *            (c) the vertical overlap >= 0 (vertical separation < 0)
1621  *            (d) both have a score that exceeds %scorethresh
1622  *      (6) Each numa in the optionally returned naa contains the digit
1623  *          scores of a number.  Each boxa in the optionally returned baa
1624  *          contains the bounding boxes of the digits in the number.
1625  * </pre>
1626  */
1627 SARRAY *
recogExtractNumbers(L_RECOG * recog,BOXA * boxas,l_float32 scorethresh,l_int32 spacethresh,BOXAA ** pbaa,NUMAA ** pnaa)1628 recogExtractNumbers(L_RECOG   *recog,
1629                     BOXA      *boxas,
1630                     l_float32  scorethresh,
1631                     l_int32    spacethresh,
1632                     BOXAA    **pbaa,
1633                     NUMAA    **pnaa)
1634 {
1635 char      *str, *text;
1636 l_int32    i, n, x1, x2, h_sep, v_sep;
1637 l_float32  score;
1638 BOX       *box, *prebox;
1639 BOXA      *ba;
1640 BOXAA     *baa;
1641 NUMA      *nascore, *na;
1642 NUMAA     *naa;
1643 SARRAY    *satext, *sa, *saout;
1644 
1645     PROCNAME("recogExtractNumbers");
1646 
1647     if (pbaa) *pbaa = NULL;
1648     if (pnaa) *pnaa = NULL;
1649     if (!recog || !recog->rcha)
1650         return (SARRAY *)ERROR_PTR("recog and rcha not both defined",
1651                                    procName, NULL);
1652     if (!boxas)
1653         return (SARRAY *)ERROR_PTR("boxas not defined", procName, NULL);
1654 
1655     if (spacethresh < 0)
1656         spacethresh = L_MAX(recog->maxheight_u, 20);
1657     rchaExtract(recog->rcha, NULL, &nascore, &satext, NULL, NULL, NULL, NULL);
1658     if (!nascore || !satext) {
1659         numaDestroy(&nascore);
1660         sarrayDestroy(&satext);
1661         return (SARRAY *)ERROR_PTR("nascore and satext not both returned",
1662                                    procName, NULL);
1663     }
1664 
1665     saout = sarrayCreate(0);
1666     naa = numaaCreate(0);
1667     baa = boxaaCreate(0);
1668     prebox = NULL;
1669     n = numaGetCount(nascore);
1670     for (i = 0; i < n; i++) {
1671         numaGetFValue(nascore, i, &score);
1672         text = sarrayGetString(satext, i, L_NOCOPY);
1673         if (prebox == NULL) {  /* no current run */
1674             if (score < scorethresh) {
1675                 continue;
1676             } else {  /* start a number run */
1677                 sa = sarrayCreate(0);
1678                 ba = boxaCreate(0);
1679                 na = numaCreate(0);
1680                 sarrayAddString(sa, text, L_COPY);
1681                 prebox = boxaGetBox(boxas, i, L_CLONE);
1682                 boxaAddBox(ba, prebox, L_COPY);
1683                 numaAddNumber(na, score);
1684             }
1685         } else {  /* in a current number run */
1686             box = boxaGetBox(boxas, i, L_CLONE);
1687             boxGetGeometry(prebox, &x1, NULL, NULL, NULL);
1688             boxGetGeometry(box, &x2, NULL, NULL, NULL);
1689             boxSeparationDistance(box, prebox, &h_sep, &v_sep);
1690             boxDestroy(&prebox);
1691             if (x1 < x2 && h_sep <= spacethresh &&
1692                 v_sep < 0 && score >= scorethresh) {  /* add to number */
1693                 sarrayAddString(sa, text, L_COPY);
1694                 boxaAddBox(ba, box, L_COPY);
1695                 numaAddNumber(na, score);
1696                 prebox = box;
1697             } else {  /* save the completed number */
1698                 str = sarrayToString(sa, 0);
1699                 sarrayAddString(saout, str, L_INSERT);
1700                 sarrayDestroy(&sa);
1701                 boxaaAddBoxa(baa, ba, L_INSERT);
1702                 numaaAddNuma(naa, na, L_INSERT);
1703                 boxDestroy(&box);
1704                 if (score >= scorethresh) {  /* start a new number */
1705                     i--;
1706                     continue;
1707                 }
1708             }
1709         }
1710     }
1711 
1712     if (prebox) {  /* save the last number */
1713         str = sarrayToString(sa, 0);
1714         sarrayAddString(saout, str, L_INSERT);
1715         boxaaAddBoxa(baa, ba, L_INSERT);
1716         numaaAddNuma(naa, na, L_INSERT);
1717         sarrayDestroy(&sa);
1718         boxDestroy(&prebox);
1719     }
1720 
1721     numaDestroy(&nascore);
1722     sarrayDestroy(&satext);
1723     if (sarrayGetCount(saout) == 0) {
1724         sarrayDestroy(&saout);
1725         boxaaDestroy(&baa);
1726         numaaDestroy(&naa);
1727         L_INFO("saout has no identified text\n", procName);
1728         return NULL;
1729     }
1730 
1731     if (pbaa)
1732         *pbaa = baa;
1733     else
1734         boxaaDestroy(&baa);
1735     if (pnaa)
1736         *pnaa = naa;
1737     else
1738         numaaDestroy(&naa);
1739     return saout;
1740 }
1741 
1742 /*!
1743  * \brief   showExtractNumbers()
1744  *
1745  * \param[in]    pixs   input 1 bpp image
1746  * \param[in]    sa     recognized text strings
1747  * \param[in]    baa    boxa array for location of characters in each string
1748  * \param[in]    naa    numa array for scores of characters in each string
1749  * \param[out]   ppixdb  [optional] input pixs with identified chars outlined
1750  * \return  pixa   of identified strings with text and scores, or NULL on error
1751  *
1752  * <pre>
1753  * Notes:
1754  *      (1) This is a debugging routine on digit identification; e.g.:
1755  *            recogIdentifyMultiple(recog, pixs, 0, 1, &boxa, NULL, NULL, 0);
1756  *            sa = recogExtractNumbers(recog, boxa, 0.8, -1, &baa, &naa);
1757  *            pixa = showExtractNumbers(pixs, sa, baa, naa, NULL);
1758  * </pre>
1759  */
1760 PIXA *
showExtractNumbers(PIX * pixs,SARRAY * sa,BOXAA * baa,NUMAA * naa,PIX ** ppixdb)1761 showExtractNumbers(PIX     *pixs,
1762                    SARRAY  *sa,
1763                    BOXAA   *baa,
1764                    NUMAA   *naa,
1765                    PIX    **ppixdb)
1766 {
1767 char       buf[128];
1768 char      *textstr, *scorestr;
1769 l_int32    i, j, n, nchar, len;
1770 l_float32  score;
1771 L_BMF     *bmf;
1772 BOX       *box1, *box2;
1773 BOXA      *ba;
1774 NUMA      *na;
1775 PIX       *pix1, *pix2, *pix3, *pix4;
1776 PIXA      *pixa;
1777 
1778     PROCNAME("showExtractNumbers");
1779 
1780     if (ppixdb) *ppixdb = NULL;
1781     if (!pixs)
1782         return (PIXA *)ERROR_PTR("pixs not defined", procName, NULL);
1783     if (!sa)
1784         return (PIXA *)ERROR_PTR("sa not defined", procName, NULL);
1785     if (!baa)
1786         return (PIXA *)ERROR_PTR("baa not defined", procName, NULL);
1787     if (!naa)
1788         return (PIXA *)ERROR_PTR("naa not defined", procName, NULL);
1789 
1790     n = sarrayGetCount(sa);
1791     pixa = pixaCreate(n);
1792     bmf = bmfCreate(NULL, 6);
1793     if (ppixdb) *ppixdb = pixConvertTo8(pixs, 1);
1794     for (i = 0; i < n; i++) {
1795         textstr = sarrayGetString(sa, i, L_NOCOPY);
1796         ba = boxaaGetBoxa(baa, i, L_CLONE);
1797         na = numaaGetNuma(naa, i, L_CLONE);
1798         boxaGetExtent(ba, NULL, NULL, &box1);
1799         box2 = boxAdjustSides(NULL, box1, -5, 5, -5, 5);
1800         if (ppixdb) pixRenderBoxArb(*ppixdb, box2, 3, 255, 0, 0);
1801         pix1 = pixClipRectangle(pixs, box1, NULL);
1802         len = strlen(textstr) + 1;
1803         pix2 = pixAddBlackOrWhiteBorder(pix1, 14 * len, 14 * len,
1804                                         5, 3, L_SET_WHITE);
1805         pix3 = pixConvertTo8(pix2, 1);
1806         nchar = numaGetCount(na);
1807         scorestr = NULL;
1808         for (j = 0; j < nchar; j++) {
1809              numaGetFValue(na, j, &score);
1810              snprintf(buf, sizeof(buf), "%d", (l_int32)(100 * score));
1811              stringJoinIP(&scorestr, buf);
1812              if (j < nchar - 1) stringJoinIP(&scorestr, ",");
1813         }
1814         snprintf(buf, sizeof(buf), "%s: %s\n", textstr, scorestr);
1815         pix4 = pixAddTextlines(pix3, bmf, buf, 0xff000000, L_ADD_BELOW);
1816         pixaAddPix(pixa, pix4, L_INSERT);
1817         boxDestroy(&box1);
1818         boxDestroy(&box2);
1819         pixDestroy(&pix1);
1820         pixDestroy(&pix2);
1821         pixDestroy(&pix3);
1822         boxaDestroy(&ba);
1823         numaDestroy(&na);
1824         LEPT_FREE(scorestr);
1825     }
1826 
1827     bmfDestroy(&bmf);
1828     return pixa;
1829 }
1830 
1831 
1832 /*------------------------------------------------------------------------*
1833  *                        Static debug helper                             *
1834  *------------------------------------------------------------------------*/
1835 /*!
1836  * \brief   l_showIndicatorSplitValues()
1837  *
1838  * \param[in]  na1, na2, na3, na4, na5, na6  6 indicator array
1839  *
1840  * <pre>
1841  * Notes:
1842  *      (1) The values indicate that specific criteria has been met
1843  *          for component removal by pre-splitting filter..
1844  *          The 'result' line shows which components have been removed.
1845  * </pre>
1846  */
1847 static void
l_showIndicatorSplitValues(NUMA * na1,NUMA * na2,NUMA * na3,NUMA * na4,NUMA * na5,NUMA * na6)1848 l_showIndicatorSplitValues(NUMA  *na1,
1849                            NUMA  *na2,
1850                            NUMA  *na3,
1851                            NUMA  *na4,
1852                            NUMA  *na5,
1853                            NUMA  *na6)
1854 {
1855 l_int32  i, n;
1856 
1857     n = numaGetCount(na1);
1858     fprintf(stderr, "================================================\n");
1859     fprintf(stderr, "lt minw:    ");
1860     for (i = 0; i < n; i++)
1861         fprintf(stderr, "%4d ", (l_int32)na1->array[i]);
1862     fprintf(stderr, "\nlt minh:    ");
1863     for (i = 0; i < n; i++)
1864         fprintf(stderr, "%4d ", (l_int32)na2->array[i]);
1865     fprintf(stderr, "\ngt maxh:    ");
1866     for (i = 0; i < n; i++)
1867         fprintf(stderr, "%4d ", (l_int32)na3->array[i]);
1868     fprintf(stderr, "\ngt maxasp:  ");
1869     for (i = 0; i < n; i++)
1870         fprintf(stderr, "%4d ", (l_int32)na4->array[i]);
1871     fprintf(stderr, "\nlt minaf:   ");
1872     for (i = 0; i < n; i++)
1873         fprintf(stderr, "%4d ", (l_int32)na5->array[i]);
1874     fprintf(stderr, "\n------------------------------------------------");
1875     fprintf(stderr, "\nresult:     ");
1876     for (i = 0; i < n; i++)
1877         fprintf(stderr, "%4d ", (l_int32)na6->array[i]);
1878     fprintf(stderr, "\n================================================\n");
1879 }
1880