1 /*====================================================================*
2  -  Copyright (C) 2001 Leptonica.  All rights reserved.
3  -
4  -  Redistribution and use in source and binary forms, with or without
5  -  modification, are permitted provided that the following conditions
6  -  are met:
7  -  1. Redistributions of source code must retain the above copyright
8  -     notice, this list of conditions and the following disclaimer.
9  -  2. Redistributions in binary form must reproduce the above
10  -     copyright notice, this list of conditions and the following
11  -     disclaimer in the documentation and/or other materials
12  -     provided with the distribution.
13  -
14  -  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15  -  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16  -  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17  -  A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL ANY
18  -  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19  -  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20  -  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21  -  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22  -  OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23  -  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24  -  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  *====================================================================*/
26 
27 /*!
28  * \file recogtrain.c
29  * <pre>
30  *
31  *      Training on labeled data
32  *         l_int32             recogTrainLabeled()
33  *         PIX                *recogProcessLabeled()
34  *         l_int32             recogAddSample()
35  *         PIX                *recogModifyTemplate()
36  *         l_int32             recogAverageSamples()
37  *         l_int32             pixaAccumulateSamples()
38  *         l_int32             recogTrainingFinished()
39  *         static l_int32      recogTemplatesAreOK()
40  *         PIXA               *recogFilterPixaBySize()
41  *         PIXAA              *recogSortPixaByClass()
42  *         l_int32             recogRemoveOutliers1()
43  *         PIXA               *pixaRemoveOutliers1()
44  *         l_int32             recogRemoveOutliers2()
45  *         PIXA               *pixaRemoveOutliers2()
46  *
47  *      Training on unlabeled data
48  *         L_RECOG             recogTrainFromBoot()
49  *
50  *      Padding the digit training set
51  *         l_int32             recogPadDigitTrainingSet()
52  *         l_int32             recogIsPaddingNeeded()
53  *         static SARRAY      *recogAddMissingClassStrings()
54  *         PIXA               *recogAddDigitPadTemplates()
55  *         static l_int32      recogCharsetAvailable()
56  *
57  *      Making a boot digit recognizer
58  *         L_RECOG            *recogMakeBootDigitRecog()
59  *         PIXA               *recogMakeBootDigitTemplates()
60  *
61  *      Debugging
62  *         l_int32             recogShowContent()
63  *         l_int32             recogDebugAverages()
64  *         l_int32             recogShowAverageTemplates()
65  *         static PIX         *pixDisplayOutliers()
66  *         PIX                *recogDisplayOutlier()
67  *         PIX                *recogShowMatchesInRange()
68  *         PIX                *recogShowMatch()
69  *
70  *  These abbreviations are for the type of template to be used:
71  *    * SI (for the scanned images)
72  *    * WNL (for width-normalized lines, formed by first skeletonizing
73  *           the scanned images, and then dilating to a fixed width)
74  *  These abbreviations are for the type of recognizer:
 *    * BAR (book-adapted recognizer; the best type; can do identification
 *           with unscaled images and separation of touching characters).
 *    * BSR (bootstrap recognizer; used if more labeled templates are
 *           required for a BAR, either for finding more templates from
 *           the book, or making a hybrid BAR/BSR).
80  *
81  *  The recog struct typically holds two versions of the input templates
82  *  (e.g. from a pixa) that were used to generate it.  One version is
83  *  the unscaled input templates.  The other version is the one that
84  *  will be used by the recog to identify unlabeled data.  That version
85  *  depends on the input parameters when the recog is created.  The choices
86  *  for the latter version, and their suggested use, are:
87  *  (1) unscaled SI -- typical for BAR, generated from book images
88  *  (2) unscaled WNL -- ditto
89  *  (3) scaled SI -- typical for recognizers containing template
90  *      images from sources other than the book to be recognized
91  *  (4) scaled WNL -- ditto
92  *  For cases (3) and (4), we recommend scaling to fixed height; e.g.,
93  *  scalew = 0, scaleh = 40.
94  *  When using WNL, we recommend using a width of 5 in the template
95  *  and 4 in the unlabeled data.
96  *  It appears that better results for a BAR are usually obtained using
97  *  SI than WNL, but more experimentation is needed.
98  *
99  *  This utility is designed to build recognizers that are specifically
100  *  adapted from a large amount of material, such as a book.  These
101  *  use labeled templates taken from the material, and not scaled.
102  *  In addition, two special recognizers are useful:
103  *  (1) Bootstrap recognizer (BSR).  This uses height-scaled templates,
104  *      that have been extended with several repetitions in one of two ways:
 *      (a) anisotropic width scaling (for either SI or WNL)
106  *      (b) iterative erosions/dilations (for SI).
107  *  (2) Outlier removal.  This uses height scaled templates.  It can be
108  *      implemented without using templates that are aligned averages of all
109  *      templates in a class.
110  *
111  *  Recognizers are inexpensive to generate, for example, from a pixa
112  *  of labeled templates.  The general process of building a BAR is
113  *  to start with labeled templates, e.g., in a pixa, make a BAR, and
114  *  analyze new samples from the book to augment the BAR until it has
115  *  enough samples for each character class.  Along the way, samples
116  *  from a BSR may be added for help in training.  If not enough samples
117  *  are available for the BAR, it can finally be augmented with BSR
118  *  samples, in which case the resulting hybrid BAR/BSR recognizer
119  *  must work on scaled images.
120  *
121  *  Here are the steps in doing recog training:
 *  A. Generate a BAR from any existing labeled templates
123  *    (1) Create a recog and add the templates, using recogAddSample().
124  *        This stores the unscaled templates.
125  *        [Note: this can be done in one step if the labeled templates are put
126  *         into a pixa:
127  *           L_Recog *rec = recogCreateFromPixa(pixa, ...);  ]
128  *    (2) Call recogTrainingFinished() to generate the (sometimes modified)
129  *        templates to be used for correlation.
130  *    (3) Optionally, remove outliers.
131  *    If there are sufficient samples in the classes, we're done. Otherwise,
132  *  B. Try to get more samples from the book to pad the BAR.
133  *     (1) Save the unscaled, labeled templates from the BAR.
134  *     (2) Supplement the BAR with bootstrap templates to make a hybrid BAR/BSR.
135  *     (3) Do recognition on more unlabeled images, scaled to a fixed height
136  *     (4) Add the unscaled, labeled images to the saved set.
137  *     (5) Optionally, remove outliers.
138  *     If there are sufficient samples in the classes, we're done. Otherwise,
139  *  C. For classes without a sufficient number of templates, we can
 *     supplement the BAR with templates from a BSR (a hybrid BAR/BSR),
141  *     and do recognition scaled to a fixed height.
142  *
143  *  Here are several methods that can be used for identifying outliers:
144  *  (1) Compute average templates for each class and remove a candidate
145  *      that is poorly correlated with the average.  This is the most
146  *      simple method.  recogRemoveOutliers1() uses this, supplemented with
147  *      a second threshold and a target number of templates to be saved.
148  *  (2) Compute average templates for each class and remove a candidate
149  *      that is more highly correlated with the average of some other class.
150  *      This does not require setting a threshold for the correlation.
151  *      recogRemoveOutliers2() uses this method, supplemented with a minimum
152  *      correlation score.
153  *  (3) For each candidate, find the average correlation with other
154  *      members of its class, and remove those that have a relatively
 *      low average correlation.  This is similar to (1) and gives
 *      comparable results, but because it does not use average templates,
 *      it requires a bit more computation.
158  * </pre>
159  */
160 
161 #include <string.h>
162 #include "allheaders.h"
163 
    /* Static functions: forward declarations of helpers that have
     * internal linkage (private to this file) */
static l_int32 recogTemplatesAreOK(L_RECOG *recog, l_int32 minsize,
                                   l_float32 minfract, l_int32 *pok);
static SARRAY *recogAddMissingClassStrings(L_RECOG  *recog);
static l_int32 recogCharsetAvailable(l_int32 type);
static PIX *pixDisplayOutliers(PIXA *pixas, NUMA *nas);
static PIX *recogDisplayOutlier(L_RECOG *recog, l_int32 iclass, l_int32 jsamp,
                                l_int32 maxclass, l_float32 maxscore);
172 
    /* Default parameters that are used in recogTemplatesAreOK() and
     * in outlier removal functions, and that use template set size
     * to decide if the set of templates (before outliers are removed)
     * is valid.  Values are set to accept most sets of sample templates. */
static const l_int32    DEFAULT_MIN_SET_SIZE = 1;  /* minimum number of
                                       samples for a valid class */
static const l_float32  DEFAULT_MIN_SET_FRACT = 0.4;  /* minimum fraction
                               of classes required for a valid recog */

    /* Defaults in pixaRemoveOutliers1() and pixaRemoveOutliers2() */
static const l_float32  DEFAULT_MIN_SCORE = 0.75; /* keep everything above */
static const l_int32    DEFAULT_MIN_TARGET = 3;  /* to be kept if possible */
static const l_float32  LOWER_SCORE_THRESHOLD = 0.5;  /* templates can be
                 * kept down to this score, if needed, to retain the
                 * desired minimum number of templates */
188 
189 
190 /*------------------------------------------------------------------------*
191  *                                Training                                *
192  *------------------------------------------------------------------------*/
193 /*!
194  * \brief   recogTrainLabeled()
195  *
196  * \param[in]    recog in training mode
197  * \param[in]    pixs if depth > 1, will be thresholded to 1 bpp
198  * \param[in]    box [optional] cropping box
199  * \param[in]    text [optional] if null, use text field in pix
200  * \param[in]    debug 1 to display images of samples not captured
201  * \return  0 if OK, 1 on error
202  *
203  * <pre>
204  * Notes:
205  *      (1) Training is restricted to the addition of a single
206  *          character in an arbitrary (e.g., UTF8) charset
207  *      (2) If box != null, it should represent the location in %pixs
208  *          of the character image.
209  * </pre>
210  */
211 l_int32
recogTrainLabeled(L_RECOG * recog,PIX * pixs,BOX * box,char * text,l_int32 debug)212 recogTrainLabeled(L_RECOG  *recog,
213                   PIX      *pixs,
214                   BOX      *box,
215                   char     *text,
216                   l_int32   debug)
217 {
218 l_int32  ret;
219 PIX     *pix;
220 
221     PROCNAME("recogTrainLabeled");
222 
223     if (!recog)
224         return ERROR_INT("recog not defined", procName, 1);
225     if (!pixs)
226         return ERROR_INT("pixs not defined", procName, 1);
227 
228         /* Prepare the sample to be added. This step also acts
229          * as a filter, and can invalidate pixs as a template. */
230     ret = recogProcessLabeled(recog, pixs, box, text, &pix);
231     if (ret) {
232         pixDestroy(&pix);
233         L_WARNING("failure to get sample '%s' for training\n", procName,
234                   text);
235         return 1;
236     }
237 
238     recogAddSample(recog, pix, debug);
239     pixDestroy(&pix);
240     return 0;
241 }
242 
243 
244 /*!
245  * \brief   recogProcessLabeled()
246  *
247  * \param[in]    recog   in training mode
248  * \param[in]    pixs    if depth > 1, will be thresholded to 1 bpp
249  * \param[in]    box     [optional] cropping box
250  * \param[in]    text    [optional] if null, use text field in pix
251  * \param[out]   ppix    addr of pix, 1 bpp, labeled
252  * \return  0 if OK, 1 on error
253  *
254  * <pre>
255  * Notes:
256  *      (1) This crops and binarizes the input image, generating a pix
257  *          of one character where the charval is inserted into the pix.
258  * </pre>
259  */
260 l_int32
recogProcessLabeled(L_RECOG * recog,PIX * pixs,BOX * box,char * text,PIX ** ppix)261 recogProcessLabeled(L_RECOG  *recog,
262                     PIX      *pixs,
263                     BOX      *box,
264                     char     *text,
265                     PIX     **ppix)
266 {
267 char    *textdata;
268 l_int32  textinpix, textin, nsets;
269 NUMA    *na;
270 PIX     *pix1, *pix2, *pix3, *pix4;
271 
272     PROCNAME("recogProcessLabeled");
273 
274     if (!ppix)
275         return ERROR_INT("&pix not defined", procName, 1);
276     *ppix = NULL;
277     if (!recog)
278         return ERROR_INT("recog not defined", procName, 1);
279     if (!pixs)
280         return ERROR_INT("pixs not defined", procName, 1);
281 
282         /* Find the text; this will be stored with the output images */
283     textin = text && (text[0] != '\0');
284     textinpix = (pixs->text && (pixs->text[0] != '\0'));
285     if (!textin && !textinpix) {
286         L_ERROR("no text: %d\n", procName, recog->num_samples);
287         return 1;
288     }
289     textdata = (textin) ? text : pixs->text;  /* do not free */
290 
291         /* Crop and binarize if necessary */
292     if (box)
293         pix1 = pixClipRectangle(pixs, box, NULL);
294     else
295         pix1 = pixClone(pixs);
296     if (pixGetDepth(pix1) > 1)
297         pix2 = pixConvertTo1(pix1, recog->threshold);
298     else
299         pix2 = pixClone(pix1);
300     pixDestroy(&pix1);
301 
302         /* Remove isolated noise, using as a criterion all components
303          * that are removed by a vertical opening of size 5. */
304     pix3 = pixMorphSequence(pix2, "o1.5", 0);  /* seed */
305     pixSeedfillBinary(pix3, pix3, pix2, 8);  /* fill from seed; clip to pix2 */
306     pixDestroy(&pix2);
307 
308         /* Clip to foreground */
309     pixClipToForeground(pix3, &pix4, NULL);
310     pixDestroy(&pix3);
311     if (!pix4)
312         return ERROR_INT("pix4 is empty", procName, 1);
313 
314         /* Verify that if there is more than 1 c.c., they all have
315          * horizontal overlap */
316     na = pixCountByColumn(pix4, NULL);
317     numaCountNonzeroRuns(na, &nsets);
318     numaDestroy(&na);
319     if (nsets > 1) {
320         L_WARNING("found %d sets of horiz separated c.c.; skipping\n",
321                   procName, nsets);
322         pixDestroy(&pix4);
323         return 1;
324     }
325 
326     pixSetText(pix4, textdata);
327     *ppix = pix4;
328     return 0;
329 }
330 
331 
332 /*!
333  * \brief   recogAddSample()
334  *
335  * \param[in]    recog
336  * \param[in]    pix         a single character, 1 bpp
337  * \param[in]    debug
338  * \return  0 if OK, 1 on error
339  *
340  * <pre>
341  * Notes:
342  *      (1) The pix is 1 bpp, with the character string label embedded.
343  *      (2) The pixaa_u array of the recog is initialized to accept
344  *          up to 256 different classes.  When training is finished,
345  *          the arrays are truncated to the actual number of classes.
346  *          To pad an existing recog from the boot recognizers, training
347  *          is started again; if samples from a new class are added,
348  *          the pixaa_u array is extended by adding a pixa to hold them.
349  * </pre>
350  */
351 l_int32
recogAddSample(L_RECOG * recog,PIX * pix,l_int32 debug)352 recogAddSample(L_RECOG  *recog,
353                PIX      *pix,
354                l_int32   debug)
355 {
356 char    *text;
357 l_int32  npa, charint, index;
358 PIXA    *pixa1;
359 PIXAA   *paa;
360 
361     PROCNAME("recogAddSample");
362 
363     if (!recog)
364         return ERROR_INT("recog not defined", procName, 1);
365     if (!pix || pixGetDepth(pix) != 1)
366         return ERROR_INT("pix not defined or not 1 bpp\n", procName, 1);
367     if (recog->train_done)
368         return ERROR_INT("not added: training has been completed", procName, 1);
369     paa = recog->pixaa_u;
370 
371         /* Make sure the character is in the set */
372     text = pixGetText(pix);
373     if (l_convertCharstrToInt(text, &charint) == 1) {
374         L_ERROR("invalid text: %s\n", procName, text);
375         return 1;
376     }
377 
378         /* Determine the class array index.  Check if the class
379          * alreadly exists, and if not, add it. */
380     if (recogGetClassIndex(recog, charint, text, &index) == 1) {
381             /* New class must be added */
382         npa = pixaaGetCount(paa, NULL);
383         if (index > npa) {
384             L_ERROR("oops: bad index %d > npa %d!!\n", procName, index, npa);
385             return 1;
386         }
387         if (index == npa) {  /* paa needs to be extended */
388             L_INFO("Adding new class and pixa: index = %d, text = %s\n",
389                    procName, index, text);
390             pixa1 = pixaCreate(10);
391             pixaaAddPixa(paa, pixa1, L_INSERT);
392         }
393     }
394     if (debug) {
395         L_INFO("Identified text label: %s\n", procName, text);
396         L_INFO("Identified: charint = %d, index = %d\n",
397                procName, charint, index);
398     }
399 
400         /* Insert the unscaled character image into the right pixa.
401          * (Unscaled images are required to split touching characters.) */
402     recog->num_samples++;
403     pixaaAddPix(paa, index, pix, NULL, L_COPY);
404     return 0;
405 }
406 
407 
408 /*!
409  * \brief   recogModifyTemplate()
410  *
411  * \param[in]    recog
412  * \param[in]    pixs   1 bpp, to be optionally scaled and turned into
413  *                      strokes of fixed width
414  * \return  pixd   modified pix if OK, NULL on error
415  */
416 PIX *
recogModifyTemplate(L_RECOG * recog,PIX * pixs)417 recogModifyTemplate(L_RECOG  *recog,
418                     PIX      *pixs)
419 {
420 l_int32  w, h, empty;
421 PIX     *pix1, *pix2;
422 
423     PROCNAME("recogModifyTemplate");
424 
425     if (!recog)
426         return (PIX *)ERROR_PTR("recog not defined", procName, NULL);
427     if (!pixs)
428         return (PIX *)ERROR_PTR("pixs not defined", procName, NULL);
429 
430         /* Scale first */
431     pixGetDimensions(pixs, &w, &h, NULL);
432     if ((recog->scalew == 0 || recog->scalew == w) &&
433         (recog->scaleh == 0 || recog->scaleh == h)) {  /* no scaling */
434         pix1 = pixCopy(NULL, pixs);
435     } else {
436         pix1 = pixScaleToSize(pixs, recog->scalew, recog->scaleh);
437     }
438     if (!pix1)
439         return (PIX *)ERROR_PTR("pix1 not made", procName, NULL);
440 
441         /* Then optionally convert to lines */
442     if (recog->linew <= 0) {
443         pix2 = pixClone(pix1);
444     } else {
445         pix2 = pixSetStrokeWidth(pix1, recog->linew, 1, 8);
446     }
447     pixDestroy(&pix1);
448     if (!pix2)
449         return (PIX *)ERROR_PTR("pix2 not made", procName, NULL);
450 
451         /* Make sure we still have some pixels */
452     pixZero(pix2, &empty);
453     if (empty) {
454         pixDestroy(&pix2);
455         return (PIX *)ERROR_PTR("modified template has no pixels",
456                                 procName, NULL);
457     }
458     return pix2;
459 }
460 
461 
462 /*!
463  * \brief   recogAverageSamples()
464  *
465  * \param[in]   precog      addr of existing recog; may be destroyed
466  * \param[in]   debug
467  * \return  0 on success, 1 on failure
468  *
469  * <pre>
470  * Notes:
471  *      (1) This is only called in two situations:
472  *          (a) When splitting characters using either the DID method
473  *              recogDecode() or the the greedy splitter
474  *              recogCorrelationBestRow()
475  *          (b) By a special recognizer that is used to remove outliers.
476  *          Both unscaled and scaled inputs are averaged.
477  *      (2) If the data in any class is nonexistent (no samples), or
478  *          very bad (no fg pixels in the average), or if the ratio
479  *          of max/min average unscaled class template heights is
480  *          greater than max_ht_ratio, this destroys the recog.
481  *          The caller must check the return value of the recog.
482  *      (3) Set debug = 1 to view the resulting templates and their centroids.
483  * </pre>
484  */
485 l_int32
recogAverageSamples(L_RECOG ** precog,l_int32 debug)486 recogAverageSamples(L_RECOG  **precog,
487                     l_int32    debug)
488 {
489 l_int32    i, nsamp, size, area, bx, by, badclass;
490 l_float32  x, y, hratio;
491 BOX       *box;
492 PIXA      *pixa1;
493 PIX       *pix1, *pix2, *pix3;
494 PTA       *pta1;
495 L_RECOG   *recog;
496 
497     PROCNAME("recogAverageSamples");
498 
499     if (!precog)
500         return ERROR_INT("&recog not defined", procName, 1);
501     if ((recog = *precog) == NULL)
502         return ERROR_INT("recog not defined", procName, 1);
503 
504     if (recog->ave_done) {
505         if (debug)  /* always do this if requested */
506             recogShowAverageTemplates(recog);
507         return 0;
508     }
509 
510         /* Remove any previous averaging data */
511     size = recog->setsize;
512     pixaDestroy(&recog->pixa_u);
513     ptaDestroy(&recog->pta_u);
514     numaDestroy(&recog->nasum_u);
515     recog->pixa_u = pixaCreate(size);
516     recog->pta_u = ptaCreate(size);
517     recog->nasum_u = numaCreate(size);
518 
519     pixaDestroy(&recog->pixa);
520     ptaDestroy(&recog->pta);
521     numaDestroy(&recog->nasum);
522     recog->pixa = pixaCreate(size);
523     recog->pta = ptaCreate(size);
524     recog->nasum = numaCreate(size);
525 
526         /* Unscaled bitmaps: compute averaged bitmap, centroid, and fg area.
527          * Note that when we threshold to 1 bpp the 8 bpp averaged template
528          * that is returned from the accumulator, it will not be cropped
529          * to the foreground.  We must crop it, because the correlator
530          * makes that assumption and will return a zero value if the
531          * width or height of the two images differs by several pixels.
532          * But cropping to fg can cause the value of the centroid to
533          * change, if bx > 0 or by > 0. */
534     badclass = FALSE;
535     for (i = 0; i < size; i++) {
536         pixa1 = pixaaGetPixa(recog->pixaa_u, i, L_CLONE);
537         pta1 = ptaaGetPta(recog->ptaa_u, i, L_CLONE);
538         nsamp = pixaGetCount(pixa1);
539         nsamp = L_MIN(nsamp, 256);  /* we only use the first 256 */
540         if (nsamp == 0) {  /* no information for this class */
541             L_ERROR("no samples in class %d\n", procName, i);
542             badclass = TRUE;
543             pixaDestroy(&pixa1);
544             ptaDestroy(&pta1);
545             break;
546         } else {
547             pixaAccumulateSamples(pixa1, pta1, &pix1, &x, &y);
548             pix2 = pixThresholdToBinary(pix1, L_MAX(1, nsamp / 2));
549             pixInvert(pix2, pix2);
550             pixClipToForeground(pix2, &pix3, &box);
551             if (!box) {
552                 L_ERROR("no fg pixels in average for uclass %d\n", procName, i);
553                 badclass = TRUE;
554                 pixDestroy(&pix1);
555                 pixDestroy(&pix2);
556                 pixaDestroy(&pixa1);
557                 ptaDestroy(&pta1);
558                 break;
559             } else {
560                 boxGetGeometry(box, &bx, &by, NULL, NULL);
561                 pixaAddPix(recog->pixa_u, pix3, L_INSERT);
562                 ptaAddPt(recog->pta_u, x - bx, y - by);  /* correct centroid */
563                 pixCountPixels(pix3, &area, recog->sumtab);
564                 numaAddNumber(recog->nasum_u, area);  /* foreground */
565                 boxDestroy(&box);
566             }
567             pixDestroy(&pix1);
568             pixDestroy(&pix2);
569         }
570         pixaDestroy(&pixa1);
571         ptaDestroy(&pta1);
572     }
573 
574         /* Are any classes bad?  If so, destroy the recog and return an error */
575     if (badclass) {
576         recogDestroy(precog);
577         return ERROR_INT("at least 1 bad class; destroying recog", procName, 1);
578     }
579 
580         /* Get the range of sizes of the unscaled average templates.
581          * Reject if the height ratio is too large.  */
582     pixaSizeRange(recog->pixa_u, &recog->minwidth_u, &recog->minheight_u,
583                   &recog->maxwidth_u, &recog->maxheight_u);
584     hratio = (l_float32)recog->maxheight_u / (l_float32)recog->minheight_u;
585     if (hratio > recog->max_ht_ratio) {
586         L_ERROR("ratio of max/min height of average templates = %4.1f;"
587                 " destroying recog\n", procName, hratio);
588         recogDestroy(precog);
589         return 1;
590     }
591 
592         /* Scaled bitmaps: compute averaged bitmap, centroid, and fg area */
593     for (i = 0; i < size; i++) {
594         pixa1 = pixaaGetPixa(recog->pixaa, i, L_CLONE);
595         pta1 = ptaaGetPta(recog->ptaa, i, L_CLONE);
596         nsamp = pixaGetCount(pixa1);
597         nsamp = L_MIN(nsamp, 256);  /* we only use the first 256 */
598         pixaAccumulateSamples(pixa1, pta1, &pix1, &x, &y);
599         pix2 = pixThresholdToBinary(pix1, L_MAX(1, nsamp / 2));
600         pixInvert(pix2, pix2);
601         pixClipToForeground(pix2, &pix3, &box);
602         if (!box) {
603             L_ERROR("no fg pixels in average for sclass %d\n", procName, i);
604             badclass = TRUE;
605             pixDestroy(&pix1);
606             pixDestroy(&pix2);
607             pixaDestroy(&pixa1);
608             ptaDestroy(&pta1);
609             break;
610         } else {
611             boxGetGeometry(box, &bx, &by, NULL, NULL);
612             pixaAddPix(recog->pixa, pix3, L_INSERT);
613             ptaAddPt(recog->pta, x - bx, y - by);  /* correct centroid */
614             pixCountPixels(pix3, &area, recog->sumtab);
615             numaAddNumber(recog->nasum, area);  /* foreground */
616             boxDestroy(&box);
617         }
618         pixDestroy(&pix1);
619         pixDestroy(&pix2);
620         pixaDestroy(&pixa1);
621         ptaDestroy(&pta1);
622     }
623 
624     if (badclass) {
625         recogDestroy(precog);
626         return ERROR_INT("at least 1 bad class; destroying recog", procName, 1);
627     }
628 
629         /* Get the range of widths of the scaled average templates */
630     pixaSizeRange(recog->pixa, &recog->minwidth, NULL, &recog->maxwidth, NULL);
631 
632        /* Get dimensions useful for splitting */
633     recog->min_splitw = L_MAX(5, recog->minwidth_u - 5);
634     recog->max_splith = recog->maxheight_u + 12;  /* allow for skew */
635 
636     if (debug)
637         recogShowAverageTemplates(recog);
638 
639     recog->ave_done = TRUE;
640     return 0;
641 }
642 
643 
644 /*!
645  * \brief   pixaAccumulateSamples()
646  *
647  * \param[in]    pixa of samples from the same class, 1 bpp
648  * \param[in]    pta [optional] of centroids of the samples
649  * \param[out]   ppixd accumulated samples, 8 bpp
650  * \param[out]   px [optional] average x coordinate of centroids
651  * \param[out]   py [optional] average y coordinate of centroids
652  * \return  0 on success, 1 on failure
653  *
654  * <pre>
655  * Notes:
656  *      (1) This generates an aligned (by centroid) sum of the input pix.
657  *      (2) We use only the first 256 samples; that's plenty.
658  *      (3) If pta is not input, we generate two tables, and discard
659  *          after use.  If this is called many times, it is better
660  *          to precompute the pta.
661  * </pre>
662  */
663 l_int32
pixaAccumulateSamples(PIXA * pixa,PTA * pta,PIX ** ppixd,l_float32 * px,l_float32 * py)664 pixaAccumulateSamples(PIXA       *pixa,
665                       PTA        *pta,
666                       PIX       **ppixd,
667                       l_float32  *px,
668                       l_float32  *py)
669 {
670 l_int32    i, n, maxw, maxh, xdiff, ydiff;
671 l_int32   *centtab, *sumtab;
672 l_float32  xc, yc, xave, yave;
673 PIX       *pix1, *pix2, *pixsum;
674 PTA       *ptac;
675 
676     PROCNAME("pixaAccumulateSamples");
677 
678     if (px) *px = 0;
679     if (py) *py = 0;
680     if (!ppixd)
681         return ERROR_INT("&pixd not defined", procName, 1);
682     *ppixd = NULL;
683     if (!pixa)
684         return ERROR_INT("pixa not defined", procName, 1);
685 
686     n = pixaGetCount(pixa);
687     if (pta && ptaGetCount(pta) != n)
688         return ERROR_INT("pta count differs from pixa count", procName, 1);
689     n = L_MIN(n, 256);  /* take the first 256 only */
690     if (n == 0)
691         return ERROR_INT("pixa array empty", procName, 1);
692 
693         /* Find the centroids */
694     if (pta) {
695         ptac = ptaClone(pta);
696     } else {  /* generate them here */
697         ptac = ptaCreate(n);
698         centtab = makePixelCentroidTab8();
699         sumtab = makePixelSumTab8();
700         for (i = 0; i < n; i++) {
701             pix1 = pixaGetPix(pixa, i, L_CLONE);
702             pixCentroid(pix1, centtab, sumtab, &xc, &yc);
703             ptaAddPt(ptac, xc, yc);
704         }
705         LEPT_FREE(centtab);
706         LEPT_FREE(sumtab);
707     }
708 
709         /* Find the average value of the centroids */
710     xave = yave = 0;
711     for (i = 0; i < n; i++) {
712         ptaGetPt(pta, i, &xc, &yc);
713         xave += xc;
714         yave += yc;
715     }
716     xave = xave / (l_float32)n;
717     yave = yave / (l_float32)n;
718     if (px) *px = xave;
719     if (py) *py = yave;
720 
721         /* Place all pix with their centroids located at the average
722          * centroid value, and sum the results.  Make the accumulator
723          * image slightly larger than the largest sample to insure
724          * that all pixels are represented in the accumulator.  */
725     pixaSizeRange(pixa, NULL, NULL, &maxw, &maxh);
726     pixsum = pixInitAccumulate(maxw + 5, maxh + 5, 0);
727     pix1 = pixCreate(maxw, maxh, 1);
728     for (i = 0; i < n; i++) {
729         pix2 = pixaGetPix(pixa, i, L_CLONE);
730         ptaGetPt(ptac, i, &xc, &yc);
731         xdiff = (l_int32)(xave - xc);
732         ydiff = (l_int32)(yave - yc);
733         pixClearAll(pix1);
734         pixRasterop(pix1, xdiff, ydiff, maxw, maxh, PIX_SRC,
735                     pix2, 0, 0);
736         pixAccumulate(pixsum, pix1, L_ARITH_ADD);
737         pixDestroy(&pix2);
738     }
739     *ppixd = pixFinalAccumulate(pixsum, 0, 8);
740 
741     pixDestroy(&pix1);
742     pixDestroy(&pixsum);
743     ptaDestroy(&ptac);
744     return 0;
745 }
746 
747 
748 /*!
749  * \brief   recogTrainingFinished()
750  *
751  * \param[in]    precog       addr of recog
752  * \param[in]    modifyflag   1 to use recogModifyTemplate(); 0 otherwise
753  * \param[in]    minsize      set to -1 for default
754  * \param[in]    minfract     set to -1.0 for default
755  * \return  0 if OK, 1 on error (input recog will be destroyed)
756  *
757  * <pre>
758  * Notes:
759  *      (1) This must be called after all training samples have been added.
760  *      (2) If the templates are not good enough, the recog input is destroyed.
761  *      (3) Usually, %modifyflag == 1, because we want to apply
762  *          recogModifyTemplate() to generate the actual templates
763  *          that will be used.  The one exception is when reading a
764  *          serialized recog: there we want to put the same set of
765  *          templates in both the unscaled and modified pixaa.
766  *          See recogReadStream() to see why we do this.
767  *      (4) See recogTemplatesAreOK() for %minsize and %minfract usage.
768  *      (5) The following things are done here:
769  *          (a) Allocate (or reallocate) storage for (possibly) modified
770  *              bitmaps, centroids, and fg areas.
771  *          (b) Generate the (possibly) modified bitmaps.
772  *          (c) Compute centroid and fg area data for both unscaled and
773  *              modified bitmaps.
774  *          (d) Truncate the pixaa, ptaa and numaa arrays down from
775  *              256 to the actual size.
776  *      (6) Putting these operations here makes it simple to recompute
777  *          the recog with different modifications on the bitmaps.
778  *      (7) Call recogShowContent() to display the templates, both
779  *          unscaled and modified.
780  * </pre>
781  */
782 l_int32
recogTrainingFinished(L_RECOG ** precog,l_int32 modifyflag,l_int32 minsize,l_float32 minfract)783 recogTrainingFinished(L_RECOG  **precog,
784                       l_int32    modifyflag,
785                       l_int32    minsize,
786                       l_float32  minfract)
787 {
788 l_int32    ok, i, j, size, nc, ns, area;
789 l_float32  xave, yave;
790 PIX       *pix, *pixd;
791 PIXA      *pixa;
792 PIXAA     *paa;
793 PTA       *pta;
794 PTAA      *ptaa;
795 L_RECOG   *recog;
796 
797     PROCNAME("recogTrainingFinished");
798 
799     if (!precog)
800         return ERROR_INT("&recog not defined", procName, 1);
801     if ((recog = *precog) == NULL)
802         return ERROR_INT("recog not defined", procName, 1);
803     if (recog->train_done) return 0;
804 
805         /* Test the input templates */
806     recogTemplatesAreOK(recog, minsize, minfract, &ok);
807     if (!ok) {
808         recogDestroy(precog);
809         return ERROR_INT("bad templates", procName, 1);
810     }
811 
812         /* Generate the storage for the possibly-scaled training bitmaps */
813     size = recog->maxarraysize;
814     paa = pixaaCreate(size);
815     pixa = pixaCreate(1);
816     pixaaInitFull(paa, pixa);
817     pixaDestroy(&pixa);
818     pixaaDestroy(&recog->pixaa);
819     recog->pixaa = paa;
820 
821         /* Generate the storage for the unscaled centroid training data */
822     ptaa = ptaaCreate(size);
823     pta = ptaCreate(0);
824     ptaaInitFull(ptaa, pta);
825     ptaaDestroy(&recog->ptaa_u);
826     recog->ptaa_u = ptaa;
827 
828         /* Generate the storage for the possibly-scaled centroid data */
829     ptaa = ptaaCreate(size);
830     ptaaInitFull(ptaa, pta);
831     ptaDestroy(&pta);
832     ptaaDestroy(&recog->ptaa);
833     recog->ptaa = ptaa;
834 
835         /* Generate the storage for the fg area data */
836     numaaDestroy(&recog->naasum_u);
837     numaaDestroy(&recog->naasum);
838     recog->naasum_u = numaaCreateFull(size, 0);
839     recog->naasum = numaaCreateFull(size, 0);
840 
841     paa = recog->pixaa_u;
842     nc = recog->setsize;
843     for (i = 0; i < nc; i++) {
844         pixa = pixaaGetPixa(paa, i, L_CLONE);
845         ns = pixaGetCount(pixa);
846         for (j = 0; j < ns; j++) {
847                 /* Save centroid and area data for the unscaled pix */
848             pix = pixaGetPix(pixa, j, L_CLONE);
849             pixCentroid(pix, recog->centtab, recog->sumtab, &xave, &yave);
850             ptaaAddPt(recog->ptaa_u, i, xave, yave);
851             pixCountPixels(pix, &area, recog->sumtab);
852             numaaAddNumber(recog->naasum_u, i, area);  /* foreground */
853 
854                 /* Insert the (optionally) scaled character image, and
855                  * save centroid and area data for it */
856             if (modifyflag == 1)
857                 pixd = recogModifyTemplate(recog, pix);
858             else
859                 pixd = pixClone(pix);
860             if (pixd) {
861                 pixaaAddPix(recog->pixaa, i, pixd, NULL, L_INSERT);
862                 pixCentroid(pixd, recog->centtab, recog->sumtab, &xave, &yave);
863                 ptaaAddPt(recog->ptaa, i, xave, yave);
864                 pixCountPixels(pixd, &area, recog->sumtab);
865                 numaaAddNumber(recog->naasum, i, area);
866             } else {
867                 L_ERROR("failed: modified template for class %d, sample %d\n",
868                         procName, i, j);
869             }
870             pixDestroy(&pix);
871         }
872         pixaDestroy(&pixa);
873     }
874 
875         /* Truncate the arrays to those with non-empty containers */
876     pixaaTruncate(recog->pixaa_u);
877     pixaaTruncate(recog->pixaa);
878     ptaaTruncate(recog->ptaa_u);
879     ptaaTruncate(recog->ptaa);
880     numaaTruncate(recog->naasum_u);
881     numaaTruncate(recog->naasum);
882 
883     recog->train_done = TRUE;
884     return 0;
885 }
886 
887 
888 /*!
889  * \brief   recogTemplatesAreOK()
890  *
891  * \param[in]    recog
892  * \param[in]    minsize     set to -1 for default
893  * \param[in]    minfract    set to -1.0 for default
894  * \param[out]   pok         set to 1 if template set is valid; 0 otherwise
895  * \return  1 on error; 0 otherwise.  An invalid template set is not an error.
896  *
897  * <pre>
898  * Notes:
899  *      (1) This is called by recogTrainingFinished().  A return value of 0
900  *          will cause recogTrainingFinished() to destroy the recog.
901  *      (2) %minsize is the minimum number of samples required for
902  *          the class; -1 uses the default
903  *      (3) %minfract is the minimum fraction of classes required for
904  *          the recog to be usable; -1.0 uses the default
905  * </pre>
906  */
907 static l_int32
recogTemplatesAreOK(L_RECOG * recog,l_int32 minsize,l_float32 minfract,l_int32 * pok)908 recogTemplatesAreOK(L_RECOG   *recog,
909                     l_int32    minsize,
910                     l_float32  minfract,
911                     l_int32   *pok)
912 {
913 l_int32    i, n, validsets, nt;
914 l_float32  ratio;
915 NUMA      *na;
916 
917     PROCNAME("recogTemplatesAreOK");
918 
919     if (!pok)
920         return ERROR_INT("&ok not defined", procName, 1);
921     *pok = 0;
922     if (!recog)
923         return ERROR_INT("recog not defined", procName, 1);
924 
925     minsize = (minsize < 0) ? DEFAULT_MIN_SET_SIZE : minsize;
926     minfract = (minfract < 0) ? DEFAULT_MIN_SET_FRACT : minfract;
927     n = pixaaGetCount(recog->pixaa_u, &na);
928     validsets = 0;
929     for (i = 0, validsets = 0; i < n; i++) {
930         numaGetIValue(na, i, &nt);
931         if (nt >= minsize)
932             validsets++;
933     }
934     numaDestroy(&na);
935     ratio = (l_float32)validsets / (l_float32)recog->charset_size;
936     *pok = (ratio >= minfract) ? 1 : 0;
937     return 0;
938 }
939 
940 
941 /*!
942  * \brief   recogFilterPixaBySize()
943  *
944  * \param[in]   pixas         labeled templates
945  * \param[in]   setsize       size of character set (number of classes)
946  * \param[in]   maxkeep       max number of templates to keep in a class
947  * \param[in]   max_ht_ratio  max allowed height ratio (see below)
948  * \param[out]  pna     [optional] debug output, giving the number in each
949  *                      class after filtering; use NULL to skip
950  * \return  pixa   filtered templates, or NULL on error
951  *
952  * <pre>
953  * Notes:
954  *      (1) The basic assumption is that the most common and larger
955  *          templates in each class are more likely to represent the
956  *          characters we are interested in.  For example, larger digits
957  *          are more likely to represent page numbers, and smaller digits
958  *          could be data in tables.  Therefore, we bias the first
959  *          stage of filtering toward the larger characters by removing
960  *          very small ones, and select based on proximity of the
961  *          remaining characters to median height.
962  *      (2) For each of the %setsize classes, order the templates
963  *          increasingly by height.  Take the rank 0.9 height.  Eliminate
964  *          all templates that are shorter by more than %max_ht_ratio.
965  *          Of the remaining ones, select up to %maxkeep that are closest
966  *          in rank order height to the median template.
967  * </pre>
968  */
969 PIXA *
recogFilterPixaBySize(PIXA * pixas,l_int32 setsize,l_int32 maxkeep,l_float32 max_ht_ratio,NUMA ** pna)970 recogFilterPixaBySize(PIXA      *pixas,
971                       l_int32    setsize,
972                       l_int32    maxkeep,
973                       l_float32  max_ht_ratio,
974                       NUMA     **pna)
975 {
976 l_int32    i, j, h90, hj, j1, j2, j90, n, nc;
977 l_float32  ratio;
978 NUMA      *na;
979 PIXA      *pixa1, *pixa2, *pixa3, *pixa4, *pixa5;
980 PIXAA     *paa;
981 
982     PROCNAME("recogFilterPixaBySize");
983 
984     if (pna) *pna = NULL;
985     if (!pixas)
986         return (PIXA *)ERROR_PTR("pixas not defined", procName, NULL);
987 
988     if ((paa = recogSortPixaByClass(pixas, setsize)) == NULL)
989         return (PIXA *)ERROR_PTR("paa not made", procName, NULL);
990     nc = pixaaGetCount(paa, NULL);
991     na = (pna) ? numaCreate(0) : NULL;
992     if (pna) *pna = na;
993     pixa5 = pixaCreate(0);
994     for (i = 0; i < nc; i++) {
995         pixa1 = pixaaGetPixa(paa, i, L_CLONE);
996         if ((n = pixaGetCount(pixa1)) == 0) {
997             pixaDestroy(&pixa1);
998             continue;
999         }
1000         pixa2 = pixaSort(pixa1, L_SORT_BY_HEIGHT, L_SORT_INCREASING, NULL,
1001                          L_COPY);
1002         j90 = (l_int32)(0.9 * n);
1003         pixaGetPixDimensions(pixa2, j90, NULL, &h90, NULL);
1004         pixa3 = pixaCreate(n);
1005         for (j = 0; j < n; j++) {
1006             pixaGetPixDimensions(pixa2, j, NULL, &hj, NULL);
1007             ratio = (l_float32)h90 / (l_float32)hj;
1008             if (ratio <= max_ht_ratio)
1009                 pixaAddPix(pixa3, pixaGetPix(pixa2, j, L_COPY), L_INSERT);
1010         }
1011         n = pixaGetCount(pixa3);
1012         if (n <= maxkeep) {
1013             pixa4 = pixaCopy(pixa3, L_CLONE);
1014         } else {
1015             j1 = (n - maxkeep) / 2;
1016             j2 = j1 + maxkeep - 1;
1017             pixa4 = pixaSelectRange(pixa3, j1, j2, L_CLONE);
1018         }
1019         if (na) numaAddNumber(na, pixaGetCount(pixa4));
1020         pixaJoin(pixa5, pixa4, 0, -1);
1021         pixaDestroy(&pixa1);
1022         pixaDestroy(&pixa2);
1023         pixaDestroy(&pixa3);
1024         pixaDestroy(&pixa4);
1025     }
1026 
1027     pixaaDestroy(&paa);
1028     return pixa5;
1029 }
1030 
1031 
1032 /*!
1033  * \brief   recogSortPixaByClass()
1034  *
1035  * \param[in]   pixa          labeled templates
1036  * \param[in]   setsize       size of character set (number of classes)
1037  * \return  paa   pixaa where each pixa has templates for one class,
1038  *                or null on error
1039  */
1040 PIXAA *
recogSortPixaByClass(PIXA * pixa,l_int32 setsize)1041 recogSortPixaByClass(PIXA      *pixa,
1042                      l_int32    setsize)
1043 {
1044 PIXAA    *paa;
1045 L_RECOG  *recog;
1046 
1047     PROCNAME("recogSortPixaByClass");
1048 
1049     if (!pixa)
1050         return (PIXAA *)ERROR_PTR("pixa not defined", procName, NULL);
1051 
1052     if ((recog = recogCreateFromPixaNoFinish(pixa, 0, 0, 0, 0, 0)) == NULL)
1053         return (PIXAA *)ERROR_PTR("recog not made", procName, NULL);
1054     paa = recog->pixaa_u;   /* grab the paa of unscaled templates */
1055     recog->pixaa_u = NULL;
1056     recogDestroy(&recog);
1057     return paa;
1058 }
1059 
1060 
1061 /*!
1062  * \brief   recogRemoveOutliers1()
1063  *
1064  * \param[in]   precog       addr of recog with unscaled labeled templates
1065  * \param[in]   minscore     keep everything with at least this score
1066  * \param[in]   mintarget    minimum desired number to retain if possible
1067  * \param[in]   minsize      minimum number of samples required for a class
1068  * \param[out]  ppixsave     [optional debug] saved templates, with scores
1069  * \param[out]  ppixrem      [optional debug] removed templates, with scores
1070  * \return  0 if OK, 1 on error.
1071  *
1072  * <pre>
1073  * Notes:
1074  *      (1) This is a convenience wrapper when using default parameters
1075  *          for the recog.  See pixaRemoveOutliers1() for details.
1076  *      (2) If this succeeds, the new recog replaces the input recog;
1077  *          if it fails, the input recog is destroyed.
1078  * </pre>
1079  */
1080 l_int32
recogRemoveOutliers1(L_RECOG ** precog,l_float32 minscore,l_int32 mintarget,l_int32 minsize,PIX ** ppixsave,PIX ** ppixrem)1081 recogRemoveOutliers1(L_RECOG  **precog,
1082                      l_float32  minscore,
1083                      l_int32    mintarget,
1084                      l_int32    minsize,
1085                      PIX      **ppixsave,
1086                      PIX      **ppixrem)
1087 {
1088 PIXA     *pixa1, *pixa2;
1089 L_RECOG  *recog;
1090 
1091     PROCNAME("recogRemoveOutliers1");
1092 
1093     if (!precog)
1094         return ERROR_INT("&recog not defined", procName, 1);
1095     if (*precog == NULL)
1096         return ERROR_INT("recog not defined", procName, 1);
1097 
1098         /* Extract the unscaled templates */
1099     pixa1 = recogExtractPixa(*precog);
1100     recogDestroy(precog);
1101 
1102     pixa2 = pixaRemoveOutliers1(pixa1, minscore, mintarget, minsize,
1103                                 ppixsave, ppixrem);
1104     pixaDestroy(&pixa1);
1105     if (!pixa2)
1106         return ERROR_INT("failure to remove outliers", procName, 1);
1107 
1108     recog = recogCreateFromPixa(pixa2, 0, 0, 0, 150, 1);
1109     pixaDestroy(&pixa2);
1110     if (!recog)
1111         return ERROR_INT("failure to make recog from pixa sans outliers",
1112                           procName, 1);
1113 
1114     *precog = recog;
1115     return 0;
1116 }
1117 
1118 
1119 /*!
1120  * \brief   pixaRemoveOutliers1()
1121  *
1122  * \param[in]   pixas        unscaled labeled templates
1123  * \param[in]   minscore     keep everything with at least this score;
1124  *                           use -1.0 for default.
1125  * \param[in]   mintarget    minimum desired number to retain if possible;
1126  *                           use -1 for default.
1127  * \param[in]   minsize      minimum number of samples required for a class;
1128  *                           use -1 for default.
1129  * \param[out]  ppixsave     [optional debug] saved templates, with scores
1130  * \param[out]  ppixrem      [optional debug] removed templates, with scores
1131  * \return  pixa   of unscaled templates to be kept, or NULL on error
1132  *
1133  * <pre>
1134  * Notes:
1135  *      (1) Removing outliers is particularly important when recognition
1136  *          goes against all the samples in the training set, as opposed
1137  *          to the averages for each class.  The reason is that we get
1138  *          an identification error if a mislabeled template is a best
1139  *          match for an input sample.
1140  *      (2) Because the score values depend strongly on the quality
1141  *          of the character images, to avoid losing too many samples
1142  *          we supplement a minimum score for retention with a score
1143  *          necessary to acquire the minimum target number of templates.
1144  *          To do this we are willing to use a lower threshold,
1145  *          LOWER_SCORE_THRESHOLD, on the score.  Consequently, with
1146  *          poor quality templates, we may keep samples with a score
1147  *          less than %minscore, but never less than LOWER_SCORE_THRESHOLD.
1148  *          And if the number of samples is less than %minsize, we do
1149  *          not use any.
1150  *      (3) This is meant to be used on a BAR, where the templates all
1151  *          come from the same book; use minscore ~0.75.
1152  *      (4) Method: make a scaled recog from the input %pixas.  Then,
1153  *          for each class: generate the averages, match each
1154  *          scaled template against the average, and save unscaled
1155  *          templates that had a sufficiently good match.
1156  * </pre>
1157  */
1158 PIXA *
pixaRemoveOutliers1(PIXA * pixas,l_float32 minscore,l_int32 mintarget,l_int32 minsize,PIX ** ppixsave,PIX ** ppixrem)1159 pixaRemoveOutliers1(PIXA      *pixas,
1160                     l_float32  minscore,
1161                     l_int32    mintarget,
1162                     l_int32    minsize,
1163                     PIX      **ppixsave,
1164                     PIX      **ppixrem)
1165 {
1166 l_int32    i, j, debug, n, area1, area2;
1167 l_float32  x1, y1, x2, y2, minfract, score, rankscore, threshscore;
1168 NUMA      *nasum, *narem, *nasave, *nascore;
1169 PIX       *pix1, *pix2;
1170 PIXA      *pixa, *pixarem, *pixad;
1171 PTA       *pta;
1172 L_RECOG   *recog;
1173 
1174     PROCNAME("pixaRemoveOutliers1");
1175 
1176     if (ppixsave) *ppixsave = NULL;
1177     if (ppixrem) *ppixrem = NULL;
1178     if (!pixas)
1179         return (PIXA *)ERROR_PTR("pixas not defined", procName, NULL);
1180     minscore = L_MIN(minscore, 1.0);
1181     if (minscore <= 0.0)
1182         minscore = DEFAULT_MIN_SCORE;
1183     mintarget = L_MIN(mintarget, 3);
1184     if (mintarget <= 0)
1185         mintarget = DEFAULT_MIN_TARGET;
1186     if (minsize < 0)
1187         minsize = DEFAULT_MIN_SET_SIZE;
1188 
1189         /* Make a special height-scaled recognizer with average templates */
1190     debug = (ppixsave || ppixrem) ? 1 : 0;
1191     recog = recogCreateFromPixa(pixas, 0, 40, 0, 128, 1);
1192     if (!recog)
1193         return (PIXA *)ERROR_PTR("bad pixas; recog not made", procName, NULL);
1194     recogAverageSamples(&recog, debug);
1195     if (!recog)
1196         return (PIXA *)ERROR_PTR("bad templates", procName, NULL);
1197 
1198     nasave = (ppixsave) ? numaCreate(0) : NULL;
1199     pixarem = (ppixrem) ? pixaCreate(0) : NULL;
1200     narem = (ppixrem) ? numaCreate(0) : NULL;
1201 
1202     pixad = pixaCreate(0);
1203     for (i = 0; i < recog->setsize; i++) {
1204             /* Access the average template and values for scaled
1205              * images in this class */
1206         pix1 = pixaGetPix(recog->pixa, i, L_CLONE);
1207         ptaGetPt(recog->pta, i, &x1, &y1);
1208         numaGetIValue(recog->nasum, i, &area1);
1209 
1210             /* Get the scores for each sample in the class */
1211         pixa = pixaaGetPixa(recog->pixaa, i, L_CLONE);
1212         pta = ptaaGetPta(recog->ptaa, i, L_CLONE);  /* centroids */
1213         nasum = numaaGetNuma(recog->naasum, i, L_CLONE);  /* fg areas */
1214         n = pixaGetCount(pixa);
1215         nascore = numaCreate(n);
1216         for (j = 0; j < n; j++) {
1217             pix2 = pixaGetPix(pixa, j, L_CLONE);
1218             ptaGetPt(pta, j, &x2, &y2);  /* centroid average */
1219             numaGetIValue(nasum, j, &area2);  /* fg sum average */
1220             pixCorrelationScoreSimple(pix1, pix2, area1, area2,
1221                                       x1 - x2, y1 - y2, 5, 5,
1222                                       recog->sumtab, &score);
1223             numaAddNumber(nascore, score);
1224             if (debug && score == 0.0)  /* typ. large size difference */
1225                 fprintf(stderr, "Got 0 score for i = %d, j = %d\n", i, j);
1226             pixDestroy(&pix2);
1227         }
1228         pixDestroy(&pix1);
1229 
1230             /* Find the rankscore, corresonding to the 1.0 - minfract.
1231              * To attempt to maintain the minfract of templates, use as a
1232              * cutoff the minimum of minscore and the rank score.  However,
1233              * no template is saved with an actual score less than
1234              * that at least one template is kept. */
1235         minfract = (l_float32)mintarget / (l_float32)n;
1236         numaGetRankValue(nascore, 1.0 - minfract, NULL, 0, &rankscore);
1237         threshscore = L_MAX(LOWER_SCORE_THRESHOLD,
1238                             L_MIN(minscore, rankscore));
1239         if (debug) {
1240             L_INFO("minscore = %4.2f, rankscore = %4.2f, threshscore = %4.2f\n",
1241                    procName, minscore, rankscore, threshscore);
1242         }
1243 
1244             /* Save templates that are at or above threshold.
1245              * Toss any classes with less than %minsize templates. */
1246         for (j = 0; j < n; j++) {
1247             numaGetFValue(nascore, j, &score);
1248             pix1 = pixaaGetPix(recog->pixaa_u, i, j, L_COPY);
1249             if (score >= threshscore && n >= minsize) {
1250                 pixaAddPix(pixad, pix1, L_INSERT);
1251                 if (nasave) numaAddNumber(nasave, score);
1252             } else if (debug) {
1253                 pixaAddPix(pixarem, pix1, L_INSERT);
1254                 numaAddNumber(narem, score);
1255             } else {
1256                 pixDestroy(&pix1);
1257             }
1258         }
1259 
1260         pixaDestroy(&pixa);
1261         ptaDestroy(&pta);
1262         numaDestroy(&nasum);
1263         numaDestroy(&nascore);
1264     }
1265 
1266     if (ppixsave) {
1267         *ppixsave = pixDisplayOutliers(pixad, nasave);
1268         numaDestroy(&nasave);
1269     }
1270     if (ppixrem) {
1271         *ppixrem = pixDisplayOutliers(pixarem, narem);
1272         pixaDestroy(&pixarem);
1273         numaDestroy(&narem);
1274     }
1275     recogDestroy(&recog);
1276     return pixad;
1277 }
1278 
1279 
1280 /*!
1281  * \brief   recogRemoveOutliers2()
1282  *
1283  * \param[in]   precog       addr of recog with unscaled labeled templates
1284  * \param[in]   minscore     keep everything with at least this score
1285  * \param[in]   minsize      minimum number of samples required for a class
1286  * \param[out]  ppixsave     [optional debug] saved templates, with scores
1287  * \param[out]  ppixrem      [optional debug] removed templates, with scores
1288  * \return  0 if OK, 1 on error.
1289  *
1290  * <pre>
1291  * Notes:
1292  *      (1) This is a convenience wrapper when using default parameters
1293  *          for the recog.  See pixaRemoveOutliers2() for details.
1294  *      (2) If this succeeds, the new recog replaces the input recog;
1295  *          if it fails, the input recog is destroyed.
1296  * </pre>
1297  */
1298 l_int32
recogRemoveOutliers2(L_RECOG ** precog,l_float32 minscore,l_int32 minsize,PIX ** ppixsave,PIX ** ppixrem)1299 recogRemoveOutliers2(L_RECOG  **precog,
1300                      l_float32  minscore,
1301                      l_int32    minsize,
1302                      PIX      **ppixsave,
1303                      PIX      **ppixrem)
1304 {
1305 PIXA     *pixa1, *pixa2;
1306 L_RECOG  *recog;
1307 
1308     PROCNAME("recogRemoveOutliers2");
1309 
1310     if (!precog)
1311         return ERROR_INT("&recog not defined", procName, 1);
1312     if (*precog == NULL)
1313         return ERROR_INT("recog not defined", procName, 1);
1314 
1315         /* Extract the unscaled templates */
1316     pixa1 = recogExtractPixa(*precog);
1317     recogDestroy(precog);
1318 
1319     pixa2 = pixaRemoveOutliers2(pixa1, minscore, minsize, ppixsave, ppixrem);
1320     pixaDestroy(&pixa1);
1321     if (!pixa2)
1322         return ERROR_INT("failure to remove outliers", procName, 1);
1323 
1324     recog = recogCreateFromPixa(pixa2, 0, 0, 0, 150, 1);
1325     pixaDestroy(&pixa2);
1326     if (!recog)
1327         return ERROR_INT("failure to make recog from pixa sans outliers",
1328                           procName, 1);
1329 
1330     *precog = recog;
1331     return 0;
1332 }
1333 
1334 
1335 /*!
1336  * \brief   pixaRemoveOutliers2()
1337  *
1338  * \param[in]   pixas       unscaled labeled templates
1339  * \param[in]   minscore    keep everything with at least this score;
1340  *                          use -1.0 for default.
1341  * \param[in]   minsize     minimum number of samples required for a class;
1342  *                          use -1 for default.
1343  * \param[out]  ppixsave    [optional debug] saved templates, with scores
1344  * \param[out]  ppixrem     [optional debug] removed templates, with scores
1345  * \return  pixa   of unscaled templates to be kept, or NULL on error
1346  *
1347  * <pre>
1348  * Notes:
1349  *      (1) Removing outliers is particularly important when recognition
1350  *          goes against all the samples in the training set, as opposed
1351  *          to the averages for each class.  The reason is that we get
1352  *          an identification error if a mislabeled template is a best
1353  *          match for an input sample.
1354  *      (2) This method compares each template against the average templates
1355  *          of each class, and discards any template that has a higher
1356  *          correlation to a class different from its own.  It also
1357  *          sets a lower bound on correlation scores with its class average.
1358  *      (3) This is meant to be used on a BAR, where the templates all
1359  *          come from the same book; use minscore ~0.75.
1360  * </pre>
1361  */
1362 PIXA *
pixaRemoveOutliers2(PIXA * pixas,l_float32 minscore,l_int32 minsize,PIX ** ppixsave,PIX ** ppixrem)1363 pixaRemoveOutliers2(PIXA      *pixas,
1364                     l_float32  minscore,
1365                     l_int32    minsize,
1366                     PIX      **ppixsave,
1367                     PIX      **ppixrem)
1368 {
1369 l_int32    i, j, k, n, area1, area2, maxk, debug;
1370 l_float32  x1, y1, x2, y2, score, maxscore;
1371 NUMA      *nan, *nascore, *nasave;
1372 PIX       *pix1, *pix2, *pix3;
1373 PIXA      *pixarem, *pixad;
1374 L_RECOG   *recog;
1375 
1376     PROCNAME("pixaRemoveOutliers2");
1377 
1378     if (ppixsave) *ppixsave = NULL;
1379     if (ppixrem) *ppixrem = NULL;
1380     if (!pixas)
1381         return (PIXA *)ERROR_PTR("pixas not defined", procName, NULL);
1382     minscore = L_MIN(minscore, 1.0);
1383     if (minscore <= 0.0)
1384         minscore = DEFAULT_MIN_SCORE;
1385     if (minsize < 0)
1386         minsize = DEFAULT_MIN_SET_SIZE;
1387 
1388         /* Make a special height-scaled recognizer with average templates */
1389     debug = (ppixsave || ppixrem) ? 1 : 0;
1390     recog = recogCreateFromPixa(pixas, 0, 40, 0, 128, 1);
1391     if (!recog)
1392         return (PIXA *)ERROR_PTR("bad pixas; recog not made", procName, NULL);
1393     recogAverageSamples(&recog, debug);
1394     if (!recog)
1395         return (PIXA *)ERROR_PTR("bad templates", procName, NULL);
1396 
1397     nasave = (ppixsave) ? numaCreate(0) : NULL;
1398     pixarem = (ppixrem) ? pixaCreate(0) : NULL;
1399 
1400     pixad = pixaCreate(0);
1401     pixaaGetCount(recog->pixaa, &nan);  /* number of templates in each class */
1402     for (i = 0; i < recog->setsize; i++) {
1403             /* Get the scores for each sample in the class, when comparing
1404              * with averages from all the classes. */
1405         numaGetIValue(nan, i, &n);
1406         for (j = 0; j < n; j++) {
1407             pix1 = pixaaGetPix(recog->pixaa, i, j, L_CLONE);
1408             ptaaGetPt(recog->ptaa, i, j, &x1, &y1);  /* centroid */
1409             numaaGetValue(recog->naasum, i, j, NULL, &area1);  /* fg sum */
1410             nascore = numaCreate(n);
1411             for (k = 0; k < recog->setsize; k++) {  /* average templates */
1412                 pix2 = pixaGetPix(recog->pixa, k, L_CLONE);
1413                 ptaGetPt(recog->pta, k, &x2, &y2);  /* average centroid */
1414                 numaGetIValue(recog->nasum, k, &area2);  /* average fg sum */
1415                 pixCorrelationScoreSimple(pix1, pix2, area1, area2,
1416                                           x1 - x2, y1 - y2, 5, 5,
1417                                           recog->sumtab, &score);
1418                 numaAddNumber(nascore, score);
1419                 pixDestroy(&pix2);
1420             }
1421 
1422                 /* Save templates that are in the correct class and
1423                  * at or above threshold.  Toss any classes with less
1424                  * than %minsize templates. */
1425             numaGetMax(nascore, &maxscore, &maxk);
1426             if (maxk == i && maxscore >= minscore && n >= minsize) {
1427                     /* save it */
1428                 pix3 = pixaaGetPix(recog->pixaa_u, i, j, L_COPY);
1429                 pixaAddPix(pixad, pix3, L_INSERT);
1430                 if (nasave) numaAddNumber(nasave, maxscore);
1431             } else if (ppixrem) {  /* outlier */
1432                 pix3 = recogDisplayOutlier(recog, i, j, maxk, maxscore);
1433                 pixaAddPix(pixarem, pix3, L_INSERT);
1434             }
1435             numaDestroy(&nascore);
1436             pixDestroy(&pix1);
1437         }
1438     }
1439 
1440     if (ppixsave) {
1441         *ppixsave = pixDisplayOutliers(pixad, nasave);
1442         numaDestroy(&nasave);
1443     }
1444     if (ppixrem) {
1445         *ppixrem = pixaDisplayTiledInRows(pixarem, 32, 1500, 1.0, 0, 20, 2);
1446         pixaDestroy(&pixarem);
1447     }
1448 
1449     numaDestroy(&nan);
1450     recogDestroy(&recog);
1451     return pixad;
1452 }
1453 
1454 
1455 /*------------------------------------------------------------------------*
1456  *                       Training on unlabeled data                       *
1457  *------------------------------------------------------------------------*/
1458 /*!
1459  * \brief   recogTrainFromBoot()
1460  *
1461  * \param[in]    recogboot  labeled boot recognizer
1462  * \param[in]    pixas      set of unlabeled input characters
1463  * \param[in]    minscore   min score for accepting the example; e.g., 0.75
1464  * \param[in]    threshold  for binarization, if needed
1465  * \param[in]    debug      1 for debug output saved to recogboot; 0 otherwise
1466  * \return  pixad   labeled version of input pixas, trained on a BSR,
1467  *                  or NULL on error
1468  *
1469  * <pre>
1470  * Notes:
 *      (1) This takes %pixas of unscaled single characters and %recogboot,
 *          a bootstrap recognizer (BSR) that has been set up with parameters
1473  *            * scaleh: scale all templates to this height
1474  *            * linew: width of normalized strokes, or 0 if using
1475  *              the input image
1476  *          It modifies the pix in %pixas accordingly and correlates
1477  *          with the templates in the BSR.  It returns those input
1478  *          images in %pixas whose best correlation with the BSR is at
1479  *          or above %minscore.  The returned pix have added text labels
1480  *          for the text string of the class to which the best
1481  *          correlated template belongs.
1482  *      (2) Identification occurs in scaled mode (typically with h = 40),
 *          optionally using width-normalized line images derived
1484  *          from those in %pixas.
1485  * </pre>
1486  */
1487 PIXA  *
recogTrainFromBoot(L_RECOG * recogboot,PIXA * pixas,l_float32 minscore,l_int32 threshold,l_int32 debug)1488 recogTrainFromBoot(L_RECOG   *recogboot,
1489                    PIXA      *pixas,
1490                    l_float32  minscore,
1491                    l_int32    threshold,
1492                    l_int32    debug)
1493 {
1494 char      *text;
1495 l_int32    i, n, same, maxd, scaleh, linew;
1496 l_float32  score;
1497 PIX       *pix1, *pix2, *pixdb;
1498 PIXA      *pixa1, *pixa2, *pixa3, *pixad;
1499 
1500     PROCNAME("recogTrainFromBoot");
1501 
1502     if (!recogboot)
1503         return (PIXA *)ERROR_PTR("recogboot not defined", procName, NULL);
1504     if (!pixas)
1505         return (PIXA *)ERROR_PTR("pixas not defined", procName, NULL);
1506 
1507         /* Make sure all input pix are 1 bpp */
1508     if ((n = pixaGetCount(pixas)) == 0)
1509         return (PIXA *)ERROR_PTR("no pix in pixa", procName, NULL);
1510     pixaVerifyDepth(pixas, &same, &maxd);
1511     if (maxd == 1) {
1512         pixa1 = pixaCopy(pixas, L_COPY);
1513     } else {
1514         pixa1 = pixaCreate(n);
1515         for (i = 0; i < n; i++) {
1516             pix1 = pixaGetPix(pixas, i, L_CLONE);
1517             pix2 = pixConvertTo1(pix1, threshold);
1518             pixaAddPix(pixa1, pix2, L_INSERT);
1519             pixDestroy(&pix1);
1520         }
1521     }
1522 
1523         /* Scale the input images to match the BSR */
1524     scaleh = recogboot->scaleh;
1525     linew = recogboot->linew;
1526     pixa2 = pixaCreate(n);
1527     for (i = 0; i < n; i++) {
1528         pix1 = pixaGetPix(pixa1, i, L_CLONE);
1529         pix2 = pixScaleToSize(pix1, 0, scaleh);
1530         pixaAddPix(pixa2, pix2, L_INSERT);
1531         pixDestroy(&pix1);
1532     }
1533     pixaDestroy(&pixa1);
1534 
1535         /* Optionally convert to width-normalized line */
1536     if (linew > 0)
1537         pixa3 = pixaSetStrokeWidth(pixa2, linew, 4, 8);
1538     else
1539         pixa3 = pixaCopy(pixa2, L_CLONE);
1540     pixaDestroy(&pixa2);
1541 
1542         /* Identify using recogboot */
1543     n = pixaGetCount(pixa3);
1544     pixad = pixaCreate(n);
1545     for (i = 0; i < n; i++) {
1546         pix1 = pixaGetPix(pixa3, i, L_COPY);
1547         pixSetText(pix1, NULL);  /* remove any existing text or labelling */
1548         if (!debug) {
1549             recogIdentifyPix(recogboot, pix1, NULL);
1550         } else {
1551             recogIdentifyPix(recogboot, pix1, &pixdb);
1552             pixaAddPix(recogboot->pixadb_boot, pixdb, L_INSERT);
1553         }
1554         rchExtract(recogboot->rch, NULL, &score, &text, NULL, NULL, NULL, NULL);
1555         if (score >= minscore) {
1556             pix2 = pixaGetPix(pixas, i, L_COPY);
1557             pixSetText(pix2, text);
1558             pixaAddPix(pixad, pix2, L_INSERT);
1559             pixaAddPix(recogboot->pixadb_boot, pixdb, L_COPY);
1560         }
1561         LEPT_FREE(text);
1562         pixDestroy(&pix1);
1563     }
1564     pixaDestroy(&pixa3);
1565 
1566     return pixad;
1567 }
1568 
1569 
1570 /*------------------------------------------------------------------------*
1571  *                     Padding the digit training set                     *
1572  *------------------------------------------------------------------------*/
1573 /*!
1574  * \brief   recogPadDigitTrainingSet()
1575  *
1576  * \param[in/out]   precog   trained; if padding is needed, it is replaced
1577  *                           by a a new padded recog
1578  * \param[in]       scaleh   must be > 0; suggest ~40.
1579  * \param[in]       linew    use 0 for original scanned images
1580  * \return       0 if OK, 1 on error
1581  *
1582  * <pre>
1583  * Notes:
1584  *      (1) This is a no-op if padding is not needed.  However,
1585  *          if it is, this replaces the input recog with a new recog,
1586  *          padded appropriately with templates from a boot recognizer,
1587  *          and set up with correlation templates derived from
1588  *          %scaleh and %linew.
1589  * </pre>
1590  */
1591 l_int32
recogPadDigitTrainingSet(L_RECOG ** precog,l_int32 scaleh,l_int32 linew)1592 recogPadDigitTrainingSet(L_RECOG  **precog,
1593                          l_int32    scaleh,
1594                          l_int32    linew)
1595 {
1596 PIXA     *pixa;
1597 L_RECOG  *recog1, *recog2;
1598 SARRAY   *sa;
1599 
1600     PROCNAME("recogPadDigitTrainingSet");
1601 
1602     if (!precog)
1603         return ERROR_INT("&recog not defined", procName, 1);
1604     recog1 = *precog;
1605 
1606     recogIsPaddingNeeded(recog1, &sa);
1607     if (!sa) return 0;
1608 
1609         /* Get a new pixa with the padding templates added */
1610     pixa = recogAddDigitPadTemplates(recog1, sa);
1611     sarrayDestroy(&sa);
1612     if (!pixa)
1613         return ERROR_INT("pixa not made", procName, 1);
1614 
1615         /* Need to use templates that are scaled to a fixed height. */
1616     if (scaleh <= 0) {
1617         L_WARNING("templates must be scaled to fixed height; using %d\n",
1618                   procName, 40);
1619         scaleh = 40;
1620     }
1621 
1622         /* Create a hybrid recog, composed of templates from both
1623          * the original and bootstrap sources. */
1624     recog2 = recogCreateFromPixa(pixa, 0, scaleh, linew, recog1->threshold,
1625                                  recog1->maxyshift);
1626     pixaDestroy(&pixa);
1627     recogDestroy(precog);
1628     *precog = recog2;
1629     return 0;
1630 }
1631 
1632 
1633 /*!
1634  * \brief   recogIsPaddingNeeded()
1635  *
1636  * \param[in]    recog   trained
1637  * \param[out]   psa     addr of returned string containing text value
1638  * \return       1 on error; 0 if OK, whether or not additional padding
1639  *               templates are required.
1640  *
1641  * <pre>
1642  * Notes:
1643  *      (1) This returns a string array in &sa containing character values
1644  *          for which extra templates are needed; this sarray is
1645  *          used by recogGetPadTemplates().  It returns NULL
1646  *          if no padding templates are needed.
1647  * </pre>
1648  */
1649 l_int32
recogIsPaddingNeeded(L_RECOG * recog,SARRAY ** psa)1650 recogIsPaddingNeeded(L_RECOG  *recog,
1651                      SARRAY  **psa)
1652 {
1653 char      *str;
1654 l_int32    i, nt, min_nopad, nclass, allclasses;
1655 l_float32  minval;
1656 NUMA      *naclass;
1657 SARRAY    *sa;
1658 
1659     PROCNAME("recogIsPaddingNeeded");
1660 
1661     if (!psa)
1662         return ERROR_INT("&sa not defined", procName, 1);
1663     *psa = NULL;
1664     if (!recog)
1665         return ERROR_INT("recog not defined", procName, 1);
1666 
1667         /* Do we have samples from all classes? */
1668     nclass = pixaaGetCount(recog->pixaa_u, &naclass);  /* unscaled bitmaps */
1669     allclasses = (nclass == recog->charset_size) ? 1 : 0;
1670 
1671         /* Are there enough samples in each class already? */
1672     min_nopad = recog->min_nopad;
1673     numaGetMin(naclass, &minval, NULL);
1674     if (allclasses && (minval >= min_nopad)) {
1675         numaDestroy(&naclass);
1676         return 0;
1677     }
1678 
1679         /* Are any classes not represented? */
1680     sa = recogAddMissingClassStrings(recog);
1681     *psa = sa;
1682 
1683         /* Are any other classes under-represented? */
1684     for (i = 0; i < nclass; i++) {
1685         numaGetIValue(naclass, i, &nt);
1686         if (nt < min_nopad) {
1687             str = sarrayGetString(recog->sa_text, i, L_COPY);
1688             sarrayAddString(sa, str, L_INSERT);
1689         }
1690     }
1691     numaDestroy(&naclass);
1692     return 0;
1693 }
1694 
1695 
1696 /*!
1697  * \brief   recogAddMissingClassStrings()
1698  *
1699  * \param[in]    recog   trained
1700  * \return       sa  of class string missing in %recog, or NULL on error
1701  *
1702  * <pre>
1703  * Notes:
1704  *      (1) This returns an empty %sa if there is at least one template
1705  *          in each class in %recog.
1706  * </pre>
1707  */
1708 static SARRAY  *
recogAddMissingClassStrings(L_RECOG * recog)1709 recogAddMissingClassStrings(L_RECOG  *recog)
1710 {
1711 char    *text;
1712 char     str[4];
1713 l_int32  i, nclass, index, ival;
1714 NUMA    *na;
1715 SARRAY  *sa;
1716 
1717     PROCNAME("recogAddMissingClassStrings");
1718 
1719     if (!recog)
1720         return (SARRAY *)ERROR_PTR("recog not defined", procName, NULL);
1721 
1722         /* Only handling digits */
1723     nclass = pixaaGetCount(recog->pixaa_u, NULL);  /* unscaled bitmaps */
1724     if (recog->charset_type != 1 || nclass == 10)
1725         return sarrayCreate(0);  /* empty */
1726 
1727         /* Make an indicator array for missing classes */
1728     na = numaCreate(0);
1729     sa = sarrayCreate(0);
1730     for (i = 0; i < recog->charset_size; i++)
1731          numaAddNumber(na, 1);
1732     for (i = 0; i < nclass; i++) {
1733         text = sarrayGetString(recog->sa_text, i, L_NOCOPY);
1734         index = text[0] - '0';
1735         numaSetValue(na, index, 0);
1736     }
1737 
1738         /* Convert to string and add to output */
1739     for (i = 0; i < nclass; i++) {
1740         numaGetIValue(na, i, &ival);
1741         if (ival == 1) {
1742             str[0] = '0' + i;
1743             str[1] = '\0';
1744             sarrayAddString(sa, str, L_COPY);
1745         }
1746     }
1747     numaDestroy(&na);
1748     return sa;
1749 }
1750 
1751 
1752 /*!
1753  * \brief   recogAddDigitPadTemplates()
1754  *
1755  * \param[in]    recog   trained
1756  * \param[in]    sa      set of text strings that need to be padded
1757  * \return  pixa   of all templates from %recog and the additional pad
1758  *                 templates from a boot recognizer; or NULL on error
1759  *
1760  * <pre>
1761  * Notes:
1762  *      (1) Call recogIsPaddingNeeded() first, which returns %sa of
1763  *          template text strings for classes where more templates
1764  *          are needed.
1765  * </pre>
1766  */
1767 PIXA  *
recogAddDigitPadTemplates(L_RECOG * recog,SARRAY * sa)1768 recogAddDigitPadTemplates(L_RECOG  *recog,
1769                           SARRAY   *sa)
1770 {
1771 char    *str, *text;
1772 l_int32  i, j, n, nt;
1773 PIX     *pix;
1774 PIXA    *pixa1, *pixa2;
1775 
1776     PROCNAME("recogAddDigitPadTemplates");
1777 
1778     if (!recog)
1779         return (PIXA *)ERROR_PTR("recog not defined", procName, NULL);
1780     if (!sa)
1781         return (PIXA *)ERROR_PTR("sa not defined", procName, NULL);
1782     if (recogCharsetAvailable(recog->charset_type) == FALSE)
1783         return (PIXA *)ERROR_PTR("boot charset not available", procName, NULL);
1784 
1785         /* Make boot recog templates */
1786     pixa1 = recogMakeBootDigitTemplates(0);
1787     n = pixaGetCount(pixa1);
1788 
1789         /* Extract the unscaled templates from %recog */
1790     pixa2 = recogExtractPixa(recog);
1791 
1792         /* Add selected boot recog templates based on the text strings in sa */
1793     nt = sarrayGetCount(sa);
1794     for (i = 0; i < n; i++) {
1795         pix = pixaGetPix(pixa1, i, L_CLONE);
1796         text = pixGetText(pix);
1797         for (j = 0; j < nt; j++) {
1798             str = sarrayGetString(sa, j, L_NOCOPY);
1799             if (!strcmp(text, str)) {
1800                 pixaAddPix(pixa2, pix, L_COPY);
1801                 break;
1802             }
1803         }
1804         pixDestroy(&pix);
1805     }
1806 
1807     pixaDestroy(&pixa1);
1808     return pixa2;
1809 }
1810 
1811 
1812 /*!
1813  * \brief   recogCharsetAvailable()
1814  *
1815  * \param[in]    type of charset for padding
1816  * \return  1 if available; 0 if not.
1817  */
1818 static l_int32
recogCharsetAvailable(l_int32 type)1819 recogCharsetAvailable(l_int32  type)
1820 {
1821 l_int32  ret;
1822 
1823     PROCNAME("recogCharsetAvailable");
1824 
1825     switch (type)
1826     {
1827     case L_ARABIC_NUMERALS:
1828         ret = TRUE;
1829         break;
1830     case L_LC_ROMAN_NUMERALS:
1831     case L_UC_ROMAN_NUMERALS:
1832     case L_LC_ALPHA:
1833     case L_UC_ALPHA:
1834         L_INFO("charset type %d not available\n", procName, type);
1835         ret = FALSE;
1836         break;
1837     default:
1838         L_INFO("charset type %d is unknown\n", procName, type);
1839         ret = FALSE;
1840         break;
1841     }
1842 
1843     return ret;
1844 }
1845 
1846 
1847 /*------------------------------------------------------------------------*
1848  *                      Making a boot digit recognizer                    *
1849  *------------------------------------------------------------------------*/
1850 /*!
1851  * \brief   recogMakeBootDigitRecog()
1852  *
1853  * \param[in]    scaleh   scale all heights to this; typ. use 40
1854  * \param[in]    linew    normalized line width; typ. use 5; 0 to skip
1855  * \param[in]    maxyshift from nominal centroid alignment; typically 0 or 1
1856  * \param[in]    debug  1 for showing templates; 0 otherwise
1857  * \return  recog, or NULL on error
1858  *
1859  * <pre>
1860  * Notes:
1861  *     (1) This takes a set of pre-computed, labeled pixa of single
1862  *         digits, and generates a recognizer where the character templates
1863  *         that will be used are derived from the boot-generated pixa:
1864  *         - extending by replicating the set with different widths,
1865  *           keeping the height the same
1866  *         - scaling (isotropically to fixed height)
1867  *         - optionally generating a skeleton and thickening so that
1868  *           all strokes have the same width.
1869  *     (2) The resulting templates are scaled versions of either the
1870  *         input bitmaps or images with fixed line widths.  To use the
1871  *         input bitmaps, set %linew = 0; otherwise, set %linew to the
1872  *         desired line width.
1873  * </pre>
1874  */
1875 L_RECOG  *
recogMakeBootDigitRecog(l_int32 scaleh,l_int32 linew,l_int32 maxyshift,l_int32 debug)1876 recogMakeBootDigitRecog(l_int32  scaleh,
1877                         l_int32  linew,
1878                         l_int32  maxyshift,
1879                         l_int32  debug)
1880 
1881 {
1882 PIXA     *pixa;
1883 L_RECOG  *recog;
1884 
1885         /* Get the templates, extended by horizontal scaling */
1886     pixa = recogMakeBootDigitTemplates(debug);
1887 
1888         /* Make the boot recog; recogModifyTemplate() will scale the
1889          * templates and optionally turn them into strokes of fixed width. */
1890     recog = recogCreateFromPixa(pixa, 0, scaleh, linew, 128, maxyshift);
1891     pixaDestroy(&pixa);
1892     if (debug)
1893         recogShowContent(stderr, recog, 0, 1);
1894 
1895     return recog;
1896 }
1897 
1898 
1899 /*!
1900  * \brief   recogMakeBootDigitTemplates()
1901  *
1902  * \param[in]    debug  1 for display of templates
1903  * \return  pixa   of templates; or NULL on error
1904  *
1905  * <pre>
1906  * Notes:
1907  *     (1) See recogMakeBootDigitRecog().
1908  * </pre>
1909  */
1910 PIXA  *
recogMakeBootDigitTemplates(l_int32 debug)1911 recogMakeBootDigitTemplates(l_int32  debug)
1912 {
1913 NUMA  *na;
1914 PIX   *pix1, *pix2, *pix3;
1915 PIXA  *pixa1, *pixa2, *pixa3;
1916 
1917     pixa1 = l_bootnum_gen1();
1918     pixa2 = l_bootnum_gen2();
1919     pixa3 = l_bootnum_gen3();
1920     if (debug) {
1921         pix1 = pixaDisplayTiledWithText(pixa1, 1500, 1.0, 10, 2, 6, 0xff000000);
1922         pix2 = pixaDisplayTiledWithText(pixa2, 1500, 1.0, 10, 2, 6, 0xff000000);
1923         pix3 = pixaDisplayTiledWithText(pixa3, 1500, 1.0, 10, 2, 6, 0xff000000);
1924         pixDisplay(pix1, 0, 0);
1925         pixDisplay(pix2, 600, 0);
1926         pixDisplay(pix3, 1200, 0);
1927         pixDestroy(&pix1);
1928         pixDestroy(&pix2);
1929         pixDestroy(&pix3);
1930     }
1931     pixaJoin(pixa1, pixa2, 0, -1);
1932     pixaJoin(pixa1, pixa3, 0, -1);
1933     pixaDestroy(&pixa2);
1934     pixaDestroy(&pixa3);
1935 
1936         /* Extend by horizontal scaling */
1937     na = numaCreate(4);
1938     numaAddNumber(na, 0.9);
1939     numaAddNumber(na, 1.1);
1940     numaAddNumber(na, 1.2);
1941     pixa2 = pixaExtendByScaling(pixa1, na, L_HORIZ, 1);
1942 
1943     pixaDestroy(&pixa1);
1944     numaDestroy(&na);
1945     return pixa2;
1946 }
1947 
1948 
1949 /*------------------------------------------------------------------------*
1950  *                               Debugging                                *
1951  *------------------------------------------------------------------------*/
1952 /*!
1953  * \brief   recogShowContent()
1954  *
1955  * \param[in]    fp file  stream
1956  * \param[in]    recog
1957  * \param[in]    index    for naming of output files of template images
1958  * \param[in]    display  1 for showing template images, 0 otherwise
1959  * \return  0 if OK, 1 on error
1960  */
1961 l_int32
recogShowContent(FILE * fp,L_RECOG * recog,l_int32 index,l_int32 display)1962 recogShowContent(FILE     *fp,
1963                  L_RECOG  *recog,
1964                  l_int32   index,
1965                  l_int32   display)
1966 {
1967 char     buf[128];
1968 l_int32  i, val, count;
1969 PIX     *pix;
1970 NUMA    *na;
1971 
1972     PROCNAME("recogShowContent");
1973 
1974     if (!fp)
1975         return ERROR_INT("stream not defined", procName, 1);
1976     if (!recog)
1977         return ERROR_INT("recog not defined", procName, 1);
1978 
1979     fprintf(fp, "Debug print of recog contents\n");
1980     fprintf(fp, "  Setsize: %d\n", recog->setsize);
1981     fprintf(fp, "  Binarization threshold: %d\n", recog->threshold);
1982     fprintf(fp, "  Maximum matching y-jiggle: %d\n", recog->maxyshift);
1983     if (recog->linew <= 0)
1984         fprintf(fp, "  Using image templates for matching\n");
1985     else
1986         fprintf(fp, "  Using templates with fixed line width for matching\n");
1987     if (recog->scalew == 0)
1988         fprintf(fp, "  No width scaling of templates\n");
1989     else
1990         fprintf(fp, "  Template width scaled to %d\n", recog->scalew);
1991     if (recog->scaleh == 0)
1992         fprintf(fp, "  No height scaling of templates\n");
1993     else
1994         fprintf(fp, "  Template height scaled to %d\n", recog->scaleh);
1995     fprintf(fp, "  Number of samples in each class:\n");
1996     pixaaGetCount(recog->pixaa_u, &na);
1997     for (i = 0; i < recog->setsize; i++) {
1998         l_dnaGetIValue(recog->dna_tochar, i, &val);
1999         numaGetIValue(na, i, &count);
2000         if (val < 128)
2001             fprintf(fp, "    class %d, char %c:   %d\n", i, val, count);
2002         else
2003             fprintf(fp, "    class %d, val %d:   %d\n", i, val, count);
2004     }
2005     numaDestroy(&na);
2006 
2007     if (display) {
2008         lept_mkdir("lept/recog");
2009         pix = pixaaDisplayByPixa(recog->pixaa_u, 20, 20, 1000);
2010         snprintf(buf, sizeof(buf), "/tmp/lept/recog/templates_u.%d.png", index);
2011         pixWriteDebug(buf, pix, IFF_PNG);
2012         pixDisplay(pix, 0, 200 * index);
2013         pixDestroy(&pix);
2014         if (recog->train_done) {
2015             pix = pixaaDisplayByPixa(recog->pixaa, 20, 20, 1000);
2016             snprintf(buf, sizeof(buf),
2017                      "/tmp/lept/recog/templates.%d.png", index);
2018             pixWriteDebug(buf, pix, IFF_PNG);
2019             pixDisplay(pix, 800, 200 * index);
2020             pixDestroy(&pix);
2021         }
2022     }
2023     return 0;
2024 }
2025 
2026 
2027 /*!
2028  * \brief   recogDebugAverages()
2029  *
2030  * \param[in]    precog    addr of recog
2031  * \param[in]    debug     0 no output; 1 for images; 2 for text; 3 for both
2032  * \return  0 if OK, 1 on error
2033  *
2034  * <pre>
2035  * Notes:
2036  *      (1) Generates an image that pairs each of the input images used
2037  *          in training with the average template that it is best
2038  *          correlated to.  This is written into the recog.
2039  *      (2) It also generates pixa_tr of all the input training images,
2040  *          which can be used, e.g., in recogShowMatchesInRange().
2041  *      (3) Destroys the recog if the averaging function finds any bad classes.
2042  * </pre>
2043  */
2044 l_int32
recogDebugAverages(L_RECOG ** precog,l_int32 debug)2045 recogDebugAverages(L_RECOG  **precog,
2046                    l_int32    debug)
2047 {
2048 l_int32    i, j, n, np, index;
2049 l_float32  score;
2050 PIX       *pix1, *pix2, *pix3;
2051 PIXA      *pixa, *pixat;
2052 PIXAA     *paa1, *paa2;
2053 L_RECOG   *recog;
2054 
2055     PROCNAME("recogDebugAverages");
2056 
2057     if (!precog)
2058         return ERROR_INT("&recog not defined", procName, 1);
2059     if ((recog = *precog) == NULL)
2060         return ERROR_INT("recog not defined", procName, 1);
2061 
2062         /* Mark the training as finished if necessary, and make sure
2063          * that the average templates have been built. */
2064     recogAverageSamples(&recog, 0);
2065     if (!recog)
2066         return ERROR_INT("averaging failed; recog destroyed", procName, 1);
2067 
2068         /* Save a pixa of all the training examples */
2069     paa1 = recog->pixaa;
2070     if (!recog->pixa_tr)
2071         recog->pixa_tr = pixaaFlattenToPixa(paa1, NULL, L_CLONE);
2072 
2073         /* Destroy any existing image and make a new one */
2074     if (recog->pixdb_ave)
2075         pixDestroy(&recog->pixdb_ave);
2076     n = pixaaGetCount(paa1, NULL);
2077     paa2 = pixaaCreate(n);
2078     for (i = 0; i < n; i++) {
2079         pixa = pixaCreate(0);
2080         pixat = pixaaGetPixa(paa1, i, L_CLONE);
2081         np = pixaGetCount(pixat);
2082         for (j = 0; j < np; j++) {
2083             pix1 = pixaaGetPix(paa1, i, j, L_CLONE);
2084             recogIdentifyPix(recog, pix1, &pix2);
2085             rchExtract(recog->rch, &index, &score, NULL, NULL, NULL,
2086                        NULL, NULL);
2087             if (debug >= 2)
2088                 fprintf(stderr, "index = %d, score = %7.3f\n", index, score);
2089             pix3 = pixAddBorder(pix2, 2, 1);
2090             pixaAddPix(pixa, pix3, L_INSERT);
2091             pixDestroy(&pix1);
2092             pixDestroy(&pix2);
2093         }
2094         pixaaAddPixa(paa2, pixa, L_INSERT);
2095         pixaDestroy(&pixat);
2096     }
2097     recog->pixdb_ave = pixaaDisplayByPixa(paa2, 20, 20, 2500);
2098     if (debug % 2) {
2099         lept_mkdir("lept/recog");
2100         pixWriteDebug("/tmp/lept/recog/templ_match.png", recog->pixdb_ave,
2101                       IFF_PNG);
2102         pixDisplay(recog->pixdb_ave, 100, 100);
2103     }
2104 
2105     pixaaDestroy(&paa2);
2106     return 0;
2107 }
2108 
2109 
2110 /*!
2111  * \brief   recogShowAverageTemplates()
2112  *
2113  * \param[in]    recog
2114  * \return  0 on success, 1 on failure
2115  *
2116  * <pre>
2117  * Notes:
2118  *      (1) This debug routine generates a display of the averaged templates,
2119  *          both scaled and unscaled, with the centroid visible in red.
2120  * </pre>
2121  */
2122 l_int32
recogShowAverageTemplates(L_RECOG * recog)2123 recogShowAverageTemplates(L_RECOG  *recog)
2124 {
2125 l_int32    i, size;
2126 l_float32  x, y;
2127 PIX       *pix1, *pix2, *pixr;
2128 PIXA      *pixat, *pixadb;
2129 
2130     PROCNAME("recogShowAverageTemplates");
2131 
2132     if (!recog)
2133         return ERROR_INT("recog not defined", procName, 1);
2134 
2135     fprintf(stderr, "min/max width_u = (%d,%d); min/max height_u = (%d,%d)\n",
2136             recog->minwidth_u, recog->maxwidth_u,
2137             recog->minheight_u, recog->maxheight_u);
2138     fprintf(stderr, "min splitw = %d, max splith = %d\n",
2139             recog->min_splitw, recog->max_splith);
2140 
2141     pixaDestroy(&recog->pixadb_ave);
2142 
2143     pixr = pixCreate(3, 3, 32);  /* 3x3 red square for centroid location */
2144     pixSetAllArbitrary(pixr, 0xff000000);
2145     pixadb = pixaCreate(2);
2146 
2147         /* Unscaled bitmaps */
2148     size = recog->setsize;
2149     pixat = pixaCreate(size);
2150     for (i = 0; i < size; i++) {
2151         if ((pix1 = pixaGetPix(recog->pixa_u, i, L_CLONE)) == NULL)
2152             continue;
2153         pix2 = pixConvertTo32(pix1);
2154         ptaGetPt(recog->pta_u, i, &x, &y);
2155         pixRasterop(pix2, (l_int32)(x - 0.5), (l_int32)(y - 0.5), 3, 3,
2156                     PIX_SRC, pixr, 0, 0);
2157         pixaAddPix(pixat, pix2, L_INSERT);
2158         pixDestroy(&pix1);
2159     }
2160     pix1 = pixaDisplayTiledInRows(pixat, 32, 3000, 1.0, 0, 20, 0);
2161     pixaAddPix(pixadb, pix1, L_INSERT);
2162     pixDisplay(pix1, 100, 100);
2163     pixaDestroy(&pixat);
2164 
2165         /* Scaled bitmaps */
2166     pixat = pixaCreate(size);
2167     for (i = 0; i < size; i++) {
2168         if ((pix1 = pixaGetPix(recog->pixa, i, L_CLONE)) == NULL)
2169             continue;
2170         pix2 = pixConvertTo32(pix1);
2171         ptaGetPt(recog->pta, i, &x, &y);
2172         pixRasterop(pix2, (l_int32)(x - 0.5), (l_int32)(y - 0.5), 3, 3,
2173                     PIX_SRC, pixr, 0, 0);
2174         pixaAddPix(pixat, pix2, L_INSERT);
2175         pixDestroy(&pix1);
2176     }
2177     pix1 = pixaDisplayTiledInRows(pixat, 32, 3000, 1.0, 0, 20, 0);
2178     pixaAddPix(pixadb, pix1, L_INSERT);
2179     pixDisplay(pix1, 100, 100);
2180     pixaDestroy(&pixat);
2181     pixDestroy(&pixr);
2182     recog->pixadb_ave = pixadb;
2183     return 0;
2184 }
2185 
2186 
2187 /*!
2188  * \brief   pixDisplayOutliers()
2189  *
2190  * \param[in]    pixas    unscaled labeled templates
2191  * \param[in]    nas      scores of templates (against class averages)
2192  * \return  pix    tiled pixa with text and scores, or NULL on failure
2193  *
2194  * <pre>
2195  * Notes:
2196  *      (1) This debug routine is called from recogRemoveOutliers2(),
2197  *          and takes the saved templates and their scores as input.
2198  * </pre>
2199  */
2200 static PIX  *
pixDisplayOutliers(PIXA * pixas,NUMA * nas)2201 pixDisplayOutliers(PIXA  *pixas,
2202                    NUMA  *nas)
2203 {
2204 char      *text;
2205 char       buf[16];
2206 l_int32    i, n;
2207 l_float32  fval;
2208 PIX       *pix1, *pix2;
2209 PIXA      *pixa1;
2210 
2211     PROCNAME("pixDisplayOutliers");
2212 
2213     if (!pixas)
2214         return (PIX *)ERROR_PTR("pixas not defined", procName, NULL);
2215     if (!nas)
2216         return (PIX *)ERROR_PTR("nas not defined", procName, NULL);
2217     n = pixaGetCount(pixas);
2218     if (numaGetCount(nas) != n)
2219         return (PIX *)ERROR_PTR("pixas and nas sizes differ", procName, NULL);
2220 
2221     pixa1 = pixaCreate(n);
2222     for (i = 0; i < n; i++) {
2223         pix1 = pixaGetPix(pixas, i, L_CLONE);
2224         pix2 = pixAddBlackOrWhiteBorder(pix1, 25, 25, 0, 0, L_GET_WHITE_VAL);
2225         text = pixGetText(pix1);
2226         numaGetFValue(nas, i, &fval);
2227         snprintf(buf, sizeof(buf), "'%s': %5.2f", text, fval);
2228         pixSetText(pix2, buf);
2229         pixaAddPix(pixa1, pix2, L_INSERT);
2230         pixDestroy(&pix1);
2231     }
2232     pix1 = pixaDisplayTiledWithText(pixa1, 1500, 1.0, 20, 2, 6, 0xff000000);
2233     pixaDestroy(&pixa1);
2234     return pix1;
2235 }
2236 
2237 
2238 /*!
2239  * \brief   recogDisplayOutlier()
2240  *
2241  * \param[in]    recog
2242  * \param[in]    iclass     sample is in this class
2243  * \param[in]    jsamp      index of sample is class i
2244  * \param[in]    maxclass   index of class with closest average to sample
2245  * \param[in]    maxscore   score of sample with average of class %maxclass
2246  * \return  pix  sample and template images, with score, or NULL on error
2247  *
2248  * <pre>
2249  * Notes:
2250  *      (1) This shows three templates, side-by-side:
2251  *          - The outlier sample
2252  *          - The average template from the same class
2253  *          - The average class template that best matched the outlier sample
2254  * </pre>
2255  */
2256 static PIX  *
recogDisplayOutlier(L_RECOG * recog,l_int32 iclass,l_int32 jsamp,l_int32 maxclass,l_float32 maxscore)2257 recogDisplayOutlier(L_RECOG   *recog,
2258                     l_int32    iclass,
2259                     l_int32    jsamp,
2260                     l_int32    maxclass,
2261                     l_float32  maxscore)
2262 {
2263 char   buf[64];
2264 PIX   *pix1, *pix2, *pix3, *pix4, *pix5;
2265 PIXA  *pixa;
2266 
2267     PROCNAME("recogDisplayOutlier");
2268 
2269     if (!recog)
2270         return (PIX *)ERROR_PTR("recog not defined", procName, NULL);
2271 
2272     pix1 = pixaaGetPix(recog->pixaa, iclass, jsamp, L_CLONE);
2273     pix2 = pixaGetPix(recog->pixa, iclass, L_CLONE);
2274     pix3 = pixaGetPix(recog->pixa, maxclass, L_CLONE);
2275     pixa = pixaCreate(3);
2276     pixaAddPix(pixa, pix1, L_INSERT);
2277     pixaAddPix(pixa, pix2, L_INSERT);
2278     pixaAddPix(pixa, pix3, L_INSERT);
2279     pix4 = pixaDisplayTiledInRows(pixa, 32, 400, 2.0, 0, 12, 2);
2280     snprintf(buf, sizeof(buf), "C=%d, BAC=%d, S=%4.2f", iclass, maxclass,
2281              maxscore);
2282     pix5 = pixAddSingleTextblock(pix4, recog->bmf, buf, 0xff000000,
2283                                  L_ADD_BELOW, NULL);
2284     pixDestroy(&pix4);
2285     pixaDestroy(&pixa);
2286     return pix5;
2287 }
2288 
2289 
2290 /*!
2291  * \brief   recogShowMatchesInRange()
2292  *
2293  * \param[in]    recog
2294  * \param[in]    pixa of 1 bpp images to match
2295  * \param[in]    minscore, maxscore range to include output
2296  * \param[in]    display to display the result
2297  * \return  0 if OK, 1 on error
2298  *
2299  * <pre>
2300  * Notes:
2301  *      (1) This gives a visual output of the best matches for a given
2302  *          range of scores.  Each pair of images can optionally be
2303  *          labeled with the index of the best match and the correlation.
2304  *      (2) To use this, save a set of 1 bpp images (labeled or
2305  *          unlabeled) that can be given to a recognizer in a pixa.
2306  *          Then call this function with the pixa and parameters
2307  *          to filter a range of scores.
2308  * </pre>
2309  */
2310 l_int32
recogShowMatchesInRange(L_RECOG * recog,PIXA * pixa,l_float32 minscore,l_float32 maxscore,l_int32 display)2311 recogShowMatchesInRange(L_RECOG     *recog,
2312                         PIXA        *pixa,
2313                         l_float32    minscore,
2314                         l_float32    maxscore,
2315                         l_int32      display)
2316 {
2317 l_int32    i, n, index, depth;
2318 l_float32  score;
2319 NUMA      *nascore, *naindex;
2320 PIX       *pix1, *pix2;
2321 PIXA      *pixa1, *pixa2;
2322 
2323     PROCNAME("recogShowMatchesInRange");
2324 
2325     if (!recog)
2326         return ERROR_INT("recog not defined", procName, 1);
2327     if (!pixa)
2328         return ERROR_INT("pixa not defined", procName, 1);
2329 
2330         /* Run the recognizer on the set of images */
2331     n = pixaGetCount(pixa);
2332     nascore = numaCreate(n);
2333     naindex = numaCreate(n);
2334     pixa1 = pixaCreate(n);
2335     for (i = 0; i < n; i++) {
2336         pix1 = pixaGetPix(pixa, i, L_CLONE);
2337         recogIdentifyPix(recog, pix1, &pix2);
2338         rchExtract(recog->rch, &index, &score, NULL, NULL, NULL, NULL, NULL);
2339         numaAddNumber(nascore, score);
2340         numaAddNumber(naindex, index);
2341         pixaAddPix(pixa1, pix2, L_INSERT);
2342         pixDestroy(&pix1);
2343     }
2344 
2345         /* Filter the set and optionally add text to each */
2346     pixa2 = pixaCreate(n);
2347     depth = 1;
2348     for (i = 0; i < n; i++) {
2349         numaGetFValue(nascore, i, &score);
2350         if (score < minscore || score > maxscore) continue;
2351         pix1 = pixaGetPix(pixa1, i, L_CLONE);
2352         numaGetIValue(naindex, i, &index);
2353         pix2 = recogShowMatch(recog, pix1, NULL, NULL, index, score);
2354         if (i == 0) depth = pixGetDepth(pix2);
2355         pixaAddPix(pixa2, pix2, L_INSERT);
2356         pixDestroy(&pix1);
2357     }
2358 
2359         /* Package it up */
2360     pixDestroy(&recog->pixdb_range);
2361     if (pixaGetCount(pixa2) > 0) {
2362         recog->pixdb_range =
2363             pixaDisplayTiledInRows(pixa2, depth, 2500, 1.0, 0, 20, 1);
2364         if (display)
2365             pixDisplay(recog->pixdb_range, 300, 100);
2366     } else {
2367         L_INFO("no character matches in the range of scores\n", procName);
2368     }
2369 
2370     pixaDestroy(&pixa1);
2371     pixaDestroy(&pixa2);
2372     numaDestroy(&nascore);
2373     numaDestroy(&naindex);
2374     return 0;
2375 }
2376 
2377 
2378 /*!
2379  * \brief   recogShowMatch()
2380  *
2381  * \param[in]    recog
2382  * \param[in]    pix1  input pix; several possibilities
2383  * \param[in]    pix2  [optional] matching template
2384  * \param[in]    box  [optional] region in pix1 for which pix2 matches
2385  * \param[in]    index  index of matching template; use -1 to disable printing
2386  * \param[in]    score  score of match
2387  * \return  pixd pair of images, showing input pix and best template,
2388  *                    optionally with matching information, or NULL on error.
2389  *
2390  * <pre>
2391  * Notes:
2392  *      (1) pix1 can be one of these:
2393  *          (a) The input pix alone, which can be either a single character
2394  *              (box == NULL) or several characters that need to be
2395  *              segmented.  If more than character is present, the box
2396  *              region is displayed with an outline.
2397  *          (b) Both the input pix and the matching template.  In this case,
2398  *              pix2 and box will both be null.
2399  *      (2) If the bmf has been made (by a call to recogMakeBmf())
2400  *          and the index >= 0, the text field, match score and index
2401  *          will be rendered; otherwise their values will be ignored.
2402  * </pre>
2403  */
2404 PIX *
recogShowMatch(L_RECOG * recog,PIX * pix1,PIX * pix2,BOX * box,l_int32 index,l_float32 score)2405 recogShowMatch(L_RECOG   *recog,
2406                PIX       *pix1,
2407                PIX       *pix2,
2408                BOX       *box,
2409                l_int32    index,
2410                l_float32  score)
2411 {
2412 char    buf[32];
2413 char   *text;
2414 L_BMF  *bmf;
2415 PIX    *pix3, *pix4, *pix5, *pixd;
2416 PIXA   *pixa;
2417 
2418     PROCNAME("recogShowMatch");
2419 
2420     if (!recog)
2421         return (PIX *)ERROR_PTR("recog not defined", procName, NULL);
2422     if (!pix1)
2423         return (PIX *)ERROR_PTR("pix1 not defined", procName, NULL);
2424 
2425     bmf = (recog->bmf && index >= 0) ? recog->bmf : NULL;
2426     if (!pix2 && !box && !bmf)  /* nothing to do */
2427         return pixCopy(NULL, pix1);
2428 
2429     pix3 = pixConvertTo32(pix1);
2430     if (box)
2431         pixRenderBoxArb(pix3, box, 1, 255, 0, 0);
2432 
2433     if (pix2) {
2434         pixa = pixaCreate(2);
2435         pixaAddPix(pixa, pix3, L_CLONE);
2436         pixaAddPix(pixa, pix2, L_CLONE);
2437         pix4 = pixaDisplayTiledInRows(pixa, 1, 500, 1.0, 0, 15, 0);
2438         pixaDestroy(&pixa);
2439     } else {
2440         pix4 = pixCopy(NULL, pix3);
2441     }
2442     pixDestroy(&pix3);
2443 
2444     if (bmf) {
2445         pix5 = pixAddBorderGeneral(pix4, 55, 55, 0, 0, 0xffffff00);
2446         recogGetClassString(recog, index, &text);
2447         snprintf(buf, sizeof(buf), "C=%s, S=%4.3f, I=%d", text, score, index);
2448         pixd = pixAddSingleTextblock(pix5, bmf, buf, 0xff000000,
2449                                      L_ADD_BELOW, NULL);
2450         pixDestroy(&pix5);
2451         LEPT_FREE(text);
2452     } else {
2453         pixd = pixClone(pix4);
2454     }
2455     pixDestroy(&pix4);
2456 
2457     return pixd;
2458 }
2459