1 /*====================================================================*
2 - Copyright (C) 2001 Leptonica. All rights reserved.
3 -
4 - Redistribution and use in source and binary forms, with or without
5 - modification, are permitted provided that the following conditions
6 - are met:
7 - 1. Redistributions of source code must retain the above copyright
8 - notice, this list of conditions and the following disclaimer.
9 - 2. Redistributions in binary form must reproduce the above
10 - copyright notice, this list of conditions and the following
11 - disclaimer in the documentation and/or other materials
12 - provided with the distribution.
13 -
14 - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15 - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16 - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17 - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
18 - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 *====================================================================*/
26
27 /*!
28 * \file recogtrain.c
29 * <pre>
30 *
31 * Training on labeled data
32 * l_int32 recogTrainLabeled()
33 * PIX *recogProcessLabeled()
34 * l_int32 recogAddSample()
35 * PIX *recogModifyTemplate()
36 * l_int32 recogAverageSamples()
37 * l_int32 pixaAccumulateSamples()
38 * l_int32 recogTrainingFinished()
39 * static l_int32 recogTemplatesAreOK()
40 * PIXA *recogFilterPixaBySize()
41 * PIXAA *recogSortPixaByClass()
42 * l_int32 recogRemoveOutliers1()
43 * PIXA *pixaRemoveOutliers1()
44 * l_int32 recogRemoveOutliers2()
45 * PIXA *pixaRemoveOutliers2()
46 *
47 * Training on unlabeled data
48 * L_RECOG recogTrainFromBoot()
49 *
50 * Padding the digit training set
51 * l_int32 recogPadDigitTrainingSet()
52 * l_int32 recogIsPaddingNeeded()
53 * static SARRAY *recogAddMissingClassStrings()
54 * PIXA *recogAddDigitPadTemplates()
55 * static l_int32 recogCharsetAvailable()
56 *
57 * Making a boot digit recognizer
58 * L_RECOG *recogMakeBootDigitRecog()
59 * PIXA *recogMakeBootDigitTemplates()
60 *
61 * Debugging
62 * l_int32 recogShowContent()
63 * l_int32 recogDebugAverages()
64 * l_int32 recogShowAverageTemplates()
65 * static PIX *pixDisplayOutliers()
66 * PIX *recogDisplayOutlier()
67 * PIX *recogShowMatchesInRange()
68 * PIX *recogShowMatch()
69 *
70 * These abbreviations are for the type of template to be used:
71 * * SI (for the scanned images)
72 * * WNL (for width-normalized lines, formed by first skeletonizing
73 * the scanned images, and then dilating to a fixed width)
74 * These abbreviations are for the type of recognizer:
75 * * BAR (book-adapted recognizer; the best type; can do identification
76 * with unscaled images and separation of touching characters.)
77 * * BSR (bootstrap recognizer; used if more labeled templates are
78 * required for a BAR, either for finding more templates from
79 * the book, or making a hybrid BAR/BSR.)
80 *
81 * The recog struct typically holds two versions of the input templates
82 * (e.g. from a pixa) that were used to generate it. One version is
83 * the unscaled input templates. The other version is the one that
84 * will be used by the recog to identify unlabeled data. That version
85 * depends on the input parameters when the recog is created. The choices
86 * for the latter version, and their suggested use, are:
87 * (1) unscaled SI -- typical for BAR, generated from book images
88 * (2) unscaled WNL -- ditto
89 * (3) scaled SI -- typical for recognizers containing template
90 * images from sources other than the book to be recognized
91 * (4) scaled WNL -- ditto
92 * For cases (3) and (4), we recommend scaling to fixed height; e.g.,
93 * scalew = 0, scaleh = 40.
94 * When using WNL, we recommend using a width of 5 in the template
95 * and 4 in the unlabeled data.
96 * It appears that better results for a BAR are usually obtained using
97 * SI than WNL, but more experimentation is needed.
98 *
99 * This utility is designed to build recognizers that are specifically
100 * adapted from a large amount of material, such as a book. These
101 * use labeled templates taken from the material, and not scaled.
102 * In addition, two special recognizers are useful:
103 * (1) Bootstrap recognizer (BSR). This uses height-scaled templates,
104 * that have been extended with several repetitions in one of two ways:
105 * (a) anisotropic width scaling (for either SI or WNL)
106 * (b) iterative erosions/dilations (for SI).
107 * (2) Outlier removal. This uses height scaled templates. It can be
108 * implemented without using templates that are aligned averages of all
109 * templates in a class.
110 *
111 * Recognizers are inexpensive to generate, for example, from a pixa
112 * of labeled templates. The general process of building a BAR is
113 * to start with labeled templates, e.g., in a pixa, make a BAR, and
114 * analyze new samples from the book to augment the BAR until it has
115 * enough samples for each character class. Along the way, samples
116 * from a BSR may be added for help in training. If not enough samples
117 * are available for the BAR, it can finally be augmented with BSR
118 * samples, in which case the resulting hybrid BAR/BSR recognizer
119 * must work on scaled images.
120 *
121 * Here are the steps in doing recog training:
122 * A. Generate a BAR from any existing labeled templates
123 * (1) Create a recog and add the templates, using recogAddSample().
124 * This stores the unscaled templates.
125 * [Note: this can be done in one step if the labeled templates are put
126 * into a pixa:
127 * L_Recog *rec = recogCreateFromPixa(pixa, ...); ]
128 * (2) Call recogTrainingFinished() to generate the (sometimes modified)
129 * templates to be used for correlation.
130 * (3) Optionally, remove outliers.
131 * If there are sufficient samples in the classes, we're done. Otherwise,
132 * B. Try to get more samples from the book to pad the BAR.
133 * (1) Save the unscaled, labeled templates from the BAR.
134 * (2) Supplement the BAR with bootstrap templates to make a hybrid BAR/BSR.
135 * (3) Do recognition on more unlabeled images, scaled to a fixed height
136 * (4) Add the unscaled, labeled images to the saved set.
137 * (5) Optionally, remove outliers.
138 * If there are sufficient samples in the classes, we're done. Otherwise,
139 * C. For classes without a sufficient number of templates, we can
140 * supplement the BAR with templates from a BSR (a hybrid BAR/BSR),
141 * and do recognition scaled to a fixed height.
142 *
143 * Here are several methods that can be used for identifying outliers:
144 * (1) Compute average templates for each class and remove a candidate
145 * that is poorly correlated with the average. This is the most
146 * simple method. recogRemoveOutliers1() uses this, supplemented with
147 * a second threshold and a target number of templates to be saved.
148 * (2) Compute average templates for each class and remove a candidate
149 * that is more highly correlated with the average of some other class.
150 * This does not require setting a threshold for the correlation.
151 * recogRemoveOutliers2() uses this method, supplemented with a minimum
152 * correlation score.
153 * (3) For each candidate, find the average correlation with other
154 * members of its class, and remove those that have a relatively
155 * low average correlation. This is similar to (1), gives comparable
156 * results and, because it does not use average templates, it requires
157 * a bit more computation.
158 * </pre>
159 */
160
161 #include <string.h>
162 #include "allheaders.h"
163
164 /* Static functions */
165 static l_int32 recogTemplatesAreOK(L_RECOG *recog, l_int32 minsize,
166 l_float32 minfract, l_int32 *pok);
167 static SARRAY *recogAddMissingClassStrings(L_RECOG *recog);
168 static l_int32 recogCharsetAvailable(l_int32 type);
169 static PIX *pixDisplayOutliers(PIXA *pixas, NUMA *nas);
170 static PIX *recogDisplayOutlier(L_RECOG *recog, l_int32 iclass, l_int32 jsamp,
171 l_int32 maxclass, l_float32 maxscore);
172
173 /* Default parameters that are used in recogTemplatesAreOK() and
174 * in outlier removal functions, and that use template set size
175 * to decide if the set of templates (before outliers are removed)
176 * is valid. Values are set to accept most sets of sample templates. */
177 static const l_int32 DEFAULT_MIN_SET_SIZE = 1; /* minimum number of
178 samples for a valid class */
179 static const l_float32 DEFAULT_MIN_SET_FRACT = 0.4; /* minimum fraction
180 of classes required for a valid recog */
181
182 /* Defaults in pixaRemoveOutliers1() and pixaRemoveOutliers2() */
183 static const l_float32 DEFAULT_MIN_SCORE = 0.75; /* keep everything above */
184 static const l_int32 DEFAULT_MIN_TARGET = 3; /* to be kept if possible */
185 static const l_float32 LOWER_SCORE_THRESHOLD = 0.5; /* templates can be
186 * kept down to this score to if needed to retain the
187 * desired minimum number of templates */
188
189
190 /*------------------------------------------------------------------------*
191 * Training *
192 *------------------------------------------------------------------------*/
193 /*!
194 * \brief recogTrainLabeled()
195 *
196 * \param[in] recog in training mode
197 * \param[in] pixs if depth > 1, will be thresholded to 1 bpp
198 * \param[in] box [optional] cropping box
199 * \param[in] text [optional] if null, use text field in pix
200 * \param[in] debug 1 to display images of samples not captured
201 * \return 0 if OK, 1 on error
202 *
203 * <pre>
204 * Notes:
205 * (1) Training is restricted to the addition of a single
206 * character in an arbitrary (e.g., UTF8) charset
207 * (2) If box != null, it should represent the location in %pixs
208 * of the character image.
209 * </pre>
210 */
211 l_int32
recogTrainLabeled(L_RECOG * recog,PIX * pixs,BOX * box,char * text,l_int32 debug)212 recogTrainLabeled(L_RECOG *recog,
213 PIX *pixs,
214 BOX *box,
215 char *text,
216 l_int32 debug)
217 {
218 l_int32 ret;
219 PIX *pix;
220
221 PROCNAME("recogTrainLabeled");
222
223 if (!recog)
224 return ERROR_INT("recog not defined", procName, 1);
225 if (!pixs)
226 return ERROR_INT("pixs not defined", procName, 1);
227
228 /* Prepare the sample to be added. This step also acts
229 * as a filter, and can invalidate pixs as a template. */
230 ret = recogProcessLabeled(recog, pixs, box, text, &pix);
231 if (ret) {
232 pixDestroy(&pix);
233 L_WARNING("failure to get sample '%s' for training\n", procName,
234 text);
235 return 1;
236 }
237
238 recogAddSample(recog, pix, debug);
239 pixDestroy(&pix);
240 return 0;
241 }
242
243
244 /*!
245 * \brief recogProcessLabeled()
246 *
247 * \param[in] recog in training mode
248 * \param[in] pixs if depth > 1, will be thresholded to 1 bpp
249 * \param[in] box [optional] cropping box
250 * \param[in] text [optional] if null, use text field in pix
251 * \param[out] ppix addr of pix, 1 bpp, labeled
252 * \return 0 if OK, 1 on error
253 *
254 * <pre>
255 * Notes:
256 * (1) This crops and binarizes the input image, generating a pix
257 * of one character where the charval is inserted into the pix.
258 * </pre>
259 */
260 l_int32
recogProcessLabeled(L_RECOG * recog,PIX * pixs,BOX * box,char * text,PIX ** ppix)261 recogProcessLabeled(L_RECOG *recog,
262 PIX *pixs,
263 BOX *box,
264 char *text,
265 PIX **ppix)
266 {
267 char *textdata;
268 l_int32 textinpix, textin, nsets;
269 NUMA *na;
270 PIX *pix1, *pix2, *pix3, *pix4;
271
272 PROCNAME("recogProcessLabeled");
273
274 if (!ppix)
275 return ERROR_INT("&pix not defined", procName, 1);
276 *ppix = NULL;
277 if (!recog)
278 return ERROR_INT("recog not defined", procName, 1);
279 if (!pixs)
280 return ERROR_INT("pixs not defined", procName, 1);
281
282 /* Find the text; this will be stored with the output images */
283 textin = text && (text[0] != '\0');
284 textinpix = (pixs->text && (pixs->text[0] != '\0'));
285 if (!textin && !textinpix) {
286 L_ERROR("no text: %d\n", procName, recog->num_samples);
287 return 1;
288 }
289 textdata = (textin) ? text : pixs->text; /* do not free */
290
291 /* Crop and binarize if necessary */
292 if (box)
293 pix1 = pixClipRectangle(pixs, box, NULL);
294 else
295 pix1 = pixClone(pixs);
296 if (pixGetDepth(pix1) > 1)
297 pix2 = pixConvertTo1(pix1, recog->threshold);
298 else
299 pix2 = pixClone(pix1);
300 pixDestroy(&pix1);
301
302 /* Remove isolated noise, using as a criterion all components
303 * that are removed by a vertical opening of size 5. */
304 pix3 = pixMorphSequence(pix2, "o1.5", 0); /* seed */
305 pixSeedfillBinary(pix3, pix3, pix2, 8); /* fill from seed; clip to pix2 */
306 pixDestroy(&pix2);
307
308 /* Clip to foreground */
309 pixClipToForeground(pix3, &pix4, NULL);
310 pixDestroy(&pix3);
311 if (!pix4)
312 return ERROR_INT("pix4 is empty", procName, 1);
313
314 /* Verify that if there is more than 1 c.c., they all have
315 * horizontal overlap */
316 na = pixCountByColumn(pix4, NULL);
317 numaCountNonzeroRuns(na, &nsets);
318 numaDestroy(&na);
319 if (nsets > 1) {
320 L_WARNING("found %d sets of horiz separated c.c.; skipping\n",
321 procName, nsets);
322 pixDestroy(&pix4);
323 return 1;
324 }
325
326 pixSetText(pix4, textdata);
327 *ppix = pix4;
328 return 0;
329 }
330
331
332 /*!
333 * \brief recogAddSample()
334 *
335 * \param[in] recog
336 * \param[in] pix a single character, 1 bpp
337 * \param[in] debug
338 * \return 0 if OK, 1 on error
339 *
340 * <pre>
341 * Notes:
342 * (1) The pix is 1 bpp, with the character string label embedded.
343 * (2) The pixaa_u array of the recog is initialized to accept
344 * up to 256 different classes. When training is finished,
345 * the arrays are truncated to the actual number of classes.
346 * To pad an existing recog from the boot recognizers, training
347 * is started again; if samples from a new class are added,
348 * the pixaa_u array is extended by adding a pixa to hold them.
349 * </pre>
350 */
351 l_int32
recogAddSample(L_RECOG * recog,PIX * pix,l_int32 debug)352 recogAddSample(L_RECOG *recog,
353 PIX *pix,
354 l_int32 debug)
355 {
356 char *text;
357 l_int32 npa, charint, index;
358 PIXA *pixa1;
359 PIXAA *paa;
360
361 PROCNAME("recogAddSample");
362
363 if (!recog)
364 return ERROR_INT("recog not defined", procName, 1);
365 if (!pix || pixGetDepth(pix) != 1)
366 return ERROR_INT("pix not defined or not 1 bpp\n", procName, 1);
367 if (recog->train_done)
368 return ERROR_INT("not added: training has been completed", procName, 1);
369 paa = recog->pixaa_u;
370
371 /* Make sure the character is in the set */
372 text = pixGetText(pix);
373 if (l_convertCharstrToInt(text, &charint) == 1) {
374 L_ERROR("invalid text: %s\n", procName, text);
375 return 1;
376 }
377
378 /* Determine the class array index. Check if the class
379 * alreadly exists, and if not, add it. */
380 if (recogGetClassIndex(recog, charint, text, &index) == 1) {
381 /* New class must be added */
382 npa = pixaaGetCount(paa, NULL);
383 if (index > npa) {
384 L_ERROR("oops: bad index %d > npa %d!!\n", procName, index, npa);
385 return 1;
386 }
387 if (index == npa) { /* paa needs to be extended */
388 L_INFO("Adding new class and pixa: index = %d, text = %s\n",
389 procName, index, text);
390 pixa1 = pixaCreate(10);
391 pixaaAddPixa(paa, pixa1, L_INSERT);
392 }
393 }
394 if (debug) {
395 L_INFO("Identified text label: %s\n", procName, text);
396 L_INFO("Identified: charint = %d, index = %d\n",
397 procName, charint, index);
398 }
399
400 /* Insert the unscaled character image into the right pixa.
401 * (Unscaled images are required to split touching characters.) */
402 recog->num_samples++;
403 pixaaAddPix(paa, index, pix, NULL, L_COPY);
404 return 0;
405 }
406
407
408 /*!
409 * \brief recogModifyTemplate()
410 *
411 * \param[in] recog
412 * \param[in] pixs 1 bpp, to be optionally scaled and turned into
413 * strokes of fixed width
414 * \return pixd modified pix if OK, NULL on error
415 */
416 PIX *
recogModifyTemplate(L_RECOG * recog,PIX * pixs)417 recogModifyTemplate(L_RECOG *recog,
418 PIX *pixs)
419 {
420 l_int32 w, h, empty;
421 PIX *pix1, *pix2;
422
423 PROCNAME("recogModifyTemplate");
424
425 if (!recog)
426 return (PIX *)ERROR_PTR("recog not defined", procName, NULL);
427 if (!pixs)
428 return (PIX *)ERROR_PTR("pixs not defined", procName, NULL);
429
430 /* Scale first */
431 pixGetDimensions(pixs, &w, &h, NULL);
432 if ((recog->scalew == 0 || recog->scalew == w) &&
433 (recog->scaleh == 0 || recog->scaleh == h)) { /* no scaling */
434 pix1 = pixCopy(NULL, pixs);
435 } else {
436 pix1 = pixScaleToSize(pixs, recog->scalew, recog->scaleh);
437 }
438 if (!pix1)
439 return (PIX *)ERROR_PTR("pix1 not made", procName, NULL);
440
441 /* Then optionally convert to lines */
442 if (recog->linew <= 0) {
443 pix2 = pixClone(pix1);
444 } else {
445 pix2 = pixSetStrokeWidth(pix1, recog->linew, 1, 8);
446 }
447 pixDestroy(&pix1);
448 if (!pix2)
449 return (PIX *)ERROR_PTR("pix2 not made", procName, NULL);
450
451 /* Make sure we still have some pixels */
452 pixZero(pix2, &empty);
453 if (empty) {
454 pixDestroy(&pix2);
455 return (PIX *)ERROR_PTR("modified template has no pixels",
456 procName, NULL);
457 }
458 return pix2;
459 }
460
461
462 /*!
463 * \brief recogAverageSamples()
464 *
465 * \param[in] precog addr of existing recog; may be destroyed
466 * \param[in] debug
467 * \return 0 on success, 1 on failure
468 *
469 * <pre>
470 * Notes:
471 * (1) This is only called in two situations:
472 * (a) When splitting characters using either the DID method
473 * recogDecode() or the the greedy splitter
474 * recogCorrelationBestRow()
475 * (b) By a special recognizer that is used to remove outliers.
476 * Both unscaled and scaled inputs are averaged.
477 * (2) If the data in any class is nonexistent (no samples), or
478 * very bad (no fg pixels in the average), or if the ratio
479 * of max/min average unscaled class template heights is
480 * greater than max_ht_ratio, this destroys the recog.
481 * The caller must check the return value of the recog.
482 * (3) Set debug = 1 to view the resulting templates and their centroids.
483 * </pre>
484 */
485 l_int32
recogAverageSamples(L_RECOG ** precog,l_int32 debug)486 recogAverageSamples(L_RECOG **precog,
487 l_int32 debug)
488 {
489 l_int32 i, nsamp, size, area, bx, by, badclass;
490 l_float32 x, y, hratio;
491 BOX *box;
492 PIXA *pixa1;
493 PIX *pix1, *pix2, *pix3;
494 PTA *pta1;
495 L_RECOG *recog;
496
497 PROCNAME("recogAverageSamples");
498
499 if (!precog)
500 return ERROR_INT("&recog not defined", procName, 1);
501 if ((recog = *precog) == NULL)
502 return ERROR_INT("recog not defined", procName, 1);
503
504 if (recog->ave_done) {
505 if (debug) /* always do this if requested */
506 recogShowAverageTemplates(recog);
507 return 0;
508 }
509
510 /* Remove any previous averaging data */
511 size = recog->setsize;
512 pixaDestroy(&recog->pixa_u);
513 ptaDestroy(&recog->pta_u);
514 numaDestroy(&recog->nasum_u);
515 recog->pixa_u = pixaCreate(size);
516 recog->pta_u = ptaCreate(size);
517 recog->nasum_u = numaCreate(size);
518
519 pixaDestroy(&recog->pixa);
520 ptaDestroy(&recog->pta);
521 numaDestroy(&recog->nasum);
522 recog->pixa = pixaCreate(size);
523 recog->pta = ptaCreate(size);
524 recog->nasum = numaCreate(size);
525
526 /* Unscaled bitmaps: compute averaged bitmap, centroid, and fg area.
527 * Note that when we threshold to 1 bpp the 8 bpp averaged template
528 * that is returned from the accumulator, it will not be cropped
529 * to the foreground. We must crop it, because the correlator
530 * makes that assumption and will return a zero value if the
531 * width or height of the two images differs by several pixels.
532 * But cropping to fg can cause the value of the centroid to
533 * change, if bx > 0 or by > 0. */
534 badclass = FALSE;
535 for (i = 0; i < size; i++) {
536 pixa1 = pixaaGetPixa(recog->pixaa_u, i, L_CLONE);
537 pta1 = ptaaGetPta(recog->ptaa_u, i, L_CLONE);
538 nsamp = pixaGetCount(pixa1);
539 nsamp = L_MIN(nsamp, 256); /* we only use the first 256 */
540 if (nsamp == 0) { /* no information for this class */
541 L_ERROR("no samples in class %d\n", procName, i);
542 badclass = TRUE;
543 pixaDestroy(&pixa1);
544 ptaDestroy(&pta1);
545 break;
546 } else {
547 pixaAccumulateSamples(pixa1, pta1, &pix1, &x, &y);
548 pix2 = pixThresholdToBinary(pix1, L_MAX(1, nsamp / 2));
549 pixInvert(pix2, pix2);
550 pixClipToForeground(pix2, &pix3, &box);
551 if (!box) {
552 L_ERROR("no fg pixels in average for uclass %d\n", procName, i);
553 badclass = TRUE;
554 pixDestroy(&pix1);
555 pixDestroy(&pix2);
556 pixaDestroy(&pixa1);
557 ptaDestroy(&pta1);
558 break;
559 } else {
560 boxGetGeometry(box, &bx, &by, NULL, NULL);
561 pixaAddPix(recog->pixa_u, pix3, L_INSERT);
562 ptaAddPt(recog->pta_u, x - bx, y - by); /* correct centroid */
563 pixCountPixels(pix3, &area, recog->sumtab);
564 numaAddNumber(recog->nasum_u, area); /* foreground */
565 boxDestroy(&box);
566 }
567 pixDestroy(&pix1);
568 pixDestroy(&pix2);
569 }
570 pixaDestroy(&pixa1);
571 ptaDestroy(&pta1);
572 }
573
574 /* Are any classes bad? If so, destroy the recog and return an error */
575 if (badclass) {
576 recogDestroy(precog);
577 return ERROR_INT("at least 1 bad class; destroying recog", procName, 1);
578 }
579
580 /* Get the range of sizes of the unscaled average templates.
581 * Reject if the height ratio is too large. */
582 pixaSizeRange(recog->pixa_u, &recog->minwidth_u, &recog->minheight_u,
583 &recog->maxwidth_u, &recog->maxheight_u);
584 hratio = (l_float32)recog->maxheight_u / (l_float32)recog->minheight_u;
585 if (hratio > recog->max_ht_ratio) {
586 L_ERROR("ratio of max/min height of average templates = %4.1f;"
587 " destroying recog\n", procName, hratio);
588 recogDestroy(precog);
589 return 1;
590 }
591
592 /* Scaled bitmaps: compute averaged bitmap, centroid, and fg area */
593 for (i = 0; i < size; i++) {
594 pixa1 = pixaaGetPixa(recog->pixaa, i, L_CLONE);
595 pta1 = ptaaGetPta(recog->ptaa, i, L_CLONE);
596 nsamp = pixaGetCount(pixa1);
597 nsamp = L_MIN(nsamp, 256); /* we only use the first 256 */
598 pixaAccumulateSamples(pixa1, pta1, &pix1, &x, &y);
599 pix2 = pixThresholdToBinary(pix1, L_MAX(1, nsamp / 2));
600 pixInvert(pix2, pix2);
601 pixClipToForeground(pix2, &pix3, &box);
602 if (!box) {
603 L_ERROR("no fg pixels in average for sclass %d\n", procName, i);
604 badclass = TRUE;
605 pixDestroy(&pix1);
606 pixDestroy(&pix2);
607 pixaDestroy(&pixa1);
608 ptaDestroy(&pta1);
609 break;
610 } else {
611 boxGetGeometry(box, &bx, &by, NULL, NULL);
612 pixaAddPix(recog->pixa, pix3, L_INSERT);
613 ptaAddPt(recog->pta, x - bx, y - by); /* correct centroid */
614 pixCountPixels(pix3, &area, recog->sumtab);
615 numaAddNumber(recog->nasum, area); /* foreground */
616 boxDestroy(&box);
617 }
618 pixDestroy(&pix1);
619 pixDestroy(&pix2);
620 pixaDestroy(&pixa1);
621 ptaDestroy(&pta1);
622 }
623
624 if (badclass) {
625 recogDestroy(precog);
626 return ERROR_INT("at least 1 bad class; destroying recog", procName, 1);
627 }
628
629 /* Get the range of widths of the scaled average templates */
630 pixaSizeRange(recog->pixa, &recog->minwidth, NULL, &recog->maxwidth, NULL);
631
632 /* Get dimensions useful for splitting */
633 recog->min_splitw = L_MAX(5, recog->minwidth_u - 5);
634 recog->max_splith = recog->maxheight_u + 12; /* allow for skew */
635
636 if (debug)
637 recogShowAverageTemplates(recog);
638
639 recog->ave_done = TRUE;
640 return 0;
641 }
642
643
644 /*!
645 * \brief pixaAccumulateSamples()
646 *
647 * \param[in] pixa of samples from the same class, 1 bpp
648 * \param[in] pta [optional] of centroids of the samples
649 * \param[out] ppixd accumulated samples, 8 bpp
650 * \param[out] px [optional] average x coordinate of centroids
651 * \param[out] py [optional] average y coordinate of centroids
652 * \return 0 on success, 1 on failure
653 *
654 * <pre>
655 * Notes:
656 * (1) This generates an aligned (by centroid) sum of the input pix.
657 * (2) We use only the first 256 samples; that's plenty.
658 * (3) If pta is not input, we generate two tables, and discard
659 * after use. If this is called many times, it is better
660 * to precompute the pta.
661 * </pre>
662 */
663 l_int32
pixaAccumulateSamples(PIXA * pixa,PTA * pta,PIX ** ppixd,l_float32 * px,l_float32 * py)664 pixaAccumulateSamples(PIXA *pixa,
665 PTA *pta,
666 PIX **ppixd,
667 l_float32 *px,
668 l_float32 *py)
669 {
670 l_int32 i, n, maxw, maxh, xdiff, ydiff;
671 l_int32 *centtab, *sumtab;
672 l_float32 xc, yc, xave, yave;
673 PIX *pix1, *pix2, *pixsum;
674 PTA *ptac;
675
676 PROCNAME("pixaAccumulateSamples");
677
678 if (px) *px = 0;
679 if (py) *py = 0;
680 if (!ppixd)
681 return ERROR_INT("&pixd not defined", procName, 1);
682 *ppixd = NULL;
683 if (!pixa)
684 return ERROR_INT("pixa not defined", procName, 1);
685
686 n = pixaGetCount(pixa);
687 if (pta && ptaGetCount(pta) != n)
688 return ERROR_INT("pta count differs from pixa count", procName, 1);
689 n = L_MIN(n, 256); /* take the first 256 only */
690 if (n == 0)
691 return ERROR_INT("pixa array empty", procName, 1);
692
693 /* Find the centroids */
694 if (pta) {
695 ptac = ptaClone(pta);
696 } else { /* generate them here */
697 ptac = ptaCreate(n);
698 centtab = makePixelCentroidTab8();
699 sumtab = makePixelSumTab8();
700 for (i = 0; i < n; i++) {
701 pix1 = pixaGetPix(pixa, i, L_CLONE);
702 pixCentroid(pix1, centtab, sumtab, &xc, &yc);
703 ptaAddPt(ptac, xc, yc);
704 }
705 LEPT_FREE(centtab);
706 LEPT_FREE(sumtab);
707 }
708
709 /* Find the average value of the centroids */
710 xave = yave = 0;
711 for (i = 0; i < n; i++) {
712 ptaGetPt(pta, i, &xc, &yc);
713 xave += xc;
714 yave += yc;
715 }
716 xave = xave / (l_float32)n;
717 yave = yave / (l_float32)n;
718 if (px) *px = xave;
719 if (py) *py = yave;
720
721 /* Place all pix with their centroids located at the average
722 * centroid value, and sum the results. Make the accumulator
723 * image slightly larger than the largest sample to insure
724 * that all pixels are represented in the accumulator. */
725 pixaSizeRange(pixa, NULL, NULL, &maxw, &maxh);
726 pixsum = pixInitAccumulate(maxw + 5, maxh + 5, 0);
727 pix1 = pixCreate(maxw, maxh, 1);
728 for (i = 0; i < n; i++) {
729 pix2 = pixaGetPix(pixa, i, L_CLONE);
730 ptaGetPt(ptac, i, &xc, &yc);
731 xdiff = (l_int32)(xave - xc);
732 ydiff = (l_int32)(yave - yc);
733 pixClearAll(pix1);
734 pixRasterop(pix1, xdiff, ydiff, maxw, maxh, PIX_SRC,
735 pix2, 0, 0);
736 pixAccumulate(pixsum, pix1, L_ARITH_ADD);
737 pixDestroy(&pix2);
738 }
739 *ppixd = pixFinalAccumulate(pixsum, 0, 8);
740
741 pixDestroy(&pix1);
742 pixDestroy(&pixsum);
743 ptaDestroy(&ptac);
744 return 0;
745 }
746
747
748 /*!
749 * \brief recogTrainingFinished()
750 *
751 * \param[in] precog addr of recog
752 * \param[in] modifyflag 1 to use recogModifyTemplate(); 0 otherwise
753 * \param[in] minsize set to -1 for default
754 * \param[in] minfract set to -1.0 for default
755 * \return 0 if OK, 1 on error (input recog will be destroyed)
756 *
757 * <pre>
758 * Notes:
759 * (1) This must be called after all training samples have been added.
760 * (2) If the templates are not good enough, the recog input is destroyed.
761 * (3) Usually, %modifyflag == 1, because we want to apply
762 * recogModifyTemplate() to generate the actual templates
763 * that will be used. The one exception is when reading a
764 * serialized recog: there we want to put the same set of
765 * templates in both the unscaled and modified pixaa.
766 * See recogReadStream() to see why we do this.
767 * (4) See recogTemplatesAreOK() for %minsize and %minfract usage.
768 * (5) The following things are done here:
769 * (a) Allocate (or reallocate) storage for (possibly) modified
770 * bitmaps, centroids, and fg areas.
771 * (b) Generate the (possibly) modified bitmaps.
772 * (c) Compute centroid and fg area data for both unscaled and
773 * modified bitmaps.
774 * (d) Truncate the pixaa, ptaa and numaa arrays down from
775 * 256 to the actual size.
776 * (6) Putting these operations here makes it simple to recompute
777 * the recog with different modifications on the bitmaps.
778 * (7) Call recogShowContent() to display the templates, both
779 * unscaled and modified.
780 * </pre>
781 */
782 l_int32
recogTrainingFinished(L_RECOG ** precog,l_int32 modifyflag,l_int32 minsize,l_float32 minfract)783 recogTrainingFinished(L_RECOG **precog,
784 l_int32 modifyflag,
785 l_int32 minsize,
786 l_float32 minfract)
787 {
788 l_int32 ok, i, j, size, nc, ns, area;
789 l_float32 xave, yave;
790 PIX *pix, *pixd;
791 PIXA *pixa;
792 PIXAA *paa;
793 PTA *pta;
794 PTAA *ptaa;
795 L_RECOG *recog;
796
797 PROCNAME("recogTrainingFinished");
798
799 if (!precog)
800 return ERROR_INT("&recog not defined", procName, 1);
801 if ((recog = *precog) == NULL)
802 return ERROR_INT("recog not defined", procName, 1);
803 if (recog->train_done) return 0;
804
805 /* Test the input templates */
806 recogTemplatesAreOK(recog, minsize, minfract, &ok);
807 if (!ok) {
808 recogDestroy(precog);
809 return ERROR_INT("bad templates", procName, 1);
810 }
811
812 /* Generate the storage for the possibly-scaled training bitmaps */
813 size = recog->maxarraysize;
814 paa = pixaaCreate(size);
815 pixa = pixaCreate(1);
816 pixaaInitFull(paa, pixa);
817 pixaDestroy(&pixa);
818 pixaaDestroy(&recog->pixaa);
819 recog->pixaa = paa;
820
821 /* Generate the storage for the unscaled centroid training data */
822 ptaa = ptaaCreate(size);
823 pta = ptaCreate(0);
824 ptaaInitFull(ptaa, pta);
825 ptaaDestroy(&recog->ptaa_u);
826 recog->ptaa_u = ptaa;
827
828 /* Generate the storage for the possibly-scaled centroid data */
829 ptaa = ptaaCreate(size);
830 ptaaInitFull(ptaa, pta);
831 ptaDestroy(&pta);
832 ptaaDestroy(&recog->ptaa);
833 recog->ptaa = ptaa;
834
835 /* Generate the storage for the fg area data */
836 numaaDestroy(&recog->naasum_u);
837 numaaDestroy(&recog->naasum);
838 recog->naasum_u = numaaCreateFull(size, 0);
839 recog->naasum = numaaCreateFull(size, 0);
840
841 paa = recog->pixaa_u;
842 nc = recog->setsize;
843 for (i = 0; i < nc; i++) {
844 pixa = pixaaGetPixa(paa, i, L_CLONE);
845 ns = pixaGetCount(pixa);
846 for (j = 0; j < ns; j++) {
847 /* Save centroid and area data for the unscaled pix */
848 pix = pixaGetPix(pixa, j, L_CLONE);
849 pixCentroid(pix, recog->centtab, recog->sumtab, &xave, &yave);
850 ptaaAddPt(recog->ptaa_u, i, xave, yave);
851 pixCountPixels(pix, &area, recog->sumtab);
852 numaaAddNumber(recog->naasum_u, i, area); /* foreground */
853
854 /* Insert the (optionally) scaled character image, and
855 * save centroid and area data for it */
856 if (modifyflag == 1)
857 pixd = recogModifyTemplate(recog, pix);
858 else
859 pixd = pixClone(pix);
860 if (pixd) {
861 pixaaAddPix(recog->pixaa, i, pixd, NULL, L_INSERT);
862 pixCentroid(pixd, recog->centtab, recog->sumtab, &xave, &yave);
863 ptaaAddPt(recog->ptaa, i, xave, yave);
864 pixCountPixels(pixd, &area, recog->sumtab);
865 numaaAddNumber(recog->naasum, i, area);
866 } else {
867 L_ERROR("failed: modified template for class %d, sample %d\n",
868 procName, i, j);
869 }
870 pixDestroy(&pix);
871 }
872 pixaDestroy(&pixa);
873 }
874
875 /* Truncate the arrays to those with non-empty containers */
876 pixaaTruncate(recog->pixaa_u);
877 pixaaTruncate(recog->pixaa);
878 ptaaTruncate(recog->ptaa_u);
879 ptaaTruncate(recog->ptaa);
880 numaaTruncate(recog->naasum_u);
881 numaaTruncate(recog->naasum);
882
883 recog->train_done = TRUE;
884 return 0;
885 }
886
887
888 /*!
889 * \brief recogTemplatesAreOK()
890 *
891 * \param[in] recog
892 * \param[in] minsize set to -1 for default
893 * \param[in] minfract set to -1.0 for default
894 * \param[out] pok set to 1 if template set is valid; 0 otherwise
895 * \return 1 on error; 0 otherwise. An invalid template set is not an error.
896 *
897 * <pre>
898 * Notes:
899 * (1) This is called by recogTrainingFinished(). A return value of 0
900 * will cause recogTrainingFinished() to destroy the recog.
901 * (2) %minsize is the minimum number of samples required for
902 * the class; -1 uses the default
903 * (3) %minfract is the minimum fraction of classes required for
904 * the recog to be usable; -1.0 uses the default
905 * </pre>
906 */
907 static l_int32
recogTemplatesAreOK(L_RECOG * recog,l_int32 minsize,l_float32 minfract,l_int32 * pok)908 recogTemplatesAreOK(L_RECOG *recog,
909 l_int32 minsize,
910 l_float32 minfract,
911 l_int32 *pok)
912 {
913 l_int32 i, n, validsets, nt;
914 l_float32 ratio;
915 NUMA *na;
916
917 PROCNAME("recogTemplatesAreOK");
918
919 if (!pok)
920 return ERROR_INT("&ok not defined", procName, 1);
921 *pok = 0;
922 if (!recog)
923 return ERROR_INT("recog not defined", procName, 1);
924
925 minsize = (minsize < 0) ? DEFAULT_MIN_SET_SIZE : minsize;
926 minfract = (minfract < 0) ? DEFAULT_MIN_SET_FRACT : minfract;
927 n = pixaaGetCount(recog->pixaa_u, &na);
928 validsets = 0;
929 for (i = 0, validsets = 0; i < n; i++) {
930 numaGetIValue(na, i, &nt);
931 if (nt >= minsize)
932 validsets++;
933 }
934 numaDestroy(&na);
935 ratio = (l_float32)validsets / (l_float32)recog->charset_size;
936 *pok = (ratio >= minfract) ? 1 : 0;
937 return 0;
938 }
939
940
941 /*!
942 * \brief recogFilterPixaBySize()
943 *
944 * \param[in] pixas labeled templates
945 * \param[in] setsize size of character set (number of classes)
946 * \param[in] maxkeep max number of templates to keep in a class
947 * \param[in] max_ht_ratio max allowed height ratio (see below)
948 * \param[out] pna [optional] debug output, giving the number in each
949 * class after filtering; use NULL to skip
950 * \return pixa filtered templates, or NULL on error
951 *
952 * <pre>
953 * Notes:
954 * (1) The basic assumption is that the most common and larger
955 * templates in each class are more likely to represent the
956 * characters we are interested in. For example, larger digits
957 * are more likely to represent page numbers, and smaller digits
958 * could be data in tables. Therefore, we bias the first
959 * stage of filtering toward the larger characters by removing
960 * very small ones, and select based on proximity of the
961 * remaining characters to median height.
962 * (2) For each of the %setsize classes, order the templates
963 * increasingly by height. Take the rank 0.9 height. Eliminate
964 * all templates that are shorter by more than %max_ht_ratio.
965 * Of the remaining ones, select up to %maxkeep that are closest
966 * in rank order height to the median template.
967 * </pre>
968 */
969 PIXA *
recogFilterPixaBySize(PIXA * pixas,l_int32 setsize,l_int32 maxkeep,l_float32 max_ht_ratio,NUMA ** pna)970 recogFilterPixaBySize(PIXA *pixas,
971 l_int32 setsize,
972 l_int32 maxkeep,
973 l_float32 max_ht_ratio,
974 NUMA **pna)
975 {
976 l_int32 i, j, h90, hj, j1, j2, j90, n, nc;
977 l_float32 ratio;
978 NUMA *na;
979 PIXA *pixa1, *pixa2, *pixa3, *pixa4, *pixa5;
980 PIXAA *paa;
981
982 PROCNAME("recogFilterPixaBySize");
983
984 if (pna) *pna = NULL;
985 if (!pixas)
986 return (PIXA *)ERROR_PTR("pixas not defined", procName, NULL);
987
988 if ((paa = recogSortPixaByClass(pixas, setsize)) == NULL)
989 return (PIXA *)ERROR_PTR("paa not made", procName, NULL);
990 nc = pixaaGetCount(paa, NULL);
991 na = (pna) ? numaCreate(0) : NULL;
992 if (pna) *pna = na;
993 pixa5 = pixaCreate(0);
994 for (i = 0; i < nc; i++) {
995 pixa1 = pixaaGetPixa(paa, i, L_CLONE);
996 if ((n = pixaGetCount(pixa1)) == 0) {
997 pixaDestroy(&pixa1);
998 continue;
999 }
1000 pixa2 = pixaSort(pixa1, L_SORT_BY_HEIGHT, L_SORT_INCREASING, NULL,
1001 L_COPY);
1002 j90 = (l_int32)(0.9 * n);
1003 pixaGetPixDimensions(pixa2, j90, NULL, &h90, NULL);
1004 pixa3 = pixaCreate(n);
1005 for (j = 0; j < n; j++) {
1006 pixaGetPixDimensions(pixa2, j, NULL, &hj, NULL);
1007 ratio = (l_float32)h90 / (l_float32)hj;
1008 if (ratio <= max_ht_ratio)
1009 pixaAddPix(pixa3, pixaGetPix(pixa2, j, L_COPY), L_INSERT);
1010 }
1011 n = pixaGetCount(pixa3);
1012 if (n <= maxkeep) {
1013 pixa4 = pixaCopy(pixa3, L_CLONE);
1014 } else {
1015 j1 = (n - maxkeep) / 2;
1016 j2 = j1 + maxkeep - 1;
1017 pixa4 = pixaSelectRange(pixa3, j1, j2, L_CLONE);
1018 }
1019 if (na) numaAddNumber(na, pixaGetCount(pixa4));
1020 pixaJoin(pixa5, pixa4, 0, -1);
1021 pixaDestroy(&pixa1);
1022 pixaDestroy(&pixa2);
1023 pixaDestroy(&pixa3);
1024 pixaDestroy(&pixa4);
1025 }
1026
1027 pixaaDestroy(&paa);
1028 return pixa5;
1029 }
1030
1031
1032 /*!
1033 * \brief recogSortPixaByClass()
1034 *
1035 * \param[in] pixa labeled templates
1036 * \param[in] setsize size of character set (number of classes)
1037 * \return paa pixaa where each pixa has templates for one class,
1038 * or null on error
1039 */
1040 PIXAA *
recogSortPixaByClass(PIXA * pixa,l_int32 setsize)1041 recogSortPixaByClass(PIXA *pixa,
1042 l_int32 setsize)
1043 {
1044 PIXAA *paa;
1045 L_RECOG *recog;
1046
1047 PROCNAME("recogSortPixaByClass");
1048
1049 if (!pixa)
1050 return (PIXAA *)ERROR_PTR("pixa not defined", procName, NULL);
1051
1052 if ((recog = recogCreateFromPixaNoFinish(pixa, 0, 0, 0, 0, 0)) == NULL)
1053 return (PIXAA *)ERROR_PTR("recog not made", procName, NULL);
1054 paa = recog->pixaa_u; /* grab the paa of unscaled templates */
1055 recog->pixaa_u = NULL;
1056 recogDestroy(&recog);
1057 return paa;
1058 }
1059
1060
1061 /*!
1062 * \brief recogRemoveOutliers1()
1063 *
1064 * \param[in] precog addr of recog with unscaled labeled templates
1065 * \param[in] minscore keep everything with at least this score
1066 * \param[in] mintarget minimum desired number to retain if possible
1067 * \param[in] minsize minimum number of samples required for a class
1068 * \param[out] ppixsave [optional debug] saved templates, with scores
1069 * \param[out] ppixrem [optional debug] removed templates, with scores
1070 * \return 0 if OK, 1 on error.
1071 *
1072 * <pre>
1073 * Notes:
1074 * (1) This is a convenience wrapper when using default parameters
1075 * for the recog. See pixaRemoveOutliers1() for details.
1076 * (2) If this succeeds, the new recog replaces the input recog;
1077 * if it fails, the input recog is destroyed.
1078 * </pre>
1079 */
1080 l_int32
recogRemoveOutliers1(L_RECOG ** precog,l_float32 minscore,l_int32 mintarget,l_int32 minsize,PIX ** ppixsave,PIX ** ppixrem)1081 recogRemoveOutliers1(L_RECOG **precog,
1082 l_float32 minscore,
1083 l_int32 mintarget,
1084 l_int32 minsize,
1085 PIX **ppixsave,
1086 PIX **ppixrem)
1087 {
1088 PIXA *pixa1, *pixa2;
1089 L_RECOG *recog;
1090
1091 PROCNAME("recogRemoveOutliers1");
1092
1093 if (!precog)
1094 return ERROR_INT("&recog not defined", procName, 1);
1095 if (*precog == NULL)
1096 return ERROR_INT("recog not defined", procName, 1);
1097
1098 /* Extract the unscaled templates */
1099 pixa1 = recogExtractPixa(*precog);
1100 recogDestroy(precog);
1101
1102 pixa2 = pixaRemoveOutliers1(pixa1, minscore, mintarget, minsize,
1103 ppixsave, ppixrem);
1104 pixaDestroy(&pixa1);
1105 if (!pixa2)
1106 return ERROR_INT("failure to remove outliers", procName, 1);
1107
1108 recog = recogCreateFromPixa(pixa2, 0, 0, 0, 150, 1);
1109 pixaDestroy(&pixa2);
1110 if (!recog)
1111 return ERROR_INT("failure to make recog from pixa sans outliers",
1112 procName, 1);
1113
1114 *precog = recog;
1115 return 0;
1116 }
1117
1118
1119 /*!
1120 * \brief pixaRemoveOutliers1()
1121 *
1122 * \param[in] pixas unscaled labeled templates
1123 * \param[in] minscore keep everything with at least this score;
1124 * use -1.0 for default.
1125 * \param[in] mintarget minimum desired number to retain if possible;
1126 * use -1 for default.
1127 * \param[in] minsize minimum number of samples required for a class;
1128 * use -1 for default.
1129 * \param[out] ppixsave [optional debug] saved templates, with scores
1130 * \param[out] ppixrem [optional debug] removed templates, with scores
1131 * \return pixa of unscaled templates to be kept, or NULL on error
1132 *
1133 * <pre>
1134 * Notes:
1135 * (1) Removing outliers is particularly important when recognition
1136 * goes against all the samples in the training set, as opposed
1137 * to the averages for each class. The reason is that we get
1138 * an identification error if a mislabeled template is a best
1139 * match for an input sample.
1140 * (2) Because the score values depend strongly on the quality
1141 * of the character images, to avoid losing too many samples
1142 * we supplement a minimum score for retention with a score
1143 * necessary to acquire the minimum target number of templates.
1144 * To do this we are willing to use a lower threshold,
1145 * LOWER_SCORE_THRESHOLD, on the score. Consequently, with
1146 * poor quality templates, we may keep samples with a score
1147 * less than %minscore, but never less than LOWER_SCORE_THRESHOLD.
1148 * And if the number of samples is less than %minsize, we do
1149 * not use any.
1150 * (3) This is meant to be used on a BAR, where the templates all
1151 * come from the same book; use minscore ~0.75.
1152 * (4) Method: make a scaled recog from the input %pixas. Then,
1153 * for each class: generate the averages, match each
1154 * scaled template against the average, and save unscaled
1155 * templates that had a sufficiently good match.
1156 * </pre>
1157 */
1158 PIXA *
pixaRemoveOutliers1(PIXA * pixas,l_float32 minscore,l_int32 mintarget,l_int32 minsize,PIX ** ppixsave,PIX ** ppixrem)1159 pixaRemoveOutliers1(PIXA *pixas,
1160 l_float32 minscore,
1161 l_int32 mintarget,
1162 l_int32 minsize,
1163 PIX **ppixsave,
1164 PIX **ppixrem)
1165 {
1166 l_int32 i, j, debug, n, area1, area2;
1167 l_float32 x1, y1, x2, y2, minfract, score, rankscore, threshscore;
1168 NUMA *nasum, *narem, *nasave, *nascore;
1169 PIX *pix1, *pix2;
1170 PIXA *pixa, *pixarem, *pixad;
1171 PTA *pta;
1172 L_RECOG *recog;
1173
1174 PROCNAME("pixaRemoveOutliers1");
1175
1176 if (ppixsave) *ppixsave = NULL;
1177 if (ppixrem) *ppixrem = NULL;
1178 if (!pixas)
1179 return (PIXA *)ERROR_PTR("pixas not defined", procName, NULL);
1180 minscore = L_MIN(minscore, 1.0);
1181 if (minscore <= 0.0)
1182 minscore = DEFAULT_MIN_SCORE;
1183 mintarget = L_MIN(mintarget, 3);
1184 if (mintarget <= 0)
1185 mintarget = DEFAULT_MIN_TARGET;
1186 if (minsize < 0)
1187 minsize = DEFAULT_MIN_SET_SIZE;
1188
1189 /* Make a special height-scaled recognizer with average templates */
1190 debug = (ppixsave || ppixrem) ? 1 : 0;
1191 recog = recogCreateFromPixa(pixas, 0, 40, 0, 128, 1);
1192 if (!recog)
1193 return (PIXA *)ERROR_PTR("bad pixas; recog not made", procName, NULL);
1194 recogAverageSamples(&recog, debug);
1195 if (!recog)
1196 return (PIXA *)ERROR_PTR("bad templates", procName, NULL);
1197
1198 nasave = (ppixsave) ? numaCreate(0) : NULL;
1199 pixarem = (ppixrem) ? pixaCreate(0) : NULL;
1200 narem = (ppixrem) ? numaCreate(0) : NULL;
1201
1202 pixad = pixaCreate(0);
1203 for (i = 0; i < recog->setsize; i++) {
1204 /* Access the average template and values for scaled
1205 * images in this class */
1206 pix1 = pixaGetPix(recog->pixa, i, L_CLONE);
1207 ptaGetPt(recog->pta, i, &x1, &y1);
1208 numaGetIValue(recog->nasum, i, &area1);
1209
1210 /* Get the scores for each sample in the class */
1211 pixa = pixaaGetPixa(recog->pixaa, i, L_CLONE);
1212 pta = ptaaGetPta(recog->ptaa, i, L_CLONE); /* centroids */
1213 nasum = numaaGetNuma(recog->naasum, i, L_CLONE); /* fg areas */
1214 n = pixaGetCount(pixa);
1215 nascore = numaCreate(n);
1216 for (j = 0; j < n; j++) {
1217 pix2 = pixaGetPix(pixa, j, L_CLONE);
1218 ptaGetPt(pta, j, &x2, &y2); /* centroid average */
1219 numaGetIValue(nasum, j, &area2); /* fg sum average */
1220 pixCorrelationScoreSimple(pix1, pix2, area1, area2,
1221 x1 - x2, y1 - y2, 5, 5,
1222 recog->sumtab, &score);
1223 numaAddNumber(nascore, score);
1224 if (debug && score == 0.0) /* typ. large size difference */
1225 fprintf(stderr, "Got 0 score for i = %d, j = %d\n", i, j);
1226 pixDestroy(&pix2);
1227 }
1228 pixDestroy(&pix1);
1229
1230 /* Find the rankscore, corresonding to the 1.0 - minfract.
1231 * To attempt to maintain the minfract of templates, use as a
1232 * cutoff the minimum of minscore and the rank score. However,
1233 * no template is saved with an actual score less than
1234 * that at least one template is kept. */
1235 minfract = (l_float32)mintarget / (l_float32)n;
1236 numaGetRankValue(nascore, 1.0 - minfract, NULL, 0, &rankscore);
1237 threshscore = L_MAX(LOWER_SCORE_THRESHOLD,
1238 L_MIN(minscore, rankscore));
1239 if (debug) {
1240 L_INFO("minscore = %4.2f, rankscore = %4.2f, threshscore = %4.2f\n",
1241 procName, minscore, rankscore, threshscore);
1242 }
1243
1244 /* Save templates that are at or above threshold.
1245 * Toss any classes with less than %minsize templates. */
1246 for (j = 0; j < n; j++) {
1247 numaGetFValue(nascore, j, &score);
1248 pix1 = pixaaGetPix(recog->pixaa_u, i, j, L_COPY);
1249 if (score >= threshscore && n >= minsize) {
1250 pixaAddPix(pixad, pix1, L_INSERT);
1251 if (nasave) numaAddNumber(nasave, score);
1252 } else if (debug) {
1253 pixaAddPix(pixarem, pix1, L_INSERT);
1254 numaAddNumber(narem, score);
1255 } else {
1256 pixDestroy(&pix1);
1257 }
1258 }
1259
1260 pixaDestroy(&pixa);
1261 ptaDestroy(&pta);
1262 numaDestroy(&nasum);
1263 numaDestroy(&nascore);
1264 }
1265
1266 if (ppixsave) {
1267 *ppixsave = pixDisplayOutliers(pixad, nasave);
1268 numaDestroy(&nasave);
1269 }
1270 if (ppixrem) {
1271 *ppixrem = pixDisplayOutliers(pixarem, narem);
1272 pixaDestroy(&pixarem);
1273 numaDestroy(&narem);
1274 }
1275 recogDestroy(&recog);
1276 return pixad;
1277 }
1278
1279
1280 /*!
1281 * \brief recogRemoveOutliers2()
1282 *
1283 * \param[in] precog addr of recog with unscaled labeled templates
1284 * \param[in] minscore keep everything with at least this score
1285 * \param[in] minsize minimum number of samples required for a class
1286 * \param[out] ppixsave [optional debug] saved templates, with scores
1287 * \param[out] ppixrem [optional debug] removed templates, with scores
1288 * \return 0 if OK, 1 on error.
1289 *
1290 * <pre>
1291 * Notes:
1292 * (1) This is a convenience wrapper when using default parameters
1293 * for the recog. See pixaRemoveOutliers2() for details.
1294 * (2) If this succeeds, the new recog replaces the input recog;
1295 * if it fails, the input recog is destroyed.
1296 * </pre>
1297 */
1298 l_int32
recogRemoveOutliers2(L_RECOG ** precog,l_float32 minscore,l_int32 minsize,PIX ** ppixsave,PIX ** ppixrem)1299 recogRemoveOutliers2(L_RECOG **precog,
1300 l_float32 minscore,
1301 l_int32 minsize,
1302 PIX **ppixsave,
1303 PIX **ppixrem)
1304 {
1305 PIXA *pixa1, *pixa2;
1306 L_RECOG *recog;
1307
1308 PROCNAME("recogRemoveOutliers2");
1309
1310 if (!precog)
1311 return ERROR_INT("&recog not defined", procName, 1);
1312 if (*precog == NULL)
1313 return ERROR_INT("recog not defined", procName, 1);
1314
1315 /* Extract the unscaled templates */
1316 pixa1 = recogExtractPixa(*precog);
1317 recogDestroy(precog);
1318
1319 pixa2 = pixaRemoveOutliers2(pixa1, minscore, minsize, ppixsave, ppixrem);
1320 pixaDestroy(&pixa1);
1321 if (!pixa2)
1322 return ERROR_INT("failure to remove outliers", procName, 1);
1323
1324 recog = recogCreateFromPixa(pixa2, 0, 0, 0, 150, 1);
1325 pixaDestroy(&pixa2);
1326 if (!recog)
1327 return ERROR_INT("failure to make recog from pixa sans outliers",
1328 procName, 1);
1329
1330 *precog = recog;
1331 return 0;
1332 }
1333
1334
1335 /*!
1336 * \brief pixaRemoveOutliers2()
1337 *
1338 * \param[in] pixas unscaled labeled templates
1339 * \param[in] minscore keep everything with at least this score;
1340 * use -1.0 for default.
1341 * \param[in] minsize minimum number of samples required for a class;
1342 * use -1 for default.
1343 * \param[out] ppixsave [optional debug] saved templates, with scores
1344 * \param[out] ppixrem [optional debug] removed templates, with scores
1345 * \return pixa of unscaled templates to be kept, or NULL on error
1346 *
1347 * <pre>
1348 * Notes:
1349 * (1) Removing outliers is particularly important when recognition
1350 * goes against all the samples in the training set, as opposed
1351 * to the averages for each class. The reason is that we get
1352 * an identification error if a mislabeled template is a best
1353 * match for an input sample.
1354 * (2) This method compares each template against the average templates
1355 * of each class, and discards any template that has a higher
1356 * correlation to a class different from its own. It also
1357 * sets a lower bound on correlation scores with its class average.
1358 * (3) This is meant to be used on a BAR, where the templates all
1359 * come from the same book; use minscore ~0.75.
1360 * </pre>
1361 */
1362 PIXA *
pixaRemoveOutliers2(PIXA * pixas,l_float32 minscore,l_int32 minsize,PIX ** ppixsave,PIX ** ppixrem)1363 pixaRemoveOutliers2(PIXA *pixas,
1364 l_float32 minscore,
1365 l_int32 minsize,
1366 PIX **ppixsave,
1367 PIX **ppixrem)
1368 {
1369 l_int32 i, j, k, n, area1, area2, maxk, debug;
1370 l_float32 x1, y1, x2, y2, score, maxscore;
1371 NUMA *nan, *nascore, *nasave;
1372 PIX *pix1, *pix2, *pix3;
1373 PIXA *pixarem, *pixad;
1374 L_RECOG *recog;
1375
1376 PROCNAME("pixaRemoveOutliers2");
1377
1378 if (ppixsave) *ppixsave = NULL;
1379 if (ppixrem) *ppixrem = NULL;
1380 if (!pixas)
1381 return (PIXA *)ERROR_PTR("pixas not defined", procName, NULL);
1382 minscore = L_MIN(minscore, 1.0);
1383 if (minscore <= 0.0)
1384 minscore = DEFAULT_MIN_SCORE;
1385 if (minsize < 0)
1386 minsize = DEFAULT_MIN_SET_SIZE;
1387
1388 /* Make a special height-scaled recognizer with average templates */
1389 debug = (ppixsave || ppixrem) ? 1 : 0;
1390 recog = recogCreateFromPixa(pixas, 0, 40, 0, 128, 1);
1391 if (!recog)
1392 return (PIXA *)ERROR_PTR("bad pixas; recog not made", procName, NULL);
1393 recogAverageSamples(&recog, debug);
1394 if (!recog)
1395 return (PIXA *)ERROR_PTR("bad templates", procName, NULL);
1396
1397 nasave = (ppixsave) ? numaCreate(0) : NULL;
1398 pixarem = (ppixrem) ? pixaCreate(0) : NULL;
1399
1400 pixad = pixaCreate(0);
1401 pixaaGetCount(recog->pixaa, &nan); /* number of templates in each class */
1402 for (i = 0; i < recog->setsize; i++) {
1403 /* Get the scores for each sample in the class, when comparing
1404 * with averages from all the classes. */
1405 numaGetIValue(nan, i, &n);
1406 for (j = 0; j < n; j++) {
1407 pix1 = pixaaGetPix(recog->pixaa, i, j, L_CLONE);
1408 ptaaGetPt(recog->ptaa, i, j, &x1, &y1); /* centroid */
1409 numaaGetValue(recog->naasum, i, j, NULL, &area1); /* fg sum */
1410 nascore = numaCreate(n);
1411 for (k = 0; k < recog->setsize; k++) { /* average templates */
1412 pix2 = pixaGetPix(recog->pixa, k, L_CLONE);
1413 ptaGetPt(recog->pta, k, &x2, &y2); /* average centroid */
1414 numaGetIValue(recog->nasum, k, &area2); /* average fg sum */
1415 pixCorrelationScoreSimple(pix1, pix2, area1, area2,
1416 x1 - x2, y1 - y2, 5, 5,
1417 recog->sumtab, &score);
1418 numaAddNumber(nascore, score);
1419 pixDestroy(&pix2);
1420 }
1421
1422 /* Save templates that are in the correct class and
1423 * at or above threshold. Toss any classes with less
1424 * than %minsize templates. */
1425 numaGetMax(nascore, &maxscore, &maxk);
1426 if (maxk == i && maxscore >= minscore && n >= minsize) {
1427 /* save it */
1428 pix3 = pixaaGetPix(recog->pixaa_u, i, j, L_COPY);
1429 pixaAddPix(pixad, pix3, L_INSERT);
1430 if (nasave) numaAddNumber(nasave, maxscore);
1431 } else if (ppixrem) { /* outlier */
1432 pix3 = recogDisplayOutlier(recog, i, j, maxk, maxscore);
1433 pixaAddPix(pixarem, pix3, L_INSERT);
1434 }
1435 numaDestroy(&nascore);
1436 pixDestroy(&pix1);
1437 }
1438 }
1439
1440 if (ppixsave) {
1441 *ppixsave = pixDisplayOutliers(pixad, nasave);
1442 numaDestroy(&nasave);
1443 }
1444 if (ppixrem) {
1445 *ppixrem = pixaDisplayTiledInRows(pixarem, 32, 1500, 1.0, 0, 20, 2);
1446 pixaDestroy(&pixarem);
1447 }
1448
1449 numaDestroy(&nan);
1450 recogDestroy(&recog);
1451 return pixad;
1452 }
1453
1454
1455 /*------------------------------------------------------------------------*
1456 * Training on unlabeled data *
1457 *------------------------------------------------------------------------*/
1458 /*!
1459 * \brief recogTrainFromBoot()
1460 *
1461 * \param[in] recogboot labeled boot recognizer
1462 * \param[in] pixas set of unlabeled input characters
1463 * \param[in] minscore min score for accepting the example; e.g., 0.75
1464 * \param[in] threshold for binarization, if needed
1465 * \param[in] debug 1 for debug output saved to recogboot; 0 otherwise
1466 * \return pixad labeled version of input pixas, trained on a BSR,
1467 * or NULL on error
1468 *
1469 * <pre>
1470 * Notes:
1471 * (1) This takes %pixas of unscaled single characters and %recboot,
1472 * a bootstrep recognizer (BSR) that has been set up with parameters
1473 * * scaleh: scale all templates to this height
1474 * * linew: width of normalized strokes, or 0 if using
1475 * the input image
1476 * It modifies the pix in %pixas accordingly and correlates
1477 * with the templates in the BSR. It returns those input
1478 * images in %pixas whose best correlation with the BSR is at
1479 * or above %minscore. The returned pix have added text labels
1480 * for the text string of the class to which the best
1481 * correlated template belongs.
1482 * (2) Identification occurs in scaled mode (typically with h = 40),
1483 * optionally using a width-normalized line images derived
1484 * from those in %pixas.
1485 * </pre>
1486 */
1487 PIXA *
recogTrainFromBoot(L_RECOG * recogboot,PIXA * pixas,l_float32 minscore,l_int32 threshold,l_int32 debug)1488 recogTrainFromBoot(L_RECOG *recogboot,
1489 PIXA *pixas,
1490 l_float32 minscore,
1491 l_int32 threshold,
1492 l_int32 debug)
1493 {
1494 char *text;
1495 l_int32 i, n, same, maxd, scaleh, linew;
1496 l_float32 score;
1497 PIX *pix1, *pix2, *pixdb;
1498 PIXA *pixa1, *pixa2, *pixa3, *pixad;
1499
1500 PROCNAME("recogTrainFromBoot");
1501
1502 if (!recogboot)
1503 return (PIXA *)ERROR_PTR("recogboot not defined", procName, NULL);
1504 if (!pixas)
1505 return (PIXA *)ERROR_PTR("pixas not defined", procName, NULL);
1506
1507 /* Make sure all input pix are 1 bpp */
1508 if ((n = pixaGetCount(pixas)) == 0)
1509 return (PIXA *)ERROR_PTR("no pix in pixa", procName, NULL);
1510 pixaVerifyDepth(pixas, &same, &maxd);
1511 if (maxd == 1) {
1512 pixa1 = pixaCopy(pixas, L_COPY);
1513 } else {
1514 pixa1 = pixaCreate(n);
1515 for (i = 0; i < n; i++) {
1516 pix1 = pixaGetPix(pixas, i, L_CLONE);
1517 pix2 = pixConvertTo1(pix1, threshold);
1518 pixaAddPix(pixa1, pix2, L_INSERT);
1519 pixDestroy(&pix1);
1520 }
1521 }
1522
1523 /* Scale the input images to match the BSR */
1524 scaleh = recogboot->scaleh;
1525 linew = recogboot->linew;
1526 pixa2 = pixaCreate(n);
1527 for (i = 0; i < n; i++) {
1528 pix1 = pixaGetPix(pixa1, i, L_CLONE);
1529 pix2 = pixScaleToSize(pix1, 0, scaleh);
1530 pixaAddPix(pixa2, pix2, L_INSERT);
1531 pixDestroy(&pix1);
1532 }
1533 pixaDestroy(&pixa1);
1534
1535 /* Optionally convert to width-normalized line */
1536 if (linew > 0)
1537 pixa3 = pixaSetStrokeWidth(pixa2, linew, 4, 8);
1538 else
1539 pixa3 = pixaCopy(pixa2, L_CLONE);
1540 pixaDestroy(&pixa2);
1541
1542 /* Identify using recogboot */
1543 n = pixaGetCount(pixa3);
1544 pixad = pixaCreate(n);
1545 for (i = 0; i < n; i++) {
1546 pix1 = pixaGetPix(pixa3, i, L_COPY);
1547 pixSetText(pix1, NULL); /* remove any existing text or labelling */
1548 if (!debug) {
1549 recogIdentifyPix(recogboot, pix1, NULL);
1550 } else {
1551 recogIdentifyPix(recogboot, pix1, &pixdb);
1552 pixaAddPix(recogboot->pixadb_boot, pixdb, L_INSERT);
1553 }
1554 rchExtract(recogboot->rch, NULL, &score, &text, NULL, NULL, NULL, NULL);
1555 if (score >= minscore) {
1556 pix2 = pixaGetPix(pixas, i, L_COPY);
1557 pixSetText(pix2, text);
1558 pixaAddPix(pixad, pix2, L_INSERT);
1559 pixaAddPix(recogboot->pixadb_boot, pixdb, L_COPY);
1560 }
1561 LEPT_FREE(text);
1562 pixDestroy(&pix1);
1563 }
1564 pixaDestroy(&pixa3);
1565
1566 return pixad;
1567 }
1568
1569
1570 /*------------------------------------------------------------------------*
1571 * Padding the digit training set *
1572 *------------------------------------------------------------------------*/
1573 /*!
1574 * \brief recogPadDigitTrainingSet()
1575 *
1576 * \param[in/out] precog trained; if padding is needed, it is replaced
1577 * by a a new padded recog
1578 * \param[in] scaleh must be > 0; suggest ~40.
1579 * \param[in] linew use 0 for original scanned images
1580 * \return 0 if OK, 1 on error
1581 *
1582 * <pre>
1583 * Notes:
1584 * (1) This is a no-op if padding is not needed. However,
1585 * if it is, this replaces the input recog with a new recog,
1586 * padded appropriately with templates from a boot recognizer,
1587 * and set up with correlation templates derived from
1588 * %scaleh and %linew.
1589 * </pre>
1590 */
1591 l_int32
recogPadDigitTrainingSet(L_RECOG ** precog,l_int32 scaleh,l_int32 linew)1592 recogPadDigitTrainingSet(L_RECOG **precog,
1593 l_int32 scaleh,
1594 l_int32 linew)
1595 {
1596 PIXA *pixa;
1597 L_RECOG *recog1, *recog2;
1598 SARRAY *sa;
1599
1600 PROCNAME("recogPadDigitTrainingSet");
1601
1602 if (!precog)
1603 return ERROR_INT("&recog not defined", procName, 1);
1604 recog1 = *precog;
1605
1606 recogIsPaddingNeeded(recog1, &sa);
1607 if (!sa) return 0;
1608
1609 /* Get a new pixa with the padding templates added */
1610 pixa = recogAddDigitPadTemplates(recog1, sa);
1611 sarrayDestroy(&sa);
1612 if (!pixa)
1613 return ERROR_INT("pixa not made", procName, 1);
1614
1615 /* Need to use templates that are scaled to a fixed height. */
1616 if (scaleh <= 0) {
1617 L_WARNING("templates must be scaled to fixed height; using %d\n",
1618 procName, 40);
1619 scaleh = 40;
1620 }
1621
1622 /* Create a hybrid recog, composed of templates from both
1623 * the original and bootstrap sources. */
1624 recog2 = recogCreateFromPixa(pixa, 0, scaleh, linew, recog1->threshold,
1625 recog1->maxyshift);
1626 pixaDestroy(&pixa);
1627 recogDestroy(precog);
1628 *precog = recog2;
1629 return 0;
1630 }
1631
1632
1633 /*!
1634 * \brief recogIsPaddingNeeded()
1635 *
1636 * \param[in] recog trained
1637 * \param[out] psa addr of returned string containing text value
1638 * \return 1 on error; 0 if OK, whether or not additional padding
1639 * templates are required.
1640 *
1641 * <pre>
1642 * Notes:
1643 * (1) This returns a string array in &sa containing character values
1644 * for which extra templates are needed; this sarray is
1645 * used by recogGetPadTemplates(). It returns NULL
1646 * if no padding templates are needed.
1647 * </pre>
1648 */
1649 l_int32
recogIsPaddingNeeded(L_RECOG * recog,SARRAY ** psa)1650 recogIsPaddingNeeded(L_RECOG *recog,
1651 SARRAY **psa)
1652 {
1653 char *str;
1654 l_int32 i, nt, min_nopad, nclass, allclasses;
1655 l_float32 minval;
1656 NUMA *naclass;
1657 SARRAY *sa;
1658
1659 PROCNAME("recogIsPaddingNeeded");
1660
1661 if (!psa)
1662 return ERROR_INT("&sa not defined", procName, 1);
1663 *psa = NULL;
1664 if (!recog)
1665 return ERROR_INT("recog not defined", procName, 1);
1666
1667 /* Do we have samples from all classes? */
1668 nclass = pixaaGetCount(recog->pixaa_u, &naclass); /* unscaled bitmaps */
1669 allclasses = (nclass == recog->charset_size) ? 1 : 0;
1670
1671 /* Are there enough samples in each class already? */
1672 min_nopad = recog->min_nopad;
1673 numaGetMin(naclass, &minval, NULL);
1674 if (allclasses && (minval >= min_nopad)) {
1675 numaDestroy(&naclass);
1676 return 0;
1677 }
1678
1679 /* Are any classes not represented? */
1680 sa = recogAddMissingClassStrings(recog);
1681 *psa = sa;
1682
1683 /* Are any other classes under-represented? */
1684 for (i = 0; i < nclass; i++) {
1685 numaGetIValue(naclass, i, &nt);
1686 if (nt < min_nopad) {
1687 str = sarrayGetString(recog->sa_text, i, L_COPY);
1688 sarrayAddString(sa, str, L_INSERT);
1689 }
1690 }
1691 numaDestroy(&naclass);
1692 return 0;
1693 }
1694
1695
1696 /*!
1697 * \brief recogAddMissingClassStrings()
1698 *
1699 * \param[in] recog trained
1700 * \return sa of class string missing in %recog, or NULL on error
1701 *
1702 * <pre>
1703 * Notes:
1704 * (1) This returns an empty %sa if there is at least one template
1705 * in each class in %recog.
1706 * </pre>
1707 */
1708 static SARRAY *
recogAddMissingClassStrings(L_RECOG * recog)1709 recogAddMissingClassStrings(L_RECOG *recog)
1710 {
1711 char *text;
1712 char str[4];
1713 l_int32 i, nclass, index, ival;
1714 NUMA *na;
1715 SARRAY *sa;
1716
1717 PROCNAME("recogAddMissingClassStrings");
1718
1719 if (!recog)
1720 return (SARRAY *)ERROR_PTR("recog not defined", procName, NULL);
1721
1722 /* Only handling digits */
1723 nclass = pixaaGetCount(recog->pixaa_u, NULL); /* unscaled bitmaps */
1724 if (recog->charset_type != 1 || nclass == 10)
1725 return sarrayCreate(0); /* empty */
1726
1727 /* Make an indicator array for missing classes */
1728 na = numaCreate(0);
1729 sa = sarrayCreate(0);
1730 for (i = 0; i < recog->charset_size; i++)
1731 numaAddNumber(na, 1);
1732 for (i = 0; i < nclass; i++) {
1733 text = sarrayGetString(recog->sa_text, i, L_NOCOPY);
1734 index = text[0] - '0';
1735 numaSetValue(na, index, 0);
1736 }
1737
1738 /* Convert to string and add to output */
1739 for (i = 0; i < nclass; i++) {
1740 numaGetIValue(na, i, &ival);
1741 if (ival == 1) {
1742 str[0] = '0' + i;
1743 str[1] = '\0';
1744 sarrayAddString(sa, str, L_COPY);
1745 }
1746 }
1747 numaDestroy(&na);
1748 return sa;
1749 }
1750
1751
1752 /*!
1753 * \brief recogAddDigitPadTemplates()
1754 *
1755 * \param[in] recog trained
1756 * \param[in] sa set of text strings that need to be padded
1757 * \return pixa of all templates from %recog and the additional pad
1758 * templates from a boot recognizer; or NULL on error
1759 *
1760 * <pre>
1761 * Notes:
1762 * (1) Call recogIsPaddingNeeded() first, which returns %sa of
1763 * template text strings for classes where more templates
1764 * are needed.
1765 * </pre>
1766 */
1767 PIXA *
recogAddDigitPadTemplates(L_RECOG * recog,SARRAY * sa)1768 recogAddDigitPadTemplates(L_RECOG *recog,
1769 SARRAY *sa)
1770 {
1771 char *str, *text;
1772 l_int32 i, j, n, nt;
1773 PIX *pix;
1774 PIXA *pixa1, *pixa2;
1775
1776 PROCNAME("recogAddDigitPadTemplates");
1777
1778 if (!recog)
1779 return (PIXA *)ERROR_PTR("recog not defined", procName, NULL);
1780 if (!sa)
1781 return (PIXA *)ERROR_PTR("sa not defined", procName, NULL);
1782 if (recogCharsetAvailable(recog->charset_type) == FALSE)
1783 return (PIXA *)ERROR_PTR("boot charset not available", procName, NULL);
1784
1785 /* Make boot recog templates */
1786 pixa1 = recogMakeBootDigitTemplates(0);
1787 n = pixaGetCount(pixa1);
1788
1789 /* Extract the unscaled templates from %recog */
1790 pixa2 = recogExtractPixa(recog);
1791
1792 /* Add selected boot recog templates based on the text strings in sa */
1793 nt = sarrayGetCount(sa);
1794 for (i = 0; i < n; i++) {
1795 pix = pixaGetPix(pixa1, i, L_CLONE);
1796 text = pixGetText(pix);
1797 for (j = 0; j < nt; j++) {
1798 str = sarrayGetString(sa, j, L_NOCOPY);
1799 if (!strcmp(text, str)) {
1800 pixaAddPix(pixa2, pix, L_COPY);
1801 break;
1802 }
1803 }
1804 pixDestroy(&pix);
1805 }
1806
1807 pixaDestroy(&pixa1);
1808 return pixa2;
1809 }
1810
1811
1812 /*!
1813 * \brief recogCharsetAvailable()
1814 *
1815 * \param[in] type of charset for padding
1816 * \return 1 if available; 0 if not.
1817 */
1818 static l_int32
recogCharsetAvailable(l_int32 type)1819 recogCharsetAvailable(l_int32 type)
1820 {
1821 l_int32 ret;
1822
1823 PROCNAME("recogCharsetAvailable");
1824
1825 switch (type)
1826 {
1827 case L_ARABIC_NUMERALS:
1828 ret = TRUE;
1829 break;
1830 case L_LC_ROMAN_NUMERALS:
1831 case L_UC_ROMAN_NUMERALS:
1832 case L_LC_ALPHA:
1833 case L_UC_ALPHA:
1834 L_INFO("charset type %d not available\n", procName, type);
1835 ret = FALSE;
1836 break;
1837 default:
1838 L_INFO("charset type %d is unknown\n", procName, type);
1839 ret = FALSE;
1840 break;
1841 }
1842
1843 return ret;
1844 }
1845
1846
1847 /*------------------------------------------------------------------------*
1848 * Making a boot digit recognizer *
1849 *------------------------------------------------------------------------*/
1850 /*!
1851 * \brief recogMakeBootDigitRecog()
1852 *
1853 * \param[in] scaleh scale all heights to this; typ. use 40
1854 * \param[in] linew normalized line width; typ. use 5; 0 to skip
1855 * \param[in] maxyshift from nominal centroid alignment; typically 0 or 1
1856 * \param[in] debug 1 for showing templates; 0 otherwise
1857 * \return recog, or NULL on error
1858 *
1859 * <pre>
1860 * Notes:
1861 * (1) This takes a set of pre-computed, labeled pixa of single
1862 * digits, and generates a recognizer where the character templates
1863 * that will be used are derived from the boot-generated pixa:
1864 * - extending by replicating the set with different widths,
1865 * keeping the height the same
1866 * - scaling (isotropically to fixed height)
1867 * - optionally generating a skeleton and thickening so that
1868 * all strokes have the same width.
1869 * (2) The resulting templates are scaled versions of either the
1870 * input bitmaps or images with fixed line widths. To use the
1871 * input bitmaps, set %linew = 0; otherwise, set %linew to the
1872 * desired line width.
1873 * </pre>
1874 */
1875 L_RECOG *
recogMakeBootDigitRecog(l_int32 scaleh,l_int32 linew,l_int32 maxyshift,l_int32 debug)1876 recogMakeBootDigitRecog(l_int32 scaleh,
1877 l_int32 linew,
1878 l_int32 maxyshift,
1879 l_int32 debug)
1880
1881 {
1882 PIXA *pixa;
1883 L_RECOG *recog;
1884
1885 /* Get the templates, extended by horizontal scaling */
1886 pixa = recogMakeBootDigitTemplates(debug);
1887
1888 /* Make the boot recog; recogModifyTemplate() will scale the
1889 * templates and optionally turn them into strokes of fixed width. */
1890 recog = recogCreateFromPixa(pixa, 0, scaleh, linew, 128, maxyshift);
1891 pixaDestroy(&pixa);
1892 if (debug)
1893 recogShowContent(stderr, recog, 0, 1);
1894
1895 return recog;
1896 }
1897
1898
1899 /*!
1900 * \brief recogMakeBootDigitTemplates()
1901 *
1902 * \param[in] debug 1 for display of templates
1903 * \return pixa of templates; or NULL on error
1904 *
1905 * <pre>
1906 * Notes:
1907 * (1) See recogMakeBootDigitRecog().
1908 * </pre>
1909 */
1910 PIXA *
recogMakeBootDigitTemplates(l_int32 debug)1911 recogMakeBootDigitTemplates(l_int32 debug)
1912 {
1913 NUMA *na;
1914 PIX *pix1, *pix2, *pix3;
1915 PIXA *pixa1, *pixa2, *pixa3;
1916
1917 pixa1 = l_bootnum_gen1();
1918 pixa2 = l_bootnum_gen2();
1919 pixa3 = l_bootnum_gen3();
1920 if (debug) {
1921 pix1 = pixaDisplayTiledWithText(pixa1, 1500, 1.0, 10, 2, 6, 0xff000000);
1922 pix2 = pixaDisplayTiledWithText(pixa2, 1500, 1.0, 10, 2, 6, 0xff000000);
1923 pix3 = pixaDisplayTiledWithText(pixa3, 1500, 1.0, 10, 2, 6, 0xff000000);
1924 pixDisplay(pix1, 0, 0);
1925 pixDisplay(pix2, 600, 0);
1926 pixDisplay(pix3, 1200, 0);
1927 pixDestroy(&pix1);
1928 pixDestroy(&pix2);
1929 pixDestroy(&pix3);
1930 }
1931 pixaJoin(pixa1, pixa2, 0, -1);
1932 pixaJoin(pixa1, pixa3, 0, -1);
1933 pixaDestroy(&pixa2);
1934 pixaDestroy(&pixa3);
1935
1936 /* Extend by horizontal scaling */
1937 na = numaCreate(4);
1938 numaAddNumber(na, 0.9);
1939 numaAddNumber(na, 1.1);
1940 numaAddNumber(na, 1.2);
1941 pixa2 = pixaExtendByScaling(pixa1, na, L_HORIZ, 1);
1942
1943 pixaDestroy(&pixa1);
1944 numaDestroy(&na);
1945 return pixa2;
1946 }
1947
1948
1949 /*------------------------------------------------------------------------*
1950 * Debugging *
1951 *------------------------------------------------------------------------*/
1952 /*!
1953 * \brief recogShowContent()
1954 *
1955 * \param[in] fp file stream
1956 * \param[in] recog
1957 * \param[in] index for naming of output files of template images
1958 * \param[in] display 1 for showing template images, 0 otherwise
1959 * \return 0 if OK, 1 on error
1960 */
1961 l_int32
recogShowContent(FILE * fp,L_RECOG * recog,l_int32 index,l_int32 display)1962 recogShowContent(FILE *fp,
1963 L_RECOG *recog,
1964 l_int32 index,
1965 l_int32 display)
1966 {
1967 char buf[128];
1968 l_int32 i, val, count;
1969 PIX *pix;
1970 NUMA *na;
1971
1972 PROCNAME("recogShowContent");
1973
1974 if (!fp)
1975 return ERROR_INT("stream not defined", procName, 1);
1976 if (!recog)
1977 return ERROR_INT("recog not defined", procName, 1);
1978
1979 fprintf(fp, "Debug print of recog contents\n");
1980 fprintf(fp, " Setsize: %d\n", recog->setsize);
1981 fprintf(fp, " Binarization threshold: %d\n", recog->threshold);
1982 fprintf(fp, " Maximum matching y-jiggle: %d\n", recog->maxyshift);
1983 if (recog->linew <= 0)
1984 fprintf(fp, " Using image templates for matching\n");
1985 else
1986 fprintf(fp, " Using templates with fixed line width for matching\n");
1987 if (recog->scalew == 0)
1988 fprintf(fp, " No width scaling of templates\n");
1989 else
1990 fprintf(fp, " Template width scaled to %d\n", recog->scalew);
1991 if (recog->scaleh == 0)
1992 fprintf(fp, " No height scaling of templates\n");
1993 else
1994 fprintf(fp, " Template height scaled to %d\n", recog->scaleh);
1995 fprintf(fp, " Number of samples in each class:\n");
1996 pixaaGetCount(recog->pixaa_u, &na);
1997 for (i = 0; i < recog->setsize; i++) {
1998 l_dnaGetIValue(recog->dna_tochar, i, &val);
1999 numaGetIValue(na, i, &count);
2000 if (val < 128)
2001 fprintf(fp, " class %d, char %c: %d\n", i, val, count);
2002 else
2003 fprintf(fp, " class %d, val %d: %d\n", i, val, count);
2004 }
2005 numaDestroy(&na);
2006
2007 if (display) {
2008 lept_mkdir("lept/recog");
2009 pix = pixaaDisplayByPixa(recog->pixaa_u, 20, 20, 1000);
2010 snprintf(buf, sizeof(buf), "/tmp/lept/recog/templates_u.%d.png", index);
2011 pixWriteDebug(buf, pix, IFF_PNG);
2012 pixDisplay(pix, 0, 200 * index);
2013 pixDestroy(&pix);
2014 if (recog->train_done) {
2015 pix = pixaaDisplayByPixa(recog->pixaa, 20, 20, 1000);
2016 snprintf(buf, sizeof(buf),
2017 "/tmp/lept/recog/templates.%d.png", index);
2018 pixWriteDebug(buf, pix, IFF_PNG);
2019 pixDisplay(pix, 800, 200 * index);
2020 pixDestroy(&pix);
2021 }
2022 }
2023 return 0;
2024 }
2025
2026
2027 /*!
2028 * \brief recogDebugAverages()
2029 *
2030 * \param[in] precog addr of recog
2031 * \param[in] debug 0 no output; 1 for images; 2 for text; 3 for both
2032 * \return 0 if OK, 1 on error
2033 *
2034 * <pre>
2035 * Notes:
2036 * (1) Generates an image that pairs each of the input images used
2037 * in training with the average template that it is best
2038 * correlated to. This is written into the recog.
2039 * (2) It also generates pixa_tr of all the input training images,
2040 * which can be used, e.g., in recogShowMatchesInRange().
2041 * (3) Destroys the recog if the averaging function finds any bad classes.
2042 * </pre>
2043 */
2044 l_int32
recogDebugAverages(L_RECOG ** precog,l_int32 debug)2045 recogDebugAverages(L_RECOG **precog,
2046 l_int32 debug)
2047 {
2048 l_int32 i, j, n, np, index;
2049 l_float32 score;
2050 PIX *pix1, *pix2, *pix3;
2051 PIXA *pixa, *pixat;
2052 PIXAA *paa1, *paa2;
2053 L_RECOG *recog;
2054
2055 PROCNAME("recogDebugAverages");
2056
2057 if (!precog)
2058 return ERROR_INT("&recog not defined", procName, 1);
2059 if ((recog = *precog) == NULL)
2060 return ERROR_INT("recog not defined", procName, 1);
2061
2062 /* Mark the training as finished if necessary, and make sure
2063 * that the average templates have been built. */
2064 recogAverageSamples(&recog, 0);
2065 if (!recog)
2066 return ERROR_INT("averaging failed; recog destroyed", procName, 1);
2067
2068 /* Save a pixa of all the training examples */
2069 paa1 = recog->pixaa;
2070 if (!recog->pixa_tr)
2071 recog->pixa_tr = pixaaFlattenToPixa(paa1, NULL, L_CLONE);
2072
2073 /* Destroy any existing image and make a new one */
2074 if (recog->pixdb_ave)
2075 pixDestroy(&recog->pixdb_ave);
2076 n = pixaaGetCount(paa1, NULL);
2077 paa2 = pixaaCreate(n);
2078 for (i = 0; i < n; i++) {
2079 pixa = pixaCreate(0);
2080 pixat = pixaaGetPixa(paa1, i, L_CLONE);
2081 np = pixaGetCount(pixat);
2082 for (j = 0; j < np; j++) {
2083 pix1 = pixaaGetPix(paa1, i, j, L_CLONE);
2084 recogIdentifyPix(recog, pix1, &pix2);
2085 rchExtract(recog->rch, &index, &score, NULL, NULL, NULL,
2086 NULL, NULL);
2087 if (debug >= 2)
2088 fprintf(stderr, "index = %d, score = %7.3f\n", index, score);
2089 pix3 = pixAddBorder(pix2, 2, 1);
2090 pixaAddPix(pixa, pix3, L_INSERT);
2091 pixDestroy(&pix1);
2092 pixDestroy(&pix2);
2093 }
2094 pixaaAddPixa(paa2, pixa, L_INSERT);
2095 pixaDestroy(&pixat);
2096 }
2097 recog->pixdb_ave = pixaaDisplayByPixa(paa2, 20, 20, 2500);
2098 if (debug % 2) {
2099 lept_mkdir("lept/recog");
2100 pixWriteDebug("/tmp/lept/recog/templ_match.png", recog->pixdb_ave,
2101 IFF_PNG);
2102 pixDisplay(recog->pixdb_ave, 100, 100);
2103 }
2104
2105 pixaaDestroy(&paa2);
2106 return 0;
2107 }
2108
2109
2110 /*!
2111 * \brief recogShowAverageTemplates()
2112 *
2113 * \param[in] recog
2114 * \return 0 on success, 1 on failure
2115 *
2116 * <pre>
2117 * Notes:
2118 * (1) This debug routine generates a display of the averaged templates,
2119 * both scaled and unscaled, with the centroid visible in red.
2120 * </pre>
2121 */
2122 l_int32
recogShowAverageTemplates(L_RECOG * recog)2123 recogShowAverageTemplates(L_RECOG *recog)
2124 {
2125 l_int32 i, size;
2126 l_float32 x, y;
2127 PIX *pix1, *pix2, *pixr;
2128 PIXA *pixat, *pixadb;
2129
2130 PROCNAME("recogShowAverageTemplates");
2131
2132 if (!recog)
2133 return ERROR_INT("recog not defined", procName, 1);
2134
2135 fprintf(stderr, "min/max width_u = (%d,%d); min/max height_u = (%d,%d)\n",
2136 recog->minwidth_u, recog->maxwidth_u,
2137 recog->minheight_u, recog->maxheight_u);
2138 fprintf(stderr, "min splitw = %d, max splith = %d\n",
2139 recog->min_splitw, recog->max_splith);
2140
2141 pixaDestroy(&recog->pixadb_ave);
2142
2143 pixr = pixCreate(3, 3, 32); /* 3x3 red square for centroid location */
2144 pixSetAllArbitrary(pixr, 0xff000000);
2145 pixadb = pixaCreate(2);
2146
2147 /* Unscaled bitmaps */
2148 size = recog->setsize;
2149 pixat = pixaCreate(size);
2150 for (i = 0; i < size; i++) {
2151 if ((pix1 = pixaGetPix(recog->pixa_u, i, L_CLONE)) == NULL)
2152 continue;
2153 pix2 = pixConvertTo32(pix1);
2154 ptaGetPt(recog->pta_u, i, &x, &y);
2155 pixRasterop(pix2, (l_int32)(x - 0.5), (l_int32)(y - 0.5), 3, 3,
2156 PIX_SRC, pixr, 0, 0);
2157 pixaAddPix(pixat, pix2, L_INSERT);
2158 pixDestroy(&pix1);
2159 }
2160 pix1 = pixaDisplayTiledInRows(pixat, 32, 3000, 1.0, 0, 20, 0);
2161 pixaAddPix(pixadb, pix1, L_INSERT);
2162 pixDisplay(pix1, 100, 100);
2163 pixaDestroy(&pixat);
2164
2165 /* Scaled bitmaps */
2166 pixat = pixaCreate(size);
2167 for (i = 0; i < size; i++) {
2168 if ((pix1 = pixaGetPix(recog->pixa, i, L_CLONE)) == NULL)
2169 continue;
2170 pix2 = pixConvertTo32(pix1);
2171 ptaGetPt(recog->pta, i, &x, &y);
2172 pixRasterop(pix2, (l_int32)(x - 0.5), (l_int32)(y - 0.5), 3, 3,
2173 PIX_SRC, pixr, 0, 0);
2174 pixaAddPix(pixat, pix2, L_INSERT);
2175 pixDestroy(&pix1);
2176 }
2177 pix1 = pixaDisplayTiledInRows(pixat, 32, 3000, 1.0, 0, 20, 0);
2178 pixaAddPix(pixadb, pix1, L_INSERT);
2179 pixDisplay(pix1, 100, 100);
2180 pixaDestroy(&pixat);
2181 pixDestroy(&pixr);
2182 recog->pixadb_ave = pixadb;
2183 return 0;
2184 }
2185
2186
2187 /*!
2188 * \brief pixDisplayOutliers()
2189 *
2190 * \param[in] pixas unscaled labeled templates
2191 * \param[in] nas scores of templates (against class averages)
2192 * \return pix tiled pixa with text and scores, or NULL on failure
2193 *
2194 * <pre>
2195 * Notes:
2196 * (1) This debug routine is called from recogRemoveOutliers2(),
2197 * and takes the saved templates and their scores as input.
2198 * </pre>
2199 */
2200 static PIX *
pixDisplayOutliers(PIXA * pixas,NUMA * nas)2201 pixDisplayOutliers(PIXA *pixas,
2202 NUMA *nas)
2203 {
2204 char *text;
2205 char buf[16];
2206 l_int32 i, n;
2207 l_float32 fval;
2208 PIX *pix1, *pix2;
2209 PIXA *pixa1;
2210
2211 PROCNAME("pixDisplayOutliers");
2212
2213 if (!pixas)
2214 return (PIX *)ERROR_PTR("pixas not defined", procName, NULL);
2215 if (!nas)
2216 return (PIX *)ERROR_PTR("nas not defined", procName, NULL);
2217 n = pixaGetCount(pixas);
2218 if (numaGetCount(nas) != n)
2219 return (PIX *)ERROR_PTR("pixas and nas sizes differ", procName, NULL);
2220
2221 pixa1 = pixaCreate(n);
2222 for (i = 0; i < n; i++) {
2223 pix1 = pixaGetPix(pixas, i, L_CLONE);
2224 pix2 = pixAddBlackOrWhiteBorder(pix1, 25, 25, 0, 0, L_GET_WHITE_VAL);
2225 text = pixGetText(pix1);
2226 numaGetFValue(nas, i, &fval);
2227 snprintf(buf, sizeof(buf), "'%s': %5.2f", text, fval);
2228 pixSetText(pix2, buf);
2229 pixaAddPix(pixa1, pix2, L_INSERT);
2230 pixDestroy(&pix1);
2231 }
2232 pix1 = pixaDisplayTiledWithText(pixa1, 1500, 1.0, 20, 2, 6, 0xff000000);
2233 pixaDestroy(&pixa1);
2234 return pix1;
2235 }
2236
2237
2238 /*!
2239 * \brief recogDisplayOutlier()
2240 *
2241 * \param[in] recog
2242 * \param[in] iclass sample is in this class
2243 * \param[in] jsamp index of sample is class i
2244 * \param[in] maxclass index of class with closest average to sample
2245 * \param[in] maxscore score of sample with average of class %maxclass
2246 * \return pix sample and template images, with score, or NULL on error
2247 *
2248 * <pre>
2249 * Notes:
2250 * (1) This shows three templates, side-by-side:
2251 * - The outlier sample
2252 * - The average template from the same class
2253 * - The average class template that best matched the outlier sample
2254 * </pre>
2255 */
2256 static PIX *
recogDisplayOutlier(L_RECOG * recog,l_int32 iclass,l_int32 jsamp,l_int32 maxclass,l_float32 maxscore)2257 recogDisplayOutlier(L_RECOG *recog,
2258 l_int32 iclass,
2259 l_int32 jsamp,
2260 l_int32 maxclass,
2261 l_float32 maxscore)
2262 {
2263 char buf[64];
2264 PIX *pix1, *pix2, *pix3, *pix4, *pix5;
2265 PIXA *pixa;
2266
2267 PROCNAME("recogDisplayOutlier");
2268
2269 if (!recog)
2270 return (PIX *)ERROR_PTR("recog not defined", procName, NULL);
2271
2272 pix1 = pixaaGetPix(recog->pixaa, iclass, jsamp, L_CLONE);
2273 pix2 = pixaGetPix(recog->pixa, iclass, L_CLONE);
2274 pix3 = pixaGetPix(recog->pixa, maxclass, L_CLONE);
2275 pixa = pixaCreate(3);
2276 pixaAddPix(pixa, pix1, L_INSERT);
2277 pixaAddPix(pixa, pix2, L_INSERT);
2278 pixaAddPix(pixa, pix3, L_INSERT);
2279 pix4 = pixaDisplayTiledInRows(pixa, 32, 400, 2.0, 0, 12, 2);
2280 snprintf(buf, sizeof(buf), "C=%d, BAC=%d, S=%4.2f", iclass, maxclass,
2281 maxscore);
2282 pix5 = pixAddSingleTextblock(pix4, recog->bmf, buf, 0xff000000,
2283 L_ADD_BELOW, NULL);
2284 pixDestroy(&pix4);
2285 pixaDestroy(&pixa);
2286 return pix5;
2287 }
2288
2289
2290 /*!
2291 * \brief recogShowMatchesInRange()
2292 *
2293 * \param[in] recog
2294 * \param[in] pixa of 1 bpp images to match
2295 * \param[in] minscore, maxscore range to include output
2296 * \param[in] display to display the result
2297 * \return 0 if OK, 1 on error
2298 *
2299 * <pre>
2300 * Notes:
2301 * (1) This gives a visual output of the best matches for a given
2302 * range of scores. Each pair of images can optionally be
2303 * labeled with the index of the best match and the correlation.
2304 * (2) To use this, save a set of 1 bpp images (labeled or
2305 * unlabeled) that can be given to a recognizer in a pixa.
2306 * Then call this function with the pixa and parameters
2307 * to filter a range of scores.
2308 * </pre>
2309 */
2310 l_int32
recogShowMatchesInRange(L_RECOG * recog,PIXA * pixa,l_float32 minscore,l_float32 maxscore,l_int32 display)2311 recogShowMatchesInRange(L_RECOG *recog,
2312 PIXA *pixa,
2313 l_float32 minscore,
2314 l_float32 maxscore,
2315 l_int32 display)
2316 {
2317 l_int32 i, n, index, depth;
2318 l_float32 score;
2319 NUMA *nascore, *naindex;
2320 PIX *pix1, *pix2;
2321 PIXA *pixa1, *pixa2;
2322
2323 PROCNAME("recogShowMatchesInRange");
2324
2325 if (!recog)
2326 return ERROR_INT("recog not defined", procName, 1);
2327 if (!pixa)
2328 return ERROR_INT("pixa not defined", procName, 1);
2329
2330 /* Run the recognizer on the set of images */
2331 n = pixaGetCount(pixa);
2332 nascore = numaCreate(n);
2333 naindex = numaCreate(n);
2334 pixa1 = pixaCreate(n);
2335 for (i = 0; i < n; i++) {
2336 pix1 = pixaGetPix(pixa, i, L_CLONE);
2337 recogIdentifyPix(recog, pix1, &pix2);
2338 rchExtract(recog->rch, &index, &score, NULL, NULL, NULL, NULL, NULL);
2339 numaAddNumber(nascore, score);
2340 numaAddNumber(naindex, index);
2341 pixaAddPix(pixa1, pix2, L_INSERT);
2342 pixDestroy(&pix1);
2343 }
2344
2345 /* Filter the set and optionally add text to each */
2346 pixa2 = pixaCreate(n);
2347 depth = 1;
2348 for (i = 0; i < n; i++) {
2349 numaGetFValue(nascore, i, &score);
2350 if (score < minscore || score > maxscore) continue;
2351 pix1 = pixaGetPix(pixa1, i, L_CLONE);
2352 numaGetIValue(naindex, i, &index);
2353 pix2 = recogShowMatch(recog, pix1, NULL, NULL, index, score);
2354 if (i == 0) depth = pixGetDepth(pix2);
2355 pixaAddPix(pixa2, pix2, L_INSERT);
2356 pixDestroy(&pix1);
2357 }
2358
2359 /* Package it up */
2360 pixDestroy(&recog->pixdb_range);
2361 if (pixaGetCount(pixa2) > 0) {
2362 recog->pixdb_range =
2363 pixaDisplayTiledInRows(pixa2, depth, 2500, 1.0, 0, 20, 1);
2364 if (display)
2365 pixDisplay(recog->pixdb_range, 300, 100);
2366 } else {
2367 L_INFO("no character matches in the range of scores\n", procName);
2368 }
2369
2370 pixaDestroy(&pixa1);
2371 pixaDestroy(&pixa2);
2372 numaDestroy(&nascore);
2373 numaDestroy(&naindex);
2374 return 0;
2375 }
2376
2377
2378 /*!
2379 * \brief recogShowMatch()
2380 *
2381 * \param[in] recog
2382 * \param[in] pix1 input pix; several possibilities
2383 * \param[in] pix2 [optional] matching template
2384 * \param[in] box [optional] region in pix1 for which pix2 matches
2385 * \param[in] index index of matching template; use -1 to disable printing
2386 * \param[in] score score of match
2387 * \return pixd pair of images, showing input pix and best template,
2388 * optionally with matching information, or NULL on error.
2389 *
2390 * <pre>
2391 * Notes:
2392 * (1) pix1 can be one of these:
2393 * (a) The input pix alone, which can be either a single character
2394 * (box == NULL) or several characters that need to be
2395 * segmented. If more than character is present, the box
2396 * region is displayed with an outline.
2397 * (b) Both the input pix and the matching template. In this case,
2398 * pix2 and box will both be null.
2399 * (2) If the bmf has been made (by a call to recogMakeBmf())
2400 * and the index >= 0, the text field, match score and index
2401 * will be rendered; otherwise their values will be ignored.
2402 * </pre>
2403 */
2404 PIX *
recogShowMatch(L_RECOG * recog,PIX * pix1,PIX * pix2,BOX * box,l_int32 index,l_float32 score)2405 recogShowMatch(L_RECOG *recog,
2406 PIX *pix1,
2407 PIX *pix2,
2408 BOX *box,
2409 l_int32 index,
2410 l_float32 score)
2411 {
2412 char buf[32];
2413 char *text;
2414 L_BMF *bmf;
2415 PIX *pix3, *pix4, *pix5, *pixd;
2416 PIXA *pixa;
2417
2418 PROCNAME("recogShowMatch");
2419
2420 if (!recog)
2421 return (PIX *)ERROR_PTR("recog not defined", procName, NULL);
2422 if (!pix1)
2423 return (PIX *)ERROR_PTR("pix1 not defined", procName, NULL);
2424
2425 bmf = (recog->bmf && index >= 0) ? recog->bmf : NULL;
2426 if (!pix2 && !box && !bmf) /* nothing to do */
2427 return pixCopy(NULL, pix1);
2428
2429 pix3 = pixConvertTo32(pix1);
2430 if (box)
2431 pixRenderBoxArb(pix3, box, 1, 255, 0, 0);
2432
2433 if (pix2) {
2434 pixa = pixaCreate(2);
2435 pixaAddPix(pixa, pix3, L_CLONE);
2436 pixaAddPix(pixa, pix2, L_CLONE);
2437 pix4 = pixaDisplayTiledInRows(pixa, 1, 500, 1.0, 0, 15, 0);
2438 pixaDestroy(&pixa);
2439 } else {
2440 pix4 = pixCopy(NULL, pix3);
2441 }
2442 pixDestroy(&pix3);
2443
2444 if (bmf) {
2445 pix5 = pixAddBorderGeneral(pix4, 55, 55, 0, 0, 0xffffff00);
2446 recogGetClassString(recog, index, &text);
2447 snprintf(buf, sizeof(buf), "C=%s, S=%4.3f, I=%d", text, score, index);
2448 pixd = pixAddSingleTextblock(pix5, bmf, buf, 0xff000000,
2449 L_ADD_BELOW, NULL);
2450 pixDestroy(&pix5);
2451 LEPT_FREE(text);
2452 } else {
2453 pixd = pixClone(pix4);
2454 }
2455 pixDestroy(&pix4);
2456
2457 return pixd;
2458 }
2459