1 /*====================================================================*
2 - Copyright (C) 2001 Leptonica. All rights reserved.
3 -
4 - Redistribution and use in source and binary forms, with or without
5 - modification, are permitted provided that the following conditions
6 - are met:
7 - 1. Redistributions of source code must retain the above copyright
8 - notice, this list of conditions and the following disclaimer.
9 - 2. Redistributions in binary form must reproduce the above
10 - copyright notice, this list of conditions and the following
11 - disclaimer in the documentation and/or other materials
12 - provided with the distribution.
13 -
14 - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15 - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16 - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17 - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
18 - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 *====================================================================*/
26
27 /*
28 * recogtest3.c
29 *
30 * Test padding of book-adapted recognizer (BAR) using templates
31 * from a bootstrap recognizer (BSR) to identify unlabeled samples
32 * from the book.
33 *
34 * Terminology note:
35 * templates: labeled character images that can be inserted
36 * into a recognizer.
37 * samples: unlabeled character images that must be labeled by
38 * a recognizer before they can be used as templates.
39 *
40 * This demonstrates the following operations:
41 * (1) Making a BAR from labeled book templates (as a pixa).
42 * (2) Making a hybrid BAR/BSR from scaled templates in the BAR,
43 * supplemented with similarly scaled bootstrap templates for those
44 * classes where the BAR templates are either missing or not
45 * of sufficient quantity.
 *   (3) Using the BAR/BSR to label unlabeled book samples.
47 * (4) Adding the pixa of the original set of labeled book
48 * templates to the pixa of the newly labeled templates, and
49 * making a BAR from the joined pixa. The BAR would then
50 * work to identify unscaled samples from the book.
51 * (5) Removing outliers from the BAR.
52 *
53 * Note that if this final BAR were not to have a sufficient number
54 * of templates in each class, it can again be augmented with BSR
55 * templates, and the hybrid BAR/BSR would be the final recognizer
56 * that is used to identify unknown (scaled) samples.
57 */
58
59 #include "string.h"
60 #include "allheaders.h"
61
62
main(int argc,char ** argv)63 l_int32 main(int argc,
64 char **argv)
65 {
66 char *text;
67 l_int32 histo[10];
68 l_int32 i, n, ival, same;
69 PIX *pix1, *pix2;
70 PIXA *pixa1, *pixa2, *pixa3, *pixa4;
71 L_RECOG *recog1, *recog2, *recog3;
72
73 if (argc != 1) {
74 fprintf(stderr, " Syntax: recogtest3\n");
75 return 1;
76 }
77
78 setLeptDebugOK(1);
79 lept_mkdir("lept/recog");
80
81 /* Read templates and split them into two sets. Use one to
82 * make a BAR recog that needs padding; use the other with a
83 * hybrid BAR/BSR to make more labeled templates to augment
84 * the BAR */
85 pixa1 = pixaRead("recog/sets/train05.pa");
86 pixa2 = pixaCreate(0); /* to generate a small BAR */
87 pixa3 = pixaCreate(0); /* for templates to be labeled and
88 * added to the BAR */
89 n = pixaGetCount(pixa1);
90 for (i = 0; i < 10; i++)
91 histo[i] = 0;
92 for (i = 0; i < n; i++) {
93 pix1 = pixaGetPix(pixa1, i, L_COPY);
94 text = pixGetText(pix1);
95 ival = text[0] - '0';
96 /* remove all 4's, and all but 2 7's and 9's */
97 if (ival == 4 || (ival == 7 && histo[7] == 2) ||
98 (ival == 9 && histo[9] == 2)) {
99 pixaAddPix(pixa3, pix1, L_INSERT);
100 } else {
101 pixaAddPix(pixa2, pix1, L_INSERT);
102 histo[ival]++;
103 }
104 }
105 pix1 = pixaDisplayTiledWithText(pixa3, 1500, 1.0, 15, 2, 6, 0xff000000);
106 pixDisplay(pix1, 500, 0);
107 pixDestroy(&pix1);
108
109 /* Make a BAR from the small set */
110 recog1 = recogCreateFromPixa(pixa2, 0, 40, 0, 128, 1);
111 recogShowContent(stderr, recog1, 0, 1);
112
113 /* Pad with BSR templates to make a hybrid BAR/BSR */
114 recogPadDigitTrainingSet(&recog1, 40, 0);
115 recogShowContent(stderr, recog1, 1, 1);
116
117 /* Use the BAR/BSR to label the left-over templates from the book */
118 pixa4 = recogTrainFromBoot(recog1, pixa3, 0.75, 128, 1);
119
120 /* Join the two sets */
121 pixaJoin(pixa1, pixa4, 0, 0);
122 pixaDestroy(&pixa4);
123
124 /* Make a new BAR that uses unscaled templates.
125 * This now has all the templates from pixa1, before deletions */
126 recog2 = recogCreateFromPixa(pixa1, 0, 0, 5, 128, 1);
127 recogShowContent(stderr, recog2, 2, 1);
128
129 /* Test recog serialization */
130 recogWrite("/tmp/lept/recog/recog2.rec", recog2);
131 recog3 = recogRead("/tmp/lept/recog/recog2.rec");
132 recogWrite("/tmp/lept/recog/recog3.rec", recog3);
133 filesAreIdentical("/tmp/lept/recog/recog2.rec",
134 "/tmp/lept/recog/recog3.rec", &same);
135 if (!same)
136 fprintf(stderr, "Error in serialization!\n");
137 recogDestroy(&recog3);
138
139 /* Remove outliers: method 1 */
140 pixa4 = pixaRemoveOutliers1(pixa1, 0.8, 4, 3, &pix1, &pix2);
141 pixDisplay(pix1, 500, 0);
142 pixDisplay(pix2, 500, 500);
143 pixDestroy(&pix1);
144 pixDestroy(&pix2);
145 recog3 = recogCreateFromPixa(pixa4, 0, 0, 0, 128, 1);
146 recogShowContent(stderr, recog3, 3, 1);
147 pixaDestroy(&pixa4);
148 recogDestroy(&recog3);
149
150 /* Relabel a few templates to put them in the wrong classes */
151 pix1 = pixaGetPix(pixa1, 7, L_CLONE);
152 pixSetText(pix1, "4");
153 pixDestroy(&pix1);
154 pix1 = pixaGetPix(pixa1, 38, L_CLONE);
155 pixSetText(pix1, "9");
156 pixDestroy(&pix1);
157 pix1 = pixaGetPix(pixa1, 61, L_CLONE);
158 pixSetText(pix1, "2");
159 pixDestroy(&pix1);
160
161 /* Remove outliers: method 2 */
162 pixa4 = pixaRemoveOutliers2(pixa1, 0.65, 3, &pix1, &pix2);
163 pixDisplay(pix1, 900, 0);
164 pixDisplay(pix2, 900, 500);
165 pixDestroy(&pix1);
166 pixDestroy(&pix2);
167 recog3 = recogCreateFromPixa(pixa4, 0, 0, 0, 128, 1);
168 recogShowContent(stderr, recog3, 3, 1);
169 pixaDestroy(&pixa4);
170 recogDestroy(&recog3);
171
172 recogDestroy(&recog1);
173 recogDestroy(&recog2);
174 pixaDestroy(&pixa1);
175 pixaDestroy(&pixa2);
176 pixaDestroy(&pixa3);
177 return 0;
178 }
179