1 /*====================================================================*
2 - Copyright (C) 2001 Leptonica. All rights reserved.
3 -
4 - Redistribution and use in source and binary forms, with or without
5 - modification, are permitted provided that the following conditions
6 - are met:
7 - 1. Redistributions of source code must retain the above copyright
8 - notice, this list of conditions and the following disclaimer.
9 - 2. Redistributions in binary form must reproduce the above
10 - copyright notice, this list of conditions and the following
11 - disclaimer in the documentation and/or other materials
12 - provided with the distribution.
13 -
14 - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15 - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16 - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17 - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
18 - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 *====================================================================*/
26
/*
 * jbcorrelation.c
 *
 *     jbcorrelation dirin thresh weight [firstpage npages]
 *
 *         dirin:  directory of input pages
 *         thresh: 0.80 - 0.85 is a reasonable compromise between accuracy
 *                 and number of classes, for characters
 *         weight: 0.6 seems to work reasonably with thresh = 0.8.
 *         firstpage, npages: optional; both are taken to be 0 when omitted
 *
 *     Notes:
 *         (1) Components larger than a default size are not saved.
 *             The default size is given in jbclass.c.
 *         (2) The two output files (for templates and for connected
 *             component (c.c.) data) are written with the rootname
 *                /tmp/lept/jb/result
 */
44
45 #include "allheaders.h"
46
/* Component type handed to the classifier.  Choose exactly one of these. */
#define  COMPONENTS  JB_CONN_COMPS
/* #define  COMPONENTS  JB_CHARACTERS */
/* #define  COMPONENTS  JB_WORDS */

/* Size of the buffer used to build the output file names */
#define  BUF_SIZE  512

/* Select additional debug output (1 = enabled, 0 = disabled):
 *   DEBUG_TEST_DATA_IO:    read the saved data back in, render it again
 *                          and compare with the first rendering
 *   RENDER_DEBUG:          also render and write the pages with the
 *                          debug flag set (component outlines)
 *   DISPLAY_DIFFERENCE:    write the XOR of the first input page with
 *                          the first rendered page
 *   DISPLAY_ALL_INSTANCES: write an image of all instances, organized
 *                          by template                                */
#define  DEBUG_TEST_DATA_IO        0
#define  RENDER_DEBUG              1
#define  DISPLAY_DIFFERENCE        1
#define  DISPLAY_ALL_INSTANCES     0

/* Layout parameters passed to pixaaDisplayByPixa() for the display
 * output of all instances, sorted by class */
#define  X_SPACING                10
#define  Y_SPACING                15
#define  MAX_OUTPUT_WIDTH        400

/* Root name for all output files written by this program */
static const char  rootname[] = "/tmp/lept/jb/result";
66
main(int argc,char ** argv)67 int main(int argc,
68 char **argv)
69 {
70 char filename[BUF_SIZE];
71 char *dirin;
72 l_int32 i, firstpage, npages, nfiles;
73 l_float32 thresh, weight;
74 JBDATA *data;
75 JBCLASSER *classer;
76 SARRAY *safiles;
77 PIX *pix;
78 PIXA *pixa, *pixadb;
79 static char mainName[] = "jbcorrelation";
80
81 if (argc != 4 && argc != 6)
82 return ERROR_INT(
83 " Syntax: jbcorrelation dirin thresh weight [firstpage, npages]",
84 mainName, 1);
85 dirin = argv[1];
86 thresh = atof(argv[2]);
87 weight = atof(argv[3]);
88
89 if (argc == 4) {
90 firstpage = 0;
91 npages = 0;
92 }
93 else {
94 firstpage = atoi(argv[4]);
95 npages = atoi(argv[5]);
96 }
97
98 setLeptDebugOK(1);
99 lept_mkdir("lept/jb");
100
101 #if 0
102
103 /*--------------------------------------------------------------*/
104
105 jbCorrelation(dirin, thresh, weight, COMPONENTS, rootname,
106 firstpage, npages, 1);
107
108 /*--------------------------------------------------------------*/
109
110 #else
111
112 /*--------------------------------------------------------------*/
113
114 safiles = getSortedPathnamesInDirectory(dirin, NULL, firstpage, npages);
115 nfiles = sarrayGetCount(safiles);
116
117 /* sarrayWriteStream(stderr, safiles); */
118
119 /* Classify components on requested pages */
120 startTimer();
121 classer = jbCorrelationInit(COMPONENTS, 0, 0, thresh, weight);
122 jbAddPages(classer, safiles);
123 fprintf(stderr, "Time to generate classes: %6.3f sec\n", stopTimer());
124
125 /* Save and write out the result */
126 data = jbDataSave(classer);
127 jbDataWrite(rootname, data);
128 fprintf(stderr, "Number of classes: %d\n", classer->nclass);
129
130 /* Render the pages from the classifier data.
131 * Use debugflag == FALSE to omit outlines of each component. */
132 pixa = jbDataRender(data, FALSE);
133
134 /* Write the pages out */
135 npages = pixaGetCount(pixa);
136 if (npages != nfiles)
137 fprintf(stderr, "npages = %d, nfiles = %d, not equal!\n",
138 npages, nfiles);
139 for (i = 0; i < npages; i++) {
140 pix = pixaGetPix(pixa, i, L_CLONE);
141 snprintf(filename, BUF_SIZE, "%s.%03d", rootname, i);
142 fprintf(stderr, "filename: %s\n", filename);
143 pixWrite(filename, pix, IFF_PNG);
144 pixDestroy(&pix);
145 }
146
147 #if DISPLAY_DIFFERENCE
148 {
149 char *fname;
150 PIX *pix1, *pix2;
151 fname = sarrayGetString(safiles, 0, L_NOCOPY);
152 pix1 = pixRead(fname);
153 pix2 = pixaGetPix(pixa, 0, L_CLONE);
154 pixXor(pix1, pix1, pix2);
155 pixWrite("/tmp/lept/jb/output_diff.png", pix1, IFF_PNG);
156 pixDestroy(&pix1);
157 pixDestroy(&pix2);
158 }
159 #endif /* DISPLAY_DIFFERENCE */
160
161 #if DEBUG_TEST_DATA_IO
162 {
163 JBDATA *newdata;
164 PIX *newpix;
165 PIXA *newpixa;
166 l_int32 same, iofail;
167
168 /* Read the data back in and render the pages */
169 newdata = jbDataRead(rootname);
170 newpixa = jbDataRender(newdata, FALSE);
171 iofail = FALSE;
172 for (i = 0; i < npages; i++) {
173 pix = pixaGetPix(pixa, i, L_CLONE);
174 newpix = pixaGetPix(newpixa, i, L_CLONE);
175 pixEqual(pix, newpix, &same);
176 if (!same) {
177 iofail = TRUE;
178 fprintf(stderr, "pix on page %d are unequal!\n", i);
179 }
180 pixDestroy(&pix);
181 pixDestroy(&newpix);
182
183 }
184 if (iofail)
185 fprintf(stderr, "read/write for jbdata fails\n");
186 else
187 fprintf(stderr, "read/write for jbdata succeeds\n");
188 jbDataDestroy(&newdata);
189 pixaDestroy(&newpixa);
190 }
191 #endif /* DEBUG_TEST_DATA_IO */
192
193 #if RENDER_DEBUG
194 /* Use debugflag == TRUE to see outlines of each component. */
195 pixadb = jbDataRender(data, TRUE);
196 /* Write the debug pages out */
197 npages = pixaGetCount(pixadb);
198 for (i = 0; i < npages; i++) {
199 pix = pixaGetPix(pixadb, i, L_CLONE);
200 snprintf(filename, BUF_SIZE, "%s.db.%04d", rootname, i);
201 fprintf(stderr, "filename: %s\n", filename);
202 pixWrite(filename, pix, IFF_PNG);
203 pixDestroy(&pix);
204 }
205 pixaDestroy(&pixadb);
206 #endif /* RENDER_DEBUG */
207
208 #if DISPLAY_ALL_INSTANCES
209 /* display all instances, organized by template */
210 pix = pixaaDisplayByPixa(classer->pixaa,
211 X_SPACING, Y_SPACING, MAX_OUTPUT_WIDTH);
212 pixWrite("/tmp/lept/jb/output_instances", pix, IFF_PNG);
213 pixDestroy(&pix);
214 #endif /* DISPLAY_ALL_INSTANCES */
215
216 pixaDestroy(&pixa);
217 sarrayDestroy(&safiles);
218 jbClasserDestroy(&classer);
219 jbDataDestroy(&data);
220
221 /*--------------------------------------------------------------*/
222
223 #endif
224
225 return 0;
226 }
227
228
229