1 /*====================================================================*
2 - Copyright (C) 2001 Leptonica. All rights reserved.
3 -
4 - Redistribution and use in source and binary forms, with or without
5 - modification, are permitted provided that the following conditions
6 - are met:
7 - 1. Redistributions of source code must retain the above copyright
8 - notice, this list of conditions and the following disclaimer.
9 - 2. Redistributions in binary form must reproduce the above
10 - copyright notice, this list of conditions and the following
11 - disclaimer in the documentation and/or other materials
12 - provided with the distribution.
13 -
14 - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15 - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16 - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17 - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
18 - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 *====================================================================*/
26
27 /*
28 * jbrankhaus.c
29 *
30 * jbrankhaus dirin size rank [firstpage npages]
31 *
32 * dirin: directory of input pages
33 * size: size of SE used for dilation
34 * rank: min pixel fraction required in both directions in match
35 *
36 * Notes:
37 * (1) All components larger than a default size are not saved.
38 * The default size is given in jbclass.c.
39 * (2) A set of reasonable values for cc or characters, that
40 * gives good accuracy without too many classes, is:
41 * size = 2 (2 x 2 structuring element)
42 * rank = 0.97
43 * (3) The two output files (for templates and c.c. data)
44 * are written with the rootname
45 * /tmp/lept/jb/result
46 */
47
#include <errno.h>
#include <limits.h>
#include <stdlib.h>

#include "allheaders.h"
49
/* Unit passed to the classifier: leave exactly one definition enabled */
#define  COMPONENTS             JB_CONN_COMPS
/* #define  COMPONENTS             JB_CHARACTERS */
/* #define  COMPONENTS             JB_WORDS */

/* Capacity, in bytes, of the buffer in which output filenames are built */
#define  BUF_SIZE               512

/* Compile-time switches for optional debug output (1 = on, 0 = off) */
#define  DEBUG_TEST_DATA_IO     0
#define  RENDER_DEBUG           1
#define  DISPLAY_DIFFERENCE     1
#define  DISPLAY_ALL_INSTANCES  0

/* Layout arguments for the display of all instances, sorted by class;
 * only used when DISPLAY_ALL_INSTANCES is enabled */
#define  X_SPACING              10
#define  Y_SPACING              15
#define  MAX_OUTPUT_WIDTH       400

/* Fixed root of every output filename written by this program */
static const char  rootname[] = "/tmp/lept/jb/result";
69
main(int argc,char ** argv)70 int main(int argc,
71 char **argv)
72 {
73 char filename[BUF_SIZE];
74 char *dirin, *fname;
75 l_int32 i, size, firstpage, npages, nfiles;
76 l_float32 rank;
77 JBDATA *data;
78 JBCLASSER *classer;
79 SARRAY *safiles;
80 PIX *pix, *pixt;
81 PIXA *pixa, *pixadb;
82 static char mainName[] = "jbrankhaus";
83
84 if (argc != 4 && argc != 6)
85 return ERROR_INT(
86 " Syntax: jbrankhaus dirin size rank [firstpage, npages]",
87 mainName, 1);
88 dirin = argv[1];
89 size = atoi(argv[2]);
90 rank = atof(argv[3]);
91 if (argc == 4) {
92 firstpage = 0;
93 npages = 0;
94 }
95 else {
96 firstpage = atoi(argv[4]);
97 npages = atoi(argv[5]);
98 }
99
100 setLeptDebugOK(1);
101 lept_mkdir("lept/jb");
102
103 #if 0
104
105 /*--------------------------------------------------------------*/
106
107 jbRankHaus(dirin, size, rank, COMPONENTS, rootname, firstpage, npages, 1);
108
109 /*--------------------------------------------------------------*/
110
111 #else
112
113 /*--------------------------------------------------------------*/
114
115 safiles = getSortedPathnamesInDirectory(dirin, NULL, firstpage, npages);
116 nfiles = sarrayGetCount(safiles);
117
118 /* sarrayWriteStream(stderr, safiles); */
119
120 /* Classify components on requested pages */
121 startTimer();
122 classer = jbRankHausInit(COMPONENTS, 0, 0, size, rank);
123 jbAddPages(classer, safiles);
124 fprintf(stderr, "Time to classify components: %6.3f sec\n", stopTimer());
125
126 /* Save and write out the result */
127 data = jbDataSave(classer);
128 jbDataWrite(rootname, data);
129
130 /* Render the pages from the classifier data.
131 * Use debugflag == FALSE to omit outlines of each component. */
132 pixa = jbDataRender(data, FALSE);
133
134 /* Write the pages out */
135 npages = pixaGetCount(pixa);
136 if (npages != nfiles)
137 fprintf(stderr, "npages = %d, nfiles = %d, not equal!\n",
138 npages, nfiles);
139 for (i = 0; i < npages; i++) {
140 pix = pixaGetPix(pixa, i, L_CLONE);
141 snprintf(filename, BUF_SIZE, "%s.%03d", rootname, i);
142 fprintf(stderr, "filename: %s\n", filename);
143 pixWrite(filename, pix, IFF_PNG);
144 pixDestroy(&pix);
145 }
146
147 #if DISPLAY_DIFFERENCE
148 {
149 char *fname;
150 PIX *pix1, *pix2;
151 fname = sarrayGetString(safiles, 0, L_NOCOPY);
152 pix1 = pixRead(fname);
153 pix2 = pixaGetPix(pixa, 0, L_CLONE);
154 pixXor(pix1, pix1, pix2);
155 pixWrite("/tmp/lept/jb/output_diff.png", pix1, IFF_PNG);
156 pixDestroy(&pix1);
157 pixDestroy(&pix2);
158 }
159 #endif /* DISPLAY_DIFFERENCE */
160
161 #if DEBUG_TEST_DATA_IO
162 {
163 JBDATA *newdata;
164 PIX *newpix;
165 PIXA *newpixa;
166 l_int32 same, iofail;
167
168 /* Read the data back in and render the pages */
169 newdata = jbDataRead(rootname);
170 newpixa = jbDataRender(newdata, FALSE);
171 iofail = FALSE;
172 for (i = 0; i < npages; i++) {
173 pix = pixaGetPix(pixa, i, L_CLONE);
174 newpix = pixaGetPix(newpixa, i, L_CLONE);
175 pixEqual(pix, newpix, &same);
176 if (!same) {
177 iofail = TRUE;
178 fprintf(stderr, "pix on page %d are unequal!\n", i);
179 }
180 pixDestroy(&pix);
181 pixDestroy(&newpix);
182
183 }
184 if (iofail)
185 fprintf(stderr, "read/write for jbdata fails\n");
186 else
187 fprintf(stderr, "read/write for jbdata succeeds\n");
188 jbDataDestroy(&newdata);
189 pixaDestroy(&newpixa);
190 }
191 #endif /* DEBUG_TEST_DATA_IO */
192
193 #if RENDER_DEBUG
194 /* Use debugflag == TRUE to see outlines of each component. */
195 pixadb = jbDataRender(data, TRUE);
196 /* Write the debug pages out */
197 npages = pixaGetCount(pixadb);
198 for (i = 0; i < npages; i++) {
199 pix = pixaGetPix(pixadb, i, L_CLONE);
200 snprintf(filename, BUF_SIZE, "%s.db.%04d", rootname, i);
201 fprintf(stderr, "filename: %s\n", filename);
202 pixWrite(filename, pix, IFF_PNG);
203 pixDestroy(&pix);
204 }
205 pixaDestroy(&pixadb);
206 #endif /* RENDER_DEBUG */
207
208 #if DISPLAY_ALL_INSTANCES
209 /* display all instances, organized by template */
210 pix = pixaaDisplayByPixa(classer->pixaa,
211 X_SPACING, Y_SPACING, MAX_OUTPUT_WIDTH);
212 pixWrite("/tmp/lept/jb/output_instances", pix, IFF_PNG);
213 pixDestroy(&pix);
214 #endif /* DISPLAY_ALL_INSTANCES */
215
216 pixaDestroy(&pixa);
217 sarrayDestroy(&safiles);
218 jbClasserDestroy(&classer);
219 jbDataDestroy(&data);
220
221 /*--------------------------------------------------------------*/
222
223 #endif
224
225 return 0;
226 }
227