1 /*====================================================================*
2  -  Copyright (C) 2001 Leptonica.  All rights reserved.
3  -
4  -  Redistribution and use in source and binary forms, with or without
5  -  modification, are permitted provided that the following conditions
6  -  are met:
7  -  1. Redistributions of source code must retain the above copyright
8  -     notice, this list of conditions and the following disclaimer.
9  -  2. Redistributions in binary form must reproduce the above
10  -     copyright notice, this list of conditions and the following
11  -     disclaimer in the documentation and/or other materials
12  -     provided with the distribution.
13  -
14  -  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15  -  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16  -  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17  -  A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL ANY
18  -  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19  -  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20  -  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21  -  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22  -  OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23  -  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24  -  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  *====================================================================*/
26 
27 /*
28  * jbrankhaus.c
29  *
 *     jbrankhaus dirin size rank [firstpage npages]
 *
 *         dirin:  directory of input pages
 *         size: size of SE used for dilation
 *         rank: min pixel fraction required in both directions in match
 *         firstpage, npages: optional; used to select a subset of the
 *                 sorted files in dirin (both are 0 if omitted)
35  *
36  * Notes:
37  *     (1) All components larger than a default size are not saved.
38  *         The default size is given in jbclass.c.
 *     (2) A set of reasonable values for c.c. or characters, that
 *         gives good accuracy without too many classes, is:
41  *               size = 2  (2 x 2 structuring element)
42  *               rank = 0.97
43  *     (3) The two output files (for templates and c.c. data)
44  *         are written with the rootname
45  *               /tmp/lept/jb/result
46  */
47 
48 #include "allheaders.h"
49 
50     /* Choose one of these */
51 #define  COMPONENTS  JB_CONN_COMPS
52 /* #define  COMPONENTS  JB_CHARACTERS */
53 /* #define  COMPONENTS  JB_WORDS */
54 
55 #define   BUF_SIZE         512
56 
57     /* select additional debug output */
58 #define   DEBUG_TEST_DATA_IO        0
59 #define   RENDER_DEBUG              1
60 #define   DISPLAY_DIFFERENCE        1
61 #define   DISPLAY_ALL_INSTANCES     0
62 
63     /* for display output of all instances, sorted by class */
64 #define   X_SPACING                10
65 #define   Y_SPACING                15
66 #define   MAX_OUTPUT_WIDTH         400
67 
68 static const char  rootname[] = "/tmp/lept/jb/result";
69 
main(int argc,char ** argv)70 int main(int    argc,
71          char **argv)
72 {
73 char         filename[BUF_SIZE];
74 char        *dirin, *fname;
75 l_int32      i, size, firstpage, npages, nfiles;
76 l_float32    rank;
77 JBDATA      *data;
78 JBCLASSER   *classer;
79 SARRAY      *safiles;
80 PIX         *pix, *pixt;
81 PIXA        *pixa, *pixadb;
82 static char  mainName[] = "jbrankhaus";
83 
84     if (argc != 4 && argc != 6)
85         return ERROR_INT(
86              " Syntax: jbrankhaus dirin size rank [firstpage, npages]",
87              mainName, 1);
88     dirin = argv[1];
89     size = atoi(argv[2]);
90     rank = atof(argv[3]);
91     if (argc == 4) {
92         firstpage = 0;
93         npages = 0;
94     }
95     else {
96         firstpage = atoi(argv[4]);
97         npages = atoi(argv[5]);
98     }
99 
100     setLeptDebugOK(1);
101     lept_mkdir("lept/jb");
102 
103 #if 0
104 
105     /*--------------------------------------------------------------*/
106 
107     jbRankHaus(dirin, size, rank, COMPONENTS, rootname, firstpage, npages, 1);
108 
109     /*--------------------------------------------------------------*/
110 
111 #else
112 
113     /*--------------------------------------------------------------*/
114 
115     safiles = getSortedPathnamesInDirectory(dirin, NULL, firstpage, npages);
116     nfiles = sarrayGetCount(safiles);
117 
118 /*    sarrayWriteStream(stderr, safiles); */
119 
120         /* Classify components on requested pages */
121     startTimer();
122     classer = jbRankHausInit(COMPONENTS, 0, 0, size, rank);
123     jbAddPages(classer, safiles);
124     fprintf(stderr, "Time to classify components: %6.3f sec\n", stopTimer());
125 
126         /* Save and write out the result */
127     data = jbDataSave(classer);
128     jbDataWrite(rootname, data);
129 
130         /* Render the pages from the classifier data.
131          * Use debugflag == FALSE to omit outlines of each component. */
132     pixa = jbDataRender(data, FALSE);
133 
134         /* Write the pages out */
135     npages = pixaGetCount(pixa);
136     if (npages != nfiles)
137         fprintf(stderr, "npages = %d, nfiles = %d, not equal!\n",
138                 npages, nfiles);
139     for (i = 0; i < npages; i++) {
140         pix = pixaGetPix(pixa, i, L_CLONE);
141         snprintf(filename, BUF_SIZE, "%s.%03d", rootname, i);
142         fprintf(stderr, "filename: %s\n", filename);
143         pixWrite(filename, pix, IFF_PNG);
144         pixDestroy(&pix);
145     }
146 
147 #if  DISPLAY_DIFFERENCE
148     {
149     char *fname;
150     PIX  *pix1, *pix2;
151     fname = sarrayGetString(safiles, 0, L_NOCOPY);
152     pix1 = pixRead(fname);
153     pix2 = pixaGetPix(pixa, 0, L_CLONE);
154     pixXor(pix1, pix1, pix2);
155     pixWrite("/tmp/lept/jb/output_diff.png", pix1, IFF_PNG);
156     pixDestroy(&pix1);
157     pixDestroy(&pix2);
158     }
159 #endif  /* DISPLAY_DIFFERENCE */
160 
161 #if  DEBUG_TEST_DATA_IO
162     {
163     JBDATA  *newdata;
164     PIX     *newpix;
165     PIXA    *newpixa;
166     l_int32  same, iofail;
167 
168         /* Read the data back in and render the pages */
169     newdata = jbDataRead(rootname);
170     newpixa = jbDataRender(newdata, FALSE);
171     iofail = FALSE;
172     for (i = 0; i < npages; i++) {
173         pix = pixaGetPix(pixa, i, L_CLONE);
174         newpix = pixaGetPix(newpixa, i, L_CLONE);
175         pixEqual(pix, newpix, &same);
176         if (!same) {
177             iofail = TRUE;
178             fprintf(stderr, "pix on page %d are unequal!\n", i);
179         }
180         pixDestroy(&pix);
181         pixDestroy(&newpix);
182 
183     }
184     if (iofail)
185         fprintf(stderr, "read/write for jbdata fails\n");
186     else
187         fprintf(stderr, "read/write for jbdata succeeds\n");
188     jbDataDestroy(&newdata);
189     pixaDestroy(&newpixa);
190     }
191 #endif  /* DEBUG_TEST_DATA_IO */
192 
193 #if  RENDER_DEBUG
194         /* Use debugflag == TRUE to see outlines of each component. */
195     pixadb = jbDataRender(data, TRUE);
196         /* Write the debug pages out */
197     npages = pixaGetCount(pixadb);
198     for (i = 0; i < npages; i++) {
199         pix = pixaGetPix(pixadb, i, L_CLONE);
200         snprintf(filename, BUF_SIZE, "%s.db.%04d", rootname, i);
201         fprintf(stderr, "filename: %s\n", filename);
202         pixWrite(filename, pix, IFF_PNG);
203         pixDestroy(&pix);
204     }
205     pixaDestroy(&pixadb);
206 #endif  /* RENDER_DEBUG */
207 
208 #if  DISPLAY_ALL_INSTANCES
209         /* display all instances, organized by template */
210     pix = pixaaDisplayByPixa(classer->pixaa,
211                              X_SPACING, Y_SPACING, MAX_OUTPUT_WIDTH);
212     pixWrite("/tmp/lept/jb/output_instances", pix, IFF_PNG);
213     pixDestroy(&pix);
214 #endif  /* DISPLAY_ALL_INSTANCES */
215 
216     pixaDestroy(&pixa);
217     sarrayDestroy(&safiles);
218     jbClasserDestroy(&classer);
219     jbDataDestroy(&data);
220 
221     /*--------------------------------------------------------------*/
222 
223 #endif
224 
225     return 0;
226 }
227