1 /*====================================================================*
2  -  Copyright (C) 2001 Leptonica.  All rights reserved.
3  -
4  -  Redistribution and use in source and binary forms, with or without
5  -  modification, are permitted provided that the following conditions
6  -  are met:
7  -  1. Redistributions of source code must retain the above copyright
8  -     notice, this list of conditions and the following disclaimer.
9  -  2. Redistributions in binary form must reproduce the above
10  -     copyright notice, this list of conditions and the following
11  -     disclaimer in the documentation and/or other materials
12  -     provided with the distribution.
13  -
14  -  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15  -  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16  -  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17  -  A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL ANY
18  -  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19  -  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20  -  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21  -  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22  -  OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23  -  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24  -  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  *====================================================================*/
26 
27 /*
28  * jbcorrelation.c
29  *
30  *     jbcorrelation dirin thresh weight [firstpage npages]
31  *
32  *         dirin:  directory of input pages
33  *         thresh: 0.80 - 0.85 is a reasonable compromise between accuracy
34  *                 and number of classes, for characters
35  *         weight: 0.6 seems to work reasonably with thresh = 0.8.
36  *
37  *     Notes:
 *         (1) Components larger than a default maximum size are not saved.
 *             The default maximum size is given in jbclass.c.
40  *         (2) The two output files (for templates and c.c. data)
41  *             are written with the rootname
42  *               /tmp/lept/jb/result
43  */
44 
45 #include "allheaders.h"
46 
    /* Choose one of these.  COMPONENTS is the component-type selector
     * handed to jbCorrelationInit() (and to jbCorrelation() in the
     * disabled branch of main); exactly one definition may be active. */
#define  COMPONENTS  JB_CONN_COMPS
/* #define  COMPONENTS  JB_CHARACTERS */
/* #define  COMPONENTS  JB_WORDS */

    /* Size in bytes of the stack buffer used to build output filenames */
#define   BUF_SIZE         512

    /* Select additional debug output.  Each flag is tested with #if in
     * main(), so these must remain preprocessor macros:
     *   DEBUG_TEST_DATA_IO:    read the saved data back in, re-render,
     *                          and compare with the first rendering
     *   RENDER_DEBUG:          also write pages rendered with debugflag
     *                          set to TRUE
     *   DISPLAY_DIFFERENCE:    write the XOR of the first input page
     *                          with its rendered version
     *   DISPLAY_ALL_INSTANCES: write an image of all instances,
     *                          organized by template */
#define   DEBUG_TEST_DATA_IO        0
#define   RENDER_DEBUG              1
#define   DISPLAY_DIFFERENCE        1
#define   DISPLAY_ALL_INSTANCES     0

    /* For display output of all instances, sorted by class.  These are
     * passed, in this order, to pixaaDisplayByPixa(), and are only used
     * when DISPLAY_ALL_INSTANCES is enabled. */
#define   X_SPACING                10
#define   Y_SPACING                15
#define   MAX_OUTPUT_WIDTH         400

    /* Root of every pathname written for the classifier data and the
     * rendered pages; the directory part is created in main() with
     * lept_mkdir("lept/jb"). */
static const char  rootname[] = "/tmp/lept/jb/result";
66 
main(int argc,char ** argv)67 int main(int    argc,
68          char **argv)
69 {
70 char         filename[BUF_SIZE];
71 char        *dirin;
72 l_int32      i, firstpage, npages, nfiles;
73 l_float32    thresh, weight;
74 JBDATA      *data;
75 JBCLASSER   *classer;
76 SARRAY      *safiles;
77 PIX         *pix;
78 PIXA        *pixa, *pixadb;
79 static char  mainName[] = "jbcorrelation";
80 
81     if (argc != 4 && argc != 6)
82         return ERROR_INT(
83              " Syntax: jbcorrelation dirin thresh weight [firstpage, npages]",
84              mainName, 1);
85     dirin = argv[1];
86     thresh = atof(argv[2]);
87     weight = atof(argv[3]);
88 
89     if (argc == 4) {
90         firstpage = 0;
91         npages = 0;
92     }
93     else {
94         firstpage = atoi(argv[4]);
95         npages = atoi(argv[5]);
96     }
97 
98     setLeptDebugOK(1);
99     lept_mkdir("lept/jb");
100 
101 #if 0
102 
103     /*--------------------------------------------------------------*/
104 
105     jbCorrelation(dirin, thresh, weight, COMPONENTS, rootname,
106                   firstpage, npages, 1);
107 
108     /*--------------------------------------------------------------*/
109 
110 #else
111 
112     /*--------------------------------------------------------------*/
113 
114     safiles = getSortedPathnamesInDirectory(dirin, NULL, firstpage, npages);
115     nfiles = sarrayGetCount(safiles);
116 
117 /*    sarrayWriteStream(stderr, safiles); */
118 
119         /* Classify components on requested pages */
120     startTimer();
121     classer = jbCorrelationInit(COMPONENTS, 0, 0, thresh, weight);
122     jbAddPages(classer, safiles);
123     fprintf(stderr, "Time to generate classes: %6.3f sec\n", stopTimer());
124 
125         /* Save and write out the result */
126     data = jbDataSave(classer);
127     jbDataWrite(rootname, data);
128     fprintf(stderr, "Number of classes: %d\n", classer->nclass);
129 
130         /* Render the pages from the classifier data.
131          * Use debugflag == FALSE to omit outlines of each component. */
132     pixa = jbDataRender(data, FALSE);
133 
134         /* Write the pages out */
135     npages = pixaGetCount(pixa);
136     if (npages != nfiles)
137         fprintf(stderr, "npages = %d, nfiles = %d, not equal!\n",
138                 npages, nfiles);
139     for (i = 0; i < npages; i++) {
140         pix = pixaGetPix(pixa, i, L_CLONE);
141         snprintf(filename, BUF_SIZE, "%s.%03d", rootname, i);
142         fprintf(stderr, "filename: %s\n", filename);
143         pixWrite(filename, pix, IFF_PNG);
144         pixDestroy(&pix);
145     }
146 
147 #if  DISPLAY_DIFFERENCE
148     {
149     char *fname;
150     PIX  *pix1, *pix2;
151     fname = sarrayGetString(safiles, 0, L_NOCOPY);
152     pix1 = pixRead(fname);
153     pix2 = pixaGetPix(pixa, 0, L_CLONE);
154     pixXor(pix1, pix1, pix2);
155     pixWrite("/tmp/lept/jb/output_diff.png", pix1, IFF_PNG);
156     pixDestroy(&pix1);
157     pixDestroy(&pix2);
158     }
159 #endif  /* DISPLAY_DIFFERENCE */
160 
161 #if  DEBUG_TEST_DATA_IO
162     {
163     JBDATA  *newdata;
164     PIX     *newpix;
165     PIXA    *newpixa;
166     l_int32  same, iofail;
167 
168         /* Read the data back in and render the pages */
169     newdata = jbDataRead(rootname);
170     newpixa = jbDataRender(newdata, FALSE);
171     iofail = FALSE;
172     for (i = 0; i < npages; i++) {
173         pix = pixaGetPix(pixa, i, L_CLONE);
174         newpix = pixaGetPix(newpixa, i, L_CLONE);
175         pixEqual(pix, newpix, &same);
176         if (!same) {
177             iofail = TRUE;
178             fprintf(stderr, "pix on page %d are unequal!\n", i);
179         }
180         pixDestroy(&pix);
181         pixDestroy(&newpix);
182 
183     }
184     if (iofail)
185         fprintf(stderr, "read/write for jbdata fails\n");
186     else
187         fprintf(stderr, "read/write for jbdata succeeds\n");
188     jbDataDestroy(&newdata);
189     pixaDestroy(&newpixa);
190     }
191 #endif  /* DEBUG_TEST_DATA_IO */
192 
193 #if  RENDER_DEBUG
194         /* Use debugflag == TRUE to see outlines of each component. */
195     pixadb = jbDataRender(data, TRUE);
196         /* Write the debug pages out */
197     npages = pixaGetCount(pixadb);
198     for (i = 0; i < npages; i++) {
199         pix = pixaGetPix(pixadb, i, L_CLONE);
200         snprintf(filename, BUF_SIZE, "%s.db.%04d", rootname, i);
201         fprintf(stderr, "filename: %s\n", filename);
202         pixWrite(filename, pix, IFF_PNG);
203         pixDestroy(&pix);
204     }
205     pixaDestroy(&pixadb);
206 #endif  /* RENDER_DEBUG */
207 
208 #if  DISPLAY_ALL_INSTANCES
209         /* display all instances, organized by template */
210     pix = pixaaDisplayByPixa(classer->pixaa,
211                              X_SPACING, Y_SPACING, MAX_OUTPUT_WIDTH);
212     pixWrite("/tmp/lept/jb/output_instances", pix, IFF_PNG);
213     pixDestroy(&pix);
214 #endif  /* DISPLAY_ALL_INSTANCES */
215 
216     pixaDestroy(&pixa);
217     sarrayDestroy(&safiles);
218     jbClasserDestroy(&classer);
219     jbDataDestroy(&data);
220 
221     /*--------------------------------------------------------------*/
222 
223 #endif
224 
225     return 0;
226 }
227 
228 
229