1 /*
2 rule based OCR engine, partly rewritten for edges (old=pixel)
3 */
4 /*
5 This is a Optical-Character-Recognition program
6 Copyright (C) 2000-2018 Joerg Schulenburg
7
8 This program is free software; you can redistribute it and/or
9 modify it under the terms of the GNU General Public License
10 as published by the Free Software Foundation; either version 2
11 of the License, or (at your option) any later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
21
22 see README for email address
23
24 >>> DO NOT EDIT THIS FILE IF YOU NOT REALLY KNOW WHAT YOU ARE DOING! <<<
25
26 I have invested a lot of time in writing this part of the program.
27 This engine should always recognize chars correctly or return UNKNOWN.
28 If you change something, test all other example files too,
29 to be sure that the overall results really get better. (JoergS)
30
31 This engine was pixel-based until 0.40, which was not successful enough.
32 Also, code changes always had side effects. The vectorization of the code
33 starts from version 0.41 with the chars XNz and seems to be much easier
34 to handle. Vectorization means we frame each character by a chain of
35 vectors and do not care about pixels anymore. Unfortunately I have to
36 replace all the pixel codes, which is a long process. Old code will be lost.
37 (JorgS)
38
39
40 ToDo:
41 - if box1->p and b differ, reduce probability
42 - probability makes life much easier here
43 - use only one box!?, may be bits have usefull infos
44 - divide this file, suggestion: classify chars:
45 high=ABCDEFGHIJKLMNOPQRSTUVWXYZbdfhklt, low=acegijmnopqrsuvwxyz
46 or
47 often_used=etianmsurwdkgo rarely_used=hvjcflpqxyz.,:
48 or
49 every char (large overhead)
50 - two-pass version (first pass without tolerance)
51 2nd pass with tolerance (ex: one tiny more in sdata->holes)
52
53 general feature extraction:
54 - white holes at middle, upper, lower position (cost much time)
55 - test lines and triangles instead of rectangles
56
57 char is removed, wchar_t is used (better code)
58
59 making a static global variable-set x.x0,x.x1, and call test_a,
60 test_b ... (faster compilation, but not reentrant!)
61
62 - adding slant-angle (if detected) to distinguish between l and / ?
63 - ac (alternate chars) as string add_ac(box1,"/") => box1->ac="Il/";
64 for better context correction or output: "Ha[lI][lI]o!"
65
66 */
67
68 #include <stdlib.h>
69 #include <stdio.h>
70 // #include "pgm2asc.h"
71 #include "ocr0.h"
72 // #include "ocr1.h"
73 #include "amiga.h"
74 #include "pnm.h"
75 #include "gocr.h"
76 #include "unicode_defs.h"
77
78 #include "ocr0_dbg.h" /* define DO_DEBUG IFV MM Setac Break MSG DBG */
79
80 /* extern "C"{ */
81
82 // static inline int sq(int x) { return x*x; } /* square */
83
84 /*
85 * go from vector j1 to vector j2 and measure maximum deviation of
86 * the steps from the line connecting j1 and j2
87 * return the squared maximum distance
88 * in units of the box size times 1024
89 * ToDo: 1) better give back max-dx and max-dy ???
90 * errors if j1 and j2 are in different frames or belong to
91 * more then one frame?
92 * 2) Better get deviation from a complete vector graphic?
93 * The vectorgraphic is the ideal test char adapted to the
94 * extrem vertices of the real char.
95 */
line_deviation(struct box * box1,int j1,int j2)96 int line_deviation( struct box *box1, int j1, int j2 ) {
97 int r1x, r1y, r2x, r2y, r3x, r3y, i, x, y, d, dist, maxdist=0, frame, l2;
98 r1x=box1->frame_vector[j1][0];
99 r1y=box1->frame_vector[j1][1];
100 r2x=box1->frame_vector[j2][0];
101 r2y=box1->frame_vector[j2][1];
102 if (!box1->num_frames) return(-1);
103 if (j1<0 || j1>box1->num_frame_vectors[box1->num_frames-1] ||
104 j2<0 || j2>box1->num_frame_vectors[box1->num_frames-1]) {
105 fprintf(stderr,"Error in "__FILE__" L%d: idx out of range",__LINE__);
106 return(-1);
107 }
108 /* get the frame the endvector belongs to */
109 for (i=0;i<box1->num_frames;i++)
110 if (j2<box1->num_frame_vectors[i]) break;
111 frame=i;
112 /* frame(j1)<=frame(j2) possible */
113 if (j1!=j2) // 2017-03 start j1+1 and j1!=j2 added
114 for (i=j1+1;;i++) { // do it for each vector between j1 and j2
115 if (i >= box1->num_frame_vectors[frame])
116 i=((frame)?box1->num_frame_vectors[frame-1]:0); /* go around */
117 if (i==j2) break;
118 // for (i=j1;i!=j2;i=(i+1)%box1->num_frame_vectors[0]) {~}
119 r3x=box1->frame_vector[i][0];
120 r3y=box1->frame_vector[i][1];
121 // Language=german
122 // german: Abstand Punkt von Strecke, Laenge Lotrechte
123 // germ.Strecke : l1=(r1+r2)/2+d*(r2-r1)/2 for d=-1..1
124 // germ.Lotrechte: l2=r3+b*[-(r2-r1).y,(r2-r1).x]
125 // Schnittpunkt : l1=l2,
126 // eq1x: (r1x+r2x)/2-r3x+d*(r2x-r1x)/2+b*(r2y-r1y)=0
127 // eq1y: (r1y+r2y)/2-r3y+d*(r2y-r1y)/2-b*(r2x-r1x)=0
128 // eq2x: b*(r2x-r1x)*(r2y-r1y)=-((r1x+r2x)/2-r3x+d*(r2x-r1x)/2)*(r2x-r1x)
129 // eq2y: b*(r2x-r1x)*(r2y-r1y)= ((r1y+r2y)/2-r3y+d*(r2y-r1y)/2)*(r2y-r1y)
130 // eq2y-eq2x: ... in units of 1024 (fast integer rounded correctly)
131 l2=sq(r2x-r1x)+sq(r2y-r1y); // square of distance r2-r1
132 if (l2==0) {
133 // fprintf(stderr,"ocr0 L%d: r1==r2 r1= %d %d",__LINE__, r1x, r1y); // debugging
134 d=-1024;
135 } else
136 d=-( ((r1x+r2x)-2*r3x)*(r2x-r1x)
137 +((r1y+r2y)-2*r3y)*(r2y-r1y))*1024/l2; // ..-1024..+1024..
138 // d is rel. position on j1-j2 line -1024=j1 0=center +1024=j2
139 if (d<=-1024) { x=r1x*1024; y=r1y*1024; } // starting point
140 else {
141 if (d>=1024) { x=r2x*1024; y=r2y*1024; } // end point
142 else {
143 x=(r1x+r2x+1)*1024/2+(d*(r2x-r1x))/2; // 1024 units
144 y=(r1y+r2y+1)*1024/2+(d*(r2y-r1y))/2;
145 /* we have the crossing point x,y now */
146 }
147 }
148 dist=sq((x-r3x*1024)/(box1->x1-box1->x0+1+4)) // 2017-03 +2 (small fonts)
149 +sq((y-r3y*1024)/(box1->y1-box1->y0+1+4)); // 0..2*sq(1024)
150 // d is rel. position on j1-j2 line -1024=j1 0=center +1024=j2
151 // j1 i j2 x[i] y[i]
152 DBG( IFV fprintf(stderr,"\nDBG deviation j1-j2 %2d %2d %2d d %5.2f xy %3d %3d %4.1f %4.1f dist %5d",
153 j1,i,j2, d/1024., r3x-box1->x0,r3y-box1->y0,x/1024.-r3x,y/1024.-r3y,dist);)
154 if (dist>maxdist) maxdist=dist;
155 // for debugging:
156 // fprintf(stderr,"\nDBG dev: %d-%d-%d dist=%5d max=%5d d=%d %d,%d-%d,%d"
157 // " vector= %d %d crosspoint= %d %d ",
158 // j1,i,j2,dist,maxdist,d,r1x,r1y,r2x,r2y,r3x,r3y,x,y);
159 } // loop i=j1..j2
160 return maxdist;
161 } // line_deviation
162
163 /*
164 * search vectors between j1 and j2 for nearest point a to point r
165 * example:
166 *
167 * r-> $$...$$ $ - mark vectors
168 * @@$..@@ @ - black pixels
169 * @@$..@@ . - white pixels
170 * @@@@.$@
171 * a-> @@$@$@@
172 * @$.@@@@
173 * @@..$@@
174 * @@..$@@
175 * j1 --> $$...$$ <-- j2
176 *
177 * ToDo: vector aa[5] = {rx,ry,x,y,d^2,idx} statt rx,ry?
178 * j1 and j2 must be in the same frame
179 * return aa?
180 * 2009-07:
181 * - change from normalized (dx=128,dy=128) to absolute distance
182 * - simpler and no squeeze effect (problem getting right i2 for "3")
183 * 2018-09:
184 * thin fonts may have inner and outer vector nearest to rx,ry, 5x8.y
185 *
186 */
nearest_frame_vector(struct box * box1,int j1,int j2,int rx,int ry)187 int nearest_frame_vector( struct box *box1, int j1, int j2, int rx, int ry) {
188 int x,y,d,i,aa[4]; /* x,y,normalized_distance^2,vector_index */
189 int frame=0;
190 // int x0=box1->x0, y0=box1->y0,
191 // x1=box1->x1, y1=box1->y1;
192 // int dx=box1->x1-x0+1, dy=box1->y1-y0+1;
193 // 2017-03 fix j1,j2 >= max (old: j1,j2 > max)
194 if (!box1->num_frames) return(-1);
195 if (j1<0 || j1>=box1->num_frame_vectors[box1->num_frames-1] ||
196 j2<0 || j2>=box1->num_frame_vectors[box1->num_frames-1]) {
197 fprintf(stderr,"Error in "__FILE__" L%d: idx %d-%d out of range\n",__LINE__,j1,j2);
198 out_x(box1);
199 return(-1);
200 }
201 aa[0]=x=box1->frame_vector[j2][0]; /* x */
202 aa[1]=y=box1->frame_vector[j2][1]; /* y */
203 /* maximum is (distance*128)^2 if r is inside the box */
204 // aa[2]=d=2*sq(128)+sq((rx-(x0+x1)/2)*128/dx)+sq((ry-(y0+y1)/2)*128/dy);
205 aa[2]=d=2*(sq(x-rx)+sq(y-ry)); /* must be greater than min. dist, Jul09 */
206 aa[3]=j2; /* vector index */
207 /* get the frame the endvector belongs to */
208 for (i=0;i<box1->num_frames;i++)
209 if (j2<box1->num_frame_vectors[i]) break;
210 frame=i;
211 /* frame(j1)<=frame(j2) possible */
212 for (i=j1;;i++) {
213 if (i >= box1->num_frame_vectors[frame])
214 i=((frame)?box1->num_frame_vectors[frame-1]:0); /* go around */
215 x=box1->frame_vector[i][0]; /* take a vector */
216 y=box1->frame_vector[i][1];
217 /* distance to upper left end, normalized to 128 */
218 // d=sq((x-rx)*128/dx)+sq((y-ry)*128/dy); // old 2009-07
219 d=sq(x-rx)+sq(y-ry);
220 if (d<aa[2]) { aa[0]=x; aa[1]=y; aa[2]=d; aa[3]=i; }
221 if (i==j2) break;
222 }
223 return aa[3];
224 }
225
226 // test for umlauts, if ret>0 and m==1 box1 is changed
227 // m>0 modify box1->dots
228 // m==2 modify box1->y0
229 // called by pgm2asc + ocr0(?)
230 // ToDo: because we do modifications here, call it with a copy!?
231 // modification can have undesired side effects else
232 // do not remove upper dot from ":" 2010-09-30
//
// Summary:
//   Looks in the upper half of box1 for a mark (dots, accent, ring ...) that
//   is separated from the rest of the glyph by a row for which get_bw()
//   reports 0, and classifies the shape of that mark.
//   box1     - box to examine; x0,x1,y0,y1,m1,m2,m3,p,num_frames and
//              frame_vol are read here
//   cs       - threshold value handed on to get_bw/num_cross/num_obj/loop/
//              num_hole; getpixel() results are compared against it
//   m        - 0: box1 is never written.
//              !=0: box1->dots and box1->modifier are stored (only if the
//              mark is more than one row high, see below) and box1->y0 may
//              be moved down; with bit 1 set (m&2) box1->y0 is set to the
//              lower border of the detected mark
//   modifier - if not NULL it receives the detected modifier code, '\0' if
//              none.  NOTE: it is NOT written on the early "return 0" paths
//              at the top of the function.
//   returns    0 = nothing found, 1 = a single mark, 2 = DIAERESIS
testumlaut(struct box * box1,int cs,int m,wchar_t * modifier)233 int testumlaut(struct box *box1, int cs, int m, wchar_t *modifier){
234 // pix p=*(box1->p);
235 int r,y,x,x0,x1,y0,y1,dx,dy,m1,m2,m3,
236 xl,xr,yu,yl; // left, right, upper and lower border of dots
237 wchar_t mod='\0'; /* (TeX-) modifier ~"'` for compose() */
238 DBG( wchar_t c_ask='"'; )
239
240 if (box1->num_frames<1) return 0;
// two frames of similar volume (difference <= 1/8 of their sum) in a box
// that starts below m1 are treated as ":" and left alone
241 if (box1->num_frames==2) {
242 if (box1->y0>box1->m1
243 && abs(box1->frame_vol[0]
244 -box1->frame_vol[1])
245 <=abs(box1->frame_vol[0]
246 +box1->frame_vol[1])/8) return 0; // ":" 2010-09-30
247 }
248 r=0;
249 x0=box1->x0; x1=box1->x1; dx=x1-x0+1;
250 y0=box1->y0; y1=box1->y1; dy=y1-y0+1;
251 m1=box1->m1; m2=box1->m2; m3=box1->m3;
// the search window starts as the full box width with zero height
252 xl=x0; xr=x1; yu=yl=y0;
253 if (dy < 5 || 4*y0 > 3*m2+m3) return 0; // no low chars: .,-=
254 if (y0 >= m2 && y1 >= m3) return 0; // 2010-10-08 ocr-a + ocr-b + qemu ';'
255 /* modifier in box included? */
256 if( 2*y1 > m1+m2 ){
257 /* modifier in box included? */
// scan the rows of the upper half from the top for the first row where
// get_bw(...) returns 0 (presumably a row without black pixels)
258 for(y=y0;2*y<y0+y1;y++)if( get_bw(xl,xr,y,y,box1->p,cs,1)==0 ) break;
259 if( 2*y<y0+y1 ){ /* yes => extract */
260 yl=y;
// this loop only advances the local y; the value is not used any more
// since the y0 update below was disabled
261 while( get_bw(xl,xr,y,y,box1->p,cs,1)==0 && 2*y<=y0+y1) y++;
262 // 2010-09-24 this was bad code, destroying earlier good work for ":"
263 // if( m&2 ) box1->y0=y; /* set new upper bond */
264 }
265 }
// yu is still y0 here, so this triggers when no separating row was found
// below the first row of the box
266 if( yu>=yl ) { if(m) box1->dots=0; return 0; } /* nothing found */
// narrow the window horizontally: move xl right / xr left until a column
// for which get_bw() reports 1 within rows yu..yl is reached
267 if( get_bw(xl-1,xl-1,yu,yl-1,box1->p,cs,1)==1 ) // neighbour overlap?
268 while( get_bw(xl ,xl ,yu,yl-1,box1->p,cs,1)==1 && 2*xl<x0+x1) xl++;
269 for(;xl<x1;xl++)if( get_bw(xl,xl,yu,yl,box1->p,cs,1)==1 ) break;
270 for(;xr>xl;xr--)if( get_bw(xr,xr,yu,yl,box1->p,cs,1)==1 ) break;
271
// everything below (including the stores into box1->dots and
// box1->modifier) only happens if the mark spans at least two rows
272 if ( yl-1>yu ) { // tall box ij"a"o"u
273 #if 0
274 // temporary set new y0 (not needed!)
275 x=box1->y0; box1->y0=m1; out_x(box1); box1->y0=x;
276 fprintf(stderr,"\n#testumlaut x= %d %d m1=%d m2=%d",x0,y0,m1-y0,m2-y0);
277 fprintf(stderr," yu=%d yl=%d xl=%d xr=%d",yu-y0,yl-y0,xl-x0,xr-x0);
278 #define DEBUG 1
279 #endif
280 {
281
282 x=xl;y=yu;
// r becomes 1 only if the window is not empty and both its left and its
// right side have a free column (at the border or just outside of it)
283 if( get_bw(xl,x1+1,yu,yl-1,box1->p,cs,1)==0 ) r=0; // neighbour overlap?
284 else
285 if( get_bw(xl ,xl ,yu,yl-1,box1->p,cs,1)==0
286 || get_bw(xl-1,xl-1,yu,yl-1,box1->p,cs,1)==0 ) // be sure there are gap to neighbours
287 if( get_bw(xr ,xr ,yu,yl-1,box1->p,cs,1)==0
288 || get_bw(xr+1,xr+1,yu,yl-1,box1->p,cs,1)==0 )
// note: this x shadows the function-level x
289 { int i,j,x;
290 r=1;
291 // ...@@@.... RING_ABOVE // ..@@@..@@. TILDE
292 // ..@...@... // @@.@@@@@..
293 // ..@...@... // @.........
294 // ..@..@@...
295 // ...@@@....
// i: first row after the topmost run of rows reported as 1;
// j: first column reported as 0 after the leftmost run of 1-columns;
// x: next column reported as 1 after j (all within rows yu..i)
296 for (i=yu;i<yl;i++) if (get_bw(xl,xr,i,i,box1->p,cs,1)==1) break;
297 for ( ;i<yl;i++) if (get_bw(xl,xr,i,i,box1->p,cs,1)==0) break;
298 for (j=xl;j<xr;j++) if (get_bw(j,j,yu,i,box1->p,cs,1)==1) break;
299 for ( ;j<xr;j++) if (get_bw(j,j,yu,i,box1->p,cs,1)==0) break;
300 for ( x=j;x<xr;x++) if (get_bw(x,x,yu,i,box1->p,cs,1)==1) break;
301 // vert. gap detected
302 if( j<xr && x<xr && j<x && xr-xl>2
303 && num_obj(xl,xr,yu,yl-1,box1->p,cs)>=2 // not best!!!
304 && num_cross(xl,xr,yu +(yl-yu)/4,yu+ (yl-yu)/4,box1->p,cs) == 2
305 && num_cross(xl,xr,yl-1-(yl-yu)/2,yl-1-(yl-yu)/2,box1->p,cs) == 2
306 ){ // may be the following lines are not quite ok
// extend the lower border over the following rows reported as 0
// (at most down to the middle of the box)
307 while( get_bw(xl,xr,yl,yl,box1->p,cs,1)==0 && 2*yl<y0+y1) yl++;
308 r=2;
309 // out_x(box1);printf(" x,y=%d,%d i=%d xl=%d xr=%d yu=%d yl=%d",x0,y0,i-x0,xl-x0,xr-x0,yu-y0,yl-y0);
310 mod = DIAERESIS;
311 }
312 // ToDo: bad work? use vector code or extra subbox outside! 2010-09
313 if ((m&2) && box1->y0!=yl) {
314 MSG(fprintf(stderr,"set new upper bound y0+= %+3d",yl-box1->y0);)
315 box1->y0=yl;
316 }
317 /* if( m&2 ) box1->y0= ( (r==1) ? yu : yl ); */
318 // out_x(box1);
319 }
// no separated mark: skip leading rows reported as 0 and, for any m!=0,
// move the top of the box down to yu
320 if (r==0){ // divided fr != fi
321 while( get_bw(x0,x1,yu,yu,box1->p,cs,1)==0 && 2*yu<y0+y1) yu++;
322 if (m) box1->y0=yu;
323 }
// single mark: yl is decremented (to the row above the separating row) and
// the shape tests follow.  They are evaluated in sequence and each match
// overwrites mod, so the LAST matching test decides.
324 if( r==1 ){ yl--;
325 // .@@@. ..@@.
326 // .@@.. .@@..
327 // .@... .@@..
328 //
329 // if( loop(box1->p,xl,yu,xr-xl,cs,0,RI)
330 // > loop(box1->p,xl,yl,xr-xl,cs,0,RI) // +dx/8
331 // && loop(box1->p,xr,yu,xr-xl,cs,0,LE)
332 // < loop(box1->p,xr,yl,xr-xl,cs,0,LE)) // -dx/8 ) // é Nov03
333 if( loop(box1->p,xl,yu,xr-xl,cs,0,RI)
334 - loop(box1->p,xr,yu,xr-xl,cs,0,LE)
335 > loop(box1->p,xl,yl,xr-xl,cs,0,RI) // +dx/8
336 - loop(box1->p,xr,yl,xr-xl,cs,0,LE)+1) // -dx/8 ) // é Nov03
337 mod = ACUTE_ACCENT; // '
338
// window more than 3 times wider than high and get_bw(...,2)==0
339 if( xr-xl+1 > 3*(yl-yu+1)
340 && get_bw(xl,xr,yu,yl,box1->p,cs,2)==0 )
341 mod = MACRON; // "-" above
342
343 // .@@@. .@@..
344 // ..@@. ..@@.
345 // ...@. ..@@.
346 //
347 // if( loop(box1->p,xl,yu,xr-xl,cs,0,RI)
348 // < loop(box1->p,xl,yl,xr-xl,cs,0,RI) // -dx/8
349 // && loop(box1->p,xr,yu,xr-xl,cs,0,LE)
350 // > loop(box1->p,xr,yl,xr-xl,cs,0,LE) ) // +dx/8 ) à Nov03
351 if( loop(box1->p,xl,yu,xr-xl,cs,0,RI)
352 - loop(box1->p,xr,yu,xr-xl,cs,0,LE)
353 < loop(box1->p,xl,yl,xr-xl,cs,0,RI) // -dx/8
354 - loop(box1->p,xr,yl,xr-xl,cs,0,LE) -1 ) // +dx/8 ) à Nov03
355 mod = GRAVE_ACCENT; // ``
356
357 #ifdef DEBUG
358 fprintf(stderr,"\n#testumlaut x= %d %d m1=%d m2=%d",x0,y0,m1-y0,m2-y0);
359 fprintf(stderr," yu=%d yl=%d xl=%d xr=%d",yu-y0,yl-y0,xl-x0,xr-x0);
360 #endif
// roughly square window: candidate for a single dot
361 if( (xr-xl+1) < 2*(yl-yu+1)+2
362 && 2*(xr-xl+1)+2 > (yl-yu+1) ) {
363 int i,i1,i2,i3,i4;
// i1: second loop() run length on the middle row, i2: the same on the
// middle column, i3/i4: run lengths along the two diagonals
364 i1=loop(box1->p,xl ,(yu+yl)/2,xr-xl+1,cs,0,RI);
365 i1=loop(box1->p,xl+i1,(yu+yl)/2,xr-xl+1,cs,1,RI);
366 i2=loop(box1->p,(xl+xr)/2,yu ,yl-yu+1,cs,0,DO);
367 i2=loop(box1->p,(xl+xr)/2,yu+i2,yl-yu+1,cs,1,DO);
// caution: in the four lines below only the if/break belongs to the for
// loop; the assignment to i3/i4 at the end of each line runs once, after
// the loop has finished
368 for (i=0;i<xr-xl+1 && i<yl-yu+1;i++)
369 if (getpixel(box1->p,xl+i,yu+i)< cs) break; i3=i;
370 for ( ;i<xr-xl+1 && i<yl-yu+1;i++)
371 if (getpixel(box1->p,xl+i,yu+i)>=cs) break; i3=i-i3;
372 for (i=0;i<xr-xl+1 && i<yl-yu+1;i++)
373 if (getpixel(box1->p,xr-i,yu+i)< cs) break; i4=i;
374 for ( ;i<xr-xl+1 && i<yl-yu+1;i++)
375 if (getpixel(box1->p,xr-i,yu+i)>=cs) break; i4=i-i4;
376 #ifdef DEBUG
377 fprintf(stderr,"\n#DEBUG DOT_ABOVE %d %d %d %d",i1,i2,i3,i4);
378 #endif
379 if ( (xr-xl<5 && yl-yu<8) /* too small */
380 || (i1>=(xr-xl+1)/2+2 && i2>=(yl-yu+1)/2+2 /* symmetrical */
381 && abs(i3-i4)<=i1/4+2 && abs(i1-i2)<=i1/4+2
382 && abs(i3-i1)<=i1/4+4 && abs(i4-i2)<=i1/4+4)
383 )
384 mod = DOT_ABOVE; // "." above "ij", not ":;", ToDo: improve it!
385 }
386
// one crossing in the top row, two in (one of) the bottom rows
387 if( ( loop(box1->p,xl,yu ,xr-xl,cs,0,RI)
388 > loop(box1->p,xl,yl ,xr-xl,cs,0,RI)-dx/8
389 || loop(box1->p,xl,yu ,xr-xl,cs,0,RI)
390 > loop(box1->p,xl,yl-1,xr-xl,cs,0,RI)-dx/8 )
391 && ( loop(box1->p,xr,yu ,xr-xl,cs,0,LE)
392 > loop(box1->p,xr,yl ,xr-xl,cs,0,LE)-dx/8
393 || loop(box1->p,xr,yu ,xr-xl,cs,0,LE)
394 > loop(box1->p,xr,yl-1,xr-xl,cs,0,LE)-dx/8 )
395 && num_cross(xl,xr,yu ,yu ,box1->p,cs) == 1
396 && ( num_cross(xl,xr,yl ,yl ,box1->p,cs) == 2
397 || num_cross(xl,xr,yl-1,yl-1,box1->p,cs) == 2 ))
398 mod = CIRCUMFLEX_ACCENT; // "^"
399
// the opposite: two crossings in (one of) the top rows, one at the bottom
400 if( ( loop(box1->p,xl,yu ,xr-xl,cs,0,RI)
401 < loop(box1->p,xl,yl ,xr-xl,cs,0,RI)-dx/10
402 || loop(box1->p,xl,yu+1,xr-xl,cs,0,RI)
403 < loop(box1->p,xl,yl ,xr-xl,cs,0,RI)-dx/10 )
404 && ( loop(box1->p,xr,yu ,xr-xl,cs,0,LE)
405 < loop(box1->p,xr,yl ,xr-xl,cs,0,LE)-dx/10
406 || loop(box1->p,xr,yu+1,xr-xl,cs,0,LE)
407 < loop(box1->p,xr,yl ,xr-xl,cs,0,LE)-dx/10 )
408 && ( num_cross(xl,xr,yu ,yu ,box1->p,cs) == 2
409 || num_cross(xl,xr,yu+1,yu+1,box1->p,cs) == 2 )
410 && num_cross(xl,xr,yl ,yl ,box1->p,cs) == 1 )
411 mod = CARON; // "v" above
412
// same conditions as for CARON, plus a bowed left side and a window more
// than 10 pixels wide
413 if( /* test for bow (new0.3.6) */
414 loop(box1->p,xl,yu ,xr-xl,cs,0,RI)
415 + loop(box1->p,xl,yl ,xr-xl,cs,0,RI)
416 - 2*loop(box1->p,xl,(yl+yu)/2,xr-xl,cs,0,RI) > dx/16+1
417 && xr-xl>10)
418 if( ( loop(box1->p,xl,yu ,xr-xl,cs,0,RI)
419 < loop(box1->p,xl,yl ,xr-xl,cs,0,RI)-dx/10
420 || loop(box1->p,xl,yu+1,xr-xl,cs,0,RI)
421 < loop(box1->p,xl,yl ,xr-xl,cs,0,RI)-dx/10 )
422 && ( loop(box1->p,xr,yu ,xr-xl,cs,0,LE)
423 < loop(box1->p,xr,yl ,xr-xl,cs,0,LE)-dx/10
424 || loop(box1->p,xr,yu+1,xr-xl,cs,0,LE)
425 < loop(box1->p,xr,yl ,xr-xl,cs,0,LE)-dx/10 )
426 && ( num_cross(xl,xr,yu ,yu ,box1->p,cs) == 2
427 || num_cross(xl,xr,yu+1,yu+1,box1->p,cs) == 2 )
428 && num_cross(xl,xr,yl ,yl ,box1->p,cs) == 1 )
429 mod = BREVE; // round "u" above
430
// two crossings in the top and in the bottom row, with different
// left/right offsets at the top and the bottom
431 if( xr-xl>3 && yl-yu>1 )
432 if( loop(box1->p,xl,yu,xr-xl,cs,0,RI)
433 > loop(box1->p,xl,yl,xr-xl,cs,0,RI)
434 && loop(box1->p,xr,yu,xr-xl,cs,0,LE)
435 < loop(box1->p,xr,yl,xr-xl,cs,0,LE)
436 && num_cross(xl,xr,yu,yu,box1->p,cs) == 2
437 && num_cross(xl,xr,yl,yl,box1->p,cs) == 2 )
438 mod = TILDE;
439
// more than one crossing horizontally and vertically through the middle
// and exactly one hole reported by num_hole()
440 if( xr-xl>2 && yl-yu>2)
441 if( num_cross(xl,xr,(yu+yl)/2,(yu+yl)/2,box1->p,cs) >1 )
442 if( num_cross((xl+xr)/2,(xl+xr)/2,yu,yl,box1->p,cs) >1 )
443 if( num_hole(xl,xr,yu,yl,box1->p,cs,NULL) == 1 )
444 // if (sdata->holes.num) ... not in this range?
445 mod = RING_ABOVE;
446
447 #ifdef DEBUG
// NOTE(review): the "x=" fields below receive yu/yl and the "y=" fields
// receive xl/xr, i.e. the labels look swapped (debug output only)
448 printf("\n#DEBUG umlaut mod=0x%04x x=%d..%d y=%d..%d r=%d %s",
449 (int)mod,yu-box1->y0,yl-box1->y0,
450 xl-box1->x0,xr-box1->x0,r,((mod==CARON)?"CARON":
451 ((mod==ACUTE_ACCENT)?"ACUTE":
452 ((mod==TILDE)?"TILDE":"?"))));
453 out_x(box1);
454 #endif
455
456 }
457 }
458 if (m) box1->dots=r; // set to 0 also possible after division
459 if (m) box1->modifier=mod; /* should be reset after compose ??? */
460 MSG(fprintf(stderr,"testumlaut mod=%s dots=%d y0+%d m=%d nac=%d",
461 decode(mod,ASCII),r,box1->y0-y0,m,box1->num_ac);)
462 }
463 // printf(" modifier=%c",mod);
464 if (modifier) *modifier=mod; /* set modifier */
465 return r;
466 }
467
468
/*
 * ocr0_eE - rule based test whether the box in sdata looks like 'e' or 'E'.
 *
 * Three tests run one after the other.  Each one is written as a
 * for(...;size condition;) block whose last statement is `break`, so it is
 * executed at most once and can be left early via break / Break:
 *   1. vector based test for a fat 'e' (exactly one frame)
 *   2. older pixel based test for 'e'
 *   3. test for 'E' (exactly one frame)
 * Each test starts with ad=100 and scales ad down for every weak
 * indication.  A test that runs to its end hands its character and ad to
 * Setac() and returns the character right away if ad is still >=100.
 * (Setac, Break, MSG and DBG are macros from ocr0_dbg.h; Break presumably
 * expands to a traced break - confirm there.)
 *
 * sdata   shared data of the engine; box1, aa, bp, cs, hchar, gchar and
 *         holes are read here
 * returns 'e' or 'E' on a full match, otherwise box1->c
 */
ocr0_eE(ocr0_shared_t * sdata)469 static wchar_t ocr0_eE(ocr0_shared_t *sdata){
470 struct box *box1=sdata->box1;
471 int i,i1,i2,j,d,x,y,hchar=sdata->hchar,gchar=sdata->gchar,bad_e=0,
472 x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
473 int dx=x1-x0+1,dy=y1-y0+1, /* size */
474 ad; /* tmp-vars */
475 int (*aa)[4]=sdata->aa; /* corner-points, (x,y,dist^2,vector_idx) */
476
477 // --- most frequent letter e first!!!
478 // new vector based fat e variant without holes 2010-10-10
479 // --- test e ---------------------------------------------------
480 for(ad=d=100;dx>2 && dy>3;){ // min 3x4 (smallest seen is 5x6)
481 DBG( wchar_t c_ask='e'; )
482 // if (sdata->holes.num > 0) break;
483 if (box1->num_frames != 1) break;
484 /* 7x7
485
486 .@@@@@.<- ..0@@$.
487 @@@@@@@ .@@@@@3
488 @@@@@@@ $@@@@@@ <- min_gray=101 b=0 w=233 (ToDo17: check mingray)
489 @@@@@@@ <- 2 @@$@@@$
490 @@..... <- 1 1$
491 @@@@@@@ .@$@@@$
492 .@@@@@@<- ..$@@@2 see tmp13/sslmozFP_Fi.png 8x9-font a0.x>dx/3?
493 */
// the four corner points must sit near their corners: aa[0] upper left,
// aa[1] left side in the lower half, aa[2] lower right, aa[3] upper right
494 if (aa[0][0]>x0+dx/3 || aa[0][1]>y0+dy/4) Break;
495 if (aa[1][0]>x0+dx/3 || aa[1][1]<y1-dy/2) Break;
496 if (aa[2][0]<x1-dx/4 || aa[2][1]<y1-dy/4) Break;
497 if (aa[3][0]<x1-dx/4 || aa[3][1]>y0+dy/4) Break;
498 // upper body must at least 2 times thicker than low line
// i: second loop() run length going down from the top at the middle
// column, j: the same going up from the bottom
499 i= loop(box1->p,x0+dx/2,y0 ,y1-y0,cs,0,DO); if (i>dy/8) Break;
500 i= loop(box1->p,x0+dx/2,y0+i,y1-y0,cs,1,DO);
501 j= loop(box1->p,x0+dx/2,y1 ,y1-y0,cs,0,UP); if (j>dy/8) Break;
502 j= loop(box1->p,x0+dx/2,y1-j,y1-y0,cs,1,UP); if (j>dy/3) Break;
503 if (i<2*j-dy/16) Break;
504 // leftmost gap from the right
// i1: vector between lower right and upper right corner nearest to the
// left border at 2/3 of the height; it has to lie in the left third and
// in the lower half
505 i1=nearest_frame_vector(box1,aa[2][3],aa[3][3], x0, y0+2*dy/3);
506 if (box1->frame_vector[i1][0]>=x0+(dx+2)/3) Break; // 2017-03 tmp15/ssl
507 if (box1->frame_vector[i1][1]<=y0+dy/2) Break; // strong for bad e
// i2: from there back to the right border
508 i2=nearest_frame_vector(box1,i1 ,aa[3][3], x1, y0+2*dy/3);
509 if (box1->frame_vector[i2][0]< x1-dx/8-1) Break;
510 if (box1->frame_vector[i2][1]< y0+dy/2-1) Break; // strong for bad e
511 if (box1->m2) {
512 if (sdata->gchar) ad=98*ad/100;
513 if (sdata->hchar) ad=98*ad/100;
514 } else ad=99*ad/100;
515
516 Setac(box1,(wchar_t)'e',ad);
517 if (ad>=100) return 'e';
518 break;
519 }
520 // old-pixel based variant
521 // --- test e ---------------------------------------------------
522 for(ad=d=100;dx>2 && dy>3;){ // min 3x4 (smallest seen is 5x6)
523 DBG( wchar_t c_ask='e'; )
524 // if (sdata->holes.num > 2) Break; /* tolerant against a tiny hole */
525 // if (sdata->holes.num != 1) ad=97*ad/100;
526 if (box1->num_frames != 1) ad=97*ad/100; // excludes tiny holes 1810.rnd80
527 if (box1->num_frames > 2) Break; // excludes tiny holes 1810.rnd80
528 /* ToDo: may be a two pass version intolerant/tolerant is better */
529 if( loop(box1->p,x0,y0+dy/2,x1-x0,cs,0,RI)>dx/3 ) Break; // rough test
530 if( loop(box1->p,x0+dx/2,y0,y1-y0,cs,0,DO)>dy/3 ) Break;
531 if( loop(box1->p,x0+dx/2,y1,y1-y0,cs,0,UP)>dy/3 ) Break;
532 if( num_cross(x0,x1,y0+dy/4 ,y0+dy/4 ,box1->p,cs) > 2
533 && num_cross(x0,x1,y0+dy/4+1,y0+dy/4+1,box1->p,cs) > 2 ) Break; // gt
// NOTE(review): the crossing count collected in i by the next four lines
// is overwritten by the loop() call that follows without being tested
534 x=(x0+x1)/2;i= num_cross(x,x,y0,y1,box1->p,cs); // v0.40
535 if (i!=3) { x=(x0+2*x1)/3;i= num_cross(x,x,y0,y1,box1->p,cs); }
536 if (i!=3) { x=(x0+3*x1)/4;i= num_cross(x,x,y0,y1,box1->p,cs); }
537 if (i!=3) { i= num_cross((x0+2*x1)/3,(x0+x1)/2,y0,y1,box1->p,cs); }
// the free distance from the left/top/bottom border must be smallest in
// the middle (rounded outline)
538 i=loop(box1->p,x0,y0+dy/2,x1-x0,cs,0,RI); if( i>dx/2 ) Break;
539 j=loop(box1->p,x0,y0 ,x1-x0,cs,0,RI); if( j<i ) Break;
540 j=loop(box1->p,x0,y1 ,x1-x0,cs,0,RI); if( j<i ) Break;
541 i=loop(box1->p,x0+dx/2,y0,y1-y0,cs,0,DO); if( i>dx/2 ) Break;
542 j=loop(box1->p,x1-dx/3,y0,y1-y0,cs,0,DO); if( j<i ) i=j;
543 j=loop(box1->p,x0 ,y0,y1-y0,cs,0,DO); if( j<i ) Break;
544 j=loop(box1->p,x1 ,y0,y1-y0,cs,0,DO); if( j<i ) Break;
545 i=loop(box1->p,x0+dx/2,y1,y1-y0,cs,0,UP); if( i>dx/2 ) Break;
546 j=loop(box1->p,x0 ,y1,y1-y0,cs,0,UP); if( j<i ) Break;
547 j=loop(box1->p,x1 ,y1,y1-y0,cs,0,UP); if( j<i ) Break;
// j: left border offset in the middle row compared with the rows at 1/4
// and 3/4 of the height
548 j=2*loop(box1->p,x0, (y0+y1)/2,x1-x0,cs,0,RI)
549 -loop(box1->p,x0,(3*y0+y1)/4,x1-x0,cs,0,RI)
550 -loop(box1->p,x0,(y0+3*y1)/4,x1-x0,cs,0,RI);
551 if (dx>3 && j>=dx/4) Break; // ~g 4x6font
// some row in the upper half must have exactly two crossings
552 for(y=1;y<dy/2;y++) if( num_cross(x0,x1,y0+y,y0+y,box1->p,cs) == 2 ) break;
553 if( y==dy/2 ) Break; // v0.2.5 ~ bad_t
// i counts the columns of the middle part with exactly three crossings
554 for(i=0,j=x0+dx/4;j<=x1-dx/4 && i<=dx/4;j++)
555 if( num_cross(j,j,y0,y1,box1->p,cs) == 3 ) i++;
556 if( dx>4 && dy>5 && (i<dx/4-1 || i==0) ) Break; // ~g but 4x6-e
557 // look for horizontal white line (right gap) => set x,y
// x = largest loop() distance measured from the right border, y = the
// (lowest) row where it was found
558 for(x=0,y=i=y0+dy/3;i<y1-dy/6;i++){
559 j=loop(box1->p,x1,i,y1-y0,cs,0,LE);
560 if(j>=x) { x=j;y=i; }
561 }
562 if (x<dx/2){ // no gap found, fat font???
563 // check smallest thickness left > 2* smallest thickness right
564 for(i1=dx,i=y0+dy/3;i<y1-dy/6;i++){
565 j =loop(box1->p,x0 ,i,y1-y0,cs,0,RI); if (j>dx/2) break;
566 j =loop(box1->p,x0+j,i,y1-y0,cs,1,RI);
567 if (j<i1) i1=j; // smallest thickness on left bow
568 }
// i2 = smallest thickness on the right side, y = row where it was found
569 for(i2=dx,y=i=y0+dy/3;i<y1-dy/6;i++){
570 j =loop(box1->p,x1 ,i,y1-y0,cs,0,LE);
571 j =loop(box1->p,x1-j,i,y1-y0,cs,1,LE);
572 if(j<i2) { i2=j;y=i; }
573 } if (3*i2>2*i1) Break; // not accepted, if right line is not very thin
// x = sum of three successive loop() run lengths from the right in row y
574 x =loop(box1->p,x1 ,y,y1-y0,cs,0,LE);
575 x+=loop(box1->p,x1-x,y,y1-y0,cs,1,LE);
576 x+=loop(box1->p,x1-x,y,y1-y0,cs,0,LE);
577 if (3*i2>i1) ad=99*ad/100;
578 if (2*i2>i1) ad=99*ad/100;
579 bad_e=60; // used later?
580 }
581 if (x<dx/2) Break;
// some column must be crossed more than once above row y
582 for(i=1,j=x0+dx/6;j<x1-dx/6 && i;j++)
583 if( num_cross(j,j,y0,y,box1->p,cs) > 1 ) i=0;
584 if( i ) Break;
585 // ..@@@@...<-
586 // .@@@@@@;.
587 // @@,...@@.
588 // @@.....@,
589 // @@@@@@@@@
590 // @@.,;.@,. <- problem (y) == bad_e>50
591 // @@.....@.
592 // @@,...@@.
593 // .@@@,@@@.
594 // ..@@@@;..<-
595 if (dy>11 && bad_e<50)
596 if ( num_cross(x0,x1,y,y,box1->p,cs) != 1 ) Break; // except for a curved ("geschwungenes") e
597 if ( num_cross(x0,x1-dx/3,y ,y ,box1->p,cs) != 1
598 && num_cross(x0,x1-dx/3,y+1,y+1,box1->p,cs) != 1 ) Break;
599 // if( num_hole(x0, x1, y0 , y ,box1->p,cs,NULL) < 1 ){
// no hole recorded, or the first recorded hole reaches down to row y
// (hole coordinates are compared with y-y0, i.e. box relative)
600 if( sdata->holes.num == 0 || sdata->holes.hole[0].y1 >= y-y0){
601 if( sdata->hchar ) Break; // ~ \it t
602 // look if thin font (may be h-line is broken) Mai00
603 for(j=0,i=x0+dx/8;i<x1-1;i++)
604 if( get_bw(i,i,y0+dy/4,y,box1->p,cs,1) == 1 ) j++;
605 if(j<2*dx/4) Break;
606 }
// recorded holes must not start below row y
607 if( sdata->holes.num>0 && sdata->holes.hole[0].y0 > y-y0) Break;
608 if( sdata->holes.num>1 && sdata->holes.hole[1].y0 > y-y0) Break;
609 if( sdata->holes.num==1 && sdata->holes.hole[0].x0 >= dx/2) {
610 ad=95*ad/100; } /* 8*10 @ (=at) is not an e */
611 // look for horizontal gap
// same search as above, but measured from the left border
612 for(x=0,y=i=y0+dy/4;i<y1-dy/4;i++){
613 j=loop(box1->p,x0,i,x1-x0,cs,0,RI);
614 if(j>=x) { x=j;y=i; }
615 }
616 if (y>y0+dy/4 && y<y1-dy/4 && x>dx/2) Break; // s
617 if (x>dx/4) ad=99*ad/100;
618
// no crossing at all in the lower right quarter
619 if( num_cross(x0+dx/2,x1 ,y1-dy/4,y1 ,box1->p,cs) == 0
620 && num_cross(x0+dx/2,x1-1,y1-dy/4,y1 ,box1->p,cs) == 0
621 && num_cross(x0+dx/2,x1 ,y1-dy/4,y1-1,box1->p,cs) == 0 ) {
622 if (sdata->gchar) Break; // ~p
623 ad=99*ad/100;
624 }
625 /* upper case is for 5x6 box */
626 if( sdata->hchar // broken B ? should also work when linedetection fails
627 && loop(box1->p,x1,y1-dy/3,dx,cs,0,LE)<=dx/8 ) {
628 x = loop(box1->p,x0,y0+dy/2,dx,cs,0,RI);
629 if( loop(box1->p,x0,y0+dy/4,dx,cs,0,RI)<=x
630 && loop(box1->p,x0,y0+dy/8,dx,cs,0,RI)<=x ) Break;
631 if( loop(box1->p,x0,y1-dy/4,dx,cs,0,RI)<=x
632 && loop(box1->p,x0,y1-dy/8,dx,cs,0,RI)<=x ) Break;
633 }
// the next two calls work on sdata->bp with box relative coordinates
634 x = loop(sdata->bp,0,dy-2 ,dx,cs,0,RI);
635 if( loop(sdata->bp,0,dy-1-dy/8,dx,cs,0,RI)>x && dy>16) Break; // some Q
636 if (box1->m2) {
637 if (sdata->gchar) ad=99*ad/100;
638 if (sdata->hchar) ad=99*ad/100;
639 } else ad=99*ad/100;
640
641 Setac(box1,(wchar_t)'e',ad);
642 if (ad>=100) return 'e';
643 break;
644 }
645 // --- test E ---------------------------------------------------
646 for(ad=d=100;dx>2 && dy>4 ;){ // min 3x4
647 // rewritten for vectors 0.43
// note: i1 and i2 declared here shadow the function-level i1, i2
648 int i1, i2, i3, i4, i5; // line derivation + corners
649 DBG( wchar_t c_ask='E'; )
650 // if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
651 if (box1->num_frames != 1) Break; // excludes tiny holes 1810.rnd80
652 /* half distance to the center */
653 d=2*sq(128/4);
654 /* all four corner points must be close to their box corners */
655 if (aa[3][2]>d/2) Break; /* [2] = distance, ~dj... */
656 if (aa[0][2]>d/2) Break; /* upper left end */
657 if (aa[1][2]>d/2) Break; /* lower left end */
658 if (aa[2][2]>d/2) Break; /* lower right end */
659 /*
660 E f near E
661
662 OOOOOOOO OOOO
663 O5 O O
664 O4 O
665 OOOO3 OOOOOO
666 O2 O
667 O O
668 O1 O O
669 OOOOOOOO OOOOOO
670 */
671 // check the bow from below
// every vector from the lower left to the lower right corner must stay
// within dy/4 of the bottom (modulo num_frame_vectors[0] is sufficient
// because exactly one frame was required above)
672 for (i=aa[1][3];i!=aa[2][3];i=(i+1)%box1->num_frame_vectors[0]) {
673 if (y1-box1->frame_vector[ i][1]>dy/4) break; // fatal!
674 } if (i!=aa[2][3]) Break; // ~AHKMNRX
675 // search most left+down between bottom right and top right
676 i1=nearest_frame_vector(box1, aa[2][3],aa[3][3], x0, y1);
677 i5=nearest_frame_vector(box1, i1,aa[3][3], x0, y0);
678 i3=nearest_frame_vector(box1, i1, i5, x1, (y0+y1)/2);
679 i2=nearest_frame_vector(box1, i1, i3, x0, (2*y0+y1)/3);
680 i4=nearest_frame_vector(box1, i3, i5, x0, (y0+2*y1)/3);
// leftmost point of the left side must not lie left of the line between
// the two left corners
681 i =nearest_frame_vector(box1, aa[0][3],aa[1][3], x0-dx/4, (y0+y1)/2);
682 if (2*box1->frame_vector[i][0] < aa[0][0]+aa[1][0]-1-dx/16) Break;
683 if (2*box1->frame_vector[i][0] < aa[0][0]+aa[1][0]) ad=99*ad/100; // f
684
// i1..i5 are only reported here, they are not tested further
685 MSG(fprintf(stderr,"i1-5 %d %d %d %d %d",i1,i2,i3,i4,i5);)
686 // holes right open?
687 for( i=1,y=y0; y<y0+dy/4 && i; y++ ) // long black line
688 if( get_bw(x0+dx/3,x1-dx/6,y,y,box1->p,cs,2) == 0 ) i=0;
689 if( i ) Break;
690 for( i=1,y=y1; y>y1-dy/4 && i; y-- ) // long black line
691 if( get_bw(x0+dx/6,x1-dx/4,y,y,box1->p,cs,2) == 0 ) i=0;
692 if( i ) Break;
693 for( i=1,y=y0+dy/3; y<y1-dy/3 && i; y++ ){ // black line
694 j=loop(box1->p,x0 ,y,dx,cs,0,RI);
695 j=loop(box1->p,x0+j,y,dx,cs,1,RI); if( j>dx/3 ) i=0;
696 } if( i ) Break;
697 x=x1-dx/3; y=y0; // drill through from the top!
698 turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,DO,ST); if( y>y0+dy/4 ) Break;
699 turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,ST,DO); if( y>y0+dy/3 ) Break;
700 turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,RI,DO); if( x<=x1 || y>y0+dy/2 ) Break;
701 x=x1-dx/3; y=y1; // drill through from below!
702 turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,UP,ST); if( y<y1-dy/4 ) Break;
// NOTE(review): the next check compares with y0-dy/3; by symmetry with the
// walk from the top y1-dy/3 may have been intended - confirm before changing
703 turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,ST,UP); if( y<y0-dy/3 ) Break;
704 turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,RI,UP); if( x<=x1 || y<y0+dy/2 ) Break;
705 x=x1-dx/3; y=y0; // drill through from the top!
706 turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,DO,ST); if( y>y0+dy/4 ) Break;
707 turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,ST,DO); if( y>y0+dy/3 ) Break;
708 y+=dy/15;
709 turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,LE,ST); if( x<x0 ) Break;
710 if (dx>15 && x==x0) ad=99*ad/100; // too thin
711 x+=dx/15+1;
712 turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,DO,ST); if( y>y1-dy/3 ) Break;
713 // if( num_hole(x0, x1, y0 , y1 ,box1->p,cs,NULL) > 0 ) Break;
714 // if (sdata->holes.num > 0) Break;
// left border offsets at 1/4, 1/2 and 3/4 of the height must be similar
715 i=loop(box1->p,x0,y0+dy/4,dx,cs,0,RI); if(i>dx/2) Break;
716 j=loop(box1->p,x0,y0+dy/2,dx,cs,0,RI); if(j<i-dx/4 || j>i+dx/8) Break; i=j;
717 j=loop(box1->p,x0,y1-dy/4,dx,cs,0,RI); if(j<i-dx/4 || j>i+dx/8) Break;
718 j=loop(box1->p,x1,y1-dy/4,dx,cs,0,LE);
719 for( x=dx,y=y0+dy/6; y<y1-dy/9; y++ ) // left border straight
720 { i=loop(box1->p,x0,y,dx,cs,0,RI);
721 if (i>j/2 && ad>98) ad=99*ad/100;
722 if (i>dx/4) break;
723 if(i<x) x=i;
724 } if( y<y1-dy/9 ) Break; // t
725 if(dy>3*dx) // ~[
726 if( get_bw(x0+dx/2,x0+dx/2,y0+dy/4,y1-dy/4,box1->p,cs,1) == 0 ) Break;
727
728 if (box1->m2) {
729 if (!hchar) ad=ad*99/100;
730 if ( gchar) ad=ad*99/100;
731 }
732 Setac(box1,(wchar_t)'E',ad);
733 if (ad>=100) return 'E';
734 break;
735 }
// no test returned early: hand back the character currently stored in box1
736 return box1->c;
737 }
738
/*
 * ocr0_n - rule-based test whether the box under test looks like a
 *          lowercase 'n'.
 *
 * sdata: shared recognition state.  This function reads sdata->box1 (the
 *        box under test), sdata->bp (addressed below with box-relative
 *        coordinates 0..dx-1 / 0..dy-1), sdata->cs, sdata->hchar and
 *        sdata->gchar.  box1->p is addressed with the absolute box
 *        coordinates x0..x1 / y0..y1.
 *
 * The for(;;) below is a run-once breakable block: it has no increment and
 * ends with an unconditional break.  Hard rules leave it through the Break
 * macro (defined elsewhere; presumably logs and breaks), soft rules only
 * scale the certainty `ad`, which starts at 100.  If no hard rule fires the
 * candidate is registered with Setac(box1,'n',ad).
 *
 * Returns box1->c as it is after the optional Setac call.
 */
static wchar_t ocr0_n(ocr0_shared_t *sdata){
  struct box *box1=sdata->box1;
  int  i,j,d,x,y,i1,i2,i3,handwritten=0,
       x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
  int  dx=x1-x0+1,dy=y1-y0+1,  /* size */
       ad;  /* tmp-vars */

  // --- test n ---------------------------------------------------
  // glued rm is very similar to glued nn -> thickness of h-line should grow
  // may02: tested for 8x12 font
  for(ad=d=100;dx>2 && dy>3;){     // min 3x4
    DBG( wchar_t c_ask='n'; )
    // if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
    if (box1->num_frames != 1) Break; // excludes tiny holes 1810.rnd80
    i= num_cross(   0,dx-1,dy/4,dy/4,sdata->bp,cs);
    j= num_cross(   0,dx-1,dy/2,dy/2,sdata->bp,cs);
    if( (i<2 || i>3) && j!=2 ) Break;
    if( loop(sdata->bp,dx/2,0,dy,cs,0,DO) > dy/8 && sdata->hchar ) Break; /* tt */
    y=5*dy/8; /* also for handwritten n, where first bow goes not down enough */
    if(   num_cross(   0,dx/2,y  ,y  ,sdata->bp,cs) != 1
       && num_cross(   0,dx/2,y-1,y-1,sdata->bp,cs) != 1
       && num_cross(dx/2,dx-1,y  ,y  ,sdata->bp,cs) <  1 ) Break; // n rr
    // ~thick_w
    y=loop(sdata->bp,dx-1-dx/4,0,dy,cs,0,DO);
    if(y>dy/2) Break;
    if(y>1)
      if( get_bw(dx-1-dx/4,dx-1,0,y-2,sdata->bp,cs,1) == 1 ) Break;

    y=3*dy/4;
    if(   num_cross(0,   dx/2,y  ,y  ,sdata->bp,cs) == 1
       && num_cross(dx/2,dx-1,y  ,y  ,sdata->bp,cs) == 0 ) Break; // ~p
    y=dy/2;
    if(   num_cross(0,dx-1,dy/2-dy/8,dy/2-dy/8,sdata->bp,cs) == 2
       && num_cross(0,dx-1,dy/2,     dy/2     ,sdata->bp,cs) == 2 ) {  // n rr
      /* printed n: walk along the middle row and record the x positions
       * where the colour changes (line, gap, line, gap) */
      x =loop(sdata->bp,0,y,dx  ,cs,0,RI); if(x>  dx/4) Break; // search 1st v-line
      x+=loop(sdata->bp,x,y,dx-x,cs,1,RI); if(x>  dx/2) Break; i1=x; // 1st gap
      x+=loop(sdata->bp,x,y,dx-x,cs,0,RI); if(x<  dx/2) Break; i2=x; // 2nd v-line
      x+=loop(sdata->bp,x,y,dx-x,cs,1,RI); if(x<3*dx/4) Break; i3=x; // 2nd gap
      i=dy/4; y=13*dy/16;
      if( num_cross(dx/2,dx-1,y,y,sdata->bp,cs)==2 ) i=3*dy/8; // \it n
      if (i<2 && i<dy/2) i++; // correct for small fonts like 8x12
      // the same game for the lower part =>l1 l2 l3 l4 ???
      for(x=i1;x<i2;x++)
        if( loop(sdata->bp,x,   0,dy,cs,0,DO)>=i ) break;
      if(x <i2) Break; // gap detected
      for(x=i1;x<i2;x++)
        if( loop(sdata->bp,x,dy-1,dy,cs,0,UP) >dy/4 ) break;
      if(x==i2) Break; // no gap detected (glued serifs ??? )
      // glued rm as nn ???
      for(y=0,x=(i1+i2)/2;x<i2;x++){
        i=loop(sdata->bp,x,0,dy,cs,0,DO);
        i=loop(sdata->bp,x,i,dy,cs,1,DO); // measure thickness
        if( i>y ) y=i;        /* y tracks the largest thickness so far */
        if( i<y/2 ) break;    /* thickness dropped below half of it */
      }
      if(x <i2) Break; // unusual property for n
      if( dy>7 )
        if( loop(sdata->bp,dx-1,dy-1-dy/8,dx,cs,0,LE)
           +loop(sdata->bp,   0,dy-1-dy/8,dx,cs,0,RI)-dx/8-1
          > loop(sdata->bp,dx-1,dy-1-dy/2,dx,cs,0,LE)
           +loop(sdata->bp,   0,dy-1-dy/2,dx,cs,0,RI) ) ad=90*ad/100; // broken o
      /* NOTE(review): the second scan below starts at the right edge column
       * dx-1 and runs RI (rightwards), unlike the LE scans around it --
       * confirm that LE was not intended. */
      if( dy>7 && dx>7 )
        if( loop(sdata->bp,dx-1,     dy/2,dx,cs,0,LE)==0
         && loop(sdata->bp,dx-1,dy-1-dy/8,dx,cs,0,RI)>dx/8 ) ad=98*ad/100; // broken o
    } else {  /* check handwritten n */
      if(   num_cross(0,dx-1,dy/2,     dy/2     ,sdata->bp,cs) != 3
         && num_cross(0,dx-1,dy/2-dy/8,dy/2-dy/8,sdata->bp,cs) != 3 ) Break;
      i =loop(sdata->bp,0,dy/2-dy/8,dx,cs,0,RI); if (i>dx/4) Break;
      i+=loop(sdata->bp,i,dy/2-dy/8,dx,cs,1,RI); if (i>dx/2) Break;
      i+=loop(sdata->bp,i,dy/2-dy/8,dx,cs,0,RI);
      if( num_cross(i,i,     0,dy/2-2*dy/8,sdata->bp,cs) != 0 ) Break;
      i+=loop(sdata->bp,i,dy/2-dy/8,dx,cs,1,RI);
      if( num_cross(i,i,dy/2+1,       dy-1,sdata->bp,cs) != 0 ) Break;
      handwritten=80;  /* stored but not read again in this function */
    }

    i= loop(sdata->bp,dx-1  ,dy/2,dx,cs,0,LE);
    if(i>5)
      if( get_bw(dx-1-i/2,dx-1-i/2,0,dy/2,sdata->bp,cs,1) == 1 ) Break; // ~rr
    i+=loop(sdata->bp,dx-1-i,dy/2,dx,cs,1,LE);
    if( get_bw(dx-1-i  ,dx-1-i  ,0,dy/2,sdata->bp,cs,1) == 0 ) Break; // ~rv

    if( get_bw(dx/2,dx/2,dy/4,dy/4,sdata->bp,cs,1) == 0
     && get_bw(dx/2,dx-1,dy-2,dy-2,sdata->bp,cs,1) == 0
     && get_bw(dx/2,dx/2,dy/4,dy-2,sdata->bp,cs,1) == 1 ) Break; // ~P

    // glued ri ???
    if( box1->dots>0 && box1->m1 )
      if( get_bw((x1+x0)/2,x1,box1->m1,y0-1,box1->p,cs,1) == 1 )
        if( num_cross( 0,dx-1,0  ,0  ,sdata->bp,cs) >2
         || num_cross( 0,dx-1,1  ,1  ,sdata->bp,cs) >2 ) Break;


    i=loop(sdata->bp,dx-1,  dy-1,dx,cs,0,LE);
    if (i>dx/2)
      i=loop(sdata->bp,dx-1,  dy-2,dx,cs,0,LE);
    x=loop(sdata->bp,dx-1,dy-1-dy/4,dx,cs,0,LE);
    if (sdata->hchar && i-x>1) Break; // ß
    /* NOTE(review): the four scans below start at x=0 and run LE
     * (leftwards) -- confirm the direction is intended. */
    x=loop(sdata->bp,   0,dy-1,dx,cs,0,LE); // check for serifs
    i=loop(sdata->bp,   0,dy-2,dx,cs,0,LE); if (i<x) x=i;
    i=loop(sdata->bp,   0,   1,dx,cs,0,LE); if (i<x) x=i;
    i=loop(sdata->bp,   0,   2,dx,cs,0,LE); if (i<x) x=i;
    if (sdata->hchar && x>0) Break; // fl

    if (num_cross( 0,dx-1,dy/4,dy/4,sdata->bp,cs)>=3) ad=98*ad/100; // small M
    if (sdata->hchar || 2*y0<box1->m1+box1->m2) ad=96*ad/100;
    if (sdata->gchar) ad=96*ad/100; // ß fl
    if (dx<5) { // for small fonts no middle line is possible for m
      ad=99*ad/100; // 4x6 m
      if (num_cross(0,dx-1,dy/8,dy/8,sdata->bp,cs)>=2) {
        ad=97*ad/100; // ~m
        if (dy<=4) Setac(box1,'m',97); // only for 4x6 font!
      }
    }
    Setac(box1,'n',ad);
    break;
  }
  return box1->c;
}
852
/*
 * ocr0_M - rule-based test whether the box under test looks like an 'M'.
 *
 * sdata: shared recognition state.  This function reads sdata->box1 (the
 *        box under test), sdata->bp (addressed with box-relative
 *        coordinates 0..dx-1 / 0..dy-1), sdata->cs, sdata->hchar,
 *        sdata->gchar and sdata->aa (four corner points, each
 *        (x, y, dist^2, vector_idx)).
 *
 * The for(;;) below is a run-once breakable block: hard rules leave it
 * through the Break macro (defined elsewhere), soft rules only scale the
 * certainty `ad`, which starts at 100.  If no hard rule fires the candidate
 * is registered with Setac(box1,'M',ad).
 *
 * Returns box1->c as it is after the optional Setac call.
 */
static wchar_t ocr0_M(ocr0_shared_t *sdata){
  struct box *box1=sdata->box1;
  pix *bp=sdata->bp;
  int  d,x,y,i0,i1,i2,i3,i4,i5,i6,i7,t1,
       hchar=sdata->hchar,gchar=sdata->gchar,
       x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
  int  dx=x1-x0+1,dy=y1-y0+1,  /* size */
       (*aa)[4]=sdata->aa,     /* corner-points, (x,y,dist^2,vector_idx) */
       ad;  /* tmp-vars */

  // ------------------ test M ---------------------------
  for(ad=d=100;dx>3 && dy>3;){ // dy<=dx is not perfect! better: search the
                               // middle minimum for m
    DBG( wchar_t c_ask='M'; )
    // if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
    if (box1->num_frames != 1) Break; // excludes tiny holes 1810.rnd80

    d=2*sq(128/4);  /* half distance to the center, added 2018-09 */
    if (aa[3][2]>d/2) Break;  /* [2] = distance, ~dj..., upper right */
    if (aa[0][2]>d/2) Break;  /* upper left end */
    if (aa[1][2]>d/2) Break;  /* lower left end */
    if (aa[2][2]>d/2) Break;  /* lowerright end */
    // search 3 legs and 2 space between, [][3]=vector_index
    i1=nearest_frame_vector(box1, aa[0][3],aa[2][3], x0, y1); // leg1
    i3=nearest_frame_vector(box1,       i1,aa[3][3], x1, y1); // leg3
    i4=nearest_frame_vector(box1,       i1,      i3, x0, y0); // gap1
    i5=nearest_frame_vector(box1,       i1,      i3, x1, y0); // gap2
    i2=nearest_frame_vector(box1,       i4,      i5, (x0+x1)/2, y1); // leg2
    MSG(fprintf(stderr,"i1-5 %d %d %d %d %d",i1,i2,i3,i4,i5);)
    // 2018-09 ToDo: check essentials of 3 legs
    if (box1->frame_vector[i1][0] > x0+dx/4) Break; // leg1 not leftmost
    if (box1->frame_vector[i4][1] > y0+dy/2) Break; // gap1 too low
    if (box1->frame_vector[i5][1] > y0+dy/2) Break; // gap2 too low
    if (box1->frame_vector[i5][0] < x0+dx/2) Break; // gap2 too left
    if (box1->frame_vector[i5][0]
      < box1->frame_vector[i2][0]) Break; // gap2 more left than leg2
    // check right side molten 'nt' of tmp13/sslmozFP.png
    i6=nearest_frame_vector(box1, i3, aa[3][3], x1+dx/2, y0+dy/8);
    i7=nearest_frame_vector(box1, i3,       i6,      x0, y0+dy/2);
    if (box1->frame_vector[i7][0]<box1->frame_vector[i6][0]-dx/8 && hchar)
      Break; // ad=97*ad/100;
    MSG(fprintf(stderr,"i1-7 %d %d %d %d %d %d %d ad=%d",\
     i1,i2,i3,i4,i5,i6,i7,ad);)

    /* some row in the middle half must cross at least three strokes */
    for (y=dy/4;y<=3*dy/4;y++)
      if (num_cross(0,dx-1,y,y,bp,cs)>=3) break;
    if (y>3*dy/4 && dx>4) Break;
    if(   num_cross(0,dx-1,  dy/4,  dy/4,bp,cs)<2
       && num_cross(0,dx-1,  dy/8,  dy/8,bp,cs)<2 ) Break; /* fat M */
    if(   num_cross(0,dx-1,3*dy/4,3*dy/4,bp,cs)<2 ) Break;

    x = loop(bp,dx-1  ,dy-1,dx,cs,0,LE); // ~ melted kl
    x = loop(bp,dx-1-x,dy-1,dx,cs,1,LE); if( x>dx/2 ) Break;

    if(   loop(bp,   0,7*dy/16,dx,cs,0,RI)
        + loop(bp,dx-1,7*dy/16,dx,cs,0,LE) > dx/2 ) Break; // ~K

    if (loop(bp,   0,dy-1,  dx,cs,0,RI)>dy/4) Break; // ~V 2010-10
    if (loop(bp,   0,dy-1,  dx,cs,0,RI)>dy/8) { ad=ad*99/100;MSG({}) } // ~V 2010-10
    if( dy>8 /* following lines should be extend to range check */
     && loop(bp,  dx/4,dy-1,  dy,cs,0,UP)<dy/4
     && loop(bp,3*dx/8,dy-1,  dy,cs,0,UP)<dy/4 )
      if(   loop(bp,   0,dy-1-dy/ 8,dx,cs,0,RI)
          < loop(bp,   0,dy-1-dy/16,dx,cs,0,RI)-dx/32 ) Break; // ~it_u
    if(   num_cross(0,dx-1,  dy/2,  dy/2,bp,cs)==2
       && num_cross(0,dx-1,  dy/4,  dy/4,bp,cs)> 2
       && num_cross(0,dx-1,3*dy/4,3*dy/4,bp,cs)> 2 ) Break; // ~it_u
    if(   num_cross(0   ,dx-1,3*dy/4,3*dy/4,bp,cs)==2
       && num_cross(dx/2,dx/2,3*dy/4,  dy-1,bp,cs)> 0 ) Break; // ~it_v

    if(   loop(bp,3*dx/4,   0,dy,cs,0,DO)
        > loop(bp,2*dx/4,   0,dy,cs,0,DO)
       && loop(bp,3*dx/4,dy-1,dy,cs,0,UP)
        < loop(bp,2*dx/4,dy-1,dy,cs,0,UP) ) Break; // ~N
    if(   loop(bp,3*dx/4,     dy/8,dy,cs,0,DO)
        > loop(bp,2*dx/4,     dy/8,dy,cs,0,DO)
       && loop(bp,3*dx/4,dy-1-dy/8,dy,cs,0,UP)
        < loop(bp,2*dx/4,dy-1-dy/8,dy,cs,0,UP) ) Break; // ~serif_N

    // i0 is lower end of upper serifs (widest gap? )
    i0=0;  /* assigned but not read again in this function */

    if( num_cross(0,dx-1,dy/2,dy/2,bp,cs)!=4 ){ // Is it a N ?
      if( num_cross(0,dx-1,dy/2,dy/2,bp,cs)==3 ){
        for(y=dy/2+1;y<dy;y++){
          if( num_cross(0,dx-1,y,y,bp,cs)<3 ) break;
        }
        if( num_cross(0,dx-1,y,y,bp,cs)==2 ){
          x =loop(bp,dx-1  ,y-1,dx,cs,0,LE);
          x+=loop(bp,dx-1-x,y-1,dx,cs,1,LE);
          x+=loop(bp,dx-1-x,y-1,dx,cs,0,LE);
          if( loop(bp,dx-x,y-1,dy,cs,0,UP)>y-2 ) Break; // ~N
        }
      }
    }
    // MNWK
    /* walk right from the middle column while the scan from the top keeps
     * getting longer; i1/i2 hold the column and length of the longest one */
    for(i2=0,i1=x=dx/2;x<dx-dx/4;x++){ // lowest pixel
      y=loop(bp,x,0,dy,cs,0,DO);
      if(y>i2) {i2=y;i1=x;} else break;
    }
    i3=i2+loop(bp,i1,i2,dy-i2,cs,1,DO);
    if(i2<dy/4) {
      if (!sdata->hchar) Break; // rm
      ad=99*ad/100;
    }
    if (i2==0 && dx>8 && dy>12) Break; // glued and badly split serif-MN

    // if( num_hole(x0, x1, y0  , y1  ,box1->p,cs,NULL) != 0 ) Break; // small A
    //if (sdata->holes.num != 0) Break; // includes tiny holes 1810.rnd80
    /* same test as at the top of this block */
    if (box1->num_frames != 1) Break; // excludes tiny holes 1810.rnd80
    t1=loop(bp,0 ,3*dy/4,dx,cs,0,RI);
    t1=loop(bp,t1,3*dy/4,dx,cs,1,RI); // thickness of line?
    if( 7*(t1+1)<dx )
      if( num_cross(i1,dx-1,i2-1,i2-1,bp,cs)!=2
       || num_cross(0 ,i1  ,i2-1,i2-1,bp,cs)!=2 ) Break; // too hard ???

    // ~u_n-pair
    if(   num_cross(0,dx-1,0,0,bp,cs)!=2
       && num_cross(0,dx-1,1,1,bp,cs)!=2
       && num_cross(0,dx-1,2,2,bp,cs)!=2 ) Break;

    // ~nn v0.2.4a3
    if(   num_cross(0,dx-1,  dy/4,  dy/4,bp,cs)==4
       && num_cross(0,dx-1,3*dy/4,3*dy/4,bp,cs)==4 ){
      /* compare the colour-change positions along the rows dy/4 (i1) and
       * 3*dy/4 (i2), one change at a time */
      i1 =loop(bp, 0,  dy/4,dx,cs,0,RI);
      i1+=loop(bp,i1,  dy/4,dx,cs,1,RI);
      i1+=loop(bp,i1,  dy/4,dx,cs,0,RI);
      i2 =loop(bp, 0,3*dy/4,dx,cs,0,RI);
      i2+=loop(bp,i2,3*dy/4,dx,cs,1,RI);
      i2+=loop(bp,i2,3*dy/4,dx,cs,0,RI);
      if( i1>=i2 ) Break;  // no good M
      i1+=loop(bp,i1,  dy/4,dx,cs,1,RI);
      i2+=loop(bp,i2,3*dy/4,dx,cs,1,RI);
      if( i1>=i2 ) Break;  // no good M
      i1+=loop(bp,i1,  dy/4,dx,cs,0,RI);
      i2+=loop(bp,i2,3*dy/4,dx,cs,0,RI);
      if( i1<=i2 ) Break;  // no good M
    }
    if(   num_cross(0,dx-1,dy/2,dy/2,bp,cs)==2
       && num_cross(0,dx-1,dy/4,dy/4,bp,cs)==2 && !hchar ) Break; // ~ \it u

    if (dy<17)
      if( num_cross(0,dx-1,  0,  0,bp,cs)<2 ) ad=99*ad/100;
    if (dx>5) /* 4x6 font has only 1 cross at y=1 */
      if( num_cross(0,dx-1,  1,  1,bp,cs)<2 ) ad=96*ad/100; // kt
    if( num_cross(dx/2,dx/2,  0, dy-1,bp,cs)!=1) ad=98*ad/100; // kt
    if (dx<5 && loop(bp,dx/2,0,dy,cs,0,DO)>=3*dy/8) ad=96*ad/100; // 4x6 H

    if(   num_cross(0,dx-1,  dy/4,  dy/4,bp,cs)<=2
       && num_cross(0,dx-1,3*dy/4,3*dy/4,bp,cs)<=2
       && dx>8 && dy>12 ){
      ad=98*ad/100;
      for(y=5*dy/16;y<5*dy/8;y++) // look for H-line
        if( num_cross(0,dx-1,y  ,y  ,bp,cs)==1 ) break;
      if( y<5*dy/8 ) ad=95*ad/100;
      if( y<5*dy/8 )
        if( num_cross(2+dx/6,dx-3-dx/6,y-2,y-2,bp,cs)==0
         || num_cross(2+dx/6,dx-3-dx/6,y-1,y-1,bp,cs)==0 ) Break; // ~H bad!
    }

    if( loop(bp,3*dx/8,   0,dy,cs,0,DO) >dy/2
     && loop(bp,5*dx/8,dy-1,dy,cs,0,UP) >dy/2 ) ad=95*ad/100;

    if(!hchar){
      ad=98*ad/100;  /* not sure */
      if(   loop(bp,0,     dy/4,dx,cs,0,RI)
          < loop(bp,0,dy-1-dy/8,dx,cs,0,RI)-dx/16 ) Break; // ~wi glued
    }
    if( gchar ) ad=98*ad/100;
    if (ad>99 && dx<8) ad=99*ad/100; /* give 5x8 N a chance */
    Setac(box1,'M',ad);
    break;
  }
  return box1->c;
}
1026
/*
 * ocr0_N - rule-based test whether the box under test looks like an 'N'.
 *
 * sdata: shared recognition state.  This function reads sdata->box1 (the
 *        box under test), sdata->hchar, sdata->gchar and sdata->aa (four
 *        corner points, each (x, y, dist^2, vector_idx)).  Unlike its
 *        neighbours it inspects no pixels directly; every rule works on the
 *        corner points and on box1->frame_vector through
 *        nearest_frame_vector() and line_deviation().
 *
 * The for(;;) below is a run-once breakable block: hard rules leave it
 * through the Break macro (defined elsewhere), soft rules only scale the
 * certainty `ad`, which starts at 100.  If no hard rule fires the candidate
 * is registered with Setac(box1,'N',ad).
 *
 * Returns box1->c as it is after the optional Setac call.
 */
static wchar_t ocr0_N(ocr0_shared_t *sdata){
  struct box *box1=sdata->box1;
  int  i,d,x,y,i1,i2,i3,hchar=sdata->hchar,gchar=sdata->gchar,
       x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1;
  int  dx=x1-x0+1,dy=y1-y0+1,  /* size */
       (*aa)[4]=sdata->aa,     /* corner-points, (x,y,dist^2,vector_idx) */
       dbg[9],                 /* only dbg[0..3] are used: line deviations */
       ad;  /* tmp-vars */

  // --- test N ------- +hchar -gchar
  for(ad=d=100;dx>3 && dy>3;){     // 4x6font
    int j;
    DBG( wchar_t c_ask='N'; )
    //if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
    //if (sdata->holes.num > 0) ad=98*ad/100; /* # */
    if (box1->num_frames != 1) Break; // excludes tiny holes 1810.rnd80
    if (dx<6) ad=99*ad/100;
    if (dx<5) ad=99*ad/100;
    /* half distance to the center */
    d=2*sq(128/4);  /* ToDo17 bug? add doc, [][2]=dist? square() */
    /* now we check for the 4 ends of the x */
    if (aa[0][2]>d) Break;
    if (aa[1][2]>d) Break;
    if (aa[2][2]>d) Break;
    if (aa[3][2]>d) Break;
    if (aa[3][0]-aa[0][0]<dx/2) Break;
    if (aa[2][0]-aa[1][0]<dx/2) Break;
    if (aa[1][1]-aa[0][1]<dy/2) Break;
    if (aa[2][1]-aa[3][1]<dy/2) Break;
    if (aa[3][0]-aa[0][0]<4-1) Break;  /* to small to hold an N */
    if (aa[2][0]-aa[1][0]<4-1) Break;  /* to small */
    if (aa[1][1]<=y1-dy/4) Break;  // joined ty
    if (aa[1][0]>x0+dx/8) Break;  // 2010-10-11
    if (abs(aa[3][1]-aa[0][1])>(dy+2)/5) Break; /* glued tu */
    if (abs(aa[3][1]-aa[0][1])>(dy+4)/8) ad=98*ad/100; /* glued tu */
    i= nearest_frame_vector(box1,aa[2][3],aa[3][3], x0, y0+dy/2);
    x=box1->frame_vector[i][0];
    if (x<=x0+dx/2 || x<aa[2][0]-dx/8) Break; // ~k 2010-10
    /* left and right vertical line */
    d=line_deviation(box1, aa[0][3], aa[1][3]); if (d>2*sq(1024/4)) Break;
    ad=(100-(d-sq(1024)/2)/sq(1024)/4)*ad/100;
    d=line_deviation(box1, aa[2][3], aa[3][3]); if (d>2*sq(1024/4)) Break;

    /* i1: uppermost left ^ from bottom (near 0,0) */
    i1=nearest_frame_vector(box1,aa[1][3],aa[2][3], x0+dx/8, y0);
    x=box1->frame_vector[i1][0];
    y=box1->frame_vector[i1][1];
    MSG( fprintf(stderr,"i1= %d (%d,%d) left ^ from below", i1,x-x0,y-y0);)
    if (y-y0 > 5*dy/8) Break;
    if (x-x0 > 5*dx/8) Break;
    /* i3: uppermost right ^  ~H */
    i3=nearest_frame_vector(box1,aa[1][3],aa[2][3], x1, y0);
    MSG( fprintf(stderr,"i3= %d (%d,%d) right ^ (ad=%d)",\
      i3, box1->frame_vector[i3][0]-x0,box1->frame_vector[i3][1]-y0,ad);)

    /* check lower border of diagonal line, may fail on fonts where
     * line ends on middle of right vertical line (screen font) */
    dbg[0]=d=line_deviation(box1,i1, aa[2][3]);
    /* check right border of left vertical line */
    /* but split to possible lower left serif + vert. line */
    j=nearest_frame_vector(box1,aa[1][3],i1, x0+dx/2, y1+dy/2);
    dbg[1]=d=line_deviation(box1, aa[1][3],j )
            +line_deviation(box1,        j,i1);
    MSG(fprintf(stderr," i1-a2 %d a1_serif-i1 %d ad=%d",dbg[0],dbg[1],ad);)
    if (dbg[0] > sq(1024/4)) Break;
    if (dx>4 && dbg[1] > sq(1024/4)) ad=97*ad/100; // d=0..2*sq(1024)
    if (dx>4 && dbg[1] > sq(1024/3)) Break; // d=0..2*sq(1024)
    // serif N has d=sq(1024/3)=116508
    MSG( fprintf(stderr,"ad %d", ad); )

    /* i2: lowest right v from top, same frame? N-tilde etc.? */
    i2=nearest_frame_vector(box1,aa[3][3],aa[0][3], x1, y1-dy/8);
    x=box1->frame_vector[i2][0];
    y=box1->frame_vector[i2][1];
    MSG( fprintf(stderr,"i2= %d (%d,%d) lowest right v from top",\
      i2, box1->frame_vector[i2][0]-x0,box1->frame_vector[i2][1]-y0);)
    if (y-y0 < 3*dy/8) Break;
    if (x-x0 < 3*dx/8) Break;
    // test H
    if ( box1->frame_vector[i3][0]-box1->frame_vector[i1][0]> dx/4
      && box1->frame_vector[i3][1]-box1->frame_vector[i1][1]<=dy/8
      && y<=box1->frame_vector[i1][1]) Break;
    /* check if upper left and lower right point are joined directly */
    /* but split to possible upper right serif + down line */
    j=nearest_frame_vector(box1,i2,aa[0][3], x0+dx/2, y0-dy/2);
    dbg[2]=d=line_deviation(box1,i2,        j)
            +line_deviation(box1, j, aa[0][3]);
    /* check if upper right and lower right point are joined directly */
    /* but split to possible upper right serif + vert. line */
    j=nearest_frame_vector(box1,aa[3][3],i2, x0+dx/2, y0-dy/2);
    dbg[3]=d=line_deviation(box1, aa[3][3],j)
            +line_deviation(box1,        j,i2);  // ToDo: split once more?
    MSG( fprintf(stderr," i2-a0 %d a3-i2 %d ad %d",dbg[2],dbg[3], ad); )
    if (dbg[2] > sq(1024/4)) Break;
    // serif N, ToDo: do it better
    if (dbg[3] > sq(1024/4)) ad=97*ad/100;
    if (dbg[3] > sq(1024/3)) Break;
    MSG( fprintf(stderr,"ad %d", ad); )

    MSG( fprintf(stderr,"check against melted tu"); )
    // i1 = left ^ from below, i2 = lowest right v from top
    // sample gocr_Device*: 3-8,dy=27
    if (   (box1->frame_vector[i1][1]-y0)
       -(y1-box1->frame_vector[i2][1])>dy/8) ad=99*ad/100; /* ~ tu */
    MSG( fprintf(stderr,"tu ad %d", ad); )
    if (box1->frame_vector[i2][0]
       -box1->frame_vector[i1][0]<=dx/8) Break; /* nonsignificant distance */
    MSG( fprintf(stderr,"i2-i1<=dx/8 ad %d", ad); )
    /* i1: uppermost left ^ from bottom (near 0,0) */
    /* i2: lowest right v from top, same frame? N-tilde etc.? */
    if (box1->frame_vector[i2][1]
       -box1->frame_vector[i1][1]<=dy/8) {
       // may happen on screen fonts 7x10
       if (dx>8) ad=97*ad/100; /* too flat (ff,H) */
    }
    MSG( fprintf(stderr,"i2-i1<=dy/8 ad %d", ad); )
    if (box1->frame_vector[i2][1]
       -box1->frame_vector[i1][1]<=dy/2) ad=99*ad/100;
    MSG( \
    fprintf(stderr,"^v %d %d %d %d line dev %d %d %d %d max %d %d ad %d",\
      box1->frame_vector[i1][0]-x0,box1->frame_vector[i1][1]-y0,\
      box1->frame_vector[i2][0]-x0,box1->frame_vector[i2][1]-y0,\
      dbg[0],dbg[1],dbg[2],dbg[3],sq(1024/4),sq(1024),ad);)
    /* the same scaling formula is applied once per recorded deviation */
    ad=(100-(dbg[0]-sq(1024)/2)/sq(1024)/4)*ad/100;
    MSG( fprintf(stderr,"ad %d", ad); )
    ad=(100-(dbg[1]-sq(1024)/2)/sq(1024)/4)*ad/100;
    MSG( fprintf(stderr,"ad %d", ad); )
    ad=(100-(dbg[2]-sq(1024)/2)/sq(1024)/4)*ad/100;
    MSG( fprintf(stderr,"ad %d", ad); )
    ad=(100-(dbg[3]-sq(1024)/2)/sq(1024)/4)*ad/100;
    MSG( fprintf(stderr,"ad %d", ad); )

    if (!hchar) ad=99*ad/100;
    if ( gchar) ad=98*ad/100; // \sc N
    Setac(box1,'N',ad);
    break;
  }
  return box1->c;
}
1166
/*
 * ocr0_h - rule-based test whether the box under test looks like a
 *          lowercase 'h'.
 *
 * sdata: shared recognition state.  This function reads sdata->box1 (the
 *        box under test), sdata->bp (addressed with box-relative
 *        coordinates 0..dx-1 / 0..dy-1), sdata->cs, sdata->hchar,
 *        sdata->gchar, sdata->aa (four corner points, each
 *        (x, y, dist^2, vector_idx)) and sdata->holes.  box1->p is
 *        addressed with the absolute box coordinates x0..x1 / y0..y1.
 *
 * The for(;;) below is a run-once breakable block: hard rules leave it
 * through the Break macro (defined elsewhere), soft rules only scale the
 * certainty `ad`, which starts at 100.  If no hard rule fires the candidate
 * is registered with Setac(box1,'h',ad).
 *
 * Returns box1->c as it is after the optional Setac call.
 */
static wchar_t ocr0_h(ocr0_shared_t *sdata){
  struct box *box1=sdata->box1;
  pix *bp=sdata->bp;
  int  i,d,x,y,hchar=sdata->hchar,gchar=sdata->gchar,
       x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
  int  dx=x1-x0+1,dy=y1-y0+1,  /* size */
       ad;  /* tmp-vars */
  int (*aa)[4]=sdata->aa;    /* corner-points, (x,y,dist^2,vector_idx) */

  // --- test h ---------------------------------------------------
  for(ad=d=100;dx>2 && dy>3;){     // min 3x4
    // rewritten for vectors 0.42
    int i1, i2, i3, i4, i5, i6, i7, i8;  // line derivation + corners
    DBG( wchar_t c_ask='h'; )
    //if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
    if (box1->num_frames != 1) ad=97*ad/100; // excludes tiny holes 1810.rnd80
    if (box1->num_frames >  2) Break; // excludes tiny holes 1810.rnd80
    /* half distance to the center */
    d=2*sq(128/4);
    /* now we check for the upper right end of the h */
    if (aa[3][2]<d/4) Break;  /* [2] = distance, ~BCDEF... */
    if (aa[0][2]>d/2) Break;  /* upper left end */
    if (aa[1][2]>d/2) Break;  /* lower left end */
    if (aa[2][2]>d/2) Break;  /* lowerright end */
    /*
       (sketch of the numbered contour points; its column alignment is lost)
       type A B=italic ???
       18 OOO
       O O O
       O O
       O7OOO OOOO
       O4 O O O
       O O O O
       O O O O O
       2O3 5O6 O OOO
    */
    i1=i8=aa[0][3];
    i2=i3=aa[1][3];
    i5=i6=aa[2][3];
    // check the bow from below (fails on melted serifs)
    /* walk the frame vectors from i2 towards i5 and keep in i4 the one with
     * the smallest y seen so far */
    for (i4=i=i2;i!=i5;i=(i+1)%box1->num_frame_vectors[0]) {
      if (box1->frame_vector[ i][1]
         <box1->frame_vector[i4][1]) i4=i; // get next maximum
      if (box1->frame_vector[ i][1]<=y0) break; // fatal!
    }
    if (box1->frame_vector[i4][1]-y0<dy/4) Break; // ~MN
    if (y1-box1->frame_vector[i4][1]<dy/4) Break; // ~BCDEGIJLOQSUYZ
    // two steps for i7 to go around pitfalls on italic h
    i7=nearest_frame_vector(box1, i6, i8, (x0+x1)/2, (y0+y1)/2);
    i7=nearest_frame_vector(box1, i6, i7,        x0, (y0+y1)/2);
    i3=nearest_frame_vector(box1, i2, i4, (x0+x1)/2, y1);
    i5=nearest_frame_vector(box1, i4, i6, (x0+x1)/2, y1);

    MSG(fprintf(stderr,"i1-7 %d %d %d %d %d %d %d",i1,i2,i3,i4,i5,i6,i7);)
    /* ... new part /// old obsolete part ... */
    if( get_bw(0        ,dx/2,dy/8     ,dy/8     ,bp,cs,1) != 1 ) Break;
    if( get_bw(0        ,dx/2,dy/2     ,dy/2     ,bp,cs,1) != 1 ) Break;
    if( get_bw(dx/2     ,dx-1,dy-1-dy/3,dy-1-dy/3,bp,cs,1) != 1 ) Break;
    if( get_bw(dx/2     ,dx/2,dy/5     ,dy-1-dy/3,bp,cs,1) != 1 ) Break;
    if( get_bw(dx-1-dx/3,dx-1,0        ,1        ,bp,cs,1) == 1 ) Break;
    if( get_bw(dx-1-dx/3,dx-1,1        ,dy/6     ,bp,cs,1) == 1 ) Break;
    if( dy>18 )
      if( get_bw(dx-1-dx/3,dx-1,dy/6     ,dy/5     ,bp,cs,1) == 1 ) Break;
    if( get_bw(dx-1-dx/3,dx-1,dy-1-dy/4,dy-1     ,bp,cs,1) == 0 ) Break; // s-
    for( x=x0+dx/3;x<x1-dx/3;x++)
      if( get_bw(x, x,y1-dy/4, y1, box1->p,cs,1) == 0 ) break;
    if( x>=x1-dx/3 ) Break;
    /* need dy/4 rows with exactly two crossings from y0+dy/3 downwards */
    for(i=dy/4,y=y0+dy/3;y<=y1 && i;y++){
      if( num_cross(x0,x1     ,y,y, box1->p,cs) == 2 ) i--;
    }
    if( i ) Break;
    /* need dy/4 rows with exactly one crossing in the left half on top */
    for(i=dy/4,y=y0;y<=y0+dy/2 && i;y++){
      if( num_cross(x0,x0+dx/2,y,y, box1->p,cs) == 1 ) i--;
    }
    if( i ) Break;
    // if( num_hole(x0, x1, y0  , y1  ,box1->p,cs,NULL) > 0 ) // could happen
    // if (sdata->holes.num > 0)
    /* NOTE(review): hole[0] is read when num_frames > 1 without looking at
     * sdata->holes.num -- confirm the two are always in step. */
    if (box1->num_frames > 1) // excludes tiny holes 1810.rnd80
      if (sdata->holes.hole[0].y0 >      dy/3
       && sdata->holes.hole[0].y1 < dy-1-dy/3) Break;
    // if( num_hole(x0, x1, y0+dy/3 , y1-dy/3 ,box1->p,cs,NULL) != 1 ) Break; // mini
    if( loop(bp,dx-1,dy/3,dx,cs,0,LE)+dx/8
      < loop(bp,dx-1,dy/2,dx,cs,0,LE)
     && loop(bp,dx-1,dy-1-dy/8,dx,cs,0,LE)+dx/8
      < loop(bp,dx-1,dy/2,dx,cs,0,LE)) Break; // ~k Okt00
    i=loop(bp,0,dy-1-dy/4,dx,cs,0,RI);
    if (i>1 && num_cross(x0,x0,y0+dy/8+2,y0+dy/2, box1->p,cs) == 1 ){ // fi fu
      /* The penalty factor is 99-2^i.  For every i>=7 it is already
       * negative, so ad drops below 1 and the block is left just below.
       * Clamping the shift count to 7 keeps that outcome while avoiding the
       * undefined behaviour of 1<<i (and the overflow of the product) when
       * i is large on wide boxes. */
      int shift = (i < 7) ? i : 7;
      ad=(99-(1<<shift))*ad/100;
      if (num_cross(x0,x0,y0,y0+dy/8+2, box1->p,cs) == 0 ) ad=97*ad/100;
      if (num_cross(x0+dx/2,x0+dx/2,y0,y0+dy/8+2, box1->p,cs) == 1 ) ad=97*ad/100;
      if (ad<1) break;
    }
    i =loop(bp,0,dy/4,dx,cs,0,RI);
    i+=loop(bp,i,dy/4,dx,cs,1,RI)+1;
    /* no break inside: ad is scaled once for every matching column */
    for ( ; i<dx-dx/3; i++ )
      if( loop(bp,i,0,dy,cs,0,DO)>5*dy/8 ) {
        ad=98*ad/100; // melted hi, li, but handwritten h
        MSG(fprintf(stderr,"ad=%d",ad);) }
    if( num_cross(x0,x0,y0+(dy+3)/8,y1,box1->p,cs) > 1 ) {
      ad=98*ad/100; // melted fr
      MSG(fprintf(stderr,"ad=%d",ad);) }

    i=loop(bp,dx-1,3*dy/4,dx,cs,0,LE); // melted "fr" for vertical letters
    if (i>dx/4 && loop(bp,dx-1-i,dy-1,dy,cs,1,UP)>dy/2) {
      ad=94*ad/100; MSG(fprintf(stderr,"ad=%d",ad);) }

    i=loop(bp,dx-1,1+dy/16,dx,cs,0,LE);
    if (i<dx/4) {
      ad=98*ad/100;
      MSG(fprintf(stderr,"ad=%d",ad);) }
    if( num_cross(dx-i+1+dx/8,dx-i+1+dx/8,0,1+dy/16,bp,cs) > 0 ) {
      ad=95*ad/100; // melted fi
      MSG(fprintf(stderr,"ad=%d",ad);) }
    if (loop(box1->p,x1,y0+1+dy/16,dx,cs,0,LE)<dx/4) {
      ad=98*ad/100; // fi
      MSG(fprintf(stderr,"ad=%d",ad);) }
    if (loop(box1->p,x1,y0  ,dx,cs,0,LE)<dx/4
     || loop(box1->p,x1,y0+1,dx,cs,0,LE)<dx/4) {
      ad=98*ad/100; // li
      MSG(fprintf(stderr,"ad=%d",ad);) }


    // if (sdata->holes.num > 0) ad=97*ad/100;
    if (box1->m2) {
      if ( gchar) ad=98*ad/100;
      if (!hchar) ad=97*ad/100;
    } else ad=99*ad/100;
    Setac(box1,'h',ad);
    break;
  }
  return box1->c;
}
1295
/*
 * ocr0_H - rule-based test whether the box under test looks like an 'H'.
 *
 * sdata: shared recognition state.  This function reads sdata->box1 (the
 *        box under test), sdata->bp (addressed with box-relative
 *        coordinates 0..dx-1 / 0..dy-1), sdata->cs, sdata->hchar and
 *        sdata->gchar.  box1->p is addressed with the absolute box
 *        coordinates x0..x1 / y0..y1.
 *
 * The for(;;) below is a run-once breakable block: hard rules leave it
 * through the Break macro (defined elsewhere) or, in one place, a plain
 * break; soft rules only scale the certainty `ad`, which starts at 100.
 * If no hard rule fires the candidate is registered with
 * Setac(box1,'H',ad).
 *
 * Returns box1->c as it is after the optional Setac call.
 */
static wchar_t ocr0_H(ocr0_shared_t *sdata){
  struct box *box1=sdata->box1;
  pix *bp=sdata->bp;
  int  i,j,j1,d,x,y,ya,i1,i2,i3,hchar=sdata->hchar,gchar=sdata->gchar,
       x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
  int  dx=x1-x0+1,dy=y1-y0+1,  /* size */
       ad;  /* tmp-vars */

  // --- test H ---------------------------------------------------
  for(ad=d=100;dx>2 && dy>3;){     // min 3x4
    DBG( wchar_t c_ask='H'; )
    // if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
    if (box1->num_frames != 1) ad=97*ad/100; // excludes tiny holes 1810.rnd80
    if (box1->num_frames >  3) break; // excludes tiny holes 1810.rnd80
    if (   num_cross(0,dx-1,dy/4  ,dy/4  ,bp,cs) != 2
        && num_cross(0,dx-1,dy/4-1,dy/4-1,bp,cs) != 2 ) Break;
    if (   num_cross(0,dx-1,3*dy/4  ,3*dy/4  ,bp,cs) != 2
        && num_cross(0,dx-1,3*dy/4+1,3*dy/4+1,bp,cs) != 2 ) Break;
    if (   loop(bp,0   ,dy/8,dx,cs,0,RI)
         + loop(bp,dx-1,dy/8,dx,cs,0,LE)>dx/2 ) Break; // ~A
    /* j = summed scan length from both sides per row; j1 = its maximum */
    for ( j1=0,i=1,y=y0+dy/10; y<y1-dy/10 && i; y++ ) // 2 vertical lines
    { j=loop(box1->p,x0  ,y,dx,cs,0,RI)
       +loop(box1->p,x1  ,y,dx,cs,0,LE);
      if( j>dx/2 ) i=0;
      if(j>j1)j1=j; }
    if ( !i ) Break;
    for ( i=1,y=dy/4; y<dy-1-dy/4 && i; y++ ) // max - min width
    { j=loop(bp,0   ,y,dx,cs,0,RI)
       +loop(bp,dx-1,y,dx,cs,0,LE);
      if( j1-j>dx/5 ) i=0; }
    if (!i) Break; // ~K Jul00
    /* i = longest second run found in the middle third, ya = its row */
    for (i=0,ya=y=y0+dy/3; y<y1-dy/3; y++ ) // horizontal line
    { j=loop(box1->p,x0  ,y,dx,cs,0,RI);
      j=loop(box1->p,x0+j,y,dx,cs,1,RI);
      if( j>i ) { i=j; ya=y; } }
    if (i<=dx/2) Break;
    ya-=y0;  /* from here on ya is box-relative */
    if (num_cross(0,dx-1,ya  ,ya  ,bp,cs) != 1
     && num_cross(0,dx-1,ya+1,ya+1,bp,cs) != 1 ) Break; /* Dec00 */
    for (y=ya; y<dy-dy/4; y++ ) // ~M Dec00
      if (num_cross(0,dx-1,y  ,y  ,bp,cs) > 2
       && num_cross(0,dx-1,y+1,y+1,bp,cs) > 2 ) break;
    if (y<dy-dy/4) Break;
    for(i=1,x=x0+dx/4;x<=x1-dx/4 && i;x++){
      if (get_bw( x, x,y0     ,y0+dy/4,box1->p,cs,1) == 0 ) i=0;
    }
    if (i) Break;
    for(i=1,x=x0+dx/4;x<=x1-dx/4 && i;x++){
      if (get_bw( x, x,y1-dy/4,y1     ,box1->p,cs,1) == 0 ) i=0;
    }
    if (i) Break;
    for(i=1,x=x0+dx/4;x<=x1-dx/4 && i;x++){
      if (num_cross(x,x,y0+dy/8,y1-dy/8, box1->p,cs) == 1 ) i=0;
    }
    if (i) Break;
    for (i=1,y=y0;y<=y0+dy/4 && i;y++){
      if (num_cross(x0,x1,y,y, box1->p,cs) == 2 ) i=0;
    }
    if (i) Break;
    for(i=1,y=y1-dy/4;y<=y1 && i;y++){
      if (num_cross(x0,x1,y,y, box1->p,cs) == 2 ) i=0;
    }
    if (i) Break;
    if (get_bw(x1-dx/8, x1     , y0, y0+dy/8,box1->p,cs,1) != 1 ) Break;
    if (get_bw(x0     , x0+dx/8, y1-dy/8, y1,box1->p,cs,1) != 1 ) Break;
    i1=loop(bp,dx-1,     dy/4,dx,cs,0,LE); if(i1>dx/2) Break;
    i2=loop(bp,dx-1,     dy/2,dx,cs,0,LE); if(i2<i1-dx/4 || i2>i1+dx/8) Break;
    i3=loop(bp,dx-1,dy-1-dy/4,dx,cs,0,LE); if(i3<i2-dx/4 || i3>i2+dx/8) Break;
    if (abs(i1+i3-2*i2)>dx/16+1) Break;
    // test for thick tall N looking like a H
    if (num_cross(x0,x1,y0, y1      , box1->p,cs) < 2 ) Break; // sure N
    if (num_cross(x0,x1,y0,(y0+y1)/2, box1->p,cs) < 2 ) Break; // sure N
    i1=loop(bp,   0,     dy/4,dx,cs,0,RI);
    i1=loop(bp,  i1,     dy/4,dx,cs,1,RI); // right side of left vert. line
    i2=loop(bp,   0,dy-1-dy/4,dx,cs,0,RI);
    i2=loop(bp,  i2,dy-1-dy/4,dx,cs,1,RI); // right side of left vert. line
    i3=loop(bp,dx-1   ,dy-1-dy/4,dx,cs,0,LE);
    i3=loop(bp,dx-1-i3,dy-1-dy/4,dx,cs,1,LE); // left side of right vert. line
    if (dx<10 && i1-i2>dx/4) Break;
    if (dx<10 && i1-i2>dx/8) ad=99*ad/100; // 7x10 ~N
    /* the third scan assigns (not adds), so i ends up as the length of the
     * last run only */
    i =loop(bp,   0,dy/2+1+dy/8,dx,cs,0,RI);
    i+=loop(bp,   i,dy/2+1+dy/8,dx,cs,1,RI);
    i =loop(bp,   i,dy/2+1+dy/8,dx,cs,0,RI);
    if (i<dx/2-1 && 5*i1>6*i2 && 5*i3>6*i2 && i1>i2 && i3>i2) Break; // ???
    if (dx>8)
      if (  loop(bp,dx-1,   3*dy/8,dx,cs,0,LE)
           -loop(bp,dx-1,     dy/8,dx,cs,0,LE)>dx/4
         && loop(bp,dx-1,   3*dy/8,dx,cs,0,LE)
           -loop(bp,dx-1,dy-1-dy/8,dx,cs,0,LE)>dx/4 ) Break; // ~K
    // if( num_hole(x0,x1,y0,y1,box1->p,cs,NULL) != 0 ) Break;
    // if (sdata->holes.num != 0) Break;
    if (box1->num_frames != 1) Break; // excludes tiny holes 1810.rnd80
    MSG( fprintf(stderr,"i123 %d %d %d",i1,i2,i3); )
    if ( gchar) ad=99*ad/100;
    if (!hchar) ad=98*ad/100;
    Setac(box1,'H',ad);
    break;
  }
  return box1->c;
}
1386
/*
 * ocr0_k - rule-based test whether the box under test looks like a
 *          lowercase 'k'.
 *
 * sdata: shared recognition state.  This function reads sdata->box1 (the
 *        box under test), sdata->bp (addressed with box-relative
 *        coordinates 0..dx-1 / 0..dy-1), sdata->cs, sdata->hchar,
 *        sdata->gchar, sdata->aa (four corner points, each
 *        (x, y, dist^2, vector_idx)) and sdata->holes.  box1->p is
 *        addressed with the absolute box coordinates x0..x1 / y0..y1.
 *
 * The for(;;) below is a run-once breakable block: hard rules leave it
 * through the Break macro (defined elsewhere), soft rules only scale the
 * certainty `ad`, which starts at 100.  If no hard rule fires the candidate
 * is registered with Setac(box1,'k',ad).
 *
 * Returns box1->c as it is after the optional Setac call.
 */
static wchar_t ocr0_k(ocr0_shared_t *sdata){
  struct box *box1=sdata->box1;
  pix *bp=sdata->bp;
  int  i,j,x,y,hchar=sdata->hchar,gchar=sdata->gchar,
       x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
  int  dx=x1-x0+1,dy=y1-y0+1,  /* size */
       ad;  /* tmp-vars */
  int (*aa)[4]=sdata->aa;    /* corner-points, (x,y,dist^2,vector_idx) */

  // --- test k ---------------------------------------------------
  for(ad=100;dx>2 && dy>3;){     // min 3x4
    // rewritten for vectors 0.43
    int d, i1, i2, i3, i4, i5, i6, i7, i8;  // line derivation + corners
    DBG( wchar_t c_ask='k'; )
    // if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
    if (box1->num_frames != 1) Break; // excludes tiny holes 1810.rnd80
    /* half distance to the center */
    d=2*sq(128/4);
    /* now we check for the upper right end of the h */
    if (aa[3][2]<d/4) Break;  /* [2] = distance, ~BCDEF... */
    if (aa[0][2]>d/2) Break;  /* upper left end */
    if (aa[1][2]>d/2) Break;  /* lower left end */
    if (aa[2][2]>d/2) Break;  /* lowerright end */
    /*
       (sketch of the numbered contour points; its column alignment is lost)
       type A B=italic ???
       18 OOO
       O O O
       O O6 O
       O7 OO O OO
       O4OO OO OO
       O OO O O
       O OO O O O
       2O3 O5 O OOO
    */
    i1=i8=aa[0][3];
    i2=i3=aa[1][3];
    i5=   aa[2][3];
    // check the bow from below
    /* walk the frame vectors from i2 towards i5 and keep in i4 the one with
     * the smallest y seen so far */
    for (i4=i=i2;i!=i5;i=(i+1)%box1->num_frame_vectors[0]) {
      if (box1->frame_vector[ i][1]
         <box1->frame_vector[i4][1]) i4=i; // get next maximum
      if (box1->frame_vector[ i][1]<=y0) break; // fatal!
    }
    if (box1->frame_vector[i4][1]-y0<dy/4) Break; // ~MN
    if (y1-box1->frame_vector[i4][1]<dy/4) Break; // ~BCDEGIJLOQSUYZ
    i6=nearest_frame_vector(box1, i5, i8,      x1, (2*y0+y1)/3);
    // two steps for i7 to go around pitfalls on italic h
    i7=nearest_frame_vector(box1, i6, i8,  x0, y1);
    i3=nearest_frame_vector(box1, i2, i4, (x0+x1)/2, y1);
    // get the leftmost point between right up and down line
    i =nearest_frame_vector(box1, i5, i6,  x0-dx, (y0+2*y1)/3);
    MSG(fprintf(stderr,"i1-8 %d %d %d %d %d %d %d %d ad %d i %d",i1,i2,i3,i4,i5,i6,i7,i8,ad,i);)
    if (x1-box1->frame_vector[i][0]<dx/4) {Break;} // h
    if (x1-box1->frame_vector[i][0]<dx/3) { ad=99*ad/100; MSG({}) }
    if (x1-box1->frame_vector[i][0]<dx/2) { ad=99*ad/100; MSG({}) }


    if( num_cross(0,  dx-1,0,0,bp,cs) != 1
     && num_cross(0,  dx-1,1,1,bp,cs) != 1 ) Break;
    if( num_cross(0,3*dx/4,  dy/8 ,  dy/8 ,bp,cs) != 1
     || num_cross(0,3*dx/4,3*dy/16,3*dy/16,bp,cs) != 1 ) Break;
    if( num_cross(0,dx-1,dy-1,dy-1,bp,cs) != 2
     && num_cross(0,dx-1,dy-2,dy-2,bp,cs) != 2 ) Break;
    if( dx<8
     && num_cross(dx-1,dx-1,dy/4,dy-1,bp,cs) != 2
     && num_cross(dx-2,dx-2,dy/4,dy-1,bp,cs) != 2 ) Break;
    /* i1..i3 are reused from here on as plain scan lengths */
    i1=loop(bp,0,dy/2-dy/4,dx,cs,0,RI);
    i2=loop(bp,0,dy/2     ,dx,cs,0,RI);if(i2>dx/2) Break;
    i3=loop(bp,0,dy/2+dy/4,dx,cs,0,RI);
    if (abs(i1+i3-2*i2)>(dx+8)/16+1 || i1<i3-1) Break; // v-line on left side?
    if( get_bw(x0     ,x0+dx/2,y0     ,y0+dy/4,box1->p,cs,1) != 1 ) Break;
    if( get_bw(x0+dx/2,x1,     y1-dy/3,y1     ,box1->p,cs,1) != 1 ) Break;
    if( get_bw(x1-dx/4,x1,     y0     ,y0+3*dy/16,box1->p,cs,1) == 1 ) Break;
    if( get_bw(x1-dx/4,x1,     y0+dy/4,y1-dy/4,box1->p,cs,1) != 1 ) Break; //~1
    if( get_bw(x1-dx/4,x1,     y1-dy/8,y1     ,box1->p,cs,1) != 1 ) Break;
    if (sdata->holes.num > 0)
      if (sdata->holes.hole[0].y0 >      dy/4) Break;
    // if( num_hole(x0,x1,y0+dy/4,y1,box1->p,cs,NULL) != 0 ) Break;
    for(y=y0+1;y<y0+dy/2;y++) // gap ???
      if( get_bw(x0,x1,y,y,box1->p,cs,1) == 0 ) break;
    if( y<y0+dy/2 ) Break;
    for(i=1,x=x0;x<=x0+dx/2 && i;x++)
      if(get_line(x,y0  ,x   ,y1,box1->p,cs,100)>50) i=0;
    if( i ) Break; // no vertical line!

    /* check for falling line in the lower right corner */
    /* x = longest scan from the right edge in the rows 5*dy/8..7*dy/8,
     * j = the row where it was found */
    for (j=x=0,y=5*dy/8;y<7*dy/8;y++) {
      i=    loop(bp,dx-1,y,dx,cs,0,LE); if(i>x) { x=i;j=y; }
    }  // x=dx/6 on fat k
    if (x + loop(bp,dx-1-x,j,dx,cs,1,LE)/2 <dx/4) Break;  // y to j 2010-09
    if (x + loop(bp,dx-1-x,j,dx,cs,1,LE)/2 <dx/2){ ad=98*ad/100; MSG({}) }
    x=dx-1-x; y=j;
    i =loop(bp,dx-1,dy-1,dx,cs,0,LE);
    if(i>dx/2)
      i =loop(bp,dx-1,dy-2,dx,cs,0,LE);
    if(i>dx/2) Break;
    i+=loop(bp,dx-1-i,dy-1,dx,cs,1,LE)/2;
    if( get_line(x,y,dx-1-i,dy-1,bp,cs,100)<60 ) Break;

    for(y=y0+dy/3;y<y1;y++)
      if( num_cross(x0,x1,y,y,box1->p,cs)==2 ) break;
    if( y==y1 ) Break;
    if(
      // num_hole(x0,x1     ,y0     ,y1     ,box1->p,cs,NULL)>0  // ~A happens!
      sdata->holes.num > 0 )
      if (sdata->holes.hole[0].x1>dx-1-dx/4
       || sdata->holes.hole[0].y1>dy-1-dy/4
       || sdata->holes.hole[0].y0<     dy/4) Break;
    // if ( num_hole(x0,x1-dx/4,y0+dy/4,y1-dy/4,box1->p,cs,NULL)==0 ) Break;
    i=loop(bp,0,dy-1,dx,cs,0,RI);
    i=loop(bp,i,dy-1,dx,cs,1,RI); if (dx>8 && 4*i>3*dx) Break; // ~glued_tz
    i =loop(bp,0,dy/4,dx,cs,0,RI);
    if (i>dx/4
     && i+loop(bp,i,dy/4,dx,cs,1,RI)>dx/2
     && loop(bp,   0,0,dx,cs,0,RI)<=dx/4
     && loop(bp,dx-1,0,dx,cs,0,LE)>=dx/2 ) ad=90*ad/100; // divided Q

    if( 2*y0>(box1->m1+box1->m2) ) { ad=99*ad/100; MSG({}) }

    if ( gchar) ad=99*ad/100;
    if (!hchar) ad=99*ad/100;
    Setac(box1,'k',ad);
    break;
  }
  return box1->c;
}
1510
/*
 * ocr0_K: rule based test whether the character box sdata->box1 is a 'K'.
 *
 * Input (all read from sdata): box1 (frame x0..x1/y0..y1, m3, num_frames, p),
 * the pixmap bp, the threshold cs, the hchar/gchar flags and holes.
 * The rules run inside a for-loop whose body ends in break, so they are
 * executed at most once; a failing hard rule leaves early via Break (macro
 * defined outside this chunk) and nothing is set.  If all hard rules pass,
 * Setac(box1,'K',ad) is called, where ad starts at 100 and is scaled down by
 * the soft rules.  Returns box1->c in every case.
 *
 * NOTE(review): calls on box1->p use absolute x0/y0 based coordinates while
 * calls on bp use 0..dx-1 / 0..dy-1, so bp presumably is a box-local view of
 * the same pixels -- confirm.  The meaning of loop()/get_bw()/num_cross()/
 * get_line2() results is taken from the existing comments, not from their
 * definitions.
 * NOTE(review): i1 and d are written but never read in this function.
 */
ocr0_K(ocr0_shared_t * sdata)1511 static wchar_t ocr0_K(ocr0_shared_t *sdata){
1512 struct box *box1=sdata->box1;
1513 pix *bp=sdata->bp;
1514 int i,j,i1,i2,d,x,y,hchar=sdata->hchar,gchar=sdata->gchar,
1515 x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
1516 int dx=x1-x0+1,dy=y1-y0+1, /* size of the box (width, height) */
1517 ad,ya,xa,yb,xb,yc,xc,yd,xd,ye,xe,yf,xf; /* ad=weight, (xa,ya)..(xf,yf)=points a..f of the sketch below */
1518
1519 // --- test K ---------------------------------------------------
1520 for(ad=d=100;dx>2 && dy>3;){ // Mar2000 perfect??? no ocr-a X Jul09
1521 DBG( wchar_t c_ask='K'; )
1522 // if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
1523 if (box1->num_frames != 1) Break; // excludes tiny holes 1810.rnd80
 /* every row from dy/8 up to dy-dy/8 must give a non-zero get_bw() result
    for the columns 0..dx/2 (presumably: left stroke present) */
1524 for(y=dy/8;y<dy-dy/8;y++)
1525 if( !get_bw(0,dx/2,y,y,bp,cs,1) ) break;
1526 if( y<dy-dy/8 ) Break;
 /* upper gap: some column in x0+dx/4..x1-dx/4 needs a downward loop()
    result of at least dy/4 (and not more than 3*dy/4).
    NOTE(review): j is 0 here and is only changed inside the
    "dy>15 && j>dy/8" branch, so that branch looks unreachable -- confirm */
1527 for(j=0,i=1,x=x0+dx/4;x<=x1-dx/4 && i;x++){
1528 y= loop(box1->p,x,y0,y1-y0,cs,0,DO); if (y>3*dy/4) { i=1;break; }
1529 if (dy>15 && j>dy/8){
1530 j =loop(box1->p,x-1,y0+y-1,x1-x0,cs,0,LE)/2;
1531 y+=loop(box1->p,x-j,y0+y-1,y1-y0,cs,0,DO)-1;
1532 }
1533 if(y>=dy/4) i=0; /* ok, found gap */
1534 } if( i ) Break;
 /* lower gap: y keeps the largest upward loop() result, i1 its column;
    a value of dy/8 or less rejects, below dy/4 costs 20% */
1535 for(y=0,x=x0+dx/4;x<=x1-dx/4;x++){ // lower h-gap
1536 i=loop(box1->p,x,y1,dy,cs,0,UP);
1537 /* on small chars bypass possible low left serifs */
1538 if (i>0) { i2=loop(box1->p,x-1,y1-i-1,dy,cs,0,UP);
1539 if (i2>1) i+=i2-1; }
1540 if (i>y) { y=i; i1=x; }
1541 } if( y<=dy/8 ) Break; if (y<dy/4) ad=80*ad/100;
 /* num_cross() == 2 is required on at least one column in x0+dx/3..x1-dx/8 */
1542 for(i=1,x=x0+dx/3;x<=x1-dx/8 && i;x++){
1543 if( num_cross(x,x,y0,y1, box1->p,cs) == 2 ) i=0;
1544 } if( i ) Break;
 /* ... and on at least one row of the top quarter */
1545 for(i=1,y=y0;y<=y0+dy/4 && i;y++){
1546 if( num_cross(x0,x1,y,y, box1->p,cs) == 2 ) i=0;
1547 } if( i ) Break;
 /* narrow boxes only: one row of the middle third must have num_cross()==1 */
1548 if( dx<10 ){
1549 for(i=1,y=y0+dy/3;y<=y1-dy/3 && i;y++){
1550 if( num_cross(x0,x1,y,y, box1->p,cs) == 1 ) i=0;
1551 } if( i ) Break;
1552 }
 /* num_cross() == 2 also on at least one row of the bottom quarter */
1553 for(i=1,y=y1-dy/4;y<=y1 && i;y++){
1554 if( num_cross(x0,x1,y,y, box1->p,cs) == 2 ) i=0;
1555 } if( i ) Break;
1556 if( get_bw(x1-dx/3,x1,y0,y0+dy/8,box1->p,cs,1) != 1 ) Break; // ~k
 /* left border: the loop() results at dy/4 and 3*dy/4 must not sum to
    less than twice the result at dy/2 (minus tolerance); hard reject only
    for dy>=16, otherwise 2% penalty */
1557 if( loop(bp,0, dy/4,dx,cs,0,RI)
1558 +loop(bp,0,3*dy/4,dx,cs,0,RI)
1559 <2*loop(bp,0, dy/2,dx,cs,0,RI)-2-dx/32 )
1560 { if (dy>=16) { Break; } else ad=98*ad/100; } // ~X Jul09
1561
 /* right border: the leftward loop() result around mid height (max of the
    rows dy/2 and 3*dy/8) must be larger than the one at dy/4 and larger
    than the one at y1-dy/4 */
1562 i=loop(box1->p,x1,y0+ dy/4,x1-x0+1,cs,0,LE); if(i>dx/2) Break;
1563 j=loop(box1->p,x1,y0+ dy/2,x1-x0+1,cs,0,LE);
1564 x=loop(box1->p,x1,y0+3*dy/8,x1-x0+1,cs,0,LE); if(x>j) j=x;
1565 if(j<=i ) Break; i=j;
1566 j=loop(box1->p,x1,y1-dy/4,x1-x0+1,cs,0,LE); if(j>=i ) Break;
1567 // out_x(box1); // detailed analysis
1568 //
1569 // a d <= that are main points of K
1570 // | / + horizontal serifs at a,c,d,f
1571 // b/e
1572 // | \ .
1573 // c f
 /* a and c: rows dy/4 and dy-dy/4; x = first loop() result from the left
    plus half of the second one (col=1) */
1574 ya= dy/4;xa=loop(bp,0,ya,dx,cs,0,RI);xa+=loop(bp,xa,ya,dx,cs,1,RI)/2;
1575 yc=dy-dy/4;xc=loop(bp,0,yc,dx,cs,0,RI);xc+=loop(bp,xc,yc,dx,cs,1,RI)/2;
1576 yb=dy/2; xb=dx-1-loop(bp,dx-1,dy/2,dx,cs,0,LE);
 /* scan y=0..dy/4-1: d = row of the top quarter with the smallest
    right-side loop() result, f = same for the bottom quarter,
    e = row around dy/2 with the largest right-side result,
    b = smallest x candidate in the rows dy/2-y */
1577 for(yd=ye=yf=xe=y=i=0,xf=xd=dx;y<dy/4;y++){ // range 0..1/4
1578 x =loop(bp,dx-1, y,dx,cs,0,LE); if(x<xd){ xd=x;yd= y; }
1579 x =loop(bp,dx-1,dy-1-y,dx,cs,0,LE); if(x<xf){ xf=x;yf=dy-1-y; }
1580 x =loop(bp,dx-1,dy/2+y,dx,cs,0,LE); if(x>xe){ xe=x;ye=dy/2+y; }
1581 x =loop(bp,dx-1,dy/2-y,dx,cs,0,LE); if(x>xe){ xe=x;ye=dy/2-y; }
1582 #if 0 // removed v0.2.4a2
1583 x =loop(bp,0 ,dy/2+y,dx,cs,0,RI); // middle left border
1584 x+=loop(bp,x ,dy/2+y,dx,cs,1,RI); // test 2nd cross
1585 x+=loop(bp,x ,dy/2+y,dx,cs,0,RI); if(x<xb){ xb=x;yb=dy/2+y; }
1586 #endif
1587 x =loop(bp,0 ,dy/2-y,dx,cs,0,RI);
1588 x+=loop(bp,x ,dy/2-y,dx,cs,1,RI); // test 2nd cross
1589 x+=loop(bp,x ,dy/2-y,dx,cs,0,RI); if(x<xb){ xb=x;yb=dy/2-y; }
1590 x =dx-1-loop(bp,dx-1,dy/2-y,dx,cs,0,LE); if(x<xb){ xb=x;yb=dy/2-y; }
1591 }
 /* turn the right-side distances into x coordinates inside the box */
1592 xd=dx-1-xd;xe=dx-1-xe;xf=dx-1-xf;
1593 xb+=loop(bp,xb,yb,dx,cs,1,RI)/4; // detect center of line
1594 xe-=loop(bp,xe,ye,dx,cs,1,LE)/4;
1595 xd-=loop(bp,xd,yd,dx,cs,1,LE)/4;
1596 xf-=loop(bp,xf,yf,dx,cs,1,LE)/4;
1597 #if 0
1598 MSG( \
1599 printf("a=%d %d b=%d %d c=%d %d d=%d %d e=%d %d f=%d %d dxdy %d %d",\
1600 xa,ya,xb,yb,xc,yc,xd,yd,xe,ye,xf,yf,dx,dy);\
1601 )
1602 #endif
 /* a-c (left stroke) must score at least 95 with get_line2() */
1603 if( get_line2(xa,ya,xc,yc,bp,cs,100)<95 ) Break;
1604 if( dx>8 ){ // example szaka0103
1605 if( xe>5*dx/8 || xb>5*dx/8 ) Break; // ~{\it n}
1606 i=loop(bp,xb,yb,xb,cs,1,LE); // thick center? see font22
 /* b-d is retried with b shifted left by i/2 before rejecting */
1607 if( get_line2(xb,yb,xd,yd,bp,cs,100)<95 ) // right up
1608 if( get_line2(xb-i/2,yb,xd,yd,bp,cs,100)<95 ) Break;
1609 // ImageMagick.convert.textfont=FreeMono-Regular,80 bowed
1610 if( get_line2(xe,ye,xf,yf,bp,cs,100)<95 ) ad=99*ad/100; // right down
1611 if( get_line2(xe,ye,xf,yf,bp,cs,100)<80 ) Break; // right down
1612 xe+=loop(bp,xe,ye,dx,cs,1,RI); if( xe>=xf ) Break; // ~{\it n}
1613 } else {
 /* dx<=8: no line checks; small boxes need hchar, and one of the columns
    0..2 must give a downward loop() result (col=1, from row 1) above 3*dx/4 */
1614 if( dy<16 && !hchar ) Break;
1615 if( loop(bp,0,1,dy,cs,1,DO)<=3*dx/4
1616 && loop(bp,1,1,dy,cs,1,DO)<=3*dx/4
1617 && loop(bp,2,1,dy,cs,1,DO)<=3*dx/4 ) Break; // ~x
1618 }
1619 if (loop(bp,dx-1,dy-1-dy/4,dx,cs,0,LE)<=dx/8){
1620 ad=99*ad/100; /* broken B ? */
1621 if (sdata->holes.num > 0)
1622 if (sdata->holes.hole[0].y1 < dy-1-dy/3) Break;
1623 // if( num_hole(x0,x1,y0,(y0+2*y1)/3,box1->p,cs,NULL)>0) Break; // broken B
1624 }
 /* 1% penalties when m3 is set and the box is not hchar / is gchar */
1625 if(box1->m3 && !hchar) ad=99*ad/100;
1626 if(box1->m3 && gchar) ad=99*ad/100;
1627 // printf(" ok xe=%d",xe);
1628 Setac(box1,'K',ad);
1629 break;
1630 }
1631 return box1->c;
1632 }
1633
/*
 * ocr0_f: rule based test whether the character box sdata->box1 is an 'f'
 * (the rules are written to separate it from 't' and other look-alikes named
 * in the trailing "~x" comments).
 *
 * Reads box1 (frame, m2..m4, dots, num_frames, frame_vector, p), the pixmap
 * bp, the threshold cs, hchar/gchar, the corner table sdata->aa and holes.
 * The rules run inside a for-loop whose body ends in break (executed at most
 * once); a failing hard rule leaves via Break (macro defined outside this
 * chunk).  On success Setac(box1,'f',ad) is called with ad starting at 100
 * and adjusted by the soft rules.  Returns box1->c in every case.
 *
 * NOTE(review): the loop body declares its own "int d", which shadows the
 * function-level d; i4, i5, i6 and i8 are only read by the MSG() print.
 * NOTE(review): helper results (loop, get_bw, num_cross,
 * nearest_frame_vector) are interpreted as the existing comments do; their
 * definitions are not visible here.
 */
ocr0_f(ocr0_shared_t * sdata)1634 static wchar_t ocr0_f(ocr0_shared_t *sdata){
1635 struct box *box1=sdata->box1;
1636 pix *bp=sdata->bp;
1637 int i,j,d,x,y,hchar=sdata->hchar,gchar=sdata->gchar,
1638 x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
1639 int dx=x1-x0+1,dy=y1-y0+1, /* size */
1640 (*aa)[4]=sdata->aa, /* the four line ends, (x,y,dist^2,vector_idx) */
1641 ab[8][4], /* special points (x,y,dist^2,vector_idx) */
1642 ad; /* tmp-vars */
1643 /* x=mindist_to_a y=0 "t"
1644 0>..$$. 0>..$$ 0>..$$ end right bow a--..$$ a--.$7. y>0 "f"
1645 1>.$..$ 1>.$.. 1>.$$$ start right bow .$7. .$..
1646 .@... .@.. 2>.@@. start upper end .@.. .@..
1647 2>.$... 2>.$.. 3>$$$$ crossing bar .$.. $$$.
1648 3>$@$$. 3>$@$. $@@$ $@$. .@..
1649 4>.$... 4>.$.. 4>.$$. lower end .$.. .$..
1650 .@... .@.. .@@. .@.. .@..
1651 .@... .@.. .@@. .@.. .@..
1652 5>.$... 5>.$.. 5>.$$. lower start .$.. .$..
1653 6>..... 6>$... 6>.... optional left bow
1654 */
1655 // --- test f like t ---------------------------------------------------
1656 for(ad=d=100;dx>2 && dy>5;){ // sometimes no hchar!
1657 // rewritten for vectors 0.43
1658 int d, i1, i2, i3, i4, i5, i6, i7, i8, i9; // line derivation + corners
1659 DBG( wchar_t c_ask='f'; )
1660 // if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
1661 if (box1->num_frames != 1) Break; // excludes tiny holes 1810.rnd80
1662 /* half distance to the center */
1663 d=2*sq(128/4);
1664 /* now we check the upper right end, aa[3] (the original comment said "of the h") */
1665 if (aa[3][2]>d/2) Break; /* [2] = distance, ~BCDEF... */
1666 if (aa[0][2]>d ) Break; /* upper left end */
1667 /*
1668 9
1669 OOO
1670 O 7 O8
1671 O6
1672 1OOOO5
1673 O4
1674 O
1675 2O3
1676 OOOOO
1677 */
 /* i1..i9: frame vector indices picked with nearest_frame_vector() for the
    points numbered in the sketch above */
1678 i1=nearest_frame_vector(box1,aa[0][3],aa[1][3],x0-dx/2,(5*y0+3*y1)/8);
1679 /* we need i for 4x6 font, where left side of h-bar is near (x0,y1) */
1680 i =aa[1][3]; if (box1->frame_vector[i][1]<y1-dy/8)
1681 i =nearest_frame_vector(box1,aa[1][3],aa[2][3], x0, y1+dy/4);
1682 i2=nearest_frame_vector(box1, i1, i, x1, y1);
1683 i =nearest_frame_vector(box1,aa[1][3],aa[2][3], x1, y1+dy/4);
1684 i3=nearest_frame_vector(box1, i,aa[3][3], x0, y1);
1685 i7=nearest_frame_vector(box1, i3,aa[3][3],(x0+x1)/2, y0);
1686 i8=nearest_frame_vector(box1, i7,aa[0][3], x1, (3*y0+y1)/4);
1687 i9=nearest_frame_vector(box1,aa[3][3],aa[0][3],(x0+2*x1)/3,y0-dy/4);
1688 i5=nearest_frame_vector(box1, i3, i7, x1+dx/4, (5*y0+3*y1)/8);
1689 i4=nearest_frame_vector(box1, i3, i5, x0, (3*y0+y1)/4);
1690 i6=nearest_frame_vector(box1, i5, i7, x0, (y0+3*y1)/4);
1691
1692 MSG(fprintf(stderr,"i1-9 %d %d %d %d %d %d %d %d %d",i1,i2,i3,i4,i5,i6,i7,i8,i9);)
1693
1694 // check if vertical line is near to the left side
1695 if (box1->frame_vector[i2][0]-x0>dx/2) Break; // ~3
1696 i =nearest_frame_vector(box1, aa[0][3], i2, x1+2*dx, (y0+y1)/2);
1697 // MSG(fprintf(stderr,"i %d",i);)
1698 if (box1->frame_vector[i ][0]
1699 -box1->frame_vector[i9][0]>dx/8) Break; // ~3
1700
1701 if( (box1->dots) ) Break; // Bold-face is gchar
1702 if (dy<=box1->m3-box1->m2+1) Break;
 /* scan the rows 2+(3*dy+4)/32 .. 5*dy/8: x = largest second loop() result
    (col=1) of a row, j = its row; an early break of the scan rejects */
1703 for(x=0,j=y=2+(3*dy+4)/32;y<=5*dy/8;y++){ // upper cross line min=2
1704 i=loop(bp,0,y,dx,cs,0,RI); if( y>dy/4 && i>5*dx/8 ) break;
1705 i=loop(bp,i,y,dx,cs,1,RI); if( i>x ) { x=i;j=y; }
1706 if( y<3*dy/4 && y>dy/4
1707 && num_cross(0,dx-1,y ,y ,bp,cs) != 1
1708 && num_cross(0,dx-1,y+1,y+1,bp,cs) != 1 // against noise
1709 ) break;
1710 } if( y<=5*dy/8 ) Break; y=j;// if( y>dy/2 || y<dy/8 ) Break;
1711 // x is the widest run found above, y its row (presumably the cross bar)
1712 i=loop(bp,(dx+1)/2,0,dy,cs,0,DO)/2;
1713 if( i>dy/8
1714 && num_cross( 0, (dx+1)/2,i,i,bp,cs) > 0
1715 && num_cross((dx+1)/2,dx-1,i,i,bp,cs) > 0 ) Break; // ~Y
1716
1717 if (loop(bp,3*dx/4, 0,dy,cs,0,DO)>dy/8
1718 && loop(bp,3*dx/4-1,0,dy,cs,0,DO)>dy/8) Break; // upper bow
 /* NOTE(review): i is a box-relative row, box1->m3 is read as-is --
    confirm both use the same origin */
1719 i=3*dy/4; if (box1->m3 && i>=box1->m3) i=box1->m3-1;
1720 if (num_cross(0,dx-1,i,i,bp,cs)!=1) Break;
1721
1722 // the middle bar appear in a wide vertical range, get part below
1723 for (i1=dx,i2=y,j=y+1;j<dy-dy/4;j++){
1724 i=loop(bp,0,j,dx,cs,0,RI);
1725 i=loop(bp,i,j,dx,cs,1,RI); // thickness vert. line
1726 if (i<i1) { i1=i; i2=j; if (2*i<=x) break; }
1727 } i=i1; j=i2; /* i=smallest thickness found, j=its row (below horiz-bar) */
1728 MSG(fprintf(stderr,"j=%d i=%d y=%d x=%d",j,i,y,x);)
1729 // bar should have twice of the thickness of v-line
1730 if (x<2*i && x<dx) Break;
1731 if (x<i+2+dx/8) ad=97*ad/100; // fat f
1732
1733 // check for the upper bow to the right top side
1734 i3=nearest_frame_vector(box1,aa[2][3],aa[3][3], x0, y0);
1735 MSG(fprintf(stderr,"xy= %d %d %d %d",x0,y0,\
1736 box1->frame_vector[i3][0]-x0,box1->frame_vector[i3][1]-y0);)
1737 ab[7][0]=box1->frame_vector[i3][0];
1738 ab[7][1]=box1->frame_vector[i3][1];
1739 ab[7][3]=i3;
1740 if (ab[7][1]-y0<=dy/16) ad=95*ad/100; // ~t
1741 // because of the dx,dy scaling the horiz. bar could be nearer to (x1,y0)
1742 // as the upper right end of the "t"
1743 if (aa[3][0]-x0>3*dx/4 && aa[3][1]-y0>3*dy/16) ad=99*ad/100; // ~t
1744
1745
1746 j=loop(bp,0,dy/8,dx,cs,0,RI); // if j>dx/2 we have italic f
1747 if ((2*x<dx && j<=dx/2) || 3*x<dx) Break; // bar should be not too small
 /* no row from dy/8 downwards may have a left loop() result above j+dx/4 */
1748 for(i=dy/8;i<dy;i++)
1749 if (loop(bp,0,i,dx,cs,0,RI)>(j+dx/4)) break;
1750 if (i<dy) Break; // check for v-line
1751
1752 if( loop(bp,dx-1,dy/2,dx,cs,0,LE)<dx/2 )
1753 if( loop(bp,dx-1,dy/2,dx,cs,0,LE)-1
1754 <=loop(bp,dx-1, y ,dx,cs,0,LE) )
1755 if( loop(bp,dx-1, y-1,dx,cs,0,LE)
1756 <=loop(bp,dx-1, y ,dx,cs,0,LE) ) Break; // ~1
1757
1758 if( loop(bp,0,dy/2,dx,cs,0,RI)-1
1759 >loop(bp,0, 1,dx,cs,0,RI) ) Break; // ~X
1760
1761 i=y;j=1; // j used as flag, i keeps the bar row
1762 if( num_cross(0,dx-1,0,0,bp,cs)==1 && hchar) //~r
1763 if( num_cross(0,dx-1,dy-1,dy-1,bp,cs)!=1
1764 && num_cross(0,dx-1,dy-2,dy-2,bp,cs)!=1 ) Break; // ~* etc.
1765 // check for upper bow to right
1766 for(y=1;j && y<i; y++) // no @@ pattern
1767 if( num_cross(0,dx-1,y ,y ,bp,cs) ==2 ) j=0;
1768 if (j==0) { ad=(ad+101)/2; }
 /* second attempt, pixel based: two neighbouring pixels >= cs in row y
    below two pixels < cs in row y-1 (the left one is not required >= cs
    when dx<7) */
1769 for(y=1;j && y<i; y++) // no @@ pattern, try to detect it
1770 for(x=0;j && x<dx ;x++){ // ..
1771 if( (getpixel(bp,x ,y )>=cs || dx<7) && getpixel(bp,x+1,y )>=cs
1772 && getpixel(bp,x ,y-1)< cs && getpixel(bp,x+1,y-1)< cs )
1773 { j=0;break; }
1774 } if(j) ad=98*ad/100; // not detected
1775
1776 // if( num_hole (x0 , x1 , y0, y1,box1->p,cs,NULL) != 0 ) Break; // ~e
1777 if (sdata->holes.num != 0) Break; // ~e
 /* i1/i2: smallest left/right loop() result over the rows 7*dy/8..dy-1 */
1778 for(i1=i2=dx,y=7*dy/8;y<dy;y++){
1779 x=loop(bp,0 ,y,dx,cs,0,RI);if(x<i1)i1=x;
1780 x=loop(bp,dx-1,y,dx,cs,0,LE);if(x<i2)i2=x;
1781 }
1782 if(i1>i2+dx/4) Break; // ~t ~e
1783 if(i1>i2+1) ad=96*ad/100; // ~t ~e
1784 if( loop(bp,0,3*dy/4,dx,cs,0,RI)<i1-dx/4 ) Break;
 /* NOTE(review): the UP probe below is limited by dy but compared with dx/2 */
1785 if( dx>5 && !hchar)
1786 if( loop(bp,dx-1,dy/2,dx,cs,0,LE)>3*dx/4 )
1787 if( loop(bp,dx-1,dy-1,dy,cs,0,UP)<dx/2 ) Break; // ~c
1788 if( dx>8 )
1789 if( loop(bp, 0,2*dy/3 ,dx,cs,0,RI)>2*dx/3
1790 || loop(bp, 0,2*dy/3-1,dx,cs,0,RI)>2*dx/3 )
1791 if( loop(bp,dx-1, dy/4 ,dx,cs,0,LE)>2*dx/3 ) Break; // ~5 ~S
1792
1793 if (!hchar)
1794 if ( get_bw(x0+dx/8,x0+dx/8,y0+dy/4,y1-dy/16,box1->p,cs,2) == 0
1795 && num_cross(x1-dx/4,x1-dx/4,y0,y1,box1->p,cs)!=2
1796 && num_cross(x1-dx/8,x1-dx/8,y0,y1,box1->p,cs)!=2 ) Break; // ~r
1797
1798 if (dy>15)
1799 if( num_cross(x0,x1,y1-dy/4,y1-dy/4,box1->p,cs)>1
1800 && num_cross(x0,x1,y0+dy/4,y0+dy/4,box1->p,cs)>1 ) Break; // ~H
1801
 /* soft rules against E: each matching sub-condition costs 2% */
1802 if( dx>4 )
1803 if( loop(bp,dx-1 ,3*dy/4,dx,cs,0,LE)-
1804 loop(bp,0 ,3*dy/4,dx,cs,0,RI)>dx/5+1
1805 && loop(bp,dx-1-dx/8,dy-1 ,dy,cs,0,UP)<dy/4 ) {
1806 if( loop(bp,dx-1 ,5*dy/16,dx,cs,0,LE)-
1807 loop(bp,0 ,5*dy/16,dx,cs,0,RI)>=dx/5+1) ad=98*ad/100; // ~E
1808 i=loop(bp,dx/8,0,dy,cs,0,DO);
1809 if (i<dy/8 || i>dy/2) {
1810 ad=98*ad/100; // ~E, could also be a "f" with big serifs
1811 MSG(fprintf(stderr,"ad=%d",ad);) }
1812 if (!gchar) { ad=98*ad/100;
1813 MSG(fprintf(stderr,"ad=%d",ad);) }
1814 }
1815 i = loop(bp,dx-1 ,3*dy/4,dx ,cs,0,LE)/2;
1816 if (loop(bp,dx-1-i , dy-1,dy/2,cs,0,UP)<dy/4)
1817 if (loop(bp,0 ,3*dy/4,dx ,cs,0,RI)<dx/4) {
1818 ad=98*ad/100; // ~E but serif-f
1819 MSG(fprintf(stderr,"ad=%d",ad);) }
1820
1821 if( loop(bp,0,dy/4,dx ,cs,0,RI)>1
1822 && loop(bp,0, 0,dy/4,cs,0,DO)<dy/4 ) {
1823 ad=95*ad/100; // ~I
1824 MSG(fprintf(stderr,"ad=%d",ad);) }
1825
1826 if (get_bw(x0+dx/16,x1-dx/16,y0,y0,box1->p,cs,2) == 0) { // white pixels?
1827 ad=98*ad/100; // F
1828 MSG(fprintf(stderr,"ad=%d",ad);) }
1829
1830 if (!hchar) ad=ad*98/100; // d*=100;d/=128 // not 100% !
 /* one point back for a gchar box with ad<99 whose y1 passes the m3/m4 test */
1831 if (box1->m4>0 && gchar && ad<99 &&
1832 8*box1->y1 >= box1->m4*7+box1->m3) ad++;
1833 Setac(box1,'f',ad);
1834 break;
1835 }
1836 return box1->c;
1837 }
1838
/*
 * ocr0_bB: rule based tests whether the character box sdata->box1 is a 'B'
 * (first loop) or a 'b' (second loop).
 *
 * Reads box1 (frame, num_frames, p), the pixmap bp, the threshold cs,
 * hchar/gchar and sdata->holes.  Each test runs inside a for-loop whose body
 * ends in break (executed at most once); a failing hard rule leaves via
 * Break (macro defined outside this chunk).  A passing test calls
 * Setac(box1,'B',ad) resp. Setac(box1,'b',ad) with ad starting at 100 and
 * scaled down by the soft rules.  Returns 'b' directly when the b test ends
 * with ad>=100, otherwise box1->c.
 *
 * NOTE(review): the B test reads holes.hole[0] and holes.hole[1] without
 * checking holes.num (that check is commented out); it relies on
 * num_frames==3 implying two holes -- confirm.
 * NOTE(review): helper results (loop, get_bw, num_cross, num_hole) are
 * interpreted as the existing comments do; d is written but never read.
 */
ocr0_bB(ocr0_shared_t * sdata)1839 static wchar_t ocr0_bB(ocr0_shared_t *sdata){
1840 struct box *box1=sdata->box1;
1841 pix *bp=sdata->bp;
1842 int i,j,d,x,y,i1,i2,i3,hchar=sdata->hchar,gchar=sdata->gchar,
1843 x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
1844 int dx=x1-x0+1,dy=y1-y0+1, /* size */
1845 ad; /* weight handed to Setac */
1846
1847 // --- test B ---------------------------------------------------
1848 for(ad=d=100;dx>2 && dy>4;){ // loop condition: dx>=3 and dy>=5
1849 DBG( wchar_t c_ask='B'; )
1850 //if (sdata->holes.num < 2) Break; /* tolerant against a tiny hole */
1851 if (box1->num_frames != 3) Break; // excludes tiny holes 1810.rnd80
 /* get_bw()==1 is required on the left half of every row of the upper
    half and on the left third of every row of the lower half */
1852 for(i=1,y=y0;y<y1-dy/2 && i;y++)
1853 if( get_bw(x0,x0+dx/2, y , y ,box1->p,cs,1) != 1 ) i=0;
1854 if( !i ) Break;
1855 for(i=1,y=y1-dy/2;y<y1 && i;y++)
1856 if( get_bw(x0,x0+dx/3, y , y ,box1->p,cs,1) != 1 ) i=0;
1857 if( !i ) Break;
 /* the pixel (x1,y0) must not give get_bw()==1 */
1858 if( get_bw(x1,x1 , y0 , y0 ,box1->p,cs,1) == 1 ) Break;
 /* one of the columns x0+dx/2, x1-dx/3 must have num_cross()==3 */
1859 if( num_cross(x0+dx/2, x0+dx/2,y0,y1 ,box1->p,cs) != 3 )
1860 if( num_cross(x1-dx/3, x1-dx/3,y0,y1 ,box1->p,cs) != 3 ) Break;
1861 /* --- detect center of lower hole --- */
1862 y = loop(box1->p,x0+dx/2,y1 ,dy,cs,0,UP); if (y>1+dy/8) Break;
1863 y+= loop(box1->p,x0+dx/2,y1-y,dy,cs,1,UP); if (y>dy/3) Break;
1864 y=y1-y-loop(box1->p,x0+dx/2,y1-y,dy,cs,0,UP)/2; if (y<y0+3*dy/8) Break;
1865 if (y<y0+dy/2) ad=96*ad/100;
 /* y is an absolute row here, y-y0 the same row inside bp */
1866 if( num_cross(0,dx-1,y-y0 ,y-y0 ,bp,cs) != 2 )
1867 if( num_cross(0,dx-1,y-y0+1,y-y0+1,bp,cs) != 2 ) Break;
1868 if( num_cross(0,dx-1, dy/4 , dy/4 ,bp,cs) != 2 )
1869 if( num_cross(0,dx-1, dy/4+1, dy/4+1,bp,cs) != 2 )
1870 if( num_cross(0,dx-1, dy/4-1, dy/4-1,bp,cs) != 2 ) Break;
 /* from here on y is box-relative: first row in dy/4..3*dy/4-1 with
    num_cross()==1 (presumably the waist between the two bows) */
1871 for( y=dy/4;y<3*dy/4;y++ ) if( num_cross(0,dx-1,y,y,bp,cs)==1 ) break;
1872 if( y==3*dy/4 ) Break;
1873
1874 if( loop(box1->p,x0,y0+ y ,dx,cs,0,RI)
1875 > loop(box1->p,x0,y0+dy/4,dx,cs,0,RI)+dx/32 )
1876 if( get_bw(x0,x0,y0,y0,box1->p,cs,1) == 0 )
1877 if( get_bw(x0,x0,y1,y1,box1->p,cs,1) == 0 ) Break; // ~8
 /* i1, i2, i3: left loop() results at dy/4, around dy/2 (maximum of three
    rows) and at y1-dy/4 */
1878 i1=loop(box1->p,x0,y0+dy/4,dx,cs,0,RI);
1879 i2=loop(box1->p,x0,y0+dy/2,dx,cs,0,RI);
1880 i =loop(box1->p,x0,y0+dy/2-dy/ 8,dx,cs,0,RI); if(i>i2) i2=i;
1881 i =loop(box1->p,x0,y0+dy/2-dy/16,dx,cs,0,RI); if(i>i2) i2=i;
1882 i3=loop(box1->p,x0,y1-dy/4,dx,cs,0,RI);
1883 if(dy>16 && i3<i2 && i1+i3<2*i2){
1884 if (i3+i1<2*i2-dx/16) ad=98*ad/100; // ~8
1885 if (i3+i1<2*i2-dx/8 ) ad=96*ad/100;
1886 if( loop(box1->p,x0,y0+ 1 ,dx,cs,0,RI)
1887 >= loop(box1->p,x0,y0+ 3 ,dx,cs,0,RI)+dx/32 )
1888 if( loop(box1->p,x0,y0+ 0 ,dx,cs,0,RI)
1889 > loop(box1->p,x0,y0+ 3 ,dx,cs,0,RI)+dx/32 )
1890 if( loop(box1->p,x0,y1- 0 ,dx,cs,0,RI)
1891 > loop(box1->p,x0,y1- 3 ,dx,cs,0,RI)+dx/32 )
1892 if( loop(box1->p,x0,y1- 1 ,dx,cs,0,RI)
1893 > loop(box1->p,x0,y1- 3 ,dx,cs,0,RI)+dx/32 ) Break; // ~8 Aug00
1894 }
1895
1896 // if (sdata->holes.num != 2) Break;
1897 // if (box1->num_frames != 3) Break; // excludes tiny holes 1810.rnd80
 /* reject if both holes start above row y-1 or both end below row y+1 */
1898 if (sdata->holes.hole[0].y0 < y-1
1899 && sdata->holes.hole[1].y0 < y-1 ) Break;
1900 if (sdata->holes.hole[0].y1 > y+1
1901 && sdata->holes.hole[1].y1 > y+1 ) Break;
1902 // if( num_hole(0,dx-1,0 ,y+1 ,bp,cs,NULL) != 1 ) Break;
1903 // if( num_hole(0,dx-1,y-1,dy-1,bp,cs,NULL) != 1 ) Break;
1904 // out_x(box1);
1905
1906 for( x=dx,y=dy/6; y<dy-dy/8; y++ ) // left border straight
1907 { i=loop(box1->p,x0,y0+y,dx,cs,0,RI); if( i>x+dx/9 ) break;
1908 if(i<x) x=i;
1909 } if( y<dy-dy/8 ) Break; // ~8 bad_a
1910
1911 for( x=dx,y=1;y<dy/4;y++ ) // right border straight
1912 { i=loop(bp,dx-1,dy-y,dx,cs,0,LE);
1913 if( i<x ) x=i; else if( i>x )break;
1914 } if( y<dy/4 ) Break; // ~ff (serifs?)
1915
1916 x=loop(bp,0,dy/2 ,dx,cs,0,RI);
1917 i=loop(bp,0,dy/2-1,dx,cs,0,RI); if (i>x) x=i; // allow dust
1918 i=loop(bp,0,dy/2+1,dx,cs,0,RI); if (i>x) x=i;
1919 if ( loop(bp,0, dy/8,dx,cs,0,RI)
1920 +loop(bp,0,7*dy/8,dx,cs,0,RI) > 2*x+1 ) Break; // not convex!
1921
1922 if(!hchar){ // ~ fat_a
1923 ad=99*ad/100;
1924 x =loop(bp,0,dy/4,dx,cs,0,RI);
1925 if(loop(bp,0,dy/2,dx,cs,0,RI)>x+dx/8) ad=97*ad/100;
1926 }
1927
1928 if ( (!hchar) && (dx<=10 || dy<=10) ) ad=97*ad/100; // hchar or good_quality
1929 if (gchar) ad=99*ad/100;
1930 Setac(box1,'B',ad);
1931 break;
1932 }
1933 // --- test b ---------------------------------------------------
1934 for(ad=d=100;dx>3 && dy>4;){ // loop condition: dx>=4 and dy>=5
1935 DBG( wchar_t c_ask='b'; )
1936 //if (sdata->holes.num < 1) Break; /* tolerant against a tiny hole */
1937 if (box1->num_frames != 2) Break; // excludes tiny holes 1810.rnd80
 /* NOTE(review): if Break expands to a plain break, the first Break below
    only ends this inner row scan; the test on y after it then rejects
    when the scan stopped more than dy/32+1 rows above y1 -- confirm */
1938 for(y=y0;y<y1;y++)
1939 if( get_bw(x0 , x0+dx/2, y , y ,box1->p,cs,1) != 1 ) Break;
1940 if(y<y1-dy/32-1) Break;
1941 if( get_bw(x0+ dx/2, x0+dx/2, y1-dy/3, y1 ,box1->p,cs,1) != 1 ) Break;
1942 if( get_bw(x1- dx/2, x1 , y1-dy/3, y1-dy/3,box1->p,cs,1) != 1 ) Break;
 /* the upper right region must not give get_bw()==1 */
1943 if( get_bw(x1- dx/3, x1 , y0 , y0+dy/5,box1->p,cs,1) == 1 ) Break;
1944 if( get_bw(x1-4*dx/9, x1 , y0+dy/5, y0+dy/5,box1->p,cs,1) == 1 ) Break;
1945 if( num_cross(x0,x1,y0+dy/4 ,y0+dy/4 ,box1->p,cs) > 1 ) // &
1946 if( num_cross(x0,x1,y0+dy/4-1,y0+dy/4-1,box1->p,cs) > 1 )
1947 if( dy<16 ||
1948 num_cross(x0,x1,y0+dy/5 ,y0+dy/5 ,box1->p,cs) > 1 ) Break; // fat b
 /* rows dy/2..dy-dy/8: i counts rows with num_cross()==2, j all others;
    at least two thirds of the rows must have two crossings */
1949 for(i=j=0,y=dy/2;y<dy-dy/8;y++)
1950 if( num_cross(0,dx-1,y,y,bp,cs) == 2 ) i++; else j++;
1951 if( i<2*j ) Break; // v024a4
1952 if (sdata->holes.num != 1) Break;
1953 if (sdata->holes.hole[0].y0 < dy/4) Break;
1954 if ((sdata->holes.hole[0].y1-sdata->holes.hole[0].y0+1)
1955 *(sdata->holes.hole[0].x1-sdata->holes.hole[0].x0+1)*16
1956 < dx*dy) ad=90*ad/100; // hole too small (bounding area < dx*dy/16)
1957 if( num_hole( x0, x1 , y0+dy/4, y1,box1->p,cs,NULL) != 1 ) Break;
1958 i=loop(bp,dx-1,dy-1 ,dx,cs,0,LE);
1959 j=loop(bp,dx-1,dy-1-dy/8,dx,cs,0,LE); if(j>i) Break;
1960 // 2017-08 divided To to Tb ?
1961 i=loop(bp,0 ,dy/8 ,dx,cs,0,RI); if (i>dx/2) Break;
1962 i=loop(bp,i ,dy/8 ,dx,cs,1,RI); j=i; // upper width
1963 i=loop(bp,0 ,2*dy/3 ,dx,cs,0,RI); if (i>dx/4) Break;
1964 i=loop(bp,i ,2*dy/3 ,dx,cs,1,RI); // lower width
1965 DBG( IFV fprintf(stderr,"\nDBG b ij %3d %3d",i,j);)
1966 if (2*j<i) ad=98*ad/100;
1967 if (!hchar) ad=99*ad/100;
1968 if ( gchar) ad=99*ad/100;
1969 Setac(box1,'b',ad);
 /* full weight: report 'b' directly instead of box1->c */
1970 if (ad>=100) return 'b';
1971 break;
1972 }
1973 return box1->c;
1974 }
1975
/*
 * ocr0_dD: rule based tests whether the character box sdata->box1 is a 'D'
 * (first loop) or a 'd' (second loop).
 *
 * Reads box1 (frame, dots, num_frames, p), the pixmap bp (including bp->x
 * and bp->y), the threshold cs, hchar/gchar and sdata->holes.  Each test
 * runs inside a for-loop whose body ends in break (executed at most once);
 * a failing hard rule leaves via Break (macro defined outside this chunk).
 * A passing test calls Setac(box1,'D',ad) resp. Setac(box1,'d',ad) with ad
 * starting at 100 and scaled down by the soft rules.  Returns box1->c.
 *
 * NOTE(review): the D test reads holes.hole[0] without checking holes.num
 * (that check is commented out); it relies on num_frames==2 -- confirm.
 * NOTE(review): helper results (loop, get_bw, num_cross) are interpreted as
 * the existing comments do; d is written but never read.
 */
ocr0_dD(ocr0_shared_t * sdata)1976 static wchar_t ocr0_dD(ocr0_shared_t *sdata){
1977 struct box *box1=sdata->box1;
1978 pix *bp=sdata->bp;
1979 int i,d,x,y,ya,yb,hchar=sdata->hchar,gchar=sdata->gchar,
1980 x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
1981 int dx=x1-x0+1,dy=y1-y0+1, /* size */
1982 ad; /* weight handed to Setac */
1983
1984 // --- test D ---------------------------------------------------
1985 for(ad=d=100;dx>2 && dy>3;){ // loop condition: dx>=3 and dy>=4
1986 DBG( wchar_t c_ask='D'; )
1987 //if (sdata->holes.num < 1) Break; /* tolerant against a tiny hole */
1988 if (box1->num_frames != 2) Break; // excludes tiny holes 1810.rnd80
 /* get_bw()==1 is required in the left and right third of the middle row
    and in the right half of row y0+dy/4; the top of column x1 must not
    give 1 */
1989 if( get_bw(x0 ,x0+dx/3,y0+dy/2,y0+dy/2,box1->p,cs,1) != 1 ) Break;
1990 if( get_bw(x1-dx/3,x1 ,y0+dy/2,y0+dy/2,box1->p,cs,1) != 1 ) Break;
1991 if( get_bw(x1 ,x1 ,y0 ,y0+dy/16,box1->p,cs,1) == 1 ) Break;
1992 if( get_bw(x1-dx/2,x1 ,y0+dy/4,y0+dy/4 ,box1->p,cs,1) != 1 ) Break;
 /* num_cross()==2 is required on a column (x0+dx/2 or x1-dx/3) and on
    the rows near y0+dy/3 and at y1-dy/3 */
1993 if( num_cross(x0+dx/2,x0+dx/2,y0 ,y1 ,box1->p,cs) != 2 )
1994 if( num_cross(x1-dx/3,x1-dx/3,y0 ,y1 ,box1->p,cs) != 2 ) Break;
1995 if( num_cross(x0 ,x1 ,y0+dy/3 ,y0+dy/3 ,box1->p,cs) != 2
1996 && num_cross(x0 ,x1 ,y0+dy/3+1,y0+dy/3+1,box1->p,cs) != 2 ) Break;
1997 if( num_cross(x0 ,x1 ,y1-dy/3,y1-dy/3,box1->p,cs) != 2 ) Break;
1998 //if (box1->num_frames != 2) Break; // excludes tiny holes 1810.rnd80
1999 //if (sdata->holes.num != 1) Break;
 /* the hole must start at or above dy/3 and end at or below dy-1-dy/3 */
2000 if (sdata->holes.hole[0].y0 > dy/3) Break;
2001 if (sdata->holes.hole[0].y1 < dy-1-dy/3) Break;
2002 // if( num_hole (x0 ,x1 ,y0 ,y1 ,box1->p,cs,NULL) != 1 ) Break;
2003 // test if left edge is straight
 /* scanning upwards; x keeps the largest left loop() result so far, a row
    that is at least 2+dx/16 smaller stops the scan and rejects */
2004 for(x=0,y=bp->y-1-dy/8;y>=dy/5;y--){
2005 i=loop(bp,0,y,x1-x0,cs,0,RI);
2006 if( i+2+dx/16<=x ) break;
2007 if( i>x ) x=i;
2008 }
2009 if (y>=dy/5 ) Break;
2010 /* test if right edge is falling */
2011 for(x=dx,y=0;y<dy/3;y++){
2012 i=loop(bp,bp->x-1,y,x1-x0,cs,0,LE);
2013 if( i>x+dx/16 ) break;
2014 if( i<x ) x=i;
2015 }
2016 if (y<dy/3 ) Break;
2017 /* test if right edge is raising */
2018 for(x=dx,y=bp->y-1;y>2*dy/3;y--){
2019 i=loop(bp,bp->x-1,y,x1-x0,cs,0,LE);
2020 if( i>x+dx/16 ) break;
2021 if( i<x ) x=i;
2022 }
2023 if (y>2*dy/3 ) Break;
2024 if( loop(bp,dx-1,dy-1 ,dx,cs,0,LE) <=
2025 loop(bp,dx-1,dy-2-dy/16,dx,cs,0,LE) ) Break; // P
2026
2027 y=loop(bp,dx/2,dy-1,dy,cs,0,UP)-1; if (dy>16) y/=2;
2028 if ( y>=dy/16 ) { y-=dy/16;
2029 if (get_bw(dx/2,dx-1,dy-1-y,dy-1-y,bp,cs,1)==1) Break; // ~A
2030 }
2031
 /* ya, yb: upward loop() results from the bottom row at column 0 and
    column dx/16+1 */
2032 ya=loop(bp, 0,dy-1,dy,cs,0,UP);
2033 yb=loop(bp,dx/16+1,dy-1,dy,cs,0,UP);
2034 if (ya<dy/2 && ya>dy/16 && ya>yb) Break; // ~O
2035 if (loop(bp, 0,dy-1-dy/16,dx,cs,0,RI)>dx/16) ad=99*ad/100; // O? 0907
2036 if (loop(bp, 0,dy-1-dy/16,dx,cs,0,RI)>=
2037 loop(bp,dx-1,dy-1-dy/16,dx,cs,0,LE)) ad=97*ad/100; // ~O 0907
2038
2039 if ( loop(bp, dx/2, 0,dy,cs,0,DO)
2040 -loop(bp, dx/2,dy-1,dy,cs,0,UP) > dy/8 ) ad=97*ad/100; // ~b
2041
2042
2043
2044 if (loop(bp, 0, 0,dx,cs,0,RI)>=dx/2
2045 && loop(bp,dx-1,dy-1,dx,cs,0,LE)>=dx/2
2046 && loop(bp, 0,dy/2,dx,cs,0,RI)< 2 ) ad=96*ad/100; // thin O
2047
2048 if(box1->dots) ad=ad*94/100;
2049 if ( gchar) ad=99*ad/100;
2050 if (!hchar) ad=99*ad/100;
2051 Setac(box1,'D',ad);
2052 break;
2053 }
2054 // --- test d ---------------------------------------------------
2055 for(d=100;dx>2 && dy>3;){ // loop condition: dx>=3 and dy>=4
2056 DBG( wchar_t c_ask='d'; )
2057 ad=100;
2058 //if (sdata->holes.num < 1) Break; /* tolerant against a tiny hole */
2059 if (box1->num_frames != 2) Break; // excludes tiny holes 1810.rnd80
 /* regions that must give get_bw()==1 (lower left, lower right, upper
    right, bottom centre) */
2060 if( get_bw(x0 , x0+dx/2, y1-dy/6, y1-dy/9,box1->p,cs,1) != 1 ) Break;
2061 if( get_bw(x0 , x0+dx/2, y1-dy/3, y1-dy/3,box1->p,cs,1) != 1 ) Break;
2062 if( get_bw(x0+dx/2, x1 , y1-dy/3, y1-dy/3,box1->p,cs,1) != 1 ) Break;
2063 if( get_bw(x1-dx/4, x1 , y0+dy/8, y0+dy/8,box1->p,cs,1) != 1 ) Break;
2064 if( get_bw(x0+dx/2, x0+dx/2, y1-dy/4, y1 ,box1->p,cs,1) != 1 ) Break;
 /* the upper left corner must not give get_bw()==1 */
2065 if(dy>19) // 0..dx/3 doesn't work on serif d, 0..dx/4 is more safe
2066 if( get_bw(x0 , x0+dx/4, y0 , y0+dy/5,box1->p,cs,1) == 1 ) Break;
2067 if( get_bw(x0 , x0+dx/4, y0 , y0+dy/6,box1->p,cs,1) == 1 ) Break;
2068 if( get_bw(x0 , x0+dx/4, y1-dy/8, y1 ,box1->p,cs,1) != 1 ) Break;
2069 if( get_bw(x0+dx/2-1,x0+dx/2,y1-dy/8, y1 ,box1->p,cs,1) != 1 ) Break; // ~"A
2070 if( loop(bp,bp->x-1, bp->y/4,x1-x0,cs,0,LE) >
2071 loop(bp,bp->x-1,3*bp->y/4,x1-x0,cs,0,LE)+1 ) Break;
2072 // more than dx/8 of width should show two horizontal lines (o-like)
 /* i counts down from dx/8+1 for every column with num_cross()==2;
    more than one missing column rejects, exactly one costs 1% */
2073 for (i=dx/8+1,x=0;x<dx && i;x++) {
2074 if (num_cross(x, x, 0, dy-1, bp, cs) == 2) i--; // fails for serif?
2075 } if (i>1) Break; // "d as a" patch
2076 if (i==1) ad=99*ad/100; // "d as a" patch, serif?
 /* rows from dy/4 on: i counts down for rows with two crossings and up
    for rows with more than three; a remainder costs 2% */
2077 for(i=dy/6+1,y=dy/4;y<dy && i;y++){
2078 if( num_cross(0 ,dx-1,y ,y , bp,cs) == 2 ) i--;
2079 if( num_cross(0 ,dx-1,y ,y , bp,cs) > 3 ) i++; // ~al
2080 } if( i ) ad=98*ad/100;
 /* upper half: dy/8+1 rows need exactly one crossing in the whole row and
    exactly one in the right half */
2081 for(i=dy/8+1,y=0;y<dy/2 && i;y++){
2082 if( num_cross(0 ,dx-1,y ,y , bp,cs) == 1 )
2083 if( num_cross(dx/2,dx-1,y ,y , bp,cs) == 1 ) i--;
2084 } if( i ) Break;
2085 if (sdata->holes.num<1) Break;
2086 if (sdata->holes.num>1) {
2087 if (dx<6) Break; ad=95*ad/100; } // glued j above 8 (4x6 sample)
2088 MSG(fprintf(stderr,"hole[0].y0,y1= %d %d",sdata->holes.hole[0].y0,sdata->holes.hole[0].y1););
2089 if ( sdata->holes.hole[0].y0 < dy/4 ) Break;
2090 if (dy-sdata->holes.hole[0].y1 > dy/4+1) Break; // glued et
2091 // if( num_hole(x0 , x1 , y0+dy/4 , y1 ,box1->p,cs,NULL) !=1 ) Break;
2092 if( num_cross(0 ,dx-1,dy-1-dy/4,dy-1-dy/4,bp,cs) != 2 ) { // glued al
2093 if (dy>15) { Break; } else ad=96*ad/100;
2094 }
2095 if (!hchar) ad=98*ad/100;
2096 if ( gchar) ad=99*ad/100;
2097 Setac(box1,'d',ad);
2098 break;
2099 }
2100 return box1->c;
2101 }
2102
/*
 * ocr0_F: rule based test whether the character box sdata->box1 is an 'F'.
 *
 * Reads box1 (frame, m2, num_frames, p), the pixmap bp (including bp->x and
 * bp->y), the threshold cs and the hchar/gchar flags.  The rules run inside
 * a for-loop whose body ends in break (executed at most once); a failing
 * hard rule leaves via Break (macro defined outside this chunk).  On success
 * Setac(box1,'F',ad) is called with ad starting at 100 and scaled down by
 * the soft rules.  Returns box1->c in every case.
 *
 * Fix in this version: the "screen font f" probe passed dy-1 as the column
 * and the horizontal run length i as the row to loop(); every other UP probe
 * in this file is loop(bp,<column>,<row>,...), so the probe now starts at
 * column i in the bottom row dy-1.
 *
 * NOTE(review): helper results (loop, get_bw, num_cross) are interpreted as
 * the existing comments do; d is written but never read.
 */
ocr0_F(ocr0_shared_t * sdata)2103 static wchar_t ocr0_F(ocr0_shared_t *sdata){
2104 struct box *box1=sdata->box1;
2105 pix *bp=sdata->bp;
2106 int i,j,d,x,y,hchar=sdata->hchar,gchar=sdata->gchar,
2107 x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
2108 int dx=x1-x0+1,dy=y1-y0+1, /* size */
2109 ad; /* weight handed to Setac */
2110
2111 // --- test F ---------------------------------------------------
2112 for(ad=d=100;dx>2 && dy>4;){ // dx>1 dy>2*dx
2113 DBG( wchar_t c_ask='F'; )
2114 //if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
2115 if (box1->num_frames != 1) Break; // excludes tiny holes 1810.rnd80
 /* regions that must give get_bw()==1: top of the middle column, left
    quarter of row y1-dy/4, left half of row y0+dy/4 */
2116 if( get_bw(x0+dx/2,x0+dx/2,y0,y0+dy/8,box1->p,cs,1) != 1 ) Break;
2117 if( get_bw(x0,x0+dx/4,y1-dy/4,y1-dy/4,box1->p,cs,1) != 1 ) Break;
2118 if( get_bw(x0,x0+dx/2,y0+dy/4,y0+dy/4,box1->p,cs,1) != 1 ) Break;
2119
 /* bottom quarter: every row needs a right-side loop() result of at least
    3 and at least dx/3; the largest one must reach dx/2 */
2120 for (x=0,y=0;y<dy/4;y++) {
2121 j=loop(bp,dx-1,dy-1-y,dx,cs,0,LE); if(j<3 || 3*j<dx) break; // ~f Jun00
2122 if (j>x) x=j;
2123 } if (y<dy/4 || x<dx/2) Break;
2124
2125 for( i=1,y=0; y<dy/4 && i; y++ ){ // long black line
2126 j=loop(bp,0,y,dx,cs,0,RI);
2127 j=loop(bp,j,y,dx,cs,1,RI); if( j>dx/2 ) i=0; }
2128 if( i ) Break;
2129
2130 x=loop(bp,0,dy-1-dy/4,dx,cs,0,RI);
2131 x=loop(bp,x,dy-1-dy/4,dx,cs,1,RI); // stroke thickness
2132 for( i=1,y=dy/3; y<dy-1-dy/3 && i; y++ ) // black line
2133 { j=loop(bp,0,y,dx,cs,0,RI);
2134 j=loop(bp,j,y,dx,cs,1,RI); if( j>dx/3 && ((j>2*x && dx>8) || j>x+1)) i=0; }
2135 if( i ) Break;
2136
2137 y=dy/8; if (y<1) y=1;
2138 for( i=1; y<dy-1-dy/2; y++ ){ // search horizontal white gap
2139 x =loop(bp,dx-1,y,dx,cs,0,LE); if(x<2) continue; // skip serifs
2140 j =loop(bp,dx-x,y,dy/4,cs,0,UP);
2141 x+=loop(bp,dx-x,y-j+1,dx,cs,0,LE); if (x>=dx/3) { i=0; break; }
2142 }
2143 if( i ) Break;
2144
2145 // check for vertical line on left side
2146 for(i=1,y=1;y<=dy/2 && i;y++)
2147 if( get_bw(0,dx/2,y,y,bp,cs,1) != 1 ) i=0;
2148 if( !i ) Break;
2149
2150 for(i=1,y=dy/2;y<dy && i;y++)
2151 if( get_bw(0,dx/3,y,y,bp,cs,1) != 1 ) i=0;
2152 if( !i ) Break;
2153
2154 i=loop(bp,dx-1,dy-1,dx,cs,0,LE); // serif or E ?
2155 if (i<=dx/3) {
 /* plain break here (not the Break macro): also leaves the test loop */
2156 if (loop(bp,dx-1,(dy+4)/8,dx,cs,0,LE)>dx/8 // no serif
2157 || loop(bp, 0, dy-3,dx,cs,0,RI)<1) break;
2158 ad=99*ad/100;
2159 }
2160 if( get_bw(dx-1-dx/4,dx-1,dy-1-dy/4,dy-1,bp,cs,1) == 1 ) Break; // ~E
2161 if( get_bw(dx-1 ,dx-1,0 ,dy/3,bp,cs,1) != 1 ) Break;
2162
2163 if( loop(bp,0, bp->y/4,dx,cs,0,RI) <
2164 loop(bp,0,3*bp->y/4,dx,cs,0,RI)-1 ) Break;
2165 // if( num_hole(x0 , x1 , y0 , y1 ,box1->p,cs,NULL) >0 ) Break;
2166 //if (sdata->holes.num > 0) Break;
 /* at least one column in dx/4..dx-2 must have num_cross()==2 */
2167 for(i=0,x=dx/4;x<dx-1;x++)
2168 if( num_cross(x,x,0,dy-2,bp,cs) == 2 ) i++;
2169 if ( i<1 ) Break; // 0.2.4a4
2170
2171 if(dy<20) /* special case of small fi, not very elegant */
2172 if( get_bw( 1, 1,1,1,bp,cs,1) == 1
2173 && get_bw( 0, 0,2,2,bp,cs,1) == 1
2174 && get_bw(dx-2,dx-1,0,0,bp,cs,1) == 0
2175 && get_bw( 0, 1,0,0,bp,cs,1) == 0
2176 && get_bw( 0, 0,0,1,bp,cs,1) == 0 ) Break;
2177
2178 // check for screen font f
 /* i = left loop() result at 3/4 height minus 1, used as a column; the UP
    probe starts at that column in the bottom row (the column/row arguments
    were swapped before) */
2179 i= loop(bp,0,3*bp->y/4,dx,cs,0,RI)-1;
2180 if (i>=0 && loop(bp,i,dy-1,dy,cs,0,UP)<=3*dy/4 ) ad=ad*98/100;
2181
2182 // check for screen font P
2183 i= loop(bp,bp->x-1,bp->y/4,dx,cs,0,LE);
2184 if (i<1) {
2185 j=i+loop(bp,bp->x-1-i,bp->y/4, dx ,cs,1,LE);
2186 j= loop(bp,bp->x-1-j,bp->y/4,3*dy/4,cs,0,DO);
2187 if (j<=dy/2) {
2188 i=loop(bp,bp->x-1,0,dx,cs,0,LE);
2189 ad=ad*98/100;
2190 if (i>dx/8) Break;
2191 if (i) ad=98*ad/100;
2192 }
2193 }
2194
2195 if (!hchar) if ((box1->m2-box1->y0)*8>=dy) { // ignore bad m1..4
2196 if ( num_cross(2*dx/3,2*dx/3,0,dy-1,bp,cs) < 2 ) ad=90*ad/100; // ~r
2197 }
2198 if (gchar) ad=99*ad/100;
2199 Setac(box1,'F',ad);
2200 break;
2201 }
2202 return box1->c;
2203 }
2204
ocr0_uU(ocr0_shared_t * sdata)2205 static wchar_t ocr0_uU(ocr0_shared_t *sdata){
2206 struct box *box1=sdata->box1;
2207 pix *bp=sdata->bp;
2208 int i1,i2,i3,i4,i,j,d,x,y,hchar=sdata->hchar,gchar=sdata->gchar,
2209 x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
2210 int dx=x1-x0+1,dy=y1-y0+1, /* size */
2211 ad; /* tmp-vars */
2212 int (*aa)[4]=sdata->aa; /* corner-points, (x,y,dist^2,vector_idx) */
2213 wchar_t bc=UNKNOWN;
2214
2215 // --- test uU ---------------------------------------------------
2216 // in Mitte so breit wie oben (bei V kontinuierlich schmaler)
2217 for(ad=d=100;dx>2 && dy>3;){ // min 3x4
2218 DBG( wchar_t c_ask='u'; )
2219 //if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
2220 if (box1->num_frames > 3) Break; // u+"u excludes tiny holes 1810.rnd80
2221 if (aa[0][0]>x0+dx/3 || aa[0][1]>y0+dy/4) Break; // left top
2222 if (aa[1][0]>x0+dx/4 || aa[1][1]<y1-dy/4) Break; // left down
2223 if (aa[2][0]<x1-dx/3 || aa[2][1]<y1-dy/4) Break; // right down
2224 if (aa[3][0]<x1-dx/4 || aa[3][1]>y0+dy/4) Break; // right top
2225
2226 /* V.8x10
2227
2228 @...@
2229 @...@
2230 @...@
2231 @...@
2232 .@.@.
2233 ..@..
2234 */
2235 // upper gap from the top 2018-09
2236 i1=nearest_frame_vector(box1,aa[3][3],aa[0][3], x0+dx/2, y1);
2237 // lower gap from bottom (handwritten u?)
2238 i2=nearest_frame_vector(box1,aa[1][3],aa[2][3], x1-dx/4, y1-dy/4);
2239 if (box1->frame_vector[i1][1]<y1-dy/4) ad=99*ad/100; // 2018-09
2240 if (box1->frame_vector[i2][1]<y1-dy/4) ad=99*ad/100; // 2018-09
2241 MSG(fprintf(stderr,"i12 %d %d ad=%d",i1,i2,ad);)
2242 if (box1->frame_vector[i1][1]<y1-dy/3) Break; // 2018-09
2243 if (box1->frame_vector[i2][1]<y1-dy/3) Break; // 2018-09
2244
2245 for(y=y0+dy/4;y<y1-dy/4;y++) /* also handwritten u */
2246 if( num_cross(x0,x1,y,y,box1->p,cs) < 2 ) break;
2247 if( y<y1-dy/4 ) Break;
2248 if( get_bw(dx/2,dx/2,dy/2,dy-1,bp,cs,1)==0 ) Break;
2249 if( get_bw(dx/2,dx-1,dy/2,dy/2,bp,cs,1)==0 ) Break;
2250 for(i=0,x=3*dx/8;x<dx-dx/4;x++){
2251 y=loop(bp,x,0,dy,cs,0,DO); if(y>i)i=y; if(y<i && i>1) break;
2252 } if( i<dy/4 ) Break; x--;
2253 if( get_bw(0,x ,i-1,i-1,bp,cs,1)==0 ) Break;
2254 if( get_bw(x,dx-1,i-1,i-1,bp,cs,1)==0 ) Break;
2255
2256 for(i=dy/8+2,y=dy/8;y<dy-(dy+2)/4 && i;y++){ // 12%+1 Fehler
2257 j=num_cross(0,dx/2-((y>dy/2)?dx/8:0),y,y,bp,cs); // left vert line
2258 if( y<dy/2 && num_cross(dx/2,dx-1,y,y,bp,cs)>1 ) i--; // ~{\it v}
2259 if( y<dy/2 && (j<1 && j>2) ) { i--; ad=90*ad/100; }
2260 if( y>dy/2 && j!=1 ) { i--; ad=98*ad/100; // handwritten u?
2261 MSG(fprintf(stderr,"ad=%d",ad);) }
2262 } if( !i ) Break;
2263 for(i=dy/16+1,y=dy/8;y<dy-dy/4 && i;y++){ // 12%+1 Fehler
2264 j=num_cross(dx-dx/2,dx-1,y,y,bp,cs);
2265 if( y>dy/2 && (j<1 && j>2) ) i--;
2266 if( y<dy/2 && j!=1 ) i--;
2267 } if( !i ) Break;
2268 for(i=1,x=x0+dx/3;x<=x1-dx/3 && i;x++){
2269 if( get_bw( x, x, y0, y0+dy/3,box1->p,cs,1) != 1 ) i=0;
2270 } if( i ) Break;
2271 for(i=dx/4+1,x=x0+dx/3;x<=x1-dx/3 && i;x++){
2272 if( get_bw( x, x,y0+dy/3,y1-dy/3,box1->p,cs,3) != 2 ) i--;
2273 } if( !i ) Break;
2274 for(i=1,x=x0+dx/3;x<=x1-dx/3 && i;x++){
2275 if( get_bw( x, x,y1-dy/2,y1,box1->p,cs,3) == 2 ) i=0;
2276 if( get_bw( x, x,y1-dy/3,y1,box1->p,cs,3) == 2 ) { ad=98*ad/100;
2277 MSG(fprintf(stderr,"ad=%d",ad);) }
2278 } if( !i ) Break;
2279 if( num_cross(0 ,dx/2, dy/4, dy/4,bp,cs)==2
2280 && num_cross(dx-dx/2,dx-1,dy-dy/4,dy-dy/4,bp,cs)==1 ) Break; // ~{\it v}
2281
2282 i=loop(bp,0,dy-1-dy/16,dx,cs,0,RI);
2283 j=loop(bp,0,dy-1-dy/8 ,dx,cs,0,RI);
2284 if( i<j ) Break; // ~ll v0.2.4a3
2285 if(dy>15)
2286 if( loop(bp,dx-1,dy/16,dx,cs,0,LE)
2287 > loop(bp,dx-1,dy/8 ,dx,cs,0,LE)+1+dx/32 ) Break; // ~bad 0 (thinn)
2288 if( hchar && dy>7)
2289 if( loop(bp, 0, dy-1,dx,cs,1,RI)==dx
2290 && loop(bp,dx-1,3*dy/4,dx,cs,0,LE)>dx/16
2291 && loop(bp, 0,3*dy/4,dx,cs,0,RI)>dx/16
2292 && loop(bp,dx-1, dy/2,dx,cs,0,LE)>dx/16
2293 && loop(bp, 0, dy/2,dx,cs,0,RI)>dx/16
2294 ) Break; // melted ll
2295
2296 i=loop(bp, 0,dy-1-dy/8,dx,cs,0,RI); // 2010-10 -2 to -1
2297 j=loop(bp,dx-1,dy-1-dy/8,dx,cs,0,LE); // -//-
2298 if ( i>dx/4 && j>dx/4 && i+j>=dx/2) Break; // v
2299 if (i+j>=dx/2) ad=97*ad/100;
2300
2301 if ( num_cross(0,dx-1,dy/2,dy/2,bp,cs)!=2 ) ad=96*ad/100; // w
2302 if ( loop(bp,dx/2,dy-1,dy,cs,0,UP)>dy/16 ) ad=98*ad/100; // w
2303
2304 if (ad==100) ad=99; // ToDo: only if lines.wt<100
2305 bc='u';
2306 if (gchar) ad=98*ad/100;
2307 if (hchar) bc='U';
2308 if (box1->dots>0) ad=99*ad/100;
2309 Setac(box1,bc,ad);
2310 break;
2311 }
2312 return box1->c;
2313 }
2314
/*
 * ocr0_micro: test whether the box looks like the micro sign (MICRO_SIGN).
 *
 * Reads the box, its bitmap bp, the hchar/gchar flags and the threshold cs
 * from sdata. The test only runs when gchar is set and hchar is not, and
 * only for boxes with dx>2 and dy>4. The for-statement has no increment
 * expression and its body ends in break, so it works as a block that is
 * executed at most once and that any failed check can leave early.
 * When no check fails, Setac(box1,MICRO_SIGN,ad) is called; ad is set to
 * 100 and never reduced inside this function.
 *
 * Returns box1->c in every case.
 *
 * Calls that take box1->p are given coordinates offset by x0/y0, calls that
 * take bp are given coordinates in 0..dx-1 / 0..dy-1.
 *
 * NOTE(review): Break (capital B) is a macro defined outside this view,
 * presumably a break with debug output - confirm. This function mixes it
 * with the plain break statement.
 */
ocr0_micro(ocr0_shared_t * sdata)2315 static wchar_t ocr0_micro(ocr0_shared_t *sdata){
2316 struct box *box1=sdata->box1;
2317 pix *bp=sdata->bp;
2318 int i,j,d,x,y,i2,hchar=sdata->hchar,gchar=sdata->gchar,
2319 x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
2320 int dx=x1-x0+1,dy=y1-y0+1, /* size */
2321 ad; /* tmp-vars */
2322
2323 // --- test \mu µ MICRO_SIGN --------------------------------------
2324 // in the middle as wide as at the top (a V narrows continuously)
2325 if( gchar && !hchar )
2326 for(ad=d=100;dx>2 && dy>4;){ // requires dx>=3 and dy>=5 (old comment said "min 3x4")
// NOTE(review): the debug label is 'u' although this function tests the micro sign
2327 DBG( wchar_t c_ask='u'; )
2328 //if (sdata->holes.num > 1) break; /* tolerant against a tiny hole */
2329 if (box1->num_frames != 1) Break; // excludes tiny holes 1810.rnd80
// every row from y0+dy/8 up to (not including) m3-dy/4 must give num_cross()>=2
2330 for(y=y0+dy/8;y<box1->m3-dy/4;y++)
2331 if( num_cross(x0,x1,y,y,box1->p,cs) < 2 ) break;
2332 if( y<box1->m3-dy/4 ) break;
// both rectangles must give a non-zero get_bw() result
// (presumably: contain at least one black pixel - TODO confirm)
2333 if( get_bw(dx/2,dx/2,3*dy/8,7*dy/8,bp,cs,1)==0 ) break;
2334 if( get_bw(dx/2,dx-1,3*dy/8,7*dy/8,bp,cs,1)==0 ) break;
// find the first row y>=dy/2 where the run measured leftwards from the right
// edge exceeds 5/8 of dx
2335 for(y=dy/2;y<dy;y++){
2336 x=loop(bp,dx-1,y,dx,cs,0,LE); if(8*x>5*dx) break;
// NOTE(review): y is box-relative here (it indexes bp) but is compared with
// m3+m4, which are compared with y0-based values above - possible
// coordinate mix-up, confirm. i2 is stored but not read again in this function.
2337 } if( y>=dy || 2*y>box1->m3+box1->m4) break; i2=y;
// i = largest downward run from the top over the columns scanned; the scan
// stops at the first column whose run is shorter than the maximum so far
2338 for(i=0,x=2*dx/8;x<dx-1-dx/4;x++){
2339 y=loop(bp,x,0,dy,cs,0,DO); if(y>i)i=y; if(y<i && i>1) break;
2340 } if( i<dy/4 ) break; x--;
// row i-1 must give a non-zero get_bw() result on both sides of column x
2341 if( get_bw(0,x ,i-1,i-1,bp,cs,1)==0 ) break;
2342 if( get_bw(x,dx-1,i-1,i-1,bp,cs,1)==0 ) break;
// left half, row by row: i is an error budget (dy/16+1) that is decremented
// per offending row; the test fails once it reaches 0
2343 for(i=dy/16+1,y=dy/8;y<dy-(box1->m4-box1->m3)-dy/4 && i;y++){ // 12%+1 errors
2344 j=num_cross(0,dx/2,y,y,bp,cs);
2345 if( y<dy/2 && num_cross(dx/2,dx-1,y,y,bp,cs)>1 ) i--; // ~{\it v}
// NOTE(review): (j<1 && j>2) can never be true, so this line never decrements i;
// '||' may have been intended - changing it would alter recognition results
2346 if( y<dy/2 && (j<1 && j>2) ) i--;
2347 if( y>dy/2 && j!=1 ) i--;
2348 } if( !i ) break;
// same error-budget scheme for the right half
2349 for(i=dy/16+1,y=dy/8;y<dy-(box1->m4-box1->m3)-dy/4 && i;y++){ // 12%+1 errors
2350 j=num_cross(dx-dx/2,dx-1,y,y,bp,cs);
// NOTE(review): always-false condition, see the note in the loop above
2351 if( y>dy/2 && (j<1 && j>2) ) i--;
2352 if( y<dy/2 && j!=1 ) i--;
2353 } if( !i ) break;
// middle third of the columns: fail if every column gives get_bw()==1 in the
// top quarter (i stays 1 only if no column differs)
2354 for(i=1,x=x0+dx/3;x<=x1-dx/3 && i;x++){
2355 if( get_bw( x, x, y0, y0+dy/4,box1->p,cs,1) != 1 ) i=0;
2356 } if( i ) break;
// middle third of the columns: at most dx/4 columns may give a get_bw(...,3)
// result other than 2 between y0+dy/4 and y1-dy/2
2357 for(i=dx/4+1,x=x0+dx/3;x<=x1-dx/3 && i;x++){
2358 if( get_bw( x, x,y0+dy/4,y1-dy/2,box1->p,cs,3) != 2 ) i--;
2359 } if( !i ) break;
// exactly one crossing is required in the left half at dy/4, in the right
// half at dy-dy/2 and in the left quarter of the bottom row
2360 if( num_cross(0 ,dx/2, dy/4, dy/4,bp,cs)!=1 ) break;
2361 if( num_cross(dx-dx/2,dx-1,dy-dy/2,dy-dy/2,bp,cs)!=1 ) break;
// the lower right rectangle must not give get_bw()==1
2362 if( get_bw( (dx+2)/4,dx-1,dy-2-3*dy/16,dy-1,bp,cs,1) == 1 ) break;
2363 if( num_cross(0,dx/4,dy-1,dy-1,bp,cs)!=1 ) break;
2364
2365 Setac(box1,MICRO_SIGN,ad);
2366 break;
2367 }
2368 return box1->c;
2369 }
2370
/*
 * ocr0_vV: test whether the box looks like 'v' or 'V'.
 *
 * Reads the box, its bitmap bp, the hchar/gchar flags and the threshold cs
 * from sdata. The test only runs for dx>2 and dy>3. The for-statement has
 * no increment expression and its body ends in break, so it works as a
 * block that is executed at most once; Break leaves it when a check fails
 * (Break is a macro defined outside this view, presumably a break with
 * debug output - confirm).
 *
 * ad starts at 100 and is scaled down by the soft checks; it is always
 * scaled by 99/100 at the "be careful" line. When no hard check fails,
 * Setac(box1,bc,ad) is called with bc='V' if hchar is set, else 'v'.
 *
 * Returns box1->c in every case.
 *
 * Calls that take box1->p are given coordinates offset by x0/y0, calls that
 * take bp are given coordinates in 0..dx-1 / 0..dy-1.
 */
ocr0_vV(ocr0_shared_t * sdata)2371 static wchar_t ocr0_vV(ocr0_shared_t *sdata){
2372 struct box *box1=sdata->box1;
2373 pix *bp=sdata->bp;
2374 int i,j,d,x,y,i1,i2,i3,hchar=sdata->hchar,gchar=sdata->gchar,
2375 x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
2376 int dx=x1-x0+1,dy=y1-y0+1, /* size */
2377 ad; /* tmp-vars */
2378 wchar_t bc=UNKNOWN;
2379
2380 // --- test v -------------------------------------------------
2381 for(ad=d=100;dx>2 && dy>3;){ // min 3x4, ToDo: replace by vector-code
2382 DBG( wchar_t c_ask='v'; )
2383 //if (sdata->holes.num > 0) Break; /* no tolerant against a tiny hole */
2384 if (box1->num_frames != 1) Break; // excludes tiny holes 1810.rnd80
2385 /* V.8x10
2386
2387 @...@
2388 @...@
2389 @...@
2390 @...@
2391 .@.@.
2392 ..@..
2393 */
2394 x=loop(bp,dx/2,0,dx,cs,1,RI)+dx/2; // be sure in the upper gap
2395 y=loop(bp, x,0,(dy+1)/2,cs,0,DO)-1; // (x,y) should be in the gap
// the gap must not lie in the right quarter and must be at least dy/4 deep
2396 if (x>3*dx/4 || y<dy/4) Break;
// row y must give get_bw()==1 left and right of the gap, and so must
// column x in the lower half
2397 if( get_bw(x0,x0+x,y0+y,y0+y,box1->p,cs,1) != 1 ) Break;
2398 if( get_bw(x0+x,x1,y0+y,y0+y,box1->p,cs,1) != 1 ) Break;
2399 if( get_bw(x0+x,x0+x,y1-dy/2,y1, box1->p,cs,1) != 1 ) Break;
// fail only if both column x and column x+1 give get_bw()==1 in the top third
2400 if( get_bw(x0+x, x0+x ,y0, y0+dy/3,box1->p,cs,1) == 1 ) // it v?
2401 if( get_bw(x0+x+1,x0+x+1,y0, y0+dy/3,box1->p,cs,1) == 1 ) Break;
2402
2403 // UVW
// fail if none of the three upper half-row counts equals 1, or if both
// lower rows (dy-1-dy/8 and dy-1) have more than one crossing
2404 if(((num_cross( 0,dx/2+1,dy/ 8,dy/ 8,bp,cs)!=1)
2405 && (num_cross( 0,dx/2+1,dy/16,dy/16,bp,cs)!=1) // it v
2406 && (num_cross(dx/2+1,dx -1,dy/ 8,dy/ 8,bp,cs)!=1)) /* () added on Sep00 */
2407 || ((num_cross( 0,dx-1,dy-1-dy/8,dy-1-dy/8,bp,cs)> 1)
2408 && (num_cross( 0,dx-1,dy-1 ,dy-1 ,bp,cs)> 1)) ) Break;
2409 // UV
// the lower left and lower right corner rectangles must not give get_bw()==1
2410 if( get_bw(0 ,dx/8,dy-1-dy/6,dy-1,bp,cs,1)==1 ) Break;
2411 if( get_bw(dx-1-dx/8,dx-1,dy-1-dy/6,dy-1,bp,cs,1)==1 ) Break;
// not hchar, dy>6: fail if the run from the left edge at dy/6 is not shorter
// than the runs at dy-1-dy/3 and at dy-1-dy/4
2412 if (!hchar // 2010-10
2413 && loop(bp,0 ,dy/6 ,dx,cs,0,RI)
2414 >=loop(bp,0 ,dy-1-dy/3,dx,cs,0,RI) // old Break;
2415 && loop(bp,0 ,dy/6 ,dx,cs,0,RI) // 2017-03-23 for 7x7 DOS-v
2416 >=loop(bp,0 ,dy-1-dy/4,dx,cs,0,RI) && dy>6 ) Break;
// hchar, dy>6: fail if the run from the left edge does not grow from dy/3
// to dy-1-dy/3 and from there to dy-1-dy/3+dy/6
2417 if ( hchar
2418 && loop(bp,0 , dy/3 ,dx,cs,0,RI)
2419 >=loop(bp,0 ,dy-1-dy/3 ,dx,cs,0,RI)
2420 && loop(bp,0 ,dy-1-dy/3 ,dx,cs,0,RI)
2421 >=loop(bp,0 ,dy-1-dy/3+dy/6,dx,cs,0,RI) && dy>6 ) Break; // 2010-10
// fail if both edge runs are longer at dy-1-dy/3 than at dy-1-dy/8;
// if they are merely not shorter, only reduce ad
2422 if( loop(bp,0 ,dy-1-dy/3,dx,cs,0,RI)
2423 >loop(bp,0 ,dy-1-dy/8,dx,cs,0,RI)
2424 && loop(bp,dx-1,dy-1-dy/3,dx,cs,0,LE)
2425 >loop(bp,dx-1,dy-1-dy/8,dx,cs,0,LE) ) Break; // better OR ?
2426 if( loop(bp,0 ,dy-1-dy/3,dx,cs,0,RI)
2427 >=loop(bp,0 ,dy-1-dy/8,dx,cs,0,RI)
2428 && loop(bp,dx-1,dy-1-dy/3,dx,cs,0,LE)
2429 >=loop(bp,dx-1,dy-1-dy/8,dx,cs,0,LE) ) ad=99*ad/100; // font21
// mirror image of the !hchar check above, measured from the right edge
2430 if (!hchar // 2010-10
2431 && loop(bp,dx-1,dy/6 ,dx,cs,0,LE)
2432 >=loop(bp,dx-1,dy-1-dy/3,dx,cs,0,LE)
2433 && loop(bp,dx-1,dy/6 ,dx,cs,0,LE) // 2017-03 add 7x7-DOS-v
2434 >=loop(bp,dx-1,dy-1-dy/4,dx,cs,0,LE) && dy>6 ) Break;
// x = second run in the bottom row (started where the first run from the
// left edge ends); for dx>14 it must not exceed half of dx
2435 x=loop(bp,0,dy-1,dx,cs,0,RI); // 3*x>dx changed to 2*x>dx May2001 JS
2436 x=loop(bp,x,dy-1,dx,cs,1,RI); if ( dx>14 && 2*x>dx ) Break; // U
2437 if( num_cross(0 ,dx/2, dy/4, dy/4,bp,cs)==2
2438 && num_cross(dx-dx/2,dx-1,dy-dy/4,dy-dy/4,bp,cs)==2 ) Break; // ~{\it u}
2439
2440 #if 0
2441 // measure thickness of lower v
2442 i=loop(bp, 0,dy-1-dy/16,dx,cs,0,RI)
2443 +loop(bp,dx-1,dy-1-dy/16,dx,cs,0,LE);
2444 j=loop(bp, 0,dy-1-dy/4 ,dx,cs,0,RI)
2445 +loop(bp,dx-1,dy-1-dy/4 ,dx,cs,0,LE);
2446 if( box1->m1 && hchar && dy>15 && j>=i-dx/32 ) Break; // ~Y
2447 #endif
2448 /* V has serifs only on upper side! Y also on bottom, check it. Oct00 */
// each pair of loop() calls below skips the first run from the left edge and
// then measures the next run: i1 = top (max of rows 0 and 1), i2 = about
// dy/4 (max of two rows), i3 = bottom (max of rows dy-1 and dy-2)
2449 i=loop(bp, 0, 0,dx,cs,0,RI);
2450 i=loop(bp, i, 0,dx,cs,1,RI); i1=i; // thickness upper left
2451 i=loop(bp, 0, 1,dx,cs,0,RI);
2452 i=loop(bp, i, 1,dx,cs,1,RI); if(i>i1) i1=i; // thickest
2453 i=loop(bp, 0,dy/4,dx,cs,0,RI);
2454 i=loop(bp, i,dy/4,dx,cs,1,RI); i2=i; // thickness on 1/4 from up
2455 i=loop(bp, 0,dy/4+dy/32,dx,cs,0,RI);
2456 i=loop(bp, i,dy/4+dy/32,dx,cs,1,RI); if (i>i2) i2=i; // fat fonts
2457 i=loop(bp, 0,dy-1,dx,cs,0,RI);
2458 i=loop(bp, i,dy-1,dx,cs,1,RI); i3=i; // thickness, fix dy-1 2013-06
2459 i=loop(bp, 0,dy-2,dx,cs,0,RI);
2460 i=loop(bp, i,dy-2,dx,cs,1,RI); if(i>i3) i3=i; // thickest bottom
// box starts above m2: fail if both the top and the bottom are clearly
// thicker than the stroke at dy/4 (serifs on both ends)
2461 if( y0 < box1->m2)
2462 if( i1-i2 > dx/32+2
2463 && i3-i2 > dx/32+2 ) {
2464 /* tmp10/invalid_ogv.jpg 120x145 font may have jpg-artefacts */
2465 DBG( IFV fprintf(stderr,"\nDBG v L%d i123= %d %d %d",__LINE__,i1,i2,i3); )
2466 Break; // ~serif_Y
2467 }
2468
// no upper serif and a single crossing at dy-1-dy/4: j = thickness of that
// single stroke; fail if it is not thicker than i2, reduce ad if only by 1
2469 if( y0 < box1->m2 ) // uppercase V ?
2470 if( i1-i2 < dx/32+2 ) /* no serif detected */
2471 if( num_cross(0,dx-1,dy-1-dy/4,dy-1-dy/4,bp,cs)==1 ){
2472 j=loop(bp, 0,dy-1-dy/4 ,dx,cs,0,RI);
2473 j=loop(bp, j,dy-1-dy/4 ,dx,cs,1,RI);
2474 if (j<i2+1) Break; // ~Y
2475 if (j<=i2+1) ad=99*ad/100; // ~Y
2476 }
2477
2478 ad=99*ad/100; // be careful (remove later)
2479
// reduce ad if the run from the left edge is longer at dy-1-dy/4 than in
// the bottom row
2480 if( loop(bp,0 ,dy-1-dy/4,dx,cs,0,RI)
2481 >loop(bp,0 ,dy-1 ,dx,cs,0,RI) ) ad=96*ad/100;
2482
2483 if (num_cross(0,dx-1, dy/2, dy/2,bp,cs)==1
2484 && num_cross(0,dx-1,3*dy/4,3*dy/4,bp,cs)> 1) Break; // 5x8 * Jul09
2485
2486 if (gchar) ad=99*ad/100;
2487 bc='v';
2488 if( hchar ) bc='V';
2489 Setac(box1, bc, ad);
2490 break;
2491 }
2492 return box1->c;
2493 }
2494
/*
 * ocr0_rR: test whether the box looks like 'r', then whether it looks
 * like 'R'.
 *
 * Reads the box, its bitmap bp, the corner points aa, the hole list, the
 * hchar/gchar flags and the threshold cs from sdata. Each test is a
 * for-statement without increment expression whose body ends in break,
 * i.e. a block that is executed at most once; Break leaves it when a check
 * fails (Break is a macro defined outside this view, presumably a break
 * with debug output - confirm). Both tests are always attempted, one after
 * the other, subject to their size conditions.
 *
 * In each test ad starts at 100 and is scaled down by soft checks; when no
 * hard check fails, Setac(box1,'r',ad) respectively Setac(box1,'R',ad) is
 * called.
 *
 * Returns box1->c in every case.
 *
 * Calls that take box1->p are given coordinates offset by x0/y0, calls that
 * take bp are given coordinates in 0..dx-1 / 0..dy-1.
 */
ocr0_rR(ocr0_shared_t * sdata)2495 static wchar_t ocr0_rR(ocr0_shared_t *sdata){
2496 struct box *box1=sdata->box1;
2497 pix *bp=sdata->bp;
2498 int i,j,d,x,y,i1,i2,i3,hchar=sdata->hchar,gchar=sdata->gchar,
2499 x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
2500 int dx=x1-x0+1,dy=y1-y0+1, /* size */
2501 ad; /* tmp-vars */
2502 int (*aa)[4]=sdata->aa; /* corner-points, (x,y,dist^2,vector_idx) */
2503
2504 // --- test r -------
2505 for(ad=d=100;dy>3 && dx>1;){ // dy>dx, 4x6 font, dx=2 smallest prop-font
2506 DBG( wchar_t c_ask='r'; )
// only hole[0] is inspected: it must end in the upper left quadrant
2507 if (sdata->holes.num > 0
2508 && ( sdata->holes.hole[0].y1 > dy/2 // tiny hole in upper left
2509 || sdata->holes.hole[0].x1 > dx/2 ) // is tolerated, ~Pp
2510 ) Break; /* tolerant against a tiny hole */
// fail if the box is less than half as high as the distance m1..m3
2511 if( 2*dy<box1->m3-box1->m1) Break;
2512
// the run from the right edge at mid height must be longer than dx/8
2513 if( loop(bp,dx-1,dy/2,dx,cs,0,LE)<=dx/8 ) Break;
2514 x= loop(bp,dx-1,dy/2,dx,cs,0,LE); if (x<=dx/2) ad=99*ad/100; // ~t
2515 if (loop(bp,dx-1-x/2,0,dy,cs,0,DO)>dy/8) ad=99*ad/100; // ~t
2516 if( dx>4 )
2517 if( loop(bp,dx-1,dy/2,dx,cs,0,LE)<=dx/8+2 ) Break; // ~v Jun00
2518
2519 i=dy-(dy+20)/32; // ignore dust on the ground
2520
// every row from dy/2 down to i must pass all checks in the loop body,
// otherwise the loop is left early and the test fails
2521 for( y=4*dy/8; y<i; y++ ){ // center down v-line
2522 if( y<dy-2*dy/8 && num_cross(0,dx-1,y,y,bp,cs) !=1 ) break;
2523 i1= loop(bp,0 ,y,dx,cs,0,RI); if(i1>3*dx/8) break;
2524 i2= loop(bp,dx-1,y,dx,cs,0,LE); if(i1>i2) break;
2525 if( (i1+(dx-i2
2526 -1))/2 >= 4*dx/8 ) break; // mass middle should be left
2527 }
2528 if (y<i) Break;
2529
// every column from dx/2 to dx-dx/8 must give get_bw()==1 in the top quarter
2530 for( x=4*dx/8; x<dx-dx/8; x++ ){ // right upper h-line
2531 if( get_bw(x,x,0,(dy+2)/4,bp,cs,1) !=1 ) break; }
2532 if (x<dx-dx/8) Break;
2533
2534 if( loop(bp,dx-1,dy-1-dy/4,dx,cs,0,LE)>5*dx/8 // not a C
2535 && get_bw(dx-1-dx/8,dx-1,dy-1-dy/4,dy-1,bp,cs,1) ==1 ) Break;
2536
// NOTE(review): the second operand compares a horizontal run (limit dx,
// direction LE) with 5*dy/8; 5*dx/8 may have been intended - confirm
2537 if( loop(bp, 0,5*dy/8,dx,cs,0,RI)<=dx/8
2538 && loop(bp,dx-1,5*dy/8,dx,cs,0,LE)>=5*dy/8
2539 && loop(bp,dx/2, dy-1,dy,cs,0,UP)<=dy/8 ) Break; // ~c
2540
// at 3*dy/8 the left run exceeds the right run by more than dx/8:
// fail if the left run at dy/8 is short, otherwise only reduce ad
2541 if( loop(bp, 0,3*dy/8,dx,cs,0,RI)
2542 > loop(bp,dx-1,3*dy/8,dx,cs,0,LE)+dx/8 ) {
2543 if( loop(bp, 0, dy/8,dx,cs,0,RI)<dx/8 ) Break; // ~z (broken)
2544 ad=98*ad/100;
2545 }
2546
2547 if( loop(bp,0,dy/3,dx,cs,0,RI)>3*dx/4 ) Break; // ~i
2548 if( loop(bp,0,dy/4,dx,cs,0,RI)>3*dx/8 // ~I
2549 && get_bw(0,dx/8,0,dy/4,bp,cs,1) ==1 ) Break;
// at least one of the rows dy/2 and dy/2+1 must have exactly one crossing
2550 if( num_cross(0,dx-1,dy/2, dy/2 ,bp,cs)!=1
2551 && num_cross(0,dx-1,dy/2+1,dy/2+1,bp,cs)!=1 ) Break; // ~n 024a3
2552
2553 // italic t is sometimes not high enough, look for v-like shape
// fail if some row in the lower quarter and the row 1+dy/32 below it both
// have exactly two crossings
2554 for(y=3*dy/4;y<dy-1;y++)
2555 if( num_cross(0,dx-1,y, y ,bp,cs)==2
2556 && num_cross(0,dx-1,y+1+dy/32,y+1+dy/32,bp,cs)==2 ) break; // ~t
2557 if(y<dy-1) Break;
2558 if (loop(bp,dx-1-dx/4,dy-1,dx,cs,0,UP)<dy/4) ad=98*ad/100; // ~f (serif)
2559 if( num_cross(dx-1,dx-1,0,3*dy/4,bp,cs)>1) {ad=95*ad/100;MSG({})} // ~f
2560 if( num_cross(dx/2 ,dx/2 ,0,dy-1,bp,cs)>2
2561 && num_cross(dx/2+1,dx/2+1,0,dy-1,bp,cs)>2 ) Break; // ~f
2562
2563 // 2010-10 ocr-a-subset 4x8 '' was detected as bad flying r
// i1 = index of the frame vector, searched between the corner vectors aa[3]
// and aa[0], that is nearest to (x0+dx/3, y0+dy/4); ad is reduced when that
// vector lies on the top row and the box position relative to m1..m3 matches
2564 i1=nearest_frame_vector(box1,aa[3][3],aa[0][3], x0+dx/3, y0+dy/4);
2565 // i2=nearest_frame_vector(box1,aa[3][3],aa[0][3], x0+dx/3, y0+dy/4);
2566 if (box1->m2
2567 && 2*box1->y0 <= box1->m1 + box1->m2
2568 && 3*box1->y1 <= box1->m2 + 2*box1->m3
2569 && box1->frame_vector[i1][1]-y0==0)
2570 { ad=97*ad/100;MSG(fprintf(stderr,"ad %d",ad);) }
2571
2572 if (box1->dots) ad=98*ad/100; /* could be modified latin2-r */
2573 if (hchar) ad=96*ad/100;
2574 if (gchar) ad=97*ad/100;
2575 Setac(box1,'r',ad);
2576 break; // not 100% sure!
2577 }
2578 // --- test R ---------------------------------------------------
2579 for(ad=d=100;dx>2 && dy>3;){ // min 3x4
2580 DBG( wchar_t c_ask='R'; )
2581 // if (sdata->holes.num > 2) Break; /* tolerant against a tiny hole */
2582 if (box1->num_frames != 2) Break; // excludes tiny holes 1810.rnd80
2583 if( num_cross(x0,x1,y1-dy/8,y1-dy/8, box1->p,cs) < 2 ) Break; // ~P
2584 if (loop(bp, dx/2, dy/4,dy,cs,0,DO)>dy/2) Break; // ~C
2585 if (loop(bp, dx/2, 0,dy,cs,0,DO)>dy/8
2586 && loop(bp, dx/2,dy/16,dx,cs,0,RI)<dx/2
2587 && dy>=16 ) Break;
// in each of the loops below i is a flag that is cleared to 0 by the first
// row or column that meets the condition in the loop body
2588 for(i=1,y=y0+dy/8;y<=y1-dy/8 && i;y++){ // left v-line
2589 if( get_bw(x0 , x0+dx/2,y, y,box1->p,cs,1) != 1 ) i=0;
2590 } if( !i ) Break;
2591 for(i=1,x=x0+3*dx/8;x<=x1-dx/4 && i;x++){ // upper h-line
2592 if( get_bw( x, x, y0, y0+dy/4,box1->p,cs,1) != 1 ) i=0;
2593 } if( !i ) Break;
// y = largest upward run from the bottom row over the middle columns
// NOTE(review): i1=x is stored here but i1 is reassigned further down before
// it is read again
2594 for(y=0,x=x0+dx/4;x<=x1-dx/4;x++){ // lower h-gap
2595 i=loop(box1->p,x,y1,dy,cs,0,UP);
2596 /* on small chars bypass possible low left serifs */
2597 if (i>0) { i2=loop(box1->p,x-1,y1-i-1,dy,cs,0,UP);
2598 if (i2>1) i+=i2-1; }
2599 if (i>y) { y=i; i1=x; }
2600 } if( y<=dy/8 ) Break; if (y<dy/4) ad=80*ad/100;
// some column right of x0+dx/3 must have exactly two vertical crossings
2601 for(i=1,x=x0+dx/3;x<=x1-dx/8 && i;x++){ // vert crossed 2 ???
2602 if( num_cross(x,x,y0,y1, box1->p,cs) == 2 ) i=0;
2603 } if( i ) Break;
2604 for(i=1,y=y0;y<=y0+3*dy/8 && i;y++){ // upper 2 vert lines
2605 if( num_cross(x0,x1,y,y, box1->p,cs) == 2 ) i=0;
2606 } if( i ) Break;
2607 for(i=1,y=y0+dy/3;y<=y1-dy/3 && i;y++){ // middle h line
2608 if( num_cross(x0,x1,y,y, box1->p,cs) == 1 ) i=0;
2609 } if( i ) ad=95*ad/100; /* sometimes there is a small gap */
2610 for(i=1,y=y1-dy/4;y<=y1 && i;y++){ // lower 2 vert lines
2611 if( num_cross(x0,x1,y,y, box1->p,cs) == 2 ) i=0;
2612 } if( i ) Break;
2613 if( get_bw(x1-dx/3,x1,y0,y0+dy/4,box1->p,cs,1) != 1 ) Break; // pixel ru
// profile of the right side, measured as runs from the right edge: at most
// dx/2 at dy/4, larger at dy/2, and at dy-1-dy/8 smaller than the maximum
// of the rows dy/2, 5*dy/8 and 6*dy/8
2614 x=loop(bp,dx-1, dy/4,dx,cs,0,LE); if(x>dx/2) Break; i=x; // ru
2615 x=loop(bp,dx-1, dy/2,dx,cs,0,LE); if(x<=i ) Break; i=x; // rc
2616 x=loop(bp,dx-1, 5*dy/8,dx,cs,0,LE); if(x>i ) i=x;
2617 x=loop(bp,dx-1, 6*dy/8,dx,cs,0,LE); if(x>i ) i=x;
2618 x=loop(bp,dx-1,dy-1-dy/8,dx,cs,0,LE); if(x>=i ) Break; // rd
2619
// the left runs at dy/4, dy/2 and dy-1-dy/4 must be nearly collinear
2620 i1=loop(bp,0, dy/4,dx,cs,0,RI); // straight
2621 i2=loop(bp,0, dy/2,dx,cs,0,RI);
2622 i3=loop(bp,0,dy-1-dy/4,dx,cs,0,RI); if( abs(i1+i3-2*i2)>1+dx/16 ) Break;
2623 if (dy>15)
2624 if (loop(bp,dx-1, dy/2,dx,cs,0,LE)>=loop(bp,dx-1, dy-1,dx,cs,0,LE)
2625 && loop(bp,dx-1,3*dy/16,dx,cs,0,LE)>=loop(bp,dx-1,dy/16,dx,cs,0,LE)+dx/8 ) Break; // ~ff
2626 if (dy>7)
2627 if (loop(bp,dx-1,dy-2 ,dx,cs,0,LE)
2628 >loop(bp,dx-1,dy-2-dy/8,dx,cs,0,LE)) {
2629 ad=98*ad/100;
2630 if (loop(bp,dx-1,dy-1-dy/4,dx,cs,0,LE)==0
2631 && loop(bp,dx-1,dy-2-dy/8,dx,cs,0,LE)>0 ) Break; // broken B ??
2632 }
// hole count other than 1: i = num_hole() result for the upper two thirds;
// fail if there is more than one hole or more holes than i
2633 j=sdata->holes.num;
2634 if (j != 1) {
2635 i=num_hole (x0,x1,y0,y1-dy/3,box1->p,cs,NULL);
2636 // j=num_hole (x0,x1,y0,y1 ,box1->p,cs,NULL);
2637 if (i==0) ad=90*ad/100; /* some times there is a small gap */
2638 if (j>1 || j>i) Break;
2639 }
2640 if (sdata->holes.num < 1) ad=90*ad/100;
2641 if (sdata->holes.num==1)
2642 if (sdata->holes.hole[0].y1 > 3*dy/4) ad=95*ad/100; // alpha
2643
2644 if (!hchar) ad=98*ad/100;
2645 if ( gchar) ad=98*ad/100;
2646 Setac(box1,'R',ad);
2647 break;
2648 }
2649 return box1->c;
2650 }
2651
/*
 * ocr0_m: test whether the box looks like 'm'.
 *
 * Reads the box, its bitmap bp, the corner points aa, the hole count, the
 * hchar/gchar flags and the threshold cs from sdata. The test only runs
 * for dx>4 and dy>3. The for-statement has no increment expression and its
 * body ends in break, so it works as a block that is executed at most once;
 * Break leaves it when a check fails (Break is a macro defined outside this
 * view, presumably a break with debug output - confirm).
 *
 * The first part works on the frame vectors (three legs and two gaps), the
 * rest on pixel runs. ad starts at 100 and is scaled down by soft checks.
 * Setac(box1,'m',...) may be called twice: once with 98*ad/100 in the
 * italic check and once with the final ad.
 *
 * Returns 'm' directly if the final ad is still >=100, otherwise box1->c.
 *
 * Calls that take bp are given coordinates in 0..dx-1 / 0..dy-1.
 */
ocr0_m(ocr0_shared_t * sdata)2652 static wchar_t ocr0_m(ocr0_shared_t *sdata){
2653 struct box *box1=sdata->box1;
2654 pix *bp=sdata->bp;
2655 const int hchar=sdata->hchar,gchar=sdata->gchar;
2656 int i,d,x,y,i1,i2,i3,i4,i5,i6,i7,
2657 handwritten=0,
2658 x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
2659 int dx=x1-x0+1,dy=y1-y0+1, /* size */
2660 (*aa)[4]=sdata->aa, /* corner-points, (x,y,dist^2,vector_idx) */
2661 ad; /* tmp-vars */
2662
2663 // --- test m -------
2664 for(ad=d=100;dx>4 && dy>3;){
2665 DBG( wchar_t c_ask='m'; )
2666 if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
2667 if (sdata->holes.num > 0) ad=96*ad/100;
2668 if (box1->num_frames != 1) Break; // excludes tiny holes 1810.rnd80
2669 // added 2018 tmp13/sslmozFP.png molten "ity"
// d is a squared-distance limit; aa[k][2] is the squared distance stored
// with corner point k, and three of the four corners are tested against d/2
2670 d=2*sq(128/4); /* half distance to the center */
2671 // if (aa[3][2]>d/2) Break; /* [2] = distance, ~dj... */
2672 if (aa[0][2]>d/2) Break; /* upper left end */
2673 if (aa[1][2]>d/2) Break; /* lower left end */
2674 if (aa[2][2]>d/2) Break; /* lowerright end */
2675 // search 3 legs and 2 space between, [][3]=vector_index
2676 i1=nearest_frame_vector(box1, aa[0][3],aa[2][3], x0, y1); // leg1
2677 i3=nearest_frame_vector(box1, i1,aa[3][3], x1, y1); // leg3
2678 i4=nearest_frame_vector(box1, i1, i3, x0, y0); // gap1
2679 i5=nearest_frame_vector(box1, i1, i3, x1, y0); // gap2
2680 i2=nearest_frame_vector(box1, i4, i5, (x0+x1)/2, y1); // leg2
2681 MSG(fprintf(stderr,"i1-5 %d %d %d %d %d",i1,i2,i3,i4,i5);)
2682 // 2018-09 ToDo: check essentials of 3 legs
2683 if (box1->frame_vector[i1][0] > x0+dx/4) Break; // leg1 not leftmost
2684 if (box1->frame_vector[i4][1] > y0+dy/2) Break; // gap1 too low
2685 if (box1->frame_vector[i5][1] > y0+dy/2) Break; // gap2 too low
2686 if (box1->frame_vector[i5][0] < x0+dx/2) Break; // gap2 too left
2687 if (box1->frame_vector[i5][0]
2688 < box1->frame_vector[i2][0]) Break; // gap2 more left than leg2
2689 // check right side molten 'nt' of tmp13/sslmozFP.png
2690 i6=nearest_frame_vector(box1, i3, aa[3][3], x1+dx/2, y0+dy/8);
2691 i7=nearest_frame_vector(box1, i3, i6, x0, y0+dy/2);
2692 if (box1->frame_vector[i7][0]<box1->frame_vector[i6][0]-dx/8 && hchar)
2693 Break; // ad=97*ad/100;
2694 MSG(fprintf(stderr,"i1-7 %d %d %d %d %d %d %d ad=%d",\
2695 i1,i2,i3,i4,i5,i6,i7,ad);)
2696 // check up side molten 'ity' of tmp13/sslmozFP.png
2697 i6=nearest_frame_vector(box1, aa[3][3], aa[0][3], x0+dx/2, y1);
2698 if (box1->frame_vector[i6][1]>y0+dy/4 && hchar && gchar) Break;
2699 // ad=97*ad/100;
2700
2701 x =loop(bp,dx-1,dy/2,dx,cs,0,LE); if(3*x>dx) Break; // ~K
// i = number of crossings at mid height; if row dy/2 does not give 3,
// the count of row dy/2+1 is used instead
2702 y=dy/2;
2703 i=num_cross(0,dx-1,y ,y ,bp,cs); if (i!=3)
2704 i=num_cross(0,dx-1,y+1,y+1,bp,cs);
2705 if (num_cross(0,dx-1,dy/2,dy/2,bp,cs)==1) Break; // 5x8 * Jul09
// NOTE(review): (i<3 && i>5) can never be true, so this check never fails.
// '||' may have been intended, but rows with fewer than 3 crossings at dy/2
// currently reach the 'handwritten' handling below, so changing it would
// alter recognition results
2706 if (i<3 && i>5) Break; // m ru rn, handwritten m
2707 // im or glued.mm cut to nm
2708 if (i>3) { ad=99*ad/100; MSG(fprintf(stderr,"ad=%d",ad);) }
// five or more crossings: walk along row y and compare the widths of the
// two middle strokes (i1 = 2nd v-line, i2 = 3rd v-line)
2709 if (i>=5) { // melted rw ? (Oct08 JS)
2710 x =loop(bp,0,y,dx ,cs,0,RI); if(x> dx/4) Break; // 1st v-line
2711 x+=loop(bp,x,y,dx-x,cs,1,RI); if(x> dx/2) Break; i1=x; // 1st gap
2712 x+=loop(bp,x,y,dx-x,cs,0,RI); if(x>3*dx/4) Break; i1=x-i1; // 2nd v-line
2713 x+=loop(bp,x,y,dx-x,cs,1,RI); if(x>6*dx/8) Break; i2=x;// 2nd gap
2714 x+=loop(bp,x,y,dx-x,cs,0,RI); i2=x-i2; // 3rd v-line
2715 // printf("\nDBG i1,i2 %d %d", i1, i2);
2716 if (i1>2*i2) Break; // rw
2717 }
// scan upwards from dy-1-dy/8: stop at the first row with more than two
// crossings, then continue upwards while the count stays 3; i5 is the
// topmost row of that 3-crossing band
2718 for (i=0,y=dy-1-dy/8;y>dy/2;y--) {
2719 i=num_cross(0,dx-1,y,y,bp,cs); if (i>2) break;
2720 } if (i>3) Break;
2721 for ( ;y>dy/2;y--) {
2722 i=num_cross(0,dx-1,y,y,bp,cs); if (i!=3) break;
2723 } if (i>5) Break; y++; i5=y;
// the lower the band ends, the higher the 'handwritten' score
2724 if (y> dy/2) handwritten=10;
2725 if (y>3*dy/4) handwritten=60;
2726 /* @@...............
2727 @@......,........
2728 @@,...@@@....@@@.
2729 @@,,.@@@@..@@@@@,
2730 @@@.@@@@@.@@@@@@,
2731 @@;@@@@@@@@@;,@@,
2732 @@@@@,.@@@@,,,@@@ <- i5
2733 ,@@@...;@@....@@@
2734 .@;...........,@@
2735 ...............@@
2736 i1 i2 i3 i4
2737 */
// walk along row y (=i5): i1..i2 is the first gap, i3..i4 the second gap
2738 x =loop(bp,0,y,dx ,cs,0,RI); if(x> dx/4) Break; // search 1st v-line
2739 x+=loop(bp,x,y,dx-x,cs,1,RI); if(x> dx/2) Break; i1=x; // first gap
2740 x+=loop(bp,x,y,dx-x,cs,0,RI); if(x>3*dx/4) Break; i2=x; // 2nd v-line
2741 x+=loop(bp,x,y,dx-x,cs,1,RI); if(x>6*dx/8) Break; i3=x; // 2nd gap
2742 x+=loop(bp,x,y,dx-x,cs,0,RI); if(x<5*dx/8) Break; i4=x; // 3rd v-line
2743 if (x>=dx) Break; // missing 3rd v-line, ~W
2744 MSG(fprintf(stderr,"y=%d x=%d %d %d %d",y,i1,i2,i3,i4);)
2745 if( abs((i2-i1)-(i4-i3)) > 2+((i2-i1)+(i4-i3))/4 ) Break; // same gap width? rn
2746 if( abs((i2-i1)-(i4-i3)) > 2+((i2-i1)+(i4-i3))/8 ) ad=98*ad/100; // same gap width? rn
2747 // the same game for the lower part =>l1 l2 l3 l4 ???
2748 i =loop(bp,0,5*dy/8,dx,cs,0,RI);
2749 i =loop(bp,i,5*dy/8,dx,cs,1,RI);
2750 x =loop(bp,0,dy-dy/32-1,dx,cs,0,RI);
2751 x =loop(bp,x,dy-dy/32-1,dx,cs,1,RI);
// NOTE(review): the flag stored in i here is never read; i is reassigned
// before its next use
2752 if( x > i+1 ) i=1; else i=0; /* looks like serif m, Oct00 */
// y = largest upward run from the bottom row over the columns of the first gap
2753 for(y=0,x=i1;x<i2;x++) {
2754 i=loop(bp,x,dy-1,dy,cs,0,UP); if (i>y) y=i;
2755 }
2756 if(y<dy/4 || y<y1-y0-i5-1-dy/16) Break; // no gap detected
// same measurement for the second gap
2757 for(y=0,x=i3;x<i4;x++) {
2758 i=loop(bp,x,dy-1,dy,cs,0,UP); if (i>y) y=i;
2759 }
2760 if(y<dy/4) Break; // no gap detected
// from above, no column between i1 and i4 may have a run of dy/2 or more
// (only enforced when handwritten<10)
2761 for(x=i1;x<i4;x++) if( loop(bp,x,0,dy,cs,0,DO)>=dy/2 ) break;
2762 if(x<i4 && handwritten<10) Break; // gap detected
2763 // glued rn as m ??? hmm seems a balance act
2764 if(i2-i1>i4-i3+dx/16){
// NOTE(review): the Break inside this inner loop presumably leaves only the
// inner loop; the x<i2 check after the loop then leaves the test - confirm
// against the macro definition
2765 for(y=0,x=(i1+i2)/2;x<i2;x++){
2766 i=loop(bp,x,0,dy,cs,0,DO);
2767 i=loop(bp,x,i,dy,cs,1,DO); // measure thickness
2768 if( i>y ) y=i; if( 2*i<y ) Break;
2769 }
2770 if(x <i2) Break; // unusual property for m (see n)
2771 }
2772 if(gchar) ad=99*ad/100;
2773 if(hchar) ad=99*ad/100;
2774
2775 if( loop(bp,dx-1,dy/16,dx,cs,0,LE)<2
2776 && loop(bp,dx-1,dy/4 ,dx,cs,0,LE)>3 ) Break; // melted WT
2777
2778 x=loop(bp,dx-1,dy/2,dx,cs,0,LE);
2779 if (x>2 && loop(bp,dx-1-x/2,0,dy,cs,0,DO)<dy/2) Break; // melt toc
2780 if (loop(bp,(i3+i4)/2,0,dy,cs,0,DO)>dy/2) Break; // N
2781
2782 // {\it m}
// records 'm' with a reduced value here; the test continues and may fail
// later or record 'm' again below
2783 if( loop(bp,1, dy/4,dx,cs,0,RI)
2784 >loop(bp,0,7*dy/8,dx,cs,0,RI) )
2785 Setac(box1,'m',98*ad/100);
2786
2787 if (handwritten<10){
// start behind the first stroke at dy/4 and look down every column up to i4;
// ad is reduced for every column with a run of dy/4 or more
2788 x =loop(bp,0,dy/4,dx,cs,0,RI);
2789 x+=loop(bp,x,dy/4,dx,cs,1,RI);
2790 for( ;x<i4;x++){ // x=i1 ?
2791 i=loop(bp,x,0,dy,cs,0,DO);
2792 if (i>=dy/4) ad=99*ad/100;
2793 if (i>(dy+2)/4) ad=95*ad/100;
// NOTE(review): as above, this Break presumably leaves only the inner loop;
// the x<i4 check after the loop then leaves the test
2794 if (3*i>dy) Break;
2795 }
2796 if(x<i4) Break; // gap detected
2797 }
2798
2799 if (hchar) ad=99*ad/100; // 2017 ~M
2800 if (box1->dots) ad=99*ad/100;
2801 Setac(box1,'m',ad);
2802 if (ad>=100) return 'm';
2803 break;
2804
2805 }
2806 return box1->c;
2807 }
2808
ocr0_tT(ocr0_shared_t * sdata)2809 static wchar_t ocr0_tT(ocr0_shared_t *sdata){
2810 struct box *box1=sdata->box1;
2811 pix *bp=sdata->bp;
2812 int i,i1,i2,i3,i4,i5,i6, j,d,d2,x,y,yb,
2813 hchar=sdata->hchar,gchar=sdata->gchar,
2814 x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
2815 int (*aa)[4]=sdata->aa; /* corner-points, (x,y,dist^2,vector_idx) */
2816 int dx=x1-x0+1,dy=y1-y0+1, /* size */
2817 ad; /* tmp-vars */
2818
2819 // --- test T ---------------------------------------------------
2820 for(ad=d=100;dx>2 && dy>3;){ // dx>1 dy>2*dx
2821 DBG( wchar_t c_ask='T'; )
2822 // if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
2823 if (box1->num_frames != 1) Break; // excludes tiny holes 1810.rnd80
2824 if (box1->num_frames>1){
2825 if (4*box1->frame_vol[1]<box1->frame_vol[0]) Break; // ~?
2826 ad=ad*99/100; // ~? ocr-b.png 2010-10-07
2827 }
2828 /* half distance to the center */
2829 d=2*sq(128/4); /* square */
2830 /* now we check for the 3 edges of the T, but failes glued tmp13/ssl */
2831 if (aa[0][2]>d) Break; /* square distance to upper left corner */
2832 if (aa[3][2]>d) Break; /* square distance to upper right corner */
2833 if (aa[3][0]-aa[0][0]<dx/2) Break; /* upper line to short? */
2834 // FreeMono-Regular with serif has -10+3+10=23 lower serif dx=36 64%
2835 if (aa[2][0]-aa[1][0]>2*dx/3) Break; /* lower line to long? ~t 7x9 */
2836 if (aa[1][1]-aa[0][1]<dy/2) Break; /* left y-distance to short? */
2837 if (aa[2][1]-aa[3][1]<dy/2) Break; /* right y-distance to short? */
2838 if (aa[3][0]-aa[0][0]<4-1) Break; /* to small to hold a T */
2839 if (aa[3][1]-y0>dy/8) ad=99*ad/100;
2840 if (aa[0][1]-y0>dy/8) ad=99*ad/100;
2841 if (aa[2][0]>=aa[3][0]) ad=98*ad/100; // ~t 7x9
2842 if (aa[2][0]>=aa[3][0]-1) ad=99*ad/100; // ~t 7x9
2843 // detect bottom end of vertical line 2018-09
2844 i1=nearest_frame_vector(box1, aa[0][3],aa[2][3], x0, y1+dy/8);
2845 i2=nearest_frame_vector(box1, aa[1][3],aa[3][3], x1, y1+dy/8);
2846 // detect top end of vertical line
2847 i3=nearest_frame_vector(box1, aa[0][3],aa[1][3], x1, y0);
2848 i4=nearest_frame_vector(box1, aa[2][3],aa[3][3], x0, y0);
2849 // serifen at bottom? if i1==i6 + i2==i7 no serifen, ad*99%?
2850 i5=nearest_frame_vector(box1, i3, i1, x1, y1); // left
2851 i6=nearest_frame_vector(box1, i2, i4, x0, y1); // right
2852 if (box1->frame_vector[i3][0]-x0 > 3*dx/4) Break; // ~7
2853 if (box1->frame_vector[i4][0]-x0 >= dx-dx/8) Break; // ~7
2854 if (box1->frame_vector[i3][1]
2855 != box1->frame_vector[i4][1]) { ad=99*ad/100; MSG({}) } //
2856 if ( box1->frame_vector[i3][0]-x0
2857 > 2*(x1-box1->frame_vector[i4][0])) Break; // ~7
2858 MSG(fprintf(stderr,"i1-6 %d %d %d %d %d %d ad %d",i1,i2,i3,i4,i5,i6,ad);)
2859
2860 // old pixel code ... (ToDo replace)
2861 // upper horizontal line
2862 i1= loop (bp, dx/8, 0,dy,cs,0,DO); // left side
2863 i2= loop (bp,dx-1-dx/8, 0,dy,cs,0,DO); // right side
2864 i3= loop (bp, dx/8,i1,dy,cs,1,DO); // left side
2865 i4= loop (bp,dx-1-dx/8,i2,dy,cs,1,DO); // right side
2866 if (abs(i1-i2)>=dy/8) { ad=ad*99/100;MSG({}) } // ~ ocr-b '?' 2010-10
2867 if (i1>dy/4 || i2>dy/4) Break;
2868 for (x=dx/8;x<dx-1-dx/8;x++) {
2869 i= loop (bp,x,0,dy,cs,0,DO);
2870 if (i>i1+dy/8 && i>i2+dy/8) break;
2871 if (i<i1-dy/8 && i<i2-dy/8) break;
2872 } if (x<dx-1-dx/8) Break;
2873 if( get_bw( 0,dx-1, dy/2, dy/2,bp,cs,1) != 1 ) Break;
2874 if( get_bw( 0,(dx-1)/8, dy/2,dy-1-dy/8,bp,cs,1) == 1 ) Break;
2875 if( get_bw( 0,3*dx/16, dy/2,dy-1-dy/4,bp,cs,1) == 1 ) Break;
2876 // ToDo17: pixel version fails on cutted fat "To"
2877 if( get_bw(dx-1-dx/4,dx-1, dy/2,dy-1-dy/4,bp,cs,1) == 1 ) {ad=98*ad/100;MSG({}) }
2878 if( get_bw(dx-1-dx/5,dx-1, dy/2,dy-1-dy/4,bp,cs,1) == 1 ) Break;
2879 // top width
2880 for( d2=0,y=0;y<=dy/4;y++){
2881 i=loop(bp,0,y,dx,cs,0,RI); // possible gap?
2882 j=loop(bp,i,y,dx,cs,1,RI); // width of horizontal line
2883 MSG(fprintf(stderr,"ij1 %2d %2d",i,j);)
2884 if (dy>16 && y>0 && i+j<dx) j+=loop(bp,i+j,y-1,dx-i-j,cs,1,RI);
2885 if (dy>32 && y>1 && i+j<dx) j+=loop(bp,i+j,y-2,dx-i-j,cs,1,RI);
2886 MSG(fprintf(stderr,"ij2 %2d %2d",i,j);)
2887 if (j>d2) d2=j; // found
2888 } if (3*d2<dx) Break; // 2010-10 ocr-b '?'
2889 if (4*d2<2*dx) {ad=99*ad/100;MSG({}) } // handwritten T 2010-10
2890 if (5*d2<3*dx) {ad=99*ad/100;MSG({}) } // handwritten T 2010-10
2891 // center width
2892 for( y=dy/4;y<3*dy/4;y++){ // between top and bottom-serif
2893 i=dx/4+loop(bp,dx/4,y,dx,cs,0,RI); // left side of vertical line
2894 j= loop(bp, i,y,dx,cs,1,RI); // width of vertical line
2895 if (2*j>dx+1 || i+j>=dx || i<dx/4-1) break; // ~r?7 Jan08,
2896 // but allow serif T Jul09
2897 } if (y<3*dy/4) Break; // Jan07
2898 // down width
2899 for( y=3*dy/4;y<dy;y++){
2900 i= loop(bp,dx/4,y,dx,cs,0,RI);
2901 i= loop(bp, i,y,dx,cs,1,RI);if(4*i>3*x) break; //~I
2902 } if( y<dy ) Break;
2903
2904 i =dx/4+loop(bp,dx/4,dy/4,dx,cs,0,RI);if(i>3*dx/4) Break; // ~7
2905 i+= loop(bp,i ,dy/4,dx,cs,1,RI);if(i>3*dx/4) Break;
2906
2907 if( num_cross(0,dx-1, dy-1, dy-1,bp,cs) != 1
2908 && num_cross(0,dx-1, dy-2, dy-2,bp,cs) != 1 ) Break;
2909 if( num_cross(0,dx-1,2*dy/3,2*dy/3,bp,cs) != 1
2910 && num_cross(0,dx-1,2*dy/3,2*dy/3,bp,cs) != 1 ) Break;
2911 if (box1->m3 && 2*y1>box1->m3+box1->m4
2912 && loop(bp,0, 0,dy/2,cs,0,DO)>=dy/4
2913 && loop(bp,0,dy-1,dy ,cs,0,UP)<=dy/2) { ad=96*ad/100; MSG({}) }// ~J
2914 if (gchar) ad=98*ad/100;
2915 if( loop(bp,0,dy-1,dx,cs,0,RI)<=dx/8) { ad=99*ad/100; MSG({}) }// ~J
2916 i = loop(bp,0,dy/2,dx,cs,0,RI); // middle of vert. line
2917 j = loop(bp,i,dy/2,dx,cs,1,RI); // thickness
2918 if( 2*i>/*=*/dx ) { ad=99*ad/100; MSG({}) }
2919 if( 2*(dx-j-i)<i) {
2920 ad=95*ad/100; MSG(fprintf(stderr,"ij1 %2d %2d",i,j);) } // ~J
2921
2922 Setac(box1,'T',ad);
2923 if (ad>=100) return 'T';
2924 break;
2925 }
2926 // --- test t ---------------------------------------------------
2927 // written t can look like a + or even with missing right side
2928 // smallest t found in win-screenshot (prop-font) dx=2
2929 for(ad=d=100;dx>1 && dy>3/*&& dy>=box1->m3-box1->m2-1 2017_7x9*/;){ // sometimes no hchar!
2930 DBG( wchar_t c_ask='t'; )
2931 //if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
2932 if (box1->num_frames != 1) Break; // excludes tiny holes 1810.rnd80
2933 if (dy<=box1->m3-box1->m2+1) ad=96*ad/100; // bad line detection?
2934 if (box1->num_frames>1){
2935 if (4*box1->frame_vol[1]<box1->frame_vol[0]) Break; // ~! dosemu
2936 ad=ad*99/100; // ~! 2017_dosemu_4x10
2937 }
2938 /* old pixel code ... ToDo17 replace */
2939 if (num_cross(0,dx-1,0 ,0 ,bp,cs) != 1) { // font4x5 *=x+-
2940 if (dy<10) Break; ad=98*ad/100;
2941 }
2942 if (num_cross(0,dx-1,dy-1,dy-1,bp,cs) != 1) { // *=x+-
2943 if (dy<10) Break; ad=98*ad/100;
2944 }
2945 for(x=0,yb=j=y=dy/32+3*dy/16;y<5*dy/8;y++)if(y>0){ // upper cross line
2946 i=loop(bp,0,y,dx,cs,0,RI);
2947 i=loop(bp,i,y,dx,cs,1,RI); if( i>x ) { x=i;yb=j=y; } // hor. line
2948 i=num_cross(0,dx-1,y ,y ,bp,cs);
2949 j=num_cross(0,dx-1,y+1,y+1,bp,cs); if (i>2 && j>2) break;
2950 if( y<11*dy/16
2951 && num_cross(0,dx-1,y ,y ,bp,cs) != 1
2952 && ( num_cross(0,dx-1,y+dy/8,y+dy/8,bp,cs) != 1 || dy<13) // against noise
2953 ) break;
2954 } if( y<4*dy/8 ) Break;
2955 if (dy>12 && x>4 && x>dx/2 && yb<=(dy+4)/8)
2956 if ( loop(bp,dx-1-3*x/4,yb,dy,cs,1,UP)
2957 <=loop(bp,dx-1-1*x/4,yb,dy,cs,1,UP)+1 )
2958 if ( loop(bp,0 ,dy/2,dy,cs,1,UP)>dx/8 ) Break; // ~C
2959
2960 if (x<dx/2) ad=95*ad/100; // unusual small ?
2961 if (x>=dx && 9*dx>=8*dy) { ad=99*ad/100; MSG({}) } // +
2962
2963 if (box1->y1 < box1->m3 - (box1->m3-box1->m2+1)/32
2964 // ToDo && no bottom serif && h-line near (m2+m3)/2, not m2 = "+"
2965 && x0+loop(bp,dx/4,0,dy,cs,0,DO) >= (3*box1->m2+box1->m3)/4
2966 && loop(bp,0,dy/4,dx,cs,0,RI) >= dx/2-dx/8
2967 && 10*dx>=8*dy
2968 ) { ad=98*ad/100; MSG({}) } // +
2969
2970 i=loop(bp,dx-1,0,dx,cs,0,LE);
2971 for(y=0;y<dy/4;y++){
2972 if( num_cross(0,dx-1,y ,y ,bp,cs) == 2
2973 && num_cross(0,dx-1,y+1,y+1,bp,cs) == 2 ) break;
2974 j=loop(bp,dx-1,y,dx,cs,0,LE); if(j-i>1) break; i=j;
2975 }
2976 if( y<dy/4 ) Break; // ~f
2977
2978 MSG(fprintf(stderr,"b.y %2d",yb);)
2979 i=loop(bp,dx-1,yb,dx,cs,0,LE); // yb = upper cross line
2980 for(y=dy/8;y<yb;y++)
2981 if( loop(bp,dx-1,y,dx,cs,0,LE)>i ) break;
2982 if( y==yb && yb>dy/8) Break; // 2017-07 fix 6x8t on 7x9-font
2983
2984 j=loop(bp,0, dy/2,dx,cs,0,RI);
2985 j=loop(bp,j, dy/2,dx,cs,1,RI); i=j; // thickness
2986 j=loop(bp,0, dy/4,dx,cs,0,RI);
2987 j=loop(bp,j, dy/4,dx,cs,1,RI); if (j<i) i=j; // thickness
2988 j=loop(bp,0,3*dy/4,dx,cs,0,RI);
2989 j=loop(bp,j,3*dy/4,dx,cs,1,RI); if (j<i) i=j; // thickness
2990 if( 2*x<3*i ) Break;
2991
2992 if( loop(bp,dx-1,dy/2,dx,cs,0,LE)-dx/8
2993 <=loop(bp,dx-1, yb ,dx,cs,0,LE) )
2994 if( loop(bp,dx-1, yb ,dx,cs,0,LE)-dx/8
2995 >=loop(bp,dx-1,yb/2,dx,cs,0,LE) ) Break; // ~1 ???
2996
2997 j=1;
2998 for(y=1;j && y<yb; y++) // no @@ pattern
2999 for(x=0;j && x<dx-2;x++){ // ..
3000 if( getpixel(bp,x ,y )>=cs && getpixel(bp,x+1,y )>=cs
3001 && getpixel(bp,x ,y-1)< cs && getpixel(bp,x+1,y-1)< cs ) { j=0;break; }
3002 } if(!j) Break;
3003
3004 if( num_cross(0,dx-1,dy-2,dy-2,bp,cs) == 2
3005 && num_cross(0,dx-1,dy-1,dy-1,bp,cs) == 2 ) Break; // ~* (5er)
3006
3007 if( dy>= 16
3008 && loop(bp, 0, 3*dy/4,dx,cs,0,RI)
3009 >=loop(bp, 0, dy-2,dx,cs,0,RI)
3010 && loop(bp,dx-1, 3*dy/4,dx,cs,0,LE)
3011 <=loop(bp,dx-1, dy-2,dx,cs,0,LE)
3012 && loop(bp,dx-1, 1,dx,cs,0,LE)+dx/16
3013 <loop(bp,dx-1,3*dy/16,dx,cs,0,LE)
3014 && ( loop(bp, 0, 1,dx,cs,0,RI)
3015 >loop(bp, 0,3*dy/16,dx,cs,0,RI)+dx/16
3016 || loop(bp,dx-1, 0,dx,cs,0,LE)==0
3017 || loop(bp,dx-1, 1,dx,cs,0,LE)==0) ) ad=96*ad/100; // ~f Jan02
3018 if(dx<8 && dy>12){ // thin f's could easily confound with t
3019 x=loop(bp,dx-1,3*dy/16,dx,cs,0,LE);
3020 if (x)
3021 if (loop(bp,dx-x,0,dy,cs,0,DO)<3*dy/16
3022 && loop(bp, 0, 3*dy/4,dx,cs,0,RI)+1
3023 >=loop(bp, 0, dy-2,dx,cs,0,RI)
3024 && loop(bp,dx-1, 3*dy/4,dx,cs,0,LE)
3025 <=loop(bp,dx-1, dy-2,dx,cs,0,LE) ) Break;
3026 }
3027 if (dx>7)
3028 if( num_cross( 0,dx-1,2*dy/3,2*dy/3,bp,cs) > 1
3029 && num_cross( 0,dx/2,2*dy/3,2*dy/3,bp,cs) > 0
3030 && num_cross(dx/2,dx-1,2*dy/3,2*dy/3,bp,cs) > 0 )
3031 if (sdata->holes.num > 0)
3032 if (sdata->holes.hole[0].y0 > dy/4) Break; // ~6
3033 // if ( num_hole( x0, x1, y0+dy/4, y1, box1->p,cs,NULL) > 0 ) Break; // ~6
3034
3035 if( num_cross(0,dx-1,3*dy/4, 3*dy/4, bp,cs) >= 2
3036 && num_cross(0,dx-1,3*dy/4-1,3*dy/4-1,bp,cs) >= 2 ){
3037 ad=99*ad/100; /* italic t ? */
3038 if (loop(bp,dx/2 ,dy-1,dy,cs,0,UP)>dy/4) Break; // ~h
3039 if (loop(bp,dx/2+1,dy-1,dy,cs,0,UP)>dy/4) Break; // ~h
3040 }
3041
3042 x= loop(bp,dx-1,dy/2,dx,cs,0,LE);
3043 i= loop(bp,dx-1,dy/8,dx,cs,0,LE);
3044 if (i>x && loop(bp,dx-x,0,dy,cs,0,DO)>=dy/2) ad=90*ad/100; /* ~\ */
3045
3046 x= loop(bp,0, 0,dx,cs,0,RI);
3047 if (yb>1) {i= loop(bp,0, 1,dx,cs,0,RI); if (i<x) x=i;} // fix 6x8t
3048 i= loop(bp,0,dy/4,dx,cs,0,RI);
3049 if (i-x>1) Break; // l
3050
3051 // 2010-10 $ (S + vert. lines at bottom and top, not crossing the S)
3052 i1=nearest_frame_vector(box1, aa[0][3], aa[1][3], x1+2*dx, (y0+y1)/2);
3053 i2=nearest_frame_vector(box1, aa[2][3], aa[3][3], x0-2*dx, (y0+y1)/2);
3054 if (box1->frame_vector[i1][0]-x0>3*dx/4
3055 && box1->frame_vector[i2][0]-x0< dx/4
3056 && box1->frame_vector[i1][1]-y0
3057 >box1->frame_vector[i2][1]-y0) Break;
3058
3059 // this happens quite often, do not be to strong
3060 if (!box1->m2) ad=99*ad/100;
3061 if (box1->m2) {
3062 if (!hchar) ad=99*ad/100; /* some times t is not long enough */
3063 if( y0>=box1->m2-(box1->m2-box1->m1)/4 ) ad=99*ad/100; /* to short */
3064 if( y0>=box1->m2 ) ad=99*ad/100; /* to short */
3065 }
3066
3067 if (dx<3) {
3068 ad=ad*99/100; // ~ 2x11 )
3069 if (loop(bp,0, 0,dx,cs,0,RI)==0
3070 && loop(bp,0,dy-1,dx,cs,0,RI)==0
3071 && loop(bp,dx-1,(dy+1)/2,dx,cs,0,LE)==0) Break; // )
3072 }
3073 if (loop(bp,0, 0,dx,cs,0,RI)<=dx/8 // 2010-09-26
3074 && loop(bp,dx-1,dy/2+1,dx,cs,0,LE)<=dx/8) {
3075 ad=ad*98/100; // ~) 3x11
3076 MSG(fprintf(stderr,"ad= %d",ad);)
3077 }
3078
3079 if (sdata->holes.num > 0) ad=95*ad/100;
3080 if (gchar) ad=99*ad/100;
3081 if (box1->dots) ad=90*ad/100;
3082 Setac(box1,'t',ad);
3083 break;
3084 }
3085 return box1->c;
3086 }
3087
/*
 * ocr0_sS - rule based test whether the box in sdata looks like 's' or 'S'.
 *
 * Reads the box geometry (box1->x0..y1), the pixmap sdata->bp, the
 * threshold sdata->cs and the flags sdata->hchar / sdata->gchar.
 * The single-pass for(;;) construct is used as a "test block": every
 * failed rule leaves it through the Break macro (defined elsewhere in
 * this file), soft rules only scale the certainty value ad (starts at
 * 100, reduced by percentages).  If all hard rules pass, the candidate
 * ('s', or 'S' when hchar is set) is handed to Setac() together with ad.
 *
 * Returns box1->c (whatever the box holds after the test).
 *
 * NOTE(review): loop(), num_cross(), joined() are helpers defined
 * elsewhere; from their use here loop() appears to return a run length
 * starting at (x,y) in the given direction and num_cross() a number of
 * line crossings along a scan line - confirm against their definitions.
 * All coordinates passed together with bp are relative to the box.
 */
static wchar_t ocr0_sS(ocr0_shared_t *sdata){
  struct box *box1=sdata->box1;
  pix *bp=sdata->bp;
  int d,x,y,i1,i2,i3,hchar=sdata->hchar,gchar=sdata->gchar,
      x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
  int dx=x1-x0+1,dy=y1-y0+1,  /* size */
      ad;                     /* tmp-vars */
  wchar_t ac;

  // --- test sS near 5 ---------------------------------------------------
  for(ad=d=100;dx>2 && dy>3;){     // min 3x4 (4x6 font)
    DBG( wchar_t c_ask='s'; )
    //if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
    if (box1->num_frames != 1) Break; // excludes tiny holes 1810.rnd80
    /* a vertical (or slightly slanted) cut must cross three strokes */
    if( num_cross(   dx/2,   dx/2,0,dy-1,bp,cs)!=3
     && num_cross(6*dx/8,2*dx/8,0,dy-1,bp,cs)!=3 // 6x7 fat s 2010-09-25
     && dy>4 ) Break;
    /* a horizontal cut near the middle must cross exactly one stroke */
    if( num_cross(0,dx-1,dy/2  ,dy/2  ,bp,cs)!=1
     && num_cross(0,dx-1,dy/2-1,dy/2-1,bp,cs)!=1 ) Break;
    // get the upper and lower hole coords
    y=(dy+2)/4;  // 6x7 fat s 2010-09-25 (value is overwritten below)
    x  =loop(bp,0,(dy+2)/4,dx,cs,0,RI); if(x>3*dx/8) Break; /* slanted too */
    x +=loop(bp,x,(dy+2)/4,dx,cs,1,RI); if(x>5*dx/8) Break; /* fat too */
    i1 =loop(bp,x,(dy+2)/4,dx,cs,0,RI); i1=(i1+2*x)/2;  // upper center x
    y=11*dy/16;  // not safe for 6x7 fat font
    x  =loop(bp,dx-1  ,y,dx,cs,0,LE); if(x>dx/4) Break;
    x +=loop(bp,dx-1-x,y,dx,cs,1,LE); if(dx>5 && dy>7 && x>dx/2) Break;
    if (x>3*dx/4) Break;
    if (x>dx/2) { ad=98*ad/100; MSG({})}
    i2 =loop(bp,dx-1-x,y,dx,cs,0,LE); i2=dx-1-(i2+2*x)/2; // lower center x
    MSG(fprintf(stderr,"~3 upper right gap (i1=%d,dy/4)",i1);)
    // use i3 temporary for x of left upper border of slanted S
    i3 =loop(bp,0,(dy+2)/4,dx,cs,0,RI);
    for( y=dy/4;y<dy/2;y++ ) { // Jul09 ~3 font4x5
      x = loop(bp,0,y,dx,cs,0,RI);
      if (x>i3+dx/8
       && loop(bp,x-1,y,dx,cs,0,UP)>dy/8+1) break; // +1 for s font 5x8 Jul09
    }
    if( y<dy/2 ) Break;
    /* result is not used: y is re-initialised by the next for loop */
    y=dy/2-loop(bp,dx-1,dy/2,dy/2,cs,1,UP);
    // if( !joined(bp,i1,dy/4,dx-1,y,cs) ){
    //   break;  // sometimes thick small fonts have no gap
    // }
    /* the upper right quarter needs an opening at the right border */
    for(y=dy/4;y<dy/2;y++){
      x=loop(bp,dx-1,y,dx,cs,0,LE);if(x>dx/8) break;
    }
    if(y==dy/2) Break; // Mai00

    y=dy/2+loop(bp,0,dy/2,dy/2,cs,1,DO);
    if (!joined(bp,0,y,i2,11*dy/16,cs)) Break; // ? 6x7 fat s tmp08/gocr0801_bad5

    if (sdata->holes.num > 0)
      if (sdata->holes.hole[0].y0 > (dy+2)/4) Break; // ???
    // if( num_hole( x0, x1, y0+dy/4, y1, box1->p,cs,NULL) > 0 ) Break;

    i1=loop(bp,dx-1,dy-1,dx,cs,0,LE);
    i2=loop(bp,dx-1,dy-2,dx,cs,0,LE);
    if (i2-i1 >= dx/4) Break; // ~{  5x7font

    i1=loop(bp,   0,   0,dx,cs,0,RI);
    i2=loop(bp,   0,   1,dx,cs,0,RI);
    if (i2-i1 >= dx/4) Break; // ~}  5x7font

    // sS5 \sl z  left upper v-bow ?

    i1=loop(bp,   0,dy/2,dx,cs,0,RI);
    i1=loop(bp,  i1,dy/2,dx,cs,1,RI);
    if (4*i1>=3*dx) ad=97*ad/100; // ~5 7-segment

    /* left border at three heights of the upper half: must be concave */
    i1=loop(bp,0,  dy/16,dx,cs,0,RI);
    i2=loop(bp,0,4*dy/16,dx,cs,0,RI);
    i3=loop(bp,0,7*dy/16,dx,cs,0,RI);
    if( 2*i2+dx/32 >= i1+i3 ){
      if( 2*i2+dx/32 > i1+i3 || dx>9 ) Break;
      // very small s? repeat the test with the far side of the stroke
      i1+=loop(bp,i1,  dy/16,dx,cs,1,RI);
      i2+=loop(bp,i2,4*dy/16,dx,cs,1,RI);
      i3+=loop(bp,i3,7*dy/16,dx,cs,1,RI);
      if( 2*i2+dx/32 >= i1+i3 ) Break;
    }

    for(y=(7*dy+8)/16;y<(5*dy+4)/8;y++){
      if( num_cross(    0,dx-1,y  ,y  ,bp,cs)==2 )
      if( num_cross(    0,dx-1,y+1,y+1,bp,cs)==1 )
      if( num_cross(    0,dx/4,y,y,bp,cs)==1 ) break; // ~5
    }
    if(y<5*dy/8) Break; // v0.2.4a5
    if ( loop(bp, dx-1,dy-2-dy/32,dx,cs,0,LE)
       > loop(bp,    0,   1+dy/32,dx,cs,0,RI) + dx/4 ) Break; // ~5 Dec00
    ac='s';
    if (gchar) { ad=98*ad/100; MSG(fprintf(stderr,"gchar=bad");) }
    if ( loop(bp, dx-1,   0,dx,cs,1,LE)  // ToDo: improve
       > loop(bp,    0,dy-1,dx,cs,1,RI)+dx/8 ) ad=98*ad/100; // ~5 4x5 font
    if( hchar ){  // (slanted) S but 5 is very similar! check it
      ac='S';
      if ( loop(bp,3*dx/4,   0,dy,cs,1,DO)  // ToDo: improve
         > loop(bp,  dx/4,dy-1,dy,cs,1,UP) ) ad=99*ad/100; // ~5
      if ( loop(bp, dx-1,dy-1-dy/32,dx,cs,0,LE)
         > loop(bp,    0,   0+dy/32,dx,cs,0,RI) ) ad=99*ad/100; // ~5
      if ( loop(bp,    0,dy-1-dy/32,dx,cs,0,RI)
         > loop(bp, dx-1,   0+dy/32,dx,cs,0,LE) ) ad=99*ad/100; // ~5
    }
    Setac(box1,ac,ad);
    break;
  }
  return box1->c;
}
3193
/*
 * ocr0_gG - rule based tests for 'g' (two variants) and a round 'G'.
 *
 * Three independent single-pass "test blocks" are run one after the
 * other on the same box:
 *   1. 'g' with at least one hole,
 *   2. round 'G' (no hole, gap at the right side, horizontal bar),
 *   3. italic 'g' that looks like a '9'.
 * In each block a failed hard rule leaves via the Break macro (defined
 * elsewhere in this file); soft rules scale the certainty ad (starts at
 * 100).  A block that survives all rules calls Setac(box1, char, ad).
 *
 * Calls that pass bp use box-relative coordinates (0..dx-1, 0..dy-1),
 * calls that pass box1->p use the absolute x0..x1 / y0..y1 coordinates.
 *
 * Returns box1->c.
 *
 * NOTE(review): semantics of loop(), num_cross(), get_bw(), num_hole()
 * and turmite() are not visible here - confirm against their
 * definitions before changing any threshold.
 */
static wchar_t ocr0_gG(ocr0_shared_t *sdata){
  struct box *box1=sdata->box1;
  pix *bp=sdata->bp;
  int i,j,d,x,y,i1,i2,i3,hchar=sdata->hchar,gchar=sdata->gchar,
      x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
  int dx=x1-x0+1,dy=y1-y0+1,  /* size */
      ad;                     /* tmp-vars */

  // --- test g ---------------------------------------------------
  /* some g's have crotchet at upper right end, so hchar can be set */
  // ~italic g
  for(ad=d=100;dx>2 && dy>4;){     // min 3x5
    DBG( wchar_t c_ask='g'; )
    if (sdata->holes.num > 3) Break; /* tolerant against a tiny hole */
    if (sdata->holes.num > 2) ad=99*ad/100;
    if (sdata->holes.num < 1) Break;
    // ToDo: if 2 holes, lower hole should be below baseline ~8
    if( get_bw(x0+dx/2, x0+dx/2, y1-dy/2, y1,box1->p,cs,1) != 1 ) Break;
    if( get_bw(x1-dx/4, x1     , y1-dy/4, y1,box1->p,cs,1) != 1 ) Break; // ~p
    if( get_bw(x0+dx/2, x0+dx/2, y0, y0+dy/2,box1->p,cs,1) != 1 ) Break;

    if( num_cross(x0+dx/2, x0+dx/2, y0, y1, box1->p,cs) < 3 )
    if( num_cross(x1-dx/2, x1-dx/2, y0, y1, box1->p,cs) < 3 ) Break;
    for (i=0;i<sdata->holes.num;i++){ // check for upper hole
      if (sdata->holes.hole[i].y1 < 5*dy/8+1) break;
    }
    if (i==sdata->holes.num) Break; // no upper hole found
    if (sdata->holes.num>1){ // 2018-09 check for low hole
      for (i=0;i<sdata->holes.num;i++){
        if (sdata->holes.hole[i].y0 >= box1->m3-dy/4-y0) break;
      }
      if (i==sdata->holes.num) Break; // no lower hole found 2018-09 nums1
    }
    // if( num_hole ( x0, x1, y0, y0+5*dy/8, box1->p,cs,NULL) != 1 ) Break;
    for(y=dy/4;y<dy;y++) if( num_cross(0,dx-1,y,y,bp,cs)==2 ) break;
    if( y==dy ) Break; // ~q
    if( get_bw(0,dx/2,7*dy/8,7*dy/8,bp,cs,1) != 1 ) Break; // ~q
    y =loop(bp,dx/16,0,dy,cs,0,DO);
    if(y<=dy/8)
      y+=loop(bp,dx/16,y,dy,cs,1,DO);
    if(16*y>=15*dy) Break; // ~B

    if (num_cross(x1, x1, (y0+y1)/2, y1, box1->p,cs)>1) {
      ad=98*ad/100; // ~&
      if (num_cross(x1  , x1  , y0, (y0+y1)/2, box1->p,cs)<1 ) ad=96*ad/100;
      if (num_cross(x1-1, x1-1, y0, (y0+y1)/2, box1->p,cs)<1 ) ad=95*ad/100;
    }
    // looking for a gap: x becomes the largest value seen from the right
    // NOTE(review): the length limit passed to loop() is dy although the
    // scan direction is LE; kept unchanged - confirm whether dx was meant
    for (x=0,y=dy/4;y<dy-dy/4;y++){
      i=loop(bp,dx-1,y,dy,cs,0,LE); if (i>x) x=i;
    } // in a good font x is greater dx/2

    if (x<dx/2) { // bad font? or %
      if( num_cross(x0,x1     ,y0+dy/4,y0+dy/4,box1->p,cs) > 2
       || num_cross(x0,x1     ,y0+dy/8,y0+dy/8,box1->p,cs) > 2) ad=90*ad/100;
      if( num_cross(x0,x1+dx/4,y1-dy/4,y1-dy/4,box1->p,cs) > 2
       || num_cross(x0,x1+dx/4,y1-dy/8,y1-dy/8,box1->p,cs) > 2) ad=90*ad/100;
    }
    if( num_cross(0,dx-1,dy/2,dy/2,bp,cs) >2 ) ad=99*ad/100; // ~/o

    /* test for horizontal symmetry ~8;
     * y is set to dy+1 as soon as one asymmetric pixel pair is found,
     * so y==dy after the loops means "completely symmetric" */
    for (y=0;y<dy;y++) for (x=0;x<dx/2;x++)
      if ((getpixel(bp,x,y)<cs)!=(getpixel(bp,dx-1-x,y)<cs)) { y=dy+1; break; }
    if (y==dy) Break; /* ~8 */

    if (box1->m4==0) ad=98*ad/100;
    if ( hchar) ad=96*ad/100;
    if (!gchar) ad=96*ad/100; // tmp12/nums1.jpg '8' ?
    ad=98*ad/100;
    Setac(box1,'g',ad);
    break;
  }
  // --- test round G ---------------------------------------------
  for(ad=d=100;dx>3 && dy>4;){     // min 4x5
    DBG( wchar_t c_ask='G'; )
    if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
    if( get_bw(x0     ,x0+dx/2,y0+dy/3,y0+dy/3,box1->p,cs,1) != 1 ) Break;
    if( get_bw(x0+dx/2,x1-dx/4,y0     ,y0+dy/4,box1->p,cs,1) != 1 ) Break;
    if( get_bw(x0+dx/2,x0+dx/2,y1-dy/4,y1     ,box1->p,cs,1) != 1 ) Break;
    if( get_bw(x0     ,x0+dx/2,y1-dy/3,y1-dy/3,box1->p,cs,1) != 1 ) Break; // ~S
    /* find the first row whose right half is empty; y is reused below */
    for( y=y0+dy/4;y<y1-dy/3;y++ )
      if( get_bw(x1-dx/2,x1,y,y,box1->p,cs,1) == 0 ) break;
    if( y==y1-dy/3 ) Break; // no gap

    if( num_cross(x0+dx/2  , x0+dx/2  , y0, y, box1->p,cs) != 1
     || num_cross(x0+dx/2+1, x0+dx/2+1, y0, y, box1->p,cs) != 1 ) Break; // ~e

    x=x0; y=y1;
    turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,UP,ST); // left bow?
    if( y<y0+dy/4 ) Break; // filter W

    x=x1; y=y1-dy/3;  // upper right open bow
    turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,LE,ST);
    if( x<x1-3*dx/8 ) Break;
    turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,ST,LE);
    if( x<x0+dx/2 ){  // not sure, try again (not best)
      x=x1; y=y1-dy/4;
      turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,LE,ST);
      turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,ST,LE);
      if( x<x0+dx/2 ) Break;
    }
    turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,RI,UP); // upper end right middle
    if( x<=x1 ) Break;
    if( y<y0+3*dy/8 ) Break;
    if( y>y1-dy/4 ) Break;

    x=x1-dx/3;y=y1;  // follow left C-bow, filter S
    turmite(box1->p,&x,&y,x0,x1,y0+dy/3,y1,cs,LE,UP); // w=LE b=UP
    // MSG(fprintf(stderr,"xy= %d %d",x-x0,y-y0);)
    if( y>y0+dy/3+1 ) Break; /* leave box below for S or on top for CG */
    /* if (y<y0) y++; else x++; */ /* enter the box again */
    turmite(box1->p,&x,&y,x0,x1,y0     ,y1,cs,RI,UP);
    MSG(fprintf(stderr,"xy= %d %d",x-x0,y-y0);)
    if( y>y0 ) Break;
    if (sdata->holes.num > 0) Break;
    // if( num_hole(x0,x1,y0,y1,box1->p,cs,NULL) > 0 ) Break;
    if( dx>4 && dy>6){  // no (<[
      /* i stays 1 if no row of the upper third has two crossings */
      for(i=1,y=0;i && y<dy/3;y++)
        if( num_cross(0,dx-1,y,y,bp,cs) == 2 ) i=0;
      if( i ) ad=98*ad/100;
      /* same for the lower third, here it is a hard rule */
      for(i=1,y=0;i && y<dy/3;y++)
        if( num_cross(0,dx-1,dy-1-y,dy-1-y,bp,cs) == 2 ) i=0;
      if( i ) Break;
    }
    for(i=1,y=dy/2;i && y<dy;y++)
      if( num_cross(0,dx-1,y,y,bp,cs) == 2 ) i=0;
    if( i ) Break;
    for(i=0,y=3*dy/4;y<dy;y++){
      x=loop(bp,0,y,dx,cs,0,RI); // falling edge <=> Z
      if( x<i-dx/20 ) break;
      if( x>i ) i=x;
    }
    if( y<dy ) Break;

    // only check the middle!
    for(i=0,i1=y=dy/4;y<dy-dy/4;y++){ // look for horizontal line
      x=loop(bp,dx-1  ,y,dx/4,cs,0,LE);
      x=loop(bp,dx-1-x,y,dx/2,cs,1,LE); if(x>i){ i=x;i1=y; }
    }
    if( i1<=dy/4 || i1>=dy-dy/4 ) Break; // around the middle ?
    // check from above for gap and left vertical line (~S)
    x =loop(bp,0,i1,dx  ,cs,0,RI);
    x+=loop(bp,x,i1,dx-x,cs,1,RI); // left vertical bow
    x+=loop(bp,x,i1,dx-x,cs,0,RI); if (x>=dx) ad=90*ad/100;
    MSG(fprintf(stderr,"h-bar y dx %d %d ad= %d",i1,i,ad);)

    i=1; // Mar06: adapted to 4x6 font
    /* search the lower right quarter for a 2x2 pixel pattern; i is
     * cleared when one is found, otherwise the certainty is reduced */
    for(x=dx/2;x<dx-1 && i;x++) // look for  @@ (instead +1 use +delta?)
    for(y=dy/2;y<dy-1 && i;y++){ //           .@
      if( getpixel(bp,x  ,y  )>=cs
       && getpixel(bp,x+1,y  )< cs
       && getpixel(bp,x+1,y-1)< cs
       && getpixel(bp,x  ,y-1)< cs ) { i=0;break; }
    }
    if(i) ad=95*ad/100; // ~C
    if(!hchar) ad=98*ad/100;
    if( gchar) ad=98*ad/100;

    Setac(box1,'G',ad);
    break;
  }
  // --- test \it g like 9 ----------------------------------------------
  for(ad=d=100;dx>2 && dy>4;){     // min 3x5
    DBG( wchar_t c_ask='g'; )
    if (sdata->holes.num > 2) Break; /* tolerant against a tiny hole */
    // ToDo: if 2 holes, lower hole should be below baseline ~8
    if( num_cross(x0+dx/2,x0+dx/2,y0,y1,box1->p,cs) != 3  // pre select
     && num_cross(x0+dx/4,x1-dx/4,y0,y1,box1->p,cs) != 3 ) Break;
    for( x=0,i=y=y0+dy/2;y<=y1-3*dy/16;y++){ // search notch (from left)
      j=loop(box1->p,x0,y,dx,cs,0,RI);
      if( j>2 && j>dx/4 && y<y1-3 && j<dx/2 ) // long bow
        j+=loop(box1->p,x0+j-2,y+1,dx,cs,0,RI)-2;
      if( j>x ) { x=j; i=y; }
    }
    if( x<4*dx/8 ) Break;
    if( num_cross(x0+dx/2,x1,i  ,y1,box1->p,cs) != 1
     && num_cross(x0+dx/2,x1,i+1,y1,box1->p,cs) != 1 ) Break;
    if( num_hole(x0,x1,y0,i+1,box1->p,cs,NULL)!=1 ) Break;
    if( num_hole(x0,x1,i-1,y1,box1->p,cs,NULL)!=0 ) Break;
    if( loop(box1->p,x0,y1  ,dy,cs,0,RI)>dx/3 &&
        loop(box1->p,x0,y1-1,dy,cs,0,RI)>dx/3) Break; // no q
    for( x=0,i=y=y0+dy/3;y<=y1-dy/3;y++){ // search notch (from right)
      j=loop(box1->p,x1,y,dx,cs,0,LE);
      if( j>x ) { x=j; i=y; }
    }
    if( x>dx/2 ) Break; // no g
    i1=loop(bp,dx-1,dy/8     ,dx,cs,0,LE); if(i1>dx/2) Break;
    i3=loop(bp,dx-1,dy-1-dy/8,dx,cs,0,LE);
    i2=loop(bp,dx-1,dy/2     ,dx,cs,0,LE); if(i1+i3<2*i2-dx/8) Break; // convex
    i1=loop(bp,dx-1,dy/4     ,dx,cs,0,LE); if(i1>dx/2) Break;
    i3=loop(bp,dx-1,dy-1-dy/8,dx,cs,0,LE);
    for(y=dy/4;y<dy-1-dy/4;y++){
      i2=loop(bp,dx-1,y,dx,cs,0,LE);
      if(i1+i3-2*i2<-1-dx/16) break; // convex from right ~g ~3
    }
    if(y<dy-1-dy/4) Break;
    x=loop(bp,dx  -1,6*dy/8,dx,cs,0,LE); if(x>0){ x--; // robust
      y=loop(bp,dx-x-1,  dy-1,dy,cs,0,UP);
      if(y<dy/8) Break; // ~q (serif!)
    }
    // %
    if( num_cross(x0,x1     ,y0+dy/4,y0+dy/4,box1->p,cs) > 2) ad=90*ad/100;
    if( num_cross(x0,x1+dx/4,y1-dy/4,y1-dy/4,box1->p,cs) > 2
     || num_cross(x0,x1+dx/4,y1-dy/8,y1-dy/8,box1->p,cs) > 2) ad=90*ad/100;

    if (box1->m4==0) ad=98*ad/100;
    if ( hchar) ad=96*ad/100;
    if (!gchar) {
      if (box1->m4 - box1->m3 > 2) ad=96*ad/100;
      else {
        if (y1 > box1->m3) ad=99*ad/100;
        else               ad=97*ad/100;
      }
    }
    if (ad>99) ad=99; // never be sure to have a 9
    Setac(box1,'g',ad);
    break;
  }
  return box1->c;
}
3405
3406 // rewritten for vector usage v0.41
ocr0_xX(ocr0_shared_t * sdata)3407 static wchar_t ocr0_xX(ocr0_shared_t *sdata){
3408 struct box *box1=sdata->box1;
3409 // pix *bp=sdata->bp; // obsolete
3410 int i,j,d,x,y,hchar=sdata->hchar,gchar=sdata->gchar,
3411 x0=box1->x0, x1=box1->x1, y0=box1->y0, y1=box1->y1; // ,cs=sdata->cs;
3412 int dx=x1-x0+1, dy=y1-y0+1, /* size */
3413 (*aa)[4]=sdata->aa, /* the for line ends, (x,y,dist^2,vector_idx) */
3414 ad; /* tmp-vars */
3415 wchar_t bc=UNKNOWN;
3416
3417 // --- test xX ---------------------------------------------------
3418 // rewritten for vectors 0.41
3419 for(ad=d=100;dx>2 && dy>3;){ // min 3x4
3420 int ld, i1, i2, i3, i4; // lien derivation, 4 inner edges
3421 DBG( wchar_t c_ask='x'; )
3422 if (sdata->holes.num > 0) Break; /* # */
3423 /* half distance to the center */
3424 d=2*sq(128/4);
3425 /* now we check for the 4 ends of the x */
3426 if (aa[0][2]>d) Break;
3427 if (aa[1][2]>d) Break;
3428 if (aa[2][2]>d) Break;
3429 if (aa[3][2]>d) Break;
3430 if (aa[3][0]-aa[0][0]<dx/2) Break;
3431 if (aa[2][0]-aa[1][0]<dx/2) Break;
3432 if (aa[1][1]-aa[0][1]<dy/2) Break;
3433 if (aa[2][1]-aa[3][1]<dy/2) Break;
3434 if (aa[0][1]-y0 > dy/16
3435 && aa[3][1]-y0 > dy/16
3436 && y1-aa[1][1] > dy/16
3437 && y1-aa[2][1] > dy/16) Break; // 10x10 ~* (X + vert. line)
3438 /* searching for 4 notches between neighbouring ends */
3439 // JS-2010-09 tmp08/0811qemu2_crop (left top above right top?)
3440 if (box1->m2-box1->m1>2 &&
3441 3*(aa[3][1]-aa[0][1])>=2*(box1->m2-box1->m1)) Break; // ~k
3442
3443 /* 2009-07: false positive on font4x5 '*' = '-' | 'x' */
3444 /* 2010-08: +/-(0,dy/4) to +/-(dx/4,dy/3) (handwritten x) */
3445 i1=nearest_frame_vector(box1,aa[0][3],aa[1][3],x0+dx, y0+dy/3);
3446 i3=nearest_frame_vector(box1,aa[0][3],aa[1][3],x0+dx, y1-dy/3);
3447 i2=nearest_frame_vector(box1,i1, i3, x0+dx/4,y0+dy/2);
3448 MSG(fprintf(stderr,"left gap i132 %d %d %d",i1,i3,i2);)
3449 if (box1->frame_vector[i2][0]<=x0+ dx/8) Break;
3450 if (box1->frame_vector[i2][0]<=x0+ dx/4) ad=98*ad/100;
3451 i1=nearest_frame_vector(box1,aa[2][3],aa[3][3],x0 ,y0+dy/3);
3452 i3=nearest_frame_vector(box1,aa[2][3],aa[3][3],x0 ,y1-dy/3);
3453 i2=nearest_frame_vector(box1,i1, i3, x1-dx/4,y0+dy/2);
3454 MSG(fprintf(stderr,"right gap i132 %d %d %d",i1,i3,i2);)
3455 if (box1->frame_vector[i2][0]>=x1- dx/8) Break;
3456 if (box1->frame_vector[i2][0]>=x1- dx/4) ad=98*ad/100;
3457
3458 /* only left side */
3459 for (j=i=aa[0][3];i!=aa[1][3];i=(i+1)%box1->num_frame_vectors[0]) {
3460 if (box1->frame_vector[i][0]
3461 >=box1->frame_vector[j][0]) j=i; /* notice most right vector */
3462 } if (j==i) Break;
3463 /* calculate the distance to the center */
3464 x=box1->frame_vector[j][0];
3465 y=box1->frame_vector[j][1]; i1=j;
3466 if (abs(aa[0][0]+aa[1][0]+aa[2][0]+aa[3][0]-4*x)>(dx+2)) Break;
3467 if (abs(aa[0][1]+aa[1][1]+aa[2][1]+aa[3][1]-4*y)>(dy+2)) Break;
3468 if ( aa[0][0]+aa[1][0]-2*x>=0) Break;
3469 if ( aa[1][0] >= x ) Break;
3470 if ( aa[0][0] > x ) Break;
3471 if ( aa[0][0] >= x ) ad=99*ad/100;
3472 if (x-x0<dx/8) Break;
3473 if (x-x0<dx/4) ad=99*ad/100;
3474 /* check if upper left and center point are joined directly */
3475 ld=line_deviation(box1, aa[0][3], j);
3476 MSG(fprintf(stderr," 0-X %d %d dist= %d/%d",x-x0,y-y0,ld,2*sq(1024/4));)
3477 if (ld >2*sq(1024/4)) Break;
3478 /* check if lower left and center point are joined directly */
3479 ld=line_deviation(box1, j, aa[1][3]);
3480 MSG(fprintf(stderr," X-1 %d %d dist= %d/%d",x-x0,y-y0,ld,2*sq(1024/4));)
3481 if (ld >2*sq(1024/4)) Break;
3482
3483 /* only lower side */
3484 for (j=i=aa[1][3];i!=aa[2][3];i=(i+1)%box1->num_frame_vectors[0]) {
3485 if (box1->frame_vector[i][1]
3486 <=box1->frame_vector[j][1]) j=i; /* notice most upper vector */
3487 } if (j==i) Break;
3488 /* calculate the distance to the center */
3489 x=box1->frame_vector[j][0];
3490 y=box1->frame_vector[j][1]; i2=j;
3491 if (abs(aa[0][0]+aa[1][0]+aa[2][0]+aa[3][0]-4*x)>(dx+2)) Break;
3492 if (abs(aa[0][1]+aa[1][1]+aa[2][1]+aa[3][1]-4*y)>(dy+2)) Break;
3493 if ( aa[1][1]+aa[2][1]-2*y<=0) Break;
3494 /* check if lower left and center point are joined directly */
3495 ld=line_deviation(box1, aa[1][3], j);
3496 MSG(fprintf(stderr," 1-X %d %d dist= %d/%d",x-x0,y-y0,ld,2*sq(1024/4));)
3497 if (ld >2*sq(1024/4)) Break;
3498 /* check if lower right and center point are joined directly */
3499 ld=line_deviation(box1, j, aa[2][3]);
3500 MSG(fprintf(stderr," X-2 %d %d dist= %d/%d",x-x0,y-y0,ld,2*sq(1024/4));)
3501 if (ld >2*sq(1024/4)) Break;
3502
3503 /* only right side */
3504 for (j=i=aa[2][3];i!=aa[3][3];i=(i+1)%box1->num_frame_vectors[0]) {
3505 if (box1->frame_vector[i][0]
3506 <=box1->frame_vector[j][0]) j=i; /* notice most left vector */
3507 } if (j==i) Break;
3508 /* calculate the distance to the center */
3509 x=box1->frame_vector[j][0];
3510 y=box1->frame_vector[j][1]; i3=j;
3511 if (abs(aa[0][0]+aa[1][0]+aa[2][0]+aa[3][0]-4*x)>(dx+2)) Break;
3512 if (abs(aa[0][1]+aa[1][1]+aa[2][1]+aa[3][1]-4*y)>(dy+2)) Break;
3513 if ( aa[2][0]+aa[3][0]-2*x<=0) Break;
3514 if ( aa[3][0] <= x ) Break;
3515 if ( aa[2][0] < x ) Break;
3516 if ( aa[2][0] <= x ) ad=99*ad/100;
3517 if (dx-(x-x0)<dx/8) Break;
3518 if (dx-(x-x0)<dx/4) ad=99*ad/100;
3519 /* check if lower right and center point are joined directly */
3520 ld=line_deviation(box1, aa[2][3], j);
3521 MSG(fprintf(stderr," 2-X %d %d dist= %d/%d",x-x0,y-y0,ld,2*sq(1024/4));)
3522 if (ld >2*sq(1024/4)) Break;
3523 /* check if upper right and center point are joined directly */
3524 ld=line_deviation(box1, j, aa[3][3]);
3525 MSG(fprintf(stderr," X-3 %d %d dist= %d/%d",x-x0,y-y0,ld,2*sq(1024/4));)
3526 if (ld >2*sq(1024/4)) Break;
3527
3528 /* only upper side */
3529 for (j=i=aa[3][3];i!=aa[0][3];i=(i+1)%box1->num_frame_vectors[0]) {
3530 if (box1->frame_vector[i][1]
3531 >=box1->frame_vector[j][1]) j=i; /* notice lowest vector */
3532 } if (j==i) Break;
3533 /* calculate the distance to the center */
3534 x=box1->frame_vector[j][0];
3535 y=box1->frame_vector[j][1]; i4=j;
3536 if (abs(aa[0][0]+aa[1][0]+aa[2][0]+aa[3][0]-4*x)>(dx+2)) Break;
3537 if (abs(aa[0][1]+aa[1][1]+aa[2][1]+aa[3][1]-4*y)>(dy+2)) Break;
3538 if ( aa[3][1]+aa[0][1]-2*y>=0) Break;
3539 /* check if upper left and center point are joined directly */
3540 ld=line_deviation(box1, aa[3][3], j);
3541 MSG(fprintf(stderr," 3-X %d %d dist= %d/%d",x-x0,y-y0,ld,2*sq(1024/4));)
3542 if (ld >2*sq(1024/4)) Break;
3543 /* check if lower left and center point are joined directly */
3544 ld=line_deviation(box1, j, aa[0][3]);
3545 MSG(fprintf(stderr," X-0 %d %d dist= %d/%d",x-x0,y-y0,ld,2*sq(1024/4));)
3546 if (ld >2*sq(1024/4)) Break;
3547
3548 // center crossing of diagonal lines is small?
3549 if (box1->frame_vector[i3][0] - box1->frame_vector[i1][0] > dx/2) Break;
3550
3551 if (gchar) ad=99*ad/100;
3552 bc='x'; if(hchar) bc='X';
3553 Setac(box1,bc,ad);
3554 break;
3555 }
3556 // --- test \it x ---------------------------------------------------
3557 #if 0
3558 for(ad=d=99;dx>4 && dy>4;){ // min 3x4
3559 DBG( wchar_t c_ask='x'; )
3560 if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
3561 if( get_bw(x0,x0+dx/4,y0+dy/2,y0+dy/2,box1->p,cs,1) != 0 ) Break;
3562 if( get_bw(x1-dx/4,x1,y0+dy/2,y0+dy/2,box1->p,cs,1) != 0 ) Break;
3563 if( num_cross(x0+dx/4,x1-dx/4,y0+dy/2,y0+dy/2, box1->p,cs) != 1 ) Break;
3564 if( num_cross(x0,x1,y0+dy/4,y0+dy/4, box1->p,cs) != 3
3565 && num_cross(x0,x1,y0+dy/8,y0+dy/8, box1->p,cs) < 3 ) Break;
3566 if( num_cross(x0,x1,y1-dy/4,y1-dy/4, box1->p,cs) != 3
3567 && num_cross(x0,x1,y1-dy/8,y1-dy/8, box1->p,cs) < 3 ) Break;
3568 if( gchar ) ad=97*ad/100;
3569 if( hchar ) ad=96*ad/100;
3570 bc='x';
3571 Setac(box1,bc,ad);
3572 break;
3573 }
3574 #endif
3575 return box1->c;
3576 }
3577
/*
 * ocr0_yY - rule based tests whether the box looks like 'y' or 'Y'.
 *
 * Two independent single-pass "test blocks" are run on the same box:
 *   1. italic y/Y: points a (upper left), b (upper right), c (deepest
 *      point of the upper notch) and d (lower stem) are located and the
 *      line b-d is verified with get_line2(),
 *   2. upright y/Y: points a, b, c (junction) and d (lower end) are
 *      located and the lines a-c, b-c and c-d are verified.
 * Both start with the same corner checks on sdata->aa, whose entries
 * are (x, y, dist^2, vector_idx).  A failed hard rule leaves through the
 * Break macro (defined elsewhere in this file); soft rules scale the
 * certainty ad (starts at 100).  A surviving block calls
 * Setac(box1, 'y' or 'Y', ad).
 *
 * Calls that pass bp use box-relative coordinates, calls that pass
 * box1->p use the absolute x0..x1 / y0..y1 coordinates.
 *
 * Returns box1->c.
 *
 * NOTE(review): semantics of loop(), num_cross(), get_bw(), get_line2()
 * and nearest_frame_vector() are not visible here - confirm against
 * their definitions before changing any threshold.
 */
static wchar_t ocr0_yY(ocr0_shared_t *sdata){
  struct box *box1=sdata->box1;
  pix *bp=sdata->bp;
  int i1,i2, i,j,d,x,y,hchar=sdata->hchar,gchar=sdata->gchar,
      x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
  int dx=x1-x0+1,dy=y1-y0+1,        /* size */
      ad,xa,ya,xb,yb,xc,yc,xd,yd;   /* tmp-vars */
  int (*aa)[4]=sdata->aa;    /* corner-points, (x,y,dist^2,vector_idx) */
  wchar_t bc=UNKNOWN;

  // --- test italic yY --------------------------------------------
  for(ad=d=100;dx>2 && dy>3;){     // min 3x4
    DBG( wchar_t c_ask='y'; )
    if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
    if (sdata->holes.num > 0) ad=97*ad/100;
    /* half distance to the center, added 2018-09 */
    d=2*sq(128/4);
    /* now we check for the upper right end of the y */
    if (aa[3][2]>d/2) Break;    /* [2] = distance, ~N... */
    if (aa[0][2]>3*d/4) Break;  /* upper left end, handwritten tolerance */
    if (aa[1][2]<d/4    /* lower left end */
     && aa[2][2]<d/4) ad=99*ad/100; /* lower right end */
    i1=i2=-1;  /* -1 = gap vector not searched (only used for debug) */
    if (aa[1][2]<(d+2)/4
     && aa[2][2]<(d+2)/4) { // 2018-09 tmp12/rasterchars_small.png ~N
      i1=nearest_frame_vector(box1,aa[1][3],aa[2][3],x0,y0); // left N-gap?
      if (box1->frame_vector[i1][1]-y0<=(dy+2)/4
       && box1->frame_vector[i1][0]-x0<=(dx+2)/4) Break;
    }
    if (aa[2][2]<(d+2)/4
     && aa[0][2]<(d+2)/4) { // 2018-09 tmp12/rasterchars_small.png ~N
      i2=nearest_frame_vector(box1,aa[3][3],aa[0][3],x1,y1); // right N-gap?
      if (y1-box1->frame_vector[i2][1]<=(dy+2)/4
       && x1-box1->frame_vector[i2][0]<=(dx+2)/4) Break;
    }
    MSG(fprintf(stderr,"i1-2 %d %d ad=%d",i1,i2,ad);)

    if( num_cross(0,dx-1,dy/8,dy/8,bp,cs) < 2
     && num_cross(0,dx-1,   1,   1,bp,cs) < 2 ) Break;
    if( num_cross(0,dx-1,dy-1,dy-1,bp,cs) != 1
     && num_cross(0,dx-1,dy-2,dy-2,bp,cs) != 1 ) Break;
    if( num_cross(dx-1,dx-1,0,dy-1,bp,cs) != 1
     && num_cross(dx-2,dx-2,0,dy-1,bp,cs) != 1 ) Break;
    if( num_cross(dx/3,dx/3,dy/4,dy-1,bp,cs) != 2
     && num_cross(dx/2,dx/2,dy/4,dy-1,bp,cs) != 2 ) Break;
    for(yc=y=0,xc=x=dx/4;x<dx-dx/4;x++){ // search deepest point
      i=loop(bp,x,0,dy,cs,0,DO); if(i>y){ yc=y=i;xc=x; }
    }
    if( y>12*dy/16 || y<3*dy/8 ) Break;
    ya=dy/8; xa=xc-loop(bp,xc,ya,dx,cs,0,LE); if(xa< 0) Break;
    yb=dy/8; xb=xc+loop(bp,xc,yb,dx,cs,0,RI); if(xb>=dx) Break;
    for(y=dy/8;y<yc-dy/8;y++){
      if( num_cross(xc,dx-1,y,y,bp,cs) != 1 ) break;
      if( num_cross(0 ,xc  ,y,y,bp,cs) <  1 ) break;
    }
    if(y<yc-dy/8) Break;
    yd=dy-1-dy/8;xd=dx-1-loop(bp,dx-1,yd,dx,cs,0,LE);
    /* the backslash of the diagram is escaped: "\e" is not an ISO C
     * escape sequence and would emit an ESC byte where supported */
    g_debug(fprintf(stderr," debug_yY: \n"
      " /a b \n"
      " | | \n"
      " -c/ \n"
      " \\e-d \n");)
    g_debug(fprintf(stderr,"a-e: %d %d %d %d %d %d %d %d",
      xa,ya,xb,yb,xc,yc,xd,yd);)
    if(xd>6*dx/8) ad=99*ad/100; // why this???
    if (loop(bp,dx-1,dy-1,dx,cs,0,LE)<1) Break;
    // printf(" abcd=%d %d %d %d %d %d %d %d -",xa,ya,xb,yb,xc,yc,xd,yd);
    if( get_line2(xb,yb,xd,yd,bp,cs,100)<95 ) Break;
    // if( get_line2(xc,yc,xd,yd,bp,cs,100)<95 ) Break;
    // printf("ok");
    bc='y';
    if(gchar && !hchar) bc='y'; else
    if(hchar && (!gchar || dy<14)) bc='Y'; else ad=98*ad/100; // SMALL-CAPS ???
    Setac(box1,bc,ad);
    break;
  }
  // --- test yY ---------------------------------------------------
  for(ad=d=100;dx>2 && dy>3;){     // min 3x4
    DBG( wchar_t c_ask='y'; )
    if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
    /* half distance to the center, added 2018-09 */
    d=2*sq(128/4);
    /* now we check for the upper right end of the y */
    if (aa[3][2]>d/2) Break;    /* [2] = distance, ~N... */
    if (aa[0][2]>3*d/4) Break;  /* upper left end, handwritten tolerance */
    if (aa[1][2]<d/4    /* lower left end */
     && aa[2][2]<d/4) ad=99*ad/100; /* lower right end */
    i1=i2=0;  // initialize
    if (aa[1][2]<(d+2)/4
     && aa[2][2]<(d+2)/4) { // 2018-09 tmp12/rasterchars_small.png ~N
      i1=nearest_frame_vector(box1,aa[1][3],aa[2][3],x0,y0); // left N-gap?
      if (box1->frame_vector[i1][1]-y0<=(dy+2)/4
       && box1->frame_vector[i1][0]-x0<=(dx+2)/4) Break;
    }
    if (aa[2][2]<(d+2)/4
     && aa[0][2]<(d+2)/4) { // 2018-09 tmp12/rasterchars_small.png ~N
      i2=nearest_frame_vector(box1,aa[3][3],aa[0][3],x1,y1); // right N-gap?
      if (y1-box1->frame_vector[i2][1]<=(dy+2)/4
       && x1-box1->frame_vector[i2][0]<=(dx+2)/4) Break;
    }
    MSG(fprintf(stderr,"i1-2 %d %d ad=%d",i1,i2,ad);)

    if( get_bw(x0,x0,y1-dy/8,y1,box1->p,cs,1) == 1 ) {
      if( get_bw(x0,x0+4*dx/8,y0+dy/8,y0+dy/8,box1->p,cs,1) != 1 ) Break;
    } else {
      if( get_bw(x0,x0+3*dx/8,y0+dy/8,y0+dy/8,box1->p,cs,1) != 1 ) Break;
    }
    if( num_cross(0,dx-1,dy/8,dy/8,bp,cs) != 2
     && num_cross(0,dx-1,   1,   1,bp,cs) != 2 ) Break;
    if( num_cross(dx/2,dx/2,0,   1,bp,cs) != 0 ) Break;
    if( num_cross(0,dx-1,dy-1,dy-1,bp,cs) != 1
     && num_cross(0,dx-1,dy-2-dy/32,dy-2-dy/32,bp,cs) != 1 ) Break;
    if( num_cross(dx-1,dx-1,0,dy-1,bp,cs) != 1
     && num_cross(dx-2,dx-2,0,dy-1,bp,cs) != 1
     && num_cross(dx-dx/8-1,dx-dx/8-1,0,dy-1,bp,cs) != 1 ) Break;
    if( loop(bp,dx-1,dy-1-dy/8,dx,cs,0,LE)+dx/8+1 // Jul00
      < loop(bp,   0,dy-1-dy/8,dx,cs,0,RI)        ) Break;
    for(y=0,x=dx/4;x<dx-dx/4;x++){ // search lowest point
      i=loop(bp,x,0,dy,cs,0,DO); if(i>y) y=i;
    }
    if( y>10*dy/16 || y<2*dy/8 ) Break;
    /* a / b: rows of the upper quarter where the left / right border
     * comes closest to the box edge */
    for(xc=xb=xa=dx,yc=yb=ya=y=0;y<dy/4;y++){
      x =loop(bp,   0  ,  y,dx,cs,0,RI); if(x<xa){ xa=x;ya=y; }
      x =loop(bp,dx-1  ,  y,dx,cs,0,LE); if(x<xb){ xb=x;yb=y; }
    }
    if(yb>dy/8) Break;
    /* c: row with the smallest value j measured behind the right arm */
    for(i=dx,yc=y=dy/4;y<3*dy/4;y++){
      if( num_cross(0,dx-1,y,y,bp,cs) < 2 ) break;
      x =loop(bp,dx-1  ,y,dx,cs,0,LE);
      x+=loop(bp,dx-1-x,y,dx,cs,1,LE);
      j =loop(bp,dx-1-x,y,dx,cs,0,LE); if(j<=i){ i=j;yc=y;xc=dx-1-x-j/2; }
    }
    yc+=dy/16+1;
    yc+=loop(bp,xc,yc,i,cs,1,DO)/2;
    xa+=     loop(bp,xa  ,ya,dx,cs,1,RI)/2;
    xb=dx-1-loop(bp,dx-1,yb,dx,cs,1,LE)/2;
    yd=dy-1-dy/8;xd=dx-1-loop(bp,dx-1,yd,dx,cs,0,LE); if(xd>6*dx/8) Break;
    /* check for serif at lower end */
    for (i=0,x=dx-1;i<dy/4;i++) {
      j=loop(bp,dx-1,dy-1-i,dx,cs,0,LE);
      if (j>x+dx/16+1) break; /* detect serif */
      if (j<x) x=j;
    }
    if (i<dy/4) xd-=loop(bp,xd,yd,dx,cs,1,LE)/2;
    MSG( fprintf(stderr," debug_yY: \n"
      " a b \n"
      " \\ / \n"
      " c \n"
      " ed ");)
    MSG(fprintf(stderr,"a-e: %d %d %d %d %d %d %d %d",
      xa,ya,xb,yb,xc,yc,xd,yd);)
    // check upper left line
    if( get_line2(xa,ya,xc  ,yc,bp,cs,100)<95
     && get_line2(xa,ya,xc-1,yc,bp,cs,100)<95 ) Break;
    // check upper right line
    if( get_line2(xb,yb,xc  ,yc,bp,cs,100)<95
     && get_line2(xb,yb,xc-1,yc,bp,cs,100)<95 ) {
      // Times-Italic y ???
      xb+=loop(bp,xb,yb,dx/4,cs,1,RI)-1;
      yb+=loop(bp,xb,yb,dy/8,cs,1,DO)-1;
      if( get_line2(xb,yb,xc  ,yc,bp,cs,100)<95 ) Break;
    }
    if( get_line2(xc,yc,xd,yd,bp,cs,100)<95 ) Break;

    // decision between V and Y is sometimes very difficult
    // hope that the following code is the ultimate solution
    if( yc>=5*dy/8 && !gchar)
    if( get_line2(xa,ya,xd  ,yd,bp,cs,100)>95 )
    if( get_line2(xb,yb,xd  ,yd,bp,cs,100)>95 )
      { if (dx>4) { Break; } else ad=ad*98/100; } // ~V
    /* left border at top, middle and bottom; the three tests below are
     * cumulative, each one that holds reduces ad once more */
    xa=loop(bp,0,dy/8,dx,cs,0,RI);
    xb=loop(bp,0,dy/2,dx,cs,0,RI);
    xc=loop(bp,0,dy-1,dx,cs,0,RI);
    if( 2*xb< xa+xc   ) ad=98*ad/100; // ~V
    if( 2*xb<=xa+xc   ) ad=98*ad/100;
    if( 2*xb<=xa+xc+1 ) ad=98*ad/100;

    bc='y';
    if ((!gchar) && (!hchar)) ad=98*ad/100;
    if(y0<box1->m2-(box1->m2-box1->m1)/4)
      { bc='Y'; if(gchar) ad=98*ad/100; }
    // SMALL-CAPS ???
    Setac(box1,bc,ad);
    break;
  }
  return box1->c;
}
3762
/*
 * ocr0_zZ: rule-based check whether the box held in sdata is a 'z' or 'Z'.
 *
 * Input:  sdata->box1 (bounding box x0..x1 / y0..y1 and its frame vectors),
 *         sdata->aa (four corner points, each {x, y, dist^2, vector_idx}),
 *         sdata->holes.num, sdata->hchar, sdata->gchar.
 * Effect: if no rule rejects the shape, Setac(box1, bc, ad) is called with
 *         bc = 'Z' when hchar is set, otherwise 'z'.  ad starts at 100 and
 *         is multiplied by a percentage at every soft penalty below.
 * Return: box1->c, whether or not Setac was reached.
 *
 * NOTE(review): Break, MSG, DBG, Setac, sq, nearest_frame_vector and
 * line_deviation are defined outside this section; remarks about them are
 * inferred from how they are used here -- confirm against their definitions.
 * NOTE(review): the definition line and every code line carry listing
 * residue (a fused signature label, leading line numbers) that is not C.
 */
ocr0_zZ(ocr0_shared_t * sdata)3763 static wchar_t ocr0_zZ(ocr0_shared_t *sdata){
3764 struct box *box1=sdata->box1;
3765 int i,j,i1,i2,i3,i4,i5,dbg[9],
3766 d,x,y,hchar=sdata->hchar,gchar=sdata->gchar,
3767 x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1;
3768 int dx=x1-x0+1,dy=y1-y0+1, /* size */
3769 (*aa)[4]=sdata->aa, /* the four line ends, (x,y,dist^2,vector_idx) */
3770 ad; /* tmp-vars */
3771 wchar_t bc=UNKNOWN;
3772
3773 // --- test zZ -------
/* Single-pass block: the for has no increment and every path leaves it via
   break or Break.  Boxes with dx<=3 or dy<=3 are not tested at all. */
3774 for(ad=d=100;dx>3 && dy>3;){ // dy>dx
3775 DBG( wchar_t c_ask='z'; ) /* for debugging purpose */
/* any hole rejects; more than one frame only costs 2 percent */
3776 if (sdata->holes.num > 0) Break;
3777 if (box1->num_frames > 1) ad=98*ad/100; /* tolerant, tiny hole */
3778 /* half distance to the center */
3779 d=2*sq(128/4); /* square */
3780 /* now we check for the 4 edges of the z */
3781 if (aa[0][2]>d) Break; /* square distance to upper left corner */
3782 if (aa[1][2]>d) Break; /* square distance to lower left corner */
3783 if (aa[2][2]>d) Break; /* square distance to lower right corner */
3784 if (aa[3][2]>d) Break; /* square distance to upper right corner */
3785 if (aa[3][0]-aa[0][0]<dx/2) Break; /* upper line too short? */
3786 if (aa[2][0]-aa[1][0]<dx/2) Break; /* lower line too short? */
3787 if (aa[1][1]-aa[0][1]<dy/2) Break; /* left y-distance too short? */
3788 if (aa[2][1]-aa[3][1]<dy/2) Break; /* right y-distance too short? */
3789 if (aa[3][0]-aa[0][0]<4-1) Break; /* too small to hold a z */
3790 if (aa[2][0]-aa[1][0]<4-1) Break; /* too small */
/* upper corners lying more than dy/8 below the box top cost 1 percent each */
3791 if (aa[3][1]-y0>dy/8) ad=99*ad/100;
3792 if (aa[0][1]-y0>dy/8) ad=99*ad/100;
3793 // 2010-10-11 ~ joined ty (tmp08/gocr0801_bad5)
3794 // check bottom line to be low
/* j = y of the frame vector selected between the two lower corners for the
   target point (x0+dx/2, y0); reject if it reaches into the upper half or
   lies more than dy/8 above both lower corners */
3795 i= nearest_frame_vector(box1,aa[1][3],aa[2][3], x0+dx/2, y0);
3796 j= box1->frame_vector[i][1];
3797 if (j<y0+dy/2
3798 || (j<aa[1][1]-dy/8
3799 && j<aa[2][1]-dy/8)) Break;
3800 if (2*dx<dy) ad=99*ad/100;
3801 /* output rel. coords of aa[0..3] */
3802 MSG( \
3803 fprintf(stderr,"aa[0-3].xy %d %d %d %d %d %d %d %d ad %d", \
3804 aa[0][0]-x0,aa[0][1]-y0,aa[1][0]-x0,aa[1][1]-y0,\
3805 aa[2][0]-x0,aa[2][1]-y0,aa[3][0]-x0,aa[3][1]-y0,ad);)
3806 /* upper and lower horizontal line */
3807 d=line_deviation(box1, aa[3][3], aa[0][3]); if (d>2*sq(1024/4)) Break;
/* NOTE(review): assuming sq(x) is x*x and d is non-negative, d is at most
   2*sq(1024/4) here, so (d-sq(1024)/2)/sq(1024) truncates to 0 and this
   statement leaves ad unchanged -- confirm whether a penalty was intended */
3808 ad=(100-(d-sq(1024)/2)/sq(1024)/4)*ad/100;
/* the deviation of the lower line is only used for the hard limit */
3809 d=line_deviation(box1, aa[1][3], aa[2][3]); if (d>2*sq(1024/4)) Break;
3810
3811 /* search uppermost right > from left side */
3812 i1=nearest_frame_vector(box1,aa[0][3],aa[1][3], x1, y0);
3813 x=box1->frame_vector[i1][0];
3814 y=box1->frame_vector[i1][1];
3815 if (y-y0 > 5*dy/8) Break;
3816 if (x-x0 < 3*dx/8) Break;
3817 if (x-aa[0][0]<=dx/4) Break; // ~lI
3818 if (x-aa[0][0]<=dx/3) ad=98*ad/100; // ~lI
3819 if (x-aa[0][0]< dx/2) ad=99*ad/100; // ~lI, dx/2 for fat 8x10 font
3820 /* search most right > ~2 from left side */
3821 i3=nearest_frame_vector(box1,aa[0][3],aa[1][3], x1+2*dx, (y0+y1)/2);
3822 MSG(fprintf(stderr,"i23xy= %d %d %d %d ad %d",x-x0,y-y0,\
3823 box1->frame_vector[i3][0]-x0,box1->frame_vector[i3][1]-y0,ad);)
/* x,y still hold the i1 point; the i3 point is compared against it below */
3824 /* dy=7 i3.y=2 for fat screen font (Apr09) */
3825 if ( box1->frame_vector[i3][1]-y0> (dy+2)/4
3826 && box1->frame_vector[i3][0]-x>= 0) Break;
3827 if ( box1->frame_vector[i3][1]-y> dy/8+1 // +1 needed for 5x7 Z Jul09
3828 && box1->frame_vector[i3][0]-x>=-dx/8) ad=98*ad/100;
3829 if ( box1->frame_vector[i3][1]-y> dy/8+1
3830 && box1->frame_vector[i3][0]-x>= 0) ad=97*ad/100;
3831 if ( box1->frame_vector[i3][1]-y> dy/16+1) ad=99*ad/100; // 9x10 ~2
3832 if (box1->frame_vector[i3][0]-aa[0][0]
3833 < aa[3][0]-box1->frame_vector[i3][0]) Break; // ~lI
3834 if (box1->frame_vector[i3][0]-aa[0][0]
3835 <(aa[3][0]-box1->frame_vector[i3][0]-1)*2) ad=98*ad/100; // ~lI
3836 /* better test for a bow or peaked angle */
3837 /* upper part of a 2, on a Z a and b should be at c
3838 .....$@@@@@@a...c. o1 (o1-a)=(dx+5)^2 =dx^2+10*dx+25
3839 ...$$@@@@@@@@@.... (o1-b)=(dx+1)^2+4^2=dx^2+ 2*dx+18
3840 ..$@@$@@@$@@@@@...
3841 ..@@@.....$$@@@@..
3842 ..@@.......@$@@@b.
3843 ..$.........$@@@@.
3844 .$$..........$@@@.
3845 .$...........@@@@.
3846 .............@@@@.<
3847 .............$@@$.
3848 ............$@@@..
3849 ............@@$...
3850 ............$@$...
3851 --- snip ----
3852 */
3853 /* upper right corner? */
3854 i4=nearest_frame_vector(box1,aa[2][3],aa[0][3], x1+dx, y0);
3855 i5=nearest_frame_vector(box1,aa[2][3],aa[0][3], x1, y0-dx);
/* d = squared distance between the frame vectors i4 and i5 */
3856 d=sq(box1->frame_vector[i5][0]-box1->frame_vector[i4][0])
3857 +sq(box1->frame_vector[i5][1]-box1->frame_vector[i4][1]);
3858 if (d>2*sq(dx/8+1)) Break;
3859 if (d>0 && dx<8 ) ad=99*ad/100; // 7x10 2 ~Z
3860 if (d>1 && dx<16) ad=98*ad/100; // 9x10 2 ~Z d=2
3861 MSG( fprintf(stderr,"i45xy %d %d d %d ad %d upper right bow?",i4,i5,d,ad); )
3862
/* dbg[0..3] keep the four deviations so they can be printed and folded
   into ad further down */
3863 /* check if upper left and upper right point are joined directly */
3864 dbg[0]=d=line_deviation(box1, aa[0][3], i1); if (d >2*sq(1024/4)) Break;
3865 /* check if lower right and upper left point are joined directly */
3866 dbg[1]=d=line_deviation(box1, i1, aa[1][3]); if (d >2*sq(1024/4)) Break;
3867
/* i2, x and y are overwritten by the second search below; only the second
   i2 is used after that */
3868 /* search highest left < from right side 2017-07 ~I 5x9 */
3869 i2=nearest_frame_vector(box1,aa[2][3],aa[3][3], x0, y0);
3870 x=box1->frame_vector[i2][0];
3871 y=box1->frame_vector[i2][1];
3872 if (y-y0 <= dy/8 && x-x0 <= 5*dx/8) Break; // ~I 2017-07_clean5x9
3873 if (y-y0 <= dy/4 && x-x0 <= 5*dx/8) ad=98*ad/100; // ~I
3874 /* search lowest left < from right side */
3875 i2=nearest_frame_vector(box1,aa[2][3],aa[3][3], x0, y1);
3876 x=box1->frame_vector[i2][0];
3877 y=box1->frame_vector[i2][1];
3878 if (y-y0 < 3*dy/8) Break;
3879 if (x-x0 > 5*dx/8) Break;
3880 if (x-x0 >= dx/2) ad=99*ad/100; // ~I 2017-07_clean5x9
3881 if (aa[2][0]-x<=dx/4) Break; // ~lI
3882 if (aa[2][0]-x<=dx/3) ad=98*ad/100; // ~lI
3883 if (aa[2][0]-x< dx/2) ad=99*ad/100; // ~lI, dx/2 for 9x7 fat3 z
3884 /* check if upper right and lower left point are joined directly */
3885 dbg[2]=d=line_deviation(box1,i2, aa[3][3]);
3886 MSG(fprintf(stderr,"left diag line? i2xy %d %d d= %d krit= 2..3*%d",\
3887 x-x0,y-y0,d,sq(1024/4));)
/* limit is 3*sq(1024/4) in general, tightened to 2*sq(1024/4) for dx>9 */
3888 if ( d >3*sq(1024/4)) Break;
3889 if (dx>9 && d >2*sq(1024/4)) Break; // too strong for 5x7 Z
3890 /* check if lower left and lower right point are joined directly */
3891 dbg[3]=d=line_deviation(box1, aa[2][3],i2); if (d >2*sq(1024/4)) Break;
3892
3893 MSG( fprintf(stderr," i12 %d %d ad %d", i1, i2, ad); )
3894 /* upper right nick - lower left nick, 0 for fat screen fonts 8x10 */
3895 if (box1->frame_vector[i1][0]
3896 -box1->frame_vector[i2][0]<dx/8-1) Break; /* 0 for 9x7 fat3 z */
3897 /* 16384, 26788 for 8x10 screen font (Apr09) */
3898 MSG( \
3899 fprintf(stderr,"^v %d %d %d %d line deviation %d %d %d %d max %d ad %d",\
3900 box1->frame_vector[i1][0]-x0,box1->frame_vector[i1][1]-y0,\
3901 box1->frame_vector[i2][0]-x0,box1->frame_vector[i2][1]-y0,\
3902 dbg[0], dbg[1], dbg[2], dbg[3], sq(1024)/16, ad);)
/* every dbg[k] already passed its limit above, so (assuming non-negative
   deviations) each line removes at most 2 percent, dbg[2] at most 3 */
3903 ad=(100-dbg[0]/sq(1024/4))*ad/100;
3904 ad=(100-dbg[1]/sq(1024/4))*ad/100;
3905 ad=(100-dbg[2]/sq(1024/4))*ad/100;
3906 ad=(100-dbg[3]/sq(1024/4))*ad/100;
3907
/* gchar costs 2 percent; hchar selects the capital letter */
3908 if ( gchar) ad=98*ad/100;
3909 bc='z';
3910 if( hchar ) bc='Z';
3911 Setac(box1,bc,ad);
3912 break;
3913 }
3914 return box1->c;
3915 }
3916
/*
 * ocr0_wW: rule-based check whether the box held in sdata is a 'w' or 'W'.
 *
 * Two single-pass tests run one after the other:
 *   1. the regular w, traced as the stroke chain xa-xb-xc-xd-xe,
 *   2. a second variant (italic / omega-like / handwritten according to the
 *      original notes) that looks for three legs.
 * Every test that is not rejected calls Setac(box1, ac, ad) with
 * ac = 'W' when hchar is set, otherwise 'w'; gchar costs 2 percent.
 * Return: box1->c.
 *
 * All pixel probes in this function go through bp with box-relative
 * coordinates (0..dx-1, 0..dy-1).
 * NOTE(review): loop, num_cross, get_line2, Setac, Break, MSG and DBG are
 * defined outside this section; num_cross presumably counts stroke crossings
 * along the given segment and loop presumably returns a run length in the
 * given direction -- confirm against their definitions.
 * NOTE(review): the definition line and every code line carry listing
 * residue (a fused signature label, leading line numbers) that is not C.
 */
ocr0_wW(ocr0_shared_t * sdata)3917 static wchar_t ocr0_wW(ocr0_shared_t *sdata){
3918 struct box *box1=sdata->box1;
3919 pix *bp=sdata->bp;
/* NOTE(review): handwritten is assigned in the second test but never read
   anywhere in this function */
3920 int i,j,d,x,y,hchar=sdata->hchar,gchar=sdata->gchar,handwritten=0,
3921 x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
3922 int dx=x1-x0+1,dy=y1-y0+1, /* size */
3923 ad,ya,yb,xa,xb,xc,xd,xe,t1; /* tmp-vars */
3924 wchar_t ac;
3925
3926 // ------- test w ~{\it w} ---------------
/* Single-pass block: no increment, every path leaves via break or Break;
   skipped entirely for dx<=3 or dy<=3. */
3927 for(ad=d=100;dx>3 && dy>3;){ // dy<=dx
3928 DBG( wchar_t c_ask='w'; )
3929 if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
3930 // xa xe
3931 // \ xc / <=ya connected xa-xb-xc-xd-xe
3932 // xb xd <=yb
3933 // get two lowest points i3,i4,ya
3934 // out_x(box1);
3935 // ~ul ~uf
3936 // out_x(box1);
/* every row from dy/8 up to (not including) dy/2 needs at least 2 crossings */
3937 for(y=dy/8;y< dy/2;y++) if( num_cross(0,dx-1,y,y,bp,cs)< 2 ) break;
3938 if(y<dy/2) Break;
/* NOTE(review): this yb=dy-1 is overwritten unconditionally by yb=y a few
   lines below; for dx<=4 y is still dy/2 from the loop above -- confirm */
3939 yb=dy-1;
3940 if (dx>4) { /* 4x6 is too small */
/* scan upwards from near the bottom for a row with exactly 2 crossings;
   reaching 3*dy/4 without one rejects */
3941 for(y=dy-1-dy/16;y>3*dy/4;y--)
3942 if( num_cross(0,dx-1,y,y,bp,cs)==2 ) break;
3943 if(y==3*dy/4) Break;
3944 }
3945 yb=y;
3946 t1=loop(bp,0 ,dy/4,dx,cs,0,RI);
3947 t1=loop(bp,t1,dy/4,dx,cs,1,RI); // thickness of line?
/* walking up from yb to dy/4: i counts rows with exactly 4 crossings,
   j counts the remaining rows with 3 or more */
3948 for(i=j=0 ;y> dy/4;y--) if( num_cross(0,dx-1,y,y,bp,cs)==4 ) i++;
3949 else if( num_cross(0,dx-1,y,y,bp,cs)>=3 ) j++;
3950 if(i+5<dy/4 && 7*t1<dy) Break; // only for large letters
3951 if(i+j==0 && (dy>6 || dx>4)) Break;
3952 if(i+j==0 && dx<=4){
3953 if (abs(loop(bp, 1,dy-1,dy,cs,0,UP)
3954 -loop(bp,dx-2,dy-1,dy,cs,0,UP))>dy/8+1) Break; // 4x6 N
3955 if ( ( loop(bp, 1, 0,dy,cs,0,DO)>=dy-2
3956 && loop(bp, 0,dy-1,dy,cs,0,UP)>0)
3957 || ( loop(bp,dx-2, 0,dy,cs,0,DO)>=dy-2
3958 && loop(bp,dx-1,dy-1,dy,cs,0,UP)>0)) Break; // 4x6 UV
3959 ad=ad*99/100; // 4x6 font
3960 MSG(fprintf(stderr,"ad=%d",ad);)
3961 }
3962 if( num_cross(0,dx-1, 1, 1,bp,cs)< 2
3963 && num_cross(0,dx-1,dy/16,dy/16,bp,cs)< 2 ) Break;
/* xb / xd: centres of the leftmost and rightmost stroke on row yb */
3964 x =loop(bp,0 ,yb,dx,cs,0,RI);
3965 xb=loop(bp,x ,yb,dx,cs,1,RI);xb=x+xb/2; if(xb>dx/2) Break;
3966 x =loop(bp,dx-1 ,yb,dx,cs,0,LE);
3967 xd=loop(bp,dx-1-x,yb,dx,cs,1,LE);xd=dx-1-x-xd/2;if(xd<3*dx/8) Break;
/* xc: column between xb and xd with the largest upward run from the bottom
   row; y keeps that largest run */
3968 for(y=0,xc=x=xb+1;x<xd;x++)
3969 if((i=loop(bp,x,dy-1,dy,cs,0,UP))>y){xc=x;y=i;}
3970 if(dx>4 && !y) Break;
3971 ya=dy-1-y; // flat
3972 y=loop(bp,xc,ya,dy,cs,1,UP);if(y)y--;
3973 if (dy>6 || dx>4) { // ~4x6 font
/* left and right of xc there must be exactly 2 crossings, on row ya-y or
   on row ya-y/2 */
3974 if( num_cross(0 ,xc ,ya-y ,ya-y ,bp,cs)!= 2
3975 && num_cross(0 ,xc ,ya-y/2,ya-y/2,bp,cs)!= 2 ) Break;
3976 if( num_cross(xc,dx-1,ya-y ,ya-y ,bp,cs)!= 2
3977 && num_cross(xc,dx-1,ya-y/2,ya-y/2,bp,cs)!= 2 ) Break;
3978 }
3979 ya-=y/2;
/* xa / xe: positions of the outer strokes on row 1 */
3980 x =loop(bp,0 ,1 ,dx,cs,0,RI);
3981 xa=loop(bp,x ,1 ,dx,cs,1,RI);
3982 if( x+xa>xb ){ // may be, here is a small but thick letter
3983 // later add some proofs
3984 xa=x+xa/4;
3985 } else {
3986 xa=x+xa/2;
3987 }
3988 x =loop(bp,dx-1 ,1 ,dx,cs,0,LE);
3989 xe=loop(bp,dx-1-x,1 ,dx,cs,1,LE);xe=dx-1-x-xe/2;
3990 MSG( fprintf(stderr,"a-e: %d %d %d %d %d %d %d %d %d %d",
3991 xa,1,xb,yb,xc,ya,xd,yb,xe,1);)
3992 if (ya<dy/2 && xc< dx/2) ad=98*ad/100; /* ~N */
3993 if (ya<dy/2 && xc<3*dx/8) ad=96*ad/100; /* ~N */
/* first stroke xa-xb: try every start column x within the run beginning at
   xa on row 1 until get_line2 exceeds 94 */
3994 i= loop(bp,xa ,1 ,dx,cs,1,RI);
3995 for (x=xa;x<xa+i;x++)
3996 if( get_line2(x,1,xb,yb,bp,cs,100)>94 ) break;
3997 if (x==xa+i) Break; // no vert. line found
/* remaining strokes xb-xc, xc-xd, xd-xe: each is rejected only if all of
   its alternative end points score below 95 */
3998 if( get_line2(xb,yb-1,xc,ya ,bp,cs,100)<95
3999 && get_line2(xb,yb-1,xc,ya+dy/32,bp,cs,100)<95
4000 && get_line2(xb,yb-1,xc,ya+dy/16,bp,cs,100)<95 ) Break;
4001 if( get_line2(xc, ya,xd, yb,bp,cs,100)<95
4002 && get_line2(xc+1,ya,xd, yb,bp,cs,100)<95 ) Break;
4003 if( get_line2(xd,yb,xe ,1+dy/16,bp,cs,100)<95
4004 && get_line2(xd,yb,dx-1 ,1+dy/8 ,bp,cs,100)<95 // round w
4005 && get_line2(xd,yb,xe+dx/20,1+dy/16,bp,cs,100)<95 ) Break;
4006 // if( num_hole(0,dx-1,0,dy-1,bp,cs,NULL) != 0 ) Break;
4007 // ~ur
4008 MSG(fprintf(stderr,"ad=%d",ad);)
/* right edge from 5*dy/8 downwards: i tracks the largest gap seen so far;
   a gap more than 2 below that maximum rejects, any smaller gap costs
   2 percent per row */
4009 for(i=0,y=5*dy/8;y<dy;y++){
4010 x=loop(bp,dx-1,y,dx,cs,0,LE); if( x>i ) i=x; if( x<i-2 ) break;
4011 if (x<i) ad=98*ad/100;
4012 } if( y<dy ) Break;
4013 MSG(fprintf(stderr,"ad=%d",ad);)
4014 ac=((hchar)?'W':'w');
4015 if (gchar) ad=98*ad/100;
4016 Setac(box1,ac,ad);
4017 break;
4018 }
4019 // --- test ~w {\it w} omega? also handwritten -------
4020 // italic
/* Second single-pass block, run regardless of the outcome of the first. */
4021 for(ad=d=100;dx>3 && dy>3;){ // dy<=dx 4x6font (like a H with fat bar)
4022 DBG( wchar_t c_ask='w'; )
4023 if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
4024 // ~ul ~uf
4025 if( num_cross(0,dx-1,dy/2,dy/2,bp,cs)<2 ) Break;
/* the two assignments below have no effect on the result of this function */
4026 if( num_cross(0,dx-1,dy/8,dy/8,bp,cs)<2 ) handwritten=40;
4027 if( num_cross(0,dx-1,dy/4,dy/4,bp,cs)<2 ) handwritten=80;
/* i = number of rows (bottom row excluded) with exactly 3 crossings */
4028 for(i=0,y=0;y<dy-1;y++)
4029 if( num_cross(0,dx-1,y,y,bp,cs)==3 ) i++;
4030 if(i<=dy/8) Break; // three legs
4031 // xa xe
4032 // \ xc / <=yb connected xa-xb-xc-xd-xe
4033 // xb xd
/* yb = first row from dy/2 downwards with exactly 3 crossings, or
   dy-1-dy/8 if none is found */
4034 for(y=dy/2;y<dy-1-dy/8;y++)
4035 if( num_cross(0,dx-1,y,y,bp,cs)==3 ) break;
4036 yb=y;
/* here xb / xd are the centres of the first gap behind the leftmost and
   rightmost stroke on row yb (loop is called with 0 after 1) */
4037 x =loop(bp,0 ,yb,dx,cs,0,RI);
4038 x+=loop(bp,x ,yb,dx,cs,1,RI); if(x>dx/2) Break;
4039 xb=loop(bp,x ,yb,dx,cs,0,RI);xb=x+xb/2; if(xb>dx/2) Break;
4040 x =loop(bp,dx-1 ,yb,dx,cs,0,LE);
4041 x+=loop(bp,dx-1-x,yb,dx,cs,1,LE);
4042 xd=loop(bp,dx-1-x,yb,dx,cs,0,LE);xd=dx-1-x-xd/2;if(xd<3*dx/8) Break;
4043 if( num_cross(xb,xd,yb,yb ,bp,cs)!= 1 ) Break;
4044 if( num_cross(xb,xb,yb,dy-1,bp,cs)!= 1 ) Break;
4045 if( num_cross(xd,xd,yb,dy-1,bp,cs)!= 1 ) Break;
4046 if( num_cross(xb,xb, 0,yb ,bp,cs)!= 0 ) Break;
4047 if( num_cross(xd,xd, 0,yb ,bp,cs)!= 0 ) Break;
4048 // if( num_hole(0,dx-1,0,dy-1,bp,cs,NULL) != 0 ) Break;
/* stricter than the entry check of this block: any hole rejects here */
4049 if (sdata->holes.num != 0) Break;
4050 // ~ur
4051 for(i=0,y=3*dy/4;y<dy;y++){
4052 x=loop(bp,dx-1,y,dx,cs,0,LE); if( x>i ) i=x; if( x<i-2 ) break;
4053 } if( y<dy ) Break; // fail for overlapping neighbouring slanted chars?
4054 ac=((hchar)?'W':'w');
4055 if (gchar) ad=98*ad/100;
4056 Setac(box1,ac,ad);
/* NOTE(review): the success path of the first test ends with a plain break,
   this one with the Break macro -- confirm this is intended */
4057 Break;
4058 }
4059 return box1->c;
4060 }
4061
/*
 * ocr0_aA: rule-based checks for 'A', 'a' and A WITH OGONEK on the box held
 * in sdata.
 *
 * Active tests, run in this order (each one a single-pass for-block):
 *   1. capital A (pixel probes on bp),
 *   2. vector based 'a' (dated 2010-10-11, uses aa[] and the frame vectors),
 *   3. hand written 'a' without the bow above the circle,
 *   4. LATIN_CAPITAL_LETTER_A_WITH_OGONEK.
 * An older pixel based 'a' test is compiled out with #if 0.
 * Every test that is not rejected calls Setac(box1, <char>, ad); ad starts
 * at 100 in each test and is multiplied by a percentage at soft penalties.
 * Return: box1->c, except that test 2 returns 'a' directly when its ad is
 *         still >= 100 (tests 3 and 4 are skipped in that case).
 *
 * Probes on bp use box-relative coordinates (0..dx-1, 0..dy-1); probes on
 * box1->p use x0..x1 / y0..y1.
 * NOTE(review): loop, num_cross, get_bw, num_hole, getpixel, Setac,
 * nearest_frame_vector, Break, MSG and DBG are defined outside this section;
 * remarks about them are inferred from their use here -- confirm.
 * NOTE(review): the definition line and every code line carry listing
 * residue (a fused signature label, leading line numbers) that is not C.
 */
ocr0_aA(ocr0_shared_t * sdata)4062 static wchar_t ocr0_aA(ocr0_shared_t *sdata){
4063 struct box *box1=sdata->box1;
4064 pix *bp=sdata->bp;
4065 int i,j,d,x,y,i1,i2,i3,i4,i5,hchar=sdata->hchar,gchar=sdata->gchar,
4066 x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
4067 int dx=x1-x0+1,dy=y1-y0+1, /* size */
4068 ad,ya; /* tmp-vars */
4069 int (*aa)[4]=sdata->aa; /* corner-points, (x,y,dist^2,vector_idx) */
4070
4071 // --- test A ---------------------------------------------------
/* Single-pass block: no increment, every path leaves via break or Break. */
4072 for(ad=d=100;dx>2 && dy>3;){ // min 3x4
4073 DBG( wchar_t c_ask='A'; )
4074 if (sdata->holes.num > 2) Break; /* tolerant against a tiny hole */
4075 // first selection (rough sieve)
4076 if( get_bw(dx/2 ,dx/2 ,dy-1-dy/8,dy-1,bp,cs,1) == 1
4077 && get_bw(dx/2-1,dx/2-1,dy-1-dy/8,dy-1,bp,cs,1) == 1 ) Break; // ~B
/* with a modifier, ya becomes the first row in the upper half that has no
   crossing; if no such row exists ya falls back to 0 */
4078 ya=0; /* upper end, not 0 for modified A etc. */
4079 if (box1->modifier)
4080 for (ya=0;ya<dy/2;ya++)
4081 if (num_cross(0,dx-1,ya,ya,bp,cs)==0) break;
4082 if (ya>=dy/2) ya=0; // already subtracted?
/* at least one of four rows near the top must have exactly 1 crossing */
4083 if( num_cross(0,dx-1,ya+ 1 ,ya+ 1 ,bp,cs)!=1 // 600dpi
4084 && num_cross(0,dx-1,ya+ dy/8 ,ya+ dy/8 ,bp,cs)!=1
4085 && num_cross(0,dx-1,ya+ dy/16 ,ya+ dy/16 ,bp,cs)!=1
4086 && num_cross(0,dx-1,ya+ dy/8+1,ya+ dy/8+1,bp,cs)!=1 ) Break;
/* and one of two rows near 7*dy/8 must have exactly 2 crossings */
4087 if( num_cross(0,dx-1, 7*dy/8 , 7*dy/8 ,bp,cs)!=2
4088 && num_cross(0,dx-1, 7*dy/8-1, 7*dy/8-1,bp,cs)!=2 ) Break;
4089 if ( num_cross( 0,dx/8,ya+dy/8,ya+0,bp,cs)>0 ) Break; // ~R
/* y = first row from ya+dy/8 with more than one crossing; i1 is that row,
   moved one row down for dy>20 */
4090 for(y=ya+dy/8;y<ya+dy/2;y++) if( num_cross(0,dx-1,y,y,bp,cs) > 1 ) break;
4091 if( y==ya+dy/2 ) Break; i1=y;
4092 if (dy>20) i1++; /* get around some noise fat font */
4093
/* on row i1: i2 ends up as the middle of the gap behind the first stroke */
4094 x =loop(bp,0,i1,dx,cs,0,RI); if(x>3*dx/4) Break;
4095 x+=loop(bp,x,i1,dx,cs,1,RI); if(x>3*dx/4) Break; i2=x;
4096 x+=loop(bp,x,i1,dx,cs,0,RI); if(x<3*dx/8) Break; i2=(x+i2)/2;
4097 // hole (i2,i1)
/* walk y down column i2 through one run of each kind; ending below 3*dy/4
   costs 1 percent, below 5*dy/6 rejects */
4098 y+=loop(bp,i2,y,dy,cs,1,DO);
4099 y+=loop(bp,i2,y,dy,cs,0,DO); if(y>3*dy/4) ad=ad*99/100;
4100 if (y>5*dy/6) { MSG(fprintf(stderr,"x,y,i1,i2= %d %d %d %d",x,y,i1,i2);) }
4101 if (y>5*dy/6) Break;
4102
/* exactly one hole is required (two with the RING_ABOVE modifier) and
   hole[0].y1-ya must stay below dy-1-dy/4 */
4103 if( sdata->holes.num != ((box1->modifier==RING_ABOVE)?2:1)
4104 || sdata->holes.hole[0].y1-ya >= dy-1-dy/4) Break;
4105 // if( num_hole ( x0, x1, y0, y1-dy/4 ,box1->p,cs,NULL) != 1 ) Break;
4106 // out_x(box1);
4107 i3=0;i4=0;
/* note the statement layout inside the loop: the first if has no body on
   its own line, it guards the second assignment, i.e. the count is retried
   at y+dy/16 only when the first count is outside 1..2 */
4108 for(x=dx/3;x<2*dx/3;x++){
4109 i4=num_cross(i2,x,y ,dy-1,bp,cs);if(i4<1 || i4>2)
4110 i4=num_cross(i2,x,y+dy/16,dy-1,bp,cs);if(i4<1 || i4>2) break;
4111 if(i4==1) i3=x;
4112 } if(i4<1 || i4>2 || i3==0){
4113 // ToDo: MSG(fprintf(stderr,"x,y,i4,i3= %d %d %d %d",x,y,i4,i3);)
4114 Break;
4115 }
4116 if( get_bw(dx-1-dx/4, dx-1, dy-1-dy/4, dy-1, bp,cs,1) != 1 ) Break;
4117
/* right edge sampled at 1/4, 1/2 and 3/4 of the height below ya: the middle
   gap must be within dx/4 of the mean of the outer two (compared as 2*i2
   against i1+i3) */
4118 i1=loop(bp,dx-1,ya+ (dy-ya)/4,dx,cs,0,LE);
4119 i2=loop(bp,dx-1,ya+ (dy-ya)/2,dx,cs,0,LE);
4120 i3=loop(bp,dx-1,dy-1-(dy-ya)/4,dx,cs,0,LE);
4121 if( 2*i2-dx/8>i1+i3 ) ad=99*ad/100; /* 6*8 font */
4122 if( 2*i2+dx/4<i1+i3 || 2*i2-dx/4>i1+i3 ) Break;
4123
/* same test for the left edge, which must additionally not be wider at the
   bottom sample than at the top sample (i1<i3 rejects) */
4124 i1=loop(bp,0 ,ya+ (dy-ya)/4,dx,cs,0,RI); // left vertical line
4125 i2=loop(bp,0 ,ya+ (dy-ya)/2,dx,cs,0,RI);
4126 i3=loop(bp,0 ,dy-1-(dy-ya)/4,dx,cs,0,RI);
4127 if( 2*i2-dx/8>i1+i3 ) ad=98*ad/100; /* 6*8 font */
4128 if( 2*i2+dx/4<i1+i3 || 2*i2-dx/4>i1+i3 || i1<i3) Break;
4129
4130 // lower ends could be round on thick fonts
/* NOTE(review): the update condition compares i1 plus the literal 12 with
   i3; the surrounding code works with i1+i2, so i2 may have been meant --
   confirm before touching, the thresholds are tuned against test images */
4131 for(i3=dx,y=ya+(dy-ya)/4;y<7*dy/8;y++){ // increasing width
4132 i1=loop(bp, 0, y,dx,cs,0,RI);
4133 i2=loop(bp,dx-1, y,dx,cs,0,LE);
4134 if(i1+i2>i3+dx/16) break; if( i1+12<i3 ) i3=i1+i2;
4135 } if(y<7*dy/8) Break;
4136 if ( loop(bp, 0,dy-1-dy/8,dx,cs,0,RI)
4137 -loop(bp, 0,dy/2 ,dx,cs,0,RI)>0) ad=97*ad/100; // italic-a
4138
4139 if (!hchar) ad=99*ad/100; // italic-a
4140 Setac(box1,'A',ad);
4141 break;
4142 }
4143 // new vector based fat a variant without holes 2010-10-11
4144 // --- test a ---------------------------------------------------
4145 for(ad=d=100;dx>2 && dy>4;){ // min 3x5
4146 DBG( wchar_t c_ask='a'; )
/* these two rejections use a plain break, not the Break macro */
4147 if (sdata->holes.num > 1) break;
4148 if (box1->num_frames != 2 + box1->dots ) break;
4149 /* tmp08/gocr0801_bad5
4150 7x7 7x7qemu
4151
4152 .@@@@@.<- .@@@@..<-
4153 .@@@@@@ ....@@.
4154 ..@@@@@ .@@@@@.
4155 @@@@@@@ @@..@@.
4156 @@...@@ @@..@@.
4157 @@@@@@@ @@..@@.
4158 .@@@@@@<- .@@@.@@<-
4159
4160 */
/* each of the four points aa[0..3] must lie within a quarter of the box
   size of its box corner (upper left, lower left, lower right, upper right) */
4161 if (aa[0][0]>x0+dx/4 || aa[0][1]>y0+dy/4) Break;
4162 if (aa[1][0]>x0+dx/4 || aa[1][1]<y1-dy/4) Break;
4163 if (aa[2][0]<x1-dx/4 || aa[2][1]<y1-dy/4) Break;
4164 if (aa[3][0]<x1-dx/4 || aa[3][1]>y0+dy/4) Break;
4165 // ToDo: check position of 2nd frame (hole) using nearest_x and _y???
4166 // rightmost gap from the left
4167 i1=nearest_frame_vector(box1,aa[0][3],aa[1][3], x1, y0+1*dy/3);
4168 if (box1->frame_vector[i1][0]<=aa[0][0]
4169 || box1->frame_vector[i1][0]<=aa[1][0]) Break; // must!
4170 if (box1->frame_vector[i1][0]< x0+dx/3) Break; // very tolerant
4171 if (box1->frame_vector[i1][0]< x1-dx/3) {
4172 // we allow one exception where fat lines close the gap (see tmp08)
4173 // upper body must at least 2 times thicker than low line
/* i: run measured downwards from the top on the centre column,
   j: run measured upwards from the bottom on the same column */
4174 i= loop(box1->p,x0+dx/2,y0 ,y1-y0,cs,0,DO); if (i>dy/8) Break;
4175 i= loop(box1->p,x0+dx/2,y0+i,y1-y0,cs,1,DO);
4176 j= loop(box1->p,x0+dx/2,y1 ,y1-y0,cs,0,UP); if (j>dy/8) Break;
4177 j= loop(box1->p,x0+dx/2,y1-j,y1-y0,cs,1,UP); if (j>dy/3) Break;
4178 if (i<2*j-dy/16) Break;
4179 ad=98*ad/100;
4180 }
4181 if (box1->frame_vector[i1][1]>y0+dy/2) Break;
4182 i2=nearest_frame_vector(box1,i1 ,aa[1][3], x0-dx/2, y0+1*dy/2);
4183 MSG(fprintf(stderr,"i12 %d %d y=%d %d", i1, i2, box1->frame_vector[i2][1]-y0, y1-dy/4-1-y0 );)
4184 if (box1->frame_vector[i2][0]> x0+(dx+4)/8+1) Break;
4185 if (box1->frame_vector[i2][1]> y1-dy/4) Break; // ?
/* i = index of the first frame with negative frame_vol (the hole according
   to the debug text below); index 0 or no such frame rejects.
   i3..i4 = its vector index range; presumably num_frame_vectors[] holds
   cumulative end indices -- TODO confirm */
4186 for (i=0;i<box1->num_frames;i++)
4187 if (box1->frame_vol[i]<0) break;
4188 if (i>=box1->num_frames || i<1) Break; // no hole? hole only?
4189 i3= box1->num_frame_vectors[i-1];
4190 i4= box1->num_frame_vectors[i]-1;
4191 MSG(fprintf(stderr,"i34 %d %d = hole",i3,i4);)
4192 i5= nearest_frame_vector(box1, i3, i4, x0+dx/2, y0);
4193 MSG(fprintf(stderr,"i5 %d y %d",i5,box1->frame_vector[i5][1]-y0);)
4194 if (box1->frame_vector[i5][1]<y0+dy/3) Break;
4195
/* with box1->m2 set, gchar and hchar cost 2 percent each; without it a
   flat 1 percent is taken */
4196 if (box1->m2) {
4197 if (sdata->gchar) ad=98*ad/100;
4198 if (sdata->hchar) ad=98*ad/100;
4199 } else ad=99*ad/100;
4200
4201 Setac(box1,(wchar_t)'a',ad);
/* early exit: with an unreduced ad the remaining tests are skipped and 'a'
   is returned instead of box1->c */
4202 if (ad>=100) return 'a';
4203 break;
4204 }
/* the block up to the matching #endif is compiled out */
4205 #if 0
4206 // old-pixel based variant
4207 // --- test a -------------------------------------------
4208 // with a open bow above the circle starting
4209 // on the right side of the circle
4210 for(ad=d=100;dx>2 && dy>3;){ // min 3x4
4211 DBG( wchar_t c_ask='a'; )
4212 if (sdata->holes.num > 2) Break; /* tolerant against a tiny hole */
4213 if( get_bw(x0 , x0+dx/2, y1-dy/3, y1-dy/3,box1->p,cs,1) != 1 ) Break;
4214 if( get_bw(x1-dx/3, x1 , y0+dy/3, y0+dy/3,box1->p,cs,1) != 1 ) Break;
4215 if( get_bw(x1-dx/3, x1 , y0+dy/4, y0+dy/4,box1->p,cs,1) != 1 ) Break;
4216 if( get_bw(x0+dx/2, x0+dx/2, y1-dy/3, y1, box1->p,cs,1) != 1 ) Break;
4217 if( get_bw(x0+dx/2, x0+dx/2, y0 , y0+dy/3,box1->p,cs,1) != 1 ) Break;
4218 if( get_bw(x0+dx/3, x1-dx/3, y0 , y0 ,box1->p,cs,1) != 1 ) Break;
4219 if( get_bw(x0+dx/4, x1-dx/2, y1 , y1 ,box1->p,cs,1) != 1 )
4220 if( get_bw(x0+dx/4, x1-dx/3, y1-1 , y1-1 ,box1->p,cs,1) != 1 ) Break;
4221 if( get_bw(x0 , x0 , y0+dy/2, y1 ,box1->p,cs,1) != 1 )
4222 if( get_bw(x0+dx/8, x0+dx/8, y0+dy/2, y1 ,box1->p,cs,1) != 1 ) Break;
4223 if( loop(bp,3*dx/8,0,dy,cs,0,DO) > 3*dy/16 ) Break; // ~d
4224 if( num_cross(0,dx-1,dy/4 ,dy/4 , bp,cs) >2 // ~glued am != an
4225 && num_cross(0,dx-1,dy/4+1,dy/4+1, bp,cs) >2 ) Break;
4226
4227 for( x=dx/4;x<dx-dx/4;x++ ){ // ar
4228 i=loop(bp,x, 0,y1-y0,cs,0,DO); if (i>dy/2) break;
4229 i=loop(bp,x,dy-1,y1-y0,cs,0,UP); if (i>dy/2) break;
4230 } if( x<dx-dx/4 ) Break;
4231
4232 for(i=dx/8+1,x=dx/4;x<=dx-1-dx/4 && i;x++){
4233 if( num_cross(x,x,0,bp->y-1, bp,cs) == 3 ) i--;
4234 } if( i ) Break;
4235
4236 i1=loop(bp,0, dy/8,dx,cs,0,RI);
4237 i3=loop(bp,0,3*dy/4,dx,cs,0,RI);
4238 for(y=dy/8+1;y<3*dy/4;y++){
4239 i2=loop(bp,0,y,dx,cs,0,RI);if(2*i2>i1+i3+1) break;
4240 } if(y==3*dy/4) Break; // ~6
4241 // ~ s (small thick s), look for vertical line piece
4242 for(x=3*dx/4;x<dx;x++)
4243 if( loop(bp,x,dy/4,dy/2,cs,1,DO)>dy/4 ) break;
4244 if( x==dx ) Break;
4245
4246 if (sdata->holes.num != 1) ad=96*ad/100; else
4247 if (sdata->holes.num == 1)
4248 if( num_hole ( x0, x1, y0+dy/3, y1 ,box1->p,cs,NULL) != 1 ) Break;
4249 // if( num_hole ( x0, x1, y0, y1, box1->p,cs,NULL) != 1 ) Break;
4250 if( num_hole ( x0, x1, y0, y1-dy/3 ,box1->p,cs,NULL) != 0 ){
4251 i =loop(bp,0,dy/4,dx,cs,0,RI);
4252 i =loop(bp,i,dy/4,dx,cs,1,RI);
4253 if(i<dx/4+1) Break; // fat a
4254 i =loop(bp,0,dy/4,dx,cs,0,RI);
4255 i+=loop(bp,i,dy/4,dx,cs,1,RI);
4256 for(y=dy/4;y<dy/2;y++)
4257 if( num_cross(0,dx-1,y,y, bp,cs) !=2 ) break;
4258 x =loop(bp,0,y-1,dx,cs,0,RI);
4259 x+=loop(bp,x,y-1,dx,cs,1,RI);
4260 if(x>i) Break; // ~ 8
4261 }
4262 /* test for horizontal symmetry ~8 */
4263 for (y=0;y<dy;y++) for (x=0;x<dx/2;x++)
4264 if ((getpixel(bp,x,y)<cs)!=(getpixel(bp,dx-1-x,y)<cs)) { y=dy+1; break; }
4265 if (y==dy) Break; /* ~8 */
4266 if (hchar
4267 && loop(bp,dx/4,0,dy,cs,0,DO)>dy/4) Break; // ~ serif d
4268
4269 if (hchar) ad=96*ad/100;
4270 if (gchar) ad=96*ad/100;
4271 Setac(box1,'a',ad);
4272 break;
4273 }
4274 #endif
4275 // --- test hand written a ---------------------------------------------------
4276 // rare char, without bow above the circle
4277 for(ad=d=100;dx>3 && dy>3;){ // min 4x4
4278 DBG( wchar_t c_ask='a'; )
4279 if (sdata->holes.num > 2) Break; /* tolerant against a tiny hole */
/* rough sieve on box1->p (absolute coordinates): five segments through the
   middle of the box must each make get_bw return 1 */
4280 if( get_bw(x0 , x0+dx/2,y0+dy/2 , y0+dy/2,box1->p,cs,1) != 1 ) Break;
4281 if( get_bw(x1-dx/2 , x1 ,y0+dy/2 , y0+dy/2,box1->p,cs,1) != 1 ) Break;
4282 if( get_bw(x0+dx/2 , x0+dx/2,y1-dy/2 , y1, box1->p,cs,1) != 1 ) Break;
4283 if( get_bw(x0+dx/2 , x0+dx/2,y0 , y0+dy/2,box1->p,cs,1) != 1 ) Break;
4284 if( get_bw(x0+dx/3 , x0+dx/3,y0 , y0+dy/2,box1->p,cs,1) != 1 ) Break;
4285 i = loop(bp,dx/2-dx/8, 0 ,dy,cs,0,DO); if (i>dy/4) Break; // top center
4286 i+= loop(bp,dx/2-dx/8, i ,dy,cs,1,DO); if (i>dy/2) Break; // top hole
4287 i = loop(bp,dx/2-dx/8, i ,dy,cs,0,DO); if (i<dy/4) Break; // hole height
4288 if( get_bw(x0 , x0 ,y1 , y1 ,box1->p,cs,1) == 1 ) Break;
4289
4290 if( num_cross(x0+dx/2,x0+dx/2,y0 , y1 ,box1->p,cs) != 2 ) Break;
4291 if( num_cross(x0+dx/3,x1-dx/3,y0 , y0 ,box1->p,cs) != 1 ) // AND
4292 if( num_cross(x0+dx/3,x1-dx/3,y0+1 , y0+1 ,box1->p,cs) != 1 ) Break;
/* NOTE(review): i is limited to dy/3 by the first line, so the i>dy/2 check
   on the second line can never trigger; it may have been meant for y */
4293 i = loop(bp,dx/2,dy-1 ,dy,cs,0,UP); if (i>dy/3) Break;
4294 y = i+loop(bp,dx/2,dy-1-i,dy,cs,1,UP); if (i>dy/2) Break;
4295 // normal 'a' has a well separated vertical line right from the circle
4296 // but fat 'a' is like a 'o', only bigger on the right side
4297 if( num_cross(x0+dx/2-1,x1,y1 ,y1 ,box1->p,cs) < 2 /* 4x6font */
4298 && num_cross(x0+dx/2-1,x1,y1-i,y1-i ,box1->p,cs) < 2 /* 2 or 3 */
4299 && num_cross(x0+dx/2-1,x1,y1-y,y1-y ,box1->p,cs) < 2 )
4300 { if (loop(bp, 0,dy-1-dy/16,dx,cs,0,RI)
4301 <4*loop(bp,dx-1,dy-1-dy/16,dx,cs,0,LE)) { Break;}
4302 else ad=98*ad/100;
4303 }
4304 if( num_cross(x0,x1,y0+dy/2 , y0+dy/2,box1->p,cs) < 2
4305 || num_cross(x0,x1,y0+dy/3 , y0+dy/3,box1->p,cs) < 2 ) Break; // Jun00
4306
4307 if( num_cross(x0 ,x0 ,y0+dy/3 , y1-dy/4,box1->p,cs) != 1 )
4308 if( num_cross(x0+1 ,x0+1 ,y0+dy/3 , y1-dy/4,box1->p,cs) != 1 ) Break;
/* num_hole on the box without its two rightmost columns is consulted only
   when holes.num is not 1 */
4309 if (sdata->holes.num != 1)
4310 if( num_hole(x0,x1-2,y0 ,y1 ,box1->p,cs,NULL) != 1 )
4311 // if( num_hole(x0,x1 ,y0 ,y1 ,box1->p,cs,NULL) != 1 )
4312 Break;
4313 if( num_hole(x0,x1 ,y0+dy/3,y1-1 ,box1->p,cs,NULL) != 0 ) Break;
4314
/* the left gap on row 0 must be larger than the one on row 2 */
4315 if( loop(bp,0 ,0 ,x1-x0,cs,0,RI)<=
4316 loop(bp,0 ,2 ,x1-x0,cs,0,RI) ) Break;
4317
4318 if( loop(bp,dx-1,dy-1,x1-x0,cs,0,LE)> dx/4
4319 && loop(bp,dx-1,dy-2,x1-x0,cs,0,LE)> (dx+4)/8 ) ad=97*ad/100;
4320
/* right gaps at dy/4 and at dy-1-dy/4 may differ by at most dx/4 */
4321 x=loop(bp,dx-1,dy-1-dy/4,dx,cs,0,LE);
4322 i=loop(bp,dx-1, dy/4,dx,cs,0,LE); if (abs(x-i)>dx/4) Break;
4323
/* on every column of the middle half the runs from the top and from the
   bottom must not exceed dy/2 */
4324 for( x=dx/4;x<dx-dx/4;x++ ){ // ar
4325 i=loop(bp,x, 0,y1-y0,cs,0,DO); if (i>dy/2) break;
4326 i=loop(bp,x,dy-1,y1-y0,cs,0,UP); if (i>dy/2) break;
4327 } if( x<dx-dx/4 ) Break;
4328
4329 if( num_cross(x0 , x1, y1, y1,box1->p,cs) == 1 )
4330 if( num_cross(x0 , x1, y0, y0,box1->p,cs) == 1 )
4331 if( loop(bp,dx-1, 0,y1-y0,cs,0,DO)> dy/4
4332 && loop(bp,dx-1,dy-1,y1-y0,cs,0,UP)> dy/4 ) Break; // ~o
4333 if( loop(bp,dx/2,dy-1,y1-y0,cs,0,UP)> dy/4 ) Break; // ~q
4334 if (ad>99
4335 && loop(bp,dx-1,dy-1,y1-y0,cs,0,UP)> dy/32 ) ad=98*ad/100; // ~o 2010
4336
4337 if (hchar) ad=96*ad/100; // 2010-10 ~O
4338 if (gchar) ad=98*ad/100;
4339 // handwritten-a (alpha)
4340 Setac(box1,'a',ad);
4341 break;
4342 }
4343 // --- test A_A_WITH_OGONEK 0x0104 Centr.Eur.Font -------------------------
4344 /* not sure if we should move this to a get_CentralEuropean-function */
/* This test mirrors the capital A test above without the ya offset; all of
   its rejections use a plain break instead of the Break macro. */
4345 for(ad=d=100;dx>2 && dy>4;){ // min 3x5
4346 DBG( wchar_t c_ask='A'; )
4347 if (sdata->holes.num > 2) break; /* tolerant against a tiny hole */
4348 // first selection (rough sieve)
4349 if( get_bw(dx/2,dx/2,dy-1-dy/8,dy-1,bp,cs,1) == 1 ) break; // ~B
4350 if( num_cross(0,dx-1, 1 , 1 ,bp,cs)!=1 // 600dpi
4351 && num_cross(0,dx-1, dy/8 , dy/8 ,bp,cs)!=1
4352 && num_cross(0,dx-1, dy/16 , dy/16 ,bp,cs)!=1
4353 && num_cross(0,dx-1, dy/8+1, dy/8+1,bp,cs)!=1 ) break;
/* unlike the plain A, the bottom row must have exactly 1 crossing */
4354 if( num_cross(0,dx-1, dy-1 , dy-1 ,bp,cs)!=1 ) break;
4355 if( num_cross(0,dx-1, dy/4 , dy/4 ,bp,cs)!=2
4356 && num_cross(0,dx-1, dy/3 , dy/3 ,bp,cs)!=2 ) break;
4357 if ( num_cross( 0,dx/8,dy/8, 0,bp,cs)>0 ) break; // ~R
4358 for(y=dy/8;y<dy/2;y++) if( num_cross(0,dx-1,y,y,bp,cs) > 1 ) break;
4359 if( y==dy/2 ) break; i1=y;
4360 if (dy>20) i1++; /* get around some noise fat font */
4361
4362 x =loop(bp,0,i1,dx,cs,0,RI); if(x>3*dx/4) break;
4363 x+=loop(bp,x,i1,dx,cs,1,RI); if(x>3*dx/4) break; i2=x;
4364 x+=loop(bp,x,i1,dx,cs,0,RI); if(x<3*dx/8) break; i2=(x+i2)/2;
4365 // hole (i2,i1)
4366 y+=loop(bp,i2,y,dy,cs,1,DO);
4367 y+=loop(bp,i2,y,dy,cs,0,DO); if(y>3*dy/4) ad=ad*99/100;
4368 if (y>5*dy/6) break;
4369
4370 if( sdata->holes.num != 1 || sdata->holes.hole[0].y1 >= dy-1-dy/4) break;
4371 // if( num_hole ( x0, x1, y0, y1-dy/4 ,box1->p,cs,NULL) != 1 ) break;
4372 // out_x(box1);
4373 i3=0;i4=0;
/* same statement layout as in the A test: the first if guards the retry
   at y+dy/16 */
4374 for(x=dx/3;x<2*dx/3;x++){
4375 i4=num_cross(i2,x,y ,dy-1,bp,cs);if(i4<1 || i4>2)
4376 i4=num_cross(i2,x,y+dy/16,dy-1,bp,cs);if(i4<1 || i4>2) break;
4377 if(i4==1) i3=x;
4378 } if(i4<1 || i4>2 || i3==0){
4379 // ToDo: g_debug_A(printf(" A: x,y,i4,i3= %d %d %d %d\n",x,y,i4,i3);)
4380 break;
4381 }
4382 if( get_bw(dx-1-dx/4, dx-1, dy-1-dy/4, dy-1, bp,cs,1) != 1 ) break;
4383 /* dy/4 changed to dy/6 because of screenfonts */
4384 /* there are strange fonts, one has a serif on the upper end of A */
4385 if ( num_cross( 0,dx/8,dy/6, 0,bp,cs)>0 ) break;
4386 if ( num_cross(dx-1-dx/4,dx-1, 0,dy/6,bp,cs)>0 ) break;
4387
4388 i1=loop(bp,dx-1, dy/4,dx,cs,0,LE);
4389 i2=loop(bp,dx-1, dy/2,dx,cs,0,LE);
4390 i3=loop(bp,dx-1,dy-1-dy/4,dx,cs,0,LE);
4391 if( 2*i2+dx/4<i1+i3 || 2*i2-dx/8>i1+i3 ) break;
4392
4393 i1=loop(bp,0 , dy/4,dx,cs,0,RI); // left vertical line
4394 i2=loop(bp,0 , dy/2,dx,cs,0,RI);
4395 i3=loop(bp,0 ,dy-1-dy/4,dx,cs,0,RI);
4396 if( 2*i2+dx/4<i1+i3 || 2*i2-dx/8>i1+i3 || i1<i3) break;
4397
4398 // lower ends could be round on thick fonts
/* NOTE(review): same literal 12 as in the A test above (i1+12<i3), possibly
   meant to be i2 -- confirm */
4399 for(i3=dx,y=dy/4;y<6*dy/8;y++){ // increasing width
4400 i1=loop(bp, 0, y,dx,cs,0,RI);
4401 i2=loop(bp,dx-1, y,dx,cs,0,LE);
4402 if(i1+i2>i3+dx/16) break; if( i1+12<i3 ) i3=i1+i2;
4403 } if(y<6*dy/8) break;
4404
/* penalties apply when hchar resp. gchar is NOT set */
4405 if (!hchar) ad=96*ad/100;
4406 if (!gchar) ad=98*ad/100;
4407 Setac(box1,(wchar_t)LATIN_CAPITAL_LETTER_A_WITH_OGONEK,ad);
4408 break;
4409 }
4410 return box1->c;
4411 }
4412
ocr0_cC(ocr0_shared_t * sdata)4413 static wchar_t ocr0_cC(ocr0_shared_t *sdata){
4414 struct box *box1=sdata->box1;
4415 pix *bp=sdata->bp;
4416 int i,j,d,x,y,i1,i2,i3,i4,i5,hchar=sdata->hchar,gchar=sdata->gchar,
4417 x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
4418 int dx=x1-x0+1,dy=y1-y0+1, /* size */
4419 ad,t1; /* tmp-vars */
4420 wchar_t bc=UNKNOWN;
4421
4422 // --- test c,C ---------------------------------------------------
4423 for(ad=d=100;dx>2 && dy>2;){ // min 3x4
4424 DBG( wchar_t c_ask='c'; )
4425 if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
4426 if( get_bw(x0 , x0+dx/3,y0+dy/2, y0+dy/2,box1->p,cs,1) != 1 ) Break;
4427 if( get_bw(x0+dx/2, x0+dx/2,y1-dy/3, y1, box1->p,cs,1) != 1 ) Break;
4428 if( get_bw(x0+dx/2, x0+dx/2,y0 , y0+dy/3,box1->p,cs,1) != 1 ) Break;
4429 if( num_cross(x0,(x0+x1)/2,(y0+y1)/2,(y0+y1)/2,box1->p,cs) > 1 ) Break; // ~ocr-a-[
4430
4431 for(y=y0+dy/4;y<y0+3*dy/4;y++)
4432 if( get_bw(x0+dx/2,x1,y,y,box1->p,cs,1) == 0 ) break;
4433 if( y==y0+3*dy/4 ) Break; i1=y; // i1: upper end of right gap
4434
4435 // measure thickness of line!
4436 t1=loop(bp, 0,dy/2,dx,cs,0,RI);
4437 t1=loop(bp,t1,dy/2,dx,cs,1,RI);
4438 if (t1>dx/2) Break;
4439
4440 for(y=i1,i2=0,x=x0+dx/2;x<x0+6*dx/8;x++){
4441 i=y-1+loop(box1->p,x0+dx/2,i1,dy,cs,0,DO);
4442 if( i>i2 ) { i2=i; }
4443 } if(i2<y0+5*dy/8-t1/2) Break; // i2: lowest white point above lower bow
4444
4445 i3=i1+1-loop(box1->p,x0+5*dx/8,i1,dy,cs,0,UP);
4446 i =i1+1-loop(box1->p,x0+4*dx/8,i1,dy,cs,0,UP); if(i<i3) i3=i;
4447 if(i3>y0+ dy/4+t1/2) Break; // highest, i3: highest point below top-line
4448
4449 for(y=i1;y<y1-dy/8;y++)
4450 if( get_bw(x0+dx/2,x1,y,y,box1->p,cs,1) == 1 ) break;
4451 if( y-i1<dy/6 ) Break; i2=y-1; // i2: lower end of right gap
4452 // pixelbased num_cross for streight lines could fail on small fonts
4453 if( num_cross(x1-dx/4,x1-dx/4,i2,y0,box1->p,cs) < 1 ) Break; // ~L
4454 if (loop(box1->p,x0,y0+3*dy/4,dx,cs,0,RI)>dx/16)
4455 if( num_cross(x0+dx/2,x1,i3 ,y1,box1->p,cs) < 1
4456 && num_cross(x0+dx/2,x1,y1-dy/4,y1,box1->p,cs) < 1 // may fail
4457 && num_cross(x1 ,x1,y1-dy/4,y1,box1->p,cs) < 1 ) Break; // ~r
4458
4459 i=1;
4460 for(x=dx/2;x<dx-1 && i;x++) // look for @@ (instead +1 use +delta?)
4461 for(y=dy/2;y<dy-1-dy/8 && i;y++){ // .@
4462 if( getpixel(bp,x ,y )>=cs
4463 && getpixel(bp,x+1,y )< cs
4464 && getpixel(bp,x+1,y-1)< cs
4465 && getpixel(bp,x ,y-1)< cs ) { i=0;break; }
4466 }
4467 if(!i) ad=95*ad/100; // ~G
4468
4469 i=loop(bp,0,dy/2,dx,cs,0,RI);
4470 for(y=0;y<dy;y++)if( loop(bp,0,y,dx,cs,0,RI)<i-1-dx/32 ) break;
4471 if( y<dy ) Break; // ~r
4472 // out_x(box1);
4473 for(i5=0,i4=dx,y=dy/2;y>=dy/4;y--){
4474 x =loop(bp,0,y,dx,cs,0,RI);
4475 x+=loop(bp,x,y,dx,cs,1,RI); if(x>i5) i5=x;
4476 i =loop(bp,x,y,dx,cs,0,RI); if(i<i4) i4=i;
4477 if( i5<x-dx/32 && i>i4+dx/32 ) break; // unusual for c, more a bad e?
4478 } if( y>=dy/4 ) Break;
4479
4480 if( !hchar ){ // test for e where the middle line is partly removed
4481 x= loop(bp,0,dy/2,dx,cs,0,RI);
4482 x=x +loop(bp,x,dy/2,dx,cs,1,RI);
4483 y=dy/2-loop(bp,x,dy/2,dy,cs,0,UP)-1;
4484 i=x +loop(bp,x,y,dx,cs,1,RI);
4485 i=i +loop(bp,i,y,dx,cs,0,RI);
4486 if( num_cross(x ,x ,1,dy/2,bp,cs) > 1
4487 || num_cross(x+1,x+1,1,dy/2,bp,cs) > 1 )
4488 if( num_cross(i-1,i-1,1,dy/2,bp,cs) > 1
4489 || num_cross(i ,i ,1,dy/2,bp,cs) > 1 ) Break; // ~bad e
4490 }
4491 if( dy>16 && dy>3*dx && hchar ){ // ~[
4492 x= loop(bp,0, dy/16,dx,cs,0,RI);
4493 x=+loop(bp,0,dy-1-dy/16,dx,cs,0,RI);
4494 i= loop(bp,0, dy/2 ,dx,cs,0,RI)*2;
4495 if( i>=x )
4496 if( num_cross(0,dx-1,dy/4,dy/4,bp,cs) < 2 ) Break;
4497
4498 }
4499 if( get_bw(x0,x0,y0 ,y1 ,box1->p,cs,2) != 2
4500 && get_bw(x0,x1,y0 ,y0 ,box1->p,cs,2) != 2
4501 && get_bw(x0,x1,y1 ,y1 ,box1->p,cs,2) != 2
4502 && get_bw(x1,x1,y0+1,y1-1,box1->p,cs,1) != 1 ) Break; /* ~[ */
4503
4504 x =loop(bp, 0,dy/2,dx,cs,0,RI);
4505 i =loop(bp,dx-1,dy/2,dx,cs,0,LE);
4506 if( (i<dx/2 || i<3) && hchar && dy>7 )
4507 if( loop(bp, 0,7*dy/8,dx,cs,0,RI) > x+dx/8
4508 && loop(bp, 0, dy/8,dx,cs,0,RI) > x+dx/8
4509 && loop(bp,dx-1,dy-1-dy/ 8,dx,cs,0,LE)
4510 > loop(bp,dx-1,dy-1-dy/16,dx,cs,0,LE)
4511 && loop(bp,dx-1, dy/ 8,dx,cs,0,LE)
4512 > loop(bp,dx-1, dy/16,dx,cs,0,LE) ) Break; // ~(
4513
4514 // printf(" hchar=%d i1=%d i2=%d %d\n",hchar,i1-y0,i2-y0,9*dy/16);
4515 // ~G without characteristic crotchet
4516 if (hchar && dy>15 && dx>7 && i2-y0<9*dy/16 && i1-y0<=dy/4)
4517 if ( loop(bp,5*dx/8,i2-y0,dy,cs,0,DO) > 2*dy/8 ){
4518 Setac(box1,'G',90);
4519 Break;
4520 }
4521
4522 if (hchar){
4523 i=1;
4524 for(x=dx/2;x<dx-1 && i;x++) // look for @@ (instead +1 use +delta?)
4525 for(y= 1;y<dy/4 && i;y++){ // .@
4526 if( getpixel(bp,x ,y )>=cs
4527 && getpixel(bp,x+1,y )< cs
4528 && getpixel(bp,x+1,y-1)< cs
4529 && getpixel(bp,x ,y-1)< cs ) { i=0;break; }
4530 }
4531 if (i) ad=98*ad/100; // ~(
4532 if (dy>2*dx) ad=99*ad/100;
4533 }
4534 if( loop(bp,dx-1,dy/2,dx,cs,0,LE) < 6*dx/8 ) ad=98*ad/100;
4535
4536 i= loop(bp,dx-1,dy/16,dx,cs,0,LE);
4537 j= loop(bp,dx/2,0 ,dy,cs,0,DO);
4538 if (i>=dx/2 && j>dy/8 && j>2 && j<dy/2) Break; // t
4539
4540 if (dy>=3*dx && dy>12) ad=99*ad/100; // (
4541 i= loop(bp,dx-1,dy-1,dy,cs,0,UP);
4542 j= loop(bp,dx/2,dy-1,dy,cs,0,UP);
4543 if (i==0 && j>dy/8) ad=95*ad/100; // <
4544 i= loop(bp,dx-1, 0,dy,cs,0,DO);
4545 j= loop(bp,dx/2, 0,dy,cs,0,DO);
4546 if (i==0 && j>dy/8) ad=95*ad/100; // <
4547 if (loop(bp,0,dy-1-dy/8,dx,cs,0,RI)>= 3*dx/4) ad=98*ad/100; // <
4548 if (loop(bp,0,dy-1-dy/8,dx,cs,0,RI)>=(dx+1)/2) ad=98*ad/100; // <
4549 if (loop(bp,0, dy/8,dx,cs,0,RI)>=dx/2) ad=98*ad/100; // <
4550
4551 if (gchar) ad=98*ad/100; // could happen for 5x7 font
4552 bc=((hchar)?'C':'c');
4553 Setac(box1,bc,ad);
4554 break;
4555 }
4556 return box1->c;
4557 }
4558
/*
 * ocr0_lL -- rule based test of one character box for 'L' and for 'l'.
 *
 * sdata supplies the box under test (sdata->box1), a pixel map of that box
 * (sdata->bp), the grey threshold (sdata->cs), the hole list (sdata->holes)
 * and the flags hchar/gchar.
 * NOTE(review): hchar/gchar presumably mean "reaches the upper line" and
 * "has a descender" -- confirm against the code that fills ocr0_shared_t.
 *
 * Two independent one-shot loops are run, one per candidate.  Each loop
 * starts with ad=100 and either leaves early through Break (candidate
 * rejected) or lowers ad for every suspicious feature and finally reports
 * the candidate through Setac(box1,<char>,ad).
 *
 * Coordinates: calls on bp use box relative coordinates (0..dx-1, 0..dy-1),
 * calls on box1->p use page coordinates (x0..x1, y0..y1).
 * The idiom  n=loop(..,0,RI); w=loop(..,n,..,1,RI);  is used throughout to
 * skip the leading white gap of a row (colour 0) and then measure the width
 * of the following black run (colour 1).
 *
 * Returns box1->c as it stands after both tests.
 */
static wchar_t ocr0_lL(ocr0_shared_t *sdata){
  struct box *box1=sdata->box1;
  pix *bp=sdata->bp;
  int i,j,d,x,y,i0,i1,i2,i3,i4,hchar=sdata->hchar,gchar=sdata->gchar,
      x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
  int dx=x1-x0+1,dy=y1-y0+1, /* size */
      ad;                    /* tmp-vars */

  // --- test L ---------------------------------------------------
  for(ad=d=100;dx>2 && dy>4;){     // min 3x4
    DBG( wchar_t c_ask='L'; )
    if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
    i=loop(bp,dx-1,dy/2,dx,cs,0,LE); // white gap right of the stem, middle row
    if (i<3 && dy>8) {Break;}
    if (i<dx/2) ad=98*ad/100;  // ~G

    if (dx<8 && 3*loop(bp,dx-1,0,dy,cs,0,DO)<=dy) break; // ~G
    for( i=i1=0,y=y1-dy/4;y<=y1;y++){ // check bottom line (i1)
      j=loop(box1->p,x0  ,y,dx,cs,0,RI);
      j=loop(box1->p,x0+j,y,dx,cs,1,RI); if( j>i ){ i=j;i1=y; }
    }
    if( i<3*dx/4 ) Break;
    i1=i;  // length of horizontal line
    // line thickness (i2), measured on the middle row
    i=loop(box1->p,x0  ,y0+dy/2,dx,cs,0,RI); if( i>dx/2 ) Break;
    j=loop(box1->p,x0+i,y0+dy/2,dx,cs,1,RI); if( i+j>dx/2 ) Break;
    i2=j;
    if (loop(bp,dx-1,     0,dx,cs,0,LE)<dx/8
     && loop(bp,dx-1,  dy/4,dx,cs,0,LE)>dx/2
     && loop(bp,   0,5*dy/8,dx,cs,0,RI)<dx/4
     && loop(bp,dx-1,3*dy/4,dx,cs,0,LE)<dx/4) Break; // ~G
    for( i=1,y=y0;y<=y1-dy/4 && i;y++){ // check vertical line
      j=loop(box1->p,x0  ,y,dx,cs,0,RI);
      // left gap may grow towards the top (slanted stem), but only a little
      if ( j>(dx+2)/4+(y1-dy/4-y)*dx/2/dy ) { i=0; break; }
      x=loop(box1->p,x0+j,y,dx,cs,1,RI);
      // stem width must stay near i2 and well below the base line length i1
      if( ((x>i2+1 || 4*x<3*i2) && y>y0+dy/8) || 4*x>3*i1 ) i=0;
    }
    if( !i ) Break;
    if( num_cross(0, dx-1-dx/8, dy-1-dy/2, dy-1-dy/2,bp,cs) != 1 ) Break;
    if( num_cross(0, dx-1     , dy/3     , dy/3,bp,cs) != 1 ) Break;
    if( num_cross(0, dx-1     , dy/8     , dy/8,bp,cs) != 1 ) Break;
    if (loop(bp,0,dy-1,dx,cs,0,RI)
       -loop(bp,0,dy-3,dx,cs,0,RI)>1+dx/16) ad=96*ad/100; // ~c
    if (loop(box1->p,x0+dx/4,y1,dy,cs,0,UP)>1+dy/16) ad=99*ad/100; // ~4
    // tmp08/gocr0801_bad5_FP.jpg 6x9 1 2010-09-25
    if (loop(bp,0,dy-2,dx,cs,0,RI)
       -loop(bp,0,dy-1,dx,cs,0,RI)>dx/4) Break; // ~l1 6x9 font

    if ( gchar) ad=98*ad/100;
    if (!hchar) ad=99*ad/100;
    if (5*dx<2*dy && loop(box1->p,x0,y1,dx,cs,0,RI)>dx/4) ad=99*ad/100; // ~l
    Setac(box1,'L',ad);
    break;
  }
  // --- test l ---------------------------------------------------
  // recognize a "l" is a never ending problem, because there are lots of
  // variants and the char is not very unique (under construction)
  // --- test italic l ---------------------------------------------------
  // --- test l ~italic (set flag-italic) --------------------------------
  // if unsure d should be multiplied by 80..90%
  for (ad=d=100; dy>dx && dy>5;) {     // min 3x4
    DBG( wchar_t c_ask='l'; )
    if (box1->dots>0) Break;
    if (num_cross(0, dx-1,dy/2,dy/2,bp,cs) != 1
     || num_cross(0, dx-1,dy/4,dy/4,bp,cs) != 1) Break;
    if (box1->num_frames>1 && box1->m3) Break; // ~! 2010-10-01
    /* 2010-09-24 6x10 1 similar but not ~l sample gocr_codabar.png
        6x10=1=~l    6x9=1=~l
         ..@@..<-
         .@@@..      @@@...<-
         ..@@..      @.@...
         ..@@..<     ..@...
         ..@@..      ..@...<
         ..@@..      ..@...
         ..@@..      ..@...
         ..@@..      ..@...
         ..@@..      ..@...
         @@@@@@<-    @@@@@@<-
     */
    // measure stem thickness over the middle half of the box
    for (i1=0,i2=dx,y=dy/4;y<dy-dy/4;y++){
      j = loop(bp,0,y,dx,cs,0,RI);
      j = loop(bp,j,y,dx,cs,1,RI);
      if (j>i1) { i1=j; } // thickest
      if (j<i2) { i2=j; } // thinnest
    }
    if (i1>2*i2) Break;
    if (box1->m3 && dy<=box1->m3-box1->m2) ad=94*ad/100;
    if (box1->m2-box1->m1>1 &&  y0>=box1->m2) ad=94*ad/100;
    for (i0=0,i3=0,y=0;y<dy/4;y++){ // upper quarter
      j = loop(bp,0,y,dx,cs,0,RI);
      if (j>i3) { i3=j; }       // widest space
      j = loop(bp,j,y,dx,cs,1,RI);
      if (j>i0) { i0=j;i3=0; }  // thickest
    }
    if ((i0>4*i2 || 3*i3>2*dx)
     && (loop(bp,dx-1,dy-1,dx,cs,0,LE)>3*dx/8
      || loop(bp,   0,dy-1,dx,cs,0,RI)>3*dx/8)) Break; // ~7

    // detect serifs: i3 = widest black run of the two top rows,
    //                i4 = widest black run of the two bottom rows.
    // The run has to be measured with colour 1; measuring colour 0 from the
    // start of the black run always yields 0 and disables the ~I test here
    // and the ~7 test (2*i3>5*i1) further down.
    x =loop(bp,0,   0,dx,cs,0,RI);
    i3=loop(bp,x,   0,dx,cs,1,RI);
    x =loop(bp,0,   1,dx,cs,0,RI);
    x =loop(bp,x,   1,dx,cs,1,RI); if(x>i3) i3=x;
    x =loop(bp,0,dy-1,dx,cs,0,RI);
    i4=loop(bp,x,dy-1,dx,cs,1,RI);
    x =loop(bp,0,dy-2,dx,cs,0,RI);
    x =loop(bp,x,dy-2,dx,cs,1,RI); if(x>i4) i4=x;
    if (i3>i1+dx/8+1 && i4>i1+dx/8+1) Break; // ~I (serifs on both ends)

    // right gap must not grow abruptly in the upper quarter;
    // j flags a double crossing (small loop) near the top
    for(i=dx,j=0,y=1;y<dy/4;y++){
      x=loop(bp,dx-1,y,dx,cs,0,LE); if(x>i+1) break;
      i=x;
      if( num_cross(0,dx-1,y        ,y        ,bp,cs)==2
       && num_cross(0,dx-1,y+1+dy/32,y+1+dy/32,bp,cs)==2 ) j=1;
    }
    if (y<dy/4) Break;
    if (j){ // if loop at the upper end, look also on bottom
      for (y=3*dy/4;y<dy;y++) {
        if( num_cross(0,dx-1,y        ,y        ,bp,cs)==2
         && num_cross(0,dx-1,y-1-dy/32,y-1-dy/32,bp,cs)==2 ) break;
      }
      if (y==dy) Break;
    }

    // if( get_bw(x0,x1,y0,y1,p,cs,2) == 0 ) Break; // unsure !I|

    if (dx>3
     && get_bw(dx-1-dx/8,dx-1,0,dy/6,bp,cs,1) != 1
     && get_bw(dx-1-dx/8,dx-1,0,dy/2,bp,cs,1) == 1 ) Break;

    if ( get_bw(dx-1-dx/8,dx-1,dy/4,dy/3,bp,cs,1) != 1 // large I ???
      && get_bw(0        ,dx/8,dy/4,dy/3,bp,cs,1) != 1
      && get_bw(dx-1-dx/8,dx-1,0   ,dy/8,bp,cs,1) == 1
      && get_bw(0        ,dx/8,0   ,dy/8,bp,cs,1) == 1 ) ad=ad*97/100;
    if ( get_bw(dx-1-dx/8,dx-1,dy/2,dy-1,bp,cs,1) != 1 // r ???
      && get_bw(0        ,dx/8,dy/2,dy-1,bp,cs,1) == 1
      && get_bw(dx-1-dx/8,dx-1,0   ,dy/3,bp,cs,1) == 1
      && get_bw(0        ,dx/8,0   ,dy/3,bp,cs,1) == 1 ) Break;
    MSG( fprintf(stderr,"ad= %d", ad); )

    // no two neighbouring rows of the upper 3/4 may both differ from
    // exactly one crossing
    for ( y=1;y<12*dy/16;y++ )
      if ( num_cross(0, dx-1, y  , y  ,bp,cs) != 1 // sure ?
        && num_cross(0, dx-1, y-1, y-1,bp,cs) != 1 ) break;
    if ( y<12*dy/16 ) Break;

    if (dx>3) { // the middle half of every lower row has to contain black
      for ( y=dy/2;y<dy-1;y++ )
        if ( get_bw(dx/4,dx-1-dx/4,y,y,bp,cs,1) != 1 ) break;
      if ( y<dy-1 ) Break;
    }
    // test whether the right edge is straight
    for (x=dx,y=bp->y-1-5*dy/16;y>=dy/5;y--){ // edge falling off to the right / kink?
      i=loop(bp,bp->x-1,y,x1-x0,cs,0,LE);
      if ( i-2-dx/16>=x ) break;
      if ( i<x ) x=i;
    }
    if (y>=dy/5 ) Break;

    // test whether the left edge is straight
    for(x=0,y=bp->y-1-dy/5;y>=dy/5;y--){ // edge falling off / kink?
      i=loop(bp,0,y,x1-x0,cs,0,RI);
      if( i+2+dx/16<x ) break;
      if( i>x ) x=i;
    }
    if (y>=dy/5 ) Break;
    if (box1->m4 && y1<box1->m4
     && get_bw(x0,x1,y1+1,box1->m4+dy/8,box1->p,cs,1) == 1 )
      ad=ad*97/100; // unsure !l|
    i=loop(bp,dx-1,dy/16,dx,cs,0,LE);
    j=loop(bp,dx-1,dy/2 ,dx,cs,0,LE);
    if( i>3 && j>3
     && get_bw(dx-1-i/2,dx-1-i/2,0,dy/2,bp,cs,1) == 1 ) Break; // ~t

    for(y=5*dy/8;y<dy;y++)
      if( num_cross(0,dx-1,y,y,bp,cs) == 2 ) break;
    if( y<dy ){
      i =loop(bp,0,y,dx,cs,0,RI);
      i+=loop(bp,i,y,dx,cs,1,RI);
      i+=loop(bp,i,y,dx,cs,0,RI)/2; // middle of v-gap
      if( num_cross(0,i,5*dy/8,5*dy/8,bp,cs)==0
       && num_cross(i,i,5*dy/8,     y,bp,cs)==0 ) Break; // ~J
    }
    if ( dx>8
      && loop(bp,   0,3*dy/4,dx,cs,0,RI)>=dx/4
      && loop(bp,   0,7*dy/8,dx,cs,0,RI)<=dx/8
      && loop(bp,dx-1,3*dy/4,dx,cs,0,LE)<=dx/8
      && loop(bp,dx-1,7*dy/8,dx,cs,0,LE)<=dx/8 ) Break; // ~J

    if ( 2*i3>5*i1  // hmm \tt l can look very similar to 7
      && loop(bp,0,dy/4,dx,cs,0,RI)>dx/2
      && get_bw(0,dx/8,0,dy/4,bp,cs,1) == 1 ) Break; // ~7

    if ( loop(bp,dx-1,dy/2,dx,cs,0,LE)>dx/2
      && get_bw(3*dx/4,dx-1,3*dy/4,dy-1,bp,cs,1) == 1) {
      if (loop(bp,0,dy-1,dx,cs,0,RI)<dx/8) ad=99*ad/100; // ~L
      if (5*dx>2*dy) ad=99*ad/100; // ~L
      if (5*dx>3*dy) ad=99*ad/100; // ~L
      MSG( fprintf(stderr,"ad= %d", ad); )
    }
    if (!hchar) { // right part (bow) of h is never a l
      if( get_bw(dx/4,dx/4,   0,dy/4,bp,cs,1) == 1
       && get_bw(dx/4,dx/4,dy/2,dy-1,bp,cs,1) == 0 ) Break;
    }
    if( dx>3 && dy>3*dx
     && loop(bp,dx/4,dy-1     ,dy,cs,0,UP)< dy/4
     && loop(bp,   0,dy-1-dy/8,dx,cs,0,RI)>=dx/2
     && loop(bp,dx-1,dy-1-dy/8,dx,cs,0,LE)<=dx/4 ){
      ad=98*ad/100; // ~]
      if ( loop(bp,dx-1,dy/2,dx,cs,0,LE)==0 ) Break;
    }

    // x = first column of the left half that has black in the upper quarter
    for(x=0;x<dx/2;x++)
      if( get_bw( x, x,    0,dy/4 ,bp,cs,1) == 1 ) break;
    // works only for perpendicular char
    if( get_bw( x,x+dx/16,    0,dy/16,bp,cs,1) == 0
     && get_bw( x,x+dx/16,dy/4 ,dy/2 ,bp,cs,1) == 0
     && get_bw( x,x+dx/16,dy/16,dy/4 ,bp,cs,1) == 1 ){
      // NOTE(review): i stays dx inside this loop, so the break below
      // looks unreachable for in-range rows -- confirm the intent.
      for(i=dx,y=0;y<dy/4;y++){
        x=loop(bp,0,y,dx,cs,0,RI);
        if( x>i ) break;
      }
      if (x>=loop(bp,0,y+1,dx,cs,0,RI)
       && loop(bp,0      ,0,dy,cs,0,DO)>1
       && loop(bp,0      ,0,dy,cs,0,DO)
         -loop(bp,dx/16+1,0,dy,cs,0,DO) < dx/16+1) Break; // ~1 Jul00,Nov00
      if (num_cross(0,dx/2,y-1,y-1,bp,cs)==2) Break; // ~1
    }
    if(dx<8 && dy<12){ // screen font
      i=  loop(bp,0,0,dy,cs,0,DO);
      if ( loop(bp,dx/2,1,dy,cs,1,DO)>=dy-2
        && loop(bp,0,dy/2,dx,cs,0,RI)>=2
        && i>1 && i<dy/2 ) Break; // ~1
      // tmp17/0506_clean7x9b.png ~1 2017-08
      if ( 3*(loop(bp,0,dy-dy/8-2,dx,cs,0,RI)
             -loop(bp,0,dy     -1,dx,cs,0,RI)) >= dx) ad=98*ad/100;
    }
    if ( get_bw(x1,x1,y0  ,y1  ,box1->p,cs,2) != 2 /* ~] */
      && get_bw(x0,x1,y0  ,y0  ,box1->p,cs,2) != 2
      && get_bw(x0,x1,y1  ,y1  ,box1->p,cs,2) != 2
      && get_bw(x0,x0+dx/4,y0+1+dy/16,y1-1-dy/16,box1->p,cs,1) != 1 ) Break;

    // MSG( fprintf(stderr,"ad= %d", ad); )
    i=loop(bp,dx-1,dy/2,dx,cs,0,LE);
    if ( loop(bp,   0,dy/2,dx,cs,0,RI)>=dx/2
      && (i<dx/2 || i==0) && dx>1) ad=98*ad/100; // ~] JS-2010-09 add dx>1
    if ( get_bw(x0,x0,y0  ,y1  ,box1->p,cs,2) != 2
      && get_bw(x0,x1,y0  ,y0  ,box1->p,cs,2) != 2
      && get_bw(x0,x1,y1  ,y1  ,box1->p,cs,2) != 2
      && get_bw(x1-dx/4,x1,y0+1+dy/16,y1-1-dy/16,box1->p,cs,1) != 1 ) Break; /* ~[ */

    x =loop(bp,   0,dy/2,dx,cs,0,RI); // convex/concave? ~()
    i =loop(bp,dx-1,dy/2,dx,cs,0,LE);
    if( loop(bp,   0,7*dy/8,dx,cs,0,RI) > x+dx/8
     && loop(bp,   0,  dy/8,dx,cs,0,RI) > x+dx/8
     && loop(bp,dx-1,7*dy/8,dx,cs,0,LE) < i-dx/8
     && loop(bp,dx-1,  dy/8,dx,cs,0,LE) < i-dx/8 ) Break; // ~(
    if( loop(bp,   0,7*dy/8,dx,cs,0,RI) < x-dx/8
     && loop(bp,   0,  dy/8,dx,cs,0,RI) < x-dx/8
     && loop(bp,dx-1,7*dy/8,dx,cs,0,LE) > i+dx/8
     && loop(bp,dx-1,  dy/8,dx,cs,0,LE) > i+dx/8 ) Break; // ~)

    i=  loop(bp,   0,     0,dy,cs,0,DO); // horizontal line?
    if(dy>=12 && i>dy/8 && i<dy/2){
      // both edges stick out at row i (or i+1) compared with the rows
      // 3*dy/16 and dy/2: a cross bar => t
      if( ( loop(bp,dx-1,3*dy/16,dx,cs,0,LE)-dx/8
           >loop(bp,dx-1,      i,dx,cs,0,LE)
         || loop(bp,dx-1,3*dy/16,dx,cs,0,LE)-dx/8
           >loop(bp,dx-1,    i+1,dx,cs,0,LE) )
       && ( loop(bp,dx-1,8*dy/16,dx,cs,0,LE)-dx/8
           >loop(bp,dx-1,      i,dx,cs,0,LE)
         || loop(bp,dx-1,8*dy/16,dx,cs,0,LE)-dx/8
           >loop(bp,dx-1,    i+1,dx,cs,0,LE) )
       && ( loop(bp,   0,3*dy/16,dx,cs,0,RI)-dx/8
           >loop(bp,   0,      i,dx,cs,0,RI)
         || loop(bp,   0,3*dy/16,dx,cs,0,RI)-dx/8
           >loop(bp,   0,    i+1,dx,cs,0,RI) )
       && ( loop(bp,   0,8*dy/16,dx,cs,0,RI)-dx/8
           >loop(bp,   0,      i,dx,cs,0,RI)
         || loop(bp,   0,8*dy/16,dx,cs,0,RI)-dx/8
           >loop(bp,   0,    i+1,dx,cs,0,RI) ) ) Break; // ~t
      if( loop(bp,   0,i-1,dx,cs,0,RI)>1 && dx<6 ) Break; // ~t
      if( loop(bp,   0,8*dy/16,dx,cs,0,RI)>dx/8
       && loop(bp,   0,      i,dx,cs,1,RI)>=dx-1
       && loop(bp,dx-1,8*dy/16,dx,cs,0,LE)>dx/8
       && loop(bp,dx-1,    i-1,dx,cs,0,LE)>dx/8 ) Break; // ~t
    }
    // MSG( fprintf(stderr,"ad= %d", ad); )
    // if( vertical_detected && dx>5 )
    if ( loop(bp,0,   1,dx,cs,0,RI)>=dx/2
      && ( loop(bp,0,dy-2,dx,cs,0,RI)<=dx/8
        || loop(bp,0,dy-1,dx,cs,0,RI)<=dx/8 )
      && dx>1  // important for dx/2 below
      && ( loop(bp,dx-1,   0,dx,cs,0,LE)<=dx/8
        || loop(bp,dx-1,   1,dx,cs,0,LE)<=dx/8 )
      && loop(bp,dx-1,dy-2,dx,cs,0,LE)>=dx/2 ) {
      ad=98*ad/100; // ~/
      MSG( fprintf(stderr,"ad= %d", ad); )
    }
    if (loop(bp,0,dy/4,dx,cs,0,RI)>dx/2
     && loop(bp,0,7*dy/8,dx,cs,0,RI)<dx/2){ // ~z example?
      i=loop(bp,0,dy/16  ,dx,cs,0,RI);
      i=loop(bp,i,dy/16  ,dx,cs,1,RI); j=i; // top width
      i=loop(bp,0,dy/16+1,dx,cs,0,RI);
      i=loop(bp,i,dy/16+1,dx,cs,1,RI); if (i>j) j=i; // top max width
      i=loop(bp,0,dy/16+2,dx,cs,0,RI);
      i=loop(bp,i,dy/16+2,dx,cs,1,RI); if (i>j) j=i; // top max width
      if (j*4>=dx*3) ad=98*ad/100; // ~z
      if (j*8>=dx*7) ad=96*ad/100; // ~z
      MSG( fprintf(stderr,"ad= %d", ad); )
      // false z on 7x9font 3x9 l (left vertical + upper stub)
    }
    i=loop(bp,0,0,dy,cs,0,DO);
    if (3*i>dy && 3*i<2*dy
     && loop(bp,dx-1,   0,dy,cs,0,DO)==0
     && loop(bp,dx-1,dy-1,dy,cs,0,UP)==0) {
      i=loop(bp,   0,dy-1,dy,cs,0,UP);
      if (3*i>dy && 3*i<2*dy) Break; // ~{} 6x10
      ad=ad*99/100;
      MSG( fprintf(stderr,"ad= %d", ad); )
    }

    // JS-2010-09 badly implemented for (dx=1)-"l"?
    //MSG( fprintf(stderr,"ad= %d", ad); )
    //if ( get_bw(x0,x0,y1,y1,box1->p,cs,2) == 0 ) ad=99*ad/100;
    //if ( get_bw(x1,x1,y1,y1,box1->p,cs,2) == 0 ) ad=99*ad/100;
    if (ad==100) ad--;  /* I have to fix that: (sample?)

         .@@@@.<-
         @@..@@
         ....@@
         ....@@<
         ...@@.
         ..@@@.
         ..@@..
         .@@...
         @@....
         @@@@@@<-
     */
    MSG( fprintf(stderr,"ad= %d", ad); )
    if(!hchar) ad=ad*99/100;
    if( gchar) ad=ad*99/100;
    // full rectangle? no white dots? I and l should have 98% and 99%
    // after context correction (JS2010-09)
    if (ad>98 && get_bw(x0,x1,y0,y1,box1->p,cs,2) == 0) ad=99*ad/100;
    // 2010-10-01 sample tmp10/barcode_code128_145
    if (dx<3 && dy>10 && box1->m4==0) ad=ad*95/100; // just a vertical line?

    Setac(box1,'l',ad);
    //  if( i<100 ) Break; ????
    //  if(   loop(bp,0,   1,dx,cs,0,RI)<=dx/8
    //     && loop(bp,0,dy/2,dx,cs,0,RI)<=dx/8
    //     && loop(bp,0,dy-2,dx,cs,0,RI)<=dx/8 ) vertical_detected=1;
    break;
  }
  return box1->c;
}
4908
/*
 * ocr0_oO -- rule based test of one character box for 'o' / 'O'.
 *
 * The box must contain exactly one hole and black pixels on all four sides
 * of its centre.  Shapes that look like D, an italic a, b, q or Q are
 * rejected (Break) or lower the probability ad.  The result is reported
 * through Setac(): 'o' or 'O', always accompanied by the digit '0' as an
 * alternative.
 *
 * Calls on bp use box relative coordinates, calls on box1->p use page
 * coordinates (x0..x1, y0..y1).
 * NOTE(review): the hoisted probes below assume loop() only reads pixels.
 *
 * Returns box1->c as it stands after the test.
 */
static wchar_t ocr0_oO(ocr0_shared_t *sdata){
  struct box *box1 = sdata->box1;
  pix *bp = sdata->bp;
  int hchar = sdata->hchar, gchar = sdata->gchar, cs = sdata->cs;
  int x0 = box1->x0, x1 = box1->x1, y0 = box1->y0, y1 = box1->y1;
  int dx = x1-x0+1, dy = y1-y0+1;    /* box size */
  int i1, i2, i, j, d, x, y, ad;     /* tmp-vars */
  wchar_t bc = UNKNOWN;

  // --- test o,O ---------------------------------------------------
  ad = d = 100;
  while (dx > 2 && dy > 3) {         // min 3x4, body runs at most once
    DBG( wchar_t c_ask='o'; )
    if (sdata->holes.num != 1) Break;
    /* black on the left, right, bottom and top of the centre ... */
    if (get_bw(x0     , x0+dx/2, y0+dy/2, y0+dy/2, box1->p, cs, 1) != 1) Break;
    if (get_bw(x1-dx/2, x1     , y0+dy/2, y0+dy/2, box1->p, cs, 1) != 1) Break;
    if (get_bw(x0+dx/2, x0+dx/2, y1-dy/2, y1     , box1->p, cs, 1) != 1) Break;
    if (get_bw(x0+dx/2, x0+dx/2, y0     , y0+dy/2, box1->p, cs, 1) != 1) Break;
    /* ... and nothing black in the centre column just below the middle */
    if (get_bw(x0+dx/2, x0+dx/2, y0+dy/2, y1-dy/3, box1->p, cs, 1) != 0) Break;
    /* the hole has to span the middle third of the box height */
    if (sdata->holes.hole[0].y0 > dy/3
     || sdata->holes.hole[0].y1 < dy-1-dy/3) Break;

    if (num_cross(x0+dx/2  , x0+dx/2  , y0, y1, box1->p, cs) != 2
     && num_cross(x0+dx/2+1, x0+dx/2+1, y0, y1, box1->p, cs) != 2) Break;
    /* each border is probed on two neighbouring lines (against noise) */
    if (num_cross(x0+dx/3, x1-dx/4, y0  , y0  , box1->p, cs) != 1
     && num_cross(x0+dx/3, x1-dx/4, y0+1, y0+1, box1->p, cs) != 1) Break;
    if (num_cross(x0+dx/4, x1-dx/3, y1  , y1  , box1->p, cs) != 1
     && num_cross(x0+dx/4, x1-dx/3, y1-1, y1-1, box1->p, cs) != 1) Break;
    if (num_cross(x0  , x0  , y0+dy/3, y1-dy/3, box1->p, cs) != 1
     && num_cross(x0+1, x0+1, y0+dy/3, y1-dy/3, box1->p, cs) != 1) Break;
    if (num_cross(x1  , x1  , y0+dy/3, y1-dy/3, box1->p, cs) != 1
     && num_cross(x1-1, x1-1, y0+dy/3, y1-dy/3, box1->p, cs) != 1) Break;

    /* the left gap of row 0 should be clearly wider than that of row 2 */
    i = loop(bp, 0, 0, x1-x0, cs, 0, RI);
    j = loop(bp, 0, 2, x1-x0, cs, 0, RI);
    if (i - j <= dx/16) ad = 99*ad/100;
    if (i - j <= dx/8 ) ad = 98*ad/100;
    if (i < j) Break;
    MSG(fprintf(stderr,"ad %d",ad);)

    /* right gap must not shrink while going down the lower third */
    x = loop(bp, dx-1, dy-1-dy/3, x1-x0, cs, 0, LE);   // should be minimum
    for (y = dy-1-dy/3; y < dy; y++) {
      i = loop(bp, dx-1, y, x1-x0, cs, 0, LE);
      if (i < x) break;
      x = i;
    }
    if (y < dy) Break;

    // ~D: the left edge has to be convex
    i = loop(bp, 0,      dy/16, dx, cs, 0, RI)
      + loop(bp, 0, dy-1-dy/16, dx, cs, 0, RI)
      - 2*loop(bp, 0,    dy/2 , dx, cs, 0, RI);
    if (i < 0) Break;
    if (i <= dx/8) {                 // not convex
      if (hchar) { Break; }
      ad = 98*ad/100;
    }
    if (loop(bp, 0   , 1+dy/16, dx, cs, 0, RI) + dx/4
     <= loop(bp, dx-1, 1+dy/16, dx, cs, 0, LE)) Break; // Dec00

    if (loop(bp, dx-1, dy/16, dx, cs, 0, LE) > dx/8
     && loop(bp, 0   , dy/16, dx, cs, 0, RI) < dx/16) Break;
    if (loop(bp, dx-1, dy-1-dy/16, dx, cs, 0, LE) > dx/8
     && loop(bp, 0   , dy-1-dy/16, dx, cs, 0, RI) < dx/16) Break;
    if (get_bw(x1-dx/32, x1, y0      , y0+dy/32, box1->p, cs, 1) == 0
     && get_bw(x1-dx/32, x1, y1-dy/32, y1      , box1->p, cs, 1) == 0
     // && ( get_bw(x0,x0+dx/32,y0,y0+dy/32,box1->p,cs,1) == 1
     && ( get_bw(0, dx/32, 0, dy/32, bp, cs, 1) == 1
       || get_bw(x0, x0+dx/32, y1-dy/32, y1, box1->p, cs, 1) == 1 )) Break; // ~D

    // i1: smallest (white+black) run measured upwards from the bottom
    i1 = dy;
    for (x = dx/8; x < dx-dx/8; x++) {
      i  = loop(bp, x, dy-1  , y1-y0, cs, 0, UP);
      i += loop(bp, x, dy-1-i, y1-y0, cs, 1, UP);
      if (i <= i1) i1 = i;
    }
    // i2: smallest (white+black) run measured downwards from the top,
    // j: last column where that minimum was found
    i2 = dy;
    j = dx/8;
    for (x = dx/8; x < dx-dx/8; x++) {
      i  = loop(bp, x, 0  , y1-y0, cs, 0, DO);
      i += loop(bp, x, 0+i, y1-y0, cs, 1, DO);
      if (i <= i2) { i2 = i; j = x; }
    }
    MSG(fprintf(stderr,"y12 %d %d ad %d",i1,i2,ad);)
    // italic a
    for (y = dy-1-i1; y < dy-1; y++) {
      if (num_cross(j, dx-1, y, y, bp, cs) > 1) ad = 99*ad/100; // ~a \it a
    }
    MSG(fprintf(stderr,"i12 %d %d ad %d",i1,i2,ad);)
    for (y = 0; y < i2; y++) {
      if (num_cross(0, dx-1, y, y, bp, cs) > 2) ad = 98*ad/100; // ~a \it a
    }
    MSG(fprintf(stderr,"i12 %d %d ad %d",i1,i2,ad);)
    for (y = i1+dy/8; y < i2-dy/8; y++) {
      if (num_cross(0, dx-1, y, y, bp, cs) > 2) ad = 98*ad/100; // 0 Q ?
    }
    MSG(fprintf(stderr,"i12 %d %d ad %d",i1,i2,ad);)
    if (loop(bp, dx-1, dy-1, x1-x0, cs, 0, LE) < dx/8) ad = 98*ad/100; // \it a
    if (loop(bp, dx-1,    0, x1-x0, cs, 0, LE) < dx/8) ad = 98*ad/100; // \it a
    MSG(fprintf(stderr,"ad %d m14 %d %d",ad,box1->m1-y0,box1->m4-y0);)
    if (loop(bp, dx-1, dy-1-dy/8, x1-x0, cs, 0, LE) + 1 + dx/16
      < loop(bp,    0, dy-1-dy/8, x1-x0, cs, 0, RI)) {
      ad = 99*ad/100; MSG(fprintf(stderr,"ad=%d",ad);)   // \it a
    }
    if (loop(bp, dx-1, dy-1, y1-y0, cs, 0, UP) + 1 + (dy+3)/8
      < loop(bp,    0, dy-1, y1-y0, cs, 0, UP)) {
      ad = 98*ad/100; MSG(fprintf(stderr,"ad=%d",ad);)   // \it a
    }

    /* top and bottom should be symmetric and crossed only once */
    if (abs(loop(bp, dx/2,    0, dy, cs, 0, DO)
           -loop(bp, dx/2, dy-1, dy, cs, 0, UP)) > dy/8
     || num_cross(0, dx-1,    0,    0, bp, cs) > 1
     || num_cross(0, dx-1, dy-1, dy-1, bp, cs) > 1) {
      ad = 98*ad/100; MSG(fprintf(stderr,"ad %d",ad);)   // ~bq
    }

    // corrections for wrong recognized m1,m2 (all chars of same high)
    i = (hchar && 2*y0 < box1->m1+box1->m2) ? 1 : 0;
    if (gchar) ad = 99*ad/100;
    // ToDo: need line information, if m1-m4 is not sure
    bc = i ? 'O' : 'o';
    if (bc == 'O' && ad > 99) ad = 99;  /* we can never 100% sure, 0O */
    if (bc == 'O' && hchar && dy < 10 && ad >= 99) {
      if (dx < dy-dy/16
       && 6*( loop(bp,    0, 0, dx, cs, 0, RI)
             +loop(bp, dx-1, 0, dx, cs, 0, LE)) >= 4*dx) {
        Setac(box1,'0',99);
        Setac(box1,'O',98);
        Break;                          // small top 0
      }
      if (dx >= dy-dy/16) {
        Setac(box1,'0',98);             // but output as 'o'
        Setac(box1,'O',99);
        Break;                          // round O sslmozFP
      }
    }
    /* 2017: upper width: 3/5 for 5x7o, 2/6 for 6x9zero but teletext!*/
    if (bc == 'o' && (!hchar) && y0 <= box1->m2) { // 2018-09 bad m1-m4?
      DBG( c_ask='O'; )
      Setac(box1,'O',99*ad/100);
      DBG( c_ask='0'; )
      Setac(box1,'0',99*ad/100);
    }
    if (ad == 100 && bc == 'o' && box1->m2
     && abs((box1->m2 - box1->y0)
          - (box1->y0 - box1->m1))
         <= (box1->m2 - box1->m1)/4) ad = 98*ad/100; // unsure 2010-10-01
    DBG( c_ask=bc; )
    Setac(box1,bc,ad);
    DBG( c_ask='0'; )
    if (bc == 'O') { Setac(box1,'0',ad); }
    if (bc == 'o') { Setac(box1,'0',98*ad/100); }
    break;
  }
  return box1->c;
}
5050
/*
 * ocr0_pP -- rule based test of one character box for 'p' / 'P'.
 *
 * Looks for a vertical stem on the left that runs down to the bottom and
 * a closed bow in the upper part with free space to its lower right.
 * Missing or extra holes and a serif-F like shape lower the probability
 * ad.  The case is chosen from hchar/gchar and the result is reported
 * through Setac().
 *
 * Calls on bp use box relative coordinates, calls on box1->p use page
 * coordinates (x0..x1, y0..y1).
 *
 * Returns box1->c as it stands after the test.
 */
static wchar_t ocr0_pP(ocr0_shared_t *sdata){
  struct box *box1 = sdata->box1;
  pix *bp = sdata->bp;
  int hchar = sdata->hchar, gchar = sdata->gchar, cs = sdata->cs;
  int x0 = box1->x0, x1 = box1->x1, y0 = box1->y0, y1 = box1->y1;
  int dx = x1-x0+1, dy = y1-y0+1;        /* box size */
  int i, j, d, x, y, i1, i2, i3, i4, ad; /* tmp-vars */
  int same_class;                        /* hchar and gchar both set or both clear */
  wchar_t bc = UNKNOWN;

  // --- test pP ---------------------------------------------------
  ad = d = 100;
  while (dx > 2 && dy > 3) {             // min 3x4, body runs at most once
    DBG( wchar_t c_ask='p'; )
    if (sdata->holes.num > 2) Break; /* tolerant against a tiny hole */
    if (get_bw(0   , dx/2, 3*dy/4, 3*dy/4, bp, cs, 1) != 1) Break;
    if (get_bw(0   , dx/2,   dy/2,   dy/2, bp, cs, 1) <  1) Break;
    if (get_bw(dx/4, dx-1,   dy/4,   dy/4, bp, cs, 1) != 1) Break;
    i = loop(bp, dx-1, 3*dy/4, dx, cs, 0, LE);   // free space right of the stem
    if (i < dx/4) Break;
    /* one of three columns has to cross the bow exactly twice */
    if (num_cross(x1-3*i/4 , x1-3*i/4 , y0, y1-3*dy/16, box1->p, cs) != 2
     && num_cross(x0+dx/2  , x0+dx/2  , y0, y1-3*dy/16, box1->p, cs) != 2
     && num_cross(x0+dx/2+1, x0+dx/2+1, y0, y1-3*dy/16, box1->p, cs) != 2) Break;
    if (num_cross(0, dx-1, 7*dy/8  , 7*dy/8  , bp, cs) != 1
     && num_cross(0, dx-1, 7*dy/8-1, 7*dy/8-1, bp, cs) != 1) Break;
    /* the row dy/4 is probed twice, exactly as in the original chain */
    if (num_cross(0, dx-1, dy/4  , dy/4  , bp, cs) != 2
     && num_cross(0, dx-1, dy/4-1, dy/4-1, bp, cs) != 3  // \it p with nice kurve
     && num_cross(0, dx-1, dy/4  , dy/4  , bp, cs) != 2
     && num_cross(0, dx-1, dy/4+1, dy/4+1, bp, cs) != 2) Break;

    i = loop(bp, 0, dy/2, dx, cs, 0, RI);
    if (i < 1) i++;
    if (num_cross(i-1, dx-1, dy/4  , dy/4  , bp, cs) != 2
     && num_cross(i-1, dx-1, dy/4+1, dy/4+1, bp, cs) != 2) Break;

    i1 = loop(bp, 0, 3*dy/8, dx, cs, 0, RI);
    if (i1 >= dx/2) ad = 90*ad/100;
    i2 = i1 + loop(bp, i1, 3*dy/8, dx, cs, 1, RI); // upper x-position of v line
    i3 = loop(bp, 0, 7*dy/8, dx, cs, 0, RI);
    i4 = i3 + loop(bp, i3, 7*dy/8, dx, cs, 1, RI); // lower x-position of v line
    // out_x(box1);printf(" p:");
    /* the left edge must stay left of the line through (i2,3dy/8),(i4,7dy/8) */
    for (y = dy/8; y < 7*dy/8; y++) {
      x = i2 + (8*y-3*dy)*(i4-i2)/(4*dy);          // right limit of line
      i = loop(bp, 0, y, dx, cs, 0, RI);
      if (i > x+dx/16) break;
    }
    if (y < 7*dy/8) Break;
    /* search the lower edge of the bow (also 4x6): first row whose right
       gap exceeds dx/2 */
    for (x = 0, j = y = dy/3; y < dy-dy/8; y++) {
      i = loop(bp, dx-1, y, dx, cs, 0, LE);
      if (i > x) { x = i; j = y; }
      if (x > dx/2) break;
    }
    if (x < dx/2 || x >= dx) Break;
    if (get_bw(3*dx/4, dx-1, y, dy-1, bp, cs, 1) == 1) Break;

    i = num_hole(x0, x1, y0, y1-dy/5, box1->p, cs, NULL);
    // j=num_hole (x0,x1,y0,y1      ,box1->p,cs,NULL);
    j = sdata->holes.num;

    if (j != 1) ad = ((dx < 8) ? 96 : 98)*ad/100;
    if (i == 0 && j == 0) ad = 90*ad/100; /* some times there is a small gap */
    if (i > 1 || j > 1 || j > i) Break;

    // check for serif F
    i  = loop(bp, bp->x-1  , bp->y/4, dx    , cs, 0, LE);
    i += loop(bp, bp->x-1-i, bp->y/4, dx    , cs, 1, LE);
    j  = loop(bp, bp->x-1-i, bp->y/4, 3*dy/4, cs, 0, DO);
    if (j > dy/2) ad = 80*ad/100;  // its an serif-F

    /* a box that is neither high nor low, or both, is unusual for p/P */
    same_class = ((!hchar) && (!gchar)) || (hchar && gchar);
    if (same_class) ad = 95*ad/100;
    bc = (hchar && ((!gchar) || dy < 14)) ? 'P' : 'p';
    if (same_class) ad = 98*ad/100;  // \ss sz

    Setac(box1,bc,ad);
    break;
  }
  return box1->c;
}
5123
/*
 * ocr0_qQ -- rule based test of one character box for 'Q' and for 'q'.
 *
 * Two independent one-shot loops are run.  The Q test wants a ring plus a
 * falling stroke at the lower right; the q test wants a ring in the upper
 * part and a single vertical stem on the right below it.  Each test either
 * leaves through Break or lowers ad and reports through Setac().
 * Note that Break inside one of the inner for loops only leaves that inner
 * loop; the test following the loop decides about the candidate.
 *
 * Calls on bp use box relative coordinates, calls on box1->p use page
 * coordinates (x0..x1, y0..y1).
 *
 * Returns box1->c as it stands after both tests.
 */
static wchar_t ocr0_qQ(ocr0_shared_t *sdata){
  struct box *box1 = sdata->box1;
  pix *bp = sdata->bp;
  int hchar = sdata->hchar, gchar = sdata->gchar, cs = sdata->cs;
  int x0 = box1->x0, x1 = box1->x1, y0 = box1->y0, y1 = box1->y1;
  int dx = x1-x0+1, dy = y1-y0+1;   /* box size */
  int i, j, d, x, y, ad;            /* tmp-vars */

  // --- test Q ---------------------------------------------------
  ad = d = 100;
  while (dx > 2 && dy > 4) {        // min 3x4, body runs at most once
    DBG( wchar_t c_ask='Q'; )
    if (sdata->holes.num > 2) Break; /* tolerant against a tiny hole */
    if (get_bw(x0     , x0+dx/3, y0+dy/3, y0+dy/3, box1->p, cs, 1) != 1) Break;
    if (get_bw(x1-dx/3, x1     , y0+dy/3, y0+dy/3, box1->p, cs, 1) != 1) Break;
    if (get_bw(x0+dx/2, x0+dx/2, y1-dy/3, y1     , box1->p, cs, 1) != 1) Break;
    if (get_bw(x0+dx/2, x0+dx/2, y0     , y0+dy/4, box1->p, cs, 1) != 1) Break;
    if (get_bw(x0+dx/2, x0+dx/2, y0+dy/3, y1-dy/2, box1->p, cs, 1) == 1) Break;
    if (get_bw(x1     , x1     , y0     , y0     , box1->p, cs, 1) == 1) Break; //alpha
    if (num_cross(x0+dx/2, x0+dx/2, y0, y1, box1->p, cs) < 2) Break;
    /* borders are probed on two neighbouring lines */
    if (num_cross(x0+dx/5, x1-dx/5, y0  , y0  , box1->p, cs) != 1
     && num_cross(x0+dx/5, x1-dx/5, y0+1, y0+1, box1->p, cs) != 1) Break;
    if (num_cross(x0  , x0  , y0+dy/3, y1-dy/3, box1->p, cs) != 1
     && num_cross(x0+1, x0+1, y0+dy/3, y1-dy/3, box1->p, cs) != 1) Break;
    if (get_bw(x1  , x1  , y1-dy/8, y1     , box1->p, cs, 1) == 0
     && num_cross(x1  , x1  , y0+dy/3, y1-dy/3, box1->p, cs) != 1
     && num_cross(x1-1, x1-1, y0+dy/3, y1-dy/3, box1->p, cs) != 1) Break;
    // i=num_hole(x0,x1,y0,y1,box1->p,cs,NULL);
    i = sdata->holes.num;
    if (!i) Break;
    /* one hole, or two holes with exactly one of them in the lower half */
    if (i != 1
     && (i != 2 || num_hole(x0, x1, y0+dy/2, y1, box1->p, cs, NULL) != 1)) Break;
    x = x1;
    y = y1;
    turmite(box1->p, &x, &y, x0, x1, y0, y1, cs, LE, ST);
    if (x < x1-dx/2) Break;
    turmite(box1->p, &x, &y, x0, x1, y0, y1, cs, ST, LE);
    if (x < x1-dx/2) ad = (gchar ? 98 : 90)*ad/100;
    if (loop(bp, 0, 0, dx, cs, 0, RI)
      < loop(bp, 0, 2, dx, cs, 0, RI)) Break;
    if (loop(bp, 0   , dy/8+2, dx, cs, 0, RI)
       +loop(bp, dx-1, dy/8+2, dx, cs, 0, LE) > 5*dx/8) Break; // ~4 Okt00

    x = loop(bp, dx-1, 3*dy/8, dy, cs, 0, LE);
    if (x > dx/4) Break;
    if (loop(bp, dx-1-x, 0, dy, cs, 0, DO)
     <= loop(bp, dx-2-x, 0, dy, cs, 0, DO)) Break; // 4

    if (loop(bp, dx-1, dy-2, dx, cs, 0, LE)
     <= loop(bp, dx-1, dy/2, dx, cs, 0, LE)
     && loop(bp,    1, dy-1, dy, cs, 0, UP)
     <= loop(bp, dx/2, dy-1, dy, cs, 0, UP)
     && loop(bp,    0, dy-2, dx, cs, 0, RI) > dx/2
     && loop(bp,    0,    0, dx, cs, 0, RI) > dx/2) Break; // 4

    if (loop(bp, dx-1, 3*dy/4, dx, cs, 0, LE)
      + loop(bp,    0, 3*dy/4, dx, cs, 0, RI)
      < loop(bp, dx-1, 2*dy/4, dx, cs, 0, LE)
      + loop(bp,    0, 2*dy/4, dx, cs, 0, RI)) ad = 94*ad/100; // 4
    if (loop(bp, 0, 3*dy/4, dx, cs, 1, RI) >= dx) ad = 94*ad/100; // 4

    if (loop(bp, dx-1, dy/3, dx, cs, 0, LE) > dx/4) Break;
    j = loop(bp, dx/2, dy-1, dy, cs, 0, UP);
    if (j > 1 && j > dy/8
     && get_bw(0, dx/2, dy-1-j/2, dy-1-j/2, bp, cs, 1) == 1) { // ~RA
      if (j >= 5) Break;
      ad = 95*ad/100;
    }

    // italic a: count rows of the upper half with more than two crossings
    i = 0;
    for (y = 0; y < dy/2; y++) {
      if (num_cross(0, dx-1, y, y, bp, cs) > 2) i++;
    }
    if (i > dy/8) Break; // ~a \it a
    if (i > 0) ad = 99*ad/100;

    // ~o look at the lower right side for falling line
    // x: widest right gap so far, j: largest drop below that maximum
    for (j = x = 0, y = dy/2; y < dy; y++) {
      i = loop(bp, dx-1, y, dx, cs, 0, LE);
      if (i > x) { x = i; }
      if (x-i > j) j = x-i;
      if (j > dx/16) Break;  // falling line detected (leaves this scan only)
    }
    if (j == 0) Break;              // no falling line => no Q
    if (j <= dx/16) ad = 98*ad/100;
    if (y1 <= box1->m3) ad = 98*ad/100; // ~q no underlength! rare
    if (!hchar) ad = 96*ad/100;
    Setac(box1,'Q',ad);
    break;
  }
  // --- test q ---------------------------------------------------
  ad = d = 100;
  while (dx > 2 && dy > 3) {        // min 3x4, body runs at most once
    DBG( wchar_t c_ask='q'; )
    if (sdata->holes.num > 2) Break; /* tolerant against a tiny hole */
    for (y = y0; 2*y <= y0+y1; y++) {       // detect ring
      if (num_cross(x0, x1, y, y, box1->p, cs) == 2) Break;
    }
    if (2*y > y0+y1) Break;  /* < */
    for (y = (y0+y1)/2; y <= y1; y++) {     // detect vert line
      if (num_cross(x0, x1     , y, y, box1->p, cs) == 1
       && num_cross(x0, x0+dx/2, y, y, box1->p, cs) == 0) Break;
    }
    if (y > y1) Break;  /* O (y==y1 for 4x6font-q) */
    // detect baseline: j = row where the left gap becomes widest
    for (x = 0, j = y = y0+dy/3; y <= y1-dy/8; y++) {
      i = loop(box1->p, x0, y, dx, cs, 0, RI);
      if (i > x) { x = i; j = y; }
      if (x > dx/2) break;
    }
    if (x < dx/2 || x >= dx) Break;
    if (y1-j+1 < dy/4) ad = 96*ad/100; // ~\it{a}
    if (num_cross(x0+x/2, x0+x/2, j, y1, box1->p, cs) != 0) ad = 96*ad/100; // ~g
    if (loop(box1->p, x0+dx/16, j, dy, cs, 0, UP) < 1+dy/16) {
      ad = 97*ad/100;
      if (hchar || !gchar) Break; // 4
    }
    if (loop(box1->p, x0+dx/16, j-dy/32-1, dy, cs, 1, RI) >= dx-dx/8
     || loop(box1->p, x0+dx/16, j-dy/16-1, dy, cs, 1, RI) >= dx-dx/8) {
      ad = 96*ad/100; // 4
    }
    if (get_bw(x1-dx/3, x1     , y0+dy/3, y0+dy/3, box1->p, cs, 1) != 1) Break;
    if (get_bw(x0     , x0+dx/3, y0+dy/3, y0+dy/3, box1->p, cs, 1) != 1) Break;
    if (get_bw(x0     , x0+dx/4, y1-dy/8, y1-dy/9, box1->p, cs, 1) == 1) Break;
    if (get_bw(x0     , x0+dx/4, y1-dy/5, y1-dy/9, box1->p, cs, 1) == 1) ad = 99*ad/100;
    if (num_cross(x0+dx/2, x0+dx/2, y0, j, box1->p, cs) != 2) Break;
    // if( num_hole (x0     ,x1     , y0, y1 ,box1->p,cs,NULL) != 1 )
    if (sdata->holes.num != 1) {
      if (dx >= 16) Break;
      ad = 98*ad/100;
    }
    if (num_hole(x0, x1, y0, j, box1->p, cs, NULL) != 1) {
      if (dx >= 16) Break;
      ad = 98*ad/100;
    }
    // ~\it g
    if (loop(bp, 0, dy-1-dy/4, dx, cs, 0, RI) > 5*dx/8
     && get_bw(dx/4, dx/4, dy-1-dy/4, dy-1, bp, cs, 1) == 1) Break; // ~\it g
    // what about unsure m1-m4?
    if (!gchar) { ad = ad*99/100; } // ~4
    if ( hchar) { ad = ad*99/100; } // ~49
    Setac(box1,'q',ad);
    break;
  }
  return box1->c;
}
5255
/*
 * ocr0_iIjJ: rule based tests of the current box against the four
 * characters i, j, I and J (in this order).
 *
 * Input is taken from sdata: the box (box1: x0,x1,y0,y1, m1..m4, dots,
 * num_frames, frame_* arrays), the pixel map bp, the threshold cs, the
 * hchar/gchar flags, the hole count and the corner table aa.
 * As the calls below show, bp is addressed with box-relative coordinates
 * (0..dx-1, 0..dy-1) while box1->p is addressed with absolute ones
 * (x0..x1, y0..y1).
 *
 * Each test is written as a loop that is left after at most one pass:
 * a failed rule leaves it via Break, a passed test ends with
 * Setac(box1,<char>,ad) followed by break.  ad starts at 100 and is
 * scaled down by every rule that raises doubt.
 * Break, Setac, MSG and DBG are macros defined elsewhere in the file;
 * presumably Break rejects the candidate and Setac registers it with
 * probability ad -- confirm against their definitions.
 *
 * Returns box1->c, except for the shortcut in the i test which returns
 * the character i directly.
 *
 * NOTE(review): the text in front of "static" on the next line and the
 * leading number on every line look like residue of a source listing,
 * not C.  They are left untouched here.
 */
ocr0_iIjJ(ocr0_shared_t * sdata)5256 static wchar_t ocr0_iIjJ(ocr0_shared_t *sdata){
5257 struct box *box1=sdata->box1;
5258 pix *bp=sdata->bp;
5259 int i,j,d,x,y,i1,i2,i3,i4,i5,hchar=sdata->hchar,gchar=sdata->gchar,
5260 ax,ay,bx,by,cx,cy,ex,ey,
5261 x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
5262 int dx=x1-x0+1,dy=y1-y0+1, /* size */
5263 ad,ya,yb,yc,yd,ye,yf,xa,xb, /* tmp-vars */
5264 (*aa)[4]=sdata->aa; /* the four line ends (corner points), (x,y,dist^2,vector_idx) */
5265
5266 // --- test i ---------------------------------------------------
5267 // if(box1->dots==1) // what about \it neighbouring ij
// one-shot block: entered only if dy>3 and dx>0, always left by Break/break/return
5268 for(ad=d=100;dy>3 && dx>0;){ // min 3x4 without dot
5269 DBG( wchar_t c_ask='i'; )
5270 if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
5271 if (box1->num_frames > 2) Break; /* ~% , ToDo: allow double dot i? */
5272 // ToDo: ':' check that high of dot is smaller than the vert. line!
5273 /*
5274 * o <== ya
5275 * o
5276 *
5277 * ooo <== yb
5278 * o
5279 * o
5280 * o
5281 * ooo
5282 */
5283 ya=yb=y0;
5284 if (box1->dots!=1) ad=98*ad/100;
5285 if (sdata->holes.num>0) ad=96*ad/100; // ~ fat italic a gocr0801_bad5
// shortcut for tall narrow boxes (dy>3*dx) when m2 is set; this while is
// also a one-shot block, it always ends in break or return
5286 while(dy>3*dx && box1->m2){ // test for vertical i without detected dot
5287 i= loop(bp,dx/2,dy-1 ,dy,cs,0,UP);
5288 if (dy-1-i<box1->m3-2) break;
5289 i+=loop(bp,dx/2,dy-1-i,dy,cs,1,UP);
5290 // distance upper end to m2 > (m2-m1)/3
5291 if (3*abs(dy-1-i-box1->m2)>box1->m2-box1->m1) break;
5292 if( get_bw(x0,x1,y0,(box1->m1+box1->m2)/2,box1->p,cs,1) == 1 )
5293 if( get_bw(x0,x1,y1-i ,y1-i ,box1->p,cs,1) == 0
5294 || get_bw(x0,x1,y1-i-1,y1-i-1,box1->p,cs,1) == 0
5295 || get_bw(x0,x1,y1-i-2,y1-i-2,box1->p,cs,1) == 0 )
5296 {
5297 Setac(box1,'i',ad);
5298 return 'i'; /* believe me, that is an "i"! */
5299 } break;
5300 }
5301 // if( box1->dots!=1 ) Break;
5302 if( box1->m2 && 2*y0>=box1->m2+box1->m1 ) ya=box1->m1;
5303 // ya includes possible i-dot
5304 // failed on handwritten i m1=-22 frame2y=-31..-24 dy=15
5305 // volume periphery(de:Umfang) num_vectors
5306 //# frame 0 ( +52, 32,14) 3*vol1<2*vol0 2*dy1<dy0
5307 //# frame 1 ( +28, 18, 8)
5308 // Aug2010
// A second frame is required (else Break: missing dot).  Its size checks
// are only applied when its volume exceeds 1/8 of the first frame; the
// vectors of frame 1 are scanned for their smallest (ya) and largest (y)
// y coordinate.
5309 if (box1->num_frames>1) { // vector based i-dot check
5310 if (box1->frame_vol[1]>box1->frame_vol[0]/8) // no dust
5311 {
5312 if (3*box1->frame_vol[1]>2*box1->frame_vol[0]) Break; // too big
5313 if (3*box1->frame_per[1]>2*box1->frame_per[0]) Break; // not compact
5314 y=0;
5315 for (j=box1->num_frame_vectors[0];
5316 j<box1->num_frame_vectors[1] && j<MaxFrameVectors; j++) {
5317 if ( ya > box1->frame_vector[j][1]) // find top
5318 ya = box1->frame_vector[j][1];
5319 if ( y < box1->frame_vector[j][1]) // find bottom
5320 y = box1->frame_vector[j][1];
5321 }
5322 if (box1->m2 && ya>box1->m2+2) Break; // dot starts below m2
5323 if (2*y>=ya+y1) Break; // point ends to low
5324 }
5325 } else { Break; /* missing i-dot */ }
5326 // out_x(box1);
5327 #if 0 // dont work, because dots are removed as neighbours Aug10
5328 /* pixel based i-dot check */
5329 // search upper end of i-dot
5330 for (y=ya;3*y<ya+2*y1;y++)
5331 if( get_bw(x0,x1,y,y,box1->p,cs,1) == 1 ) break;
5332 if (3*y>=ya+2*y1) Break; // hmm, gap only, no dot?
5333 if (2*y>=ya+ y1) { ad=98*ad/100; MSG(fprintf(stderr,"wide gap");)}
5334 ya=y;
5335 if (box1->m2 && ya>box1->m2+2) Break; // dot starts below m2
5336 for ( ;2*y<y1+ya;y++) if( get_bw(x0,x1,y,y,box1->p,cs,1) != 1 ) break;
5337 if (2*y>=ya+y1) Break; // point ends to low
5338 for ( ;2*y<y1+ya;y++) if( get_bw(x0,x1,y,y,box1->p,cs,1) == 1 ) break;
5339 // start y0 of base object
5340 yb=y;
5341 #endif
// yb still equals y0 here: its only other assignment is inside the
// disabled #if 0 block above
5342 MSG(fprintf(stderr," ya m1 yb %d %d %d", ya-box1->y0, box1->m1-y0, yb-box1->y0);)
5343 if (5*yb>=3*ya+2*y1) ad=99*ad/100; // large gap
5344 if (2*yb>= ya+ y1) ad=97*ad/100; // very large gap, ~:
5345 // if (5*yb>=2*ya+3*y1) Break; // huge gap, ~:
5346 // handwritten: ya=-31 m1=-22 yb=0 y1=15
5347 // may be its better to trust the frame melting algorithm (ad=96%=ok)
5348 if (5*yb>=1*ya+4*y1) Break; // huge gap, ~:
// NOTE(review): yb is an absolute row (y0) but is passed as a row of bp,
// which is addressed box-relative everywhere else -- confirm intended
5349 if (loop(bp,dx-1,yb+(y1-ya+1)/32,dx,cs,0,LE)>dx/2) // unusual (right part of ouml)
5350 ad=95*ad/100;
5351
5352 // printf(" num_cross dy/2=%d %d\n",dy/2, num_cross(0,dx-1,dy/2,dy/2,bp,cs));
5353 // printf(" dots=%d\n",box1->dots); out_x(box1);
5354 // \sl ~f. !
// walk up from the bottom row (absolute rows of box1->p) to the first row
// for which get_bw does not return 1; reject if that row is in the lowest quarter
5355 for (y=y1;y>ya;y--) if( get_bw(x0,x1,y,y,box1->p,cs,1) != 1 ) break;
5356 if (y>(ya+3*y1)/4) Break;
5357 if (y>(ya+2*y1)/3) ad=96*ad/100;
5358
// from here on y, yc, yd, ye, yf are box-relative rows of bp.
// yc/yd: end of the 1-crossing rows and of the following 2-crossing rows
// going down; ye/yf: the same going up from the middle row
5359 y=(y1-yb+1)/2+yb-y0; /* only one vertical line, italic i is more an tall S */
5360 if( num_cross(0,dx-1,y,y,bp,cs) != 1 ) Break;
5361 for(;y<=y1-y0;y++){ if( num_cross(0,dx-1,y,y,bp,cs) != 1 ) break; } yc=y;
5362 for(;y<=y1-y0;y++){ if( num_cross(0,dx-1,y,y,bp,cs) != 2 ) break; } yd=y;
5363 if( yd<3*(y1-yb+1)/4+yb-y0 ) Break;
5364 y=(y1-yb+1)/2+yb-y0;
5365 for(;y>0;y--){ if( num_cross(0,dx-1,y,y,bp,cs) != 1 ) break; } ye=y;
5366 for(;y>0;y--){ if( num_cross(0,dx-1,y,y,bp,cs) != 2 ) break; } yf=y;
5367 if( yf>(y1-yb+1)/4+yb-y0 ) Break;
// more than two 2-crossing rows at the lower end: extra checks around row yc
5368 if(yd>yc+2){
5369 xa=loop(bp, 0,yc-1,dx,cs,0,RI);
5370 xb=loop(bp,dx-1,yc-1,dx,cs,0,LE);
5371 if(
5372 xb-loop(bp,dx-1,yc,dx,cs,0,LE) /* Dec00 */
5373 > xa-loop(bp, 0,yc,dx,cs,0,RI) ){
5374 y= loop(bp,dx-xb,yc-1,dy,cs,0,DO);
5375 if(y>0){
5376 i=loop(bp,dx-xb-1,yc-1+y-1,dy,cs,0,DO);
5377 if( i>0 ) y+=i-1;
5378 }
5379 if( yc-1+y < yd-1 ) Break;
5380 } else {
5381 y= loop(bp,11*xa/16,yc-1,dy,cs,0,DO);
5382 if( yc-1+y < yd-2 ) Break;
5383 }
5384 }
// same idea for more than two 2-crossing rows at the upper end (around row ye)
5385 if(yf<ye-2){
5386 x=loop(bp,0 ,ye+1,dx,cs,0,RI);
5387 y=loop(bp,x-1,ye+1,dy,cs,0,UP);
5388 i=loop(bp,x ,ye+2-y,dy,cs,0,UP);
5389 if( i>0 ) y+=i-1;
5390 if( ye+1-y > yf+1 ) Break;
5391 }
5392 if( 2*y0 <= box1->m1+box1->m2
5393 && loop(bp,0, 0,dx,cs,0,RI)+1
5394 < loop(bp,0,dx/2,dx,cs,0,RI) ) ad=97*ad/100;
5395
5396 if( gchar ) // i is more often than j, be sure that really correct Mai00
5397 if( loop(bp, 0,2*dy/4,dx,cs,0,RI)
5398 -loop(bp,dx-1,2*dy/4,dx,cs,0,LE)>dx/8 ) Break;
5399
5400 // could be a broken + or similar thing?
5401 if( 3 * ya > box1->m1 + 2*box1->m2 ) ad=90*ad/100;
5402
5403 if( loop(bp,dx-1,3*dy/4,dx,cs,0,LE)>dx/2
5404 && loop(bp,dx-1, dy-1,dx,cs,0,LE)<dx/4 ) Break; // ~d=cl
5405
5406 // test for é
5407 if( dx>5 && num_cross(x0+dx/2,x0+dx/2, ya, y1 ,box1->p,cs) >= 3 )
5408 ad=95*ad/100;
5409
5410 Setac(box1,'i',ad);
5411 break;
5412 }
5413 // --- test j ---------------------------------------------------
5414 // if(box1->dots==1) // what about \it neighbouring ij
// one-shot block like the i test; the dot is searched pixel based here
5415 for(ad=d=100;dy>4 && dx>0;){ // min 3x4
5416 DBG( wchar_t c_ask='j'; )
5417 if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
5418 // ToDo frames > 2 Break
5419 if (dx==1 && !gchar) Break; /* 2017-03 sure not a 'j', old=96% */
5420 ya=y0;
5421 if( box1->m2 && 2*y0>=box1->m2+box1->m1 ) ya=box1->m1;
5422
// three scans over absolute rows of box1->p, all limited to the upper half
// (2*y<ya+y1): first row with get_bw==1 (becomes ya), then first row
// with get_bw!=1, then next row with get_bw==1 (becomes yb)
5423 for(y=ya;2*y<ya+y1;y++) if( get_bw(x0,x1,y,y,box1->p,cs,1) == 1 ) break;
5424 if(2*y>=ya+y1) Break; // hmm only gap
5425 ya=y;
5426 if( box1->m2 && ya>box1->m2+2 ) Break;
5427 for( ;2*y<y1+ya;y++) if( get_bw(x0,x1,y,y,box1->p,cs,1) != 1 ) break;
5428 if(2*y>=ya+y1) Break; // hmm no gap
5429 for( ;2*y<y1+ya;y++) if( get_bw(x0,x1,y,y,box1->p,cs,1) == 1 ) break;
5430 if(2*y>=ya+y1) Break; // hmm very large gap
5431 yb=y;
// NOTE(review): y is an absolute row here but is passed as a row of bp,
// which is addressed box-relative everywhere else -- confirm intended
5432 if( loop(bp,dx-1,y+(y1-ya+1)/32,dx,cs,0,LE)>dx/2 ) Break; // unusual (right part of ouml)
5433
5434 // printf(" num_cross dy/2=%d %d\n",dy/2, num_cross(0,dx-1,dy/2,dy/2,bp,cs));
5435 // printf(" dots=%d\n",box1->dots); out_x(box1);
5436 // \sl ~f. !
// reject if any row from the middle (ya+y1)/2 down to y1 gives get_bw!=1
5437 for(y=(ya+y1)/2;y<=y1;y++) if( get_bw(x0,x1,y,y,box1->p,cs,1) != 1 ) break;
5438 if(y<=y1) Break;
5439
// from here on y, yc, yd, ye, yf are box-relative rows of bp (same scheme
// as in the i test, but the middle row may have up to 2 crossings)
5440 y=(y1-yb+1)/2+yb-y0; /* only one vertical line, italic i is more an tall S */
5441 if( num_cross(0,dx-1,y,y,bp,cs) >2 ) Break;
5442 for(;y<=y1-y0;y++){ if( num_cross(0,dx-1,y,y,bp,cs) != 1 ) break; } yc=y;
5443 for(;y<=y1-y0;y++){ if( num_cross(0,dx-1,y,y,bp,cs) != 2 ) break; } yd=y;
5444 if( yd<3*(y1-yb+1)/4+yb-y0 ) Break;
5445 y=(y1-yb+1)/2+yb-y0;
5446 for(;y>0;y--){ if( num_cross(0,dx-1,y,y,bp,cs) != 1 ) break; } ye=y;
5447 for(;y>0;y--){ if( num_cross(0,dx-1,y,y,bp,cs) != 2 ) break; } yf=y;
5448 if( yf>(y1-yb+1)/4+yb-y0 ) Break;
5449 if(yd>yc+2){
5450 xa=loop(bp, 0,yc-1,dx,cs,0,RI);
5451 xb=loop(bp,dx-1,yc-1,dx,cs,0,LE);
5452 if(
5453 xb-loop(bp,dx-1,yc,dx,cs,0,LE) /* Dec00 */
5454 > xa-loop(bp, 0,yc,dx,cs,0,RI) ){
5455 y= loop(bp,dx-xb,yc-1,dy,cs,0,DO);
5456 if(y>0){
5457 i=loop(bp,dx-xb-1,yc-1+y-1,dy,cs,0,DO);
5458 if( i>0 ) y+=i-1;
5459 }
5460 if( yc-1+y < yd-1 ) Break;
5461 } else {
5462 y= loop(bp,11*xa/16,yc-1,dy,cs,0,DO);
5463 if( yc-1+y < yd-2 ) Break;
5464 }
5465 }
5466 if(yf<ye-2){
5467 x=loop(bp,0 ,ye+1,dx,cs,0,RI);
5468 y=loop(bp,x-1,ye+1,dy,cs,0,UP);
5469 i=loop(bp,x ,ye+2-y,dy,cs,0,UP);
5470 if( i>0 ) y+=i-1;
5471 if( ye+1-y > yf+1 ) Break;
5472 }
5473 if( 2*y0 <= box1->m1+box1->m2
5474 && loop(bp,0, 0,dx,cs,0,RI)+1
5475 < loop(bp,0,dx/2,dx,cs,0,RI) )
5476 { ad=97*ad/100; MSG(fprintf(stderr,"ad=%d",ad);) }
5477 // if (loop(bp,0,dy-1,dx,cs,0,RI)
5478 // -loop(bp,0,dy-3,dx,cs,0,RI)>1+dx/16)
5479 // { ad=96*ad/100; MSG(fprintf(stderr,"ad=%d",ad);) } // ~c
5480
// counterpart of the gchar rule in the i test: there the box is rejected
// when the left-minus-right margin difference at mid height is > dx/8,
// here when it is <= dx/8
5481 if( gchar ) // i is more often than j, be sure that really correct Mai00
5482 if( loop(bp, 0,2*dy/4,dx,cs,0,RI)
5483 -loop(bp,dx-1,2*dy/4,dx,cs,0,LE)<=dx/8 ) Break;
5484 // could be a broken + or similar thing?
5485 if( 3 * ya > box1->m1 + 2*box1->m2 ) ad=80*ad/100;
5486 if (!gchar) ad=96*ad/100;
5487 if( box1->dots!=1 ) ad=98*ad/100;
5488
5489 Setac(box1,'j',ad);
5490
5491 break;
5492 }
5493 // --- test I ---------------------------------------------------
// one-shot block; entered only for boxes higher than wide with
// 5*dy > 4*(m3-m2).  All bp rows and columns below are box-relative.
5494 for(ad=d=100;dy>4 && dy>dx && 5*dy>4*(box1->m3-box1->m2);){ // min 3x4
5495 DBG( wchar_t c_ask='I'; )
5496 if( box1->dots==1 ) Break;
5497 if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
5498
5499 x =loop(bp,0, dy/2,dx,cs,0,RI); // convex? divided Q
5500 if(loop(bp,0,7*dy/8,dx,cs,0,RI) > x+dx/8) Break;
// reject if a row and the row dy/16 below it both differ from one crossing
5501 for( y=dy/16;y<dy-1-dy/16;y++ )
5502 if( num_cross(0, dx-1, y , y ,bp,cs) != 1 )
5503 if( num_cross(0, dx-1, y+dy/16 , y+dy/16 ,bp,cs) != 1 ) break;
5504 if( y<dy-1-dy/16 ) Break;
// loop(..,0,RI) is used to skip the left margin and loop(..,1,RI) to
// measure the stroke run that follows (helper defined elsewhere)
5505 x =loop(bp,0, dy/2,dx,cs,0,RI);
5506 i5=loop(bp,x, dy/2,dx,cs,1,RI); // center width
5507 for(y=dy/4;y<3*dy/4;y++ ){ // same width ?
5508 x =loop(bp,0, y,dx,cs,0,RI);
5509 x =loop(bp,x, y,dx,cs,1,RI); // width
5510 if( abs(x-i5)>1+dx/8 ) break;
5511 } if( y<3*dy/4 ) Break;
5512 // out_x(box1);
5513
// i1/i2: max width and its row in the upper quarter,
// i3/i4: max width and its row in the lower quarter
5514 // upper max width
5515 for(i2=i1=0,y=0;y<dy/4;y++ ){
5516 x =loop(bp,0, y,dx,cs,0,RI);
5517 x =loop(bp,x, y,dx,cs,1,RI); if(x>i1){ i1=x;i2=y; }
5518 }
5519 for(i4=i3=0,y=3*dy/4;y<dy;y++ ){
5520 x =loop(bp,0, y,dx,cs,0,RI);
5521 x =loop(bp,x, y,dx,cs,1,RI); if(x>i3){ i3=x;i4=y; }
5522 }
5523 if( abs(i3-i1)>1+dx/8 ) Break; // if i3>>i5 more sure!
// placeholder: the branch below is empty
5524 if( i1>i5 ){ // look for edges else *80%
5525 }
5526 if(i1+1<i5 && !hchar) Break; // Jun00
5527
// i1 and i2 are reused from here on: they now hold the x center of the
// stroke at row dy/8 (i1) and near row dy-1-dy/8 (i2)
5528 // calculate upper and lower mass center
5529 x =loop(bp,0, dy/8,dx,cs,0,RI); i1=x;
5530 x+=loop(bp,x, dy/8,dx,cs,1,RI); i1=(i1+x-1)/2;
5531
5532 x =loop(bp,0,dy-1-dy/8,dx,cs,0,RI); i2=x;
5533 x+=loop(bp,x,dy-1-dy/8,dx,cs,1,RI); i2=(i2+x-1)/2;
5534 x =loop(bp,0,dy-2-dy/8,dx,cs,0,RI); i=x;
5535 x+=loop(bp,x,dy-2-dy/8,dx,cs,1,RI); i=(i+x-1)/2; if( i>i2 ) i2=i;
5536
5537 // printf(" get_line(%d,%d) %d\n",i1,i2,
5538 // get_line2(i1,dy/8,i2,dy-1-dy/8,bp,cs,100));
5539 if( get_line2(i1,dy/8,i2,dy-1-dy/8,bp,cs,100)<95 ) Break;
5540 x =(i1-i2+4)/8; i1+=x; i2-=x;
5541
// i keeps the upper left extent; it is compared with the upper right,
// lower left and lower right extents (each taken as max of two rows)
5542 // upper and lower width (what about serifs?)
5543 y=dy/8;
5544 x =loop(bp,i1, y+0,dx,cs,1,LE); i=x;
5545 x =loop(bp,i1, y+1,dx,cs,1,LE); if(x>i)i=x;
5546 x =loop(bp,i1, y+0,dx,cs,1,RI); j=x;
5547 x =loop(bp,i1, y+1,dx,cs,1,RI); if(x>j)j=x; if(abs(i-j)>1+dx/8)Break;
5548 x =loop(bp,i2,dy-y-1,dx,cs,1,LE); j=x;
5549 x =loop(bp,i2,dy-y-2,dx,cs,1,LE); if(x>j)j=x; if(abs(i-j)>1+dx/8)Break;
5550 x =loop(bp,i2,dy-y-1,dx,cs,1,RI); j=x;
5551 x =loop(bp,i2,dy-y-2,dx,cs,1,RI); if(x>j)j=x; if(abs(i-j)>1+dx/8)Break;
5552
5553 if(dy>15) // v024a4
5554 if( loop(bp,dx-1,dy/16 ,dx,cs,0,LE)
5555 > loop(bp,dx-1,dy/4 ,dx,cs,0,LE)+1+dx/32 ) Break; // ~bad ) (thin)
5556
// count rows with other than one crossing; the count stops at 2
5557 for(i=0,y=(dy+7)/16;y<(15*dy+7)/16 && i<2;y++)
5558 if( num_cross(0,dx-1,y,y,bp,cs) != 1 ) i++;
5559 if( i>1+(dx+8)/32 ) Break; // rnd80-Droid-Sans-Mono I
5560
5561 if(!hchar){ // right part (bow) of h is never a l
5562 if( get_bw(dx/4,dx/4, 0,dy/4,bp,cs,1) == 1
5563 && get_bw(dx/4,dx/4,dy/2,dy-1,bp,cs,1) == 0 ) Break;
5564 if( loop(bp, 0,dy/4,dx,cs,0,RI)> dx/4
5565 && loop(bp,dx-1,dy/4,dx,cs,0,LE)<=dx/4
5566 && loop(bp, 1, 0,dy,cs,0,DO)<=dy/4 ) Break; // ~z
5567 }
5568
5569 if( get_bw(x1,x1,y0 ,y1 ,box1->p,cs,2) != 2
5570 && get_bw(x0,x1,y0 ,y0 ,box1->p,cs,2) != 2
5571 && get_bw(x0,x1,y1 ,y1 ,box1->p,cs,2) != 2
5572 && get_bw(x0,x0,y0+1,y1-1,box1->p,cs,1) != 1 ) Break; /* ~] */
5573
5574 if ( loop(bp,dx-1, dy/2,dx,cs,0,LE) <= dx/8
5575 && loop(bp, 0, dy/2,dx,cs,0,RI) > dx/2 ) Break; /* ~] 2010-10 */
5576 if ( loop(bp,dx-1, dy/2,dx,cs,0,LE) > dx/2
5577 && loop(bp, 0, dy/2,dx,cs,0,RI) <= dx/8 ) Break; /* ~[ 2010-10 */
5578
5579 if ( loop(bp,dx-1, dy/4,dx,cs,0,LE) > dx/2
5580 && loop(bp,dx-1,3*dy/4,dx,cs,0,LE) > dx/2
5581 && loop(bp, 0, dy/2,dx,cs,0,RI) < dx/4 ) Break; /* ~[ */
5582
// x / i: left and right margin at mid height, compared with the margins
// at dy/8 and 7*dy/8
5583 x =loop(bp, 0,dy/2,dx,cs,0,RI); // convex/concave? ~()
5584 i =loop(bp,dx-1,dy/2,dx,cs,0,LE);
5585 if( loop(bp, 0,7*dy/8,dx,cs,0,RI) > x+dx/8
5586 && loop(bp, 0, dy/8,dx,cs,0,RI) > x+dx/8
5587 && loop(bp,dx-1,7*dy/8,dx,cs,0,LE) < i-dx/8
5588 && loop(bp,dx-1, dy/8,dx,cs,0,LE) < i-dx/8 ) Break; // ~(
5589 if( loop(bp, 0,7*dy/8,dx,cs,0,RI) < x-dx/8
5590 && loop(bp, 0, dy/8,dx,cs,0,RI) < x-dx/8
5591 && loop(bp,dx-1,7*dy/8,dx,cs,0,LE) > i+dx/8
5592 && loop(bp,dx-1, dy/8,dx,cs,0,LE) > i+dx/8 ) Break; // ~)
5593 if( loop(bp, 0, dy/8,dx,cs,0,RI)
5594 -(dx-loop(bp,dx-1,7*dy/8,dx,cs,0,LE)) > dx/4 ) Break; // ~/
5595 if( loop(bp, 0, 0,dx,cs,0,RI) > dx/2 // ToDo: check for serifs
5596 && loop(bp, 0, dy/8,dx,cs,0,RI) > dx/2
5597 && loop(bp,dx-1,dy-1 ,dx,cs,0,LE) > dx/2
5598 && loop(bp,dx-1,dy-1-dy/8,dx,cs,0,LE) > dx/2 ) ad=99*ad/100; // ~/
5599
5600 if (box1->m2 && 3*y0>box1->m1+2*box1->m2)
5601 if( get_bw(x0+dx/8,x1-dx/8,box1->m1,(box1->m1+box1->m2)/2,box1->p,cs,1) == 1 )
5602 Break; // ~i
5603
// NOTE(review): same condition as the Break near the top of this test,
// but i1 no longer holds the upper max width here (it was reused for
// the upper mass center) -- confirm this comparison is intended
5604 if(i1+1<i5 && !hchar){ ad=65*ad/100; MSG({}) } // ~ slanted I
5605
5606 // be sure only for serif
5607 i3=loop(bp,dx-1, dy/4,dx,cs,0,LE);
5608 i4=loop(bp, 0,dy-1-dy/4,dx,cs,0,RI);
5609 if (i3<2 || i4<2
5610 || get_bw(x1-i3/4,x1-i3/4,y0,y0+dy/4,box1->p,cs,1) != 1
5611 || get_bw(x0+i4/4,x0+i4/4,y1-dy/4,y1,box1->p,cs,1) != 1 )
5612 { ad=99*ad/100; MSG(fprintf(stderr,"ad=%d",ad);) } // ToDo: improve it
5613 if(!hchar){ ad=96*ad/100; MSG({}) } // ~bad_small_r
5614 if (box1->m4 && y1<box1->m4) { // probably lower dot?
5615 if ((dx>2 && get_bw(x0+1,x1-1,y1+1,box1->m4,box1->p,cs,1) == 1)
5616 || (dx<3 && get_bw(x0 ,x1 ,y1+1,box1->m4,box1->p,cs,1) == 1))
5617 { ad=96*ad/100; MSG(fprintf(stderr,"ad=%d",ad);) }
5618 } // ~!
5619 // a---b
5620 // I
5621 // I
5622 // c---e
5623 // check against Z
// (ax,ay)/(bx,by): left and right end of the top bar searched in the
// upper quarter; (cx,cy)/(ex,ey): left and right end of the bottom bar
// searched in the lower quarter
// NOTE(review): the ax update tests dx-i-1 but stores dx-i, unlike the
// bx update which tests and stores the same value -- confirm intended
5624 for(bx=0,ax=dx,ay=by=y=0;y<dy/4;y++){
5625 i =loop(bp,dx-1 ,y,dx,cs,0,LE); if (dx-i-1>bx) { bx=dx-1-i; by=y; }
5626 i+=loop(bp,dx-1-i,y,dx,cs,1,LE); if (dx-i-1<ax) { ax=dx-i; ay=y; }
5627 }
5628 for(cx=dx,ex=0,ey=cy=y=dy-1;y>dy-1-dy/4;y--){
5629 i =loop(bp,0,y,dx,cs,0,RI); if (i<cx) { cx=i; cy=y; }
5630 i+=loop(bp,i,y,dx,cs,1,RI); if (i>ex) { ex=i; ey=y; }
5631 }
// i and j: free run to the right from a point on the line a-c and free
// run to the left from a point on the line b-e; a strong imbalance
// lowers ad
5632 x=(3*ax+cx)/4; y=(3*ay+cy)/4; i= loop(bp,x,y,dx,cs,0,RI);
5633 MSG(fprintf(stderr,"xy= %3d %3d i= %3d ad=%d",x,y,i,ad);)
5634 x=(3*bx+ex)/4; y=(3*by+ey)/4; j= loop(bp,x,y,dx,cs,0,LE);
5635 if (j>1 && (2*i>3*j || 3*i<2*j )) ad=99*ad/100; // 2010-10-10 invalid2010
5636 if (j>1 && ( i>2*j || 2*i< j )) ad=97*ad/100; // j>0 to j>1
5637 MSG(fprintf(stderr,"xy= %3d %3d j= %3d ad=%d",x,y,j,ad);)
5638 i=loop(bp,0,0,dy,cs,0,DO);
5639 if (i>dy/8 && i<dy/2)
5640 { ad=99*ad/100;MSG(fprintf(stderr,"ad=%d",ad);) } // ~1
5641 if (loop(bp,dx-1,0,dx,cs,0,LE)
5642 -loop(bp, 0,0,dx,cs,0,RI)>dx/4) {ad=96*ad/100;MSG({})} // ~l 5x7
5643 if (loop(bp,dx-1,0,dx,cs,0,LE) // tmp08/0811qemu1
5644 -loop(bp, 0,0,dx,cs,0,RI)==dx/4) ad=98*ad/100; // ~l 4x10 2010-09
5645
5646 // JS-2010-09 do we have a full square (bad font) we can heve I or l
5647 // I and l should have 98% and 99% after context correction (JS2010-09)
5648 if (ad>98 && get_bw(x0,x1,y0,y1,box1->p,cs,2) == 0)
5649 { ad=99*ad/100; MSG(fprintf(stderr,"ad=%d",ad);) }
5650 if (gchar) ad=98*ad/100; // J
5651 if (box1->m3 && 2*y1<=box1->m2+box1->m3) {ad=96*ad/100;MSG({})} // '
5652 // 2010-10-01 sample tmp10/barcode_code128_145
5653 if (dx<3 && dy>10 && box1->m4==0) { ad=ad*95/100;MSG({})} // just a vertical line?
5654
5655 Setac(box1,'I',ad);
5656 break;
5657 }
5658 // --- test J --------------------------------------------------- 22Nov06
// one-shot block; works on the frame vectors of box1 and the corner
// table aa via nearest_frame_vector() and line_deviation(), both
// defined elsewhere
5659 for(ad=d=100;dy>4 && dy>=dx && dx>2;){ // min 3x4 ~Y)]d',
5660 // rewritten for vectors 0.42
// these i1..i5 shadow the function level variables of the same name
5661 int ld, i1, i2, i3, i4, i5, i6, i7; // line derivation + corners
5662 DBG( wchar_t c_ask='J'; )
5663 if (sdata->holes.num > 0) Break; /* no hole */
5664 /* half distance to the center */
5665 d=2*sq(128/4);
5666 /* now we check for the upper right end of the J */
5667 if (aa[3][2]>d) Break; /* [2] = distance */
5668 /* searching for 4 notches between neighbouring ends */
5669
5670 /*
5671 type A B
5672
5673 6OOOO 6O5
5674 7O5 7O
5675 O O
5676 O O
5677 2O 1O4 1O4
5678 OO 2OO
5679 3 3
5680 */
5681
5682 /* Warning: aa0 can be left upper or left lower point for type B */
5683 /* get a point on the inner low left side of the J */
5684 i =nearest_frame_vector(box1,aa[3][3],aa[1][3],(x0+x1)/2,y0);
5685 /* failed for slanted J before Jun09 */
5686 i1=nearest_frame_vector(box1,i ,aa[1][3], x1+dx/8,y1-dy/8);
5687 /* get the most left point on the lower part of the J */
5688 i2=nearest_frame_vector(box1,i1,aa[3][3], x0-2*dx, y1-dy/8);
5689 /* get a point on the middle of the bottom of the J */
5690 i3=nearest_frame_vector(box1,aa[1][3],aa[2][3], (x0+x1)/2, y1);
5691 /* get a point on the outer low right side of the J */
5692 i4=nearest_frame_vector(box1,aa[1][3],aa[3][3], x1, (y0+2*y1)/3);
5693 /* get a point on the outer right side below top serif */
5694 i5=nearest_frame_vector(box1,aa[2][3],aa[3][3], (x0+2*x1)/3,y0);
5695 /* get a point on the left side of upper serif */
5696 i6=nearest_frame_vector(box1,aa[3][3],i1, x0, y0);
5697 /* get a point on the most right left side of upper serif */
5698 i7=nearest_frame_vector(box1,i6,i1, x1, y0);
5699 MSG(fprintf(stderr," i1-i7 %d %d %d %d %d %d %d",i1,i2,i3,i4,i5,i6,i7);)
5700
5701 /* check the most left point on middle left area 3 vs. J 2013-06 */
5702 i =nearest_frame_vector(box1,i7,i1,x0,y0+dy/2);
5703 if (box1->frame_vector[i ][0]<
5704 box1->frame_vector[i1][0]-dx/8-1) Break; // 3
5705 /* check the highest point on lower left area */
5706 i =nearest_frame_vector(box1,i1,i3,x0,y0);
5707 if (box1->frame_vector[i ][1]-y0<dy/4) Break; // U
5708 if (box1->frame_vector[i ][1]-y0<=dy/2) ad=97*ad/100; // imperfect a
5709 /* check the lowest point on upper left area, serif? */
5710 j =nearest_frame_vector(box1,i6,i7,x0,y1);
5711 if (box1->frame_vector[i ][1]
5712 -box1->frame_vector[j ][1]<=dy/4) Break; // imperfect a
5713 if (box1->frame_vector[i7][1]>y0+dy/4) Break; // not to low
5714 if (box1->frame_vector[i1][1]
5715 -box1->frame_vector[i7][1]<dy/2) Break;
5716 if (box1->frame_vector[i4][1]
5717 -box1->frame_vector[i5][1]<dy/2) Break;
5718 if (box1->frame_vector[i7][0]<x0+dx/2) Break;
5719 if (box1->frame_vector[i1][0]
5720 -box1->frame_vector[i2][0]<=dx/8) Break; // ~1
5721 if (box1->frame_vector[i1][0]
5722 -box1->frame_vector[i2][0]<=dx/4) ad=ad*99/100; // ~1
5723 if (box1->frame_vector[i6][1]>y0+dy/8) ad=99*ad/100; // ~1
5724 if (aa[0][2]==0) { // ]?
5725 ad=99*ad/100;
5726 if (aa[1][2]==0) ad=98*ad/100;
5727 if (aa[2][2]<=aa[3][2]) ad=97*ad/100;
5728 }
5729
// the three loops below walk the vectors of frame 0 cyclically (index
// modulo num_frame_vectors[0]); j is assigned in each loop header but
// not read afterwards in this test
5730 /* check for left bow */
5731 for (j=i=i2;i!=i4;i=(i+1)%box1->num_frame_vectors[0]) {
5732 if (box1->frame_vector[ i][0] /* [0]=x */
5733 <box1->frame_vector[i1][0]) break; /* curve? */
5734 } if (i==i4) Break; // ~I
5735 /* check for no right bow */
5736 for (j=i=i2;i!=i4;i=(i+1)%box1->num_frame_vectors[0]) {
5737 if (box1->frame_vector[ i][0] /* [0]=x */
5738 >box1->frame_vector[i4][0]) break;
5739 } if (i!=i4) Break; // ~I
5740 /* check that the contour from i5 to i6 never goes below y0+dy/4 */
5741 for (j=i=i5;i!=i6;i=(i+1)%box1->num_frame_vectors[0]) {
5742 if (box1->frame_vector[ i][1] > y0+dy/4) break;
5743 } if (i!=i6) Break; // ~Y
5744 /* check if upper left and lower left points are joined directly */
5745 ld=line_deviation(box1, i7, i1);
5746 MSG(fprintf(stderr," i7,i1 %d %d linedist= %d/%d",i7,i1,ld,2*sq(1024/4));)
5747 if (ld >2*sq(1024/4)) Break;
// graded penalty: the four thresholds below are cumulative, so a larger
// deviation ld lowers ad up to four times
5748 if (5*ld >4*2*sq(1024/4)) ad=99*ad/100; // ~3
5749 if (6*ld >4*2*sq(1024/4)) ad=99*ad/100; // ~3
5750 if (7*ld >4*2*sq(1024/4)) ad=99*ad/100; // ~3
5751 if (8*ld >4*2*sq(1024/4)) ad=99*ad/100; // ~3
5752 /* check if lower right and upper right points are joined directly */
5753 ld=line_deviation(box1, i4, i5);
5754 MSG(fprintf(stderr," i4,i5 %d %d linedist= %d/%d",i4,i5,ld,2*sq(1024/4));)
5755 if (ld >2*sq(1024/4)) Break;
5756 if (5*ld >4*2*sq(1024/4)) ad=99*ad/100;
5757
5758 // J exists as gchar and ~gchar
5759 if (!hchar){ ad=99*ad/100; }
5760 if (box1->num_frames>1) {
5761 ad=98*ad/100; // j
5762 }
5763 Setac(box1,'J',ad);
5764 break;
5765 }
// result as left in box1->c after the four tests
5766 return box1->c;
5767 }
5768
ocr0_brackets(ocr0_shared_t * sdata)5769 static wchar_t ocr0_brackets(ocr0_shared_t *sdata){
5770 struct box *box1=sdata->box1;
5771 pix *bp=sdata->bp;
5772 int i,j,d,x,y,i1,i2,i3,i4,i5,i6,hchar=sdata->hchar,
5773 x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
5774 int dx=x1-x0+1,dy=y1-y0+1, /* size */
5775 (*aa)[4]=sdata->aa, /* corner-points, (x,y,dist^2,vector_idx) */
5776 ad,r1,r2; /* tmp-vars */
5777 wchar_t bc=UNKNOWN;
5778
5779 // --- test > derived from xX ---------------------------------------------------
5780 // rewritten for vectors v0.41
5781 for(ad=d=100;dx>1 && dy>2;){ // min 3x2
5782 // 0 - indizes 0,1,i1,i2 pointing to edges of the char
5783 // \ .
5784 // \ .
5785 // i1,i2
5786 // /
5787 // /
5788 // 1
5789 DBG( wchar_t c_ask='>'; )
5790 if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
5791 if (sdata->holes.num > 0 && (dx<6 || dy<6)) Break; /* # */
5792 /* calculate the half distance to the center */
5793 d=2*sq(128/4);
5794 /* now we check for the 2 left ends of the > */
5795 if (aa[0][2]>d) Break; /* upper left end */
5796 if (aa[1][2]>d) Break; /* lower left end */
5797 if (aa[1][1]-aa[0][1]<dy/2) Break;
5798 /* searching for 4 notches between neighbouring ends */
5799
5800 /* run along left side from top to bottom */
5801 for (i1=i=aa[0][3];i!=aa[1][3];i=(i+1)%box1->num_frame_vectors[0]) {
5802 if (box1->frame_vector[i ][0]
5803 >=box1->frame_vector[i1][0]) i1=i; /* notice most right vector */
5804 } if (i1==i || i1==aa[0][3]) Break;
5805 /* calculate the distance to the center */
5806 x=box1->frame_vector[i1][0];
5807 y=box1->frame_vector[i1][1];
5808 if (2*x-aa[0][0]-aa[1][0]<dx) ad=99*ad/100;
5809 if (abs(aa[0][1]+aa[1][1]-2*y)>(dy+2)) Break;
5810 if ( aa[0][0]+aa[1][0]-2*x>=0) Break;
5811 d=line_deviation(box1, aa[0][3], i1); // 2017-03: fixed >sq(1024/4);
5812 /* check if upper left and center point are joined directly */
5813 MSG(fprintf(stderr,"x %d %d dist= %d/%d",x-x0,y-y0,d,sq(1024/4));)
5814 if (d >sq(1024/4)) Break; ad=ad-d*100/sq(1024);
5815 MSG(fprintf(stderr,"ad=%d", ad);)
5816 j=nearest_frame_vector(box1,i1,aa[1][3],x0-2*dx,y1-dy/8-1);
5817 d=line_deviation(box1, i1, j /*aa[1][3] */); // 2010-10 ocr-b
5818 /* check if lower left and center point are joined directly */
5819 MSG(fprintf(stderr,"x %d %d dist= %d/%d",x-x0,y-y0,d,sq(1024/4));)
5820 if (d >sq(1024/4)) Break; ad=ad-d*100/sq(1024);
5821 MSG(fprintf(stderr,"ad=%d", ad);)
5822
5823 /* run along right side from bottom to top */
5824 for (j=i2=i=aa[1][3];i!=aa[0][3];i=(i+1)%box1->num_frame_vectors[0]) {
5825 if (box1->frame_vector[i ][0]
5826 >=box1->frame_vector[i2][0]) i2=i; /* notice most right vector */
5827 // MSG(fprintf(stderr,"search right: %d %d %d %d",i,i2,aa[1][3],aa[0][3]);)
5828 } if (i2==i || i2==aa[1][3]) Break;
5829 /* calculate the distance to the center */
5830 x=box1->frame_vector[i2][0];
5831 y=box1->frame_vector[i2][1];
5832 if ( (aa[0][0]+aa[1][0]-2*x)>= 0 ) Break;
5833 if (abs(aa[0][1]+aa[1][1]-2*y)>(dy+2)/4) Break;
5834 if (aa[0][0]>=x || aa[1][0]>=x) Break;
5835
5836 j=nearest_frame_vector(box1,i2,aa[0][3],x0+dx/4,y0-2*dy);
5837 if (box1->frame_vector[j][0]-x0>dx/4) Break;
5838 d=line_deviation(box1, i2, j /* aa[0][3] */);
5839 /* check if upper left and center point are directly joined directly */
5840 MSG(fprintf(stderr,"x %d %d dist= %d/%d",x-x0,y-y0,d,sq(1024/4));)
5841 if (d >sq(1024/4)) Break; ad=ad-d*100/sq(1024);
5842 MSG(fprintf(stderr,"ad=%d", ad);)
5843
5844 j=nearest_frame_vector(box1,aa[1][3],i2,x0+dx/4,y1+2*dy);
5845 if (box1->frame_vector[j][0]-x0>dx/4) Break;
5846 i=nearest_frame_vector(box1,aa[1][3],i2,x1+2*dx,y0+dy/2+dy/8);
5847 if (box1->frame_vector[i][1]-y0>dy/2+dy/8) Break; // ~()
5848 d=line_deviation(box1, j /* aa[1][3] */, i /* i2 */); // 2010-10-08
5849 /* check if lower left and center point are directly joined */
5850 MSG(fprintf(stderr,"x %d %d dist= %d/%d",x-x0,y-y0,d,sq(1024/4));)
5851 if (d >sq(1024/4)) Break; ad=ad-d*100/sq(1024);
5852 MSG(fprintf(stderr,"ad=%d", ad);)
5853
5854 /*
5855 ToDo: calculate momentums or max derivations
5856 along lines to distinguish )]}>
5857 i1,i2
5858 */
5859
5860 if (sdata->gchar) ad=98*ad/100;
5861 if (sdata->hchar) ad=99*ad/100;
5862 bc='>';
5863 Setac(box1,bc,ad);
5864 break;
5865 }
5866 // --- test /\\ ------------------------------------------------
5867 // if(bc==UNKNOWN)
5868 // if(!box1->dots)
5869 for(ad=d=100;dx>3 && dy>3;){ // min 4x4 for 4x6 font
5870 DBG( wchar_t c_ask='/'; )
5871 if (sdata->holes.num > 0) Break; /* tolerant against a tiny hole */
5872 #if 1
5873 for(i=y=0;y<dy;y++){
5874 if( num_cross(0,dx-1,y,y,bp,cs) != 1 ) i++;
5875 if( loop(bp, 0,y,dx,cs,0,RI)
5876 + loop(bp,dx-1,y,dx,cs,0,LE)<3*dx/8 ) break;
5877 }
5878 if( y<dy ) Break;
5879 if ( i>2 || (i>0 && dy<16)) Break;
5880 #endif
5881 /* get the center as exact as possible */
5882 i2=dx-1-loop(bp,dx-1,dy/2 ,dx,cs,0,LE) // be exact for small fonts
5883 +dx-1-loop(bp,dx-1,dy/2+dy%2-1,dx,cs,0,LE)
5884 + loop(bp, 0,dy/2 ,dx,cs,0,RI)
5885 + loop(bp, 0,dy/2+dy%2-1,dx,cs,0,RI);
5886 if (abs(i2-2*dx)>1+dx/2) Break; // ???
5887 if (abs(i2-2*dx)> dx/2) ad=99*ad/100;
5888
5889 i1=loop(bp,dx-1,dy/16,dx,cs,0,LE); // top right end
5890 i3=loop(bp,dx-1,dy-1 ,dx,cs,0,LE); // bottom right end
5891 i4=loop(bp, 0,0 ,dx,cs,0,RI); // top left end
5892 i6=loop(bp, 0,dy-1 ,dx,cs,0,RI); // bottom left end
5893 // i= (box1->m4 + box1->m3)/2 - box1->m2;
5894 // out_x(box1);printf("() %d %d %d %d %d %d %d\n",i,i1,i2,i3,i4,i5,i6);
5895
5896 // ~lI
5897 for(i=i4,y=0;y<dy;y++){ // check for left deviations from line
5898 x=loop(bp,0 ,y,dx,cs,0,RI);if(abs(x-i)>dx/6+1 ) break; i=x;
5899 } if( y<dy ) Break;
5900 for(i=i1,y=0;y<dy;y++){ // check for right deviations from line
5901 x=loop(bp,dx-1,y,dx,cs,0,LE);if(abs(x-i)>dx/6+1 ) break; i=x;
5902 } if( y<dy ) Break;
5903
5904 if (box1->m2 && sdata->gchar) ad=99*ad/100; // 2010-10
5905 if (box1->m2 && !sdata->hchar) ad=98*ad/100;
5906 if (box1->m2 && dy < box1->m3 - box1->m2 -1) ad=96*ad/100; // ~,
5907
5908 // check top-right-end, bottom-left-end
5909 if (i1<=dx/8 && i6<=dx/8 && i4-(dx-i3)>=dx/8 ) { // /
5910 if (i4<=dx/8 && i3<=dx/8) Break;
5911 if (i4-(dx-i3)<dx/4) ad=99*ad/100; // not enough slope
5912 Setac(box1,(bc='/'),ad);break;
5913 }
5914 if (i4<=dx/8 && i3<=dx/8 && i6-(dx-i1)>=dx/8 ) { // \ ...
5915 if (i6<=dx/8 && i1<=dx/8) Break;
5916 if (i6-(dx-i1)<dx/4) ad=99*ad/100; // not enough slope
5917 Setac(box1,(bc='\\'),ad); break;
5918 }
5919 Break;
5920 }
5921 // --- test ()<> ------------------------------------------------
5922 // if(bc==UNKNOWN)
5923 // if(!box1->dots)
5924 for(ad=d=100;dx>1 && dy>4;){ // min 3x4 '(' ')'
5925 DBG( wchar_t c_ask='('; )
5926 if (sdata->holes.num > 1) {Break;}; /* tolerant against a tiny hole */
5927 if (aa[0][1]>y0+dy/8 && aa[3][1]>y0+dy/8) Break; // no upper end?
5928 if (aa[1][1]<y1-dy/8 && aa[2][1]<y1-dy/8) Break; // no lower end?
5929 // check upper and lower right corner 2018-09
5930 if (aa[2][1]>=y1-dy/8 && aa[2][0]>=x1-dx/8 // ( vs. l (handwritten)
5931 && aa[3][1]<=y0+dy/8 && aa[3][0]<=x1-dx/4) ad=99*ad/100;
5932 if (aa[2][1]>=y1-dy/8 && aa[2][0]>=x1-dx/8 // ( vs. l (handwritten)
5933 && aa[3][1]<=y0+dy/8 && aa[3][0]<=x1-dx/2) Break;
5934
5935 #if 1
5936 for(i=y=0;y<dy;y++){
5937 if( num_cross(0,dx-1,y,y,bp,cs) != 1 ) i++;
5938 if( loop(bp, 0,y,dx,cs,0,RI)
5939 + loop(bp,dx-1,y,dx,cs,0,LE)<3*dx/8 ) break;
5940 }
5941 if( y<dy ) {Break;};
5942 if ( i>2 || (i>0 && dy<16)) {Break;};
5943 #endif
5944 /* look for the extrema => r1..r2 */
5945 for(i=dx,r1=r2=y=dy/2-dy/8;y<=dy/2+dy/8;y++){
5946 j=loop(bp, 0,y,dx,cs,0,RI); if(j==i) r2=y; if(j<i){ r2=r1=y; i=j; }
5947 j=loop(bp,dx-1,y,dx,cs,0,LE); if(j==i) r2=y; if(j<i){ r2=r1=y; i=j; }
5948 } y=(r1+r2)/2;
5949 i1=loop(bp,dx-1, dy/16,dx,cs,0,LE);
5950 i2=loop(bp,dx-1,y ,dx,cs,0,LE);
5951 i3=loop(bp,dx-1,dy-1-dy/16,dx,cs,0,LE);
5952 i4=loop(bp, 0,dy/16 ,dx,cs,0,RI);
5953 i5=loop(bp, 0,y ,dx,cs,0,RI);
5954 i6=loop(bp, 0,dy-1-dy/16,dx,cs,0,RI);
5955 if(dx>dy){
5956 // from Aug06 vector-version of greater is used
5957 // if(i2==0 && 3*i5>dx && i4<=dx/8 && i6<=dx/8) { Setac(box1,(bc='>'),98);{Break;}; }
5958 if(i5==0 && 3*i2>dx && i1<=dx/8 && i3<=dx/8) { Setac(box1,(bc='<'),98);{Break;}; }
5959 }
5960 if( dx > 2 && 9*dx>=5*dy ){ // 4x6 screen-font (3*5)
5961 ad=98;
5962 if (dx<8) ad=99*ad/100;
5963 if (dx<6) ad=96*ad/100;
5964 if( 2*dx > OCR_JOB->res.avX && 4*dx>dy ) ad=98;
5965 // printf(" %d %d %d %d %d %d\n",i5,i1,i3,i2,i4,i6);
5966 if( i5==0 && i1<=dx/8+1 && i3<=dx/8+1 && i1+i3<=dx/8+1
5967 && i2>=dx/2 && i4>=3*dx/4 && i6>=3*dx/4 ) {
5968 if (2*loop(bp, 0, y/2,dx,cs,0,RI)+1+dx/16<i4+i5) ad=95*ad/100;
5969 if (2*loop(bp, 0,dy-1-y/2,dx,cs,0,RI)+1+dx/16<i6+i5) ad=95*ad/100;
5970 Setac(box1,(bc='<'),ad);{Break;};
5971 }
5972 /* obsolete code Aug06, will be removed if new code is stable
5973 if( i2==0 && i4<=dx/8 && i6<=dx/8
5974 && i5>=dx/2 && i1>=3*dx/4 && i3>=3*dx/4 ) {
5975 if (2*loop(bp,dx-1, y/2,dx,cs,0,LE)+1+dx/16<i1+i2) ad=95*ad/100;
5976 if (2*loop(bp,dx-1,dy-1-y/2,dx,cs,0,LE)+1+dx/16<i3+i2) ad=95*ad/100;
5977 Setac(box1,(bc='>'),ad);{Break;};
5978 }
5979 */
5980 }
5981
5982 i1=loop(bp,dx-1,dy/16,dx,cs,0,LE);
5983 i2=loop(bp,dx-1,dy/2 ,dx,cs,0,LE);
5984 i3=loop(bp,dx-1,dy-1 ,dx,cs,0,LE);
5985 i4=loop(bp, 0,0 ,dx,cs,0,RI);
5986 i5=loop(bp, 0,dy/2,dx,cs,0,RI);
5987 i6=loop(bp, 0,dy-1,dx,cs,0,RI);
5988 i=(box1->m4+box1->m3)/2-box1->m2;
5989 //
5990 // out_x(box1);printf("() %d %d %d %d %d %d %d\n",i,i1,i2,i3,i4,i5,i6);
5991 if(2*i2<i1+i3 && 2*i5>i4+i6 && 2*dx<dy && dy>=i){
5992 Setac(box1,(bc=')'),98);break; }
5993 if(2*i2>i1+i3 && 2*i5<i4+i6 && 2*dx<dy && dy>=i){
5994 if(2*i2<=i1+i3+1 || 2*i5>=i4+i6-1) ad=98*ad/100;
5995 if(2*i2<=i1+i3+2 || 2*i5>=i4+i6-2) ad=98*ad/100;
5996 for(x=y=0;y<dy/4;y++){
5997 i=loop(bp,0,y,dx,cs,0,RI);if( i>x ) x=i;
5998 }
5999 for(y=0;y<(dy+2)/4;y++){
6000 i=loop(bp,0,y+dy/8,dx,cs,0,RI);if( i<x ) break;
6001 }
6002 if( y==(dy+2)/4 ) {Break;}; // ~l (left upper side must be convex) Jul00
6003 if (loop(bp,0,dy/2+dy/8,dx,cs,0,RI)-i5>=dx/8+1) ad=99*ad/100; // ~{ Jul09
6004 if (loop(bp,0,dy/2-dy/8,dx,cs,0,RI)-i5>=dx/8+1) ad=99*ad/100; // ~{ Jul09
6005 Setac(box1,(bc='('),ad); break;
6006 }
6007 Break;
6008 } // '(' ')'
6009 // --------- test [] --------------------------------
6010 for(ad=d=100;dx>2 && dy>4 && dy>=2*dx;){ // (3,6) on 4x6 font
6011 DBG( wchar_t c_ask=']'; )
6012 if (sdata->holes.num > 1) { Break;} /* tolerant against a tiny hole */
6013 if (box1->num_frames!=1) break;
6014 if (box1->num_frame_vectors[0]!=10) ad=98; // not charp, unsure
6015 if (!hchar) ad=97*ad/100;
6016 for(y=0;y<dy;y++){
6017 if( num_cross(0,dx-1,y,y,bp,cs) != 1 ) break;
6018 } if (y<dy) {Break;};
6019 if( get_bw(x0,x1,y0 ,y0 ,box1->p,cs,2) == 2
6020 && get_bw(x0,x1,y0+1,y0+1,box1->p,cs,2) == 2 ) {Break;};
6021 if( get_bw(x0,x1,y1 ,y1 ,box1->p,cs,2) == 2
6022 && get_bw(x0,x1,y1-1,y1-1,box1->p,cs,2) == 2 ) {Break;};
6023 if( get_bw(x0 ,x0,y0 ,y1 ,box1->p,cs,2) == 0
6024 || get_bw(x0+1 ,x0+1,y0 ,y1 ,box1->p,cs,2) == 0 )
6025 if( get_bw(x0+dx/2,x1,y0+dy/4,y1-dy/4,box1->p,cs,1) == 0 )
6026 { Setac(box1,(bc='['),ad);break; }
6027 if( get_bw(x1 ,x1,y0 ,y1 ,box1->p,cs,2) == 0
6028 || get_bw(x1-1 ,x1-1,y0 ,y1 ,box1->p,cs,2) == 0 )
6029 if( get_bw(x0,x1-dx/2,y0+dy/4,y1-dy/4,box1->p,cs,1) == 0 )
6030 { Setac(box1,(bc=']'),ad);break; }
6031 break;
6032 }
6033
6034 #if CODE_NOT_COMPLETED
6035 // --- test ] -------
6036 for(ad=d=100;dx>2 && dy>3;){
6037 DBG( wchar_t c_ask=']'; )
6038 if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
6039 if (sdata->holes.num > 0) ad=98*ad/100; /* # */
6040 /* 1/8 distance to the center */
6041 d=2*sq(128/16);
6042 /* now we check for the 4 ends of the x */
6043 if (aa[0][2]>d) Break;
6044 if (aa[1][2]>d) Break;
6045 if (aa[2][2]>d) Break;
6046 if (aa[3][2]>d) Break;
6047 if (aa[3][0]-aa[0][0]<7*dx/8) Break;
6048 if (aa[2][0]-aa[1][0]<7*dx/8) Break;
6049 if (aa[1][1]-aa[0][1]<7*dy/8) Break;
6050 if (aa[2][1]-aa[3][1]<7*dy/8) Break;
6051 if (aa[3][0]-aa[0][0]<2) Break; /* to small */
6052 if (aa[2][0]-aa[1][0]<2) Break; /* to small */
6053 MSG( fprintf(stderr," aa %d %d %d %d %d %d %d %d d %d %d %d %d",\
6054 aa[0][0]-x0,aa[0][1]-y0,aa[1][0]-x0,aa[1][1]-y0,\
6055 aa[2][0]-x0,aa[2][1]-y0,aa[3][0]-x0,aa[3][1]-y0,\
6056 aa[0][2],aa[1][2],aa[2][2],aa[3][2]);)
6057 /* left and right vertical line */
6058 d=line_deviation(box1, aa[0][3], aa[1][3]); if (d>2*sq(1024/4)) Break;
6059 ad=(100-(d-sq(1024)/2)/sq(1024)/4)*ad/100;
6060 d=line_deviation(box1, aa[2][3], aa[3][3]); if (d>2*sq(1024/4)) Break;
6061
6062 /* search uppermost left ^ */
6063 i1=nearest_frame_vector(box1,aa[1][3],aa[2][3], x0, y0);
6064 x=box1->frame_vector[i1][0];
6065 y=box1->frame_vector[i1][1];
6066 if (y-y0 > 5*dy/8) Break;
6067 if (x-x0 > 5*dx/8) Break;
6068 /* search uppermost right ^ ~H */
6069 i3=nearest_frame_vector(box1,aa[1][3],aa[2][3], x1, y0);
6070 if ( box1->frame_vector[i3][0]-x> dx/4
6071 && box1->frame_vector[i3][1]-y<=dy/8) Break;
6072
6073 /* check if upper left and lower right point are joined directly */
6074 dbg[0]=d=line_deviation(box1,i1, aa[2][3]); if (d >2*sq(1024/4)) Break;
6075 /* check if lower left and upper left point are joined directly */
6076 dbg[1]=d=line_deviation(box1, aa[1][3],i1); if (d >2*sq(1024/4)) Break;
6077
6078 if (!hchar) ad=99*ad/100;
6079 if ( gchar) ad=98*ad/100; // \sc N
6080 ac=(wchar_t) ']';
6081 Setac(box1,ac,ad);
6082 if (ad>=100) return ac;
6083 break;
6084 }
6085 #endif
6086 // --------- test ocr-a-[] --------------------------------
6087 if(bc==UNKNOWN)
6088 for(ad=d=98;dx>5 && dy>7 && 2*dy>3*dx;){ // only for accurate font at the moment
6089 DBG( wchar_t c_ask='['; )
6090 if (sdata->holes.num > 2) break; /* tolerant against a tiny hole */
6091 if (!hchar) ad=97*ad/100;
6092 if( num_cross(0,dx-1, 0, 0,bp,cs) != 1 ) break;
6093 if( num_cross(0,dx-1,dy-1,dy-1,bp,cs) != 1 ) break;
6094 if ( loop(bp,dx-1,dy/2,dx,cs,0,LE)
6095 +loop(bp, 0,dy/2,dx,cs,0,RI) <= dx/4 ) break; // O
6096 for(y=dy/8;y<dy-dy/8;y++){
6097 if( num_cross(0,dx,y,y,bp,cs) != 2 ) break;
6098 } if (y<dy-dy/8) break;
6099 if( get_bw((3*x0+5*x1)/8,x1,y0+3*dy/16,y1-3*dy/16,box1->p,cs,1) == 0)
6100 { Setac(box1,(bc='['),ad);break; }
6101 if( get_bw(x0,(5*x0+3*x1)/8,y0+3*dy/16,y1-3*dy/16,box1->p,cs,1) == 0)
6102 { Setac(box1,(bc=']'),ad);break; }
6103 break;
6104 }
6105 // --------- test {} --------------------------------
6106 for(ad=d=99;dx>2 && dy>5 && 2*dy>3*dx;){
6107 DBG( wchar_t c_ask='{'; )
6108 if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
6109 if (!hchar) ad=97*ad/100;
6110 for(y=0;y<dy;y++){
6111 if( num_cross(0,dx-1,y,y,bp,cs) != 1 ) break;
6112 } if (y<dy) Break;
6113 for(x=0;x<dx/2;x++){
6114 if( num_cross(dx-1-x,dx-1-x,0,dy-1,bp,cs) != 2 ) break;
6115 } if (y<dx/2) Break;
6116 if ( num_cross(dx-1,dx-1,dy/4,dy-1-dy/4,bp,cs) != 0 ) Break;
6117 if ( num_cross( 0, 0,dy/4,dy-1-dy/4,bp,cs) != 1 ) Break;
6118 i1=loop(bp, 0,dy/4,dx,cs,0,RI);
6119 i1=loop(bp,i1,dy/4,dx,cs,1,RI); // thickness1
6120 for (i2=dx,i3=y=dy/2-1-dy/16;y<dy/2+2+dy/16;y++)
6121 { x=loop(bp, 0, y ,dx,cs,0,RI); if (x<i2) {i2=x;i3=y;} }
6122 i2=loop(bp,i2,i3,dx,cs,1,RI); // thickness2
6123 if (i2<i1+dx/16+1) Break;
6124 if ( loop(bp,0,dy-1,dx,cs,0,RI)>3*dx/4 ) {ad=99*ad/100;MSG({})}
6125 if ( loop(bp,0, 0,dx,cs,0,RI)>3*dx/4 ) {ad=99*ad/100;MSG({})} // <
6126 if ( loop(bp,0, 0,dy,cs,0,DO)<dy/2-dy/8-1 ) {ad=98*ad/100;MSG({})}
6127 if ( loop(bp,0,dy-1,dy,cs,0,UP)<dy/2-dy/8-1 ) {ad=98*ad/100;MSG({})} // (
6128 if ( loop(bp,0,dy-1,dy,cs,0,UP)<=dy/4 ) Break; // (
6129 if (dy>=8)
6130 if ( loop(bp,dx-1, 0,dx,cs,0,LE)
6131 + loop(bp,dx-1,dy/4,dx,cs,0,LE)
6132 - 2*loop(bp,dx-1,dy/8,dx,cs,0,LE) >=dx/8 ) {ad=98*ad/100;MSG({})} // <
6133 if ( loop(bp,dx-2,dy-1,dy,cs,0,UP)>dy/4 ) Break; // f
6134 if ( get_bw(x0,x0,y0,y0+dy/4,box1->p,cs,1) == 1
6135 || get_bw(x0,x0,y1-dy/4,y1,box1->p,cs,1) == 1 ) Break;
6136 Setac(box1,(bc='{'),ad);break;
6137 }
6138 // --------- test } (closing brace, mirrored variant of the '{' test) ---------
6139 for(ad=d=99;dx>2 && dy>5 && 2*dy>3*dx;){
6140 DBG( wchar_t c_ask='}'; )
6141 if (!hchar) ad=97*ad/100;
6142 for(y=0;y<dy;y++){
6143 if( num_cross(0,dx-1,y,y,bp,cs) != 1 ) break;
6144 } if (y<dy) Break;
6145 for(x=0;x<dx/2;x++){
6146 if( num_cross(x,x,0,dy-1,bp,cs) != 2 ) break;
6147 } if (y<dx/2) Break;
6148 if ( num_cross( 0, 0,dy/4,dy-1-dy/4,bp,cs) != 0 ) Break;
6149 if ( num_cross(dx-1,dx-1,dy/4,dy-1-dy/4,bp,cs) != 1 ) Break;
6150 i1=loop(bp,dx-1 ,dy/4,dx,cs,0,LE);
6151 i1=loop(bp,dx-1-i1,dy/4,dx,cs,1,LE); // thickness1
6152 for (i2=dx,i3=y=dy/2-1-dy/16;y<dy/2+2+dy/16;y++)
6153 { x=loop(bp,dx-1 , y,dx,cs,0,LE); if (x<i2) {i2=x;i3=y;} }
6154 i2= loop(bp,dx-1-i2,i3,dx,cs,1,LE); // thickness2
6155 if (i2<i1+dx/16+1) Break;
6156 if ( loop(bp,dx-1,dy-1,dx,cs,0,LE)>3*dx/4 ) {ad=99*ad/100;MSG({})}
6157 if ( loop(bp,dx-1, 0,dx,cs,0,LE)>3*dx/4 ) {ad=99*ad/100;MSG({})} // >
6158 if ( loop(bp,dx-1, 0,dy,cs,0,DO)<dy/2-dy/8-1 ) {ad=98*ad/100;MSG({})}
6159 if ( loop(bp,dx-1,dy-1,dy,cs,0,UP)<dy/2-dy/8-1 ) {ad=98*ad/100;MSG({})} // )
6160 if ( loop(bp,dx-1, 0,dy,cs,0,DO)<=dy/4) Break;
6161 if (dy>=8)
6162 if ( loop(bp,0, 0,dx,cs,0,RI)
6163 + loop(bp,0,dy/4,dx,cs,0,RI)
6164 - 2*loop(bp,0,dy/8,dx,cs,0,RI) >=dx/8 ) {ad=98*ad/100;MSG({})} // <
6165 if ( loop(bp,1,dy-1,dy,cs,0,UP)>dy/4 ) Break; // ???
6166 if ( get_bw(x1,x1,y0,y0+dy/4,box1->p,cs,1) == 1
6167 || get_bw(x1,x1,y1-dy/4,y1,box1->p,cs,1) == 1 ) Break;
6168 Setac(box1,(bc='}'),ad);break;
6169 }
6170 return box1->c;
6171 }
6172
6173 #if 0
6174 /* ---------- empty prototype function for copy and expand ---------- */
/*
 * ocr0_XXX - skeleton for a new recognition part.
 *
 * The whole block sits inside "#if 0", so it is never compiled; it only
 * serves as a pattern to copy.  To use it: duplicate the function, replace
 * XXX in the name, and insert the character tests between the
 * "--- test XXX ---" marker and the final return.
 *
 * sdata:   shared per-box state; the skeleton reads box1, bp, hchar, gchar
 *          and cs from it.
 * returns: box1->c unchanged (the skeleton itself classifies nothing).
 *
 * Differences to the live parts in this file (ocr0p9, ocr0px) that a copy
 * will inherit:
 *  - hchar/gchar are taken from sdata here, whereas those parts recompute
 *    them from the box base lines m1..m4;
 *  - ac is declared int here, whereas those parts declare it wchar_t;
 *  - there is no bc ("best letter") variable and no aa corner-point pointer.
 */
6175 static wchar_t ocr0_XXX(ocr0_shared_t *sdata){
6176 struct box *box1=sdata->box1; /* box under test */
6177 pix *bp=sdata->bp; /* pixel data handed to the helper calls */
6178 int i,j,d,x,y,i0,i1,i2,i3,hchar=sdata->hchar,gchar=sdata->gchar,
6179 x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
6180 int dx=x1-x0+1,dy=y1-y0+1, /* size (inclusive bounds, hence the +1) */
6181 ac,ad; /* tmp-vars */
6182
6183 // --- test XXX ---------------------------------------------------
6184 return box1->c; /* nothing tested: hand back the current box value */
6185 }
6186 #endif
6187
6188
6189 /* ----------------------- part9 -------------------------------- */
/*
 * ocr0p9 - rule based tests for punctuation and special characters.
 *
 * Tests, in this order: sharp s, '+', '$' (two variants), '&' (two
 * variants), '?', '!', '*' (three variants), '@' and the section sign.
 *
 * sdata:   shared per-box state.  Read here: box1 (bounds x0..x1/y0..y1,
 *          base lines m1..m4, pixel source p, frame vectors), bp, cs,
 *          holes (num and hole[] boxes) and aa (corner points).
 * returns: bc.  Only the sharp-s test, the first '&' test and the '?' test
 *          assign bc; every other test reports solely through
 *          Setac(box1, char, weight) and returns early only when its
 *          weight ad is >= 100.
 *
 * Structure: every test is written as "for(init; precondition;){ ... break; }".
 * The for statement is only a breakable scope - the header holds the start
 * weight (ad) and the minimum box size, and every path through the body
 * ends in break, Break or return, so a body runs at most once.
 *
 * Coordinates: calls on bp pass box-relative positions (0..dx-1, 0..dy-1),
 * calls on box1->p pass absolute positions (x0..x1, y0..y1).
 *
 * Helpers and macros are defined outside this chunk; the notes below rely
 * on the following readings, which should be confirmed at their definitions:
 *  - Break (capital B): presumably a plain break plus debug tracing.
 *  - DBG(...) / MSG(...): presumably debug-only code; c_ask inside DBG
 *    names the character a test is looking for.
 *  - loop(p,x,y,max,cs,col,dir): presumably the length of the run of
 *    pixels of colour col (0 white, 1 black) from (x,y) in direction dir.
 *  - num_cross(xa,xb,ya,yb,p,cs): presumably the number of strokes crossed
 *    on the way from (xa,ya) to (xb,yb).
 *  - get_bw(xa,xb,ya,yb,p,cs,mask): presumably reports whether the
 *    rectangle contains black (bit 1) and/or white (bit 2) pixels.
 *  - num_hole(...): presumably counts enclosed holes within the rectangle.
 *  - get_line2(...): presumably a 0..100 score for a straight line.
 *
 * d is only initialised together with ad in the loop headers; nothing in
 * this function reads it.
 */
ocr0p9(ocr0_shared_t * sdata)6190 static wchar_t ocr0p9(ocr0_shared_t *sdata){
6191 struct box *box1=sdata->box1;
6192 pix *bp=sdata->bp;
6193 int i,j,d,x,y,x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1;
6194 int dx=x1-x0+1,dy=y1-y0+1, /* size (bounds are inclusive) */
6195 i1,i2,i3,i4; /* tmp-vars */
6196 int xa,xb, /* used to store significant points of the char */
6197 dbg[9]={0,0,0,0,0,0,0,0,0}, /* debugging space, printed via MSG only */
6198 ya,yb,ad,cs=sdata->cs; /* ad = weight handed to Setac */
6199 wchar_t ac,bc=UNKNOWN; // ac = char of the current test, bc = best letter
6200 int (*aa)[4]=sdata->aa; /* corner-points, (x,y,dist^2,vector_idx) */
6201 int hchar; // char is higher than e
6202 int gchar; // char has ink lower than m3
6203 // --- hchar --- gchar -------------------------
// hchar: top edge at or above the midpoint of m1 and m2 (2*y0 <= m1+m2)
6204 hchar=0;if( 2*y0<=2*box1->m2-(box1->m2-box1->m1) ) hchar=1;
// gchar: bottom edge at or below the midpoint of m3 and m4 (2*y1 >= m3+m4)
6205 gchar=0;if( 2*y1>=2*box1->m3+(box1->m4-box1->m3) ) gchar=1;
6206 // if the char is slightly moved down correction can be done
// (box overlaps m2..m3 and ends above the m3/m4 midpoint: repeat the hchar
//  test with y0 shifted up by the overshoot y1-m3)
6207 if ( y0<box1->m2 && y1>box1->m3 && 2*y1<box1->m3+box1->m4) // moved
6208 if( 2*(y0-(y1-box1->m3))<=2*box1->m2-(box1->m2-box1->m1) ) hchar=1;
6209
6210 /* reserved for the future */
6211 // --- test beta,\3,sz,"s ---------------------------------------------
// Sharp s.  bc is still UNKNOWN here (nothing above assigns it), so in
// effect only hchar gates this test.  A hit is stored with the fixed
// weight 98; ad is not used for the result.
6212 if(bc==UNKNOWN && hchar)
6213 for(ad=d=100;dx>3 && dy>6;){ // min 4x7
6214 DBG( wchar_t c_ask='S'; )
6215 if (sdata->holes.num > 2) break; /* tolerant against a tiny hole */
6216 /* this part is provisorium, should be changed!
6217 a-\
6218 | d
6219 b| /
6220 | \
6221 -c /
6222 */
// give up only if neither row dy/4 nor the row below it has exactly 2 crossings
6223 if( num_cross(x0 ,x1 ,y0+dy/4 ,y0+dy/4 ,box1->p,cs) != 2
6224 && num_cross(x0 ,x1 ,y0+dy/4+1,y0+dy/4+1,box1->p,cs) != 2 ) break;
// i is an error budget of 1+dy/16: each failed row check below costs one
// point (one row can cost several); the test is dropped once i reaches 0
6225 for(i=1+dy/16,y=y0+dy/8;y<y1-dy/4 && i>0;y++){
6226 if( y<y1-6*dy/16 ){ if( num_cross(x0 ,x1 ,y,y,box1->p,cs) != 2 ) i--;}
6227 else { if( num_cross(x0 ,x1 ,y,y,box1->p,cs) < 2 ) i--;}
6228 if( get_bw(x0,x0+dx/2,y,y,box1->p,cs,1) == 0 ) i--;
6229 if( y<y1-5*dy/16 )
6230 if( get_bw(x1-dx/2,x1,y,y,box1->p,cs,1) == 0 ) i--;
6231 } if( i<=0 ) break;
6232 // out_x(box1);
6233
// middle third, seen from the right edge: some row must satisfy one of the
// two run-length conditions below, otherwise the candidate is dropped
6234 for(y=y0+dy/3;y<y1-dy/3;y++){
6235 i =loop(box1->p,x1,y,dx,cs,0,LE);
6236 if( i>=dx/8 ) break;
6237 i+=loop(box1->p,x1-i,y,dx,cs,1,LE);
6238 if( i>=dx/2 ) break;
6239 } if( y>=y1-dy/3 ) break;
6240
// the right dx/6 strip must give get_bw()==1 in some row of dy/5..dy/3 ...
6241 for(y=y0+dy/5;y<y0+dy/3;y++)
6242 if( get_bw(x1-dx/6,x1,y,y,box1->p,cs,1) == 1 ) break;
6243 if( y>=y0+dy/3 ) break;
6244
// ... and again in some row of the lower half
6245 for(y=y0+dy/2;y<y1;y++)
6246 if( get_bw(x1-dx/6,x1,y,y,box1->p,cs,1) == 1 ) break;
6247 if( y>=y1 ) break;
6248
6249 for(y=y1-dy/3;y<y1-dy/8;y++){
6250 i=loop(box1->p,x1,y,dx,cs,0,LE);
6251 if( i>dx/4
6252 && get_bw(x1-dx/8,x1-dx/8,y,y1,box1->p,cs,1) == 1 ) break;
6253 } if( y<y1-dy/8 ) break; // ~Q
6254
// only applied when m3 is unset (0) or the box ends above the m3/m4 midpoint
6255 if( box1->m3==0 || 2*y1<box1->m3+box1->m4 )
6256 if( loop(box1->p,x1,y1, dx,cs,0,LE)==0
6257 && loop(box1->p,x1,y1-dy/4,dx,cs,0,LE)>dx/8 ) break; // ~R
6258
6259
// some column in the middle half must be crossed exactly 3 times
6260 for(x=x0+dx/4;x<x1-dx/4;x++)
6261 if( num_cross(x,x,y0,y1,box1->p,cs) == 3 ) break;
6262 if( x>=x1-dx/4 ) break;
6263
// looking up from the bottom row: some column in dx/5..dx/2 must give a
// larger loop() value than the centre column (plus dy/64 slack)
6264 i=loop(bp,dx/2,dy-1,dy,cs,0,UP)+dy/64; // Jul00
6265 for(x=dx/5;x<dx/2;x++)
6266 if( loop(bp,x,dy-1,dy,cs,0,UP) > i ) break;
6267 if( x==dx/2 ) break;
6268
// from the left contour at row dy/4 up to x1-dx/3, no column of the upper
// quarter may give get_bw()==0
6269 x=x0+loop(bp,0,dy/4,dx,cs,0,RI);
6270 for(;x<x1-dx/3;x++)
6271 if( get_bw(x,x,y0,y0+dy/4,box1->p,cs,1) == 0 ) break;
6272 if( x<x1-dx/3 ) break;
6273
// holes are only accepted when gchar is set
6274 if( !gchar )
6275 // if( num_hole( x0, x1, y0, y1,box1->p,cs,NULL) != 0 ) break;
6276 if (sdata->holes.num != 0) break;
6277
6278 bc=LATIN_SMALL_LETTER_SHARP_S;
6279 Setac(box1,(wchar_t)bc,98);
6280 break;
6281 }
6282 // --- test + ------------------------------------------------
6283 for(ad=d=100;dx>2 && dy>2;){ // min 3x3
6284 DBG( wchar_t c_ask='+'; )
6285 if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
6286 xa=(dx+1)/3-1; ya=(dy+1)/3-1; /* size of the 4 gaps = 1/3 * 1/3 */
6287 xb=(dx+1)/4; yb=(dy+2)/4; /* smaller gap */
// the four corner regions must not report get_bw()==1; note that the
// upper right probe is xb wide while the other three are xa wide
6288 if( get_bw(x0,x0+xa,y0,y0+ya,box1->p,cs,1) == 1 ) Break; // left upper
6289 if( get_bw(x0,x0+xa,y1-ya,y1,box1->p,cs,1) == 1 ) Break; // left lower
6290 if( get_bw(x1-xb,x1,y0,y0+ya,box1->p,cs,1) == 1 ) Break; // right upper
6291 if( get_bw(x1-xa,x1,y1-ya,y1,box1->p,cs,1) == 1 ) { // right lower
// second chance with the yb-high probe; passing it costs 1% weight
6292 if( get_bw(x1-xa,x1,y1-yb,y1,box1->p,cs,1) == 1 ) Break;
6293 ad=99*ad/100; // smoothed inner corner? 0907
6294 }
// look for a row where get_bw(...,2)==0 over the inner 7/9 of the width;
// i keeps that absolute row and doubles as the "found" flag
// NOTE(review): a hit in absolute row 0 would leave i==0 and count as
// "not found" - confirm this cannot occur
6295 for(i=0,y=y0+ya;y<=y1-ya;y++){ // horizontal line
6296 if( get_bw(x0+dx/9,x1-dx/9,y,y,box1->p,cs,2) == 0 ) { i=y; break; }
6297 }
6298 if (3*dx<2*dy) ad=99*ad/100; // ~t
6299 if( !i ) Break;
6300 ac=(wchar_t) '+';
6301 Setac(box1,ac,ad);
6302 if (ad>=100) return ac;
6303 break;
6304 }
6305 // --- test $ (vert. line crossing) ------------------------------
6306 for(ad=d=99;dx>3 && dy>6;){ // min 4x7 2010-10
6307 DBG( wchar_t c_ask='$'; )
6308 if (sdata->holes.num != 2) Break; // this variant needs exactly two holes
6309
// probes of the four corners (upper left/lower right are dx/5 wide,
// the other two dx/9 wide): none may report get_bw()==1
6310 if( get_bw(x0,x0+dx/5,y0 ,y0+dy/18,box1->p,cs,1) == 1 ) Break;
6311 if( get_bw(x0,x0+dx/9,y1-dy/23,y1 ,box1->p,cs,1) == 1 ) Break;
6312 if( get_bw(x1-dx/9,x1,y0 ,y0+dy/18,box1->p,cs,1) == 1 ) Break;
6313 if( get_bw(x1-dx/5,x1,y1-dy/23,y1 ,box1->p,cs,1) == 1 ) Break;
// left third between dy/3 and dy/2, and the mirrored region on the right,
// must both report get_bw()==1
6314 if( get_bw(x0,x0+dx/3,y0+dy/3 ,y0+dy/2 ,box1->p,cs,1) != 1 ) Break;
6315 if( get_bw(x1-dx/3,x1,y1-dy/2 ,y1-dy/3 ,box1->p,cs,1) != 1 ) Break;
// i1/i2: x reached by loop() from the left on the top row and on the
// bottom row; the bottom one must not lie right of the top one
6316 i1=x0+loop(box1->p,x0,y0,dx,cs,0,RI); if( i1<x0+dx/3 || i1>x1-dx/5 ) Break;
6317 i2=x0+loop(box1->p,x0,y1,dx,cs,0,RI); if( i2<x0+dx/5 || i2>i1 ) Break;
// scale the weight by the get_line2() result for the line (i1,y0)-(i2,y1)
6318 ad= get_line2(i1,y0,i2,y1,box1->p,cs,100)*ad/100;
6319 // check upper left and lower right half circle, $ (Oct08: removed)
6320 // Oct08 JS: check the position of holes (better for tiny fonts)
6321 // upper hole must be the first!? (hole[].x0 = relative coordinates)
// i4 is 0 when hole[0] starts higher, else 1; hole[0^i4] is then the
// upper hole and hole[1^i4] the lower one
6322 if ( sdata->holes.hole[0].y0 <
6323 sdata->holes.hole[1].y0 ) i4=0; else i4=1; /* sort to [idx^i4] */
6324 if ( sdata->holes.hole[0^i4].y1 >
6325 sdata->holes.hole[1^i4].y0 ) Break; /* no y-overlap allowed */
6326 // upper left hole
6327 // MSG( fprintf(stderr,"hole[0]=x0=%d %d", sdata->holes.hole[0^i4].x0, sdata->holes.hole[0^i4].x1); )
6328 if ( sdata->holes.hole[0^i4].x0 > (dx+1)/3 ) Break;
6329 if ( sdata->holes.hole[0^i4].x1 > dx/2+dx/4 ) Break;
6330 if ( sdata->holes.hole[0^i4].y1 >= dy/2+dy/8 ) Break;
6331 if ( sdata->holes.hole[0^i4].y0 > dy/2-dy/8 ) Break;
6332 // lower right hole
6333 // fprintf(stderr,"\nDBG hole[1]=x0=%d %d", sdata->holes.hole[1^i4].x0, sdata->holes.hole[1^i4].x1);
6334 if ( sdata->holes.hole[1^i4].x0 <= dx/2-dx/4 ) Break;
6335 if ( sdata->holes.hole[1^i4].x1 < dx/2+dx/4 ) Break;
6336 if ( sdata->holes.hole[1^i4].y1 < dy/2+dy/8 ) Break;
6337 if ( sdata->holes.hole[1^i4].y0 <= dy/2-dy/8 ) Break;
6338 if (ad<95) Break; // weight too low after the get_line2 scaling
6339 ac=(wchar_t) '$';
6340 Setac(box1,ac,ad);
6341 if (ad>=100) return ac;
6342 break;
6343 }
6344 // --- test $ (no vert. line crossing, but lines on top and bottom) -----
6345 for(ad=d=99;dx>3 && dy>6;){ // min 4x7 2010-10
6346 DBG( wchar_t c_ask='$'; )
6347 if (box1->num_frames != 1) break; // this variant needs a single frame
6348 /* 5x7
6349 ..@..<-
6350 @@@@@<
6351 @....
6352 @@@@@
6353 ....@
6354 @@@@@
6355 ..@..<-
6356 */
// same probes as in the first '$' variant, minus the upper left corner
6357 if( get_bw(x0,x0+dx/9,y1-dy/23,y1 ,box1->p,cs,1) == 1 ) Break;
6358 if( get_bw(x1-dx/9,x1,y0 ,y0+dy/18,box1->p,cs,1) == 1 ) Break;
6359 if( get_bw(x1-dx/5,x1,y1-dy/23,y1 ,box1->p,cs,1) == 1 ) Break;
6360 if( get_bw(x0,x0+dx/3,y0+dy/3 ,y0+dy/2 ,box1->p,cs,1) != 1 ) Break;
6361 if( get_bw(x1-dx/3,x1,y1-dy/2 ,y1-dy/3 ,box1->p,cs,1) != 1 ) Break;
6362 i1=x0+loop(box1->p,x0,y0,dx,cs,0,RI); if( i1<x0+dx/3 || i1>x1-dx/5 ) Break;
6363 i2=x0+loop(box1->p,x0,y1,dx,cs,0,RI); if( i2<x0+dx/5 || i2>i1 ) Break;
6364
// i1/i2 are reused as frame-vector indices: the vector between corners
// aa[0]..aa[1] nearest to a point far right of the box at mid height, and
// the one between aa[2]..aa[3] nearest to a point far left at mid height
// (reading of nearest_frame_vector assumed from its name - confirm)
6365 i1=nearest_frame_vector(box1, aa[0][3], aa[1][3], x1+2*dx, (y0+y1)/2);
6366 i2=nearest_frame_vector(box1, aa[2][3], aa[3][3], x0-2*dx, (y0+y1)/2);
// accept only if vector i1 lies in the right quarter, vector i2 in the
// left quarter, and vector i1 is not above vector i2
6367 if (box1->frame_vector[i1][0]-x0<3*dx/4
6368 || box1->frame_vector[i2][0]-x0> dx/4
6369 || box1->frame_vector[i1][1]-y0
6370 <box1->frame_vector[i2][1]-y0) Break;
6371
// ad is never changed in this variant, so the next two checks on ad
// (ad<95, ad>=100) cannot fire with the start value 99
6372 if (ad<95) Break;
6373 ac=(wchar_t) '$';
6374 Setac(box1,ac,ad);
6375 if (ad>=100) return ac;
6376 break;
6377 }
6378 // --- test & ------------------------------------------------
6379 for(ad=d=99;dx>3 && dy>4;){ /* 4x6 font */
6380 DBG( wchar_t c_ask='&'; )
6381 if (sdata->holes.num != 2) Break; // exactly two holes required
6382 if( get_bw(x1-dx/9,x1,y0,y0+dy/4,box1->p,cs,1) == 1 ) Break; // g
6383 if( loop(bp,dx/2,0,dy,cs,0,DO)>dy/2) Break;
// i1 = smaller left-contour value of rows dy/8 and dy/4,
// i3 = smaller left-contour value of rows dy-dy/4 and dy-dy/4-1
// NOTE(review): the second check on each of the two "i =" lines re-tests
// i1 / i3 (already tested one line earlier) rather than the freshly read i;
// possibly "i>dx/2" was intended - confirm before changing
6384 i1=loop(bp,0,dy/8 ,dx,cs,0,RI); if (i1>dx/2) Break;
6385 i =loop(bp,0,dy/4 ,dx,cs,0,RI); if (i1>dx/2) Break; if (i<i1) i1=i;
6386 i3=loop(bp,0,dy-dy/4 ,dx,cs,0,RI); if (i3>dx/2) Break;
6387 i =loop(bp,0,dy-dy/4-1,dx,cs,0,RI); if (i3>dx/2) Break; if (i<i3) i3=i;
6388 if (i3>i1) Break;
// i2 = largest left-contour value between rows dy/4 and dy/2+1
6389 for( i2=0, y=dy/4; y<=dy/2+1; y++ ){
6390 i =loop(bp,0,y,dx,cs,0,RI); if( i>i2 ) i2=i;
6391 }
// the middle must be indented relative to the mean of upper and lower part
6392 if(2*i2-i1-i3<1) Break;
6393 // if( num_hole(x0,x1 ,y0,y1,box1->p,cs,NULL)!=2 ) Break;
// both holes must still be counted when the right quarter is cut off
6394 if( num_hole(x0,x1-dx/4,y0,y1,box1->p,cs,NULL)!=2 ) Break;
6395 if( num_cross(dx-1,dx-1,dy/4,dy-1,bp,cs) < 1 ) Break;
// walk left from the right edge to the first column with more than one
// crossing below dy/4 (this plain break only ends the column scan)
6396 for( x=dx-1; x>=dx/2; x-- ){
6397 if( num_cross(x,x,dy/4,dy-1,bp,cs) > 1 ) break;
6398 } if( x<=3*dx/4 && x<dx-2) Break;
6399 if( num_cross(0,dx-1,dy-1-dy/4,dy-1-dy/4,bp,cs) > 3 ) { // glued ah
6400 if (dy>15) { Break; } else ad=96*ad/100;
6401 }
6402 if (!hchar) ad=98*ad/100;
6403 bc=(wchar_t) '&';
6404 Setac(box1,bc,ad);
// ad starts at 99 and is only ever reduced here, so this return cannot fire
6405 if (ad>=100) return bc;
6406 break;
6407 }
6408 // --- test \it & like \epsilon\tau ------------------------------
// only tried when neither the sharp-s test nor the '&' test above set bc
6409 if(bc==UNKNOWN)
6410 for(ad=d=100;dx>7 && dy>7;){
6411 DBG( wchar_t c_ask='&'; )
6412 if (sdata->holes.num > 2) break; /* tolerant against a tiny hole */
// fixed crossing-count signature: rows dy/4, dy/2, 3*dy/4 and dy-1 ...
6413 if( num_cross(0,dx-1, dy/4, dy/4,bp,cs) != 3 ) break;
6414 if( num_cross(0,dx-1, dy/2, dy/2,bp,cs) != 4 ) break;
6415 if( num_cross(dx/2,dx-1,dy/2, dy/2,bp,cs) != 2 ) break;
6416 if( num_cross(0,dx-1,3*dy/4,3*dy/4,bp,cs) != 2 ) break;
6417 if( num_cross(0,dx-1, dy-1, dy-1,bp,cs) != 1 ) break;
// ... and columns 0, dx/3, 13*dx/16 (top eighth) and 3/8, 4/8, 5/8 of dx
// (bottom quarter)
6418 if( num_cross( 0, 0,0,dy-1,bp,cs) != 1 ) break;
6419 if( num_cross( dx/3, dx/3,0,dy-1,bp,cs) != 4 ) break;
6420 if( num_cross(13*dx/16,13*dx/16,0,dy/8,bp,cs) != 0 ) break;
6421 if( num_cross(4*dx/8,4*dx/8,dy-dy/4,dy-1,bp,cs) != 1 ) break;
6422 if( num_cross(3*dx/8,3*dx/8,dy-dy/4,dy-1,bp,cs) != 1 ) break;
6423 if( num_cross(5*dx/8,5*dx/8,dy-dy/4,dy-1,bp,cs) != 1 ) break;
// exactly one hole in the left half and one in the upper middle region
6424 if( num_hole(x0 ,(x0+x1)/2,y0, y1,box1->p,cs,NULL) != 1 ) break;
6425 if( num_hole(x0+dx/8,x1-dx/4,y0,y1-dy/4,box1->p,cs,NULL) != 1 ) break;
6426 ac=(wchar_t) '&';
6427 Setac(box1,ac,ad);
// ad is still 100 here, so a hit in this variant always returns at once
6428 if (ad>=100) return ac;
6429 break;
6430 }
6431 // --- test ? ---------------------------------------------------
// A hit is stored with the fixed weight 98 and returned immediately;
// the value of ad is not used for the result.
6432 for(ad=d=98;dx>2 && dy>5;){ // min 3x(4+2)
6433 DBG( wchar_t c_ask='?'; )
6434 if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
6435 if ( num_cross(x0, x1, y0, y0, box1->p, cs) !=1 ) Break; // ~?
6436 if ( num_cross(x0, x1, y1, y1, box1->p, cs) > 1 ) Break; // ~?
// find the first row for which get_bw() is not 1 (gap above the dot);
// it has to lie in the lower half of the box
6437 for(y=y0;y<y1;y++) // new y1
6438 if( get_bw(x0, x1, y, y,box1->p,cs,1) != 1 ) break; // lower end
6439 if (2*y<y0+y1) Break;
6440 i1=y1;
// no gap inside the box: look for the dot between y1+1 and m4 and move
// i1 (the effective bottom row) down to it
6441 if (y==y1 && box1->m4) { // probably lower dot not caught in box?
6442 if (get_bw(x0+1,x1-1,y1+1,box1->m4,box1->p,cs,1) != 1 ) Break;
6443 i1=box1->m4;
6444 for(;i1>y1;i1--) // new y1
6445 if( get_bw(x0, x1,i1,i1,box1->p,cs,1) == 1 ) break; // lower dot
6446 }
// i is not read again in this test; y is re-initialised by the next loop
6447 y--; i=y-y0+1; // new dy
// search the upper half for a row with exactly two crossings
// NOTE(review): the Break below sits inside this inner for loop.  If Break
// expands to a plain break it only ends the row scan (leaving y<dy/2, the
// same state as a successful search) and does not reject the candidate -
// confirm against the macro definition
6448 for (y=0;y<dy/2;y++) {
6449 if( num_cross(x0, x1, y0+y, y0+y, box1->p, cs) == 2 ) break;
6450 if( y>=dy/4
6451 && loop(bp, 0,y,dx,cs,0,RI) // 2018-09
6452 -loop(bp,dx-1,y,dx,cs,0,LE)>dx/2
6453 && loop(bp, 0,dy/8,dx,cs,0,RI)<dx/4 ) Break; // upper right bow
6454 } if (y==dy/2) Break;
// NOTE(review): if the Break above leaves the test, the following two
// y==dy/2 checks can never be true
6455 if (y==dy/2) ad=99*ad/100; // not on ocr1809_12quest.png 7x13
6456 if (y==dy/2 && (dx>7 || dy>13 || 13*dx<7*dy)) Break; // bad fix
6457 // ToDo better add bow-detection against fail ?!
6458 // if( num_hole( x0, x1, y0, y1, box1->p,cs,NULL) > 0 ) Break;
6459 if (sdata->holes.num > 0) Break; // stricter than the ">1" check at the top
// from mid height: first row with get_bw()==0 (gap) ...
6460 for(y=y0+dy/2;y<=i1;y++)
6461 if( get_bw(x0,x1,y,y,box1->p,cs,1) == 0 ) break;
6462 if( y==i1 ) Break;
// ... then the first row with get_bw()==1 again (the dot)
6463 for( ;y<=i1;y++)
6464 if( get_bw(x0,x1,y,y,box1->p,cs,1) == 1 ) break;
6465 if( get_bw(x0,x1,y,y,box1->p,cs,1) != 1 ) Break;
6466 if( get_bw(x0+7*dx/8,x1,y,i1,box1->p,cs,1) == 1 ) Break; // broken thin 2
6467 bc='?';
6468 Setac(box1,(wchar_t)bc,98);
6469 return bc;
6470 }
6471 // --- test !| ---------------------------------------------------
6472 for(ad=d=99; dy>4 && dy>2*dx;){ // min 3x4
6473 DBG( wchar_t c_ask='!'; )
6474 if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
6475 // measure thickness
6476 if (num_cross(x0,x1,y0 ,y0 ,box1->p,cs)!=1) Break;
6477 if (num_cross(x0,x1,y0+dy/2,y0+dy/2,box1->p,cs)!=1) Break;
// same gap search as in the '?' test: first row where get_bw() is not 1
6478 for(y=y0;y<y1;y++) // new y1
6479 if( get_bw(x0, x1, y, y,box1->p,cs,1) != 1 ) break; // lower end
6480 if (2*y<y0+y1) Break;
6481 if (y==y1 && y>box1->m3-dy/8) ad=ad*97/100; /* missing dot? */
6482 i1=y1;
// no gap inside the box: look below it (down to m4) for the dot; for
// boxes wider than 2 the outermost columns are left out of the probe
6483 if (y==y1 && box1->m4) { // probably lower dot not caught in box?
6484 if ((dx>2 && get_bw(x0+1,x1-1,y1+1,box1->m4,box1->p,cs,1) == 1)
6485 || (dx<3 && get_bw(x0 ,x1 ,y1+1,box1->m4,box1->p,cs,1) == 1 )) {
6486 i1=box1->m4;
6487 for(;i1>y1;i1--) // new y1
6488 if( get_bw(x0, x1,i1,i1,box1->p,cs,1) == 1 ) break; // lower dot
6489 }
6490 } i2=i1; // i2 = effective bottom row, i1 is reused below
// no row down to i2 may have more than one crossing; i1 records the first
// row with zero crossings, which must exist in the lower half
6491 for( i1=0,y=y0;y<=i2;y++){
6492 i=num_cross(x0,x1,y,y,box1->p,cs); if(i>1) break;
6493 if(i==0 && i1==0) i1=y;
6494 } if(y<=i2 || i1==0 || i1<y0+dy/2) Break;
6495
// right contour at row dy/8 must not step in much further than at row 0
6496 if( loop(bp,dx-1,dy/8,dx,cs,0,LE)
6497 -loop(bp,dx-1, 0,dx,cs,0,LE)>dx/4+1 ) Break; // f
6498
6499 if (!hchar) ad=96*ad/100;
6500 Setac(box1,(wchar_t)'!',ad);
6501 break;
6502 }
6503 // --- test * five edges (jags? beams? - original author unsure of the English word) ----
6504 for(ad=d=99;dx>2 && dy>4;){
6505 DBG( wchar_t c_ask='*'; )
6506 if (sdata->holes.num > 0) Break; /* any hole rejects (no tolerance here) */
// note: these two calls run from (0,0) to (dx-1,dy-1) and from (0,1) to
// (dx-1,dy-2), i.e. not along a single row
6507 if( num_cross(0,dx-1, 0,dy-1,bp,cs) != 1
6508 && num_cross(0,dx-1, 1,dy-2,bp,cs) != 1 ) Break;
6509 if( num_cross(0,dx-1,dy-1,dy-1,bp,cs) != 2
6510 && num_cross(0,dx-1,dy-2,dy-2,bp,cs) != 2 ) Break;
6511 x=dx/2;y=(6*dy+8)/16; // center point 6/8=6/2^3 rounded
// each beam must score at least 95 in get_line2(); scores go to dbg[]
6512 /* upwards from center */
6513 dbg[0]=i=get_line2(x,y,x ,0,bp,cs,100); if(i<95) Break;
6514 if (dx<8) /* be exact on small fonts, where get_line2 returns 100 (ToDo change) */
6515 if (get_bw(x,x,0,y,bp,cs,2)==2) Break;
6516 /* horizontal */
6517 dbg[1]=i=get_line2(0,y,dx-1,y,bp,cs,100); if(i<95) Break;
6518 if (dy<8)
6519 if (get_bw(0,dx-1,y ,y ,bp,cs,2)==2
6520 && get_bw(0,dx-1,y+1,y+1,bp,cs,2)==2) Break;
6521 /* down (right) */
// two candidate end points are tried and the better score is used;
// dbg[2] is only written when the second candidate wins
6522 i=get_line2(x,y,(5*dx+4)/8,dy-1,bp,cs,100);
6523 j=get_line2(x,y,(6*dx+4)/8,dy-1,bp,cs,100); if(j>i) dbg[2]=i=j;
6524 if(i<95) Break;
6525 /* down (left) */
6526 dbg[3]=i=get_line2(x, y,(2*dx+4)/8,dy-1,bp,cs,100); if(i<95) Break; // straight up
6527 /* check for lower gap at bottom */
6528 dbg[4]=i=get_bw( x, x,dy-1-dy/8,dy-1,bp,cs,1); if(i==1) Break;
6529 dbg[5]=i=get_line2( dx/4,dy/4, 0,0,bp,cs,101); if(i<95) Break; // upper left gap
6530 dbg[6]=i=get_line2(dx-1-dx/4,dy/4,dx-1,0,bp,cs,101); if(i<95) Break; // upper right gap
6531 MSG(fprintf(stderr,"%d %d %d %d %d %d %d",dbg[0],dbg[1],dbg[2],dbg[3],dbg[4],dbg[5],dbg[6]);)
6532 Setac(box1,(wchar_t)'*',ad);
6533 break;
6534 }
6535 // --- test * six edges (jags? beams?) incl. vert. line (|+X) ----
// a hit is stored with the fixed weight 99
6536 for(ad=d=100;dx>4 && dy>4;){
6537 DBG( wchar_t c_ask='*'; )
6538 if (sdata->holes.num > 0) Break; /* any hole rejects (no tolerance here) */
// three crossings near the top (row dy/8 or the next) and near the bottom
6539 if( num_cross(0,dx-1, dy/8, dy/8,bp,cs) != 3
6540 && num_cross(0,dx-1, 1+dy/8, 1+dy/8,bp,cs) != 3) Break;
6541 if( num_cross(0,dx-1,dy-2-dy/8,dy-2-dy/8,bp,cs) != 3) Break;
// two crossings along the left and the right edge column
6542 if( num_cross(0 , 0, 0,dy-1,bp,cs) != 2) Break;
6543 if( num_cross(dx-1,dx-1, 0,dy-1,bp,cs) != 2) Break;
// the middle row has one crossing, none of it within dx/8 of either side
6544 if( num_cross(0,dx-1,dy/2,dy/2,bp,cs) != 1) Break;
6545 if( num_cross( 0 ,dx/8,dy/2,dy/2,bp,cs) != 0) Break;
6546 if( num_cross(dx-1-dx/8,dx-1,dy/2,dy/2,bp,cs) != 0) Break;
// the line scores are only checked for boxes wider than 5
6547 if (dx>5) {
6548 dbg[0]=i=get_line2(0,dy-2-dy/8,dx-1,dy/8,bp,cs,100); if(i<95) Break; // black upwards beam
6549 dbg[1]=i=get_line2(0,dy/8,dx-1,dy-2-dy/8,bp,cs,100); if(i<95) Break; // black downwards beam
6550 /* check vertical line */
6551 dbg[2]=i=get_line2(dx/2,0,dx/2, dy-1,bp,cs,100); if(i<95) Break;
6552 }
6553 MSG(fprintf(stderr,"%d %d %d %d %d %d",dbg[0],dbg[1],dbg[2],dbg[3],dbg[4],dbg[5]);)
6554 Setac(box1,(wchar_t)'*',99);
6555 break;
6556 }
6557 // --- test * six edges '*' = 'x'+'-' incl. horizontal line ----
// transposed form of the previous variant (columns and rows swapped);
// a hit is stored with the fixed weight 98
6558 for(ad=d=100;dx>3 && dy>4;){
6559 DBG( wchar_t c_ask='*'; )
6560 if (sdata->holes.num > 0) Break; /* any hole rejects (no tolerance here) */
6561 if( num_cross( dx/8, dx/8, 0, dy-1,bp,cs) != 3
6562 && num_cross(1+dx/8,1+dx/8, 0, dy-1,bp,cs) != 3) Break;
6563 if( num_cross(dx-1-dx/8,dx-1-dx/8,0,dy-1,bp,cs) != 3
6564 && num_cross(dx-2-dx/8,dx-2-dx/8,0,dy-1,bp,cs) != 3) Break;
6565 if( num_cross( 0,dx-1, 0, 0,bp,cs) != 2) Break;
6566 if( num_cross( 0,dx-1,dy-1,dy-1,bp,cs) != 2) Break;
6567 if( num_cross(dx/2,dx/2,0,dy-1,bp,cs) != 1) Break;
6568 // check upper and lower gap
6569 if( num_cross(dx/2,dx/2,0,dy/8,bp,cs) != 0) Break;
6570 if( num_cross(dx/2,dx/2,dy-1-dy/8,dy-1,bp,cs) != 0) Break;
6571 if (dx>5) {
6572 dbg[0]=i=get_line2(dx-2-dx/8,0,dx/8,dy-1,bp,cs,100); if(i<95) Break; // black upwards beam
6573 dbg[1]=i=get_line2(dx/8,0,dx-2-dx/8,dy-1,bp,cs,100); if(i<95) Break; // black downwards beam
6574 /* check horizontal line */
6575 dbg[2]=i=get_line2(0,dy/2,dx-1,dy/2,bp,cs,100); if(i<95) Break;
6576 }
6577 MSG(fprintf(stderr,"%d %d %d %d %d %d",dbg[0],dbg[1],dbg[2],dbg[3],dbg[4],dbg[5]);)
6578 Setac(box1,(wchar_t)'*',98);
6579 break;
6580 }
6581 // --- test @ - a popular char should be detectable! added in version v0.2.4a5
6582 if(bc==UNKNOWN)
6583 for(ad=d=99;dx>4 && dy>7;){ // 2010-09-24 5x8 sample
6584 DBG( wchar_t c_ask='@'; )
6585 if (sdata->holes.num > 3) Break; /* tolerant against a tiny hole */
6586 // check ~ 7x10 0 with dot in it
6587 // num_holes==1 + hole.y0<=dy/8,>=y1-dy/8
// NOTE(review): the '$' test above treats hole[] coordinates as relative to
// the box, yet here they are compared against absolute y0/y1 - confirm
// which one is intended
6588 if (sdata->holes.num==1
6589 && sdata->holes.hole[0].y0<=y0+dy/8
6590 && sdata->holes.hole[0].y1>=y1-dy/8) Break;
// the outline must be met within dx/4 from left/right at mid height and
// within dx/8 (sic, dx not dy) from bottom/top in the middle column
6591 if (loop(bp, 0,dy/2,dx,cs,0,RI)>dx/4) Break;
6592 if (loop(bp,dx-1,dy/2,dx,cs,0,LE)>dx/4) Break;
6593 if (loop(bp,dx/2,dy-1,dy,cs,0,UP)>dx/8) Break;
6594 if (loop(bp,dx/2, 0,dy,cs,0,DO)>dx/8) Break;
6595 /* ..@@@@..<- 8x10 example
6596 .@@..@@. .@@@@@. - 7x9 sample
6597 @@....@@ @@...@@ .@@@.<- 5x8 sample
6598 @@..@@@@< @@...@@< @...@
6599 @@.@@.@@ @@.@@@@ @..@@<
6600 @@.@@.@@ @@.@@@@ @.@.@
6601 @@..@@@. @@.@@@@ @.@.@
6602 @@...... @@.@@@. @.@@.
6603 .@@...@@ @@..... @....
6604 ..@@@@@.<- .@@@@@.<- .@@@@<- */
6605
6606 x=(7*dx+3)/16; // 8x10=3 7x9=3 5x8=2 (within vert. middle line)
6607 y=dy/2;
// i = crossings of the middle row, j = stroke width at its left end;
// the crossing count is only enforced when the stroke is thin relative
// to the box width
6608 i=num_cross(0,dx-1,y,y,bp,cs);
6609 j=loop(bp, 0,y,dx,cs,0,RI); // measure line width, 2010-09-24
6610 j=loop(bp, j,y,dx,cs,1,RI);
6611 if (dx>=4*j && (i<3 || i>4)) Break;
6612 if (dx>=5*j && i!=4) ad=98*ad/100;
6613
// vertical crossings: take column x, x+1 or x+2, whichever count is
// closest to 4; accept 3 or 4, with a 3% penalty for 3.
// j is overwritten here whenever column x itself does not give 4, and the
// "dx>=4*j" test further down then uses that value, not the stroke width
6614 i=num_cross(x,x,0,dy-1,bp,cs); if (i<2) Break;
6615 if (i!=4) { j=num_cross(x+1,x+1,0,dy-1,bp,cs);
6616 if (abs(4-j)<abs(i-4)) i=j; }
6617 if (i!=4) { j=num_cross(x+2,x+2,0,dy-1,bp,cs);
6618 if (abs(4-j)<abs(i-4)) i=j; }
6619 if (i<3 || i>4) Break;
6620 if (i!=4) ad=97*ad/100;
// two crossings in each half of the middle row and of column x
6621 if( num_cross(0, x,y,y,bp,cs) != 2 ) Break;
6622 if( num_cross(x,dx-1,y,y,bp,cs) != 2 && dx>=4*j) Break;
6623 if( num_cross(x,x,0, y,bp,cs) != 2 ) Break;
6624 if( num_cross(x,x,y,dy-1,bp,cs) != 2 ) Break;
// hole checks only for boxes wider than 7
6625 if (dx>7) {
6626 // if( num_hole(x0,x1,y0,y1,box1->p,cs,NULL) != 1 ) Break;
6627 if (sdata->holes.num != 1) Break;
6628 if( num_hole(x0+dx/8,x1-3*dx/16,y0+dy/8,y1-dy/8,box1->p,cs,NULL) != 1 ) Break;
6629 }
6630 Setac(box1,(wchar_t)'@',ad);
6631 break;
6632 }
6633 // --- test paragraph v0.2.6
// section sign; a hit is stored with the fixed weight 96.
// The debug label below is '$' although SECTION_SIGN is what gets set.
6634 if(bc==UNKNOWN && hchar)
6635 for(ad=d=100;dx>4 && dy>15;){
6636 DBG( wchar_t c_ask='$'; )
6637 if (sdata->holes.num > 3) break; /* tolerant against a tiny hole */
// row 3*dy/4: left half must give get_bw()!=1, right quarter !=0;
// row dy/4: the mirrored pattern (left quarter !=0, right half !=1)
6638 if( get_bw( 0,dx/2,3*dy/4,3*dy/4,bp,cs,1) == 1 ) break;
6639 if( get_bw(3*dx/4,dx-1,3*dy/4,3*dy/4,bp,cs,1) == 0 ) break;
6640 if( get_bw( 0,dx/4, dy/4, dy/4,bp,cs,1) == 0 ) break;
6641 if( get_bw( dx/2,dx-1, dy/4, dy/4,bp,cs,1) == 1 ) break;
// middle column: top and bottom quarter must not give 0, 4 crossings overall
6642 if( get_bw(dx/2,dx/2, 0, dy/4,bp,cs,1) == 0 ) break;
6643 if( get_bw(dx/2,dx/2,dy-1-dy/4, dy-1,bp,cs,1) == 0 ) break;
6644 if( num_cross(dx/2,dx/2,0,dy-1,bp,cs) != 4 ) break;
6645 if( num_cross(x0,x1,y0+dy/2,y0+dy/2,box1->p,cs) != 2 ) break;
// exactly one hole in the middle half of the height
6646 if( num_hole( x0,x1,y0+dy/4,y1-dy/4,box1->p,cs,NULL) != 1 ) break;
6647 Setac(box1,SECTION_SIGN,96);
6648 break; // paragraph=0xA7=167
6649 }
6650
// bc is UNKNOWN here unless the sharp-s test or the first '&' test matched
6651 return bc;
6652 }
6653
6654 /* ----------------------- partx -------------------------------- */
ocr0px(ocr0_shared_t * sdata)6655 static wchar_t ocr0px(ocr0_shared_t *sdata){
6656 struct box *box1=sdata->box1;
6657 pix *bp=sdata->bp;
6658 int i,j,d,x,y,x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1;
6659 int dx=x1-x0+1,dy=y1-y0+1, /* size */
6660 i1,i2,i3,i4,j1,cs=sdata->cs; /* tmp-vars */
6661 int ya,ad; /* used for store significant points of char */
6662 int (*aa)[4]=sdata->aa; /* corner-points, (x,y,dist^2,vector_idx) */
6663 wchar_t ac,bc=UNKNOWN; // bestletter
6664 int hchar; // char is higher than e
6665 int gchar; // char has ink lower than m3
6666 // --- hchar --- gchar -------------------------
6667 hchar=0;if( 2*y0<=2*box1->m2-(box1->m2-box1->m1) ) hchar=1;
6668 gchar=0;if( 2*y1>=2*box1->m3+(box1->m4-box1->m3) ) gchar=1;
6669 // if the char is slightly moved down correction can be done
6670 if ( y0<box1->m2 && y1>box1->m3 && 2*y1<box1->m3+box1->m4) // moved
6671 if( 2*(y0-(y1-box1->m3))<=2*box1->m2-(box1->m2-box1->m1) ) hchar=1;
6672
6673 /* reserved for special chars, to test at the end */
6674 // --- test 'ff' ---------------------------------------------------
6675 // ToDo: better check and call test 'f' and 'f' with subboxes
6676 if( bc==UNKNOWN )
6677 for(ad=98;dx>4 && dy>6;){ // Dec00 body copied from H
6678 DBG( wchar_t c_ask='f'; )
6679 if (sdata->holes.num > 2) break; /* tolerant against a tiny hole */
6680 if( num_cross(0,dx-1, dy/4 , dy/4 ,bp,cs) != 2
6681 && num_cross(0,dx-1,3*dy/16,3*dy/16,bp,cs) != 2 ) break;
6682 if( num_cross(0,dx-1,3*dy/4 ,3*dy/4 ,bp,cs) != 2
6683 && num_cross(0,dx-1,3*dy/4+1,3*dy/4+1,bp,cs) != 2 ) break;
6684 if( loop(bp,0 ,dy/8,dx,cs,0,RI)
6685 + loop(bp,dx-1,dy/8,dx,cs,0,LE)>dx/2 ) break; // ~A
6686 for( j1=0,i=1,y=y0+dy/10; y<y1-dy/10 && i; y++ ) // 2 vertikal lines
6687 { j=loop(box1->p,x0 ,y,dx,cs,0,RI)
6688 +loop(box1->p,x1 ,y,dx,cs,0,LE);
6689 if( j>10*dx/16 ) i=0; if ( j>j1 ) j1=j; }
6690 if( !i ) break;
6691 for( x=dx/4; x<dx/2; x++ ){ // lower gap
6692 y=loop(bp,x ,dy-1,dy,cs,0,UP);
6693 if ( y > 3*dy/8 ) break;
6694 if ( 10*y > dy ){ /* italic */
6695 i=loop(bp,x ,dy-y,dx,cs,0,RI);
6696 if( i>1 && y+loop(bp,x+i-1,dy-y,dy,cs,0,UP)>3*dy/8 ) break;
6697 }
6698 } if( x>=dx/2 ) break;
6699 x=loop(box1->p,x0 ,y1-dy/8,dx,cs,0,RI)
6700 +loop(box1->p,x1 ,y1-dy/8,dx,cs,0,LE);
6701 for( i=1,y=dy/4; y<dy-1-dy/4 && i; y++ ) // max - min width
6702 { j=loop(bp,0 ,y,dx,cs,0,RI)
6703 +loop(bp,dx-1,y,dx,cs,0,LE); if( j-x>dx/5 ) i=0; }
6704 if( !i ) break; // ~K Jul00
6705 for( i=0,ya=y=y0+dy/4; y<y1-dy/3; y++ ) // horizontal line
6706 { j=loop(box1->p,x0 ,y,dx,cs,0,RI);
6707 j=loop(box1->p,x0+j,y,dx,cs,1,RI); if( j>i ) { i=j; ya=y; } }
6708 if( i<=dx/2 ) break; ya-=y0;
6709 if( num_cross(0,dx-1,ya ,ya ,bp,cs) != 1
6710 && num_cross(0,dx-1,ya+1,ya+1,bp,cs) != 1 ) break; /* Dec00 */
6711 for( y=ya; y<dy-dy/4; y++ ) // ~M Dec00
6712 if( num_cross(0,dx-1,y ,y ,bp,cs) > 2
6713 && num_cross(0,dx-1,y+1,y+1,bp,cs) > 2 ) break;
6714 if ( y<dy-dy/4 ) break;
6715 for(i=1,x=x0+dx/2;x<=x1-dx/4 && i;x++){
6716 if( get_bw( x, x,y0 ,y0+dy/4,box1->p,cs,1) == 0 ) i=0;
6717 } if( !i ) break;
6718 for(i=1,x=x0+dx/4;x<=x1-dx/4 && i;x++){
6719 if( get_bw( x, x,y1-dy/4,y1 ,box1->p,cs,1) == 0 ) i=0;
6720 } if( i ) break;
6721 for(i=1,x=x0+dx/4;x<=x1-dx/4 && i;x++){
6722 if( num_cross(x,x,y0+dy/8,y1-dy/8, box1->p,cs) == 1 ) i=0;
6723 } if( i ) break;
6724 for(i=1,y=y0;y<=y0+dy/4 && i;y++){
6725 if( num_cross(x0,x1,y,y, box1->p,cs) == 2 ) i=0;
6726 } if( i ) break;
6727 for(i=1,y=y1-dy/4;y<=y1 && i;y++){
6728 if( num_cross(x0,x1,y,y, box1->p,cs) == 2 ) i=0;
6729 } if( i ) break;
6730 if( num_cross(x0 ,x0+dx/8 ,y0+dy/8 ,y0 ,box1->p,cs) != 0 ) ad=96*ad/100;
6731 if( get_bw(x1-dx/8, x1 , y0, y0+dy/8,box1->p,cs,1) != 1 ) break;
6732 if( get_bw(x0 , x0+dx/8, y1-dy/8, y1,box1->p,cs,1) != 1 ) break;
6733 i1=loop(bp,dx-1, dy/4,dx,cs,0,LE); if(i1>dx/2) break;
6734 i2=loop(bp,dx-1, dy/2,dx,cs,0,LE); if(i2<i1-dx/4 || i2>i1+dx/8) break;
6735 i3=loop(bp,dx-1,dy-1-dy/4,dx,cs,0,LE); if(i3<i2-dx/4 || i3>i2+dx/8) break;
6736 if(abs(i1+i3-2*i2)>dx/16+1) break;
6737 if( num_hole(x0,x1,y0+dy/4,y1,box1->p,cs,NULL) != 0 ) break;
6738 if (!hchar) ad=96*ad/100;
6739 if (!gchar) ad=99*ad/100;
6740 ac=LATIN_SMALL_LIGATURE_FF;
6741 Setac(box1,ac,ad);
6742 break;
6743 }
6744 // --- test ae ---------------------------------------------------
6745 if( bc==UNKNOWN )
6746 for(ad=98;dx>4 && dy>6;){ // provisorium
6747 DBG( wchar_t c_ask=LATIN_SMALL_LETTER_AE; )
6748 if (sdata->holes.num > 4) Break; /* tolerant against a tiny hole */
6749 if( num_cross( dx/4,dx-1,3*dy/16,3*dy/16,bp,cs) != 2
6750 && num_cross(dx-1-dx/4,dx-1,3*dy/16,3*dy/16,bp,cs) != 1 ) Break;
6751 if( num_cross(0,dx-1,3*dy/ 4,3*dy/ 4,bp,cs) < 2 ) Break;
6752 if( num_cross(0,dx-1, 0, dy-1,bp,cs) < 3 ) Break;
6753 if( num_cross(dx-1,0, 0, dy-1,bp,cs) < 3 ) Break;
6754 if( num_cross(0,dx-1, dy/16, dy/16,bp,cs) < 2 )
6755 if( num_cross(0,dx-1,1+dy/16,1+dy/16,bp,cs) < 2 ) Break;
6756 if( num_cross(0,dx-1,dy-1-dy/16,dy-1-dy/16,bp,cs) < 2 ) Break;
6757 for( x=0,i2=y=dy/4; y<3*dy/4; y++ ){
6758 j=loop(bp,0,y,dx,cs,0,RI); if(j>x) { i2=y; x=j; }
6759 } if( x<dx/4 || x>3*dx/4 ) Break;
6760 for( x=0,i4=y=dy/4; y<3*dy/4; y++ ){
6761 j=loop(bp,dx-1,y,dx,cs,0,LE); if(j>x) { i4=y; x=j; }
6762 } if( x<dx/4 || x>3*dx/4 ) Break;
6763 for( x=0,i4=y=dy/8; y<3*dy/4; y++ ){
6764 j=loop(bp,dx-1 ,y,dx,cs,0,LE);
6765 j=loop(bp,dx-1-j,y,dx,cs,1,LE);
6766 if(j>x) { i4=y; x=j; }
6767 } if( x<dx/4 ) Break;
6768 if( num_hole(x0,x0+3*dx/4,y0+dy/4,y1,box1->p,cs,NULL) != 1 ) Break;
6769 if( num_hole(x0+dx/2-1,x1,y0,y1-dy/4,box1->p,cs,NULL) != 1 ) Break;
6770 ac=LATIN_SMALL_LETTER_AE;
6771 Setac(box1,ac,ad);
6772 if (ad>=100) return ac;
6773 break;
6774
6775 }
6776 // --- test AE ---------------------------------------------------
6777 if( bc==UNKNOWN )
6778 for(ad=98;dx>5 && dy>6;){ // provisorium
6779 DBG( wchar_t c_ask=LATIN_CAPITAL_LETTER_AE; )
6780 if (sdata->holes.num > 2) Break; /* tolerant against a tiny hole */
6781 if( num_cross(0,dx-1,3*dy/16,3*dy/16,bp,cs) < 2 ) Break;
6782 if( num_cross(0,dx-1,3*dy/ 4,3*dy/ 4,bp,cs) < 2 ) Break;
6783 if( num_cross(0,dx-1, 0, dy-1,bp,cs) < 3 ) Break;
6784 if( num_cross(0,dx-1, dy/16, dy/16,bp,cs) != 1
6785 && num_cross(0,dx-1, dy/32, dy/32,bp,cs) != 1
6786 && num_cross(0,dx-1, 0, 0,bp,cs) != 1 ) Break;
6787 // check for upper horizontal line
6788 j=loop(bp,dx-1 ,0,dx,cs,0,LE); x=j;
6789 j=loop(bp,dx-1-j,0,dx,cs,1,LE);
6790 i=loop(bp,dx-1 ,1,dx,cs,0,LE); if (i<x) x=i;
6791 i=loop(bp,dx-1-i,1,dx,cs,1,LE);
6792 if (i>j) j=i;
6793 if (x>dx/8) Break;
6794 if (j<dx/4) Break;
6795 for( x=dx,i1=i3=0,i2=y=dy/4; y<3*dy/4; y++ ){
6796 j=loop(bp, 0,y,dx,cs,0,RI); if(j>x) break; x=j;
6797 j=loop(bp, j,y,dx,cs,1,RI); if(j>i1) { i1=j; i2=y; }
6798 j=loop(bp,dx-1 ,y,dx,cs,0,LE);
6799 j=loop(bp,dx-1-j,y,dx,cs,1,LE); if(j>i3) { i3=j; i4=y; }
6800 } if( y<3*dy/4 || i1<dx/4-1 || i3<dx/4-1) Break;
6801 for( i1=i3=0,y=0; y<dy/8; y++ ){
6802 j=loop(bp,dx-1 , y,dx,cs,0,LE);
6803 j=loop(bp,dx-1-j, y,dx,cs,1,LE); if(j>i1) { i1=j; }
6804 j=loop(bp,dx-1 ,dy-1-y,dx,cs,0,LE);
6805 j=loop(bp,dx-1-j,dy-1-y,dx,cs,1,LE); if(j>i3) { i3=j; }
6806 } if( i1<=dx/4 || i3<=dx/4 ) Break;
6807 for( x=dx-1-dx/8; x>dx/2; x-- ){ // look for right the E
6808 if( num_cross(x,x, 0,dy-1,bp,cs) == 3 )
6809 if( num_cross(x,x, 0,dy/4,bp,cs) == 1 )
6810 if( num_cross(x-1,dx-1-dx/8,3*dy/4,3*dy/4,bp,cs) == 0 )
6811 if( num_cross(x,x,3*dy/4,dy-1,bp,cs) == 1 ) break;
6812 } if (x<=dx/2) Break; // not found
6813 if (sdata->holes.num != 1) Break;
6814 if( num_hole(x0,x0+3*dx/4,y0,y1-dy/4,box1->p,cs,NULL) != 1 ) Break;
6815 // if( num_hole(x0, x1,y0,y1 ,box1->p,cs,NULL) != 1 ) Break;
6816 ac=LATIN_CAPITAL_LETTER_AE;
6817 Setac(box1,ac,ad);
6818 if (ad>=100) return ac;
6819 break;
6820
6821 }
6822 // --- test /0 /o /O O_WITH_STROKE -----------------------------------------
6823 for(ad=99;dx>4 && dy>4;){ // provisorium
6824 DBG( wchar_t c_ask=LATIN_SMALL_LETTER_O_WITH_STROKE; )
6825 if (sdata->holes.num > 3) Break; /* tolerant against a tiny hole */
6826 if( num_cross( 0,dx-1,dy/2,dy/2,bp,cs) != 3 ) Break;
6827 if( num_cross(dx/2,dx/2, 0,dy-1,bp,cs) != 3 ) Break;
6828 if (loop(bp,dx-1,3*dy/8,dx,cs,0,RI)>dx/8) Break;
6829 if (loop(bp, 0,5*dy/8,dx,cs,0,RI)>dx/8) Break;
6830 if( num_cross( 0,dx-1, 0, 0,bp,cs) > 2 ) Break;
6831 if( num_cross(dx/4,dx-1, 0, 0,bp,cs) > 2 ) Break;
6832 if( num_cross( 0,dx-1,dy-1,dy-1,bp,cs) > 2 ) Break;
6833 if( num_cross( 0,3*dx/4,dy-1,dy-1,bp,cs) > 2 ) Break;
6834 if( num_cross( 0, 0, 0,dy-1,bp,cs) > 2 ) Break;
6835 if( num_cross(dx-1,dx-1, 0,dy-1,bp,cs) > 2 ) Break;
6836 if( num_cross( 0, 0,dy/4,dy-1,bp,cs) > 2 ) Break;
6837 if( num_cross(dx-1,dx-1, 0,3*dy/4,bp,cs) > 2 ) Break;
6838 i1 =loop(bp,dx-1 , 0,dx,cs,0,LE); if( i1>dx/8 ) Break;
6839 i1+=loop(bp,dx-1-i1, 0,dx,cs,1,LE); if( i1>dx/3 ) Break; i1=dx-1-i1;
6840 i2 =loop(bp, 0,dy-1,dx,cs,0,RI); if( i2>dx/8 ) Break;
6841 for(y=1;y<dy-1;y++){
6842 x=i1+y*(i2-i1)/dy-dx/8; if(x<0)x=0;
6843 j=loop(bp,x,y,dx,cs,0,RI); if( j>3*dx/16 ) break;
6844 } if( y<dy-1 ) Break;
6845 if( num_cross( 0 ,dx/4,dy/2,dy/2,bp,cs) != 1 ) Break;
6846 if( num_cross(dx-1-dx/4,dx-1,dy/2,dy/2,bp,cs) != 1 ) Break;
6847 if( num_cross(dx/4,dx-1-dx/4,dy/2,dy/2,bp,cs) != 1 ) Break;
6848 if (sdata->holes.num != 2) Break;
6849 // if( num_hole(x0,x1,y0,y1,box1->p,cs,NULL) != 2 ) Break;
6850
6851 if ( hchar && 2*y0<box1->m1+box1->m2 )
6852 ac=LATIN_CAPITAL_LETTER_O_WITH_STROKE;
6853 else ac=LATIN_SMALL_LETTER_O_WITH_STROKE;
6854 Setac(box1,ac,ad);
6855 if (ad>=100) return ac;
6856 break;
6857
6858 }
6859 // --- test /c /C C_WITH_STROKE CENT_SIGN --------------------------
6860 // here only the version with a continuous vertical line (not the broken variant)
6861 if( bc==UNKNOWN )
6862 for(ad=98;dx>4 && dy>4;){ // provisorium
6863 DBG( wchar_t c_ask=CENT_SIGN; )
6864 if (sdata->holes.num > 2) Break; /* tolerant against a tiny hole */
6865 if( num_cross( 0,dx-1,dy/2,dy/2,bp,cs) != 2 ) Break;
6866 if( num_cross(0,dx-1-dx/4,dy/2,dy/2,bp,cs) != 2 ) Break;
6867 if( num_cross(dx/2,dx/2, 0,dy-1,bp,cs) != 3 ) Break;
6868 if( num_cross( 0,dx-1, 0, 0,bp,cs) > 2 ) Break;
6869 if( num_cross(dx/4,dx-1, 0, 0,bp,cs) > 2 ) Break;
6870 if( num_cross( 0,dx-1,dy-1,dy-1,bp,cs) > 2 ) Break;
6871 if( num_cross( 0,3*dx/4,dy-1,dy-1,bp,cs) > 2 ) Break;
6872 if( num_cross( 0, 0, 0,dy-1,bp,cs) > 2 ) Break;
6873 if( num_cross(dx-1,dx-1, 0,dy-1,bp,cs) > 3 ) Break;
6874 if( num_cross( 0, 0,dy/4,dy-1,bp,cs) > 2 ) Break;
6875 if( num_cross(dx-1,dx-1, 0,3*dy/4,bp,cs) > 3 ) Break;
6876 i1 =loop(bp,dx-1 , 0,dx,cs,0,LE); if( i1>dx/4 ) Break;
6877 i1+=loop(bp,dx-1-i1, 0,dx,cs,1,LE); if( i1>dx/4 ) Break; i1=dx-1-i1;
6878 i2 =loop(bp, 0,dy-1,dx,cs,0,RI); if( i2>dx/4 ) Break;
6879 for(y=0;y<dy;y++){
6880 x=i1+y*(i2-i1)/dy; if(x>dx/16+1) x-=dx/16+1;
6881 j=loop(bp,x,y,dx,cs,0,RI); // fprintf(stderr,"\n x=%d j=%d",x,j);
6882 if( j>(dx+4)/8 ) ad=96*ad/100;
6883 if( j>(dx+2)/4 ) break;
6884 } if( y<dy ) Break;
6885 if( num_cross( 0 ,dx/4,dy/2,dy/2,bp,cs) != 1 ) Break;
6886 if( num_cross(dx-1-dx/4,dx-1,dy/2,dy/2,bp,cs) != 0 ) Break;
6887 if( num_cross(dx/4,dx-1-dx/4,dy/2,dy/2,bp,cs) != 1 ) Break;
6888 // if( num_hole(x0,x1,y0,y1,box1->p,cs,NULL) != 1 ) Break;
6889 if (sdata->holes.num != 1) Break;
6890
6891 ac=CENT_SIGN;
6892 Setac(box1,ac,ad);
6893 if (ad>=100) return ac;
6894 break;
6895
6896 }
6897 // --- test EURO_CURRENCY_SIGN -----------------------------------------
6898 if( bc==UNKNOWN )
6899 for(ad=98;dx>4 && dy>6;){ // provisorium
6900 DBG( wchar_t c_ask='&'; )
6901 if (sdata->holes.num > 1) break; /* tolerant against a tiny hole */
6902 if( num_cross(dx/2,dx/2, 0,dy-1,bp,cs) != 4 ) break;
6903 if( num_cross( 0,dx-1, 0, 0,bp,cs) != 1 ) break;
6904 if( num_cross( 0,dx-1,dy-1,dy-1,bp,cs) != 1 ) break;
6905 if( num_cross( 0,dx-1,dy/2,dy/2,bp,cs) != 1 ) break;
6906 for(i=0,y=dy/4;y<dy-dy/4-1;y++){ // check if no gap on left side
6907 x=loop(bp,0,y,dx,cs,0,RI); if( x>dx/4 ) break;
6908 j=loop(bp,x,y,dx,cs,1,RI); if( j>i ) i=j;
6909 } if( y<dy-dy/4-1 || i<dx/2 ) break;
6910 for(y=dy/4;y<dy-dy/4-1;y++){ // check for right horizontal gap
6911 x=loop(bp,dx-1,y,dx,cs,0,LE); if( x>dx/2 ) break;
6912 } if( y>=dy-dy/4-1 ) break;
6913 // if( num_hole(x0,x1,y0,y1,box1->p,cs,NULL) != 0 ) break;
6914 if (sdata->holes.num != 0) break;
6915 ac=EURO_CURRENCY_SIGN;
6916 Setac(box1,ac,ad);
6917 if (ad>=100) return ac;
6918 break;
6919 }
6920 // --- test LETTER_C_WITH_CEDILLA ---------------------------------------------------
6921 if (bc==UNKNOWN)
6922 if (gchar)
6923 for(ad=98;dx>3 && dy>6;){ // provisorium
6924 DBG( wchar_t c_ask='c'; )
6925 if (sdata->holes.num > 0) break; /* no tolerant against tiny holes */
6926 j=loop(bp,dx-1,dy/16 ,dy,cs,0,LE);
6927 x=loop(bp,dx-1,dy/16+1,dy,cs,0,LE); if (x<j) j=x;
6928 if (3*x>dx) Break; // ~4 ocr-b
6929 if( num_cross(0,dx-1,3*dy/16,3*dy/16,bp,cs) > 2 ) break;
6930 if( num_cross(0,dx-1, 0, dy-1,bp,cs) < 2 ) break;
6931 if( num_cross(0,dx-1, dy/16, dy/16,bp,cs) > 2 ) break;
6932 for( x=dx,i2=y=dy/4; y<3*dy/4; y++ ){
6933 j=loop(bp,0,y,dx,cs,0,RI); if(j<x) { i2=y; x=j; }
6934 } if( x>0 ) break; i1=x;
6935 for( x=0,i4=y=dy/4; y<5*dy/8; y++ ){
6936 j=loop(bp,dx-1,y,dx,cs,0,LE); if(j>x) { i4=y; x=j; }
6937 } if( x<dx/2 ) break; i3=x;
6938 j =loop(bp,dx/2,0,dy,cs,0,DO);
6939 j+=loop(bp,dx/2,j,dy,cs,1,DO); if(j>dy/4) break;
6940 j =loop(bp,dx/2,j,dy,cs,0,DO); if(j<dy/2) break;
6941 j =loop(bp,dx-1 ,dy-1-dy/8,dx,cs,0,LE); if(j<dx/4 || 4*j>3*dx) break;
6942 j =loop(bp,dx-1-j/2,dy-1-dy/8,dy,cs,0,UP); if(j>dy/2) break; // ~()
6943 // if( num_hole(x0,x1,y0,y1,box1->p,cs,NULL) != 0 ) break;
6944 if (sdata->holes.num) break;
6945 if( hchar ) ac= LATIN_CAPITAL_LETTER_C_WITH_CEDILLA;
6946 else ac= LATIN_SMALL_LETTER_C_WITH_CEDILLA;
6947 Setac(box1,ac,ad);
6948 if (ad>=100) return ac;
6949 break;
6950
6951 }
6952 // --- test # ---------------------------------------------------
6953 for(ad=99;dx>4 && dy>4;){ // never sure?
6954 DBG( wchar_t c_ask='#'; )
6955 if (sdata->holes.num > 2) Break; /* tolerant against a tiny hole */
6956 if (sdata->holes.num < 1) Break;
6957 if( num_cross(0,dx-1, dy/8, dy/8,bp,cs) != 2 ) Break;
6958 if( num_cross(0,dx-1,dy-1-dy/8,dy-1-dy/8,bp,cs) != 2 ) Break;
6959 if( num_cross(0,dx-1, dy/2, dy/2,bp,cs) != 2 ) Break;
6960 if( num_cross(0,dx/2, dy/2, dy/2,bp,cs) != 1 ) Break;
6961 /* fat "#" have only small ends on left and right side, we tolerate this */
6962 j=loop(bp, 0,dy/8,dx,cs,0,RI); if(j<1 || j<dx/16) Break; if (j<dx/8) {ad=ad*96/100;}
6963 j=loop(bp, 0,dy/2,dx,cs,0,RI); if(j<1 || j<dx/16 || j>=dx/2) Break; if (j<dx/8) {ad=ad*96/100;}
6964 j=loop(bp,dx-1,dy/2,dx,cs,0,LE); if(j<1 || j<dx/16 || j>=dx/2) Break; if (j<dx/8) {ad=ad*96/100;}
6965 j=loop(bp,dx-1,dy-1,dx,cs,0,LE); if(j<1 || j<dx/16) Break; if (j<dx/8) {ad=ad*96/100;}
6966 for( i1=i3=0,y=dy/4; y<dy/2; y++ ){
6967 j=loop(bp,0, y,dx,cs,0,RI); if(j>3*dx/4) { i1=0; break; }
6968 j=loop(bp,j, y,dx,cs,1,RI); if(j>i1) { i1=j; }
6969 j=loop(bp,0,dy-1-y,dx,cs,0,RI); if(j>3*dx/4) { i1=0; break; }
6970 j=loop(bp,j,dy-1-y,dx,cs,1,RI); if(j>i3) { i3=j; }
6971 }
6972 if (i1<dx-dx/4 || i3<dx-dx/4) Break;
6973 if (i1<dx-dx/8) ad=97*ad/100;
6974 if (i3<dx-dx/8) ad=97*ad/100;
6975 if (sdata->holes.num != 1) {ad=95*ad/100;}
6976 if( num_hole(x0+dx/8,x1-dx/8,y0+dy/8,y1-dy/8,box1->p,cs,NULL) != 1 ) Break;
6977 // if( num_hole(x0 ,x1 ,y0 ,y1 ,box1->p,cs,NULL) != 1 ) Break;
6978
6979 ac=(wchar_t) '#';
6980 if( gchar ) {ad=99*ad/100;}
6981 Setac(box1,ac,ad);
6982 if (ad>=100) return ac;
6983 break;
6984 }
6985 // --- test bullet, full_box, grabbed cursor, ZapfDingBats_156
6986 if (bc==UNKNOWN)
6987 for(ad=96;dx>4 && dy>4 && 2*dx>dy;){ // provisorium
6988 DBG( wchar_t c_ask='#'; )
6989 if( get_bw(x0,x1,y0,y1,box1->p,cs,2) != 0 ) break;
6990 ac=BULLET;
6991 if (gchar && !hchar) ad=80*ad/100;
6992 Setac(box1,ac,ad);
6993 if (ad>=100) return ac;
6994 break;
6995 }
6996 /* --- test | (vertical line, could be a I or l) --- */
6997 for(ad=99;dy>4 && 2*dx<dy;){ /* v0.44 */
6998 DBG( wchar_t c_ask='|'; ) /* solid small block */
6999 /* test if everything is filled black */
7000 if( get_bw(x0+dx/8,x1-dx/8,y0+dy/9,y1-dy/9,box1->p,cs,2) != 0 ) break;
7001 /* more unsure if the borders are not exact */
7002 if( get_bw(x0 ,x0+dx/8,y0+dy/9,y1-dy/9,box1->p,cs,2) != 0 ) ad=99*ad/100;
7003 if( get_bw(x1-dx/8,x1 ,y0+dy/9,y1-dy/9,box1->p,cs,2) != 0 ) ad=99*ad/100;
7004 if( get_bw(x0+dx/8,x1-dx/8,y0 ,y0+dy/8,box1->p,cs,2) != 0 ) ad=99*ad/100;
7005 if( get_bw(x0+dx/8,x1-dx/8,y1-dy/8,y1 ,box1->p,cs,2) != 0 ) ad=99*ad/100;
7006 if (3*dx<dy) ad=98*ad/100;
7007 if (4*dx<dy) ad=99*ad/100;
7008 if (box1->m2 && 2*y1< box1->m2+ box1->m3) Break;
7009 if (box1->m2 && 3*y1< box1->m2+2*box1->m3) ad=95*ad/100;
7010 ac='|';
7011 if (!hchar) ad=98*ad/100;
7012 if (dx==1 && hchar && gchar) ad=99; /* 2017-07 tables */
7013 Setac(box1,ac,ad);
7014 break;
7015 }
7016 /* --- test | (vertical line, could be an i if gray and small) --- */
7017 for(ad=99;dy>4 && dy<12 && 3*dx<=dy;){ /* 2017-08 2x9,3x9 sslmozFP */
7018 DBG( wchar_t c_ask='i'; ) /* solid small block but grey gap */
7019 /* test if everything is filled black */
7020 if( get_bw(x0+dx-1,x1-dx+1,y0+dy-1,y1-dy+1,box1->p,cs,2) != 0 ) break;
7021 /* ToDo: check again with modified cs? */
7022 if (gchar || !hchar) Break;
7023 if (dx>3) ad=98;
7024 { int x,y,a,gmi=999999,gma=0,gme=0, yma=999999;
7025 for (y=0;y<dy;y++) {
7026 for (a=0,x=0;x<dx;x++)
7027 a += getpixel(box1->p, x0+x, y0+y) & ~0x0F; // 2018-09 ~marker
7028 if (a<gmi) { gmi=a; }
7029 if (a>gma) { gma=a; yma=y; }
7030 gme += a;
7031 DBG( IFV fprintf(stderr,"\nDBG i| y=%2d grey=%4d",y,a); )
7032 }
7033 DBG( IFV fprintf(stderr,"\nDBG i| yma %2d gmi,me,ma %4d %4d %4d",\
7034 yma,gmi,gme/dy,gma); ) /* most white column */
7035 if (yma>=dy/2 || y0+yma>box1->m2) Break;
7036 if (dy*gma<gme*2) Break; /* ToDo: better mean near blk + min near white */
7037 }
7038 ac='i';
7039 { int i;
7040 for (i=0;i<box1->num_ac;i++) /* reduce l|I */
7041 if (box1->wac[i]<100 && ad <= box1->wac[i]) box1->wac[i]--;
7042 }
7043 Setac(box1,ac,ad);
7044 break;
7045 }
7046 /* --- test | (vertical line with small gap, pipe symbol) --- */
7047 if (box1->num_frames == 2)
7048 for (ad=99;dy>6 && 2*dx<dy;){ /* 2010-10-08 qemu */
7049 DBG( wchar_t c_ask='|'; )
7050 if (8*abs(box1->frame_vol[0]-box1->frame_vol[1]) // 10 + 8
7051 > box1->frame_vol[0]+box1->frame_vol[1] ) Break;
7052 if ( box1->frame_vol[0]+box1->frame_vol[1]
7053 < 7*(dx*dy)/8 ) Break;
7054 /* test if everything is filled black */
7055 if ( get_bw(x0,x1,y0+dy/2+0,y0-dy/2+0,box1->p,cs,1) != 0
7056 && get_bw(x0,x1,y0+dy/2-1,y0-dy/2-1,box1->p,cs,1) != 0 ) Break;
7057 /* more unsure if the borders are not exact */
7058 if (3*dx<dy) ad=98*ad/100;
7059 if (4*dx<dy) ad=99*ad/100;
7060 if (box1->m2 && 4*y1< box1->m2+3*box1->m3) Break;
7061 if (box1->m2 && 4*y1<0*box1->m2+4*box1->m3) ad=95*ad/100;
7062 ac='|';
7063 if (!hchar) ad=98*ad/100;
7064 Setac(box1,ac,ad);
7065 break;
7066 }
7067 // --- test % ---------------------------------------------------
7068 for(ad=100;dx>5 && dy>7;){ // provisorium
7069 DBG( wchar_t c_ask='%'; )
7070 if (sdata->holes.num > 2) break; /* tolerant against a tiny hole */
7071 if ( box1->num_frames != 1
7072 && box1->num_frames != 3
7073 && box1->num_frames != 5 ) break;
7074 if (box1->num_frames == 1) { // tiny fat font?
7075 Break;
7076 if (dx>5) Break;
7077 ad=ad*95/100;
7078 }
7079 if (box1->num_frames >= 3
7080 && box1->frame_vol[1]<0
7081 && box1->frame_vol[2]<0) { // small font? 7x8
7082 /* 12x17 ocr-b
7083 .@@@...@
7084 @@.@@.@@
7085 .@@@.@@.
7086 ...@@@..
7087 ..@@@...
7088 .@@.@@@.
7089 @@.@@.@@
7090 @...@@@.
7091 */
7092 // aa[] belongs to the frame 0 only! 2010-10
7093 if (aa[0][0]-x0> dx/4 || aa[0][1]-y0> dy/4) Break; // upper left /
7094 if (aa[2][0]-x0<3*dx/4 || aa[2][1]-y0<3*dy/4) Break; // lower right /
7095 if (aa[1][0]-x0> dx/4 || aa[1][1]-y0<3*dy/4) Break; // lower left /
7096 if (aa[3][0]-x0<3*dx/4 || aa[3][1]-y0> dy/4) Break; // upper right /
7097 j =nearest_frame_vector(box1,aa[3][3],aa[0][3],x0+dx/2,y0+dy/2);
7098 if (box1->frame_vector[j][0]-x0< dx/2
7099 || x1-box1->frame_vector[j][0]< dx/4
7100 || box1->frame_vector[j][1]-y0<=dy/8) Break; // ~8B
7101 j =nearest_frame_vector(box1,aa[1][3],aa[2][3],x0+dx/2,y0+dy/2);
7102 if (box1->frame_vector[j][0]-x0< dx/4
7103 || x1-box1->frame_vector[j][0]< dx/2
7104 || y1-box1->frame_vector[j][1]<=dy/8) Break; // ~8B
7105 if (dx>7 && ad==100) {ad=ad*99/100;MSG({})} // ocr-b
7106 for (i1=i2=1,i=1;i<box1->num_frames;i++) { // get biggest holes
7107 if (i1==i2 && i!=i1) i2=i;
7108 if (box1->frame_vol[i ]<box1->frame_vol[i2] && i!=i1) i2=i;
7109 if (box1->frame_vol[i2]<box1->frame_vol[i1]) { j=i1;i1=i2;i2=j; }
7110 } if (i1==i2) Break;
7111 if (abs(box1->frame_vol[i1])>=box1->frame_vol[0]/8) Break;
7112 if (abs(box1->frame_vol[i2])>=box1->frame_vol[0]/8) Break;
7113 if (abs(box1->frame_vol[i1] - box1->frame_vol[i2]) // vol=4
7114 >=abs(box1->frame_vol[i1] + box1->frame_vol[i2])/2) Break;
7115 // ToDo: check one box above and the other below dy/2 ("u)
7116 if (ad==100) {ad=ad*99/100;MSG(fprintf(stderr,"ad= %d",ad);)}
7117 }
7118 // MSG(fprintf(stderr,"aa2y dy %d %d",aa[2][1]-y0,dy);)
7119 if (box1->num_frames == 3
7120 && box1->frame_vol[1]>0) { // small font? 7x8
7121
7122 /* 5x7 vol=4
7123 @@....@ -
7124 @@...@@<
7125 ....@@.
7126 ...@@..
7127 ..@@...
7128 .@@....
7129 @@...@@
7130 @....@@<-
7131 */
7132 // aa[] belongs to the frame 0 only! 2010-10
7133 if (aa[0][0]-x0< dx/4 && aa[0][1]-y0< dy/4) Break; // upper left /
7134 if (aa[2][0]-x0>3*dx/4 && aa[2][1]-y0>3*dy/4) Break; // lower right /
7135 if (aa[1][0]-x0> dx/4 || aa[1][1]-y0<3*dy/4) Break; // lower left /
7136 if (aa[3][0]-x0<3*dx/4 || aa[3][1]-y0> dy/4) Break; // upper right /
7137 if (dx>7 && ad==100) {ad=ad*99/100;MSG({})} // ocr-b
7138 if (box1->frame_vol[1]>=box1->frame_vol[0]) Break;
7139 if (abs(box1->frame_vol[1] - box1->frame_vol[2]) // vol=4
7140 >=abs(box1->frame_vol[1] + box1->frame_vol[2])/8) Break;
7141 // ToDo: check one box above and the other below dy/2 ("u)
7142 if (ad==100) {ad=ad*99/100;MSG(fprintf(stderr,"ad= %d",ad);)}
7143 }
7144 if (box1->num_frames == 5
7145 && sdata->holes.num == 2
7146 && box1->frame_vol[1]>0) { // big font
7147 // aa[] belongs to the frame 0 only! 2010-10
7148 if (aa[0][0]-x0< dx/4 && aa[0][1]-y0< dy/4) Break; // upper left /
7149 if (aa[2][0]-x0>3*dx/4 && aa[2][1]-y0>3*dy/4) Break; // lower right /
7150 if (aa[1][0]-x0> dx/4 || aa[1][1]-y0<3*dy/4) Break; // lower left /
7151 if (aa[3][0]-x0<3*dx/4 || aa[3][1]-y0> dy/4) Break; // upper right /
7152 if( num_cross(x0,x1 ,y0+dy/4,y0+dy/4,box1->p,cs) != 3
7153 && num_cross(x0,x1 ,y0+dy/8,y0+dy/8,box1->p,cs) != 3 ) Break;
7154 if( num_cross(x0,x1+dx/4,y1-dy/4,y1-dy/4,box1->p,cs) != 3
7155 && num_cross(x0,x1+dx/4,y1-dy/8,y1-dy/8,box1->p,cs) != 3 ) Break;
7156 if( num_cross(x0,x1, y0, y1,box1->p,cs) < 4
7157 && num_cross(x0+dx/8,x1, y0, y1,box1->p,cs) < 4
7158 && num_cross(x0,x1+dx/4, y0, y1,box1->p,cs) < 4
7159 && dx>7 && dy>15) Break;
7160 if (dx>7 && dy>12) {
7161 if( num_hole(x0 ,x1 ,y0,y1-dy/4,box1->p,cs,NULL) != 1 ) Break;
7162 if( num_hole(x0+dx/4,x1+dx/4,y0+dy/4,y1,box1->p,cs,NULL) != 1 ) Break;
7163 if( num_hole(x0 ,x1+dx/4,y0,y1 ,box1->p,cs,NULL) != 2 ) Break;
7164 } else {ad=98*ad/100;MSG({})}
7165 }
7166
7167 // use box1->p instead of b, because % is a sum of 3 objects + 2 holes
7168 if (loop(box1->p,x0,y0 ,dx,cs,0,RI)
7169 < loop(box1->p,x0,y0+dy/16+1,dx,cs,0,RI)){ad=96*ad/100;MSG({})} // X
7170 if (loop(box1->p,x1,y1 ,dx,cs,0,LE)
7171 < loop(box1->p,x1,y1-1-dy/16,dx,cs,0,LE)){ad=96*ad/100;MSG({})} // X
7172 for (x=0;x<dx;x++) { /* look for a vertical line and break if found */
7173 if ( get_bw(x0+x,x0+x,y0+dy/8,y1-dy/8,box1->p,cs,2) != 2 ) break;
7174 } if (x<dx) Break; // ~gluedVI
7175 if (gchar) ad=98*ad/100;
7176 ac=(wchar_t) '%';
7177 Setac(box1,ac,ad);
7178 if (ad>=100) return ac;
7179 break;
7180 }
7181 // --- test Omega ---------------------------------------------------
7182 for(ad=d=99;dx>7 && dy>7;){ // min 3x4
7183 DBG( wchar_t c_ask=GREEK_CAPITAL_LETTER_OMEGA; )
7184 if( get_bw(x0 , x0+dx/2,y0+dy/2 , y0+dy/2,box1->p,cs,1) != 1 ) Break;
7185 if( get_bw(x1-dx/2 , x1 ,y0+dy/2 , y0+dy/2,box1->p,cs,1) != 1 ) Break;
7186 if( get_bw(x0+dx/2 , x0+dx/2,y0 , y0+dy/2,box1->p,cs,1) != 1 ) Break;
7187 if( get_bw(x0+dx/2 , x0+dx/2,y0+dy/3 , y1-dy/3,box1->p,cs,1) != 0 ) Break;
7188
7189 if( num_cross(x0+dx/2,x0+dx/2,y0 , y1-dy/3,box1->p,cs) != 1 ) Break;
7190 if( num_cross(x0+dx/3,x1-dx/3,y0 , y0 ,box1->p,cs) != 1 ) // AND
7191 if( num_cross(x0+dx/3,x1-dx/3,y0+1 , y0+1 ,box1->p,cs) != 1 ) Break;
7192 if( num_cross(x0+dx/3,x1-dx/3,y1 , y1 ,box1->p,cs) != 2 ) // against "rauschen"
7193 if( num_cross(x0+dx/3,x1-dx/3,y1-1 , y1-1 ,box1->p,cs) != 2 ) Break;
7194 if( num_cross(x0 ,x0 ,y0+dy/3 , y1-dy/3,box1->p,cs) != 1 )
7195 if( num_cross(x0+1 ,x0+1 ,y0+dy/3 , y1-dy/3,box1->p,cs) != 1 ) Break;
7196 if( num_cross(x1 ,x1 ,y0+dy/3 , y1-dy/3,box1->p,cs) != 1 )
7197 if( num_cross(x1-1 ,x1-1 ,y0+dy/3 , y1-dy/3,box1->p,cs) != 1 ) Break;
7198 if (sdata->holes.num) Break;
7199 // if( num_hole(x0,x1,y0,y1,box1->p,cs,NULL) != 0 ) break;
7200
7201 if( loop(bp,0 ,0 ,x1-x0,cs,0,RI)<=
7202 loop(bp,0 ,2 ,x1-x0,cs,0,RI) ) Break;
7203 if( loop(bp,dx/2,dy-dy/4,x1-x0,cs,0,RI)>dx/4
7204 || loop(bp,dx/2,dy-dy/4,x1-x0,cs,0,LE)>dx/4 ) Break;
7205 if( loop(bp,dx/2,3*dy/8,x1-x0,cs,0,RI)<dx/4
7206 || loop(bp,dx/2,3*dy/8,x1-x0,cs,0,LE)<dx/4 ) Break;
7207
7208 i=loop(bp,0,dy-1-dy/16,x1-x0,cs,0,RI); if(i>dx/8) Break;
7209 x=loop(bp,i,dy-1-dy/16,x1-x0,cs,1,RI); i+=x; if(i<3*dx/8 || i>dx/2) Break;
7210 x=loop(bp,i,dy-1-dy/16,x1-x0,cs,0,RI); i+=x; if(i<dx/2 || i>5*dx/8) Break;
7211 x=loop(bp,i,dy-1-dy/16,x1-x0,cs,1,RI); i+=x; if(i<7*dx/8) Break;
7212
7213 /* look for a vertical gap at lower end */
7214 for( x=dx/4;x<3*dx/4;x++ ){
7215 i=loop(bp,x,dy-1,y1-y0,cs,0,UP);
7216 if( i>3*dy/4 ) break;
7217 }
7218 if( x>=3*dx/4 ) Break;
7219
7220 if( !hchar ) ad=60*ad/100;
7221 bc=GREEK_CAPITAL_LETTER_OMEGA;
7222 Setac(box1,bc,ad);
7223 break;
7224 }
7225
7226 return bc;
7227 }
7228
7229 // -------------------- OCR engine ;) ----------------------------
ocr0(struct box * box1,pix * bp,int cs)7230 wchar_t ocr0(struct box *box1, pix *bp, int cs){
7231 // pix p=*(box1->p);
7232 int i,j,d,x,y,x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1;
7233 int dx=x1-x0+1,dy=y1-y0+1, /* size */
7234 rx,ry,r1,r2,i1,i2,ad; /* tmp-vars */
7235 // ad,ac will be used in future
7236 wchar_t bc = UNKNOWN; // bestletter
7237 wchar_t um = SPACE; // modifier '"
7238 int hchar; // char is higher than e
7239 int gchar; // char has ink lower than m3
7240 int aa[4][4]; /* corner points, see xX, (x,y,dist^2,vector_idx) v0.41 */
7241 ocr0_shared_t sdata; // data used in all subfunctions
7242
7243 sdata.box1=box1;
7244 sdata.bp=bp;
7245 sdata.cs=cs;
7246 // --- hchar --- gchar -------------------------
7247 hchar=0;if( y0 < box1->m2-(box1->m2-box1->m1)/2 ) hchar=1;
7248 gchar=0;if( // 2018-09 nums1 bad m1..m4
7249 y1 > box1->m3+(box1->m4-box1->m3)/2 ) gchar=1;
7250 // if the char is slightly moved down correction can be done
7251 if ( y0<box1->m2 && y1>box1->m3 && 2*y1<box1->m3+box1->m4) // moved
7252 if( 2*(y0-(y1-box1->m3))<=2*box1->m2-(box1->m2-box1->m1) ) hchar=1;
7253
7254 sdata.hchar=hchar;
7255 sdata.gchar=gchar;
7256
7257 /* search for nearest points to the 4 corners, typical for xX */
7258 /* this is faster than calling nearest_frame_vector 4 times */
7259 /* 2018-09 ToDo: on 5x8 "y" this is a problem bottom right 2 vectors */
7260 aa[0][0]=aa[1][0]=aa[2][0]=aa[3][0]=(x0+x1)/2; /* set to center */
7261 aa[0][1]=aa[1][1]=aa[2][1]=aa[3][1]=(y0+y1)/2; /* set to center */
7262 aa[0][2]=aa[1][2]=aa[2][2]=aa[3][2]=2*sq(128); /* distance to box edges */
7263 aa[0][3]=aa[1][3]=aa[2][3]=aa[3][3]=0; /* vector index */
7264 /* searching for 4 diagonal line ends */
7265 for (i=0;i<box1->num_frame_vectors[0];i++) {
7266 x=box1->frame_vector[i][0]; /* take a vector */
7267 y=box1->frame_vector[i][1];
7268 /* distance to upper left end, normalized to 128 */
7269 j=0; d=sq((x-x0)*128/dx)+sq((y-y0)*128/dy);
7270 // fprintf(stderr," setaa i= %2d xy= %3d %3d d=%5d aa[3]=%2d\n",i,x-x0,y-y0,d,aa[0][3]);
7271 if (d<aa[j][2]) { aa[j][0]=x; aa[j][1]=y; aa[j][2]=d; aa[j][3]=i; }
7272 /* distance to lower left end */
7273 j=1; d=sq((x-x0)*128/dx)+sq((y-y1)*128/dy); // ToDo18 see j=2-3
7274 if (d<aa[j][2]) { aa[j][0]=x; aa[j][1]=y; aa[j][2]=d; aa[j][3]=i; }
7275 /* distance to lower right end */
7276 j=2; d=sq((x-x1)*128/dx)+sq((y-y1)*128/dy);
7277 if (d<aa[j][2] ||
7278 (d==aa[j][2] && i>aa[j-1][3])) // 2018-09 5x8.y
7279 { aa[j][0]=x; aa[j][1]=y; aa[j][2]=d; aa[j][3]=i; }
7280 /* distance to upper right end */
7281 j=3; d=sq((x-x1)*128/dx)+sq((y-y0)*128/dy);
7282 if (d<aa[j][2] ||
7283 (d==aa[j][2] && i>aa[j-1][3])) // 2018-09 5x8.z
7284 { aa[j][0]=x; aa[j][1]=y; aa[j][2]=d; aa[j][3]=i; }
7285 }
7286 for (i=0;i<16;i++) sdata.aa[i/4][i%4]=aa[i/4][i%4];
7287
7288 /* extract number position and size of holes and store in a table
7289 * - hole coordinates are relative to box (x-x0,y-y0)
7290 */
7291 sdata.holes.num=0; /* set by num_hole(), ToDo18 better .num_frames */
7292 if (box1->num_frames>0) // speedup v0.42
7293 num_hole(x0,x1,y0,y1,box1->p,cs,&sdata.holes); // call once
7294 // printf(" num_holes=%d\n",sdata.holes.num);
7295
7296 /*
7297 after division of two glued chars, boundaries could be wrong,
7298 check this first (ToDo: only if a flag set?)
7299 */
7300 if (2*y0 < box1->m2+box1->m3)
7301 if (box1->m4>box1->m3 && 2*box1->y1>box1->m4+box1->m3){
7302 /* could be a "I" from divided "Ij" or "Ig" */
7303 for(y=(box1->m3+box1->m2)/2;2*y<box1->m3+box1->m4;y++)
7304 if( get_bw(x0,x1,y,y,box1->p,cs,1)==0 ) break;
7305 if(2*y<box1->m3+box1->m4)
7306 if( get_bw((x0+x1)/2,(x0+x1)/2,y,box1->m4,box1->p,cs,1)==0 ){
7307 /* be sure, ~_ */
7308 if (y>y0) y1=box1->y1=y;
7309 }
7310 }
7311
7312 DBG( IFV fprintf(stderr,"\nDBG L%d (%d,%d): ",__LINE__,box1->x0,box1->y0); )
7313 DBG( IFV out_b(box1,sdata.bp,0,0,dx,dy,160); )
7314 DBG( IFV fprintf(stderr,"# aa[] %d %d %d %d %d %d %d %d"
7315 " (4 corners)"
7316 " d= %d %d %d %d vi= %d %d %d %d",
7317 aa[0][0]-x0,aa[0][1]-y0,aa[1][0]-x0,aa[1][1]-y0,
7318 aa[2][0]-x0,aa[2][1]-y0,aa[3][0]-x0,aa[3][1]-y0,
7319 aa[0][2], aa[1][2], aa[2][2], aa[3][2],
7320 aa[0][3], aa[1][3], aa[2][3], aa[3][3]);)
7321 // thin fonts may have inner and outer vector nearest to corner, 5x8.y
7322 DBG( IFV fprintf(stderr,"\n# holes %d gchar=%d hchar=%d",sdata.holes.num, gchar, hchar);)
7323
7324 // --- test thin lines - ---------------------------------
7325 for( ad=100; 2*dy<box1->m3-box1->m2 && 3*dx>=4*dy && dx>2; ){ // min 3x3 (small font)
7326 DBG( wchar_t c_ask='-'; )
7327 if( get_bw(x0+dx/8+1,x1-dx/8-1,y0+dy/8+((dy>2)?1:0),
7328 y1-dy/8-((dy>2)?1:0),box1->p,cs,2)==2 ) break;
7329 if( box1->dots ) { Setac(box1,'=',97);break; }
7330 if (dx<=2*dy) ad=99*ad/100;
7331 if (dx<=3*dy) ad=99*ad/100;
7332 if (!box1->m4) ad=96*ad/100;
7333 else {
7334 if (y1>=box1->m3) {
7335 if ( dx<2*dy) ad=98*ad/100;
7336 if (2*dx<3*dy) ad=98*ad/100;
7337 Setac(box1,'_',ad);
7338 break;
7339 }
7340 }
7341 Setac(box1,'-',ad); if (ad>=100) return '-';
7342 break;
7343 }
7344 // --- test thin lines = ---------------------------------
7345 for( ad=100; dy>2 && dx>2; ){ // min 3x3 (small font)
7346 DBG( wchar_t c_ask='='; )
7347 if (box1->num_frames != 2) break;
7348 for( y=y0;y<y1;y++) // remove upper empty space
7349 if( get_bw(x0+dx/10,x1-dx/10,y ,y ,box1->p,cs,1)==1 ) break;
7350 if( get_bw(x0+dx/10,x1-dx/10,y ,y ,box1->p,cs,2)==2 ) break;
7351 if( get_bw(x0 ,x1 ,(y+y1)/2,(y+y1)/2,box1->p,cs,1)==1 ) break;
7352 if( get_bw(x0+dx/10,x1-dx/10,y1 ,y1 ,box1->p,cs,2)==2 ) break;
7353 if (2*dx < dy) Break; // 2010-10-08
7354 if (4*dx < 3*dy) ad=ad*99/100;
7355 Setac(box1,'=',ad);
7356 return '=';
7357 }
7358 // --- test dots : ---------------------------------
7359 for( ad=100; dy>2 && dy>=2*dx; ){ // max 3x3 (small font)
7360
7361 DBG( wchar_t c_ask=':'; )
7362 if (box1->num_boxes!=2 || box1->num_subboxes!=0) Break;
7363 // check the gap height
7364 for( i1=dy/16;i1<dy/2;i1++)
7365 if( get_bw(x0+dx/8,x1-dx/8,y0+i1,y0+i1,box1->p,cs,1)==0 ) break;
7366 if (i1>=dy/2) Break;
7367 for( i2=dy/16;i2<dy/2;i2++)
7368 if( get_bw(x0+dx/8,x1-dx/8,y1-i2,y1-i2,box1->p,cs,1)==0 ) break;
7369 if (i2>=dy/2) Break;
7370
7371 if (box1->m3 && y1>box1->m3) ad=98*ad/100; // ~;
7372 if (box1->m3 && 2*y0> box1->m2+box1->m1) ad=98*ad/100; // ~i
7373 if (gchar) ad=99*ad/100;
7374 MSG(fprintf(stderr,"gap dot.dy12 %d %d ad %d",i1,i2,ad);)
7375 ad=ad-abs(i1-i2)/dy*20;
7376 if (abs(i1-dx)>dy/4) Break; // round or quadratic dots?
7377 if (abs(i1-dx)>dy/8) ad=98*ad/100;
7378 if (abs(i2-dx)>dy/4) Break; // round or quadratic dots?
7379 if (abs(i2-dx)>dy/8) ad=98*ad/100;
7380 if (box1->dots!=1) ad=96*ad/100;
7381 MSG(fprintf(stderr,"gap dot.dy12 %d %d ad %d",i1,i2,ad);)
7382 Setac(box1,':',ad); // dx<=3 ad--
7383 if (ad>=100) return ':';
7384 break;
7385 }
7386 // --- test dots ; ---------------------------------
7387 if ( 2*y0> box1->m2+box1->m1 ) // ~i
7388 if ( 4*y1>=3*box1->m3+box1->m2 ) // ~:
7389 if (box1->num_frames == 2) // 2010-10-08
7390 for( ad=100; dy>5 && dx>1 && 2*dy>3*dx; ){ // max 3x3 (small font)
7391 DBG( wchar_t c_ask=';'; )
7392 // it would be better to detect a round pixel cluster on top
7393 // check height of upper and lower dot
7394 for( i1=0;i1<dy/2;i1++)
7395 if( get_bw(x0,x1,y0+i1,y0+i1,box1->p,cs,1)==0 ) break;
7396 if (i1>=dy/2) break;
7397 for( i2=0;i2<dy/2;i2++)
7398 if( get_bw(x0,x1,y1-i2,y1-i2,box1->p,cs,1)==0 ) break;
7399 if (i2<i1) break;
7400
7401 /* test for horizontal symmetry ~i */
7402 for (y=0;y<dy;y++) for (x=0;x<dx/2;x++)
7403 if ((getpixel(bp,x,y)<cs)!=(getpixel(bp,dx-1-x,y)<cs)) { y=dy+1; break; }
7404 if (y==dy) ad=96*ad/100; /* ~i */
7405
7406 if (i2==i1 && y1<=box1->m3) ad=97*ad/100;
7407 if (i2-i1<dy/8) ad=99*ad/100;
7408 Setac(box1,';',ad); // dx<=3 ad--
7409 if (ad>=100) return ';';
7410 break;
7411 }
7412 // --- first test small dots . ---------------------------------
7413 if( 3*dy<box1->m4-box1->m1+2 && abs(dx-dy)<(dx+dy)/4+2
7414 && 3*y1>=(2*box1->m3+ box1->m2) // dot near baseline?
7415 && 5*y0>=(3*box1->m3+2*box1->m2) ){ // Jul00
7416 DBG( wchar_t c_ask='.'; )
7417 d=0; r1=60;r2=140; ad=99;
7418 for(x=x0;x<=x1;x++)for(y=y0;y<=y1;y++){ /* circle equation */
7419 rx=100*(2*x-(x0+x1))/dx; // normalize to 15bit number
7420 ry=100*(2*y-(y0+y1))/dy;
7421 if( rx*rx + ry*ry < r1*r1 ) if( getpixel(box1->p,x,y)>=cs ){ d++;x=x1+1;y=y1+1; }
7422 if( rx*rx + ry*ry > r2*r2 ) if( getpixel(box1->p,x,y)< cs ){ d++;x=x1+1;y=y1+1; }
7423 // MSG( fprintf(stderr,"x= %3d %3d r= %6d %6d %6d", rx, ry, rx*rx+ry*ry, r1*r1, r2*r2); )
7424 }
7425 if (loop(box1->p,x0,y0+dy/2,x1-x0,cs,0,RI)> dx/8) { ad=98*ad/100; // ,
7426 if (loop(box1->p,x0,y1 ,x1-x0,cs,0,RI)<=dx/8) ad=98*ad/100; } // ,
7427 MSG( fprintf(stderr,"d= %3d ad= %3d", d, ad); )
7428 if(d==0) // 2018-09 adding 12.5% (1/8) tolerance for handwritten
7429 if( loop(box1->p,x0,y0+dy/8,x1-x0,cs,0,RI) // top left vs. komma?
7430 <= loop(box1->p,x0,y1-dy/8,x1-x0,cs,0,RI)+dx/8 // bottom left
7431 || loop(box1->p,x1,y0+dy/8,x1-x0,cs,0,LE) // top right
7432 >= loop(box1->p,x1,y1-dy/8,x1-x0,cs,0,LE)-dx/8 ) // bottom right
7433 {
7434 bc='.';
7435 if (box1->dots) { Setac(box1,':',ad); ad=98*ad/100; }
7436 Setac(box1,bc,ad);
7437 }
7438 }
7439 // --- first test small dots , ---------------------------------
7440 if( 3*dy<2*(box1->m4-box1->m1)
7441 && 2*y0> box1->m2+box1->m3
7442 && (2*dx<3*dy
7443 || get_bw(0,dx/2,dy/2,dy-1,bp,cs,1)==0)
7444 && box1->dots==0 ){ // ocr-a-,
7445 DBG( wchar_t c_ask=','; )
7446 ad=100; bc=',';
7447 if (dy==1 && dx==1) ad=98*ad/100;
7448 if (dy==2 && dx==1) ad=99*ad/100; // this is a problem case
7449 if (dx>=dy) ad=99*ad/100;
7450 if (2*dy >= box1->m4-box1->m1)
7451 {ad=98*ad/100;MSG(fprintf(stderr,"ad= %d",ad);)}
7452 if( loop(box1->p,x0,y0,x1-x0,cs,0,RI) /* simple line */
7453 > loop(box1->p,x0,y1,x1-x0,cs,0,RI)
7454 && loop(box1->p,x1,y0,x1-x0,cs,0,LE)
7455 < loop(box1->p,x1,y1,x1-x0,cs,0,LE) ) { ad=99*ad/100; }
7456 else { /* with upper filled circle or box */
7457 if( loop(box1->p,x0,(y0+y1+1)/2,x1-x0,cs,0,RI)<dx/2 ) ad=98*ad/100;
7458 if( loop(box1->p,x1, y1 ,x1-x0,cs,0,LE)<dx/2 ) ad=99*ad/100;
7459 MSG(fprintf(stderr,"ad= %d",ad);)
7460 // if( loop(box1->p,x0,y1-((dy>5)?1:0),x1-x0,cs,0,LE)>(dx+1)/2 )
7461 // if( loop(box1->p,x0, y1 ,x1-x0,cs,0,LE)>(dx+1)/2 )
7462 // { ad=96*ad/100; MSG(fprintf(stderr,"ad= %d",ad);) }
7463 }
7464 // if(box1->dots==1) { Setac(box1,';',ad); ad=99*ad/100; }
7465 Setac(box1,bc,ad);
7466 }
7467 // --- first test small dots ''""`` (quotation)--------------
7468 if (// 2*dy < box1->m4 -box1->m1+1 // failed for ocr-b dy=8 bad_m4-m1=13
7469 dy < box1->m3 - box1->m2 // should work always 2010-10-08
7470 && 2*y0 < box1->m2 + box1->m3
7471 && 3*y1 < box1->m2+2*box1->m3+2 ){
7472 DBG( wchar_t c_ask='\''; )
7473 ad=100; bc='\'';
7474 if (3*y1>= box1->m2+2*box1->m3) { ad=96*ad/100; MSG({}) }
7475 if (2*y1>= box1->m2+ box1->m3) { ad=99*ad/100; MSG({}) }
7476 if (box1->num_frames>1) { // ~!
7477 if (2*y1>= box1->m2+ box1->m3) { ad=96*ad/100; MSG({}) } // ~!
7478 if (3*y1>=2*box1->m2+ box1->m3) { ad=96*ad/100; MSG({}) }
7479 if (get_bw(x0,x1,(box1->m2+3*box1->m3)/4,box1->m4,box1->p,cs,1)!=0)
7480 { ad=98*ad/100; MSG({}) }
7481 }
7482 if (dx>4
7483 && num_cross(x0,x1,(y0+3*y1)/4,(y0+3*y1)/4,box1->p,cs) == 2) { // " "
7484 DBG( c_ask='\"'; )
7485 MSG(fprintf(stderr,"double quote detected");)
7486 bc='\"';
7487 // ocr-a-" has no gap!
7488 if ( get_bw((x0+x1)/2,(x0+x1)/2,y0,y1,box1->p,cs,1)!=0 ) ad=96*ad/100;
7489 } else {
7490 if ( num_cross(x0,x1, y0 , y0 ,box1->p,cs)!=1) ad=96*ad/100;
7491 if ( num_cross(x0,x1,(y0+y1)/2,(y0+y1)/2,box1->p,cs)!=1) ad=98*ad/100;
7492 if (dx>dy) { ad=96*ad/100; MSG({}) }
7493 }
7494 if ( num_cross(x1,x1, y0 , y1 ,box1->p,cs)!=1
7495 && num_cross(x0,x0, y0 , y1 ,box1->p,cs)!=1) ad=99*ad/100;
7496 if ( num_cross(x0,x1, y0+dy/4 , y0+dy/4 ,box1->p,cs)>2) ad=97*ad/100;
7497 if ( num_cross(x0,x1, y1-dy/4 , y1-dy/4 ,box1->p,cs)>2) ad=97*ad/100; // * 5x8font
7498 if ( loop(box1->p,x0,y0,dx,cs,0,RI)==0
7499 && loop(box1->p,x0,y1,dx,cs,0,RI)> 0
7500 && loop(box1->p,x1,y0,dx,cs,0,LE)> 0
7501 && loop(box1->p,x1,y1,dx,cs,0,LE)==0) bc='`'; // 2010-10-08 qemu
7502
7503 if (2*y0 > box1->m1+box1->m2) ad=99*ad/100;
7504 Setac(box1,bc,ad);
7505 if (ad>=100) return bc;
7506 }
7507 // --- first double dots ,, (quotation) -------------- 2010-10-01
7508 if( 2*dy < box1->m4 -box1->m1+1
7509 && 2*y0 >= box1->m2 +box1->m3
7510 && y1 <= box1->m4+1
7511 && box1->num_frames == 2 // ToDo: quotation without gap???
7512 && box1->num_boxes == 2
7513 && box1->num_subboxes == 0
7514 && sdata.holes.num == 0 ){
7515 DBG( wchar_t c_ask='"'; )
7516 ad=100; bc='"';
7517 if (2*y1 >= box1->m4) { ad=98*ad/100; MSG({}) }
7518 if (4*y0 <= box1->m2 + 3*box1->m3) { ad=99*ad/100; MSG({}) }
7519 if ( num_cross(x0,x1,(y0+3*y1)/4,(y0+3*y1)/4,box1->p,cs) != 2) ad=90*ad/100;
7520 if ( num_cross(x1,x1, y0 , y1 ,box1->p,cs)!=1) ad=99*ad/100;
7521 if ( num_cross(x0,x1, y0+dy/4 , y0+dy/4 ,box1->p,cs)>2) ad=97*ad/100;
7522 if ( num_cross(x0,x1, y1-dy/4 , y1-dy/4 ,box1->p,cs)>2) ad=97*ad/100; // * 5x8font
7523 if (2*y1 > box1->m3+box1->m4) ad=99*ad/100;
7524 Setac(box1, DOUBLE_LOW_9_QUOTATION_MARK, ad);
7525 if (ad>=100) return bc;
7526 }
7527 // --- TILDE ~ ---------------------------------
7528 if( 2*dy<box1->m4-box1->m1 && dx>=dy && dx>3 && dy>1
7529 && 2*y0< box1->m1+box1->m2
7530 && 3*y1<2*box1->m2+box1->m3 ){
7531 if( loop(box1->p,x0,y0,dx,cs,0,RI)
7532 > loop(box1->p,x0,y1,dx,cs,0,RI)
7533 && loop(box1->p,x1,y0,dx,cs,0,LE)
7534 < loop(box1->p,x1,y1,dx,cs,0,LE)
7535 && num_cross(x0,x1,y0,y0,box1->p,cs) == 2
7536 && num_cross(x0,x1,y1,y1,box1->p,cs) == 2 ) {
7537 DBG( wchar_t c_ask='~'; )
7538 bc=TILDE;
7539 Setac(box1,bc,99);
7540 }
7541 }
7542 // --- CIRCUMFLEX, hat ^ ---------------------------------
7543 if( 2*dy<box1->m4-box1->m1 && dx>=dy && dx>2 && dy>1
7544 && 2*y0< box1->m1+box1->m2
7545 && 3*y1<2*box1->m2+box1->m3
7546 && box1->num_frames == 1 // ToDo: 2010-10-01 what about \^O ???
7547 && box1->num_boxes == 1
7548 && box1->num_subboxes == 0
7549 && sdata.holes.num == 0 ){
7550 DBG( wchar_t c_ask='^'; )
7551 if( ( loop(box1->p,x0,y0 ,dx,cs,0,RI)
7552 > loop(box1->p,x0,y1 ,dx,cs,0,RI)-dx/8
7553 || loop(box1->p,x0,y0 ,dx,cs,0,RI)
7554 > loop(box1->p,x0,y1-1,dx,cs,0,RI)-dx/8 )
7555 && ( loop(box1->p,x1,y0 ,dx,cs,0,LE)
7556 > loop(box1->p,x1,y1 ,dx,cs,0,LE)-dx/8
7557 || loop(box1->p,x1,y0 ,dx,cs,0,LE)
7558 > loop(box1->p,x1,y1-1,dx,cs,0,LE)-dx/8 )
7559 && num_cross(x0,x1,y0 ,y0 ,box1->p,cs) == 1
7560 && ( num_cross(x0,x1,y1 ,y1 ,box1->p,cs) == 2
7561 || num_cross(x0,x1,y1-1,y1-1,box1->p,cs) == 2 )) {
7562 bc='^';
7563 Setac(box1,bc,99);
7564 }
7565 }
7566 // ------------------------------------------------------
7567 // if( dots==1 ){ um='\''; }
7568 #if 0 /* ToDo: change to vectors, call here or in whatletter */
7569 if (box1->dots==0) { // i-dots ??? (if dots==0 is wrong)
7570 y=box1->m1;
7571 for(;y<y0+dy/2;y++)if( get_bw(x0+dx/4,x1,y,y,box1->p,cs,1)==1) break;
7572 { i1=y;
7573 if( y<y0+dy/4 )
7574 for(;y<y0+dy/2;y++)if( get_bw(x0,x1,y,y,box1->p,cs,1)==0) break;
7575 if( y<y0+dy/2 && 5*(y-i1+1)>box1->m2-box1->m1){
7576 testumlaut(box1,cs,2,&um); // set modifier + new y0 ???
7577
7578 }
7579 }
7580 }
7581 #else
7582 um = box1->modifier;
7583 #endif
7584 if ( /* um==ACUTE_ACCENT || */ um==DIAERESIS){
7585 for(y=y1;y>y0;y--)
7586 if( get_bw(x0,x1,y,y,box1->p,cs,1)==0) { y0=y; dy=y1-y0+1; break; } // scan "a "o "u
7587 }
7588
7589 // --- test numbers 0..9 --- separated for faster compilation
7590 if( OCR_JOB->cfg.only_numbers ) return ocr0n(&sdata);
7591
7592 // bc=ocr1(box1,bp,cs);
7593 if(bc!=UNKNOWN && box1->num_ac>0 && box1->wac[0]==100)
7594 return bc; // for fast compilable tests
7595
7596 // ------ separated for faster compilation
7597 // ToDo: insert ocr0_shared_t here and split into a,b,cC,d,e,f,g9,...
7598 #define IF_NOT_SURE if(bc==UNKNOWN || box1->num_ac==0 || box1->wac[0]<100)
7599
7600 IF_NOT_SURE bc=ocr0_eE(&sdata);
7601 IF_NOT_SURE bc=ocr0_f(&sdata);
7602 IF_NOT_SURE bc=ocr0_bB(&sdata);
7603 IF_NOT_SURE bc=ocr0_dD(&sdata);
7604 IF_NOT_SURE bc=ocr0_F(&sdata);
7605 IF_NOT_SURE bc=ocr0_uU(&sdata);
7606 IF_NOT_SURE bc=ocr0_micro(&sdata);
7607 IF_NOT_SURE bc=ocr0_vV(&sdata);
7608 IF_NOT_SURE bc=ocr0_rR(&sdata);
7609 IF_NOT_SURE bc=ocr0_m(&sdata);
7610 IF_NOT_SURE bc=ocr0_tT(&sdata);
7611 IF_NOT_SURE bc=ocr0_sS(&sdata);
7612 IF_NOT_SURE bc=ocr0_gG(&sdata);
7613 IF_NOT_SURE bc=ocr0_xX(&sdata);
7614 IF_NOT_SURE bc=ocr0_yY(&sdata);
7615 IF_NOT_SURE bc=ocr0_zZ(&sdata);
7616 IF_NOT_SURE bc=ocr0_wW(&sdata);
7617 IF_NOT_SURE bc=ocr0_aA(&sdata);
7618 IF_NOT_SURE bc=ocr0_cC(&sdata);
7619 IF_NOT_SURE bc=ocr0_lL(&sdata);
7620 IF_NOT_SURE bc=ocr0_oO(&sdata);
7621 IF_NOT_SURE bc=ocr0_pP(&sdata);
7622 IF_NOT_SURE bc=ocr0_qQ(&sdata);
7623 IF_NOT_SURE bc=ocr0_iIjJ(&sdata);
7624 IF_NOT_SURE bc=ocr0_n(&sdata);
7625 IF_NOT_SURE bc=ocr0_M(&sdata);
7626 IF_NOT_SURE bc=ocr0_N(&sdata);
7627 IF_NOT_SURE bc=ocr0_h(&sdata);
7628 IF_NOT_SURE bc=ocr0_H(&sdata);
7629 IF_NOT_SURE bc=ocr0_k(&sdata);
7630 IF_NOT_SURE bc=ocr0_K(&sdata);
7631 IF_NOT_SURE bc=ocr0n(&sdata);
7632 IF_NOT_SURE bc=ocr0_brackets(&sdata);
7633 IF_NOT_SURE bc=ocr0p9(&sdata);
7634 IF_NOT_SURE bc=ocr0px(&sdata);
7635
7636
7637 if(box1->num_ac==0 && bc!=UNKNOWN) fprintf(stderr,"<!--ERROR 576-->");
7638 if(box1->num_ac>0 && box1->wac[0]>95) box1->c=bc=box1->tac[0];
7639 /* will be removed later, only fix old things */
7640 for (i=0;i<box1->num_ac;i++) if (box1->tac[i]==bc) { bc=box1->tac[0]; }
7641
7642 return bc;
7643 }
7644
7645
7646