/* rule based OCR engine, partly rewritten for edges (old=pixel) */
/*
This is a Optical-Character-Recognition program
Copyright (C) 2000-2007 Joerg Schulenburg

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

 see README for email address

  >>> DO NOT EDIT THIS FILE IF YOU DO NOT REALLY KNOW WHAT YOU ARE DOING! <<<

  I have invested a lot of time to write this part of the program.
  This engine should recognize chars always right or return UNKNOWN.
  If you change something, test all other example files too,
  to be sure that all things work better. (JoergS)

  This engine was pixel-based until 0.40, which was not successful enough.
  Also, code changes always had side effects. The vectorisation of the code
  starts from version 0.41 with the chars XNz and seems to be much better
  to handle. Vectorization means we frame each character by a chain of
  vectors and don't care about pixels anymore. Unfortunately I have to
  replace all the pixel codes, which is a long process. Old code will be lost.
(JorgS) ToDo: - if box1->p and b differ, reduce probability - probability makes life much easier here - use only one box!?, may be bits have usefull infos - divide this file, suggestion: classify chars: high=ABCDEFGHIJKLMNOPQRSTUVWXYZbdfhklt, low=acegijmnopqrsuvwxyz or often_used=etianmsurwdkgo rarely_used=hvjcflpqxyz.,: or every char (large overhead) - two-pass version (first pass without tolerance) 2nd pass with tolerance (ex: one tiny more in sdata->holes) general feature extraction: - white holes at middle, upper, lower position (cost much time) - test lines and triangles insteat of rectangles char is removed, wchar_t is used (better code) making a static global variable-set x.x0,x.x1, and call test_a, test_b ... (faster compilation, but not reentrant!) - adding slant-angle (if detected) to distinguish between l and / ? - ac (alternate chars) as string add_ac(box1,"/") => box1->ac="Il/"; for better context correction or output: "Ha[lI][lI]o!" */ #include #include // #include "pgm2asc.h" #include "ocr0.h" // #include "ocr1.h" #include "pnm.h" #include "gocr.h" #define IFV if(JOB->cfg.verbose&4) #define MM {IFV fprintf(stderr,"\nDBG %c L%04d (%d,%d): ",(char)c_ask,__LINE__,box1->x0,box1->y0);} // the old debug mode (0.40) was only for a special char, for another char // code must be recompiled with C_ASK='char' // new debug mode (0.41) explains why char is declined or accepted as ABC... // the output can be filtered by external scripts // ToDo: we could reduce output to filter string #ifndef DO_DEBUG /* can be defined outside */ #define DO_DEBUG 0 /* 0 is the default */ #endif /* this macro is for debugging output: "if char is declined, why?" 
*/ #if DO_DEBUG /* 0=Work mode, 1=debugging mode */ // Setac: output, that char is choosen with a probability // Break: output, why the char is not choosen // MSG: debugging functions for char C_ASK, mostly messages // DBG: definitions usefull only for debugging #define Setac(box1,ac,ad) { MM;IFV fprintf(stderr,"setac %d",ad);setac(box1,ac,ad); } #define Break { MM;IFV fprintf(stderr,"break"); break; } #define MSG(x) { MM;IFV x } #define DBG(x) x #else #define Setac(box1,ac,ad) setac(box1,ac,ad) #define Break break #define MSG(x) #define DBG(x) #endif /* extern "C"{ */ // static inline int sq(int x) { return x*x; } /* square */ /* * go from vector j1 to vector j2 and measure maximum deviation of * the steps from the line connecting j1 and j2 * return the squared maximum distance * in units of the box size times 1024 * ToDo: 1) better give back max-dx and max-dy ??? * errors if j1 and j2 are in different frames or belong to * more then one frame? * 2) Better get deviation from a complete vector graphic? * The vectorgraphic is the ideal test char adapted to the * extrem vertices of the real char. 
*/ int line_deviation( struct box *box1, int j1, int j2 ) { int r1x, r1y, r2x, r2y, r3x, r3y, i, x, y, d, dist, maxdist=0, frame, l2; r1x=box1->frame_vector[j1][0]; r1y=box1->frame_vector[j1][1]; r2x=box1->frame_vector[j2][0]; r2y=box1->frame_vector[j2][1]; if (!box1->num_frames) return(-1); if (j1<0 || j1>box1->num_frame_vectors[box1->num_frames-1] || j2<0 || j2>box1->num_frame_vectors[box1->num_frames-1]) { fprintf(stderr,"Error in "__FILE__" L%d: idx out of range",__LINE__); return(-1); } /* get the frame the endvector belongs to */ for (i=0;inum_frames;i++) if (j2num_frame_vectors[i]) break; frame=i; /* frame(j1)<=frame(j2) possible */ for (i=j1;;i++) { // do it for each vector between j1 and j2 if (i >= box1->num_frame_vectors[frame]) i=((frame)?box1->num_frame_vectors[frame-1]:0); /* go around */ if (i==j2) break; // for (i=j1;i!=j2;i=(i+1)%box1->num_frame_vectors[0]) {~} r3x=box1->frame_vector[i][0]; r3y=box1->frame_vector[i][1]; // Language=german // german: Abstand Punkt von Strecke, Laenge Lotrechte // germ.Strecke : l1=(r1+r2)/2+d*(r2-r1)/2 for d=-1..1 // germ.Lotrechte: l2=r3+b*[-(r2-r1).y,(r2-r1).x] // Schnittpunkt : l1=l2, // eq1x: (r1x+r2x)/2-r3x+d*(r2x-r1x)/2+b*(r2y-r1y)=0 // eq1y: (r1y+r2y)/2-r3y+d*(r2y-r1y)/2-b*(r2x-r1x)=0 // eq2x: b*(r2x-r1x)*(r2y-r1y)=-((r1x+r2x)/2-r3x+d*(r2x-r1x)/2)*(r2x-r1x) // eq2y: b*(r2x-r1x)*(r2y-r1y)= ((r1y+r2y)/2-r3y+d*(r2y-r1y)/2)*(r2y-r1y) // eq2y-eq2x: ... in units of 1024 (fast integer rounded correctly) l2=sq(r2x-r1x)+sq(r2y-r1y); // square of distance r2-r1 if (l2==0) { // fprintf(stderr,"ocr0 L%d: r1==r2 r1= %d %d",__LINE__, r1x, r1y); // debugging d=-1024; } else d=-( ((r1x+r2x)-2*r3x)*(r2x-r1x) +((r1y+r2y)-2*r3y)*(r2y-r1y))*1024/l2; // ..-1024..+1024.. 
if (d<=-1024) { x=r1x; y=r1y; } // starting point else { if (d>=1024) { x=r2x; y=r2y; } // end point else { x=((r1x+r2x)+1)/2+(d*(r2x-r1x))/2048; y=((r1y+r2y)+1)/2+(d*(r2y-r1y))/2048; /* we have the crossing point x,y now */ } } dist=sq((x-r3x)*1024/(box1->x1-box1->x0+1)) +sq((y-r3y)*1024/(box1->y1-box1->y0+1)); // 0..2*sq(1024) if (dist>maxdist) maxdist=dist; // for debugging: // fprintf(stderr,"\nDBG dev: %d-%d-%d dist=%5d max=%5d d=%d %d,%d-%d,%d" // " vector= %d %d crosspoint= %d %d ", // j1,i,j2,dist,maxdist,d,r1x,r1y,r2x,r2y,r3x,r3y,x,y); } return maxdist; } /* * search vectors between j1 and j2 for nearest point a to point r * example: * * r-> $$...$$ $ - mark vectors * @@$..@@ @ - black pixels * @@$..@@ . - white pixels * @@@@.$@ * a-> @@$@$@@ * @$.@@@@ * @@..$@@ * @@..$@@ * j1 --> $$...$$ <-- j2 * * ToDo: vector aa[5] = {rx,ry,x,y,d^2,idx} statt rx,ry? * j1 and j2 must be in the same frame * return aa? */ int nearest_frame_vector( struct box *box1, int j1, int j2, int rx, int ry) { int x,y,d,i,aa[4]; /* x,y,normalized_distance^2,vector_index */ int frame=0, x0=box1->x0, y0=box1->y0, x1=box1->x1, y1=box1->y1, dx=box1->x1-x0+1, dy=box1->y1-y0+1; if (!box1->num_frames) return(-1); if (j1<0 || j1>box1->num_frame_vectors[box1->num_frames-1] || j2<0 || j2>box1->num_frame_vectors[box1->num_frames-1]) { fprintf(stderr,"Error in "__FILE__" L%d: idx %d-%d out of range\n",__LINE__,j1,j2); //out_x(box1); return(-1); } aa[0]=x=box1->frame_vector[j2][0]; /* x */ aa[1]=y=box1->frame_vector[j2][1]; /* y */ /* maximum is (distance*128)^2 if r is inside the box */ aa[2]=d=2*sq(128)+sq((rx-(x0+x1)/2)*128/dx)+sq((ry-(y0+y1)/2)*128/dy); aa[3]=j2; /* vector index */ /* get the frame the endvector belongs to */ for (i=0;inum_frames;i++) if (j2num_frame_vectors[i]) break; frame=i; /* frame(j1)<=frame(j2) possible */ for (i=j1;;i++) { if (i >= box1->num_frame_vectors[frame]) i=((frame)?box1->num_frame_vectors[frame-1]:0); /* go around */ x=box1->frame_vector[i][0]; /* take a 
vector */ y=box1->frame_vector[i][1]; /* distance to upper left end, normalized to 128 */ d=sq((x-rx)*128/dx)+sq((y-ry)*128/dy); if (d0 and m==1 box1 is changed // m>0 modify box1->dots // m==2 modify box1->y0 // called by pgm2asc + ocr0(?) int testumlaut(struct box *box1, int cs, int m, wchar_t *modifier){ // pix p=*(box1->p); int r,y,x,x0,x1,y0,y1,dx,dy,m1,m2,m3, xl,xr,yu,yl; // left, right, upper and lower border of dots wchar_t mod='\0'; /* (TeX-) modifier ~"'` for compose() */ DBG( wchar_t c_ask='"'; ) r=0; x0=box1->x0; x1=box1->x1; dx=x1-x0+1; y0=box1->y0; y1=box1->y1; dy=y1-y0+1; m1=box1->m1; m2=box1->m2; m3=box1->m3; xl=x0; xr=x1; yu=yl=y0; if( dy < 5 || 4*y0 > 3*m2+m3 ) return 0; // no low chars: .,-= /* modifier in box included? */ if( 2*y1 > m1+m2 ){ /* modifier in box included? */ for(y=y0;2*yp,cs,1)==0 ) break; if( 2*y extract */ yl=y; while( get_bw(xl,xr,y,y,box1->p,cs,1)==0 && 2*y<=y0+y1) y++; if( m&2 ) box1->y0=y; /* set new upper bond */ } } if( yu>=yl ) { if(m) box1->dots=0; return 0; } /* nothing found */ if( get_bw(xl-1,xl-1,yu,yl-1,box1->p,cs,1)==1 ) // neighbour overlap? while( get_bw(xl ,xl ,yu,yl-1,box1->p,cs,1)==1 && 2*xlp,cs,1)==1 ) break; for(;xr>xl;xr--)if( get_bw(xr,xr,yu,yl,box1->p,cs,1)==1 ) break; if ( yl-1>yu ) { // tall box ij"a"o"u #if 0 x=box1->y0; box1->y0=m1; out_x(box1); box1->y0=x; fprintf(stderr,"\n#testumlaut x= %d %d m1=%d m2=%d",x0,y0,m1-y0,m2-y0); fprintf(stderr," yu=%d yl=%d xl=%d xr=%d",yu-y0,yl-y0,xl-x0,xr-x0); #define DEBUG 1 #endif { x=xl;y=yu; if( get_bw(xl,x1+1,yu,yl-1,box1->p,cs,1)==0 ) r=0; // neighbour overlap? else if( get_bw(xl ,xl ,yu,yl-1,box1->p,cs,1)==0 || get_bw(xl-1,xl-1,yu,yl-1,box1->p,cs,1)==0 ) // be sure there are gap to neighbours if( get_bw(xr ,xr ,yu,yl-1,box1->p,cs,1)==0 || get_bw(xr+1,xr+1,yu,yl-1,box1->p,cs,1)==0 ) { int i,j,x; r=1; // ...@@@.... RING_ABOVE // ..@@@..@@. TILDE // ..@...@... // @@.@@@@@.. // ..@...@... // @......... // ..@..@@... // ...@@@.... 
for (i=yu;ip,cs,1)==1) break; for ( ;ip,cs,1)==0) break; for (j=xl;jp,cs,1)==1) break; for ( ;jp,cs,1)==0) break; for ( x=j;xp,cs,1)==1) break; // vert. gap detected if( j2 && num_obj(xl,xr,yu,yl-1,box1->p,cs)>=2 // not best!!! && num_cross(xl,xr,yu +(yl-yu)/4,yu+ (yl-yu)/4,box1->p,cs) == 2 && num_cross(xl,xr,yl-1-(yl-yu)/2,yl-1-(yl-yu)/2,box1->p,cs) == 2 ){ // may be the following lines are not quite ok while( get_bw(xl,xr,yl,yl,box1->p,cs,1)==0 && 2*yly0=yl; /* if( m&2 ) box1->y0= ( (r==1) ? yu : yl ); */ // out_x(box1); } if(r==0){ // divided fr != fi while( get_bw(x0,x1,yu,yu,box1->p,cs,1)==0 && 2*yuy0=yu; } if( r==1 ){ yl--; // .@@@. ..@@. // .@@.. .@@.. // .@... .@@.. // // if( loop(box1->p,xl,yu,xr-xl,cs,0,RI) // > loop(box1->p,xl,yl,xr-xl,cs,0,RI) // +dx/8 // && loop(box1->p,xr,yu,xr-xl,cs,0,LE) // < loop(box1->p,xr,yl,xr-xl,cs,0,LE)) // -dx/8 ) // é Nov03 if( loop(box1->p,xl,yu,xr-xl,cs,0,RI) - loop(box1->p,xr,yu,xr-xl,cs,0,LE) > loop(box1->p,xl,yl,xr-xl,cs,0,RI) // +dx/8 - loop(box1->p,xr,yl,xr-xl,cs,0,LE)+1) // -dx/8 ) // é Nov03 mod = ACUTE_ACCENT; // ' if( xr-xl+1 > 3*(yl-yu+1) && get_bw(xl,xr,yu,yl,box1->p,cs,2)==0 ) mod = MACRON; // "-" above // .@@@. .@@.. // ..@@. ..@@. // ...@. ..@@. 
// // if( loop(box1->p,xl,yu,xr-xl,cs,0,RI) // < loop(box1->p,xl,yl,xr-xl,cs,0,RI) // -dx/8 // && loop(box1->p,xr,yu,xr-xl,cs,0,LE) // > loop(box1->p,xr,yl,xr-xl,cs,0,LE) ) // +dx/8 ) à Nov03 if( loop(box1->p,xl,yu,xr-xl,cs,0,RI) - loop(box1->p,xr,yu,xr-xl,cs,0,LE) < loop(box1->p,xl,yl,xr-xl,cs,0,RI) // -dx/8 - loop(box1->p,xr,yl,xr-xl,cs,0,LE) -1 ) // +dx/8 ) à Nov03 mod = GRAVE_ACCENT; // ` #ifdef DEBUG fprintf(stderr,"\n#testumlaut x= %d %d m1=%d m2=%d",x0,y0,m1-y0,m2-y0); fprintf(stderr," yu=%d yl=%d xl=%d xr=%d",yu-y0,yl-y0,xl-x0,xr-x0); #endif if( (xr-xl+1) < 2*(yl-yu+1)+2 && 2*(xr-xl+1)+2 > (yl-yu+1) ) { int i,i1,i2,i3,i4; i1=loop(box1->p,xl ,(yu+yl)/2,xr-xl+1,cs,0,RI); i1=loop(box1->p,xl+i1,(yu+yl)/2,xr-xl+1,cs,1,RI); i2=loop(box1->p,(xl+xr)/2,yu ,yl-yu+1,cs,0,DO); i2=loop(box1->p,(xl+xr)/2,yu+i2,yl-yu+1,cs,1,DO); for (i=0;ip,xl+i,yu+i)< cs) break; i3=i; for ( ;ip,xl+i,yu+i)>=cs) break; i3=i-i3; for (i=0;ip,xr-i,yu+i)< cs) break; i4=i; for ( ;ip,xr-i,yu+i)>=cs) break; i4=i-i4; #ifdef DEBUG fprintf(stderr,"\n#DEBUG DOT_ABOVE %d %d %d %d",i1,i2,i3,i4); #endif if ( (xr-xl<5 && yl-yu<8) /* to small */ || (i1>=(xr-xl+1)/2+2 && i2>=(yl-yu+1)/2+2 /* symmetrical */ && abs(i3-i4)<=i1/4+2 && abs(i1-i2)<=i1/4+2 && abs(i3-i1)<=i1/4+4 && abs(i4-i2)<=i1/4+4) ) mod = DOT_ABOVE; // "." above, ToDo: improve it! 
} if( ( loop(box1->p,xl,yu ,xr-xl,cs,0,RI) > loop(box1->p,xl,yl ,xr-xl,cs,0,RI)-dx/8 || loop(box1->p,xl,yu ,xr-xl,cs,0,RI) > loop(box1->p,xl,yl-1,xr-xl,cs,0,RI)-dx/8 ) && ( loop(box1->p,xr,yu ,xr-xl,cs,0,LE) > loop(box1->p,xr,yl ,xr-xl,cs,0,LE)-dx/8 || loop(box1->p,xr,yu ,xr-xl,cs,0,LE) > loop(box1->p,xr,yl-1,xr-xl,cs,0,LE)-dx/8 ) && num_cross(xl,xr,yu ,yu ,box1->p,cs) == 1 && ( num_cross(xl,xr,yl ,yl ,box1->p,cs) == 2 || num_cross(xl,xr,yl-1,yl-1,box1->p,cs) == 2 )) mod = CIRCUMFLEX_ACCENT; // "^" if( ( loop(box1->p,xl,yu ,xr-xl,cs,0,RI) < loop(box1->p,xl,yl ,xr-xl,cs,0,RI)-dx/10 || loop(box1->p,xl,yu+1,xr-xl,cs,0,RI) < loop(box1->p,xl,yl ,xr-xl,cs,0,RI)-dx/10 ) && ( loop(box1->p,xr,yu ,xr-xl,cs,0,LE) < loop(box1->p,xr,yl ,xr-xl,cs,0,LE)-dx/10 || loop(box1->p,xr,yu+1,xr-xl,cs,0,LE) < loop(box1->p,xr,yl ,xr-xl,cs,0,LE)-dx/10 ) && ( num_cross(xl,xr,yu ,yu ,box1->p,cs) == 2 || num_cross(xl,xr,yu+1,yu+1,box1->p,cs) == 2 ) && num_cross(xl,xr,yl ,yl ,box1->p,cs) == 1 ) mod = CARON; // "v" above if( /* test for bow (new0.3.6) */ loop(box1->p,xl,yu ,xr-xl,cs,0,RI) + loop(box1->p,xl,yl ,xr-xl,cs,0,RI) - 2*loop(box1->p,xl,(yl+yu)/2,xr-xl,cs,0,RI) > dx/16+1 && xr-xl>10) if( ( loop(box1->p,xl,yu ,xr-xl,cs,0,RI) < loop(box1->p,xl,yl ,xr-xl,cs,0,RI)-dx/10 || loop(box1->p,xl,yu+1,xr-xl,cs,0,RI) < loop(box1->p,xl,yl ,xr-xl,cs,0,RI)-dx/10 ) && ( loop(box1->p,xr,yu ,xr-xl,cs,0,LE) < loop(box1->p,xr,yl ,xr-xl,cs,0,LE)-dx/10 || loop(box1->p,xr,yu+1,xr-xl,cs,0,LE) < loop(box1->p,xr,yl ,xr-xl,cs,0,LE)-dx/10 ) && ( num_cross(xl,xr,yu ,yu ,box1->p,cs) == 2 || num_cross(xl,xr,yu+1,yu+1,box1->p,cs) == 2 ) && num_cross(xl,xr,yl ,yl ,box1->p,cs) == 1 ) mod = BREVE; // round "u" above if( xr-xl>3 && yl-yu>1 ) if( loop(box1->p,xl,yu,xr-xl,cs,0,RI) > loop(box1->p,xl,yl,xr-xl,cs,0,RI) && loop(box1->p,xr,yu,xr-xl,cs,0,LE) < loop(box1->p,xr,yl,xr-xl,cs,0,LE) && num_cross(xl,xr,yu,yu,box1->p,cs) == 2 && num_cross(xl,xr,yl,yl,box1->p,cs) == 2 ) mod = TILDE; if( xr-xl>2 && yl-yu>2) if( 
num_cross(xl,xr,(yu+yl)/2,(yu+yl)/2,box1->p,cs) >1 ) if( num_cross((xl+xr)/2,(xl+xr)/2,yu,yl,box1->p,cs) >1 ) if( num_hole(xl,xr,yu,yl,box1->p,cs,NULL) == 1 ) mod = RING_ABOVE; #ifdef DEBUG printf("\n#DEBUG umlaut mod=0x%04x x=%d..%d y=%d..%d r=%d %s", (int)mod,yu-box1->y0,yl-box1->y0, xl-box1->x0,xr-box1->x0,r,((mod==CARON)?"CARON": ((mod==ACUTE_ACCENT)?"ACUTE": ((mod==TILDE)?"TILDE":"?")))); out_x(box1); #endif } } if (m) box1->dots=r; // set to 0 also possible after division if (m) box1->modifier=mod; /* should be resetted after compose ??? */ MSG(fprintf(stderr,"umlaut mod=%s dots=%d y0o=%d",decode(mod,ASCII),r,y0);) } // printf(" modifier=%c",mod); if (modifier) *modifier=mod; /* set modifier */ return r; } static wchar_t ocr0_eE(ocr0_shared_t *sdata){ struct box *box1=sdata->box1; int i,i1,i2,j,d,x,y,hchar=sdata->hchar,gchar=sdata->gchar,bad_e=0, x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; int dx=x1-x0+1,dy=y1-y0+1, /* size */ ad; /* tmp-vars */ int (*aa)[4]=sdata->aa; /* corner-points, (x,y,dist^2,vector_idx) */ // --- most frequent letter e first!!! 
// --- test e --------------------------------------------------- for(ad=d=100;dx>2 && dy>3;){ // min 3x4 (smallest seen is 5x6) DBG( wchar_t c_ask='e'; ) if (sdata->holes.num > 2) Break; /* tolerant against a tiny hole */ if (sdata->holes.num != 1) ad=97*ad/100; /* ToDo: may be a two pass version intolerant/tolerant is better */ if( loop(box1->p,x0,y0+dy/2,x1-x0,cs,0,RI)>dx/3 ) Break; // rough test if( loop(box1->p,x0+dx/2,y0,y1-y0,cs,0,DO)>dy/3 ) Break; if( loop(box1->p,x0+dx/2,y1,y1-y0,cs,0,UP)>dy/3 ) Break; if( num_cross(x0,x1,y0+dy/4 ,y0+dy/4 ,box1->p,cs) > 2 && num_cross(x0,x1,y0+dy/4+1,y0+dy/4+1,box1->p,cs) > 2 ) Break; // gt x=(x0+x1)/2;i= num_cross(x,x,y0,y1,box1->p,cs); // v0.40 if (i!=3) { x=(x0+2*x1)/3;i= num_cross(x,x,y0,y1,box1->p,cs); } if (i!=3) { x=(x0+3*x1)/4;i= num_cross(x,x,y0,y1,box1->p,cs); } if (i!=3) { i= num_cross((x0+2*x1)/3,(x0+x1)/2,y0,y1,box1->p,cs); } i=loop(box1->p,x0,y0+dy/2,x1-x0,cs,0,RI); if( i>dx/2 ) Break; j=loop(box1->p,x0,y0 ,x1-x0,cs,0,RI); if( jp,x0,y1 ,x1-x0,cs,0,RI); if( jp,x0+dx/2,y0,y1-y0,cs,0,DO); if( i>dx/2 ) Break; j=loop(box1->p,x1-dx/3,y0,y1-y0,cs,0,DO); if( jp,x0 ,y0,y1-y0,cs,0,DO); if( jp,x1 ,y0,y1-y0,cs,0,DO); if( jp,x0+dx/2,y1,y1-y0,cs,0,UP); if( i>dx/2 ) Break; j=loop(box1->p,x0 ,y1,y1-y0,cs,0,UP); if( jp,x1 ,y1,y1-y0,cs,0,UP); if( jp,x0, (y0+y1)/2,x1-x0,cs,0,RI) -loop(box1->p,x0,(3*y0+y1)/4,x1-x0,cs,0,RI) -loop(box1->p,x0,(y0+3*y1)/4,x1-x0,cs,0,RI); if (dx>3 && j>=dx/4) Break; // ~g 4x6font for(y=1;yp,cs) == 2 ) break; if( y==dy/2 ) Break; // v0.2.5 ~ bad_t for(i=0,j=x0+dx/4;j<=x1-dx/4 && i<=dx/4;j++) if( num_cross(j,j,y0,y1,box1->p,cs) == 3 ) i++; if( dx>4 && dy>5 && (i set x,y for(x=0,y=i=y0+dy/3;ip,x1,i,y1-y0,cs,0,LE); if(j>=x) { x=j;y=i; } } if (x 2* smallest thickness right for(i1=dx,i=y0+dy/3;ip,x0 ,i,y1-y0,cs,0,RI); if (j>dx/2) break; j =loop(box1->p,x0+j,i,y1-y0,cs,1,RI); if (jp,x1 ,i,y1-y0,cs,0,LE); j =loop(box1->p,x1-j,i,y1-y0,cs,1,LE); if(j2*i1) Break; // not accepted, if right line is not very thinn 
x =loop(box1->p,x1 ,y,y1-y0,cs,0,LE); x+=loop(box1->p,x1-x,y,y1-y0,cs,1,LE); x+=loop(box1->p,x1-x,y,y1-y0,cs,0,LE); if (3*i2>i1) ad=99*ad/100; if (2*i2>i1) ad=99*ad/100; bad_e=60; // used later? } if (xp,cs) > 1 ) i=0; if( i ) Break; // ..@@@@...<- // .@@@@@@;. // @@,...@@. // @@.....@, // @@@@@@@@@ // @@.,;.@,. <- problem (y) == bad_e>50 // @@.....@. // @@,...@@. // .@@@,@@@. // ..@@@@;..<- if (dy>11 && bad_e<50) if ( num_cross(x0,x1,y,y,box1->p,cs) != 1 ) Break; // except "geschwungenem e" if ( num_cross(x0,x1-dx/3,y ,y ,box1->p,cs) != 1 && num_cross(x0,x1-dx/3,y+1,y+1,box1->p,cs) != 1 ) Break; // if( num_hole(x0, x1, y0 , y ,box1->p,cs,NULL) < 1 ){ if( sdata->holes.num == 0 || sdata->holes.hole[0].y1 >= y-y0){ if( sdata->hchar ) Break; // ~ \it t // look if thinn font (may be h-line is broken) Mai00 for(j=0,i=x0+dx/8;ip,cs,1) == 1 ) j++; if(j<2*dx/4) Break; } if( sdata->holes.num>0 && sdata->holes.hole[0].y0 > y-y0) Break; if( sdata->holes.num>1 && sdata->holes.hole[1].y0 > y-y0) Break; if( sdata->holes.num==1 && sdata->holes.hole[0].x0 >= dx/2) { ad=95*ad/100; } /* 8*10 @ (=at) is not an e */ // look for horizontal gap for(x=0,y=i=y0+dy/4;ip,x0,i,x1-x0,cs,0,RI); if(j>=x) { x=j;y=i; } } if (y>y0+dy/4 && ydx/2) Break; // s if (x>dx/4) ad=99*ad/100; if( num_cross(x0+dx/2,x1 ,y1-dy/4,y1 ,box1->p,cs) == 0 && num_cross(x0+dx/2,x1-1,y1-dy/4,y1 ,box1->p,cs) == 0 && num_cross(x0+dx/2,x1 ,y1-dy/4,y1-1,box1->p,cs) == 0 ) { if (sdata->gchar) Break; // ~p ad=99*ad/100; } /* upper case is for 5x6 box */ if( sdata->hchar // broken B ? 
should also work when linedetection fails && loop(box1->p,x1,y1-dy/3,dx,cs,0,LE)<=dx/8 ) { x = loop(box1->p,x0,y0+dy/2,dx,cs,0,RI); if( loop(box1->p,x0,y0+dy/4,dx,cs,0,RI)<=x && loop(box1->p,x0,y0+dy/8,dx,cs,0,RI)<=x ) Break; if( loop(box1->p,x0,y1-dy/4,dx,cs,0,RI)<=x && loop(box1->p,x0,y1-dy/8,dx,cs,0,RI)<=x ) Break; } x = loop(sdata->bp,0,dy-2 ,dx,cs,0,RI); if( loop(sdata->bp,0,dy-1-dy/8,dx,cs,0,RI)>x && dy>16) Break; // some Q if (box1->m2) { if (sdata->gchar) ad=99*ad/100; if (sdata->hchar) ad=99*ad/100; } else ad=99*ad/100; Setac(box1,(wchar_t)'e',ad); if (ad>=100) return 'e'; break; } // --- test E --------------------------------------------------- for(ad=d=100;dx>2 && dy>4 ;){ // min 3x4 // rewritten for vectors 0.43 int i1, i2, i3, i4, i5; // line derivation + corners DBG( wchar_t c_ask='E'; ) if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ /* half distance to the center */ d=2*sq(128/4); /* now we check for the upper right end of the h */ if (aa[3][2]>d/2) Break; /* [2] = distance, ~dj... */ if (aa[0][2]>d/2) Break; /* upper left end */ if (aa[1][2]>d/2) Break; /* lower left end */ if (aa[2][2]>d/2) Break; /* lowerright end */ /* E f near E OOOOOOOO OOOO O5 O O O4 O OOOO3 OOOOOO O2 O O O O1 O O OOOOOOOO OOOOOO */ // check the bow from below for (i=aa[1][3];i!=aa[2][3];i=(i+1)%box1->num_frame_vectors[0]) { if (y1-box1->frame_vector[ i][1]>dy/4) break; // fatal! 
} if (i!=aa[2][3]) Break; // ~AHKMNRX // search most left+down between bottom right and top right i1=nearest_frame_vector(box1, aa[2][3],aa[3][3], x0, y1); i5=nearest_frame_vector(box1, i1,aa[3][3], x0, y0); i3=nearest_frame_vector(box1, i1, i5, x1, (y0+y1)/2); i2=nearest_frame_vector(box1, i1, i3, x0, (2*y0+y1)/3); i4=nearest_frame_vector(box1, i3, i5, x0, (y0+2*y1)/3); i =nearest_frame_vector(box1, aa[0][3],aa[1][3], x0-dx/4, (y0+y1)/2); if (2*box1->frame_vector[i][0] < aa[0][0]+aa[1][0]-1-dx/16) Break; if (2*box1->frame_vector[i][0] < aa[0][0]+aa[1][0]) ad=99*ad/100; // f MSG(fprintf(stderr,"i1-5 %d %d %d %d %d",i1,i2,i3,i4,i5);) // holes right open? for( i=1,y=y0; yp,cs,2) == 0 ) i=0; if( i ) Break; for( i=1,y=y1; y>y1-dy/4 && i; y-- ) // long black line if( get_bw(x0+dx/6,x1-dx/4,y,y,box1->p,cs,2) == 0 ) i=0; if( i ) Break; for( i=1,y=y0+dy/3; yp,x0 ,y,dx,cs,0,RI); j=loop(box1->p,x0+j,y,dx,cs,1,RI); if( j>dx/3 ) i=0; } if( i ) Break; x=x1-dx/3; y=y0; // von oben durchbohren! turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,DO,ST); if( y>y0+dy/4 ) Break; turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,ST,DO); if( y>y0+dy/3 ) Break; turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,RI,DO); if( x<=x1 || y>y0+dy/2 ) Break; x=x1-dx/3; y=y1; // von unten durchbohren! 
turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,UP,ST); if( yp,&x,&y,x0,x1,y0,y1,cs,ST,UP); if( yp,&x,&y,x0,x1,y0,y1,cs,RI,UP); if( x<=x1 || yp,&x,&y,x0,x1,y0,y1,cs,DO,ST); if( y>y0+dy/4 ) Break; turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,ST,DO); if( y>y0+dy/3 ) Break; y+=dy/15; turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,LE,ST); if( x15 && x==x0) ad=99*ad/100; // to thin x+=dx/15+1; turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,DO,ST); if( y>y1-dy/3 ) Break; // if( num_hole(x0, x1, y0 , y1 ,box1->p,cs,NULL) > 0 ) Break; if (sdata->holes.num > 0) Break; i=loop(box1->p,x0,y0+dy/4,dx,cs,0,RI); if(i>dx/2) Break; j=loop(box1->p,x0,y0+dy/2,dx,cs,0,RI); if(ji+dx/8) Break; i=j; j=loop(box1->p,x0,y1-dy/4,dx,cs,0,RI); if(ji+dx/8) Break; j=loop(box1->p,x1,y1-dy/4,dx,cs,0,LE); for( x=dx,y=y0+dy/6; yp,x0,y,dx,cs,0,RI); if (i>j/2 && ad>98) ad=99*ad/100; if (i>dx/4) break; if(i3*dx) // ~[ if( get_bw(x0+dx/2,x0+dx/2,y0+dy/4,y1-dy/4,box1->p,cs,1) == 0 ) Break; if (box1->m2) { if (!hchar) ad=ad*99/100; if ( gchar) ad=ad*99/100; } Setac(box1,(wchar_t)'E',ad); if (ad>=100) return 'E'; break; } return box1->c; } static wchar_t ocr0_n(ocr0_shared_t *sdata){ struct box *box1=sdata->box1; int i,j,d,x,y,i1,i2,i3,handwritten=0, x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; int dx=x1-x0+1,dy=y1-y0+1, /* size */ ad; /* tmp-vars */ // --- test n --------------------------------------------------- // glued rm is very similar to glued nn -> thickness of h-line should grow // may02: tested for 8x12 font for(ad=d=100;dx>2 && dy>3;){ // min 3x4 DBG( wchar_t c_ask='n'; ) if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ i= num_cross( 0,dx-1,dy/4,dy/4,sdata->bp,cs); j= num_cross( 0,dx-1,dy/2,dy/2,sdata->bp,cs); if( (i<2 || i>3) && j!=2 ) Break; if( loop(sdata->bp,dx/2,0,dy,cs,0,DO) > dy/8 && sdata->hchar ) Break; /* tt */ y=5*dy/8; /* also for handwritten n, where first bow goes not down enough */ if( num_cross( 0,dx/2,y ,y ,sdata->bp,cs) != 1 && num_cross( 0,dx/2,y-1,y-1,sdata->bp,cs) != 1 && 
num_cross(dx/2,dx-1,y ,y ,sdata->bp,cs) < 1 ) Break; // n rr // ~thick_w y=loop(sdata->bp,dx-1-dx/4,0,dy,cs,0,DO); if(y>dy/2) Break; if(y>1)if( get_bw(dx-1-dx/4,dx-1,0,y-2,sdata->bp,cs,1) == 1 ) Break; y=3*dy/4; if( num_cross(0, dx/2,y ,y ,sdata->bp,cs) == 1 && num_cross(dx/2,dx-1,y ,y ,sdata->bp,cs) == 0 ) Break; // ~p y=dy/2; if( num_cross(0,dx-1,dy/2-dy/8,dy/2-dy/8,sdata->bp,cs) == 2 && num_cross(0,dx-1,dy/2, dy/2 ,sdata->bp,cs) == 2 ) { // n rr /* printed n */ x =loop(sdata->bp,0,y,dx ,cs,0,RI); if(x> dx/4) Break; // search 1st v-line x+=loop(sdata->bp,x,y,dx-x,cs,1,RI); if(x> dx/2) Break; i1=x; // 1st gap x+=loop(sdata->bp,x,y,dx-x,cs,0,RI); if(x< dx/2) Break; i2=x; // 2nd v-line x+=loop(sdata->bp,x,y,dx-x,cs,1,RI); if(x<3*dx/4) Break; i3=x; // 2nd gap i=dy/4; y=13*dy/16; if( num_cross(dx/2,dx-1,y,y,sdata->bp,cs)==2 ) i=3*dy/8; // \it n if (i<2 && il1 l2 l3 l4 ??? for(x=i1;xbp,x, 0,dy,cs,0,DO)>=i ) break; if(x bp,x,dy-1,dy,cs,0,UP) >dy/4 ) break; if(x==i2) Break; // no gap detected (glued serifs ??? ) // glued rm as nn ??? 
for(y=0,x=(i1+i2)/2;xbp,x,0,dy,cs,0,DO); i=loop(sdata->bp,x,i,dy,cs,1,DO); // measure thickness if( i>y ) y=i; if( i7 ) if( loop(sdata->bp,dx-1,dy-1-dy/8,dx,cs,0,LE) +loop(sdata->bp, 0,dy-1-dy/8,dx,cs,0,RI)-dx/8-1 > loop(sdata->bp,dx-1,dy-1-dy/2,dx,cs,0,LE) +loop(sdata->bp, 0,dy-1-dy/2,dx,cs,0,RI) ) ad=90*ad/100; // broken o if( dy>7 && dx>7 ) if( loop(sdata->bp,dx-1, dy/2,dx,cs,0,LE)==0 && loop(sdata->bp,dx-1,dy-1-dy/8,dx,cs,0,RI)>dx/8 ) ad=98*ad/100; // broken o } else { /* check handwritten n */ if( num_cross(0,dx-1,dy/2, dy/2 ,sdata->bp,cs) != 3 && num_cross(0,dx-1,dy/2-dy/8,dy/2-dy/8,sdata->bp,cs) != 3 ) Break; i =loop(sdata->bp,0,dy/2-dy/8,dx,cs,0,RI); if (i>dx/4) Break; i+=loop(sdata->bp,i,dy/2-dy/8,dx,cs,1,RI); if (i>dx/2) Break; i+=loop(sdata->bp,i,dy/2-dy/8,dx,cs,0,RI); if( num_cross(i,i, 0,dy/2-2*dy/8,sdata->bp,cs) != 0 ) Break; i+=loop(sdata->bp,i,dy/2-dy/8,dx,cs,1,RI); if( num_cross(i,i,dy/2+1, dy-1,sdata->bp,cs) != 0 ) Break; handwritten=80; } i= loop(sdata->bp,dx-1 ,dy/2,dx,cs,0,LE); if(i>5) if( get_bw(dx-1-i/2,dx-1-i/2,0,dy/2,sdata->bp,cs,1) == 1 ) Break; // ~rr i+=loop(sdata->bp,dx-1-i,dy/2,dx,cs,1,LE); if( get_bw(dx-1-i ,dx-1-i ,0,dy/2,sdata->bp,cs,1) == 0 ) Break; // ~rv if( get_bw(dx/2,dx/2,dy/4,dy/4,sdata->bp,cs,1) == 0 && get_bw(dx/2,dx-1,dy-2,dy-2,sdata->bp,cs,1) == 0 && get_bw(dx/2,dx/2,dy/4,dy-2,sdata->bp,cs,1) == 1 ) Break; // ~P // glued ri ??? 
if( box1->dots>0 && box1->m1 ) if( get_bw((x1+x0)/2,x1,box1->m1,y0-1,box1->p,cs,1) == 1 ) if( num_cross( 0,dx-1,0 ,0 ,sdata->bp,cs) >2 || num_cross( 0,dx-1,1 ,1 ,sdata->bp,cs) >2 ) Break; i=loop(sdata->bp,dx-1, dy-1,dx,cs,0,LE); if (i>dx/2) i=loop(sdata->bp,dx-1, dy-2,dx,cs,0,LE); x=loop(sdata->bp,dx-1,dy-1-dy/4,dx,cs,0,LE); if (sdata->hchar && i-x>1) Break; // ß x=loop(sdata->bp, 0,dy-1,dx,cs,0,LE); // check for serifs i=loop(sdata->bp, 0,dy-2,dx,cs,0,LE); if (ibp, 0, 1,dx,cs,0,LE); if (ibp, 0, 2,dx,cs,0,LE); if (ihchar && x>0) Break; // fl if (num_cross( 0,dx-1,dy/4,dy/4,sdata->bp,cs)>=3) ad=98*ad/100; // small M if (sdata->hchar || 2*y0m1+box1->m2) ad=96*ad/100; if (sdata->gchar) ad=96*ad/100; // ß fl if (dx<5) { // for small fonts no middle line is possible for m ad=99*ad/100; // 4x6 m if (num_cross(0,dx-1,dy/8,dy/8,sdata->bp,cs)>=2) { ad=97*ad/100; // ~m if (dy<=4) Setac(box1,'m',97); // only for 4x6 font! } } Setac(box1,'n',ad); break; } return box1->c; } static wchar_t ocr0_M(ocr0_shared_t *sdata){ struct box *box1=sdata->box1; pix *bp=sdata->bp; int d,x,y,i0,i1,i2,i3,t1,hchar=sdata->hchar,gchar=sdata->gchar, x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; int dx=x1-x0+1,dy=y1-y0+1, /* size */ ad; /* tmp-vars */ // ------------------ test M --------------------------- for(ad=d=100;dx>3 && dy>3;){ // dy<=dx nicht perfekt! 
besser mittleres // min-suchen fuer m DBG( wchar_t c_ask='M'; ) if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ if( num_cross(0,dx-1, dy/2, dy/2,bp,cs)<3 && num_cross(0,dx-1, dy/4, dy/4,bp,cs)<3 && num_cross(0,dx-1,5*dy/8,5*dy/8,bp,cs)<3 && num_cross(0,dx-1,3*dy/4,3*dy/4,bp,cs)<3 && dx>4 ) Break; if( num_cross(0,dx-1, dy/4, dy/4,bp,cs)<2 && num_cross(0,dx-1, dy/8, dy/8,bp,cs)<2 ) Break; /* fat M */ if( num_cross(0,dx-1,3*dy/4,3*dy/4,bp,cs)<2 ) Break; x = loop(bp,dx-1 ,dy-1,dx,cs,0,LE); // ~ melted kl x = loop(bp,dx-1-x,dy-1,dx,cs,1,LE); if( x>dx/2 ) Break; if( loop(bp, 0,7*dy/16,dx,cs,0,RI) + loop(bp,dx-1,7*dy/16,dx,cs,0,LE) > dx/2 ) Break; // ~K if( dy>8 /* following lines should be extend to range check */ && loop(bp, dx/4,dy-1, dy,cs,0,UP) 2 && num_cross(0,dx-1,3*dy/4,3*dy/4,bp,cs)> 2 ) Break; // ~it_u if( num_cross(0 ,dx-1,3*dy/4,3*dy/4,bp,cs)==2 && num_cross(dx/2,dx/2,3*dy/4, dy-1,bp,cs)> 0 ) Break; // ~it_v if( loop(bp,3*dx/4, 0,dy,cs,0,DO) > loop(bp,2*dx/4, 0,dy,cs,0,DO) && loop(bp,3*dx/4,dy-1,dy,cs,0,UP) < loop(bp,2*dx/4,dy-1,dy,cs,0,UP) ) Break; // ~N if( loop(bp,3*dx/4, dy/8,dy,cs,0,DO) > loop(bp,2*dx/4, dy/8,dy,cs,0,DO) && loop(bp,3*dx/4,dy-1-dy/8,dy,cs,0,UP) < loop(bp,2*dx/4,dy-1-dy/8,dy,cs,0,UP) ) Break; // ~serif_N // i0 is lower end of upper serifen (widest gap? ) i0=0; if( num_cross(0,dx-1,dy/2,dy/2,bp,cs)!=4 ){ // Is it a N ? if( num_cross(0,dx-1,dy/2,dy/2,bp,cs)==3 ){ for(y=dy/2+1;yy-2 ) Break; // ~N } } } // MNWK for(i2=0,i1=x=dx/2;xi2) {i2=y;i1=x;} else break; } i3=i2+loop(bp,i1,i2,dy-i2,cs,1,DO); if(i2hchar) Break; // rm ad=99*ad/100; } if (i2==0 && dx>8 && dy>12) Break; // glued and bad splitted serifen-MN // if( num_hole(x0, x1, y0 , y1 ,box1->p,cs,NULL) != 0 ) Break; // small A if (sdata->holes.num != 0) Break; t1=loop(bp,0 ,3*dy/4,dx,cs,0,RI); t1=loop(bp,t1,3*dy/4,dx,cs,1,RI); // thickness of line? 
if( 7*(t1+1)=i2 ) Break; // no good M i1+=loop(bp,i1, dy/4,dx,cs,1,RI); i2+=loop(bp,i2,3*dy/4,dx,cs,1,RI); if( i1>=i2 ) Break; // no good M i1+=loop(bp,i1, dy/4,dx,cs,0,RI); i2+=loop(bp,i2,3*dy/4,dx,cs,0,RI); if( i1<=i2 ) Break; // no good M } if( num_cross(0,dx-1,dy/2,dy/2,bp,cs)==2 && num_cross(0,dx-1,dy/4,dy/4,bp,cs)==2 && !hchar ) Break; // ~ \it u if (dy<17) if( num_cross(0,dx-1, 0, 0,bp,cs)<2 ) ad=99*ad/100; if (dx>5) /* 4x6 font has only 1 cross at y=1 */ if( num_cross(0,dx-1, 1, 1,bp,cs)<2 ) ad=96*ad/100; // kt if( num_cross(dx/2,dx/2, 0, dy-1,bp,cs)!=1) ad=98*ad/100; // kt if (dx<5 && loop(bp,dx/2,0,dy,cs,0,DO)>=3*dy/8) ad=96*ad/100; // 4x6 H if( num_cross(0,dx-1, dy/4, dy/4,bp,cs)<=2 && num_cross(0,dx-1,3*dy/4,3*dy/4,bp,cs)<=2 && dx>8 && dy>12 ){ ad=98*ad/100; for(y=5*dy/16;y<5*dy/8;y++) // look for H-line if( num_cross(0,dx-1,y ,y ,bp,cs)==1 ) break; if( y<5*dy/8 ) ad=95*ad/100; if( y<5*dy/8 ) if( num_cross(2+dx/6,dx-3-dx/6,y-2,y-2,bp,cs)==0 || num_cross(2+dx/6,dx-3-dx/6,y-1,y-1,bp,cs)==0 ) Break; // ~H bad! 
} if( loop(bp,3*dx/8, 0,dy,cs,0,DO) >dy/2 && loop(bp,5*dx/8,dy-1,dy,cs,0,UP) >dy/2 ) ad=95*ad/100; if(!hchar){ ad=98*ad/100; /* not sure */ if( loop(bp,0, dy/4,dx,cs,0,RI) < loop(bp,0,dy-1-dy/8,dx,cs,0,RI)-dx/16 ) Break; // ~wi glued } if( gchar ) ad=98*ad/100; if (ad>99 && dx<8) ad=99*ad/100; /* give 5x8 N a chance */ Setac(box1,'M',ad); break; } return box1->c; } static wchar_t ocr0_N(ocr0_shared_t *sdata){ struct box *box1=sdata->box1; int d,x,y,i1,i2,i3,hchar=sdata->hchar,gchar=sdata->gchar, x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1; int dx=x1-x0+1,dy=y1-y0+1, /* size */ (*aa)[4]=sdata->aa, /* corner-points, (x,y,dist^2,vector_idx) */ dbg[9], ad; /* tmp-vars */ // --- test N ------- +hchar -gchar for(ad=d=100;dx>3 && dy>3;){ // 4x6font DBG( wchar_t c_ask='N'; ) if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ if (sdata->holes.num > 0) ad=98*ad/100; /* # */ if (dx<6) ad=99*ad/100; if (dx<5) ad=99*ad/100; /* half distance to the center */ d=2*sq(128/4); /* now we check for the 4 ends of the x */ if (aa[0][2]>d) Break; if (aa[1][2]>d) Break; if (aa[2][2]>d) Break; if (aa[3][2]>d) Break; if (aa[3][0]-aa[0][0](dy+2)/5) Break; /* glued tu */ if (abs(aa[3][1]-aa[0][1])>(dy+4)/8) ad=98*ad/100; /* glued tu */ /* left and right vertical line */ d=line_deviation(box1, aa[0][3], aa[1][3]); if (d>2*sq(1024/4)) Break; ad=(100-(d-sq(1024)/2)/sq(1024)/4)*ad/100; d=line_deviation(box1, aa[2][3], aa[3][3]); if (d>2*sq(1024/4)) Break; /* search uppermost left ^ (between near 0,0) */ i1=nearest_frame_vector(box1,aa[1][3],aa[2][3], x0+dx/8, y0); x=box1->frame_vector[i1][0]; y=box1->frame_vector[i1][1]; MSG( fprintf(stderr,"i1= %d (%d,%d) left ^", i1,x-x0,y-y0);) if (y-y0 > 5*dy/8) Break; if (x-x0 > 5*dx/8) Break; /* search uppermost right ^ ~H */ i3=nearest_frame_vector(box1,aa[1][3],aa[2][3], x1, y0); MSG( fprintf(stderr,"i3= %d (%d,%d) right ^",\ i3, box1->frame_vector[i3][0]-x0,box1->frame_vector[i3][1]-y0);) /* check if upper left and lower right point 
are joined directly */ dbg[0]=d=line_deviation(box1,i1, aa[2][3]); /* check if lower left and lower left point are joined directly */ dbg[1]=d=line_deviation(box1, aa[1][3],i1); MSG( fprintf(stderr," i1-a2 %d a1-i1 %d",dbg[0],dbg[1]); ) if (dbg[0] > sq(1024/4)) Break; if (dx>4 && dbg[1] > sq(1024/4)) ad=97*ad/100; // d=0..2*sq(1024) if (dx>4 && dbg[1] > sq(1024/3)) Break; // d=0..2*sq(1024) // serif N has d=sq(1024/3)=116508 /* serach lowest right v, same frame? N-tilde etc.? */ i2=nearest_frame_vector(box1,aa[3][3],aa[0][3], x1, y1-dy/8); x=box1->frame_vector[i2][0]; y=box1->frame_vector[i2][1]; MSG( fprintf(stderr,"i2= %d (%d,%d) right v",\ i2, box1->frame_vector[i2][0]-x0,box1->frame_vector[i2][1]-y0);) if (y-y0 < 3*dy/8) Break; if (x-x0 < 3*dx/8) Break; // test H if ( box1->frame_vector[i3][0]-box1->frame_vector[i1][0]> dx/4 && box1->frame_vector[i3][1]-box1->frame_vector[i1][1]<=dy/8 && y<=box1->frame_vector[i1][1]) Break; /* check if upper left and lower right point are joined directly */ dbg[2]=d=line_deviation(box1,i2, aa[0][3]); /* check if lower right and lower right point are joined directly */ dbg[3]=d=line_deviation(box1, aa[3][3],i2); MSG( fprintf(stderr," i2-a0 %d a3-i2 %d",dbg[2],dbg[3]); ) if (dbg[2] > sq(1024/4)) Break; if (dbg[3] > sq(1024/4)) ad=97*ad/100; // serif N, ToDo: do it better if (dbg[3] > sq(1024/3)) Break; if (abs((box1->frame_vector[i1][1]-y0) -(y1-box1->frame_vector[i2][1]))>dy/8) ad=99*ad/100; /* ~ tu */ if (abs(((y0+y1)/2-box1->frame_vector[i1][1]) -(box1->frame_vector[i2][1]-(y0+y1)/2))>dy/8) ad=99*ad/100; /* ~ tu */ if (box1->frame_vector[i2][0] -box1->frame_vector[i1][0]<=dx/8) Break; /* nonsignificant distance */ if (box1->frame_vector[i2][1] -box1->frame_vector[i1][1]<=dy/8) ad=97*ad/100; /* too flat (ff,H) */ if (box1->frame_vector[i2][1] -box1->frame_vector[i1][1]<=dy/2) ad=99*ad/100; MSG( \ fprintf(stderr,"^v %d %d %d %d line deviation %d %d %d %d max %d %d",\ box1->frame_vector[i1][0]-x0,box1->frame_vector[i1][1]-y0,\ 
box1->frame_vector[i2][0]-x0,box1->frame_vector[i2][1]-y0,\ dbg[0],dbg[1],dbg[2],dbg[3],sq(1024/4),sq(1024));) ad=(100-(dbg[0]-sq(1024)/2)/sq(1024)/4)*ad/100; ad=(100-(dbg[1]-sq(1024)/2)/sq(1024)/4)*ad/100; ad=(100-(dbg[2]-sq(1024)/2)/sq(1024)/4)*ad/100; ad=(100-(dbg[3]-sq(1024)/2)/sq(1024)/4)*ad/100; if (!hchar) ad=99*ad/100; if ( gchar) ad=98*ad/100; // \sc N Setac(box1,'N',ad); break; } return box1->c; } static wchar_t ocr0_h(ocr0_shared_t *sdata){ struct box *box1=sdata->box1; pix *bp=sdata->bp; int i,d,x,y,hchar=sdata->hchar,gchar=sdata->gchar, x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; int dx=x1-x0+1,dy=y1-y0+1, /* size */ ad; /* tmp-vars */ int (*aa)[4]=sdata->aa; /* corner-points, (x,y,dist^2,vector_idx) */ // --- test h --------------------------------------------------- for(ad=d=100;dx>2 && dy>3;){ // min 3x4 // rewritten for vectors 0.42 int i1, i2, i3, i4, i5, i6, i7, i8; // line derivation + corners DBG( wchar_t c_ask='h'; ) if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ /* half distance to the center */ d=2*sq(128/4); /* now we check for the upper right end of the h */ if (aa[3][2]d/2) Break; /* upper left end */ if (aa[1][2]>d/2) Break; /* lower left end */ if (aa[2][2]>d/2) Break; /* lowerright end */ /* type A B=italic ??? 18 OOO O O O O O O7OOO OOOO O4 O O O O O O O O O O O O 2O3 5O6 O OOO */ i1=i8=aa[0][3]; i2=i3=aa[1][3]; i5=i6=aa[2][3]; // check the bow from below for (i4=i=i2;i!=i5;i=(i+1)%box1->num_frame_vectors[0]) { if (box1->frame_vector[ i][1] frame_vector[i4][1]) i4=i; // get next maximum if (box1->frame_vector[ i][1]<=y0) break; // fatal! 
} if (box1->frame_vector[i4][1]-y0frame_vector[i4][1]18 ) if( get_bw(dx-1-dx/3,dx-1,dy/6 ,dy/5 ,bp,cs,1) == 1 ) Break; if( get_bw(dx-1-dx/3,dx-1,dy-1-dy/4,dy-1 ,bp,cs,1) == 0 ) Break; // s- for( x=x0+dx/3;xp,cs,1) == 0 ) break; if( x>=x1-dx/3 ) Break; for(i=dy/4,y=y0+dy/3;y<=y1 && i;y++){ if( num_cross(x0,x1 ,y,y, box1->p,cs) == 2 ) i--; } if( i ) Break; for(i=dy/4,y=y0;y<=y0+dy/2 && i;y++){ if( num_cross(x0,x0+dx/2,y,y, box1->p,cs) == 1 ) i--; } if( i ) Break; // if( num_hole(x0, x1, y0 , y1 ,box1->p,cs,NULL) > 0 ) // could happen if (sdata->holes.num > 0) if (sdata->holes.hole[0].y0 > dy/3 && sdata->holes.hole[0].y1 < dy-1-dy/3) Break; // if( num_hole(x0, x1, y0+dy/3 , y1-dy/3 ,box1->p,cs,NULL) != 1 ) Break; // mini if( loop(bp,dx-1,dy/3,dx,cs,0,LE)+dx/8 < loop(bp,dx-1,dy/2,dx,cs,0,LE) && loop(bp,dx-1,dy-1-dy/8,dx,cs,0,LE)+dx/8 < loop(bp,dx-1,dy/2,dx,cs,0,LE)) Break; // ~k Okt00 i=loop(bp,0,dy-1-dy/4,dx,cs,0,RI); if (i>1 && num_cross(x0,x0,y0+dy/8+2,y0+dy/2, box1->p,cs) == 1 ){ // fi fu ad=(99-(1<p,cs) == 0 ) ad=97*ad/100; if (num_cross(x0+dx/2,x0+dx/2,y0,y0+dy/8+2, box1->p,cs) == 1 ) ad=97*ad/100; if (ad<1) break; } i =loop(bp,0,dy/4,dx,cs,0,RI); i+=loop(bp,i,dy/4,dx,cs,1,RI)+1; for ( ; i5*dy/8 ) { ad=98*ad/100; // melted hi, li, but handwritten h MSG(fprintf(stderr,"ad=%d",ad);) } if( num_cross(x0,x0,y0+(dy+3)/8,y1,box1->p,cs) > 1 ) { ad=98*ad/100; // melted fr MSG(fprintf(stderr,"ad=%d",ad);) } i=loop(bp,dx-1,3*dy/4,dx,cs,0,LE); // melted "fr" for vertikal letters if (i>dx/4 && loop(bp,dx-1-i,dy-1,dy,cs,1,UP)>dy/2) { ad=94*ad/100; MSG(fprintf(stderr,"ad=%d",ad);) } i=loop(bp,dx-1,1+dy/16,dx,cs,0,LE); if (i 0 ) { ad=95*ad/100; // melted fi MSG(fprintf(stderr,"ad=%d",ad);) } if (loop(box1->p,x1,y0+1+dy/16,dx,cs,0,LE)p,x1,y0 ,dx,cs,0,LE)p,x1,y0+1,dx,cs,0,LE)holes.num > 0) ad=97*ad/100; if (box1->m2) { if ( gchar) ad=98*ad/100; if (!hchar) ad=97*ad/100; } else ad=99*ad/100; Setac(box1,'h',ad); break; } return box1->c; } static wchar_t ocr0_H(ocr0_shared_t *sdata){ 
struct box *box1=sdata->box1; pix *bp=sdata->bp; int i,j,j1,d,x,y,ya,i1,i2,i3,hchar=sdata->hchar,gchar=sdata->gchar, x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; int dx=x1-x0+1,dy=y1-y0+1, /* size */ ad; /* tmp-vars */ // --- test H --------------------------------------------------- for(ad=d=100;dx>2 && dy>3;){ // min 3x4 DBG( wchar_t c_ask='H'; ) if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ if( num_cross(0,dx-1,dy/4 ,dy/4 ,bp,cs) != 2 && num_cross(0,dx-1,dy/4-1,dy/4-1,bp,cs) != 2 ) Break; if( num_cross(0,dx-1,3*dy/4 ,3*dy/4 ,bp,cs) != 2 && num_cross(0,dx-1,3*dy/4+1,3*dy/4+1,bp,cs) != 2 ) Break; if( loop(bp,0 ,dy/8,dx,cs,0,RI) + loop(bp,dx-1,dy/8,dx,cs,0,LE)>dx/2 ) Break; // ~A for( j1=0,i=1,y=y0+dy/10; yp,x0 ,y,dx,cs,0,RI) +loop(box1->p,x1 ,y,dx,cs,0,LE); if( j>dx/2 ) i=0; if(j>j1)j1=j; } if( !i ) Break; for( i=1,y=dy/4; ydx/5 ) i=0; } if( !i ) Break; // ~K Jul00 for( i=0,ya=y=y0+dy/3; yp,x0 ,y,dx,cs,0,RI); j=loop(box1->p,x0+j,y,dx,cs,1,RI); if( j>i ) { i=j; ya=y; } } if( i<=dx/2 ) Break; ya-=y0; if( num_cross(0,dx-1,ya ,ya ,bp,cs) != 1 && num_cross(0,dx-1,ya+1,ya+1,bp,cs) != 1 ) Break; /* Dec00 */ for( y=ya; y 2 && num_cross(0,dx-1,y+1,y+1,bp,cs) > 2 ) break; if ( yp,cs,1) == 0 ) i=0; } if( i ) Break; for(i=1,x=x0+dx/4;x<=x1-dx/4 && i;x++){ if( get_bw( x, x,y1-dy/4,y1 ,box1->p,cs,1) == 0 ) i=0; } if( i ) Break; for(i=1,x=x0+dx/4;x<=x1-dx/4 && i;x++){ if( num_cross(x,x,y0+dy/8,y1-dy/8, box1->p,cs) == 1 ) i=0; } if( i ) Break; for(i=1,y=y0;y<=y0+dy/4 && i;y++){ if( num_cross(x0,x1,y,y, box1->p,cs) == 2 ) i=0; } if( i ) Break; for(i=1,y=y1-dy/4;y<=y1 && i;y++){ if( num_cross(x0,x1,y,y, box1->p,cs) == 2 ) i=0; } if( i ) Break; if( get_bw(x1-dx/8, x1 , y0, y0+dy/8,box1->p,cs,1) != 1 ) Break; if( get_bw(x0 , x0+dx/8, y1-dy/8, y1,box1->p,cs,1) != 1 ) Break; i1=loop(bp,dx-1, dy/4,dx,cs,0,LE); if(i1>dx/2) Break; i2=loop(bp,dx-1, dy/2,dx,cs,0,LE); if(i2i1+dx/8) Break; i3=loop(bp,dx-1,dy-1-dy/4,dx,cs,0,LE); if(i3i2+dx/8) Break; 
if(abs(i1+i3-2*i2)>dx/16+1) Break; // test for thick tall N looking like a H if( num_cross(x0,x1,y0,y1, box1->p,cs) < 2 ) Break; // sure N i1=loop(bp, 0, dy/4,dx,cs,0,RI); i1=loop(bp, i1, dy/4,dx,cs,1,RI); i2=loop(bp, 0,dy-1-dy/4,dx,cs,0,RI); i2=loop(bp, i2,dy-1-dy/4,dx,cs,1,RI); i3=loop(bp,dx-1 ,dy-1-dy/4,dx,cs,0,LE); i3=loop(bp,dx-1-i3,dy-1-dy/4,dx,cs,1,LE); i =loop(bp, 0,dy/2+1+dy/8,dx,cs,0,RI); i+=loop(bp, i,dy/2+1+dy/8,dx,cs,1,RI); i =loop(bp, i,dy/2+1+dy/8,dx,cs,0,RI); if (i6*i2 && 5*i3>6*i2 && i1>i2 && i3>i2 ) Break; if( dx>8 ) if ( loop(bp,dx-1, 3*dy/8,dx,cs,0,LE) -loop(bp,dx-1, dy/8,dx,cs,0,LE)>dx/4 && loop(bp,dx-1, 3*dy/8,dx,cs,0,LE) -loop(bp,dx-1,dy-1-dy/8,dx,cs,0,LE)>dx/4 ) Break; // ~K // if( num_hole(x0,x1,y0,y1,box1->p,cs,NULL) != 0 ) Break; if (sdata->holes.num != 0) Break; if ( gchar) ad=99*ad/100; if (!hchar) ad=98*ad/100; Setac(box1,'H',ad); break; } return box1->c; } static wchar_t ocr0_k(ocr0_shared_t *sdata){ struct box *box1=sdata->box1; pix *bp=sdata->bp; int i,j,x,y,hchar=sdata->hchar,gchar=sdata->gchar, x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; int dx=x1-x0+1,dy=y1-y0+1, /* size */ ad; /* tmp-vars */ int (*aa)[4]=sdata->aa; /* corner-points, (x,y,dist^2,vector_idx) */ // --- test k --------------------------------------------------- for(ad=100;dx>2 && dy>3;){ // min 3x4 // rewritten for vectors 0.43 int d, i1, i2, i3, i4, i5, i6, i7, i8; // line derivation + corners DBG( wchar_t c_ask='k'; ) if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ /* half distance to the center */ d=2*sq(128/4); /* now we check for the upper right end of the h */ if (aa[3][2]d/2) Break; /* upper left end */ if (aa[1][2]>d/2) Break; /* lower left end */ if (aa[2][2]>d/2) Break; /* lowerright end */ /* type A B=italic ??? 
18 OOO O O O O O6 O O7 OO O OO O4OO OO OO O OO O O O OO O O O 2O3 O5 O OOO */ i1=i8=aa[0][3]; i2=i3=aa[1][3]; i5= aa[2][3]; // check the bow from below for (i4=i=i2;i!=i5;i=(i+1)%box1->num_frame_vectors[0]) { if (box1->frame_vector[ i][1] frame_vector[i4][1]) i4=i; // get next maximum if (box1->frame_vector[ i][1]<=y0) break; // fatal! } if (box1->frame_vector[i4][1]-y0frame_vector[i4][1]frame_vector[i][0]frame_vector[i][0]dx/2) Break; i3=loop(bp,0,dy/2+dy/4,dx,cs,0,RI); if(abs(i1+i3-2*i2)>dx/16+1 || i1p,cs,1) != 1 ) Break; if( get_bw(x0+dx/2,x1, y1-dy/3,y1 ,box1->p,cs,1) != 1 ) Break; if( get_bw(x1-dx/4,x1, y0 ,y0+3*dy/16,box1->p,cs,1) == 1 ) Break; if( get_bw(x1-dx/4,x1, y0+dy/4,y1-dy/4,box1->p,cs,1) != 1 ) Break; //~1 if( get_bw(x1-dx/4,x1, y1-dy/8,y1 ,box1->p,cs,1) != 1 ) Break; if (sdata->holes.num > 0) if (sdata->holes.hole[0].y0 > dy/4) Break; // if( num_hole(x0,x1,y0+dy/4,y1,box1->p,cs,NULL) != 0 ) Break; for(y=y0+1;yp,cs,1) == 0 ) break; if( yp,cs,100)>50) i=0; if( i ) Break; // no vertikal line! /* check for falling line in the lower left corner */ for (j=x=0,y=5*dy/8;y<7*dy/8;y++) { i= loop(bp,dx-1,y,dx,cs,0,LE); if(i>x) { x=i;j=y; } } // x=dx/6 on fat k if (x + loop(bp,dx-1-x,y,dx,cs,1,LE)/2 dx/2) i =loop(bp,dx-1,dy-2,dx,cs,0,LE); if(i>dx/2) Break; i+=loop(bp,dx-1-i,dy-1,dx,cs,1,LE)/2; if( get_line(x,y,dx-1-i,dy-1,bp,cs,100)<60 ) Break; for(y=y0+dy/3;yp,cs)==2 ) break; if( y==y1 ) Break; if( // num_hole(x0,x1 ,y0 ,y1 ,box1->p,cs,NULL)>0 // ~A happens! 
sdata->holes.num > 0 ) if (sdata->holes.hole[0].x1>dx-1-dx/4 || sdata->holes.hole[0].y1>dy-1-dy/4 || sdata->holes.hole[0].y0< dy/4) Break; // if ( num_hole(x0,x1-dx/4,y0+dy/4,y1-dy/4,box1->p,cs,NULL)==0 ) Break; i=loop(bp,0,dy-1,dx,cs,0,RI); i=loop(bp,i,dy-1,dx,cs,1,RI); if (dx>8 && 4*i>3*dx) Break; // ~glued_tz i =loop(bp,0,dy/4,dx,cs,0,RI); if (i>dx/4 && i+loop(bp,i,dy/4,dx,cs,1,RI)>dx/2 && loop(bp, 0,0,dx,cs,0,RI)<=dx/4 && loop(bp,dx-1,0,dx,cs,0,LE)>=dx/2 ) ad=90*ad/100; // divided Q if( 2*y0>(box1->m1+box1->m2) ) ad=99*ad/100; if ( gchar) ad=98*ad/100; if (!hchar) ad=98*ad/100; Setac(box1,'k',ad); break; } return box1->c; } static wchar_t ocr0_K(ocr0_shared_t *sdata){ struct box *box1=sdata->box1; pix *bp=sdata->bp; int i,j,i1,i2,d,x,y,hchar=sdata->hchar,gchar=sdata->gchar, x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; int dx=x1-x0+1,dy=y1-y0+1, /* size */ ad,ya,xa,yb,xb,yc,xc,yd,xd,ye,xe,yf,xf; /* tmp-vars */ // --- test K --------------------------------------------------- for(ad=d=100;dx>2 && dy>3;){ // updated 29 Mar 2000 perfect??? 
DBG( wchar_t c_ask='K'; ) if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ for(y=dy/8;yp,x,y0,y1-y0,cs,0,DO); if (y>3*dy/4) { i=1;break; } if (dy>15 && j>dy/8){ j =loop(box1->p,x-1,y0+y-1,x1-x0,cs,0,LE)/2; y+=loop(box1->p,x-j,y0+y-1,y1-y0,cs,0,DO)-1; } if(y>=dy/4) i=0; /* ok, found gap */ } if( i ) Break; for(y=0,x=x0+dx/4;x<=x1-dx/4;x++){ // lower h-gap i=loop(box1->p,x,y1,dy,cs,0,UP); /* on small chars bypass possible low left serifs */ if (i>0) { i2=loop(box1->p,x-1,y1-i-1,dy,cs,0,UP); if (i2>1) i+=i2-1; } if (i>y) { y=i; i1=x; } } if( y<=dy/8 ) Break; if (yp,cs) == 2 ) i=0; } if( i ) Break; for(i=1,y=y0;y<=y0+dy/4 && i;y++){ if( num_cross(x0,x1,y,y, box1->p,cs) == 2 ) i=0; } if( i ) Break; if( dx<10 ){ for(i=1,y=y0+dy/3;y<=y1-dy/3 && i;y++){ if( num_cross(x0,x1,y,y, box1->p,cs) == 1 ) i=0; } if( i ) Break; } for(i=1,y=y1-dy/4;y<=y1 && i;y++){ if( num_cross(x0,x1,y,y, box1->p,cs) == 2 ) i=0; } if( i ) Break; if( get_bw(x1-dx/3,x1,y0,y0+dy/8,box1->p,cs,1) != 1 ) Break; // ~k if( dy>16 && loop(bp,0, dy/4,dx,cs,0,RI) +loop(bp,0,3*dy/4,dx,cs,0,RI) <2*loop(bp,0, dy/2,dx,cs,0,RI)-2-dx/32 ) Break; // ~X i=loop(box1->p,x1,y0+ dy/4,x1-x0+1,cs,0,LE); if(i>dx/2) Break; j=loop(box1->p,x1,y0+ dy/2,x1-x0+1,cs,0,LE); x=loop(box1->p,x1,y0+3*dy/8,x1-x0+1,cs,0,LE); if(x>j) j=x; if(j<=i ) Break; i=j; j=loop(box1->p,x1,y1-dy/4,x1-x0+1,cs,0,LE); if(j>=i ) Break; // out_x(box1); // detailed analysis // // a d <= that are main points of K // | / // b/e // | \ . 
// c f ya= dy/4;xa=loop(bp,0,ya,dx,cs,0,RI);xa+=loop(bp,xa,ya,dx,cs,1,RI)/2; yc=dy-dy/4;xc=loop(bp,0,yc,dx,cs,0,RI);xc+=loop(bp,xc,yc,dx,cs,1,RI)/2; yb=dy/2; xb=dx-1-loop(bp,dx-1,dy/2,dx,cs,0,LE); for(yd=ye=yf=xe=y=i=0,xf=xd=dx;yxe){ xe=x;ye=dy/2+y; } x =loop(bp,dx-1,dy/2-y,dx,cs,0,LE); if(x>xe){ xe=x;ye=dy/2-y; } #if 0 // removed v0.2.4a2 x =loop(bp,0 ,dy/2+y,dx,cs,0,RI); // middle left border x+=loop(bp,x ,dy/2+y,dx,cs,1,RI); // test 2nd cross x+=loop(bp,x ,dy/2+y,dx,cs,0,RI); if(x8 ){ // example szaka0103 if( xe>5*dx/8 || xb>5*dx/8 ) Break; // ~{\it n} i=loop(bp,xb,yb,xb,cs,1,LE); // thick center? see font22 if( get_line2(xb,yb,xd,yd,bp,cs,100)<95 ) // right up if( get_line2(xb-i/2,yb,xd,yd,bp,cs,100)<95 ) Break; if( get_line2(xe,ye,xf,yf,bp,cs,100)<95 ) Break; // right down xe+=loop(bp,xe,ye,dx,cs,1,RI); if( xe>=xf ) Break; // ~{\it n} } else { if( dy<16 && !hchar ) Break; if( loop(bp,0,1,dy,cs,1,DO)<=3*dx/4 && loop(bp,1,1,dy,cs,1,DO)<=3*dx/4 && loop(bp,2,1,dy,cs,1,DO)<=3*dx/4 ) Break; // ~x } if (loop(bp,dx-1,dy-1-dy/4,dx,cs,0,LE)<=dx/8){ ad=99*ad/100; /* broken B ? */ if (sdata->holes.num > 0) if (sdata->holes.hole[0].y1 < dy-1-dy/3) Break; // if( num_hole(x0,x1,y0,(y0+2*y1)/3,box1->p,cs,NULL)>0) Break; // broken B } if(box1->m3 && !hchar) ad=99*ad/100; if(box1->m3 && gchar) ad=99*ad/100; // printf(" ok xe=%d",xe); Setac(box1,'K',ad); break; } return box1->c; } static wchar_t ocr0_f(ocr0_shared_t *sdata){ struct box *box1=sdata->box1; pix *bp=sdata->bp; int i,j,d,x,y,hchar=sdata->hchar,gchar=sdata->gchar, x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; int dx=x1-x0+1,dy=y1-y0+1, /* size */ (*aa)[4]=sdata->aa, /* the for line ends, (x,y,dist^2,vector_idx) */ ab[8][4], /* special points (x,y,dist^2,vector_idx) */ ad; /* tmp-vars */ /* x=mindist_to_a y=0 "t" 0>..$$. 0>..$$ 0>..$$ end right bow a--..$$ a--.$7. y>0 "f" 1>.$..$ 1>.$.. 1>.$$$ start right bow .$7. .$.. .@... .@.. 2>.@@. start upper end .@.. .@.. 2>.$... 2>.$.. 3>$$$$ crossing bar .$.. 
$$$. 3>$@$$. 3>$@$. $@@$ $@$. .@.. 4>.$... 4>.$.. 4>.$$. lower end .$.. .$.. .@... .@.. .@@. .@.. .@.. .@... .@.. .@@. .@.. .@.. 5>.$... 5>.$.. 5>.$$. lower start .$.. .$.. 6>..... 6>$... 6>.... optional left bow */ // --- test f like t --------------------------------------------------- for(ad=d=100;dx>2 && dy>5;){ // sometimes no hchar! // rewritten for vectors 0.43 int d, i1, i2, i3, i4, i5, i6, i7, i8, i9; // line derivation + corners DBG( wchar_t c_ask='f'; ) if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ /* half distance to the center */ d=2*sq(128/4); /* now we check for the upper right end of the h */ if (aa[3][2]>d/2) Break; /* [2] = distance, ~BCDEF... */ if (aa[0][2]>d ) Break; /* upper left end */ /* 9 OOO O 7 O8 O6 1OOOO5 O4 O 2O3 OOOOO */ i1=nearest_frame_vector(box1,aa[0][3],aa[1][3],x0-dx/2,(5*y0+3*y1)/8); /* we need i for 4x6 font, where left side of h-bar is near (x0,y1) */ i =aa[1][3]; if (box1->frame_vector[i][1]frame_vector[i2][0]-x0>dx/2) Break; // ~3 i =nearest_frame_vector(box1, aa[0][3], i2, x1+2*dx, (y0+y1)/2); // MSG(fprintf(stderr,"i %d",i);) if (box1->frame_vector[i ][0] -box1->frame_vector[i9][0]>dx/8) Break; // ~3 if( (box1->dots) ) Break; // Bold-face is gchar if (dy<=box1->m3-box1->m2+1) Break; for(x=0,j=y=2+(3*dy+4)/32;y<=5*dy/8;y++){ // upper cross line min=2 i=loop(bp,0,y,dx,cs,0,RI); if( y>dy/4 && i>5*dx/8 ) break; i=loop(bp,i,y,dx,cs,1,RI); if( i>x ) { x=i;j=y; } if( y<3*dy/4 && y>dy/4 && num_cross(0,dx-1,y ,y ,bp,cs) != 1 && num_cross(0,dx-1,y+1,y+1,bp,cs) != 1 // against noise ) break; } if( y<=5*dy/8 ) Break; y=j;// if( y>dy/2 || ydy/8 && num_cross( 0, (dx+1)/2,i,i,bp,cs) > 0 && num_cross((dx+1)/2,dx-1,i,i,bp,cs) > 0 ) Break; // ~Y if (loop(bp,3*dx/4, 0,dy,cs,0,DO)>dy/8 && loop(bp,3*dx/4-1,0,dy,cs,0,DO)>dy/8) Break; // upper bow i=3*dy/4; if (box1->m3 && i>=box1->m3) i=box1->m3-1; if (num_cross(0,dx-1,i,i,bp,cs)!=1) Break; // the middle bar appear in a wide vertical range, get part below for 
(i1=dx,i2=y,j=y+1;jframe_vector[i3][0]-x0,box1->frame_vector[i3][1]-y0);) ab[7][0]=box1->frame_vector[i3][0]; ab[7][1]=box1->frame_vector[i3][1]; ab[7][3]=i3; if (ab[7][1]-y0<=dy/16) ad=95*ad/100; // ~t // because of the dx,dy scaling the horiz. bar could be nearer to (x1,y0) // as the upper right end of the "t" if (aa[3][0]-x0>3*dx/4 && aa[3][1]-y0>3*dy/16) ad=99*ad/100; // ~t j=loop(bp,0,dy/8,dx,cs,0,RI); // if j>dx/2 we have italic f if ((2*x(j+dx/4)) break; if (iloop(bp,0, 1,dx,cs,0,RI) ) Break; // ~X i=y;j=1; // j used as flag if( num_cross(0,dx-1,0,0,bp,cs)==1 && hchar) //~r if( num_cross(0,dx-1,dy-1,dy-1,bp,cs)!=1 && num_cross(0,dx-1,dy-2,dy-2,bp,cs)!=1 ) Break; // ~* etc. // check for upper bow to right for(y=1;j && y=cs || dx<7) && getpixel(bp,x+1,y )>=cs && getpixel(bp,x ,y-1)< cs && getpixel(bp,x+1,y-1)< cs ) { j=0;break; } } if(j) ad=98*ad/100; // not detected // if( num_hole (x0 , x1 , y0, y1,box1->p,cs,NULL) != 0 ) Break; // ~e if (sdata->holes.num != 0) Break; // ~e for(i1=i2=dx,y=7*dy/8;yi2+dx/4) Break; // ~t ~e if(i1>i2+1) ad=96*ad/100; // ~t ~e if( loop(bp,0,3*dy/4,dx,cs,0,RI)5 && !hchar) if( loop(bp,dx-1,dy/2,dx,cs,0,LE)>3*dx/4 ) if( loop(bp,dx-1,dy-1,dy,cs,0,UP)8 ) if( loop(bp, 0,2*dy/3 ,dx,cs,0,RI)>2*dx/3 || loop(bp, 0,2*dy/3-1,dx,cs,0,RI)>2*dx/3 ) if( loop(bp,dx-1, dy/4 ,dx,cs,0,LE)>2*dx/3 ) Break; // ~5 ~S if (!hchar) if ( get_bw(x0+dx/8,x0+dx/8,y0+dy/4,y1-dy/16,box1->p,cs,2) == 0 && num_cross(x1-dx/4,x1-dx/4,y0,y1,box1->p,cs)!=2 && num_cross(x1-dx/8,x1-dx/8,y0,y1,box1->p,cs)!=2 ) Break; // ~r if (dy>15) if( num_cross(x0,x1,y1-dy/4,y1-dy/4,box1->p,cs)>1 && num_cross(x0,x1,y0+dy/4,y0+dy/4,box1->p,cs)>1 ) Break; // ~H if( dx>4 ) if( loop(bp,dx-1 ,3*dy/4,dx,cs,0,LE)- loop(bp,0 ,3*dy/4,dx,cs,0,RI)>dx/5+1 && loop(bp,dx-1-dx/8,dy-1 ,dy,cs,0,UP)=dx/5+1) ad=98*ad/100; // ~E i=loop(bp,dx/8,0,dy,cs,0,DO); if (idy/2) { ad=98*ad/100; // ~E, could also be a "f" with big serifs MSG(fprintf(stderr,"ad=%d",ad);) } if (!gchar) { ad=98*ad/100; 
MSG(fprintf(stderr,"ad=%d",ad);) } } i = loop(bp,dx-1 ,3*dy/4,dx ,cs,0,LE)/2; if (loop(bp,dx-1-i , dy-1,dy/2,cs,0,UP)1 && loop(bp,0, 0,dy/4,cs,0,DO)p,cs,2) == 0) { // white pixels? ad=98*ad/100; // F MSG(fprintf(stderr,"ad=%d",ad);) } if (!hchar) ad=ad*98/100; // d*=100;d/=128 // not 100% ! if (box1->m4>0 && gchar && ad<99 && 8*box1->y1 >= box1->m4*7+box1->m3) ad++; Setac(box1,'f',ad); break; } return box1->c; } static wchar_t ocr0_bB(ocr0_shared_t *sdata){ struct box *box1=sdata->box1; pix *bp=sdata->bp; int i,j,d,x,y,i1,i2,i3,hchar=sdata->hchar,gchar=sdata->gchar, x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; int dx=x1-x0+1,dy=y1-y0+1, /* size */ ad; /* tmp-vars */ // --- test B --------------------------------------------------- for(ad=d=100;dx>2 && dy>4;){ // min 3x4 DBG( wchar_t c_ask='B'; ) if (sdata->holes.num < 2) Break; /* tolerant against a tiny hole */ for(i=1,y=y0;yp,cs,1) != 1 ) i=0; if( !i ) Break; for(i=1,y=y1-dy/2;yp,cs,1) != 1 ) i=0; if( !i ) Break; if( get_bw(x1,x1 , y0 , y0 ,box1->p,cs,1) == 1 ) Break; if( num_cross(x0+dx/2, x0+dx/2,y0,y1 ,box1->p,cs) != 3 ) if( num_cross(x1-dx/3, x1-dx/3,y0,y1 ,box1->p,cs) != 3 ) Break; /* --- detect center of lower hole --- */ y = loop(box1->p,x0+dx/2,y1 ,dy,cs,0,UP); if (y>1+dy/8) Break; y+= loop(box1->p,x0+dx/2,y1-y,dy,cs,1,UP); if (y>dy/3) Break; y=y1-y-loop(box1->p,x0+dx/2,y1-y,dy,cs,0,UP)/2; if (yp,x0,y0+ y ,dx,cs,0,RI) > loop(box1->p,x0,y0+dy/4,dx,cs,0,RI)+dx/32 ) if( get_bw(x0,x0,y0,y0,box1->p,cs,1) == 0 ) if( get_bw(x0,x0,y1,y1,box1->p,cs,1) == 0 ) Break; // ~8 i1=loop(box1->p,x0,y0+dy/4,dx,cs,0,RI); i2=loop(box1->p,x0,y0+dy/2,dx,cs,0,RI); i =loop(box1->p,x0,y0+dy/2-dy/ 8,dx,cs,0,RI); if(i>i2) i2=i; i =loop(box1->p,x0,y0+dy/2-dy/16,dx,cs,0,RI); if(i>i2) i2=i; i3=loop(box1->p,x0,y1-dy/4,dx,cs,0,RI); if(dy>16 && i3p,x0,y0+ 1 ,dx,cs,0,RI) >= loop(box1->p,x0,y0+ 3 ,dx,cs,0,RI)+dx/32 ) if( loop(box1->p,x0,y0+ 0 ,dx,cs,0,RI) > loop(box1->p,x0,y0+ 3 ,dx,cs,0,RI)+dx/32 ) if( loop(box1->p,x0,y1- 
0 ,dx,cs,0,RI) > loop(box1->p,x0,y1- 3 ,dx,cs,0,RI)+dx/32 ) if( loop(box1->p,x0,y1- 1 ,dx,cs,0,RI) > loop(box1->p,x0,y1- 3 ,dx,cs,0,RI)+dx/32 ) Break; // ~8 Aug00 } if (sdata->holes.num != 2) Break; if (sdata->holes.hole[0].y0 < y-1 && sdata->holes.hole[1].y0 < y-1 ) Break; if (sdata->holes.hole[0].y1 > y+1 && sdata->holes.hole[1].y1 > y+1 ) Break; // if( num_hole(0,dx-1,0 ,y+1 ,bp,cs,NULL) != 1 ) Break; // if( num_hole(0,dx-1,y-1,dy-1,bp,cs,NULL) != 1 ) Break; // out_x(box1); for( x=dx,y=dy/6; yp,x0,y0+y,dx,cs,0,RI); if( i>x+dx/9 ) break; if(ix )break; } if( yx) x=i; // allow dust i=loop(bp,0,dy/2+1,dx,cs,0,RI); if (i>x) x=i; if ( loop(bp,0, dy/8,dx,cs,0,RI) +loop(bp,0,7*dy/8,dx,cs,0,RI) > 2*x+1 ) Break; // not konvex! if(!hchar){ // ~ fat_a ad=99*ad/100; x =loop(bp,0,dy/4,dx,cs,0,RI); if(loop(bp,0,dy/2,dx,cs,0,RI)>x+dx/8) ad=97*ad/100; } if ( (!hchar) && (dx<=10 || dy<=10) ) ad=97*ad/100; // hchar or good_quality if (gchar) ad=99*ad/100; Setac(box1,'B',ad); break; } // --- test b --------------------------------------------------- for(ad=d=100;dx>3 && dy>4;){ // min 3x4 DBG( wchar_t c_ask='b'; ) if (sdata->holes.num < 1) Break; /* tolerant against a tiny hole */ for(y=y0;yp,cs,1) != 1 ) Break; if(yp,cs,1) != 1 ) Break; if( get_bw(x1- dx/2, x1 , y1-dy/3, y1-dy/3,box1->p,cs,1) != 1 ) Break; if( get_bw(x1- dx/3, x1 , y0 , y0+dy/5,box1->p,cs,1) == 1 ) Break; if( get_bw(x1-4*dx/9, x1 , y0+dy/5, y0+dy/5,box1->p,cs,1) == 1 ) Break; if( num_cross(x0,x1,y0+dy/4 ,y0+dy/4 ,box1->p,cs) > 1 ) // & if( num_cross(x0,x1,y0+dy/4-1,y0+dy/4-1,box1->p,cs) > 1 ) if( dy<16 || num_cross(x0,x1,y0+dy/5 ,y0+dy/5 ,box1->p,cs) > 1 ) Break; // fat b for(i=j=0,y=dy/2;yholes.num != 1) Break; if (sdata->holes.hole[0].y0 < dy/4) Break; if ((sdata->holes.hole[0].y1-sdata->holes.hole[0].y0+1) *(sdata->holes.hole[0].x1-sdata->holes.hole[0].x0+1)*16 < dx*dy) ad=90*ad/100; // hole to small if( num_hole( x0, x1 , y0+dy/4, y1,box1->p,cs,NULL) != 1 ) Break; i=loop(bp,dx-1,dy-1 ,dx,cs,0,LE); 
j=loop(bp,dx-1,dy-1-dy/8,dx,cs,0,LE); if(j>i) Break; if (!hchar) ad=99*ad/100; if ( gchar) ad=99*ad/100; Setac(box1,'b',ad); if (ad>=100) return 'b'; break; } return box1->c; } static wchar_t ocr0_dD(ocr0_shared_t *sdata){ struct box *box1=sdata->box1; pix *bp=sdata->bp; int i,d,x,y,ya,yb,hchar=sdata->hchar,gchar=sdata->gchar, x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; int dx=x1-x0+1,dy=y1-y0+1, /* size */ ad; /* tmp-vars */ // --- test D --------------------------------------------------- for(ad=d=100;dx>2 && dy>3;){ // min 3x4 DBG( wchar_t c_ask='D'; ) if (sdata->holes.num < 1) Break; /* tolerant against a tiny hole */ if( get_bw(x0 ,x0+dx/3,y0+dy/2,y0+dy/2,box1->p,cs,1) != 1 ) Break; if( get_bw(x1-dx/3,x1 ,y0+dy/2,y0+dy/2,box1->p,cs,1) != 1 ) Break; if( get_bw(x1 ,x1 ,y0 ,y0+dy/16,box1->p,cs,1) == 1 ) Break; if( get_bw(x1-dx/2,x1 ,y0+dy/4,y0+dy/4 ,box1->p,cs,1) != 1 ) Break; if( num_cross(x0+dx/2,x0+dx/2,y0 ,y1 ,box1->p,cs) != 2 ) if( num_cross(x1-dx/3,x1-dx/3,y0 ,y1 ,box1->p,cs) != 2 ) Break; if( num_cross(x0 ,x1 ,y0+dy/3,y0+dy/3,box1->p,cs) != 2 ) Break; if( num_cross(x0 ,x1 ,y1-dy/3,y1-dy/3,box1->p,cs) != 2 ) Break; if (sdata->holes.num != 1) Break; if (sdata->holes.hole[0].y0 > dy/3) Break; if (sdata->holes.hole[0].y1 < dy-1-dy/3) Break; // if( num_hole (x0 ,x1 ,y0 ,y1 ,box1->p,cs,NULL) != 1 ) Break; // test if left edge is straight for(x=0,y=bp->y-1-dy/8;y>=dy/5;y--){ i=loop(bp,0,y,x1-x0,cs,0,RI); if( i+2+dx/16<=x ) break; if( i>x ) x=i; } if (y>=dy/5 ) Break; /* test if right edge is falling */ for(x=dx,y=0;yx-1,y,x1-x0,cs,0,LE); if( i>x+dx/16 ) break; if( iy-1;y>2*dy/3;y--){ i=loop(bp,bp->x-1,y,x1-x0,cs,0,LE); if( i>x+dx/16 ) break; if( i2*dy/3 ) Break; if( loop(bp,dx-1,dy-1 ,dx,cs,0,LE) <= loop(bp,dx-1,dy-2-dy/16,dx,cs,0,LE) ) Break; // P y=loop(bp,dx/2,dy-1,dy,cs,0,UP)-1; if (dy>16) y/=2; if ( y>=dy/16 ) { y-=dy/16; if (get_bw(dx/2,dx-1,dy-1-y,dy-1-y,bp,cs,1)==1) Break; // ~A } ya=loop(bp, 0,dy-1,dy,cs,0,UP); 
yb=loop(bp,dx/16+1,dy-1,dy,cs,0,UP); if( yady/16 && ya>yb ) Break; // ~O if ( loop(bp, dx/2, 0,dy,cs,0,DO) -loop(bp, dx/2,dy-1,dy,cs,0,UP) > dy/8 ) ad=97*ad/100; // ~b if (loop(bp, 0, 0,dx,cs,0,RI)>=dx/2 && loop(bp,dx-1,dy-1,dx,cs,0,LE)>=dx/2 && loop(bp, 0,dy/2,dx,cs,0,RI)< 2 ) ad=96*ad/100; // thin O if(box1->dots) ad=ad*94/100; if ( gchar) ad=99*ad/100; if (!hchar) ad=99*ad/100; Setac(box1,'D',ad); break; } // --- test d --------------------------------------------------- for(d=100;dx>2 && dy>3;){ // min 3x4 DBG( wchar_t c_ask='d'; ) ad=100; if (sdata->holes.num < 1) Break; /* tolerant against a tiny hole */ if( get_bw(x0 , x0+dx/2, y1-dy/6, y1-dy/9,box1->p,cs,1) != 1 ) Break; if( get_bw(x0 , x0+dx/2, y1-dy/3, y1-dy/3,box1->p,cs,1) != 1 ) Break; if( get_bw(x0+dx/2, x1 , y1-dy/3, y1-dy/3,box1->p,cs,1) != 1 ) Break; if( get_bw(x1-dx/4, x1 , y0+dy/8, y0+dy/8,box1->p,cs,1) != 1 ) Break; if( get_bw(x0+dx/2, x0+dx/2, y1-dy/4, y1 ,box1->p,cs,1) != 1 ) Break; if(dy>19) if( get_bw(x0 , x0+dx/3, y0 , y0+dy/5,box1->p,cs,1) == 1 ) Break; if( get_bw(x0 , x0+dx/3, y0 , y0+dy/6,box1->p,cs,1) == 1 ) Break; if( get_bw(x0 , x0+dx/4, y1-dy/8, y1 ,box1->p,cs,1) != 1 ) Break; if( get_bw(x0+dx/2-1,x0+dx/2,y1-dy/8, y1 ,box1->p,cs,1) != 1 ) Break; // ~"A if( loop(bp,bp->x-1, bp->y/4,x1-x0,cs,0,LE) > loop(bp,bp->x-1,3*bp->y/4,x1-x0,cs,0,LE)+1 ) Break; for(i=dx/8+1,x=0;x 3 ) i++; // ~al } if( i ) ad=98*ad/100; for(i=dy/8+1,y=0;yholes.num<1) Break; if (sdata->holes.num>1) { if (dx<6) Break; ad=95*ad/100; } // glued j above 8 (4x6 sample) MSG(fprintf(stderr,"hole[0].y0,y1= %d %d",sdata->holes.hole[0].y0,sdata->holes.hole[0].y1);); if ( sdata->holes.hole[0].y0 < dy/4 ) Break; if (dy-sdata->holes.hole[0].y1 > dy/4+1) Break; // glued et // if( num_hole(x0 , x1 , y0+dy/4 , y1 ,box1->p,cs,NULL) !=1 ) Break; if( num_cross(0 ,dx-1,dy-1-dy/4,dy-1-dy/4,bp,cs) != 2 ) { // glued al if (dy>15) { Break; } else ad=96*ad/100; } if (!hchar) ad=98*ad/100; if ( gchar) ad=99*ad/100; Setac(box1,'d',ad); break; 
} return box1->c; } static wchar_t ocr0_F(ocr0_shared_t *sdata){ struct box *box1=sdata->box1; pix *bp=sdata->bp; int i,j,d,x,y,hchar=sdata->hchar,gchar=sdata->gchar, x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; int dx=x1-x0+1,dy=y1-y0+1, /* size */ ad; /* tmp-vars */ // --- test F --------------------------------------------------- for(ad=d=100;dx>2 && dy>4;){ // dx>1 dy>2*dx DBG( wchar_t c_ask='F'; ) if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ if( get_bw(x0+dx/2,x0+dx/2,y0,y0+dy/8,box1->p,cs,1) != 1 ) Break; if( get_bw(x0,x0+dx/4,y1-dy/4,y1-dy/4,box1->p,cs,1) != 1 ) Break; if( get_bw(x0,x0+dx/2,y0+dy/4,y0+dy/4,box1->p,cs,1) != 1 ) Break; for (x=0,y=0;yx) x=j; } if (ydx/2 ) i=0; } if( i ) Break; x=loop(bp,0,dy-1-dy/4,dx,cs,0,RI); x=loop(bp,x,dy-1-dy/4,dx,cs,1,RI); // strichdicke for( i=1,y=dy/3; ydx/3 && ((j>2*x && dx>8) || j>x+1)) i=0; } if( i ) Break; y=dy/8; if (y<1) y=1; for( i=1; y=dx/3) { i=0; break; } } if( i ) Break; // check for vertical line on left side for(i=1,y=1;y<=dy/2 && i;y++) if( get_bw(0,dx/2,y,y,bp,cs,1) != 1 ) i=0; if( !i ) Break; for(i=1,y=dy/2;ydx/8 // no serif || loop(bp, 0, dy-3,dx,cs,0,RI)<1) break; ad=99*ad/100; } if( get_bw(dx-1-dx/4,dx-1,dy-1-dy/4,dy-1,bp,cs,1) == 1 ) Break; // ~E if( get_bw(dx-1 ,dx-1,0 ,dy/3,bp,cs,1) != 1 ) Break; if( loop(bp,0, bp->y/4,dx,cs,0,RI) < loop(bp,0,3*bp->y/4,dx,cs,0,RI)-1 ) Break; // if( num_hole(x0 , x1 , y0 , y1 ,box1->p,cs,NULL) >0 ) Break; if (sdata->holes.num > 0) Break; for(i=0,x=dx/4;xy/4,dx,cs,0,RI)-1; if (i>=0 && loop(bp,dy-1,i,dy,cs,0,UP)<=3*dy/4 ) ad=ad*98/100; // check for screen font P i= loop(bp,bp->x-1,bp->y/4,dx,cs,0,LE); if (i<1) { j=i+loop(bp,bp->x-1-i,bp->y/4, dx ,cs,1,LE); j= loop(bp,bp->x-1-j,bp->y/4,3*dy/4,cs,0,DO); if (j<=dy/2) { i=loop(bp,bp->x-1,0,dx,cs,0,LE); ad=ad*98/100; if (i>dx/8) Break; if (i) ad=98*ad/100; } } if (!hchar) if ((box1->m2-box1->y0)*8>=dy) { // ignore bad m1..4 if ( num_cross(2*dx/3,2*dx/3,0,dy-1,bp,cs) < 2 ) 
ad=90*ad/100; // ~r } if (gchar) ad=99*ad/100; Setac(box1,'F',ad); break; } return box1->c; } static wchar_t ocr0_uU(ocr0_shared_t *sdata){ struct box *box1=sdata->box1; pix *bp=sdata->bp; int i,j,d,x,y,hchar=sdata->hchar,gchar=sdata->gchar, x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; int dx=x1-x0+1,dy=y1-y0+1, /* size */ ad; /* tmp-vars */ wchar_t bc=UNKNOWN; // --- test uU --------------------------------------------------- // in Mitte so breit wie oben (bei V kontinuierlich schmaler) for(ad=d=100;dx>2 && dy>3;){ // min 3x4 DBG( wchar_t c_ask='u'; ) if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ for(y=y0+dy/4;yp,cs) < 2 ) break; if( yi)i=y; if(y1) break; } if( idy/2)?dx/8:0),y,y,bp,cs); if( y1 ) i--; // ~{\it v} if( y2) ) { i--; ad=90*ad/100; } if( y>dy/2 && j!=1 ) { i--; ad=95*ad/100; } } if( !i ) Break; for(i=dy/16+1,y=dy/8;ydy/2 && (j<1 && j>2) ) i--; if( yp,cs,1) != 1 ) i=0; } if( i ) Break; for(i=dx/4+1,x=x0+dx/3;x<=x1-dx/3 && i;x++){ if( get_bw( x, x,y0+dy/3,y1-dy/3,box1->p,cs,3) != 2 ) i--; } if( !i ) Break; for(i=1,x=x0+dx/3;x<=x1-dx/3 && i;x++){ if( get_bw( x, x,y1-dy/2,y1,box1->p,cs,3) == 2 ) i=0; if( get_bw( x, x,y1-dy/3,y1,box1->p,cs,3) == 2 ) ad=98*ad/100; } if( !i ) Break; if( num_cross(0 ,dx/2, dy/4, dy/4,bp,cs)==2 && num_cross(dx-dx/2,dx-1,dy-dy/4,dy-dy/4,bp,cs)==1 ) Break; // ~{\it v} i=loop(bp,0,dy-1-dy/16,dx,cs,0,RI); j=loop(bp,0,dy-1-dy/8 ,dx,cs,0,RI); if( i15) if( loop(bp,dx-1,dy/16,dx,cs,0,LE) > loop(bp,dx-1,dy/8 ,dx,cs,0,LE)+1+dx/32 ) Break; // ~bad 0 (thinn) if( hchar && dy>7) if( loop(bp, 0, dy-1,dx,cs,1,RI)==dx && loop(bp,dx-1,3*dy/4,dx,cs,0,LE)>dx/16 && loop(bp, 0,3*dy/4,dx,cs,0,RI)>dx/16 && loop(bp,dx-1, dy/2,dx,cs,0,LE)>dx/16 && loop(bp, 0, dy/2,dx,cs,0,RI)>dx/16 ) Break; // melted ll i=loop(bp, 0,dy-2-dy/8,dx,cs,0,RI); j=loop(bp,dx-1,dy-2-dy/8,dx,cs,0,LE); if ( i>dx/4 && j>dx/4 && i+j>=dx/2) Break; // v if (i+j>=dx/2) ad=97*ad/100; if ( num_cross(0,dx-1,dy/2,dy/2,bp,cs)!=2 ) ad=96*ad/100; // w 
if ( loop(bp,dx/2,dy-1,dy,cs,0,UP)>0 ) ad=98*ad/100; // w if (ad==100) ad=99; // ToDo: only if lines.wt<100 bc='u'; if (gchar) ad=98*ad/100; if (hchar) bc='U'; if (box1->dots>0) ad=99*ad/100; Setac(box1,bc,ad); break; } return box1->c; } static wchar_t ocr0_micro(ocr0_shared_t *sdata){ struct box *box1=sdata->box1; pix *bp=sdata->bp; int i,j,d,x,y,i2,hchar=sdata->hchar,gchar=sdata->gchar, x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; int dx=x1-x0+1,dy=y1-y0+1, /* size */ ad; /* tmp-vars */ // --- test \mu µ MICRO_SIGN -------------------------------------- // in Mitte so breit wie oben (bei V kontinuierlich schmaler) if( gchar && !hchar ) for(ad=d=100;dx>2 && dy>4;){ // min 3x4 DBG( wchar_t c_ask='u'; ) if (sdata->holes.num > 1) break; /* tolerant against a tiny hole */ for(y=y0+dy/8;ym3-dy/4;y++) if( num_cross(x0,x1,y,y,box1->p,cs) < 2 ) break; if( ym3-dy/4 ) break; if( get_bw(dx/2,dx/2,3*dy/8,7*dy/8,bp,cs,1)==0 ) break; if( get_bw(dx/2,dx-1,3*dy/8,7*dy/8,bp,cs,1)==0 ) break; for(y=dy/2;y5*dx) break; } if( y>=dy || 2*y>box1->m3+box1->m4) break; i2=y; for(i=0,x=2*dx/8;xi)i=y; if(y1) break; } if( im4-box1->m3)-dy/4 && i;y++){ // 12%+1 Fehler j=num_cross(0,dx/2,y,y,bp,cs); if( y1 ) i--; // ~{\it v} if( y2) ) i--; if( y>dy/2 && j!=1 ) i--; } if( !i ) break; for(i=dy/16+1,y=dy/8;ym4-box1->m3)-dy/4 && i;y++){ // 12%+1 Fehler j=num_cross(dx-dx/2,dx-1,y,y,bp,cs); if( y>dy/2 && (j<1 && j>2) ) i--; if( yp,cs,1) != 1 ) i=0; } if( i ) break; for(i=dx/4+1,x=x0+dx/3;x<=x1-dx/3 && i;x++){ if( get_bw( x, x,y0+dy/4,y1-dy/2,box1->p,cs,3) != 2 ) i--; } if( !i ) break; if( num_cross(0 ,dx/2, dy/4, dy/4,bp,cs)!=1 ) break; if( num_cross(dx-dx/2,dx-1,dy-dy/2,dy-dy/2,bp,cs)!=1 ) break; if( get_bw( (dx+2)/4,dx-1,dy-2-3*dy/16,dy-1,bp,cs,1) == 1 ) break; if( num_cross(0,dx/4,dy-1,dy-1,bp,cs)!=1 ) break; Setac(box1,MICRO_SIGN,ad); break; } return box1->c; } static wchar_t ocr0_vV(ocr0_shared_t *sdata){ struct box *box1=sdata->box1; pix *bp=sdata->bp; int 
i,j,d,x,y,i1,i2,i3,hchar=sdata->hchar,gchar=sdata->gchar, x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; int dx=x1-x0+1,dy=y1-y0+1, /* size */ ad; /* tmp-vars */ wchar_t bc=UNKNOWN; // --- test v ------------------------------------------------- for(ad=d=100;dx>2 && dy>3;){ // min 3x4 DBG( wchar_t c_ask='v'; ) if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ x=loop(bp,dx/2,0,dx,cs,1,RI)+dx/2; // be sure in the upper gap y=loop(bp, x,0,(dy+1)/2,cs,0,DO)-1; // (x,y) should be in the gap if (x>3*dx/4 || yp,cs,1) != 1 ) Break; if( get_bw(x0+x,x1,y0+y,y0+y,box1->p,cs,1) != 1 ) Break; if( get_bw(x0+x,x0+x,y1-dy/2,y1, box1->p,cs,1) != 1 ) Break; if( get_bw(x0+x, x0+x ,y0, y0+dy/3,box1->p,cs,1) == 1 ) // it v? if( get_bw(x0+x+1,x0+x+1,y0, y0+dy/3,box1->p,cs,1) == 1 ) Break; // UVW if(((num_cross( 0,dx/2+1,dy/ 8,dy/ 8,bp,cs)!=1) && (num_cross( 0,dx/2+1,dy/16,dy/16,bp,cs)!=1) // it v && (num_cross(dx/2+1,dx -1,dy/ 8,dy/ 8,bp,cs)!=1)) /* () added on Sep00 */ || ((num_cross( 0,dx-1,dy-1-dy/8,dy-1-dy/8,bp,cs)> 1) && (num_cross( 0,dx-1,dy-1 ,dy-1 ,bp,cs)> 1)) ) Break; // UV if( get_bw(0 ,dx/8,dy-1-dy/6,dy-1,bp,cs,1)==1 ) Break; if( get_bw(dx-1-dx/8,dx-1,dy-1-dy/6,dy-1,bp,cs,1)==1 ) Break; if( loop(bp,0 ,dy/6 ,dx,cs,0,RI) >=loop(bp,0 ,dy-1-dy/3,dx,cs,0,RI) && dy>6 ) Break; if( loop(bp,0 ,dy-1-dy/3,dx,cs,0,RI) >loop(bp,0 ,dy-1-dy/8,dx,cs,0,RI) && loop(bp,dx-1,dy-1-dy/3,dx,cs,0,LE) >loop(bp,dx-1,dy-1-dy/8,dx,cs,0,LE) ) Break; // better OR ? 
if( loop(bp,0 ,dy-1-dy/3,dx,cs,0,RI) >=loop(bp,0 ,dy-1-dy/8,dx,cs,0,RI) && loop(bp,dx-1,dy-1-dy/3,dx,cs,0,LE) >=loop(bp,dx-1,dy-1-dy/8,dx,cs,0,LE) ) ad=99*ad/100; // font21 if( loop(bp,dx-1,dy/6 ,dx,cs,0,LE) >=loop(bp,dx-1,dy-1-dy/3,dx,cs,0,LE) && dy>6 ) Break; x=loop(bp,0,dy-1,dx,cs,0,RI); // 3*x>dx changed to 2*x>dx May2001 JS x=loop(bp,x,dy-1,dx,cs,1,RI); if ( dx>14 && 2*x>dx ) Break; // U if( num_cross(0 ,dx/2, dy/4, dy/4,bp,cs)==2 && num_cross(dx-dx/2,dx-1,dy-dy/4,dy-dy/4,bp,cs)==2 ) Break; // ~{\it u} #if 0 // measure thickness of lower v i=loop(bp, 0,dy-1-dy/16,dx,cs,0,RI) +loop(bp,dx-1,dy-1-dy/16,dx,cs,0,LE); j=loop(bp, 0,dy-1-dy/4 ,dx,cs,0,RI) +loop(bp,dx-1,dy-1-dy/4 ,dx,cs,0,LE); if( box1->m1 && hchar && dy>15 && j>=i-dx/32 ) Break; // ~Y #endif /* V has serifs only on upper site! Y also on bottom, check it. Okt00 */ i=loop(bp, 0, 0,dx,cs,0,RI); i=loop(bp, i, 0,dx,cs,1,RI); i1=i; // thickness i=loop(bp, 0, 1,dx,cs,0,RI); i=loop(bp, i, 1,dx,cs,1,RI); if(i>i1) i1=i; // thiggest i=loop(bp, 0,dy/4,dx,cs,0,RI); i=loop(bp, i,dy/4,dx,cs,1,RI); i2=i; i=loop(bp, 0,dy ,dx,cs,0,RI); i=loop(bp, i,dy ,dx,cs,1,RI); i3=i; // thickness i=loop(bp, 0,dy-1,dx,cs,0,RI); i=loop(bp, i,dy-1,dx,cs,1,RI); if(i>i3) i3=i; // thiggest if( y0 < box1->m2 ) if( i1-i2 > dx/32+2 && i3-i2 > dx/32+2 ) Break; // ~serif_Y if( y0 < box1->m2 ) // uppercase V ? 
if( i1-i2 < dx/32+2 ) /* no serif detected */ if( num_cross(0,dx-1,dy-1-dy/4,dy-1-dy/4,bp,cs)==1 ){ j=loop(bp, 0,dy-1-dy/4 ,dx,cs,0,RI); j=loop(bp, j,dy-1-dy/4 ,dx,cs,1,RI); if (jloop(bp,0 ,dy-1 ,dx,cs,0,RI) ) ad=96*ad/100; if (gchar) ad=99*ad/100; bc='v'; if( hchar ) bc='V'; Setac(box1, bc, ad); break; } return box1->c; } static wchar_t ocr0_rR(ocr0_shared_t *sdata){ struct box *box1=sdata->box1; pix *bp=sdata->bp; int i,j,d,x,y,i1,i2,i3,hchar=sdata->hchar,gchar=sdata->gchar, x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; int dx=x1-x0+1,dy=y1-y0+1, /* size */ ad; /* tmp-vars */ // --- test r ------- for(ad=d=100;dy>3 && dx>1;){ // dy>dx, 4x6 font, dx=2 smallest prop-font DBG( wchar_t c_ask='r'; ) if (sdata->holes.num > 0 && ( sdata->holes.hole[0].y1 > dy/2 // tiny hole in upper left || sdata->holes.hole[0].x1 > dx/2 ) // is tolerated, ~Pp ) Break; /* tolerant against a tiny hole */ if( 2*dym3-box1->m1) Break; if( loop(bp,dx-1,dy/2,dx,cs,0,LE)<=dx/8 ) Break; x= loop(bp,dx-1,dy/2,dx,cs,0,LE); if (x<=dx/2) ad=99*ad/100; // ~t if (loop(bp,dx-1-x/2,0,dy,cs,0,DO)>dy/8) ad=99*ad/100; // ~t if( dx>4 ) if( loop(bp,dx-1,dy/2,dx,cs,0,LE)<=dx/8+2 ) Break; // ~v Jun00 i=dy-(dy+20)/32; // ignore dust on the ground for( y=4*dy/8; y3*dx/8) break; i2= loop(bp,dx-1,y,dx,cs,0,LE); if(i1>i2) break; if( (i1+(dx-i2 -1))/2 >= 4*dx/8 ) break; // mass middle should be left } if (y5*dx/8 // not a C && get_bw(dx-1-dx/8,dx-1,dy-1-dy/4,dy-1,bp,cs,1) ==1 ) Break; if( loop(bp, 0,5*dy/8,dx,cs,0,RI)<=dx/8 && loop(bp,dx-1,5*dy/8,dx,cs,0,LE)>=5*dy/8 && loop(bp,dx/2, dy-1,dy,cs,0,UP)<=dy/8 ) Break; // ~c if( loop(bp, 0,3*dy/8,dx,cs,0,RI) > loop(bp,dx-1,3*dy/8,dx,cs,0,LE)+dx/8 ) { if( loop(bp, 0, dy/8,dx,cs,0,RI)3*dx/4 ) Break; // ~i if( loop(bp,0,dy/4,dx,cs,0,RI)>3*dx/8 // ~I && get_bw(0,dx/8,0,dy/4,bp,cs,1) ==1 ) Break; if( num_cross(0,dx-1,dy/2, dy/2 ,bp,cs)!=1 && num_cross(0,dx-1,dy/2+1,dy/2+1,bp,cs)!=1 ) Break; // ~n 024a3 // itallic t is sometimes not high enough, look for v-like 
shape for(y=3*dy/4;y1 ) ad=95*ad/100; // ~f if( num_cross(dx/2 ,dx/2 ,0,dy-1,bp,cs)>2 && num_cross(dx/2+1,dx/2+1,0,dy-1,bp,cs)>2 ) Break; // ~f if (box1->dots) ad=98*ad/100; /* could be modified latin2-r */ if (hchar) ad=96*ad/100; if (gchar) ad=97*ad/100; Setac(box1,'r',ad); break; // not 100% sure! } // --- test R --------------------------------------------------- for(ad=d=100;dx>2 && dy>3;){ // min 3x4 DBG( wchar_t c_ask='R'; ) if (sdata->holes.num > 2) Break; /* tolerant against a tiny hole */ if( num_cross(x0,x1,y1-dy/8,y1-dy/8, box1->p,cs) < 2 ) Break; // ~P if (loop(bp, dx/2, dy/4,dy,cs,0,DO)>dy/2) Break; // ~C if (loop(bp, dx/2, 0,dy,cs,0,DO)>dy/8 && loop(bp, dx/2,dy/16,dx,cs,0,RI)=16 ) Break; for(i=1,y=y0+dy/8;y<=y1-dy/8 && i;y++){ // left v-line if( get_bw(x0 , x0+dx/2,y, y,box1->p,cs,1) != 1 ) i=0; } if( !i ) Break; for(i=1,x=x0+3*dx/8;x<=x1-dx/4 && i;x++){ // upper h-line if( get_bw( x, x, y0, y0+dy/4,box1->p,cs,1) != 1 ) i=0; } if( !i ) Break; for(y=0,x=x0+dx/4;x<=x1-dx/4;x++){ // lower h-gap i=loop(box1->p,x,y1,dy,cs,0,UP); /* on small chars bypass possible low left serifs */ if (i>0) { i2=loop(box1->p,x-1,y1-i-1,dy,cs,0,UP); if (i2>1) i+=i2-1; } if (i>y) { y=i; i1=x; } } if( y<=dy/8 ) Break; if (yp,cs) == 2 ) i=0; } if( i ) Break; for(i=1,y=y0;y<=y0+3*dy/8 && i;y++){ // upper 2 vert lines if( num_cross(x0,x1,y,y, box1->p,cs) == 2 ) i=0; } if( i ) Break; for(i=1,y=y0+dy/3;y<=y1-dy/3 && i;y++){ // midle h line if( num_cross(x0,x1,y,y, box1->p,cs) == 1 ) i=0; } if( i ) ad=95*ad/100; /* sometimes there is a small gap */ for(i=1,y=y1-dy/4;y<=y1 && i;y++){ // lower 2 vert lies if( num_cross(x0,x1,y,y, box1->p,cs) == 2 ) i=0; } if( i ) Break; if( get_bw(x1-dx/3,x1,y0,y0+dy/4,box1->p,cs,1) != 1 ) Break; // pixel ru x=loop(bp,dx-1, dy/4,dx,cs,0,LE); if(x>dx/2) Break; i=x; // ru x=loop(bp,dx-1, dy/2,dx,cs,0,LE); if(x<=i ) Break; i=x; // rc x=loop(bp,dx-1, 5*dy/8,dx,cs,0,LE); if(x>i ) i=x; x=loop(bp,dx-1, 6*dy/8,dx,cs,0,LE); if(x>i ) i=x; 
x=loop(bp,dx-1,dy-1-dy/8,dx,cs,0,LE); if(x>=i ) Break; // rd i1=loop(bp,0, dy/4,dx,cs,0,RI); // straight i2=loop(bp,0, dy/2,dx,cs,0,RI); i3=loop(bp,0,dy-1-dy/4,dx,cs,0,RI); if( abs(i1+i3-2*i2)>1+dx/16 ) Break; if (dy>15) if (loop(bp,dx-1, dy/2,dx,cs,0,LE)>=loop(bp,dx-1, dy-1,dx,cs,0,LE) && loop(bp,dx-1,3*dy/16,dx,cs,0,LE)>=loop(bp,dx-1,dy/16,dx,cs,0,LE)+dx/8 ) Break; // ~ff if (dy>7) if (loop(bp,dx-1,dy-2 ,dx,cs,0,LE) >loop(bp,dx-1,dy-2-dy/8,dx,cs,0,LE)) { ad=98*ad/100; if (loop(bp,dx-1,dy-1-dy/4,dx,cs,0,LE)==0 && loop(bp,dx-1,dy-2-dy/8,dx,cs,0,LE)>0 ) Break; // broken B ?? } j=sdata->holes.num; if (j != 1) { i=num_hole (x0,x1,y0,y1-dy/3,box1->p,cs,NULL); // j=num_hole (x0,x1,y0,y1 ,box1->p,cs,NULL); if (i==0) ad=90*ad/100; /* some times there is a small gap */ if (j>1 || j>i) Break; } if (sdata->holes.num < 1) ad=90*ad/100; if (sdata->holes.num==1) if (sdata->holes.hole[0].y1 > 3*dy/4) ad=95*ad/100; // alpha if (!hchar) ad=98*ad/100; if ( gchar) ad=98*ad/100; Setac(box1,'R',ad); break; } return box1->c; } static wchar_t ocr0_m(ocr0_shared_t *sdata){ struct box *box1=sdata->box1; pix *bp=sdata->bp; int i,d,x,y,i1,i2,i3,i4,i5,hchar=sdata->hchar,gchar=sdata->gchar, handwritten=0, x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; int dx=x1-x0+1,dy=y1-y0+1, /* size */ ad; /* tmp-vars */ // --- test m ------- for(ad=d=100;dx>4 && dy>3;){ DBG( wchar_t c_ask='m'; ) if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ if (sdata->holes.num > 0) ad=96*ad/100; x =loop(bp,dx-1,dy/2,dx,cs,0,LE); if(3*x>dx) Break; // ~K y=dy/2; i=num_cross(0,dx-1,y ,y ,bp,cs); if (i!=3) i=num_cross(0,dx-1,y+1,y+1,bp,cs); if (i<3 && i>5) Break; // m ru rn, handwritten m // im or glued.mm cut to nm if (i>3) { ad=99*ad/100; MSG(fprintf(stderr,"ad=%d",ad);) } for (i=0,y=dy-1-dy/8;y>dy/2;y--) { i=num_cross(0,dx-1,y,y,bp,cs); if (i>2) break; } if (i>3) Break; for ( ;y>dy/2;y--) { i=num_cross(0,dx-1,y,y,bp,cs); if (i!=3) break; } if (i>5) Break; y++; i5=y; if (y> dy/2) 
handwritten=10; if (y>3*dy/4) handwritten=60; /* @@............... @@......,........ @@,...@@@....@@@. @@,,.@@@@..@@@@@, @@@.@@@@@.@@@@@@, @@;@@@@@@@@@;,@@, @@@@@,.@@@@,,,@@@ <- i5 ,@@@...;@@....@@@ .@;...........,@@ ...............@@ i1 i2 i3 i4 */ x =loop(bp,0,y,dx ,cs,0,RI); if(x> dx/4) Break; // search 1st v-line x+=loop(bp,x,y,dx-x,cs,1,RI); if(x> dx/2) Break; i1=x; // first gap x+=loop(bp,x,y,dx-x,cs,0,RI); if(x>3*dx/4) Break; i2=x; // 2nd v-line x+=loop(bp,x,y,dx-x,cs,1,RI); if(x>6*dx/8) Break; i3=x; // 2nd gap x+=loop(bp,x,y,dx-x,cs,0,RI); if(x<5*dx/8) Break; i4=x; // 3th v-line if (x>=dx) Break; // missing 3th v-line, ~W MSG(fprintf(stderr,"y=%d x=%d %d %d %d",y,i1,i2,i3,i4);) if( abs((i2-i1)-(i4-i3)) > 2+((i2-i1)+(i4-i3))/4 ) Break; // same gap width? rn if( abs((i2-i1)-(i4-i3)) > 2+((i2-i1)+(i4-i3))/8 ) ad=98*ad/100; // same gap width? rn // the same game for the lower part =>l1 l2 l3 l4 ??? i =loop(bp,0,5*dy/8,dx,cs,0,RI); i =loop(bp,i,5*dy/8,dx,cs,1,RI); x =loop(bp,0,dy-dy/32-1,dx,cs,0,RI); x =loop(bp,x,dy-dy/32-1,dx,cs,1,RI); if( x > i+1 ) i=1; else i=0; /* looks like serif m, Okt00 */ for(y=0,x=i1;xy) y=i; } if(yy) y=i; } if(y=dy/2 ) break; if(xi4-i3+dx/16){ for(y=0,x=(i1+i2)/2;xy ) y=i; if( 2*i3 ) Break; // melted WT x=loop(bp,dx-1,dy/2,dx,cs,0,LE); if (x>2 && loop(bp,dx-1-x/2,0,dy,cs,0,DO)dy/2) Break; // N // {\it m} if( loop(bp,1, dy/4,dx,cs,0,RI) >loop(bp,0,7*dy/8,dx,cs,0,RI) ) Setac(box1,'m',98*ad/100); if (handwritten<10){ x =loop(bp,0,dy/4,dx,cs,0,RI); x+=loop(bp,x,dy/4,dx,cs,1,RI); for( ;x=dy/4) ad=99*ad/100; if (i>(dy+2)/4) ad=95*ad/100; if (3*i>dy) Break; } if(xdots) ad=99*ad/100; Setac(box1,'m',ad); if (ad>=100) return 'm'; break; } return box1->c; } static wchar_t ocr0_tT(ocr0_shared_t *sdata){ struct box *box1=sdata->box1; pix *bp=sdata->bp; int i,i1,i2,i3,i4,j,d,x,y,yb,hchar=sdata->hchar,gchar=sdata->gchar, x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; int dx=x1-x0+1,dy=y1-y0+1, /* size */ ad; /* tmp-vars */ // --- test 
T --------------------------------------------------- for(ad=d=100;dx>2 && dy>3;){ // dx>1 dy>2*dx DBG( wchar_t c_ask='T'; ) if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ // upper horizontal line i1= loop (bp, dx/8,0,dy,cs,0,DO); // left side i2= loop (bp,dx-1-dx/8,0,dy,cs,0,DO); // right side i3= loop (bp, dx/8,i1,dy,cs,1,DO); // left side i4= loop (bp,dx-1-dx/8,i2,dy,cs,1,DO); // right side if (i1>dy/4 || i2>dy/4) Break; for (x=dx/8;xi1+dy/8 && i>i2+dy/8) break; if (idx+1 || i+j>=dx || i+j/23*x) break; //~I } if( y3*dx/4) Break; // ~7 i+= loop(bp,i ,dy/4,dx,cs,1,RI);if(i>3*dx/4) Break; if( num_cross(0,dx-1, dy-1, dy-1,bp,cs) != 1 && num_cross(0,dx-1, dy-2, dy-2,bp,cs) != 1 ) Break; if( num_cross(0,dx-1,2*dy/3,2*dy/3,bp,cs) != 1 && num_cross(0,dx-1,2*dy/3,2*dy/3,bp,cs) != 1 ) Break; if (box1->m3 && 2*y1>box1->m3+box1->m4 && loop(bp,0, 0,dy/2,cs,0,DO)>=dy/4 && loop(bp,0,dy-1,dy ,cs,0,UP)<=dy/2) ad=96*ad/100; // ~J if (gchar) ad=98*ad/100; if( loop(bp,0,dy-1,dx,cs,0,RI)<=dx/8) ad=99*ad/100; // ~J i = loop(bp,0,dy/2,dx,cs,0,RI); j = loop(bp,i,dy/2,dx,cs,1,RI); if( 2*i>=dx || 2*(dx-j-i)=100) return 'T'; break; } // --- test t --------------------------------------------------- // written t can look like a + or even with missing right side // smallest t found in win-screenshot (prop-font) dx=2 for(ad=d=100;dx>1 && dy>=box1->m3-box1->m2-1;){ // sometimes no hchar! DBG( wchar_t c_ask='t'; ) if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ if (dy<=box1->m3-box1->m2+1) ad=96*ad/100; // bad line detection? for(x=0,yb=j=y=dy/32+3*dy/16;y<5*dy/8;y++)if(y>0){ // upper cross line i=loop(bp,0,y,dx,cs,0,RI); i=loop(bp,i,y,dx,cs,1,RI); if( i>x ) { x=i;yb=j=y; } // hor. 
line i=num_cross(0,dx-1,y ,y ,bp,cs); j=num_cross(0,dx-1,y+1,y+1,bp,cs); if (i>2 && j>2) break; if( y<11*dy/16 && num_cross(0,dx-1,y ,y ,bp,cs) != 1 && ( num_cross(0,dx-1,y+dy/8,y+dy/8,bp,cs) != 1 || dy<13) // against noise ) break; } if( y<4*dy/8 ) Break; if (dy>12 && x>4 && x>dx/2 && yb<=(dy+4)/8) if ( loop(bp,dx-1-3*x/4,yb,dy,cs,1,UP) <=loop(bp,dx-1-1*x/4,yb,dy,cs,1,UP)+1 ) if ( loop(bp,0 ,dy/2,dy,cs,1,UP)>dx/8 ) Break; // ~C if (x=dx && 9*dx>=8*dy) { ad=99*ad/100; } // + i=loop(bp,dx-1,0,dx,cs,0,LE); for(y=0;y1) break; i=j; } if( yi ) break; if( y==yb ) break; j=loop(bp,0, dy/2,dx,cs,0,RI); j=loop(bp,j, dy/2,dx,cs,1,RI); i=j; // thickness j=loop(bp,0, dy/4,dx,cs,0,RI); j=loop(bp,j, dy/4,dx,cs,1,RI); if (j=loop(bp,dx-1,yb/2,dx,cs,0,LE) ) Break; // ~1 ??? j=1; for(y=1;j && y=cs && getpixel(bp,x+1,y )>=cs && getpixel(bp,x ,y-1)< cs && getpixel(bp,x+1,y-1)< cs ) { j=0;break; } } if(!j) Break; if( num_cross(0,dx-1,dy-2,dy-2,bp,cs) == 2 && num_cross(0,dx-1,dy-1,dy-1,bp,cs) == 2 ) Break; // ~* (5er) if( dy>= 16 && loop(bp, 0, 3*dy/4,dx,cs,0,RI) >=loop(bp, 0, dy-2,dx,cs,0,RI) && loop(bp,dx-1, 3*dy/4,dx,cs,0,LE) <=loop(bp,dx-1, dy-2,dx,cs,0,LE) && loop(bp,dx-1, 1,dx,cs,0,LE)+dx/16 loop(bp, 0,3*dy/16,dx,cs,0,RI)+dx/16 || loop(bp,dx-1, 0,dx,cs,0,LE)==0 || loop(bp,dx-1, 1,dx,cs,0,LE)==0) ) ad=96*ad/100; // ~f Jan02 if(dx<8 && dy>12){ // thin f's could easily confound with t x=loop(bp,dx-1,3*dy/16,dx,cs,0,LE); if (x) if (loop(bp,dx-x,0,dy,cs,0,DO)<3*dy/16 && loop(bp, 0, 3*dy/4,dx,cs,0,RI)+1 >=loop(bp, 0, dy-2,dx,cs,0,RI) && loop(bp,dx-1, 3*dy/4,dx,cs,0,LE) <=loop(bp,dx-1, dy-2,dx,cs,0,LE) ) Break; } if (dx>7) if( num_cross( 0,dx-1,2*dy/3,2*dy/3,bp,cs) > 1 && num_cross( 0,dx/2,2*dy/3,2*dy/3,bp,cs) > 0 && num_cross(dx/2,dx-1,2*dy/3,2*dy/3,bp,cs) > 0 ) if (sdata->holes.num > 0) if (sdata->holes.hole[0].y0 > dy/4) Break; // ~6 // if ( num_hole( x0, x1, y0+dy/4, y1, box1->p,cs,NULL) > 0 ) Break; // ~6 if( num_cross(0,dx-1,3*dy/4, 3*dy/4, bp,cs) >= 2 && 
num_cross(0,dx-1,3*dy/4-1,3*dy/4-1,bp,cs) >= 2 ){ ad=99*ad/100; /* italic t ? */ if (loop(bp,dx/2 ,dy-1,dy,cs,0,UP)>dy/4) Break; // ~h if (loop(bp,dx/2+1,dy-1,dy,cs,0,UP)>dy/4) Break; // ~h } x= loop(bp,dx-1,dy/2,dx,cs,0,LE); i= loop(bp,dx-1,dy/8,dx,cs,0,LE); if (i>x && loop(bp,dx-x,0,dy,cs,0,DO)>=dy/2) ad=90*ad/100; /* ~\ */ x= loop(bp,0, 0,dx,cs,0,RI); i= loop(bp,0, 1,dx,cs,0,RI); if (i1) Break; // l // this happens quite often, do not be to strong if (!box1->m2) ad=99*ad/100; if (box1->m2) { if (!hchar) ad=99*ad/100; /* some times t is not long enough */ if( y0>=box1->m2-(box1->m2-box1->m1)/4 ) ad=99*ad/100; /* to short */ if( y0>=box1->m2 ) ad=99*ad/100; /* to short */ } if (sdata->holes.num > 0) ad=95*ad/100; if (gchar) ad=99*ad/100; if (box1->dots) ad=90*ad/100; Setac(box1,'t',ad); break; } return box1->c; } static wchar_t ocr0_sS(ocr0_shared_t *sdata){ struct box *box1=sdata->box1; pix *bp=sdata->bp; int d,x,y,i1,i2,i3,hchar=sdata->hchar,gchar=sdata->gchar, x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; int dx=x1-x0+1,dy=y1-y0+1, /* size */ ad; /* tmp-vars */ wchar_t ac; // --- test sS near 5 --------------------------------------------------- for(ad=d=100;dx>2 && dy>3;){ // min 3x4 (4x6 font) DBG( wchar_t c_ask='s'; ) if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ if( num_cross( dx/2, dx/2,0,dy-1,bp,cs)!=3 && num_cross(5*dx/8,3*dx/8,0,dy-1,bp,cs)!=3 && dy>4 ) Break; if( num_cross(0,dx-1,dy/2 ,dy/2 ,bp,cs)!=1 && num_cross(0,dx-1,dy/2-1,dy/2-1,bp,cs)!=1 ) Break; // get the upper and lower hole koords y=dy/4; x =loop(bp,0,y,dx,cs,0,RI); if(x>3*dx/8) Break; /* slanted too */ x +=loop(bp,x,y,dx,cs,1,RI); if(x>5*dx/8) Break; /* fat too */ i1 =loop(bp,x,y,dx,cs,0,RI); i1=(i1+2*x)/2; // upper center x y=11*dy/16; x =loop(bp,dx-1 ,y,dx,cs,0,LE); if(x>dx/4) Break; x +=loop(bp,dx-1-x,y,dx,cs,1,LE); if(dx>5 && dy>7 && x>dx/2) Break; if (x>3*dx/4) Break; if(x>dx/2) { ad=98*ad/100; MSG({})} i2 =loop(bp,dx-1-x,y,dx,cs,0,LE); 
i2=dx-1-(i2+2*x)/2; // upper center x for( y=dy/4;ydx/8) break; } if(y==dy/2) Break; // Mai00 y=dy/2+loop(bp,0,dy/2,dy/2,cs,1,DO); if( !joined(bp,0,y,i2,11*dy/16,cs) ) Break; if (sdata->holes.num > 0) if (sdata->holes.hole[0].y0 > dy/4) Break; // ??? // if( num_hole( x0, x1, y0+dy/4, y1, box1->p,cs,NULL) > 0 ) Break; i1=loop(bp,dx-1,dy-1,dx,cs,0,LE); i2=loop(bp,dx-1,dy-2,dx,cs,0,LE); if (i2-i1 >= dx/4) Break; // ~{ 5x7font i1=loop(bp, 0, 0,dx,cs,0,RI); i2=loop(bp, 0, 1,dx,cs,0,RI); if (i2-i1 >= dx/4) Break; // ~} 5x7font // sS5 \sl z left upper v-bow ? i1=loop(bp, 0,dy/2,dx,cs,0,RI); i1=loop(bp, i1,dy/2,dx,cs,1,RI); if (4*i1>=3*dx) ad=97*ad/100; // ~5 7-segment i1=loop(bp,0, dy/16,dx,cs,0,RI); i2=loop(bp,0,4*dy/16,dx,cs,0,RI); i3=loop(bp,0,7*dy/16,dx,cs,0,RI); if( 2*i2+dx/32 >= i1+i3 ){ if( 2*i2+dx/32 > i1+i3 || dx>9 ) Break; // very small s? i1+=loop(bp,i1, dy/16,dx,cs,1,RI); i2+=loop(bp,i2,4*dy/16,dx,cs,1,RI); i3+=loop(bp,i3,7*dy/16,dx,cs,1,RI); if( 2*i2+dx/32 >= i1+i3 ) Break; } for(y=7*dy/16;y<5*dy/8;y++){ if( num_cross( 0,dx-1,y ,y ,bp,cs)==2 ) if( num_cross( 0,dx-1,y+1,y+1,bp,cs)==1 ) if( num_cross( 0,dx/4,y,y,bp,cs)==1 ) break; // ~5 } if(y<5*dy/8) Break; // v0.2.4a5 if ( loop(bp, dx-1,dy-2-dy/32,dx,cs,0,LE) > loop(bp, 0, 1+dy/32,dx,cs,0,RI) + dx/4 ) Break; // ~5 Dec00 ac='s'; if (gchar) { ad=98*ad/100; MSG({}) } if( hchar ){ // S but 5 is very similar! 
check it ac='S'; if ( loop(bp, dx-1,dy-1-dy/32,dx,cs,0,LE) > loop(bp, 0, 0+dy/32,dx,cs,0,RI) ) ad=99*ad/100; // ~5 if ( loop(bp, 0,dy-1-dy/32,dx,cs,0,RI) > loop(bp, dx-1, 0+dy/32,dx,cs,0,LE) ) ad=99*ad/100; // ~5 } Setac(box1,ac,ad); break; } return box1->c; } static wchar_t ocr0_gG(ocr0_shared_t *sdata){ struct box *box1=sdata->box1; pix *bp=sdata->bp; int i,j,d,x,y,i1,i2,i3,hchar=sdata->hchar,gchar=sdata->gchar, x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; int dx=x1-x0+1,dy=y1-y0+1, /* size */ ad; /* tmp-vars */ // --- test g --------------------------------------------------- /* some g's have crotchet at upper right end, so hchar can be set */ // ~italic g for(ad=d=100;dx>2 && dy>4;){ // min 3x5 DBG( wchar_t c_ask='g'; ) if (sdata->holes.num > 3) Break; /* tolerant against a tiny hole */ if( get_bw(x0+dx/2, x0+dx/2, y1-dy/2, y1,box1->p,cs,1) != 1 ) Break; if( get_bw(x1-dx/4, x1 , y1-dy/4, y1,box1->p,cs,1) != 1 ) Break; // ~p if( get_bw(x0+dx/2, x0+dx/2, y0, y0+dy/2,box1->p,cs,1) != 1 ) Break; if( num_cross(x0+dx/2, x0+dx/2, y0, y1, box1->p,cs) < 3 ) if( num_cross(x1-dx/2, x1-dx/2, y0, y1, box1->p,cs) < 3 ) Break; if (sdata->holes.num < 1) Break; for (i=0;iholes.num;i++){ if (sdata->holes.hole[i].y1 < 5*dy/8+1) break; } if (i==sdata->holes.num) Break; // no upper hole found // if( num_hole ( x0, x1, y0, y0+5*dy/8, box1->p,cs,NULL) != 1 ) Break; for(y=dy/4;y=15*dy) Break; // ~B if (num_cross(x1, x1, (y0+y1)/2, y1, box1->p,cs)>1) { ad=98*ad/100; // ~& if (num_cross(x1 , x1 , y0, (y0+y1)/2, box1->p,cs)<1 ) ad=96*ad/100; if (num_cross(x1-1, x1-1, y0, (y0+y1)/2, box1->p,cs)<1 ) ad=95*ad/100; } // looking for a gap for (x=0,y=dy/4;yx) x=i; } // in a good font x is greater dx/2 if (xp,cs) > 2 || num_cross(x0,x1 ,y0+dy/8,y0+dy/8,box1->p,cs) > 2) ad=90*ad/100; if( num_cross(x0,x1+dx/4,y1-dy/4,y1-dy/4,box1->p,cs) > 2 || num_cross(x0,x1+dx/4,y1-dy/8,y1-dy/8,box1->p,cs) > 2) ad=90*ad/100; } if( num_cross(0,dx-1,dy/2,dy/2,bp,cs) >2 ) ad=99*ad/100; // ~/o /* 
test for horizontal symmetry ~8 */ for (y=0;ym4==0) ad=98*ad/100; if ( hchar) ad=96*ad/100; if (!gchar) ad=96*ad/100; ad=98*ad/100; Setac(box1,'g',ad); break; } // --- test rundes G --------------------------------------------- for(ad=d=100;dx>3 && dy>4;){ // min 3x4 DBG( wchar_t c_ask='G'; ) if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ if( get_bw(x0 ,x0+dx/2,y0+dy/3,y0+dy/3,box1->p,cs,1) != 1 ) Break; if( get_bw(x0+dx/2,x1-dx/4,y0 ,y0+dy/4,box1->p,cs,1) != 1 ) Break; if( get_bw(x0+dx/2,x0+dx/2,y1-dy/4,y1 ,box1->p,cs,1) != 1 ) Break; if( get_bw(x0 ,x0+dx/2,y1-dy/3,y1-dy/3,box1->p,cs,1) != 1 ) Break; // ~S for( y=y0+dy/4;yp,cs,1) == 0 ) break; if( y==y1-dy/3 ) Break; // no gap if( num_cross(x0+dx/2 , x0+dx/2 , y0, y, box1->p,cs) != 1 || num_cross(x0+dx/2+1, x0+dx/2+1, y0, y, box1->p,cs) != 1 ) Break; // ~e x=x0; y=y1; turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,UP,ST); // left bow? if( yp,&x,&y,x0,x1,y0,y1,cs,LE,ST); if( xp,&x,&y,x0,x1,y0,y1,cs,ST,LE); if( xp,&x,&y,x0,x1,y0,y1,cs,LE,ST); turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,ST,LE); if( xp,&x,&y,x0,x1,y0,y1,cs,RI,UP); // upper end right midle if( x<=x1 ) Break; if( yy1-dy/4 ) Break; x=x1-dx/3;y=y1; // follow left C-bow, filter S turmite(box1->p,&x,&y,x0,x1,y0+dy/4,y1,cs,LE,UP); // w=LE b=UP if( y>y0+dy/4+1 ) Break; /* leave box below for S or on top for CG */ MSG(fprintf(stderr,"xy= %d %d",x-x0,y-y0);) /* if (yp,&x,&y,x0,x1,y0 ,y1,cs,RI,UP); MSG(fprintf(stderr,"xy= %d %d",x-x0,y-y0);) if( y>y0 ) Break; if (sdata->holes.num > 0) Break; // if( num_hole(x0,x1,y0,y1,box1->p,cs,NULL) > 0 ) Break; if( dx>4 && dy>6){ // no (<[ for(i=1,y=0;i && y Z if( xi ) i=x; } if( yi){ i=x;i1=y; } } if( i1<=dy/4 || i1>=dy-dy/4 ) Break; // around the middle ? 
// check from above for gap and left vertical line (~S) x =loop(bp,0,i1,dx ,cs,0,RI); x+=loop(bp,x,i1,dx-x,cs,1,RI); // left vertical bow x+=loop(bp,x,i1,dx-x,cs,0,RI); if (x>=dx) ad=90*ad/100; MSG(fprintf(stderr,"h-bar y dx %d %d ad= %d",i1,i,ad);) i=1; // Mar06: adapted to 4x6 font for(x=dx/2;x=cs && getpixel(bp,x+1,y )< cs && getpixel(bp,x+1,y-1)< cs && getpixel(bp,x ,y-1)< cs ) { i=0;break; } } if(i) ad=95*ad/100; // ~C if(!hchar) ad=98*ad/100; if( gchar) ad=98*ad/100; Setac(box1,'G',ad); break; } // --- test \it g like 9 ---------------------------------------------- for(ad=d=100;dx>2 && dy>4;){ // dx>1 dy>2*dx DBG( wchar_t c_ask='g'; ) if (sdata->holes.num > 2) Break; /* tolerant against a tiny hole */ if( num_cross(x0+dx/2,x0+dx/2,y0,y1,box1->p,cs) != 3 // pre select && num_cross(x0+dx/4,x1-dx/4,y0,y1,box1->p,cs) != 3 ) Break; for( x=0,i=y=y0+dy/2;y<=y1-3*dy/16;y++){ // suche kerbe j=loop(box1->p,x0,y,dx,cs,0,RI); if( j>2 && j>dx/4 && yp,x0+j-2,y+1,dx,cs,0,RI)-2; if( j>x ) { x=j; i=y; } } if( x<4*dx/8 ) Break; if( num_cross(x0+dx/2,x1,i ,y1,box1->p,cs) != 1 && num_cross(x0+dx/2,x1,i+1,y1,box1->p,cs) != 1 ) Break; if( num_hole(x0,x1,y0,i+1,box1->p,cs,NULL)!=1 ) Break; if( num_hole(x0,x1,i-1,y1,box1->p,cs,NULL)!=0 ) Break; if( loop(box1->p,x0,y1 ,dy,cs,0,RI)>dx/3 && loop(box1->p,x0,y1-1,dy,cs,0,RI)>dx/3) Break; // no q for( x=0,i=y=y0+dy/3;y<=y1-dy/3;y++){ // suche kerbe j=loop(box1->p,x1,y,dx,cs,0,LE); if( j>x ) { x=j; i=y; } } if( x>dx/2 ) Break; // no g i1=loop(bp,dx-1,dy/8 ,dx,cs,0,LE); if(i1>dx/2) Break; i3=loop(bp,dx-1,dy-1-dy/8,dx,cs,0,LE); i2=loop(bp,dx-1,dy/2 ,dx,cs,0,LE); if(i1+i3<2*i2-dx/8) Break; // konvex i1=loop(bp,dx-1,dy/4 ,dx,cs,0,LE); if(i1>dx/2) Break; i3=loop(bp,dx-1,dy-1-dy/8,dx,cs,0,LE); for(y=dy/4;y0){ x--; // robust y=loop(bp,dx-x-1, dy-1,dy,cs,0,UP); if(yp,cs) > 2) ad=90*ad/100; if( num_cross(x0,x1+dx/4,y1-dy/4,y1-dy/4,box1->p,cs) > 2 || num_cross(x0,x1+dx/4,y1-dy/8,y1-dy/8,box1->p,cs) > 2) ad=90*ad/100; if (box1->m4==0) ad=98*ad/100; 
if ( hchar) ad=96*ad/100; if (!gchar) ad=96*ad/100; if (ad>99) ad=99; // never be sure to have a 9 Setac(box1,'g',ad); break; } return box1->c; } // rewritten for vector usage v0.41 static wchar_t ocr0_xX(ocr0_shared_t *sdata){ struct box *box1=sdata->box1; // pix *bp=sdata->bp; // obsolete int i,j,d,x,y,hchar=sdata->hchar,gchar=sdata->gchar, x0=box1->x0, x1=box1->x1, y0=box1->y0, y1=box1->y1; // ,cs=sdata->cs; int dx=x1-x0+1, dy=y1-y0+1, /* size */ (*aa)[4]=sdata->aa, /* the for line ends, (x,y,dist^2,vector_idx) */ ad; /* tmp-vars */ wchar_t bc=UNKNOWN; // --- test xX --------------------------------------------------- // rewritten for vectors 0.41 for(ad=d=100;dx>2 && dy>3;){ // min 3x4 int ld, i1, i2, i3, i4; // lien derivation, 4 inner edges DBG( wchar_t c_ask='x'; ) if (sdata->holes.num > 0) Break; /* # */ /* half distance to the center */ d=2*sq(128/4); /* now we check for the 4 ends of the x */ if (aa[0][2]>d) Break; if (aa[1][2]>d) Break; if (aa[2][2]>d) Break; if (aa[3][2]>d) Break; if (aa[3][0]-aa[0][0]num_frame_vectors[0]) { if (box1->frame_vector[i][0] >=box1->frame_vector[j][0]) j=i; /* notice most right vector */ } if (j==i) Break; /* calculate the distance to the center */ x=box1->frame_vector[j][0]; y=box1->frame_vector[j][1]; i1=j; if (abs(aa[0][0]+aa[1][0]+aa[2][0]+aa[3][0]-4*x)>(dx+2)) Break; if (abs(aa[0][1]+aa[1][1]+aa[2][1]+aa[3][1]-4*y)>(dy+2)) Break; if ( aa[0][0]+aa[1][0]-2*x>=0) Break; if ( aa[1][0] >= x ) Break; if ( aa[0][0] > x ) Break; if ( aa[0][0] >= x ) ad=99*ad/100; if (x-x02*sq(1024/4)) Break; /* check if lower left and center point are joined directly */ ld=line_deviation(box1, j, aa[1][3]); MSG(fprintf(stderr," X-1 %d %d dist= %d/%d",x-x0,y-y0,ld,2*sq(1024/4));) if (ld >2*sq(1024/4)) Break; /* only lower side */ for (j=i=aa[1][3];i!=aa[2][3];i=(i+1)%box1->num_frame_vectors[0]) { if (box1->frame_vector[i][1] <=box1->frame_vector[j][1]) j=i; /* notice most upper vector */ } if (j==i) Break; /* calculate the distance to the center 
*/ x=box1->frame_vector[j][0]; y=box1->frame_vector[j][1]; i2=j; if (abs(aa[0][0]+aa[1][0]+aa[2][0]+aa[3][0]-4*x)>(dx+2)) Break; if (abs(aa[0][1]+aa[1][1]+aa[2][1]+aa[3][1]-4*y)>(dy+2)) Break; if ( aa[1][1]+aa[2][1]-2*y<=0) Break; /* check if lower left and center point are joined directly */ ld=line_deviation(box1, aa[1][3], j); MSG(fprintf(stderr," 1-X %d %d dist= %d/%d",x-x0,y-y0,ld,2*sq(1024/4));) if (ld >2*sq(1024/4)) Break; /* check if lower right and center point are joined directly */ ld=line_deviation(box1, j, aa[2][3]); MSG(fprintf(stderr," X-2 %d %d dist= %d/%d",x-x0,y-y0,ld,2*sq(1024/4));) if (ld >2*sq(1024/4)) Break; /* only right side */ for (j=i=aa[2][3];i!=aa[3][3];i=(i+1)%box1->num_frame_vectors[0]) { if (box1->frame_vector[i][0] <=box1->frame_vector[j][0]) j=i; /* notice most left vector */ } if (j==i) Break; /* calculate the distance to the center */ x=box1->frame_vector[j][0]; y=box1->frame_vector[j][1]; i3=j; if (abs(aa[0][0]+aa[1][0]+aa[2][0]+aa[3][0]-4*x)>(dx+2)) Break; if (abs(aa[0][1]+aa[1][1]+aa[2][1]+aa[3][1]-4*y)>(dy+2)) Break; if ( aa[2][0]+aa[3][0]-2*x<=0) Break; if ( aa[3][0] <= x ) Break; if ( aa[2][0] < x ) Break; if ( aa[2][0] <= x ) ad=99*ad/100; if (dx-(x-x0)2*sq(1024/4)) Break; /* check if upper right and center point are joined directly */ ld=line_deviation(box1, j, aa[3][3]); MSG(fprintf(stderr," X-3 %d %d dist= %d/%d",x-x0,y-y0,ld,2*sq(1024/4));) if (ld >2*sq(1024/4)) Break; /* only upper side */ for (j=i=aa[3][3];i!=aa[0][3];i=(i+1)%box1->num_frame_vectors[0]) { if (box1->frame_vector[i][1] >=box1->frame_vector[j][1]) j=i; /* notice lowest vector */ } if (j==i) Break; /* calculate the distance to the center */ x=box1->frame_vector[j][0]; y=box1->frame_vector[j][1]; i4=j; if (abs(aa[0][0]+aa[1][0]+aa[2][0]+aa[3][0]-4*x)>(dx+2)) Break; if (abs(aa[0][1]+aa[1][1]+aa[2][1]+aa[3][1]-4*y)>(dy+2)) Break; if ( aa[3][1]+aa[0][1]-2*y>=0) Break; /* check if upper left and center point are joined directly */ ld=line_deviation(box1, 
aa[3][3], j); MSG(fprintf(stderr," 3-X %d %d dist= %d/%d",x-x0,y-y0,ld,2*sq(1024/4));) if (ld >2*sq(1024/4)) Break; /* check if lower left and center point are joined directly */ ld=line_deviation(box1, j, aa[0][3]); MSG(fprintf(stderr," X-0 %d %d dist= %d/%d",x-x0,y-y0,ld,2*sq(1024/4));) if (ld >2*sq(1024/4)) Break; // center crossing of diagonal lines is small? if (box1->frame_vector[i3][0] - box1->frame_vector[i1][0] > dx/2) Break; if (gchar) ad=99*ad/100; bc='x'; if(hchar) bc='X'; Setac(box1,bc,ad); break; } // --- test \it x --------------------------------------------------- #if 0 for(ad=d=99;dx>4 && dy>4;){ // min 3x4 DBG( wchar_t c_ask='x'; ) if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ if( get_bw(x0,x0+dx/4,y0+dy/2,y0+dy/2,box1->p,cs,1) != 0 ) Break; if( get_bw(x1-dx/4,x1,y0+dy/2,y0+dy/2,box1->p,cs,1) != 0 ) Break; if( num_cross(x0+dx/4,x1-dx/4,y0+dy/2,y0+dy/2, box1->p,cs) != 1 ) Break; if( num_cross(x0,x1,y0+dy/4,y0+dy/4, box1->p,cs) != 3 && num_cross(x0,x1,y0+dy/8,y0+dy/8, box1->p,cs) < 3 ) Break; if( num_cross(x0,x1,y1-dy/4,y1-dy/4, box1->p,cs) != 3 && num_cross(x0,x1,y1-dy/8,y1-dy/8, box1->p,cs) < 3 ) Break; if( gchar ) ad=97*ad/100; if( hchar ) ad=96*ad/100; bc='x'; Setac(box1,bc,ad); break; } #endif return box1->c; } static wchar_t ocr0_yY(ocr0_shared_t *sdata){ struct box *box1=sdata->box1; pix *bp=sdata->bp; int i,j,d,x,y,hchar=sdata->hchar,gchar=sdata->gchar, x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; int dx=x1-x0+1,dy=y1-y0+1, /* size */ ad,xa,ya,xb,yb,xc,yc,xd,yd; /* tmp-vars */ wchar_t bc=UNKNOWN; // --- test italic yY -------------------------------------------- for(ad=d=100;dx>2 && dy>3;){ // min 3x4 DBG( wchar_t c_ask='y'; ) if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ if (sdata->holes.num > 0) ad=97*ad/100; if( num_cross(0,dx-1,dy/8,dy/8,bp,cs) < 2 && num_cross(0,dx-1, 1, 1,bp,cs) < 2 ) Break; if( num_cross(0,dx-1,dy-1,dy-1,bp,cs) != 1 && num_cross(0,dx-1,dy-2,dy-2,bp,cs) != 1 
) Break; if( num_cross(dx-1,dx-1,0,dy-1,bp,cs) != 1 && num_cross(dx-2,dx-2,0,dy-1,bp,cs) != 1 ) Break; if( num_cross(dx/3,dx/3,dy/4,dy-1,bp,cs) != 2 && num_cross(dx/2,dx/2,dy/4,dy-1,bp,cs) != 2 ) Break; for(yc=y=0,xc=x=dx/4;xy){ yc=y=i;xc=x; } } if( y>12*dy/16 || y<3*dy/8 ) Break; ya=dy/8; xa=xc-loop(bp,xc,ya,dx,cs,0,LE); if(xa< 0) Break; yb=dy/8; xb=xc+loop(bp,xc,yb,dx,cs,0,RI); if(xb>=dx) Break; for(y=dy/8;y6*dx/8) ad=99*ad/100; // why this??? if (loop(bp,dx-1,dy-1,dx,cs,0,LE)<1) Break; // printf(" abcd=%d %d %d %d %d %d %d %d -",xa,ya,xb,yb,xc,yc,xd,yd); if( get_line2(xb,yb,xd,yd,bp,cs,100)<95 ) Break; // if( get_line2(xc,yc,xd,yd,bp,cs,100)<95 ) Break; // printf("ok"); bc='y'; if(gchar && !hchar) bc='y'; else if(hchar && (!gchar || dy<14)) bc='Y'; else ad=98*ad/100; // SMALL-CAPS ??? Setac(box1,bc,ad); break; } // --- test yY --------------------------------------------------- for(ad=d=100;dx>2 && dy>3;){ // min 3x4 DBG( wchar_t c_ask='y'; ) if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ if( get_bw(x0,x0,y1-dy/8,y1,box1->p,cs,1) == 1 ) { if( get_bw(x0,x0+4*dx/8,y0+dy/8,y0+dy/8,box1->p,cs,1) != 1 ) Break; } else { if( get_bw(x0,x0+3*dx/8,y0+dy/8,y0+dy/8,box1->p,cs,1) != 1 ) Break; } if( num_cross(0,dx-1,dy/8,dy/8,bp,cs) != 2 && num_cross(0,dx-1, 1, 1,bp,cs) != 2 ) Break; if( num_cross(dx/2,dx/2,0, 1,bp,cs) != 0 ) Break; if( num_cross(0,dx-1,dy-1,dy-1,bp,cs) != 1 && num_cross(0,dx-1,dy-2,dy-2,bp,cs) != 1 ) Break; if( num_cross(dx-1,dx-1,0,dy-1,bp,cs) != 1 && num_cross(dx-2,dx-2,0,dy-1,bp,cs) != 1 && num_cross(dx-dx/8-1,dx-dx/8-1,0,dy-1,bp,cs) != 1 ) Break; if( loop(bp,dx-1,dy-1-dy/8,dx,cs,0,LE)+dx/8+1 // Jul00 < loop(bp, 0,dy-1-dy/8,dx,cs,0,RI) ) Break; for(y=0,x=dx/4;xy) y=i; } if( y>10*dy/16 || y<2*dy/8 ) Break; for(xc=xb=xa=dx,yc=yb=ya=y=0;ydy/8) Break; for(i=dx,yc=y=dy/4;y<3*dy/4;y++){ if( num_cross(0,dx-1,y,y,bp,cs) < 2 ) break; x =loop(bp,dx-1 ,y,dx,cs,0,LE); x+=loop(bp,dx-1-x,y,dx,cs,1,LE); j =loop(bp,dx-1-x,y,dx,cs,0,LE); if(j<=i){ 
i=j;yc=y;xc=dx-1-x-j/2; } } yc+=dy/16+1; yc+=loop(bp,xc,yc,i,cs,1,DO)/2; xa+= loop(bp,xa ,ya,dx,cs,1,RI)/2; xb=dx-1-loop(bp,dx-1,yb,dx,cs,1,LE)/2; yd=dy-1-dy/8;xd=dx-1-loop(bp,dx-1,yd,dx,cs,0,LE); if(xd>6*dx/8) Break; /* check for serife at lower end */ for (i=0,x=dx-1;ix+dx/16+1) break; /* detect serif */ if (j=5*dy/8 && !gchar) if( get_line2(xa,ya,xd ,yd,bp,cs,100)>95 ) if( get_line2(xb,yb,xd ,yd,bp,cs,100)>95 ) { if (dx>4) { Break; } else ad=ad*98/100; } // ~V xa=loop(bp,0,dy/8,dx,cs,0,RI); xb=loop(bp,0,dy/2,dx,cs,0,RI); xc=loop(bp,0,dy-1,dx,cs,0,RI); if( 2*xb< xa+xc ) ad=98*ad/100; // ~V if( 2*xb<=xa+xc ) ad=98*ad/100; if( 2*xb<=xa+xc+1 ) ad=98*ad/100; bc='y'; if ((!gchar) && (!hchar)) ad=98*ad/100; if(y0m2-(box1->m2-box1->m1)/4) { bc='Y'; if(gchar) ad=98*ad/100; } // SMALL-CAPS ??? Setac(box1,bc,ad); break; } return box1->c; } static wchar_t ocr0_zZ(ocr0_shared_t *sdata){ struct box *box1=sdata->box1; int i1,i2,i3,i4,i5,dbg[9], d,x,y,hchar=sdata->hchar,gchar=sdata->gchar, x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1; int dx=x1-x0+1,dy=y1-y0+1, /* size */ (*aa)[4]=sdata->aa, /* the for line ends, (x,y,dist^2,vector_idx) */ ad; /* tmp-vars */ wchar_t bc=UNKNOWN; // --- test zZ ------- for(ad=d=100;dx>3 && dy>3;){ // dy>dx DBG( wchar_t c_ask='z'; ) /* for debugging purpose */ if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ if (sdata->holes.num > 0) ad=98*ad/100; /* # */ /* half distance to the center */ d=2*sq(128/4); /* now we check for the 4 edges of the z */ if (aa[0][2]>d) Break; if (aa[1][2]>d) Break; if (aa[2][2]>d) Break; if (aa[3][2]>d) Break; if (aa[3][0]-aa[0][0]dy/8) ad=99*ad/100; if (aa[0][1]-y0>dy/8) ad=99*ad/100; if (2*dx2*sq(1024/4)) Break; ad=(100-(d-sq(1024)/2)/sq(1024)/4)*ad/100; d=line_deviation(box1, aa[1][3], aa[2][3]); if (d>2*sq(1024/4)) Break; /* search uppermost right > */ i1=nearest_frame_vector(box1,aa[0][3],aa[1][3], x1, y0); x=box1->frame_vector[i1][0]; y=box1->frame_vector[i1][1]; if (y-y0 > 5*dy/8) Break; if 
(x-x0 < 3*dx/8) Break; if (x-aa[0][0]<=dx/4) Break; // ~lI if (x-aa[0][0]<=dx/3) ad=98*ad/100; // ~lI if (x-aa[0][0]<=dx/2) ad=99*ad/100; // ~lI /* search most right > ~2 */ i3=nearest_frame_vector(box1,aa[0][3],aa[1][3], x1+2*dx, (y0+y1)/2); MSG(fprintf(stderr,"xy= %d %d %d %d %d %d",x0,y0,x-x0,y-y0,box1->frame_vector[i3][0]-x0,box1->frame_vector[i3][1]-y0);) if ( box1->frame_vector[i3][1]-y0> dy/4 && box1->frame_vector[i3][0]-x>=0) Break; if ( box1->frame_vector[i3][1]-y> dy/8 && box1->frame_vector[i3][0]-x>=-dx/8) ad=98*ad/100; if ( box1->frame_vector[i3][1]-y> dy/8 && box1->frame_vector[i3][0]-x>= 0) ad=97*ad/100; if (box1->frame_vector[i3][0]-aa[0][0] < aa[3][0]-box1->frame_vector[i3][0]) break; // ~lI if (box1->frame_vector[i3][0]-aa[0][0] <(aa[3][0]-box1->frame_vector[i3][0])*2) ad=98*ad/100; // ~lI /* better test for a bow or peaked angle */ /* upper part of a 2, on a Z a and b should be at c .....$@@@@@@a...c. o1 (o1-a)=(dx+5)^2 =dx^2+10*dx+25 ...$$@@@@@@@@@.... (o1-b)=(dx+1)^2+4^2=dx^2+ 2*dx+18 ..$@@$@@@$@@@@@... ..@@@.....$$@@@@.. ..@@.......@$@@@b. ..$.........$@@@@. .$$..........$@@@. .$...........@@@@. .............@@@@.< .............$@@$. ............$@@@.. ............@@$... ............$@$... 
--- snip ---- */ i4=nearest_frame_vector(box1,aa[2][3],aa[0][3], x1+dx, y0); i5=nearest_frame_vector(box1,aa[2][3],aa[0][3], x1, y0-dx); d=sq(box1->frame_vector[i5][0]-box1->frame_vector[i4][0]) +sq(box1->frame_vector[i5][1]-box1->frame_vector[i4][1]); if (d>2*sq(dx/8+1)) break; /* check if upper left and upper right point are joined directly */ dbg[0]=d=line_deviation(box1, aa[0][3], i1); if (d >2*sq(1024/4)) Break; /* check if lower right and upper left point are joined directly */ dbg[1]=d=line_deviation(box1, i1, aa[1][3]); if (d >2*sq(1024/4)) Break; /* search lowest left < */ i2=nearest_frame_vector(box1,aa[2][3],aa[3][3], x0, y1); x=box1->frame_vector[i2][0]; y=box1->frame_vector[i2][1]; if (y-y0 < 3*dy/8) Break; if (x-x0 > 5*dx/8) Break; if (aa[2][0]-x<=dx/4) Break; // ~lI if (aa[2][0]-x<=dx/3) ad=98*ad/100; // ~lI if (aa[2][0]-x<=dx/2) ad=99*ad/100; // ~lI /* check if upper right and lower left point are joined directly */ dbg[2]=d=line_deviation(box1,i2, aa[3][3]); if (d >2*sq(1024/4)) Break; /* check if lower left and lower right point are joined directly */ dbg[3]=d=line_deviation(box1, aa[2][3],i2); if (d >2*sq(1024/4)) Break; if (box1->frame_vector[i1][0] -box1->frame_vector[i2][0]<=dx/8) Break; /* nonsignificant distance */ MSG( \ fprintf(stderr,"^v %d %d %d %d line deviation %d %d %d %d max %d %d",\ box1->frame_vector[i1][0]-x0,box1->frame_vector[i1][1]-y0,\ box1->frame_vector[i2][0]-x0,box1->frame_vector[i2][1]-y0,\ dbg[0],dbg[1],dbg[2],dbg[3],2*sq(1024/4),2*sq(1024));) ad=(100-(dbg[0]-sq(1024)/2)/sq(1024)/4)*ad/100; ad=(100-(dbg[1]-sq(1024)/2)/sq(1024)/4)*ad/100; ad=(100-(dbg[2]-sq(1024)/2)/sq(1024)/4)*ad/100; ad=(100-(dbg[3]-sq(1024)/2)/sq(1024)/4)*ad/100; if ( gchar) ad=98*ad/100; bc='z'; if( hchar ) bc='Z'; Setac(box1,bc,ad); break; } return box1->c; } static wchar_t ocr0_wW(ocr0_shared_t *sdata){ struct box *box1=sdata->box1; pix *bp=sdata->bp; int i,j,d,x,y,hchar=sdata->hchar,gchar=sdata->gchar,handwritten=0, 
x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; int dx=x1-x0+1,dy=y1-y0+1, /* size */ ad,ya,yb,xa,xb,xc,xd,xe,t1; /* tmp-vars */ wchar_t ac; // ------- test w ~{\it w} --------------- for(ad=d=100;dx>3 && dy>3;){ // dy<=dx DBG( wchar_t c_ask='w'; ) if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ // xa xe // \ xc / <=ya connected xa-xb-xc-xd-xe // xb xd <=yb // get two lowest points i3,i4,ya // out_x(box1); // ~ul ~uf // out_x(box1); for(y=dy/8;y< dy/2;y++) if( num_cross(0,dx-1,y,y,bp,cs)< 2 ) break; if(y4) { /* 4x6 is to small */ for(y=dy-1-dy/16;y>3*dy/4;y--) if( num_cross(0,dx-1,y,y,bp,cs)==2 ) break; if(y==3*dy/4) Break; } yb=y; t1=loop(bp,0 ,dy/4,dx,cs,0,RI); t1=loop(bp,t1,dy/4,dx,cs,1,RI); // thickness of line? for(i=j=0 ;y> dy/4;y--) if( num_cross(0,dx-1,y,y,bp,cs)==4 ) i++; else if( num_cross(0,dx-1,y,y,bp,cs)>=3 ) j++; if(i+56 || dx>4)) Break; if(i+j==0 && dx<=4){ if (abs(loop(bp, 1,dy-1,dy,cs,0,UP) -loop(bp,dx-2,dy-1,dy,cs,0,UP))>dy/8+1) Break; // 4x6 N if ( ( loop(bp, 1, 0,dy,cs,0,DO)>=dy-2 && loop(bp, 0,dy-1,dy,cs,0,UP)>0) || ( loop(bp,dx-2, 0,dy,cs,0,DO)>=dy-2 && loop(bp,dx-1,dy-1,dy,cs,0,UP)>0)) Break; // 4x6 UV ad=ad*99/100; // 4x6 font MSG(fprintf(stderr,"ad=%d",ad);) } if( num_cross(0,dx-1, 1, 1,bp,cs)< 2 && num_cross(0,dx-1,dy/16,dy/16,bp,cs)< 2 ) Break; x =loop(bp,0 ,yb,dx,cs,0,RI); xb=loop(bp,x ,yb,dx,cs,1,RI);xb=x+xb/2; if(xb>dx/2) Break; x =loop(bp,dx-1 ,yb,dx,cs,0,LE); xd=loop(bp,dx-1-x,yb,dx,cs,1,LE);xd=dx-1-x-xd/2;if(xd<3*dx/8) Break; for(y=0,xc=x=xb+1;xy){xc=x;y=i;} if(dx>4 && !y) Break; ya=dy-1-y; // flat y=loop(bp,xc,ya,dy,cs,1,UP);if(y)y--; if (dy>6 || dx>4) { // ~4x6 font if( num_cross(0 ,xc ,ya-y ,ya-y ,bp,cs)!= 2 && num_cross(0 ,xc ,ya-y/2,ya-y/2,bp,cs)!= 2 ) Break; if( num_cross(xc,dx-1,ya-y ,ya-y ,bp,cs)!= 2 && num_cross(xc,dx-1,ya-y/2,ya-y/2,bp,cs)!= 2 ) Break; } ya-=y/2; x =loop(bp,0 ,1 ,dx,cs,0,RI); xa=loop(bp,x ,1 ,dx,cs,1,RI); if( x+xa>xb ){ // may be, here is a small but thick letter // later 
add some proofs xa=x+xa/4; } else { xa=x+xa/2; } x =loop(bp,dx-1 ,1 ,dx,cs,0,LE); xe=loop(bp,dx-1-x,1 ,dx,cs,1,LE);xe=dx-1-x-xe/2; MSG( fprintf(stderr,"a-e: %d %d %d %d %d %d %d %d %d %d", xa,1,xb,yb,xc,ya,xd,yb,xe,1);) if (ya94 ) break; if (x==xa+i) Break; // no vert. line found if( get_line2(xb,yb-1,xc,ya ,bp,cs,100)<95 && get_line2(xb,yb-1,xc,ya+dy/32,bp,cs,100)<95 && get_line2(xb,yb-1,xc,ya+dy/16,bp,cs,100)<95 ) Break; if( get_line2(xc, ya,xd, yb,bp,cs,100)<95 && get_line2(xc+1,ya,xd, yb,bp,cs,100)<95 ) Break; if( get_line2(xd,yb,xe ,1+dy/16,bp,cs,100)<95 && get_line2(xd,yb,dx-1 ,1+dy/8 ,bp,cs,100)<95 // round w && get_line2(xd,yb,xe+dx/20,1+dy/16,bp,cs,100)<95 ) Break; // if( num_hole(0,dx-1,0,dy-1,bp,cs,NULL) != 0 ) Break; // ~ur MSG(fprintf(stderr,"ad=%d",ad);) for(i=0,y=5*dy/8;yi ) i=x; if( x3 && dy>3;){ // dy<=dx 4x6font (like a H with fat bar) DBG( wchar_t c_ask='w'; ) if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ // ~ul ~uf if( num_cross(0,dx-1,dy/2,dy/2,bp,cs)<2 ) Break; if( num_cross(0,dx-1,dy/8,dy/8,bp,cs)<2 ) handwritten=40; if( num_cross(0,dx-1,dy/4,dy/4,bp,cs)<2 ) handwritten=80; for(i=0,y=0;ydx/2) Break; xb=loop(bp,x ,yb,dx,cs,0,RI);xb=x+xb/2; if(xb>dx/2) Break; x =loop(bp,dx-1 ,yb,dx,cs,0,LE); x+=loop(bp,dx-1-x,yb,dx,cs,1,LE); xd=loop(bp,dx-1-x,yb,dx,cs,0,LE);xd=dx-1-x-xd/2;if(xd<3*dx/8) Break; if( num_cross(xb,xd,yb,yb ,bp,cs)!= 1 ) Break; if( num_cross(xb,xb,yb,dy-1,bp,cs)!= 1 ) Break; if( num_cross(xd,xd,yb,dy-1,bp,cs)!= 1 ) Break; if( num_cross(xb,xb, 0,yb ,bp,cs)!= 0 ) Break; if( num_cross(xd,xd, 0,yb ,bp,cs)!= 0 ) Break; // if( num_hole(0,dx-1,0,dy-1,bp,cs,NULL) != 0 ) Break; if (sdata->holes.num != 0) Break; // ~ur for(i=0,y=3*dy/4;yi ) i=x; if( xc; } static wchar_t ocr0_aA(ocr0_shared_t *sdata){ struct box *box1=sdata->box1; pix *bp=sdata->bp; int i,d,x,y,i1,i2,i3,i4,hchar=sdata->hchar,gchar=sdata->gchar, x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; int dx=x1-x0+1,dy=y1-y0+1, /* size */ ad,ya; /* 
tmp-vars */ // --- test A --------------------------------------------------- for(ad=d=100;dx>2 && dy>3;){ // min 3x4 DBG( wchar_t c_ask='A'; ) if (sdata->holes.num > 2) Break; /* tolerant against a tiny hole */ // first selection (rough sieve) if( get_bw(dx/2 ,dx/2 ,dy-1-dy/8,dy-1,bp,cs,1) == 1 && get_bw(dx/2-1,dx/2-1,dy-1-dy/8,dy-1,bp,cs,1) == 1 ) Break; // ~B ya=0; /* upper end, not 0 for modified A etc. */ if (box1->modifier) for (ya=0;ya=dy/2) ya=0; // already subtracted? if( num_cross(0,dx-1,ya+ 1 ,ya+ 1 ,bp,cs)!=1 // 600dpi && num_cross(0,dx-1,ya+ dy/8 ,ya+ dy/8 ,bp,cs)!=1 && num_cross(0,dx-1,ya+ dy/16 ,ya+ dy/16 ,bp,cs)!=1 && num_cross(0,dx-1,ya+ dy/8+1,ya+ dy/8+1,bp,cs)!=1 ) Break; if( num_cross(0,dx-1, 7*dy/8 , 7*dy/8 ,bp,cs)!=2 && num_cross(0,dx-1, 7*dy/8-1, 7*dy/8-1,bp,cs)!=2 ) Break; if ( num_cross( 0,dx/8,ya+dy/8,ya+0,bp,cs)>0 ) Break; // ~R for(y=ya+dy/8;y 1 ) break; if( y==ya+dy/2 ) Break; i1=y; if (dy>20) i1++; /* get arround some noise fat font */ x =loop(bp,0,i1,dx,cs,0,RI); if(x>3*dx/4) Break; x+=loop(bp,x,i1,dx,cs,1,RI); if(x>3*dx/4) Break; i2=x; x+=loop(bp,x,i1,dx,cs,0,RI); if(x<3*dx/8) Break; i2=(x+i2)/2; // hole (i2,i1) y+=loop(bp,i2,y,dy,cs,1,DO); y+=loop(bp,i2,y,dy,cs,0,DO); if(y>3*dy/4) ad=ad*99/100; if (y>5*dy/6) { MSG(fprintf(stderr,"x,y,i1,i2= %d %d %d %d",x,y,i1,i2);) } if (y>5*dy/6) Break; if( sdata->holes.num != ((box1->modifier==RING_ABOVE)?2:1) || sdata->holes.hole[0].y1-ya >= dy-1-dy/4) Break; // if( num_hole ( x0, x1, y0, y1-dy/4 ,box1->p,cs,NULL) != 1 ) Break; // out_x(box1); i3=0;i4=0; for(x=dx/3;x<2*dx/3;x++){ i4=num_cross(i2,x,y ,dy-1,bp,cs);if(i4<1 || i4>2) i4=num_cross(i2,x,y+dy/16,dy-1,bp,cs);if(i4<1 || i4>2) break; if(i4==1) i3=x; } if(i4<1 || i4>2 || i3==0){ // ToDo: MSG(fprintf(stderr,"x,y,i4,i3= %d %d %d %d",x,y,i4,i3);) Break; } if( get_bw(dx-1-dx/4, dx-1, dy-1-dy/4, dy-1, bp,cs,1) != 1 ) Break; i1=loop(bp,dx-1,ya+ (dy-ya)/4,dx,cs,0,LE); i2=loop(bp,dx-1,ya+ (dy-ya)/2,dx,cs,0,LE); 
i3=loop(bp,dx-1,dy-1-(dy-ya)/4,dx,cs,0,LE); if( 2*i2-dx/8>i1+i3 ) ad=99*ad/100; /* 6*8 font */ if( 2*i2+dx/4i1+i3 ) Break; i1=loop(bp,0 ,ya+ (dy-ya)/4,dx,cs,0,RI); // linke senkr. linie i2=loop(bp,0 ,ya+ (dy-ya)/2,dx,cs,0,RI); i3=loop(bp,0 ,dy-1-(dy-ya)/4,dx,cs,0,RI); if( 2*i2-dx/8>i1+i3 ) ad=98*ad/100; /* 6*8 font */ if( 2*i2+dx/4i1+i3 || i1i3+dx/16) break; if( i1+120) ad=97*ad/100; // italic-a if (!hchar) ad=99*ad/100; // italic-a Setac(box1,'A',ad); break; } // --- test a ------------------------------------------- // with a open bow above the circle starting // on the right side of the circle for(ad=d=100;dx>2 && dy>3;){ // min 3x4 DBG( wchar_t c_ask='a'; ) if (sdata->holes.num > 2) Break; /* tolerant against a tiny hole */ if( get_bw(x0 , x0+dx/2, y1-dy/3, y1-dy/3,box1->p,cs,1) != 1 ) Break; if( get_bw(x1-dx/3, x1 , y0+dy/3, y0+dy/3,box1->p,cs,1) != 1 ) Break; if( get_bw(x1-dx/3, x1 , y0+dy/4, y0+dy/4,box1->p,cs,1) != 1 ) Break; if( get_bw(x0+dx/2, x0+dx/2, y1-dy/3, y1, box1->p,cs,1) != 1 ) Break; if( get_bw(x0+dx/2, x0+dx/2, y0 , y0+dy/3,box1->p,cs,1) != 1 ) Break; if( get_bw(x0+dx/3, x1-dx/3, y0 , y0 ,box1->p,cs,1) != 1 ) Break; if( get_bw(x0+dx/4, x1-dx/2, y1 , y1 ,box1->p,cs,1) != 1 ) if( get_bw(x0+dx/4, x1-dx/3, y1-1 , y1-1 ,box1->p,cs,1) != 1 ) Break; if( get_bw(x0 , x0 , y0+dy/2, y1 ,box1->p,cs,1) != 1 ) if( get_bw(x0+dx/8, x0+dx/8, y0+dy/2, y1 ,box1->p,cs,1) != 1 ) Break; if( loop(bp,3*dx/8,0,dy,cs,0,DO) > 3*dy/16 ) Break; // ~d if( num_cross(0,dx-1,dy/4 ,dy/4 , bp,cs) >2 // ~glued am != an && num_cross(0,dx-1,dy/4+1,dy/4+1, bp,cs) >2 ) Break; for( x=dx/4;xdy/2) break; i=loop(bp,x,dy-1,y1-y0,cs,0,UP); if (i>dy/2) break; } if( xy-1, bp,cs) == 3 ) i--; } if( i ) Break; i1=loop(bp,0, dy/8,dx,cs,0,RI); i3=loop(bp,0,3*dy/4,dx,cs,0,RI); for(y=dy/8+1;y<3*dy/4;y++){ i2=loop(bp,0,y,dx,cs,0,RI);if(2*i2>i1+i3+1) break; } if(y==3*dy/4) Break; // ~6 // ~ s (small thick s), look for vertikal line piece for(x=3*dx/4;xdy/4 ) break; if( x==dx ) Break; if 
(sdata->holes.num != 1) ad=96*ad/100; else if (sdata->holes.num == 1) if( num_hole ( x0, x1, y0+dy/3, y1 ,box1->p,cs,NULL) != 1 ) Break; // if( num_hole ( x0, x1, y0, y1, box1->p,cs,NULL) != 1 ) Break; if( num_hole ( x0, x1, y0, y1-dy/3 ,box1->p,cs,NULL) != 0 ){ i =loop(bp,0,dy/4,dx,cs,0,RI); i =loop(bp,i,dy/4,dx,cs,1,RI); if(ii) Break; // ~ 8 } /* test for horizontal symmetry ~8 */ for (y=0;y3 && dy>3;){ // min 4x4 DBG( wchar_t c_ask='a'; ) if (sdata->holes.num > 2) Break; /* tolerant against a tiny hole */ if( get_bw(x0 , x0+dx/2,y0+dy/2 , y0+dy/2,box1->p,cs,1) != 1 ) Break; if( get_bw(x1-dx/2 , x1 ,y0+dy/2 , y0+dy/2,box1->p,cs,1) != 1 ) Break; if( get_bw(x0+dx/2 , x0+dx/2,y1-dy/2 , y1, box1->p,cs,1) != 1 ) Break; if( get_bw(x0+dx/2 , x0+dx/2,y0 , y0+dy/2,box1->p,cs,1) != 1 ) Break; if( get_bw(x0+dx/3 , x0+dx/3,y0 , y0+dy/2,box1->p,cs,1) != 1 ) Break; i = loop(bp,dx/2, 0 ,dy,cs,0,DO); if (i>dy/4) Break; i+= loop(bp,dx/2, i ,dy,cs,1,DO); if (i>dy/2) Break; i = loop(bp,dx/2, i ,dy,cs,0,DO); if (ip,cs,1) == 1 ) Break; if( num_cross(x0+dx/2,x0+dx/2,y0 , y1 ,box1->p,cs) != 2 ) Break; if( num_cross(x0+dx/3,x1-dx/3,y0 , y0 ,box1->p,cs) != 1 ) // AND if( num_cross(x0+dx/3,x1-dx/3,y0+1 , y0+1 ,box1->p,cs) != 1 ) Break; i = loop(bp,dx/2,dy-1 ,dy,cs,0,UP); if (i>dy/3) Break; y = i+loop(bp,dx/2,dy-1-i,dy,cs,1,UP); if (i>dy/2) Break; // normal 'a' has a well separated vertical line right from the circle // but fat 'a' is like a 'o', only bigger on the right side if( num_cross(x0+dx/2-1,x1,y1 ,y1 ,box1->p,cs) < 2 /* 4x6font */ && num_cross(x0+dx/2-1,x1,y1-i,y1-i ,box1->p,cs) < 2 /* 2 or 3 */ && num_cross(x0+dx/2-1,x1,y1-y,y1-y ,box1->p,cs) < 2 ) { if (loop(bp, 0,dy-1-dy/16,dx,cs,0,RI) <4*loop(bp,dx-1,dy-1-dy/16,dx,cs,0,LE)) { Break;} else ad=98*ad/100; } if( num_cross(x0,x1,y0+dy/2 , y0+dy/2,box1->p,cs) < 2 || num_cross(x0,x1,y0+dy/3 , y0+dy/3,box1->p,cs) < 2 ) Break; // Jun00 if( num_cross(x0 ,x0 ,y0+dy/3 , y1-dy/4,box1->p,cs) != 1 ) if( num_cross(x0+1 ,x0+1 ,y0+dy/3 , 
y1-dy/4,box1->p,cs) != 1 ) Break; if (sdata->holes.num != 1) if( num_hole(x0,x1-2,y0 ,y1 ,box1->p,cs,NULL) != 1 ) // if( num_hole(x0,x1 ,y0 ,y1 ,box1->p,cs,NULL) != 1 ) Break; if( num_hole(x0,x1 ,y0+dy/3,y1-1 ,box1->p,cs,NULL) != 0 ) Break; if( loop(bp,0 ,0 ,x1-x0,cs,0,RI)<= loop(bp,0 ,2 ,x1-x0,cs,0,RI) ) Break; if( loop(bp,dx-1,dy-1,x1-x0,cs,0,LE)> dx/4 && loop(bp,dx-1,dy-2,x1-x0,cs,0,LE)> (dx+4)/8 ) ad=97*ad/100; x=loop(bp,dx-1,dy-1-dy/4,dx,cs,0,LE); i=loop(bp,dx-1, dy/4,dx,cs,0,LE); if (abs(x-i)>dx/4) Break; for( x=dx/4;xdy/2) break; i=loop(bp,x,dy-1,y1-y0,cs,0,UP); if (i>dy/2) break; } if( xp,cs) == 1 ) if( num_cross(x0 , x1, y0, y0,box1->p,cs) == 1 ) if( loop(bp,dx-1, 0,y1-y0,cs,0,DO)> dy/4 && loop(bp,dx-1,dy-1,y1-y0,cs,0,UP)> dy/4 ) Break; // ~o if( loop(bp,dx/2,dy-1,y1-y0,cs,0,UP)> dy/4 ) Break; // ~q if (hchar) ad=98*ad/100; if (gchar) ad=98*ad/100; // handwritten-a (alpha) Setac(box1,'a',ad); break; } // --- test A_A_WITH_OGONEK 0x0104 Centr.Eur.Font ------------------------- /* not sure if we should move this to a get_CentralEuropean-function */ for(ad=d=100;dx>2 && dy>4;){ // min 3x4 DBG( wchar_t c_ask='A'; ) if (sdata->holes.num > 2) break; /* tolerant against a tiny hole */ // first selection (grobes Sieb) if( get_bw(dx/2,dx/2,dy-1-dy/8,dy-1,bp,cs,1) == 1 ) break; // ~B if( num_cross(0,dx-1, 1 , 1 ,bp,cs)!=1 // 600dpi && num_cross(0,dx-1, dy/8 , dy/8 ,bp,cs)!=1 && num_cross(0,dx-1, dy/16 , dy/16 ,bp,cs)!=1 && num_cross(0,dx-1, dy/8+1, dy/8+1,bp,cs)!=1 ) break; if( num_cross(0,dx-1, dy-1 , dy-1 ,bp,cs)!=1 ) break; if( num_cross(0,dx-1, dy/4 , dy/4 ,bp,cs)!=2 && num_cross(0,dx-1, dy/3 , dy/3 ,bp,cs)!=2 ) break; if ( num_cross( 0,dx/8,dy/8, 0,bp,cs)>0 ) break; // ~R for(y=dy/8;y 1 ) break; if( y==dy/2 ) break; i1=y; if (dy>20) i1++; /* get arround some noise fat font */ x =loop(bp,0,i1,dx,cs,0,RI); if(x>3*dx/4) break; x+=loop(bp,x,i1,dx,cs,1,RI); if(x>3*dx/4) break; i2=x; x+=loop(bp,x,i1,dx,cs,0,RI); if(x<3*dx/8) break; i2=(x+i2)/2; // hole (i2,i1) 
y+=loop(bp,i2,y,dy,cs,1,DO); y+=loop(bp,i2,y,dy,cs,0,DO); if(y>3*dy/4) ad=ad*99/100; if (y>5*dy/6) break; if( sdata->holes.num != 1 || sdata->holes.hole[0].y1 >= dy-1-dy/4) break; // if( num_hole ( x0, x1, y0, y1-dy/4 ,box1->p,cs,NULL) != 1 ) break; // out_x(box1); i3=0;i4=0; for(x=dx/3;x<2*dx/3;x++){ i4=num_cross(i2,x,y ,dy-1,bp,cs);if(i4<1 || i4>2) i4=num_cross(i2,x,y+dy/16,dy-1,bp,cs);if(i4<1 || i4>2) break; if(i4==1) i3=x; } if(i4<1 || i4>2 || i3==0){ // ToDo: g_debug_A(printf(" A: x,y,i4,i3= %d %d %d %d\n",x,y,i4,i3);) break; } if( get_bw(dx-1-dx/4, dx-1, dy-1-dy/4, dy-1, bp,cs,1) != 1 ) break; /* dy/4 changed to dy/6 because of screenfonts */ /* there are strange fonts, one has a serif on the upper end of A */ if ( num_cross( 0,dx/8,dy/6, 0,bp,cs)>0 ) break; if ( num_cross(dx-1-dx/4,dx-1, 0,dy/6,bp,cs)>0 ) break; i1=loop(bp,dx-1, dy/4,dx,cs,0,LE); i2=loop(bp,dx-1, dy/2,dx,cs,0,LE); i3=loop(bp,dx-1,dy-1-dy/4,dx,cs,0,LE); if( 2*i2+dx/4i1+i3 ) break; i1=loop(bp,0 , dy/4,dx,cs,0,RI); // linke senkr. 
linie i2=loop(bp,0 , dy/2,dx,cs,0,RI); i3=loop(bp,0 ,dy-1-dy/4,dx,cs,0,RI); if( 2*i2+dx/4i1+i3 || i1i3+dx/16) break; if( i1+12c; } static wchar_t ocr0_cC(ocr0_shared_t *sdata){ struct box *box1=sdata->box1; pix *bp=sdata->bp; int i,j,d,x,y,i1,i2,i3,i4,i5,hchar=sdata->hchar,gchar=sdata->gchar, x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; int dx=x1-x0+1,dy=y1-y0+1, /* size */ ad,t1; /* tmp-vars */ wchar_t bc=UNKNOWN; // --- test c,C --------------------------------------------------- for(ad=d=100;dx>2 && dy>2;){ // min 3x4 DBG( wchar_t c_ask='c'; ) if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ if( get_bw(x0 , x0+dx/3,y0+dy/2, y0+dy/2,box1->p,cs,1) != 1 ) Break; if( get_bw(x0+dx/2, x0+dx/2,y1-dy/3, y1, box1->p,cs,1) != 1 ) Break; if( get_bw(x0+dx/2, x0+dx/2,y0 , y0+dy/3,box1->p,cs,1) != 1 ) Break; if( num_cross(x0,(x0+x1)/2,(y0+y1)/2,(y0+y1)/2,box1->p,cs) > 1 ) Break; // ~ocr-a-[ for(y=y0+dy/4;yp,cs,1) == 0 ) break; if( y==y0+3*dy/4 ) Break; i1=y; // i1: upper end of right gap // measure thickness of line! t1=loop(bp, 0,dy/2,dx,cs,0,RI); t1=loop(bp,t1,dy/2,dx,cs,1,RI); if (t1>dx/2) Break; for(y=i1,i2=0,x=x0+dx/2;xp,x0+dx/2,i1,dy,cs,0,DO); if( i>i2 ) { i2=i; } } if(i2p,x0+5*dx/8,i1,dy,cs,0,UP); i =y+1-loop(box1->p,x0+4*dx/8,i1,dy,cs,0,UP); if(iy0+ dy/4+t1/2) Break; // highest for(y=i1;yp,cs,1) == 1 ) break; if( y-i1p,cs) < 1 ) Break; // ~L if (loop(box1->p,x0,y0+3*dy/4,dx,cs,0,RI)>dx/16) if( num_cross(x0+dx/2,x1,i3 ,y1,box1->p,cs) < 1 && num_cross(x0+dx/2,x1,y1-dy/4,y1,box1->p,cs) < 1 ) Break; // ~r i=1; for(x=dx/2;x=cs && getpixel(bp,x+1,y )< cs && getpixel(bp,x+1,y-1)< cs && getpixel(bp,x ,y-1)< cs ) { i=0;break; } } if(!i) ad=95*ad/100; // ~G i=loop(bp,0,dy/2,dx,cs,0,RI); for(y=0;y=dy/4;y--){ x =loop(bp,0,y,dx,cs,0,RI); x+=loop(bp,x,y,dx,cs,1,RI); if(x>i5) i5=x; i =loop(bp,x,y,dx,cs,0,RI); if(ii4+dx/32 ) break; // unusual for c, more a bad e? 
} if( y>=dy/4 ) Break; if( !hchar ){ // test for e where the middle line is partly removed x= loop(bp,0,dy/2,dx,cs,0,RI); x=x +loop(bp,x,dy/2,dx,cs,1,RI); y=dy/2-loop(bp,x,dy/2,dy,cs,0,UP)-1; i=x +loop(bp,x,y,dx,cs,1,RI); i=i +loop(bp,i,y,dx,cs,0,RI); if( num_cross(x ,x ,1,dy/2,bp,cs) > 1 || num_cross(x+1,x+1,1,dy/2,bp,cs) > 1 ) if( num_cross(i-1,i-1,1,dy/2,bp,cs) > 1 || num_cross(i ,i ,1,dy/2,bp,cs) > 1 ) Break; // ~bad e } if( dy>16 && dy>3*dx && hchar ){ // ~[ x= loop(bp,0, dy/16,dx,cs,0,RI); /* fixed: was "x=+loop(...)", i.e. x = +loop(...), which overwrote the dy/16 measurement instead of adding the dy-1-dy/16 one to it; i below is twice the dy/2 measurement and is compared against this sum */ x+=loop(bp,0,dy-1-dy/16,dx,cs,0,RI); i= loop(bp,0, dy/2 ,dx,cs,0,RI)*2; if( i>=x ) if( num_cross(0,dx-1,dy/4,dy/4,bp,cs) < 2 ) Break; } if( get_bw(x0,x0,y0 ,y1 ,box1->p,cs,2) != 2 && get_bw(x0,x1,y0 ,y0 ,box1->p,cs,2) != 2 && get_bw(x0,x1,y1 ,y1 ,box1->p,cs,2) != 2 && get_bw(x1,x1,y0+1,y1-1,box1->p,cs,1) != 1 ) Break; /* ~[ */ x =loop(bp, 0,dy/2,dx,cs,0,RI); i =loop(bp,dx-1,dy/2,dx,cs,0,LE); if( (i7 ) if( loop(bp, 0,7*dy/8,dx,cs,0,RI) > x+dx/8 && loop(bp, 0, dy/8,dx,cs,0,RI) > x+dx/8 && loop(bp,dx-1,dy-1-dy/ 8,dx,cs,0,LE) > loop(bp,dx-1,dy-1-dy/16,dx,cs,0,LE) && loop(bp,dx-1, dy/ 8,dx,cs,0,LE) > loop(bp,dx-1, dy/16,dx,cs,0,LE) ) Break; // ~( // printf(" hchar=%d i1=%d i2=%d %d\n",hchar,i1-y0,i2-y0,9*dy/16); // ~G without characteristic crotchet if (hchar && dy>15 && dx>7 && i2-y0<9*dy/16 && i1-y0<=dy/4) if ( loop(bp,5*dx/8,i2-y0,dy,cs,0,DO) > 2*dy/8 ){ Setac(box1,'G',90); Break; } if (hchar){ i=1; for(x=dx/2;x=cs && getpixel(bp,x+1,y )< cs && getpixel(bp,x+1,y-1)< cs && getpixel(bp,x ,y-1)< cs ) { i=0;break; } } if (i) ad=98*ad/100; // ~( if (dy>2*dx) ad=99*ad/100; } if( loop(bp,dx-1,dy/2,dx,cs,0,LE) < 6*dx/8 ) ad=98*ad/100; i= loop(bp,dx-1,dy/16,dx,cs,0,LE); j= loop(bp,dx/2,0 ,dy,cs,0,DO); if (i>=dx/2 && j>dy/8 && j>2 && j=3*dx && dy>12) ad=99*ad/100; // ( i= loop(bp,dx-1,dy-1,dy,cs,0,UP); j= loop(bp,dx/2,dy-1,dy,cs,0,UP); if (i==0 && j>dy/8) ad=95*ad/100; // < i= loop(bp,dx-1, 0,dy,cs,0,DO); j= loop(bp,dx/2, 0,dy,cs,0,DO); if (i==0 && j>dy/8) ad=95*ad/100; // < if 
(loop(bp,0,dy-1-dy/8,dx,cs,0,RI)>= 3*dx/4) ad=98*ad/100; // < if (loop(bp,0,dy-1-dy/8,dx,cs,0,RI)>=(dx+1)/2) ad=98*ad/100; // < if (loop(bp,0, dy/8,dx,cs,0,RI)>=dx/2) ad=98*ad/100; // < if (gchar) ad=98*ad/100; // could happen for 5x7 font bc=((hchar)?'C':'c'); Setac(box1,bc,ad); break; } return box1->c; } static wchar_t ocr0_lL(ocr0_shared_t *sdata){ struct box *box1=sdata->box1; pix *bp=sdata->bp; int i,j,d,x,y,i0,i1,i2,i3,i4,hchar=sdata->hchar,gchar=sdata->gchar, x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; int dx=x1-x0+1,dy=y1-y0+1, /* size */ ad; /* tmp-vars */ // --- test L --------------------------------------------------- for(ad=d=100;dx>2 && dy>4;){ // min 3x4 DBG( wchar_t c_ask='L'; ) if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ i=loop(bp,dx-1,dy/2,dx,cs,0,LE); if (i<3 && dy>8) {Break;} if (ip,x0 ,y,dx,cs,0,RI); j=loop(box1->p,x0+j,y,dx,cs,1,RI); if( j>i ){ i=j;i1=y; } } if( i<3*dx/4 ) Break; i1=i; // length of horizontal line // line thickness (i2) i=loop(box1->p,x0 ,y0+dy/2,dx,cs,0,RI); if( i>dx/2 ) Break; j=loop(box1->p,x0+i,y0+dy/2,dx,cs,1,RI); if( i+j>dx/2 ) Break; i2=j; if (loop(bp,dx-1, 0,dx,cs,0,LE)dx/2 && loop(bp, 0,5*dy/8,dx,cs,0,RI)p,x0 ,y,dx,cs,0,RI); if ( j>(dx+2)/4+(y1-dy/4-y)*dx/2/dy ) { i=0; break; } x=loop(box1->p,x0+j,y,dx,cs,1,RI); if( ((x>i2+1 || 4*x<3*i2) && y>y0+dy/8) || 4*x>3*i1 ) i=0; } if( !i ) Break; if( num_cross(0, dx-1-dx/8, dy-1-dy/2, dy-1-dy/2,bp,cs) != 1 ) Break; if( num_cross(0, dx-1 , dy/3 , dy/3,bp,cs) != 1 ) Break; if( num_cross(0, dx-1 , dy/8 , dy/8,bp,cs) != 1 ) Break; if (loop(bp,0,dy-1,dx,cs,0,RI) -loop(bp,0,dy-3,dx,cs,0,RI)>1+dx/16) ad=96*ad/100; // ~c if (loop(box1->p,x0+dx/4,y1,dy,cs,0,UP)>1+dy/16) ad=99*ad/100; // ~4 if ( gchar) ad=98*ad/100; if (!hchar) ad=99*ad/100; if (5*dx<2*dy && loop(box1->p,x0,y1,dx,cs,0,RI)>dx/4) ad=99*ad/100; // ~l Setac(box1,'L',ad); break; } // --- test l --------------------------------------------------- // recognize a "l" is a never ending 
problem, because there are lots of // variants and the char is not very unique (under construction) // --- test italic l --------------------------------------------------- // --- test l ~italic (set flag-italic) -------------------------------- // if unsure d should be multiplied by 80..90% for(ad=d=100; dy>dx && dy>5;){ // min 3x4 DBG( wchar_t c_ask='l'; ) if( box1->dots>0 ) Break; if( num_cross(0, dx-1,dy/2,dy/2,bp,cs) != 1 || num_cross(0, dx-1,dy/4,dy/4,bp,cs) != 1 ) Break; // mesure thickness for(i1=0,i2=dx,y=dy/4;yi1 ) { i1=j; } // thickest if( j2*i2 ) Break; if(box1->m3 && dy<=box1->m3-box1->m2) ad=94*ad/100; if( box1->m2-box1->m1>1 && y0>=box1->m2 ) ad=94*ad/100; for(i0=0,i3=0,y=0;yi3 ) { i3=j; } // widest space j = loop(bp,j,y,dx,cs,1,RI); if( j>i0 ) { i0=j;i3=0; } // thickest } if ( i0>4*i2 || 3*i3>2*dx) if ( loop(bp,dx-1,dy-1,dx,cs,0,LE)>3*dx/8 || loop(bp, 0,dy-1,dx,cs,0,RI)>3*dx/8) Break; // ~7 // detect serifs x =loop(bp,0, 0,dx,cs,0,RI); i3=loop(bp,x, 0,dx,cs,0,RI); x =loop(bp,0, 1,dx,cs,0,RI); x =loop(bp,x, 1,dx,cs,0,RI); if(x>i3) i3=x; x =loop(bp,0,dy-1,dx,cs,0,RI); i4=loop(bp,x,dy-1,dx,cs,0,RI); x =loop(bp,0,dy-2,dx,cs,0,RI); x =loop(bp,x,dy-2,dx,cs,0,RI); if(x>i4) i4=x; if( i3>i1+dx/8+1 && i4>i1+dx/8+1 ) Break; // ~I for(i=dx,j=0,y=1;yi+1) break; i=x; if( num_cross(0,dx-1,y ,y ,bp,cs)==2 && num_cross(0,dx-1,y+1+dy/32,y+1+dy/32,bp,cs)==2 ) j=1; } if ( y3) if( get_bw(dx-1-dx/8,dx-1,0,dy/6,bp,cs,1) != 1 ) if( get_bw(dx-1-dx/8,dx-1,0,dy/2,bp,cs,1) == 1 ) Break; if( get_bw(dx-1-dx/8,dx-1,dy/4,dy/3,bp,cs,1) != 1 ) // large I ??? if( get_bw(0 ,dx/8,dy/4,dy/3,bp,cs,1) != 1 ) if( get_bw(dx-1-dx/8,dx-1,0 ,dy/8,bp,cs,1) == 1 ) if( get_bw(0 ,dx/8,0 ,dy/8,bp,cs,1) == 1 ) ad=ad*97/100; if( get_bw(dx-1-dx/8,dx-1,dy/2,dy-1,bp,cs,1) != 1 ) // r ??? 
if( get_bw(0 ,dx/8,dy/2,dy-1,bp,cs,1) == 1 ) if( get_bw(dx-1-dx/8,dx-1,0 ,dy/3,bp,cs,1) == 1 ) if( get_bw(0 ,dx/8,0 ,dy/3,bp,cs,1) == 1 ) Break; for( y=1;y<12*dy/16;y++ ) if( num_cross(0, dx-1, y , y ,bp,cs) != 1 // sure ? && num_cross(0, dx-1, y-1, y-1,bp,cs) != 1 ) break; if( y<12*dy/16 ) Break; if(dx>3){ for( y=dy/2;yy-1-5*dy/16;y>=dy/5;y--){ // rechts abfallende Kante/Knick? i=loop(bp,bp->x-1,y,x1-x0,cs,0,LE); if( i-2-dx/16>=x ) break; if( i=dy/5 ) Break; // test ob linke Kante gerade for(x=0,y=bp->y-1-dy/5;y>=dy/5;y--){ // rechts abfallende Kante/Knick? i=loop(bp,0,y,x1-x0,cs,0,RI); if( i+2+dx/16x ) x=i; } if (y>=dy/5 ) Break; if (box1->m4 && y1m4) if ( get_bw(x0,x1,y1+1,box1->m4+dy/8,box1->p,cs,1) == 1 ) ad=ad*97/100; // unsure !l| i=loop(bp,dx-1,dy/16,dx,cs,0,LE); j=loop(bp,dx-1,dy/2 ,dx,cs,0,LE); if( i>3 && j>3 ) if( get_bw(dx-1-i/2,dx-1-i/2,0,dy/2,bp,cs,1) == 1 ) Break; // ~t for(y=5*dy/8;y8 && loop(bp, 0,3*dy/4,dx,cs,0,RI)>=dx/4 && loop(bp, 0,7*dy/8,dx,cs,0,RI)<=dx/8 && loop(bp,dx-1,3*dy/4,dx,cs,0,LE)<=dx/8 && loop(bp,dx-1,7*dy/8,dx,cs,0,LE)<=dx/8 ) Break; // ~J if ( 2*i3>5*i1 ) // hmm \tt l can look very similar to 7 if ( loop(bp,0,dy/4,dx,cs,0,RI)>dx/2 && get_bw(0,dx/8,0,dy/4,bp,cs,1) == 1 ) Break; // ~7 if ( loop(bp,dx-1,dy/2,dx,cs,0,LE)>dx/2 && get_bw(3*dx/4,dx-1,3*dy/4,dy-1,bp,cs,1) == 1) { if (loop(bp,0,dy-1,dx,cs,0,RI)2*dy) ad=99*ad/100; // ~L if(5*dx>3*dy) ad=99*ad/100; // ~L } if(!hchar){ // right part (bow) of h is never a l if( get_bw(dx/4,dx/4, 0,dy/4,bp,cs,1) == 1 && get_bw(dx/4,dx/4,dy/2,dy-1,bp,cs,1) == 0 ) Break; } if( dx>3 && dy>3*dx ) if( loop(bp,dx/4,dy-1 ,dy,cs,0,UP)< dy/4 && loop(bp, 0,dy-1-dy/8,dx,cs,0,RI)>=dx/2 && loop(bp,dx-1,dy-1-dy/8,dx,cs,0,LE)<=dx/4 ){ ad=98*ad/100; // ~] if ( loop(bp,dx-1,dy/2,dx,cs,0,LE)==0 ) Break; } for(x=0;xi ) break; } if( x>=loop(bp,0,y+1,dx,cs,0,RI) ) if( loop(bp,0 ,0,dy,cs,0,DO)>1 ) if( loop(bp,0 ,0,dy,cs,0,DO) - loop(bp,dx/16+1,0,dy,cs,0,DO) < dx/16+1 ) Break; // ~1 Jul00,Nov00 if( 
num_cross(0,dx/2,y-1,y-1,bp,cs)==2 ) Break; // ~1 } if(dx<8 && dy<12){ // screen font i= loop(bp,0,0,dy,cs,0,DO); if( loop(bp,dx/2,1,dy,cs,1,DO)>=dy-2 && loop(bp,0,dy/2,dx,cs,0,RI)>=2 && i>1 && ip,cs,2) != 2 && get_bw(x0,x1,y0 ,y0 ,box1->p,cs,2) != 2 && get_bw(x0,x1,y1 ,y1 ,box1->p,cs,2) != 2 && get_bw(x0,x0+dx/4,y0+1+dy/16,y1-1-dy/16,box1->p,cs,1) != 1 ) Break; /* ~] */ i=loop(bp,dx-1,dy/2,dx,cs,0,LE); if( loop(bp, 0,dy/2,dx,cs,0,RI)>=dx/2 && (ip,cs,2) != 2 && get_bw(x0,x1,y0 ,y0 ,box1->p,cs,2) != 2 && get_bw(x0,x1,y1 ,y1 ,box1->p,cs,2) != 2 && get_bw(x1-dx/4,x1,y0+1+dy/16,y1-1-dy/16,box1->p,cs,1) != 1 ) Break; /* ~[ */ x =loop(bp, 0,dy/2,dx,cs,0,RI); // konvex/konkav? ~() i =loop(bp,dx-1,dy/2,dx,cs,0,LE); if( loop(bp, 0,7*dy/8,dx,cs,0,RI) > x+dx/8 && loop(bp, 0, dy/8,dx,cs,0,RI) > x+dx/8 && loop(bp,dx-1,7*dy/8,dx,cs,0,LE) < i-dx/8 && loop(bp,dx-1, dy/8,dx,cs,0,LE) < i-dx/8 ) Break; // ~( if( loop(bp, 0,7*dy/8,dx,cs,0,RI) < x-dx/8 && loop(bp, 0, dy/8,dx,cs,0,RI) < x-dx/8 && loop(bp,dx-1,7*dy/8,dx,cs,0,LE) > i+dx/8 && loop(bp,dx-1, dy/8,dx,cs,0,LE) > i+dx/8 ) Break; // ~) i= loop(bp, 0, 0,dy,cs,0,DO); // horizontal line? 
if(dy>=12 && i>dy/8 && iloop(bp,dx-1, i,dx,cs,0,LE) || loop(bp,dx-1,3*dy/16,dx,cs,0,LE)-dx/8 >loop(bp,dx-1, i+1,dx,cs,0,LE) ) if( loop(bp,dx-1,8*dy/16,dx,cs,0,LE)-dx/8 >loop(bp,dx-1, i,dx,cs,0,LE) || loop(bp,dx-1,8*dy/16,dx,cs,0,LE)-dx/8 >loop(bp,dx-1, i+1,dx,cs,0,LE) ) if( loop(bp, 0,3*dy/16,dx,cs,0,RI)-dx/8 >loop(bp, 0, i,dx,cs,0,RI) || loop(bp, 0,3*dy/16,dx,cs,0,RI)-dx/8 >loop(bp, 0, i+1,dx,cs,0,RI) ) if( loop(bp, 0,8*dy/16,dx,cs,0,RI)-dx/8 >loop(bp, 0, i,dx,cs,0,RI) || loop(bp, 0,8*dy/16,dx,cs,0,RI)-dx/8 >loop(bp, 0, i+1,dx,cs,0,RI) ) Break; // ~t if( loop(bp, 0,i-1,dx,cs,0,RI)>1 && dx<6 ) Break; // ~t if( loop(bp, 0,8*dy/16,dx,cs,0,RI)>dx/8 && loop(bp, 0, i,dx,cs,1,RI)>=dx-1 && loop(bp,dx-1,8*dy/16,dx,cs,0,LE)>dx/8 && loop(bp,dx-1, i-1,dx,cs,0,LE)>dx/8 ) Break; // ~t } // if( vertical_detected && dx>5 ) if( loop(bp,0, 1,dx,cs,0,RI)>=dx/2 && ( loop(bp,0,dy-2,dx,cs,0,RI)<=dx/8 || loop(bp,0,dy-1,dx,cs,0,RI)<=dx/8 ) ) if( ( loop(bp,dx-1, 0,dx,cs,0,LE)<=dx/8 || loop(bp,dx-1, 1,dx,cs,0,LE)<=dx/8 ) && loop(bp,dx-1,dy-2,dx,cs,0,LE)>=dx/2 ) ad=98*ad/100; // ~/ if( get_bw(x0,x1,y0,y1,box1->p,cs,2) == 0 ) ad=99*ad/100; if (!hchar || loop(bp,0,dy/4,dx,cs,0,RI)>dx/2){ // ~z i=loop(bp,0,dy/16 ,dx,cs,0,RI); i=loop(bp,i,dy/16 ,dx,cs,1,RI); j=i; i=loop(bp,0,dy/16+1,dx,cs,0,RI); i=loop(bp,i,dy/16+1,dx,cs,1,RI); if (i>j) j=i; i=loop(bp,0,dy/16+2,dx,cs,0,RI); i=loop(bp,i,dy/16+2,dx,cs,1,RI); if (i>j) j=i; if (j*4>=dx*3) ad=98*ad/100; // ~z if (j*8>=dx*7) ad=96*ad/100; // ~z } if( get_bw(x0,x0,y1,y1,box1->p,cs,2) == 0 ) ad=99*ad/100; if( get_bw(x1,x1,y1,y1,box1->p,cs,2) == 0 ) ad=99*ad/100; if (ad==100) ad--; /* I have to fix that: .@@@@.<- @@..@@ ....@@ ....@@< ...@@. ..@@@. ..@@.. .@@... @@.... @@@@@@<- */ if(!hchar) ad=ad*99/100; if( gchar) ad=ad*99/100; Setac(box1,'l',ad); // if( i<100 ) Break; ???? 
// if( loop(bp,0, 1,dx,cs,0,RI)<=dx/8 // && loop(bp,0,dy/2,dx,cs,0,RI)<=dx/8 // && loop(bp,0,dy-2,dx,cs,0,RI)<=dx/8 ) vertical_detected=1; break; } return box1->c; } /* ocr0_oO: rule-based test of the box in sdata for 'o' / 'O'. The certainty ad starts at 100, is scaled down by the weaker rules and the test is left through the Break macro (defined outside this part of the file) when a rule fails. On success the letter is registered with Setac(); '0' is registered as an alternative as well ('O': same ad, 'o': 98% of ad). Returns box1->c. NOTE(review): this text appears to have lost its line breaks and some spans between '<' and '>' (several conditions below are visibly truncated) - compare with a pristine copy before changing any logic. */ static wchar_t ocr0_oO(ocr0_shared_t *sdata){ struct box *box1=sdata->box1; pix *bp=sdata->bp; int i,j,d,x,y,hchar=sdata->hchar,gchar=sdata->gchar, x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; int dx=x1-x0+1,dy=y1-y0+1, /* size */ ad; /* tmp-vars */ wchar_t bc=UNKNOWN; // --- test o,O --------------------------------------------------- /* the for(;;) below has no increment and its body ends in an unconditional break, so it acts as a run-at-most-once block */ for(ad=d=100;dx>2 && dy>3;){ // min 3x4 DBG( wchar_t c_ask='o'; ) if (sdata->holes.num !=1 ) Break; /* exactly one hole is required */ if( get_bw(x0 , x0+dx/2,y0+dy/2 , y0+dy/2,box1->p,cs,1) != 1 ) Break; if( get_bw(x1-dx/2 , x1 ,y0+dy/2 , y0+dy/2,box1->p,cs,1) != 1 ) Break; if( get_bw(x0+dx/2 , x0+dx/2,y1-dy/2 , y1, box1->p,cs,1) != 1 ) Break; if( get_bw(x0+dx/2 , x0+dx/2,y0 , y0+dy/2,box1->p,cs,1) != 1 ) Break; if( get_bw(x0+dx/2 , x0+dx/2,y0+dy/2 , y1-dy/3,box1->p,cs,1) != 0 ) Break; if (sdata->holes.hole[0].y0 > dy/3 || sdata->holes.hole[0].y1 < dy-1-dy/3) Break; if( num_cross(x0+dx/2 ,x0+dx/2 ,y0, y1 ,box1->p,cs) != 2 && num_cross(x0+dx/2+1,x0+dx/2+1,y0, y1 ,box1->p,cs) != 2 ) Break; if( num_cross(x0+dx/3,x1-dx/4,y0 , y0 ,box1->p,cs) != 1 ) // AND if( num_cross(x0+dx/3,x1-dx/4,y0+1 , y0+1,box1->p,cs) != 1 ) Break; if( num_cross(x0+dx/4,x1-dx/3,y1 , y1 ,box1->p,cs) != 1 ) // against "rauschen" if( num_cross(x0+dx/4,x1-dx/3,y1-1 , y1-1,box1->p,cs) != 1 ) Break; if( num_cross(x0 ,x0 ,y0+dy/3 , y1-dy/3,box1->p,cs) != 1 ) if( num_cross(x0+1 ,x0+1 ,y0+dy/3 , y1-dy/3,box1->p,cs) != 1 ) Break; if( num_cross(x1 ,x1 ,y0+dy/3 , y1-dy/3,box1->p,cs) != 1 ) if( num_cross(x1-1 ,x1-1 ,y0+dy/3 , y1-dy/3,box1->p,cs) != 1 ) Break; if( loop(bp,0 ,0 ,x1-x0,cs,0,RI)<= loop(bp,0 ,2 ,x1-x0,cs,0,RI) ) Break; x=loop(bp,dx-1,dy-1-dy/3,x1-x0,cs,0,LE); // should be minimum for( y=dy-1-dy/3;ydx/8 ) if( loop(bp,0 , dy/16,dx,cs,0,RI)dx/8 ) if( loop(bp,0 ,dy-1-dy/16,dx,cs,0,RI)p,cs,1) == 0 && 
get_bw(x1-dx/32,x1,y1-dy/32,y1,box1->p,cs,1) == 0 // && ( get_bw(x0,x0+dx/32,y0,y0+dy/32,box1->p,cs,1) == 1 && ( get_bw(0,dx/32,0,dy/32,bp,cs,1) == 1 || get_bw(x0,x0+dx/32,y1-dy/32,y1,box1->p,cs,1) == 1 ) ) Break; // ~D // search lowest inner white point for(y=dy,j=x=0;x 1 ) ad=99*ad/100; // ~a \it a for(y=0;y 2 ) ad=98*ad/100; // ~a \it a if (loop(bp,dx-1,dy-1,x1-x0,cs,0,LE)dy/8 || num_cross(0,dx-1, 0, 0,bp,cs) > 1 || num_cross(0,dx-1,dy-1,dy-1,bp,cs) > 1 ) ad=98*ad/100; // ~bq if( hchar && 2*y0m1+box1->m2 ) i=1; else i=0; if (gchar) ad=99*ad/100; /* i!=0 selects the capital letter */ bc='o'; if( i ){ bc='O'; } if ( bc=='O' && ad>99) ad=99; /* we can never 100% sure, 0O */ Setac(box1,bc,ad); /* digit zero is always offered as an alternative */ if (bc=='O') Setac(box1,'0',ad); if (bc=='o') Setac(box1,'0',98*ad/100); break; } return box1->c; } /* ocr0_pP: rule-based test of the box in sdata for 'p' / 'P'. Up to two holes are tolerated; ad starts at 100 and is reduced or the test is left through Break by the individual rules. The result is registered with Setac() as 'p', or as 'P' when hchar is set and either gchar is clear or dy<14. Returns box1->c. */ static wchar_t ocr0_pP(ocr0_shared_t *sdata){ struct box *box1=sdata->box1; pix *bp=sdata->bp; int i,j,d,x,y,i1,i2,i3,i4,hchar=sdata->hchar,gchar=sdata->gchar, x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; int dx=x1-x0+1,dy=y1-y0+1, /* size */ ad; /* tmp-vars */ wchar_t bc=UNKNOWN; // --- test pP --------------------------------------------------- for(ad=d=100;dx>2 && dy>3;){ // min 3x4 DBG( wchar_t c_ask='p'; ) if (sdata->holes.num > 2) Break; /* tolerant against a tiny hole */ if( get_bw(0 , dx/2,3*dy/4,3*dy/4,bp,cs,1) != 1 ) Break; if( get_bw(0 , dx/2, dy/2, dy/2,bp,cs,1) < 1 ) Break; if( get_bw(dx/4, dx-1, dy/4, dy/4,bp,cs,1) != 1 ) Break; i= loop(bp,dx-1,3*dy/4,dx,cs,0,LE); if (ip,cs) != 2 ) if( num_cross(x0+dx/2 ,x0+dx/2 , y0, y1-3*dy/16,box1->p,cs) != 2 ) if( num_cross(x0+dx/2+1,x0+dx/2+1, y0, y1-3*dy/16,box1->p,cs) != 2 ) Break; if( num_cross(0,dx-1,7*dy/8 ,7*dy/8 ,bp,cs) != 1 ) if( num_cross(0,dx-1,7*dy/8-1,7*dy/8-1,bp,cs) != 1 ) Break; if( num_cross(0,dx-1, dy/4 , dy/4 ,bp,cs) != 2 ) if( num_cross(0,dx-1, dy/4-1, dy/4-1,bp,cs) != 3 ) // \it p with nice kurve if( num_cross(0,dx-1, dy/4 , dy/4 ,bp,cs) != 2 ) if( num_cross(0,dx-1, dy/4+1, dy/4+1,bp,cs) != 2 ) Break; i= 
loop(bp,0,dy/2,dx,cs,0,RI); if(i<1) i++; if( num_cross(i-1,dx-1, dy/4 , dy/4 ,bp,cs) != 2 ) if( num_cross(i-1,dx-1, dy/4+1, dy/4+1,bp,cs) != 2 ) Break; i1= loop(bp, 0,3*dy/8,dx,cs,0,RI); if (i1>=dx/2) ad=90*ad/100; i2=i1+loop(bp,i1,3*dy/8,dx,cs,1,RI); // upper x-position of v line i3= loop(bp, 0,7*dy/8,dx,cs,0,RI); i4=i3+loop(bp,i3,7*dy/8,dx,cs,1,RI); // lower x-position of v line // out_x(box1);printf(" p:"); for ( y=dy/8; y<7*dy/8; y++ ){ x=i2+ (8*y-3*dy)*(i4-i2)/(4*dy); // right limit of line i= loop(bp,0,y,dx,cs,0,RI); if(i>x+dx/16) break; } if ( y<7*dy/8 ) Break; for ( x=0,j=y=dy/3; yx ) { x=i; j=y; } if(x>dx/2) break; } if ( x=dx) Break; if( get_bw(3*dx/4,dx-1, y , dy-1,bp,cs,1) == 1 ) Break; i=num_hole (x0,x1,y0,y1-dy/5,box1->p,cs,NULL); // j=num_hole (x0,x1,y0,y1 ,box1->p,cs,NULL); j=sdata->holes.num; if (j!=1 && dx< 8) ad=96*ad/100; if (j!=1 && dx>=8) ad=98*ad/100; if (i==0 && j==0) ad=90*ad/100; /* some times there is a small gap */ if (i>1 || j>1 || j>i) Break; // check for serif F i= loop(bp,bp->x-1, bp->y/4, dx ,cs,0,LE); i=i+loop(bp,bp->x-1-i,bp->y/4, dx ,cs,1,LE); j= loop(bp,bp->x-1-i,bp->y/4,3*dy/4,cs,0,DO); if (j>dy/2) ad=80*ad/100; // its an serif-F if( ((!hchar) && (!gchar)) || (hchar && gchar)) ad=95*ad/100; /* 'P' only when hchar is set and (gchar is clear or dy<14) */ bc='p'; if( hchar && ((!gchar) || dy<14)) bc='P'; if ( hchar && gchar) ad=98*ad/100; // \ss sz if ((!hchar) && !gchar) ad=98*ad/100; Setac(box1,bc,ad); break; } return box1->c; } /* ocr0_qQ: runs two independent rule-based tests on the box in sdata, first for 'Q' (needs dx>2 and dy>4), then for 'q' (needs dx>2 and dy>3). Each test starts with ad=100, scales ad down on weak mismatches, is left through the Break macro (defined outside this part of the file) when a rule fails, and registers its letter with Setac(box1,letter,ad) when all rules pass. Returns box1->c. NOTE(review): this text appears to have lost its line breaks and some spans between '<' and '>' (several conditions are visibly truncated) - compare with a pristine copy before changing any logic. */ static wchar_t ocr0_qQ(ocr0_shared_t *sdata){ struct box *box1=sdata->box1; pix *bp=sdata->bp; int i,j,d,x,y,hchar=sdata->hchar,gchar=sdata->gchar, x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; int dx=x1-x0+1,dy=y1-y0+1, /* size */ ad; /* tmp-vars */ // --- test Q --------------------------------------------------- for(ad=d=100;dx>2 && dy>4;){ // min 3x4 DBG( wchar_t c_ask='Q'; ) if (sdata->holes.num > 2) Break; /* tolerant against a tiny hole */ if( get_bw(x0 ,x0+dx/3,y0+dy/3,y0+dy/3,box1->p,cs,1) != 1 ) Break; if( get_bw(x1-dx/3,x1 
,y0+dy/3,y0+dy/3,box1->p,cs,1) != 1 ) Break; if( get_bw(x0+dx/2,x0+dx/2,y1-dy/3,y1, box1->p,cs,1) != 1 ) Break; if( get_bw(x0+dx/2,x0+dx/2,y0 ,y0+dy/4,box1->p,cs,1) != 1 ) Break; if( get_bw(x0+dx/2,x0+dx/2,y0+dy/3,y1-dy/2,box1->p,cs,1) == 1 ) Break; if( get_bw(x1 ,x1 ,y0 ,y0 ,box1->p,cs,1) == 1 ) Break; //alpha if( num_cross(x0+dx/2,x0+dx/2,y0 , y1 ,box1->p,cs) < 2 ) Break; if( num_cross(x0+dx/5,x1-dx/5,y0 , y0 ,box1->p,cs) != 1 ) // AND if( num_cross(x0+dx/5,x1-dx/5,y0+1 , y0+1 ,box1->p,cs) != 1 ) Break; if( num_cross(x0 ,x0 ,y0+dy/3 , y1-dy/3,box1->p,cs) != 1 ) if( num_cross(x0+1 ,x0+1 ,y0+dy/3 , y1-dy/3,box1->p,cs) != 1 ) Break; if( get_bw(x1 ,x1 ,y1-dy/8 , y1 ,box1->p,cs,1) == 0 ) if( num_cross(x1 ,x1 ,y0+dy/3 , y1-dy/3,box1->p,cs) != 1 ) if( num_cross(x1-1 ,x1-1 ,y0+dy/3 , y1-dy/3,box1->p,cs) != 1 ) Break; // i=num_hole(x0,x1,y0,y1,box1->p,cs,NULL); i=sdata->holes.num; if(!i) Break; if( i!=1 && (i!=2 || num_hole(x0,x1,y0+dy/2,y1,box1->p,cs,NULL)!=1) ) Break; x=x1;y=y1; turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,LE,ST); if( xp,&x,&y,x0,x1,y0,y1,cs,ST,LE); if( x 5*dx/8 ) Break; // ~4 Okt00 x= loop(bp,dx-1,3*dy/8,dy,cs,0,LE); if( x>dx/4 ) Break; if( loop(bp,dx-1-x,0 ,dy,cs,0,DO) <= loop(bp,dx-2-x,0 ,dy,cs,0,DO) ) Break; // 4 if( loop(bp,dx-1,dy-2,dx,cs,0,LE) <= loop(bp,dx-1,dy/2,dx,cs,0,LE) ) if( loop(bp, 1,dy-1,dy,cs,0,UP) <= loop(bp,dx/2,dy-1,dy,cs,0,UP) ) if( loop(bp, 0,dy-2,dx,cs,0,RI)>dx/2 ) if( loop(bp, 0, 0,dx,cs,0,RI)>dx/2 ) Break; // 4 if( loop(bp,dx-1,3*dy/4,dx,cs,0,LE) + loop(bp, 0,3*dy/4,dx,cs,0,RI) < loop(bp,dx-1,2*dy/4,dx,cs,0,LE) + loop(bp, 0,2*dy/4,dx,cs,0,RI) ) ad=94*ad/100; // 4 if( loop(bp,0 ,3*dy/4,dx,cs,1,RI) >= dx ) ad=94*ad/100; // 4 if( loop(bp,dx-1,dy/3,dx,cs,0,LE)> dx/4 ) Break; j=loop(bp,dx/2,dy-1,dy,cs,0,UP); if (j>1 && j>dy/8) { if( get_bw(0,dx/2,dy-1-j/2,dy-1-j/2,bp,cs,1) == 1 ) { // ~RA if (j<5) ad=95*ad/100; else Break; } } // italic a for(i=0,y=0;y 2 ) i++; if(i>dy/8) Break; // ~a \it a if (i>0) ad=99*ad/100; // ~o look at the lower 
right side for falling line for(j=x=0,y=dy/2;yx){ x=i; } if (x-i>j) j=x-i; if( j>dx/16 ) Break; // falling line detected } if (j==0) Break; // no falling line => no Q if (j<=dx/16) ad=98*ad/100; if(y1<=box1->m3) ad=98*ad/100; // ~q no underlength! rare if(!hchar) ad=96*ad/100; Setac(box1,'Q',ad); break; } // --- test q --------------------------------------------------- for(ad=d=100;dx>2 && dy>3;){ // min 3x4 DBG( wchar_t c_ask='q'; ) if (sdata->holes.num > 2) Break; /* tolerant against a tiny hole */ /* NOTE(review): the ring scan and the vertical-line scan below leave their inner loops with the Break macro, while the baseline scan uses a plain break - confirm that Break does nothing here beyond (debug output and) break, otherwise a successful hit is reported like a rejection */ for ( y=y0; 2*y<=y0+y1; y++ ){ // detect ring if( num_cross(x0,x1, y, y,box1->p,cs) == 2 ) Break; } if (2*y>y0+y1) Break; /* < */ for ( y=(y0+y1)/2; y<=y1; y++ ){ // detect vert line if( num_cross(x0, x1, y, y,box1->p,cs) == 1 && num_cross(x0,x0+dx/2, y, y,box1->p,cs) == 0 ) Break; } if (y>y1) Break; /* O (y==y1 for 4x6font-q) */ for ( x=0,j=y=y0+dy/3; y<=y1-dy/8; y++ ){ // detect baseline i=loop(box1->p,x0,y,dx,cs,0,RI); if ( i>x ) { x=i; j=y; } if ( x>dx/2 ) break; } if ( x=dx) Break; if (y1-j+1p,cs) != 0 ) ad=96*ad/100; // ~g if( loop(box1->p,x0+dx/16,j,dy,cs,0,UP)<1+dy/16 ){ ad=97*ad/100; if (hchar || !gchar) Break; // 4 } if( loop(box1->p,x0+dx/16,j-dy/32-1,dy,cs,1,RI)>=dx-dx/8 || loop(box1->p,x0+dx/16,j-dy/16-1,dy,cs,1,RI)>=dx-dx/8 ){ ad=96*ad/100; // 4 } if( get_bw(x1-dx/3, x1, y0+dy/3, y0+dy/3,box1->p,cs,1) != 1 ) Break; if( get_bw(x0, x0+dx/3, y0+dy/3, y0+dy/3,box1->p,cs,1) != 1 ) Break; if( get_bw(x0, x0+dx/4, y1-dy/8, y1-dy/9,box1->p,cs,1) == 1 ) Break; if( get_bw(x0, x0+dx/4, y1-dy/5, y1-dy/9,box1->p,cs,1) == 1 ) ad=99*ad/100; if( num_cross(x0+dx/2,x0+dx/2, y0, j ,box1->p,cs) != 2 ) Break; // if( num_hole (x0 ,x1 , y0, y1 ,box1->p,cs,NULL) != 1 ) if (sdata->holes.num != 1) { if (dx<16) ad=98*ad/100; else Break; } if( num_hole (x0 ,x1 , y0, j ,box1->p,cs,NULL) != 1 ) { if (dx<16) ad=98*ad/100; else Break; } // ~\it g if( loop(bp,0,dy-1-dy/4,dx,cs,0,RI)>5*dx/8 && get_bw(dx/4,dx/4,dy-1-dy/4,dy-1,bp,cs,1)==1 ) Break; // ~\it g // what about unsure m1-m4? 
if(!gchar){ ad=ad*99/100; } // ~4 if( hchar){ ad=ad*99/100; } // ~49 Setac(box1,'q',ad); break; } return box1->c; } /* ocr0_iIjJ: runs four rule-based tests on the box in sdata, in the order 'i', 'j', 'I', 'J'. Each test starts with ad=100, scales ad down on weak mismatches, is left through the Break macro when a rule fails and registers its letter with Setac() when all of its rules pass. The 'i' test contains a shortcut for a tall narrow box (dy>3*dx with box1->m2 set) that registers 'i' and returns 'i' immediately; in every other case the function returns box1->c. The 'J' test is based on the frame vectors of the box (nearest_frame_vector, line_deviation) and the corner table aa. */ static wchar_t ocr0_iIjJ(ocr0_shared_t *sdata){ struct box *box1=sdata->box1; pix *bp=sdata->bp; int i,j,d,x,y,i1,i2,i3,i4,i5,hchar=sdata->hchar,gchar=sdata->gchar, ax,ay,bx,by,cx,cy,ex,ey, x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; int dx=x1-x0+1,dy=y1-y0+1, /* size */ ad,ya,yb,yc,yd,ye,yf,xa,xb, /* tmp-vars */ (*aa)[4]=sdata->aa; /* the four line ends, (x,y,dist^2,vector_idx) */ // --- test i --------------------------------------------------- // if(box1->dots==1) // what about \it neighbouring ij for(ad=d=100;dy>3 && dx>0;){ // min 3x4 without dot DBG( wchar_t c_ask='i'; ) if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ // ToDo: ':' check that high of dot is smaller than the vert. line! /* * o <== ya * o * * ooo <== yb * o * o * o * ooo */ ya=y0; if (box1->dots!=1) ad=98*ad/100; /* while() is used as a run-at-most-once block: its body ends in an unconditional break */ while(dy>3*dx && box1->m2){ // test for vertical i without detected dot i= loop(bp,dx/2,dy-1 ,dy,cs,0,UP); if (dy-1-im3-2) break; i+=loop(bp,dx/2,dy-1-i,dy,cs,1,UP); // distance upper end to m2 > (m2-m1)/3 /* NOTE(review): dy-1-i is measured in bp coordinates (from the box top) while m1/m2 are compared against y0 elsewhere in this function - possibly a missing y0 offset; confirm */ if (3*abs(dy-1-i-box1->m2)>box1->m2-box1->m1) break; if( get_bw(x0,x1,y0,(box1->m1+box1->m2)/2,box1->p,cs,1) == 1 ) if( get_bw(x0,x1,y1-i ,y1-i ,box1->p,cs,1) == 0 || get_bw(x0,x1,y1-i-1,y1-i-1,box1->p,cs,1) == 0 || get_bw(x0,x1,y1-i-2,y1-i-2,box1->p,cs,1) == 0 ) { Setac(box1,'i',ad); return 'i'; /* believe me, that's an "i"! */ } break; } // if( box1->dots!=1 ) Break; if( box1->m2 && 2*y0>=box1->m2+box1->m1 ) ya=box1->m1; // out_x(box1); for (y=ya;2*yp,cs,1) == 1 ) break; if (2*y>=ya+y1) Break; // hmm, gap only, no dot? 
ya=y; if (box1->m2 && ya>box1->m2+2) Break; for ( ;2*yp,cs,1) != 1 ) break; if (2*y>=ya+y1) Break; // hmm no gap for ( ;2*yp,cs,1) == 1 ) break; yb=y; if (5*yb>=3*ya+2*y1) ad=99*ad/100; // large gap if (2*yb>= ya+ y1) ad=97*ad/100; // very large gap, ~: if (5*yb>=2*ya+3*y1) Break; // huge gap, ~: if (loop(bp,dx-1,y+(y1-ya+1)/32,dx,cs,0,LE)>dx/2) // unusual (right part of ouml) ad=95*ad/100; // printf(" num_cross dy/2=%d %d\n",dy/2, num_cross(0,dx-1,dy/2,dy/2,bp,cs)); // printf(" dots=%d\n",box1->dots); out_x(box1); // \sl ~f. ! for (y=y1;y>ya;y--) if( get_bw(x0,x1,y,y,box1->p,cs,1) != 1 ) break; if (y>(ya+3*y1)/4) Break; if (y>(ya+2*y1)/3) ad=96*ad/100; y=(y1-yb+1)/2+yb-y0; /* only one vertical line, italic i is more an tall S */ if( num_cross(0,dx-1,y,y,bp,cs) != 1 ) Break; for(;y<=y1-y0;y++){ if( num_cross(0,dx-1,y,y,bp,cs) != 1 ) break; } yc=y; for(;y<=y1-y0;y++){ if( num_cross(0,dx-1,y,y,bp,cs) != 2 ) break; } yd=y; if( yd<3*(y1-yb+1)/4+yb-y0 ) Break; y=(y1-yb+1)/2+yb-y0; for(;y>0;y--){ if( num_cross(0,dx-1,y,y,bp,cs) != 1 ) break; } ye=y; for(;y>0;y--){ if( num_cross(0,dx-1,y,y,bp,cs) != 2 ) break; } yf=y; if( yf>(y1-yb+1)/4+yb-y0 ) Break; if(yd>yc+2){ xa=loop(bp, 0,yc-1,dx,cs,0,RI); xb=loop(bp,dx-1,yc-1,dx,cs,0,LE); if( xb-loop(bp,dx-1,yc,dx,cs,0,LE) /* Dec00 */ > xa-loop(bp, 0,yc,dx,cs,0,RI) ){ y= loop(bp,dx-xb,yc-1,dy,cs,0,DO); if(y>0){ i=loop(bp,dx-xb-1,yc-1+y-1,dy,cs,0,DO); if( i>0 ) y+=i-1; } if( yc-1+y < yd-1 ) Break; } else { y= loop(bp,11*xa/16,yc-1,dy,cs,0,DO); if( yc-1+y < yd-2 ) Break; } } if(yf0 ) y+=i-1; if( ye+1-y > yf+1 ) Break; } if( 2*y0 <= box1->m1+box1->m2 && loop(bp,0, 0,dx,cs,0,RI)+1 < loop(bp,0,dx/2,dx,cs,0,RI) ) ad=97*ad/100; if( gchar ) // i is more often than j, be sure that realy correct Mai00 if( loop(bp, 0,2*dy/4,dx,cs,0,RI) -loop(bp,dx-1,2*dy/4,dx,cs,0,LE)>dx/8 ) Break; // could be a broken + or similar thing? 
if( 3 * ya > box1->m1 + 2*box1->m2 ) ad=90*ad/100; if( loop(bp,dx-1,3*dy/4,dx,cs,0,LE)>dx/2 && loop(bp,dx-1, dy-1,dx,cs,0,LE)5 && num_cross(x0+dx/2,x0+dx/2, ya, y1 ,box1->p,cs) >= 3 ) ad=95*ad/100; Setac(box1,'i',ad); break; } // --- test j --------------------------------------------------- // if(box1->dots==1) // what about \it neighbouring ij for(ad=d=100;dy>4 && dx>0;){ // min 3x4 DBG( wchar_t c_ask='j'; ) if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ ya=y0; if( box1->m2 && 2*y0>=box1->m2+box1->m1 ) ya=box1->m1; for(y=ya;2*yp,cs,1) == 1 ) break; if(2*y>=ya+y1) Break; // hmm only gap ya=y; if( box1->m2 && ya>box1->m2+2 ) Break; for( ;2*yp,cs,1) != 1 ) break; if(2*y>=ya+y1) Break; // hmm no gap for( ;2*yp,cs,1) == 1 ) break; if(2*y>=ya+y1) Break; // hmm very large gap yb=y; if( loop(bp,dx-1,y+(y1-ya+1)/32,dx,cs,0,LE)>dx/2 ) Break; // unusual (right part of ouml) // printf(" num_cross dy/2=%d %d\n",dy/2, num_cross(0,dx-1,dy/2,dy/2,bp,cs)); // printf(" dots=%d\n",box1->dots); out_x(box1); // \sl ~f. ! 
for(y=(ya+y1)/2;y<=y1;y++) if( get_bw(x0,x1,y,y,box1->p,cs,1) != 1 ) break; if(y<=y1) Break; y=(y1-yb+1)/2+yb-y0; /* only one vertical line, italic i is more an tall S */ if( num_cross(0,dx-1,y,y,bp,cs) >2 ) Break; for(;y<=y1-y0;y++){ if( num_cross(0,dx-1,y,y,bp,cs) != 1 ) break; } yc=y; for(;y<=y1-y0;y++){ if( num_cross(0,dx-1,y,y,bp,cs) != 2 ) break; } yd=y; if( yd<3*(y1-yb+1)/4+yb-y0 ) Break; y=(y1-yb+1)/2+yb-y0; for(;y>0;y--){ if( num_cross(0,dx-1,y,y,bp,cs) != 1 ) break; } ye=y; for(;y>0;y--){ if( num_cross(0,dx-1,y,y,bp,cs) != 2 ) break; } yf=y; if( yf>(y1-yb+1)/4+yb-y0 ) Break; if(yd>yc+2){ xa=loop(bp, 0,yc-1,dx,cs,0,RI); xb=loop(bp,dx-1,yc-1,dx,cs,0,LE); if( xb-loop(bp,dx-1,yc,dx,cs,0,LE) /* Dec00 */ > xa-loop(bp, 0,yc,dx,cs,0,RI) ){ y= loop(bp,dx-xb,yc-1,dy,cs,0,DO); if(y>0){ i=loop(bp,dx-xb-1,yc-1+y-1,dy,cs,0,DO); if( i>0 ) y+=i-1; } if( yc-1+y < yd-1 ) Break; } else { y= loop(bp,11*xa/16,yc-1,dy,cs,0,DO); if( yc-1+y < yd-2 ) Break; } } if(yf0 ) y+=i-1; if( ye+1-y > yf+1 ) Break; } if( 2*y0 <= box1->m1+box1->m2 && loop(bp,0, 0,dx,cs,0,RI)+1 < loop(bp,0,dx/2,dx,cs,0,RI) ) ad=97*ad/100; if (loop(bp,0,dy-1,dx,cs,0,RI) -loop(bp,0,dy-3,dx,cs,0,RI)>1+dx/16) ad=96*ad/100; // ~c if( gchar ) // i is more often than j, be sure that realy correct Mai00 if( loop(bp, 0,2*dy/4,dx,cs,0,RI) -loop(bp,dx-1,2*dy/4,dx,cs,0,LE)<=dx/8 ) Break; // could be a broken + or similar thing? if( 3 * ya > box1->m1 + 2*box1->m2 ) ad=80*ad/100; if (!gchar) ad=96*ad/100; if( box1->dots!=1 ) ad=98*ad/100; Setac(box1,'j',ad); break; } // --- test I --------------------------------------------------- for(ad=d=100;dy>4 && dy>dx && 5*dy>4*(box1->m3-box1->m2);){ // min 3x4 DBG( wchar_t c_ask='I'; ) if( box1->dots==1 ) Break; if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ x =loop(bp,0, dy/2,dx,cs,0,RI); // konvex? 
divided Q if(loop(bp,0,7*dy/8,dx,cs,0,RI) > x+dx/8) Break; for( y=dy/16;y1+dx/8 ) break; } if( y<3*dy/4 ) Break; // out_x(box1); // upper max width for(i2=i1=0,y=0;yi1){ i1=x;i2=y; } } for(i4=i3=0,y=3*dy/4;yi3){ i3=x;i4=y; } } if( abs(i3-i1)>1+dx/8 ) Break; // if i3>>i5 more sure! if( i1>i5 ){ // look for edges else *80% } if(i1+1i2 ) i2=i; // printf(" get_line(%d,%d) %d\n",i1,i2, // get_line2(i1,dy/8,i2,dy-1-dy/8,bp,cs,100)); if( get_line2(i1,dy/8,i2,dy-1-dy/8,bp,cs,100)<95 ) Break; x =(i1-i2+4)/8; i1+=x; i2-=x; // upper and lower width (what about serifs?) y=dy/8; x =loop(bp,i1, y+0,dx,cs,1,LE); i=x; x =loop(bp,i1, y+1,dx,cs,1,LE); if(x>i)i=x; x =loop(bp,i1, y+0,dx,cs,1,RI); j=x; x =loop(bp,i1, y+1,dx,cs,1,RI); if(x>j)j=x; if(abs(i-j)>1+dx/8)Break; x =loop(bp,i2,dy-y-1,dx,cs,1,LE); j=x; x =loop(bp,i2,dy-y-2,dx,cs,1,LE); if(x>j)j=x; if(abs(i-j)>1+dx/8)Break; x =loop(bp,i2,dy-y-1,dx,cs,1,RI); j=x; x =loop(bp,i2,dy-y-2,dx,cs,1,RI); if(x>j)j=x; if(abs(i-j)>1+dx/8)Break; if(dy>15) // v024a4 if( loop(bp,dx-1,dy/16 ,dx,cs,0,LE) > loop(bp,dx-1,dy/4 ,dx,cs,0,LE)+1+dx/32 ) Break; // ~bad ) (thinn) for(i=0,y=dy/16;y<15*dy/16 && i<2;y++) if( num_cross(0,dx-1,y,y,bp,cs) != 1 ) i++; if( i>1 ) Break; if(!hchar){ // right part (bow) of h is never a l if( get_bw(dx/4,dx/4, 0,dy/4,bp,cs,1) == 1 && get_bw(dx/4,dx/4,dy/2,dy-1,bp,cs,1) == 0 ) Break; if( loop(bp, 0,dy/4,dx,cs,0,RI)> dx/4 && loop(bp,dx-1,dy/4,dx,cs,0,LE)<=dx/4 && loop(bp, 1, 0,dy,cs,0,DO)<=dy/4 ) Break; // ~z } if( get_bw(x1,x1,y0 ,y1 ,box1->p,cs,2) != 2 && get_bw(x0,x1,y0 ,y0 ,box1->p,cs,2) != 2 && get_bw(x0,x1,y1 ,y1 ,box1->p,cs,2) != 2 && get_bw(x0,x0,y0+1,y1-1,box1->p,cs,1) != 1 ) Break; /* ~] */ if ( loop(bp,dx-1, dy/4,dx,cs,0,LE) > dx/2 && loop(bp,dx-1,3*dy/4,dx,cs,0,LE) > dx/2 && loop(bp, 0, dy/2,dx,cs,0,RI) < dx/4 ) Break; /* ~[ */ x =loop(bp, 0,dy/2,dx,cs,0,RI); // konvex/konkav? 
~() i =loop(bp,dx-1,dy/2,dx,cs,0,LE); if( loop(bp, 0,7*dy/8,dx,cs,0,RI) > x+dx/8 && loop(bp, 0, dy/8,dx,cs,0,RI) > x+dx/8 && loop(bp,dx-1,7*dy/8,dx,cs,0,LE) < i-dx/8 && loop(bp,dx-1, dy/8,dx,cs,0,LE) < i-dx/8 ) Break; // ~( if( loop(bp, 0,7*dy/8,dx,cs,0,RI) < x-dx/8 && loop(bp, 0, dy/8,dx,cs,0,RI) < x-dx/8 && loop(bp,dx-1,7*dy/8,dx,cs,0,LE) > i+dx/8 && loop(bp,dx-1, dy/8,dx,cs,0,LE) > i+dx/8 ) Break; // ~) if( loop(bp, 0, dy/8,dx,cs,0,RI) -(dx-loop(bp,dx-1,7*dy/8,dx,cs,0,LE)) > dx/4 ) Break; // ~/ if( loop(bp, 0, 0,dx,cs,0,RI) > dx/2 // ToDo: check for serifs && loop(bp, 0, dy/8,dx,cs,0,RI) > dx/2 && loop(bp,dx-1,dy-1 ,dx,cs,0,LE) > dx/2 && loop(bp,dx-1,dy-1-dy/8,dx,cs,0,LE) > dx/2 ) ad=99*ad/100; // ~/ if (box1->m2 && 3*y0>box1->m1+2*box1->m2) if( get_bw(x0+dx/8,x1-dx/8,box1->m1,(box1->m1+box1->m2)/2,box1->p,cs,1) == 1 ) Break; // ~i if(i1+1p,cs,1) != 1 || get_bw(x0+i4/4,x0+i4/4,y1-dy/4,y1,box1->p,cs,1) != 1 ) { ad=99*ad/100; MSG(fprintf(stderr,"ad=%d",ad);) } // ToDo: improve it if(!hchar){ ad=96*ad/100; MSG({}) } // ~bad_small_r if (box1->m4 && y1m4) { // probably lower dot? if ((dx>2 && get_bw(x0+1,x1-1,y1+1,box1->m4,box1->p,cs,1) == 1) || (dx<3 && get_bw(x0 ,x1 ,y1+1,box1->m4,box1->p,cs,1) == 1)) { ad=96*ad/100; } } // ~! 
// a---b // I // I // c---e // check against Z for(bx=0,ax=dx,ay=by=y=0;ybx) { bx=dx-1-i; by=y; } i+=loop(bp,dx-1-i,y,dx,cs,1,LE); if (dx-i-1dy-1-dy/4;y--){ i =loop(bp,0,y,dx,cs,0,RI); if (iex) { ex=i; ey=y; } } x=(3*ax+cx)/4; y=(3*ay+cy)/4; i= loop(bp,x,y,dx,cs,0,RI); x=(3*bx+ex)/4; y=(3*by+ey)/4; j= loop(bp,x,y,dx,cs,0,LE); if (j>0 && (2*i>3*j || 3*i<2*j )) ad=99*ad/100; if (j>0 && ( i>2*j || 2*i< j )) ad=97*ad/100; i=loop(bp,0,0,dy,cs,0,DO); if (i>dy/8 && idx/4) ad=96*ad/100; // ~l 5x7 if( get_bw(x0,x1,y0,y1,box1->p,cs,2) == 0 ) ad=99*ad/100; if (gchar) ad=98*ad/100; // J if (box1->m3 && 2*y1<=box1->m2+box1->m3) ad=96*ad/100; // ' Setac(box1,'I',ad); break; } // --- test J --------------------------------------------------- 22Nov06 for(ad=d=100;dy>4 && dy>=dx && dx>2;){ // min 3x4 ~Y)]d', // rewritten for vectors 0.42 int ld, i1, i2, i3, i4, i5, i6, i7; // line derivation + corners DBG( wchar_t c_ask='J'; ) if (sdata->holes.num > 0) Break; /* no hole */ /* half distance to the center */ d=2*sq(128/4); /* now we check for the upper right end of the J */ if (aa[3][2]>d) Break; /* [2] = distance */ /* searching for 4 notches between neighbouring ends */ /* type A B 6OOOO 6O5 7O5 7O O O O O 2O 1O4 1O4 OO 2OO 3 3 */ /* Warning: aa0 can be left upper or left lower point for type B */ /* get a point on the inner low left side of the J */ i =nearest_frame_vector(box1,aa[3][3],aa[1][3],(x0+x1)/2,y0); i1=nearest_frame_vector(box1,i ,aa[1][3], x1+dx,(y0+3*y1)/4); /* get the most left point on the lower part of the J */ i2=nearest_frame_vector(box1,i1,aa[3][3], x0-2*dx, y1-dy/8); /* get a point on the middle of the bottom of the J */ i3=nearest_frame_vector(box1,aa[1][3],aa[2][3], (x0+x1)/2, y1); /* get a point on the outer low right side of the J */ i4=nearest_frame_vector(box1,aa[1][3],aa[3][3], x1, (y0+2*y1)/3); /* get a point on the outer right side below top serif */ i5=nearest_frame_vector(box1,aa[2][3],aa[3][3], (x0+2*x1)/3,y0); /* get a point on the left side of 
upper serif */ i6=nearest_frame_vector(box1,aa[3][3],i1, x0, y0); /* get a point on the most right left side of upper serif */ i7=nearest_frame_vector(box1,i6,i1, x1, y0); MSG(fprintf(stderr," i1-i7 %d %d %d %d %d %d %d",i1,i2,i3,i4,i5,i6,i7);) /* check the highest point on lower left area */ i =nearest_frame_vector(box1,i1,i3,x0,y0); if (box1->frame_vector[i ][1]-y0frame_vector[i ][1]-y0<=dy/2) ad=97*ad/100; // imperfect a /* check the lowest point on upper left area, serife? */ j =nearest_frame_vector(box1,i6,i7,x0,y1); if (box1->frame_vector[i ][1] -box1->frame_vector[j ][1]<=dy/4) Break; // imperfect a if (box1->frame_vector[i7][1]>y0+dy/4) Break; // not to low if (box1->frame_vector[i1][1] -box1->frame_vector[i7][1]frame_vector[i4][1] -box1->frame_vector[i5][1]frame_vector[i7][0]frame_vector[i1][0] -box1->frame_vector[i2][0]<=dx/8) Break; // ~1 if (box1->frame_vector[i1][0] -box1->frame_vector[i2][0]<=dx/4) ad=ad*99/100; // ~1 if (box1->frame_vector[i6][1]>y0+dy/8) ad=99*ad/100; // ~1 if (aa[0][2]==0) { // ]? ad=99*ad/100; if (aa[1][2]==0) ad=98*ad/100; if (aa[2][2]<=aa[3][2]) ad=97*ad/100; } /* check for left bow */ for (j=i=i2;i!=i4;i=(i+1)%box1->num_frame_vectors[0]) { if (box1->frame_vector[ i][0] /* [0]=x */ frame_vector[i1][0]) break; /* curve? 
*/ } if (i==i4) Break; // ~I /* check for no right bow */ for (j=i=i2;i!=i4;i=(i+1)%box1->num_frame_vectors[0]) { if (box1->frame_vector[ i][0] /* [0]=x */ >box1->frame_vector[i4][0]) break; } if (i!=i4) Break; // ~I /* check for no right bow */ for (j=i=i5;i!=i6;i=(i+1)%box1->num_frame_vectors[0]) { if (box1->frame_vector[ i][1] > y0+dy/4) break; } if (i!=i6) Break; // ~Y /* check if upper left and lower left points are joined directly */ ld=line_deviation(box1, i7, i1); MSG(fprintf(stderr," i7,i1 %d %d linedist= %d/%d",i7,i1,ld,2*sq(1024/4));) if (ld >2*sq(1024/4)) Break; if (5*ld >4*2*sq(1024/4)) ad=99*ad/100; // ~3 if (6*ld >4*2*sq(1024/4)) ad=99*ad/100; // ~3 if (7*ld >4*2*sq(1024/4)) ad=99*ad/100; // ~3 if (8*ld >4*2*sq(1024/4)) ad=99*ad/100; // ~3 /* check if lower right and upper right points are joined directly */ ld=line_deviation(box1, i4, i5); MSG(fprintf(stderr," i4,i5 %d %d linedist= %d/%d",i4,i5,ld,2*sq(1024/4));) if (ld >2*sq(1024/4)) Break; if (5*ld >4*2*sq(1024/4)) ad=99*ad/100; // J exists as gchar and ~gchar if(!hchar){ ad=99*ad/100; } Setac(box1,'J',ad); break; } return box1->c; } static wchar_t ocr0_brackets(ocr0_shared_t *sdata){ struct box *box1=sdata->box1; pix *bp=sdata->bp; int i,j,d,x,y,i1,i2,i3,i4,i5,i6,hchar=sdata->hchar, x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; int dx=x1-x0+1,dy=y1-y0+1, /* size */ (*aa)[4]=sdata->aa, /* corner-points, (x,y,dist^2,vector_idx) */ ad,r1,r2; /* tmp-vars */ wchar_t bc=UNKNOWN; // --- test > derived from xX --------------------------------------------------- // rewritten for vectors v0.41 for(ad=d=100;dx>1 && dy>2;){ // min 3x2 // 0 - indizes 0,1,i1,i2 pointing to edges of the char // \ . // \ . 
// i1,i2 // / // / // 1 DBG( wchar_t c_ask='>'; ) if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ if (sdata->holes.num > 0 && (dx<6 || dy<6)) Break; /* # */ /* calculate the half distance to the center */ d=2*sq(128/4); /* now we check for the 2 left ends of the > */ if (aa[0][2]>d) Break; /* upper left end */ if (aa[1][2]>d) Break; /* lower left end */ if (aa[1][1]-aa[0][1]num_frame_vectors[0]) { if (box1->frame_vector[i][0] >=box1->frame_vector[j][0]) j=i; /* notice most right vector */ } if (j==i || j==aa[0][3]) Break; /* calculate the distance to the center */ x=box1->frame_vector[j][0]; y=box1->frame_vector[j][1]; if (2*x-aa[0][0]-aa[1][0](dy+2)) Break; if ( aa[0][0]+aa[1][0]-2*x>=0) Break; i1=j; d=line_deviation(box1, aa[0][3], j) >sq(1024/4); /* check if upper left and center point are joined directly */ MSG(fprintf(stderr,"x %d %d dist= %d/%d",x-x0,y-y0,d,sq(1024/4));) if (d >sq(1024/4)) Break; ad=ad-d*100/sq(1024); MSG(fprintf(stderr,"ad=%d", ad);) d=line_deviation(box1, j, aa[1][3]); /* check if lower left and center point are joined directly */ MSG(fprintf(stderr,"x %d %d dist= %d/%d",x-x0,y-y0,d,sq(1024/4));) if (d >sq(1024/4)) Break; ad=ad-d*100/sq(1024); MSG(fprintf(stderr,"ad=%d", ad);) /* run along right side from bottom to top */ for (j=i=aa[1][3];i!=aa[0][3];i=(i+1)%box1->num_frame_vectors[0]) { if (box1->frame_vector[i][0] >=box1->frame_vector[j][0]) j=i; /* notice most right vector */ // MSG(fprintf(stderr,"search right: %d %d %d %d",i,j,aa[1][3],aa[0][3]);) } if (j==i || j==aa[1][3]) Break; /* calculate the distance to the center */ x=box1->frame_vector[j][0]; y=box1->frame_vector[j][1]; if ( (aa[0][0]+aa[1][0]-2*x)>= 0 ) Break; if (abs(aa[0][1]+aa[1][1]-2*y)>(dy+2)/4) Break; if (aa[0][0]>=x || aa[1][0]>=x) Break; i2=j; d=line_deviation(box1, j, aa[0][3]); /* check if upper left and center point are directly joined directly */ MSG(fprintf(stderr,"x %d %d dist= %d/%d",x-x0,y-y0,d,sq(1024/4));) if (d >sq(1024/4)) Break; 
ad=ad-d*100/sq(1024); MSG(fprintf(stderr,"ad=%d", ad);) d=line_deviation(box1, aa[1][3], j); /* check if lower left and center point are directly joined */ MSG(fprintf(stderr,"x %d %d dist= %d/%d",x-x0,y-y0,d,sq(1024/4));) if (d >sq(1024/4)) Break; ad=ad-d*100/sq(1024); MSG(fprintf(stderr,"ad=%d", ad);) /* ToDo: calculate momentums or max derivations along lines to distinguish )]}> i1,i2 */ if (sdata->gchar) ad=98*ad/100; if (sdata->hchar) ad=99*ad/100; bc='>'; Setac(box1,bc,ad); break; } // --- test /\\ ------------------------------------------------ // if(bc==UNKNOWN) // if(!box1->dots) for(ad=d=100;dx>3 && dy>3;){ // min 4x4 for 4x6 font DBG( wchar_t c_ask='/'; ) if (sdata->holes.num > 0) Break; /* tolerant against a tiny hole */ #if 1 for(i=y=0;y2 || (i>0 && dy<16)) Break; #endif /* get the center as exact as possible */ i2=dx-1-loop(bp,dx-1,dy/2 ,dx,cs,0,LE) // be exact for small fonts +dx-1-loop(bp,dx-1,dy/2+dy%2-1,dx,cs,0,LE) + loop(bp, 0,dy/2 ,dx,cs,0,RI) + loop(bp, 0,dy/2+dy%2-1,dx,cs,0,RI); if (abs(i2-2*dx)>1+dx/2) Break; if (abs(i2-2*dx)> dx/2) ad=99*ad/100; i1=loop(bp,dx-1,dy/16,dx,cs,0,LE); // right side i3=loop(bp,dx-1,dy-1 ,dx,cs,0,LE); i4=loop(bp, 0,0 ,dx,cs,0,RI); // left side i6=loop(bp, 0,dy-1 ,dx,cs,0,RI); i=(box1->m4+box1->m3)/2-box1->m2; // // out_x(box1);printf("() %d %d %d %d %d %d %d\n",i,i1,i2,i3,i4,i5,i6); // ~lI for(i=i4,y=0;ydx/6+1 ) break; i=x; } if( ydx/6+1 ) break; i=x; } if( ydx/4 ) { Setac(box1,(bc='/'),ad);break; } if(i4<=dx/8 && i3<=dx/8 && i6-(dx-i1)>dx/4 ) { Setac(box1,(bc='\\'),ad);break; } Break; } // --- test ()<> ------------------------------------------------ // if(bc==UNKNOWN) // if(!box1->dots) for(ad=d=100;dx>1 && dy>4;){ // min 3x4 DBG( wchar_t c_ask='('; ) if (sdata->holes.num > 1) {Break;}; /* tolerant against a tiny hole */ #if 1 for(i=y=0;y2 || (i>0 && dy<16)) {Break;}; #endif /* look for the extrema => r1..r2 */ for(i=dx,r1=r2=y=dy/2-dy/8;y<=dy/2+dy/8;y++){ j=loop(bp, 0,y,dx,cs,0,RI); if(j==i) r2=y; if(jdy){ // 
from Aug06 vector-version of greater is used // if(i2==0 && 3*i5>dx && i4<=dx/8 && i6<=dx/8) { Setac(box1,(bc='>'),98);{Break;}; } if(i5==0 && 3*i2>dx && i1<=dx/8 && i3<=dx/8) { Setac(box1,(bc='<'),98);{Break;}; } } if( dx > 2 && 9*dx>=5*dy ){ // 4x6 screen-font (3*5) ad=98; if (dx<8) ad=99*ad/100; if (dx<6) ad=96*ad/100; if( 2*dx > JOB->res.avX && 4*dx>dy ) ad=98; // printf(" %d %d %d %d %d %d\n",i5,i1,i3,i2,i4,i6); if( i5==0 && i1<=dx/8+1 && i3<=dx/8+1 && i1+i3<=dx/8+1 && i2>=dx/2 && i4>=3*dx/4 && i6>=3*dx/4 ) { if (2*loop(bp, 0, y/2,dx,cs,0,RI)+1+dx/16=dx/2 && i1>=3*dx/4 && i3>=3*dx/4 ) { if (2*loop(bp,dx-1, y/2,dx,cs,0,LE)+1+dx/16m4+box1->m3)/2-box1->m2; // // out_x(box1);printf("() %d %d %d %d %d %d %d\n",i,i1,i2,i3,i4,i5,i6); if(2*i2i4+i6 && 2*dx=i){ Setac(box1,(bc=')'),98);break; } if(2*i2>i1+i3 && 2*i5=i){ if(2*i2<=i1+i3+1 || 2*i5>=i4+i6-1) ad=98*ad/100; if(2*i2<=i1+i3+2 || 2*i5>=i4+i6-2) ad=98*ad/100; for(x=y=0;yx ) x=i; } for(y=0;y<(dy+2)/4;y++){ i=loop(bp,0,y+dy/8,dx,cs,0,RI);if( i2 && dy>4 && dy>=2*dx;){ // (3,6) on 4x6 font DBG( wchar_t c_ask=']'; ) if (sdata->holes.num > 1) { Break;} /* tolerant against a tiny hole */ if (!hchar) ad=97*ad/100; for(y=0;yp,cs,2) == 2 && get_bw(x0,x1,y0+1,y0+1,box1->p,cs,2) == 2 ) {Break;}; if( get_bw(x0,x1,y1 ,y1 ,box1->p,cs,2) == 2 && get_bw(x0,x1,y1-1,y1-1,box1->p,cs,2) == 2 ) {Break;}; if( get_bw(x0 ,x0,y0 ,y1 ,box1->p,cs,2) == 0 || get_bw(x0+1 ,x0+1,y0 ,y1 ,box1->p,cs,2) == 0 ) if( get_bw(x0+dx/2,x1,y0+dy/4,y1-dy/4,box1->p,cs,1) == 0 ) { Setac(box1,(bc='['),ad);break; } if( get_bw(x1 ,x1,y0 ,y1 ,box1->p,cs,2) == 0 || get_bw(x1-1 ,x1-1,y0 ,y1 ,box1->p,cs,2) == 0 ) if( get_bw(x0,x1-dx/2,y0+dy/4,y1-dy/4,box1->p,cs,1) == 0 ) { Setac(box1,(bc=']'),ad);break; } break; } #if CODE_NOT_COMPLETED // --- test ] ------- for(ad=d=100;dx>2 && dy>3;){ DBG( wchar_t c_ask=']'; ) if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ if (sdata->holes.num > 0) ad=98*ad/100; /* # */ /* 1/8 distance to the center */ 
d=2*sq(128/16); /* now we check for the 4 ends of the x */ if (aa[0][2]>d) Break; if (aa[1][2]>d) Break; if (aa[2][2]>d) Break; if (aa[3][2]>d) Break; if (aa[3][0]-aa[0][0]<7*dx/8) Break; if (aa[2][0]-aa[1][0]<7*dx/8) Break; if (aa[1][1]-aa[0][1]<7*dy/8) Break; if (aa[2][1]-aa[3][1]<7*dy/8) Break; if (aa[3][0]-aa[0][0]<2) Break; /* to small */ if (aa[2][0]-aa[1][0]<2) Break; /* to small */ MSG( fprintf(stderr," aa %d %d %d %d %d %d %d %d d %d %d %d %d",\ aa[0][0]-x0,aa[0][1]-y0,aa[1][0]-x0,aa[1][1]-y0,\ aa[2][0]-x0,aa[2][1]-y0,aa[3][0]-x0,aa[3][1]-y0,\ aa[0][2],aa[1][2],aa[2][2],aa[3][2]);) /* left and right vertical line */ d=line_deviation(box1, aa[0][3], aa[1][3]); if (d>2*sq(1024/4)) Break; ad=(100-(d-sq(1024)/2)/sq(1024)/4)*ad/100; d=line_deviation(box1, aa[2][3], aa[3][3]); if (d>2*sq(1024/4)) Break; /* search uppermost left ^ */ i1=nearest_frame_vector(box1,aa[1][3],aa[2][3], x0, y0); x=box1->frame_vector[i1][0]; y=box1->frame_vector[i1][1]; if (y-y0 > 5*dy/8) Break; if (x-x0 > 5*dx/8) Break; /* search uppermost right ^ ~H */ i3=nearest_frame_vector(box1,aa[1][3],aa[2][3], x1, y0); if ( box1->frame_vector[i3][0]-x> dx/4 && box1->frame_vector[i3][1]-y<=dy/8) Break; /* check if upper left and lower right point are joined directly */ dbg[0]=d=line_deviation(box1,i1, aa[2][3]); if (d >2*sq(1024/4)) Break; /* check if lower left and lower left point are joined directly */ dbg[1]=d=line_deviation(box1, aa[1][3],i1); if (d >2*sq(1024/4)) Break; if (!hchar) ad=99*ad/100; if ( gchar) ad=98*ad/100; // \sc N ac=(wchar_t) ']'; Setac(box1,ac,ad); if (ad>=100) return ac; break; } #endif // --------- test ocr-a-[] -------------------------------- if(bc==UNKNOWN) for(ad=d=98;dx>5 && dy>7 && 2*dy>3*dx;){ // only for accurate font at the moment DBG( wchar_t c_ask='['; ) if (sdata->holes.num > 2) break; /* tolerant against a tiny hole */ if (!hchar) ad=97*ad/100; if( num_cross(0,dx-1, 0, 0,bp,cs) != 1 ) break; if( num_cross(0,dx-1,dy-1,dy-1,bp,cs) != 1 ) break; if ( 
loop(bp,dx-1,dy/2,dx,cs,0,LE) +loop(bp, 0,dy/2,dx,cs,0,RI) <= dx/4 ) break; // O for(y=dy/8;yp,cs,1) == 0) { Setac(box1,(bc='['),ad);break; } if( get_bw(x0,(5*x0+3*x1)/8,y0+3*dy/16,y1-3*dy/16,box1->p,cs,1) == 0) { Setac(box1,(bc=']'),ad);break; } break; } // --------- test {} -------------------------------- for(ad=d=99;dx>2 && dy>5 && 2*dy>3*dx;){ DBG( wchar_t c_ask='{'; ) if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ if (!hchar) ad=97*ad/100; for(y=0;y3*dx/4 ) ad=99*ad/100; if ( loop(bp,0, 0,dx,cs,0,RI)>3*dx/4 ) ad=99*ad/100; // < if ( loop(bp,0, 0,dy,cs,0,DO)=dx/8 ) ad=98*ad/100; // < if ( loop(bp,dx-2,dy-1,dy,cs,0,UP)>dy/4 ) Break; // f if ( get_bw(x0,x0,y0,y0+dy/4,box1->p,cs,1) == 1 || get_bw(x0,x0,y1-dy/4,y1,box1->p,cs,1) == 1 ) Break; Setac(box1,(bc='{'),ad);Break; } for(ad=d=99;dx>2 && dy>5 && 2*dy>3*dx;){ DBG( wchar_t c_ask='}'; ) if (!hchar) ad=97*ad/100; for(y=0;y3*dx/4 ) {ad=99*ad/100;} if ( loop(bp,dx-1, 0,dx,cs,0,LE)>3*dx/4 ) {ad=99*ad/100;} // > if ( loop(bp,dx-1, 0,dy,cs,0,DO)=dx/8 ) ad=98*ad/100; // < if ( loop(bp,1,dy-1,dy,cs,0,UP)>dy/4 ) Break; // ??? 
/* NOTE(review): the next lines finish the '}' test and the function that
 * begins above this span; they are reproduced unchanged. */
    if ( get_bw(x1,x1,y0,y0+dy/4,box1->p,cs,1) == 1
      || get_bw(x1,x1,y1-dy/4,y1,box1->p,cs,1) == 1 ) Break;
    Setac(box1,(bc='}'),ad);Break;
  }
  return box1->c;
}

#if 0
/* ---------- empty prototype function for copy and expand ---------- */
/* Disabled (#if 0) skeleton for a new test function: it only unpacks sdata
 * into the usual local variables and returns box1->c. */
static wchar_t ocr0_XXX(ocr0_shared_t *sdata){
  struct box *box1=sdata->box1;
  pix *bp=sdata->bp;
  int i,j,d,x,y,i0,i1,i2,i3,hchar=sdata->hchar,gchar=sdata->gchar,
      x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
  int dx=x1-x0+1,dy=y1-y0+1, /* size */
      ac,ad; /* tmp-vars */
  // --- test XXX ---------------------------------------------------
  return box1->c;
}
#endif

/* ----------------------- part9 -------------------------------- */
/* ocr0p9: rule tests for special symbols on the box sdata->box1.
 * Tests, in order: sharp s, '+', '$', '&', italic '&', '?', '!',
 * '*' (five beams), '*' (six beams), '@' and the section sign.
 *
 * Every test is written as a for(...){...} block that is left with
 * break/Break as soon as a rule fails; a test that succeeds registers
 * its candidate with Setac(box1, character, weight).
 * "Break" (capital B) is a macro defined elsewhere in this file --
 * presumably a break with optional debug output; TODO confirm.
 * c_ask is only the character label used by the debug macros.
 *
 * Returns bc, which starts as UNKNOWN and is assigned only by the sharp-s,
 * '&' and '?' tests.  Several tests return their character directly when
 * the weight ad is >= 100, and the '?' test always returns after Setac.
 *
 * NOTE(review): in this copy of the file a number of statements are visibly
 * truncated (undeclared names such as "yp" or "y0m2", loop headers without
 * a condition).  It looks as if everything from a '<' directly followed by
 * a letter up to the next '>' was lost.  The code is reproduced as found and
 * the most obvious places are flagged "garbled?"; recover the missing text
 * from a pristine copy of the file before compiling.
 */
static wchar_t ocr0p9(ocr0_shared_t *sdata){
  struct box *box1=sdata->box1;
  pix *bp=sdata->bp;
  int i,j,d,x,y,x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1;
  int dx=x1-x0+1,dy=y1-y0+1, /* size */
      i1,i2,i3,i4; /* tmp-vars */
  int xa,xb, /* used to store significant points of the char */
      dbg[9]={0,0,0,0,0,0,0,0,0}, /* debugging space */
      ya,ad,cs=sdata->cs;
  wchar_t ac,bc=UNKNOWN; // best letter
  int hchar; // char is higher than e
  int gchar; // char has ink lower than m3
  // --- hchar --- gchar -------------------------
  /* hchar is set when 2*y0 <= m1+m2, gchar when 2*y1 >= m3+m4 */
  hchar=0;if( 2*y0<=2*box1->m2-(box1->m2-box1->m1) ) hchar=1;
  gchar=0;if( 2*y1>=2*box1->m3+(box1->m4-box1->m3) ) gchar=1;
  // if the char is slightly moved down correction can be done
  if ( y0m2 && y1>box1->m3 && 2*y1m3+box1->m4) // moved /* garbled? */
  if( 2*(y0-(y1-box1->m3))<=2*box1->m2-(box1->m2-box1->m1) ) hchar=1;

  /* reserved for the future */
  // --- test beta,\3,sz,"s ---------------------------------------------
  if(bc==UNKNOWN && hchar)
  for(ad=d=100;dx>3 && dy>6;){ // min 4x7
    DBG( wchar_t c_ask='S'; )
    if (sdata->holes.num > 2) break; /* tolerant against a tiny hole */
    /* this part is provisional, should be changed!
       (sketch, collapsed in this copy:) a-\ | d b| / | \ -c /
     */
    if( num_cross(x0 ,x1 ,y0+dy/4 ,y0+dy/4 ,box1->p,cs) != 2
     && num_cross(x0 ,x1 ,y0+dy/4+1,y0+dy/4+1,box1->p,cs) != 2 ) break;
    for(i=1+dy/16,y=y0+dy/8;y0;y++){ /* garbled? */
      if( yp,cs) != 2 ) i--;} /* garbled? */
      else { if( num_cross(x0 ,x1 ,y,y,box1->p,cs) < 2 ) i--;}
      if( get_bw(x0,x0+dx/2,y,y,box1->p,cs,1) == 0 ) i--;
      if( yp,cs,1) == 0 ) i--; /* garbled? */
    }
    if( i<=0 ) break;
    // out_x(box1);
    for(y=y0+dy/3;yp,x1,y,dx,cs,0,LE); /* garbled? */
      if( i>=dx/8 ) break;
      i+=loop(box1->p,x1-i,y,dx,cs,1,LE);
      if( i>=dx/2 ) break;
    }
    if( y>=y1-dy/3 ) break;
    for(y=y0+dy/5;yp,cs,1) == 1 ) break; /* garbled? */
    if( y>=y0+dy/3 ) break;
    for(y=y0+dy/2;yp,cs,1) == 1 ) break; /* garbled? */
    if( y>=y1 ) break;
    for(y=y1-dy/3;yp,x1,y,dx,cs,0,LE); /* garbled? */
      if( i>dx/4
       && get_bw(x1-dx/8,x1-dx/8,y,y1,box1->p,cs,1) == 1 ) break;
    }
    if( ym3==0 || 2*y1m3+box1->m4 ) /* garbled? */
    if( loop(box1->p,x1,y1, dx,cs,0,LE)==0
     && loop(box1->p,x1,y1-dy/4,dx,cs,0,LE)>dx/8 ) break; // ~R
    for(x=x0+dx/4;xp,cs) == 3 ) break; /* garbled? */
    if( x>=x1-dx/4 ) break;
    i=loop(bp,dx/2,dy-1,dy,cs,0,UP)+dy/64; // Jul00
    for(x=dx/5;x i ) break; /* garbled? */
    if( x==dx/2 ) break;
    x=x0+loop(bp,0,dy/4,dx,cs,0,RI);
    for(;xp,cs,1) == 0 ) break; /* garbled? */
    if( xp,cs,NULL) != 0 ) break; /* garbled? */
    if (sdata->holes.num != 0) break;
    bc=LATIN_SMALL_LETTER_SHARP_S;
    Setac(box1,(wchar_t)bc,98);
    break;
  }
  // --- test + ------------------------------------------------
  for(ad=d=100;dx>2 && dy>2;){ // min 3x3
    DBG( wchar_t c_ask='+'; )
    if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
    xa=(dx+1)/3-1; ya=(dy+1)/3-1;
    xb=(dx+1)/4;
    /* none of the four corner regions may contain ink */
    if( get_bw(x0,x0+xa,y0,y0+ya,box1->p,cs,1) == 1 ) Break;
    if( get_bw(x0,x0+xa,y1-ya,y1,box1->p,cs,1) == 1 ) Break;
    if( get_bw(x1-xb,x1,y0,y0+ya,box1->p,cs,1) == 1 ) Break;
    if( get_bw(x1-xa,x1,y1-ya,y1,box1->p,cs,1) == 1 ) Break;
    for(i=0,y=y0+ya;y<=y1-ya;y++){ // horizontal line
      if( get_bw(x0+dx/9,x1-dx/9,y,y,box1->p,cs,2) == 0 ) { i=y; break; }
    }
    if (3*dx<2*dy) ad=99*ad/100; // ~t
    if( !i ) Break;
    ac=(wchar_t) '+';
    Setac(box1,ac,ad);
    if (ad>=100) return ac;
    break;
  }
  // --- test $ ------------------------------------------------
  for(ad=d=99;dx>3 && dy>5;){ // min 3x4
    DBG( wchar_t c_ask='$'; )
    if (sdata->holes.num != 2) Break;
    if( get_bw(x0,x0+dx/5,y0 ,y0+dy/18,box1->p,cs,1) == 1 ) Break;
    if( get_bw(x0,x0+dx/9,y1-dy/23,y1 ,box1->p,cs,1) == 1 ) Break;
    if( get_bw(x1-dx/9,x1,y0 ,y0+dy/18,box1->p,cs,1) == 1 ) Break;
    if( get_bw(x1-dx/5,x1,y1-dy/23,y1 ,box1->p,cs,1) == 1 ) Break;
    if( get_bw(x0,x0+dx/3,y0+dy/3 ,y0+dy/2 ,box1->p,cs,1) != 1 ) Break;
    if( get_bw(x1-dx/3,x1,y1-dy/2 ,y1-dy/3 ,box1->p,cs,1) != 1 ) Break;
    i1=x0+loop(box1->p,x0,y0,dx,cs,0,RI); if( i1x1-dx/5 ) Break; /* garbled? */
    i2=x0+loop(box1->p,x0,y1,dx,cs,0,RI); if( i2i1 ) Break; /* garbled? */
    ad= get_line2(i1,y0,i2,y1,box1->p,cs,100)*ad/100;
    // check upper left and lower right half circle, $
    for (x=0,i3=y=0;yp,cs) == 2 ) { /* garbled? */
      i = loop(box1->p,x0,y0+dy/2-y,dx,cs,0,RI);
      if (i>x) { x=i; i3=y0+dy/2-y; }
    }
    if (x<=dx/4) Break;
    for (x=0,i4=y=0;yp,cs) == 2 ) { /* garbled? */
      i = loop(box1->p,x0,y0+dy/2+y,dx,cs,0,RI);
      if (i>x) { x=i; i4=y0+dy/2+y; }
    }
    if (x<=dx/4) Break;
    if (ad<95) Break;
    ac=(wchar_t) '$';
    Setac(box1,ac,ad);
    if (ad>=100) return ac;
    break;
  }
  // --- test & ------------------------------------------------
  for(ad=d=99;dx>3 && dy>4;){ /* 4x6 font */
    DBG( wchar_t c_ask='&'; )
    if (sdata->holes.num != 2) Break;
    if( get_bw(x1-dx/9,x1,y0,y0+dy/4,box1->p,cs,1) == 1 ) Break; // g
    if( loop(bp,dx/2,0,dy,cs,0,DO)>dy/2) Break;
    i1=loop(bp,0,dy/8 ,dx,cs,0,RI); if (i1>dx/2) Break;
    i =loop(bp,0,dy/4 ,dx,cs,0,RI); if (i1>dx/2) Break;
    if (idx/2) Break; /* garbled? */
    i =loop(bp,0,dy-dy/4-1,dx,cs,0,RI); if (i3>dx/2) Break;
    if (ii1) Break; /* garbled? */
    for( i2=0, y=dy/4; y<=dy/2+1; y++ ){
      i =loop(bp,0,y,dx,cs,0,RI); if( i>i2 ) i2=i;
    }
    if(2*i2-i1-i3<1) Break;
    // if( num_hole(x0,x1 ,y0,y1,box1->p,cs,NULL)!=2 ) Break;
    if( num_hole(x0,x1-dx/4,y0,y1,box1->p,cs,NULL)!=2 ) Break;
    if( num_cross(dx-1,dx-1,dy/4,dy-1,bp,cs) < 1 ) Break;
    for( x=dx-1; x>=dx/2; x-- ){
      if( num_cross(x,x,dy/4,dy-1,bp,cs) > 1 ) break;
    }
    if( x<=3*dx/4 && x 3 ) { // glued ah /* garbled? */
      if (dy>15) { Break; } else ad=96*ad/100;
    }
    if (!hchar) ad=98*ad/100;
    bc=(wchar_t) '&';
    Setac(box1,bc,ad);
    if (ad>=100) return bc;
    break;
  }
  // --- test \it & like \epsilon\tau ------------------------------
  if(bc==UNKNOWN)
  for(ad=d=100;dx>7 && dy>7;){
    DBG( wchar_t c_ask='&'; )
    if (sdata->holes.num > 2) break; /* tolerant against a tiny hole */
    if( num_cross(0,dx-1, dy/4, dy/4,bp,cs) != 3 ) break;
    if( num_cross(0,dx-1, dy/2, dy/2,bp,cs) != 4 ) break;
    if( num_cross(dx/2,dx-1,dy/2, dy/2,bp,cs) != 2 ) break;
    if( num_cross(0,dx-1,3*dy/4,3*dy/4,bp,cs) != 2 ) break;
    if( num_cross(0,dx-1, dy-1, dy-1,bp,cs) != 1 ) break;
    if( num_cross( 0, 0,0,dy-1,bp,cs) != 1 ) break;
    if( num_cross( dx/3, dx/3,0,dy-1,bp,cs) != 4 ) break;
    if( num_cross(13*dx/16,13*dx/16,0,dy/8,bp,cs) != 0 ) break;
    if( num_cross(4*dx/8,4*dx/8,dy-dy/4,dy-1,bp,cs) != 1 ) break;
    if( num_cross(3*dx/8,3*dx/8,dy-dy/4,dy-1,bp,cs) != 1 ) break;
    if( num_cross(5*dx/8,5*dx/8,dy-dy/4,dy-1,bp,cs) != 1 ) break;
    if( num_hole(x0 ,(x0+x1)/2,y0, y1,box1->p,cs,NULL) != 1 ) break;
    if( num_hole(x0+dx/8,x1-dx/4,y0,y1-dy/4,box1->p,cs,NULL) != 1 ) break;
    ac=(wchar_t) '&';
    Setac(box1,ac,ad);
    if (ad>=100) return ac;
    break;
  }
  // --- test ? ---------------------------------------------------
  for(ad=d=98;dx>2 && dy>5;){ // min 3x(4+2)
    DBG( wchar_t c_ask='?'; )
    if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
    if ( num_cross(x0, x1, y0, y0, box1->p, cs) !=1 ) Break; // ~?
    if ( num_cross(x0, x1, y1, y1, box1->p, cs) > 1 ) Break; // ~?
    for(y=y0;yp,cs,1) != 1 ) break; // lower end /* garbled? */
    if (2*ym4) { // probably lower dot not caught in box? /* garbled? */
      if (get_bw(x0+1,x1-1,y1+1,box1->m4,box1->p,cs,1) != 1 ) Break;
      i1=box1->m4;
      for(;i1>y1;i1--) // new y1
        if( get_bw(x0, x1,i1,i1,box1->p,cs,1) == 1 ) break; // lower dot
    }
    y--; i=y-y0+1; // new dy
    for (y=0;yp, cs) == 2 ) break; /* garbled? */
    if (y==dy/2) Break;
    // if( num_hole( x0, x1, y0, y1, box1->p,cs,NULL) > 0 ) Break;
    if (sdata->holes.num > 0) Break;
    for(y=y0+dy/2;y<=i1;y++)
      if( get_bw(x0,x1,y,y,box1->p,cs,1) == 0 ) break;
    if( y==i1 ) Break;
    for( ;y<=i1;y++)
      if( get_bw(x0,x1,y,y,box1->p,cs,1) == 1 ) break;
    if( get_bw(x0,x1,y,y,box1->p,cs,1) != 1 ) Break;
    if( get_bw(x0+7*dx/8,x1,y,i1,box1->p,cs,1) == 1 ) Break; // broken thin 2
    bc='?';
    Setac(box1,(wchar_t)bc,98);
    return bc;
  }
  // --- test !| ---------------------------------------------------
  for(ad=d=99; dy>4 && dy>2*dx;){ // min 3x4
    DBG( wchar_t c_ask='!'; )
    if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
    // measure thickness
    if (num_cross(x0,x1,y0 ,y0 ,box1->p,cs)!=1) Break;
    if (num_cross(x0,x1,y0+dy/2,y0+dy/2,box1->p,cs)!=1) Break;
    for(y=y0;yp,cs,1) != 1 ) break; // lower end /* garbled? */
    if (2*ybox1->m3-dy/8) ad=ad*97/100; /* missing dot? */ /* garbled? */
    i1=y1;
    if (y==y1 && box1->m4) { // probably lower dot not caught in box?
      if ((dx>2 && get_bw(x0+1,x1-1,y1+1,box1->m4,box1->p,cs,1) == 1)
       || (dx<3 && get_bw(x0 ,x1 ,y1+1,box1->m4,box1->p,cs,1) == 1 )) {
        i1=box1->m4;
        for(;i1>y1;i1--) // new y1
          if( get_bw(x0, x1,i1,i1,box1->p,cs,1) == 1 ) break; // lower dot
      }
    }
    i2=i1;
    for( i1=0,y=y0;y<=i2;y++){
      i=num_cross(x0,x1,y,y,box1->p,cs);
      if(i>1) break;
      if(i==0 && i1==0) i1=y;
    }
    if(y<=i2 || i1==0 || i1dx/4+1 ) Break; // f /* garbled? */
    if (!hchar) ad=96*ad/100;
    Setac(box1,(wchar_t)'!',ad);
    break;
  }
  // --- test * five edges (jags? beams?) what is the right english word? ----
  for(ad=d=99;dx>2 && dy>4;){
    DBG( wchar_t c_ask='*'; )
    if (sdata->holes.num > 0) Break; /* tolerant against a tiny hole */
    if( num_cross(0,dx-1, 0,dy-1,bp,cs) != 1
     && num_cross(0,dx-1, 1,dy-2,bp,cs) != 1 ) Break;
    if( num_cross(0,dx-1,dy-1,dy-1,bp,cs) != 2
     && num_cross(0,dx-1,dy-2,dy-2,bp,cs) != 2 ) Break;
    x=dx/2;y=(6*dy+8)/16; // center point 6/8=6/2^3 rounded
    /* upwards from center */
    dbg[0]=i=get_line2(x,y,x ,0,bp,cs,100); if(i<95) Break;
    if (dx<8) /* be exact on small fonts, where get_line2 returns 100 (ToDo change) */
      if (get_bw(x,x,0,y,bp,cs,2)==2) Break;
    /* horizontal */
    dbg[1]=i=get_line2(0,y,dx-1,y,bp,cs,100); if(i<95) Break;
    if (dy<8)
      if (get_bw(0,dx-1,y ,y ,bp,cs,2)==2
       && get_bw(0,dx-1,y+1,y+1,bp,cs,2)==2) Break;
    /* down (right) */
    i=get_line2(x,y,(5*dx+4)/8,dy-1,bp,cs,100);
    j=get_line2(x,y,(6*dx+4)/8,dy-1,bp,cs,100); if(j>i) dbg[2]=i=j;
    if(i<95) Break;
    /* down (left) */
    dbg[3]=i=get_line2(x, y,(2*dx+4)/8,dy-1,bp,cs,100); if(i<95) Break;
    // straight up
    /* check for lower gap at bottom */
    dbg[4]=i=get_bw( x, x,dy-1-dy/8,dy-1,bp,cs,1); if(i==1) Break;
    dbg[5]=i=get_line2( dx/4,dy/4, 0,0,bp,cs,101); if(i<95) Break; // upper left gap
    dbg[6]=i=get_line2(dx-1-dx/4,dy/4,dx-1,0,bp,cs,101); if(i<95) Break; // upper right gap
    MSG(fprintf(stderr,"%d %d %d %d %d %d %d",dbg[0],dbg[1],dbg[2],dbg[3],dbg[4],dbg[5],dbg[6]);)
    Setac(box1,(wchar_t)'*',ad);
    break;
  }
  // --- test * six edges (jags? beams?) what is the right english word? ----
  for(ad=d=100;dx>4 && dy>4;){
    DBG( wchar_t c_ask='*'; )
    if (sdata->holes.num > 0) Break; /* tolerant against a tiny hole */
    if( num_cross(0,dx-1, dy/8, dy/8,bp,cs) != 3
     && num_cross(0,dx-1, 1+dy/8, 1+dy/8,bp,cs) != 3) Break;
    if( num_cross(0,dx-1,dy-2-dy/8,dy-2-dy/8,bp,cs) != 3) Break;
    if( num_cross(0 , 0, 0,dy-1,bp,cs) != 2) Break;
    if( num_cross(dx-1,dx-1, 0,dy-1,bp,cs) != 2) Break;
    if( num_cross(0,dx-1,dy/2,dy/2,bp,cs) != 1) Break;
    if( num_cross( 0 ,dx/8,dy/2,dy/2,bp,cs) != 0) Break;
    if( num_cross(dx-1-dx/8,dx-1,dy/2,dy/2,bp,cs) != 0) Break;
    if (dx>5) {
      dbg[0]=i=get_line2(0,dy-2-dy/8,dx-1,dy/8,bp,cs,100); if(i<95) Break; // black upwards beam
      dbg[1]=i=get_line2(0,dy/8,dx-1,dy-2-dy/8,bp,cs,100); if(i<95) Break; // black downwards beam
      /* check vertical line */
      dbg[2]=i=get_line2(dx/2,0,dx/2, dy-1,bp,cs,100); if(i<95) Break;
    }
    MSG(fprintf(stderr,"%d %d %d %d %d %d",dbg[0],dbg[1],dbg[2],dbg[3],dbg[4],dbg[5]);)
    Setac(box1,(wchar_t)'*',98);
    break;
  }
  // --- test @ - a popular char should be detectable! added in version v0.2.4a5
  if(bc==UNKNOWN)
  for(ad=d=99;dx>5 && dy>7;){
    DBG( wchar_t c_ask='@'; )
    if (sdata->holes.num > 3) Break; /* tolerant against a tiny hole */
    if (loop(bp, 0,dy/2,dx,cs,0,RI)>dx/4) Break;
    if (loop(bp,dx-1,dy/2,dx,cs,0,LE)>dx/4) Break;
    if (loop(bp,dx/2,dy-1,dy,cs,0,UP)>dx/8) Break;
    if (loop(bp,dx/2, 0,dy,cs,0,DO)>dx/8) Break;
    /* ..@@@@..<- 8*10 example
       .@@..@@.
       @@....@@
       @@..@@@@<
       @@.@@.@@
       @@.@@.@@
       @@..@@@.
       @@......
       .@@...@@
       ..@@@@@.<-
     */
    x=6*dx/16; y=dy/2;
    i=num_cross(0,dx-1,y,y,bp,cs); if (i<3 || i>4) Break;
    if( i != 4 && dx>8 ) ad=98*ad/100;
    i=num_cross(x,x,0,dy-1,bp,cs); if (i<2) Break;
    if (i!=4) { j=num_cross(x+1,x+1,0,dy-1,bp,cs);
      if (abs(4-j)4) Break; /* garbled? */
    if (i!=4) ad=97*ad/100;
    if( num_cross(0, x,y,y,bp,cs) != 2 ) Break;
    if( num_cross(x,dx-1,y,y,bp,cs) != 2 ) Break;
    if( num_cross(x,x,0, y,bp,cs) != 2 ) Break;
    if( num_cross(x,x,y,dy-1,bp,cs) != 2 ) Break;
    if (dx>7) {
      // if( num_hole(x0,x1,y0,y1,box1->p,cs,NULL) != 1 ) Break;
      if (sdata->holes.num != 1) Break;
      if( num_hole(x0+dx/8,x1-3*dx/16,y0+dy/8,y1-dy/8,box1->p,cs,NULL) != 1 ) Break;
    }
    Setac(box1,(wchar_t)'@',ad);
    break;
  }
  // --- test paragraph v0.2.6
  if(bc==UNKNOWN && hchar)
  for(ad=d=100;dx>4 && dy>15;){
    DBG( wchar_t c_ask='$'; )
    if (sdata->holes.num > 3) break; /* tolerant against a tiny hole */
    if( get_bw( 0,dx/2,3*dy/4,3*dy/4,bp,cs,1) == 1 ) break;
    if( get_bw(3*dx/4,dx-1,3*dy/4,3*dy/4,bp,cs,1) == 0 ) break;
    if( get_bw( 0,dx/4, dy/4, dy/4,bp,cs,1) == 0 ) break;
    if( get_bw( dx/2,dx-1, dy/4, dy/4,bp,cs,1) == 1 ) break;
    if( get_bw(dx/2,dx/2, 0, dy/4,bp,cs,1) == 0 ) break;
    if( get_bw(dx/2,dx/2,dy-1-dy/4, dy-1,bp,cs,1) == 0 ) break;
    if( num_cross(dx/2,dx/2,0,dy-1,bp,cs) != 4 ) break;
    if( num_cross(x0,x1,y0+dy/2,y0+dy/2,box1->p,cs) != 2 ) break;
    if( num_hole( x0,x1,y0+dy/4,y1-dy/4,box1->p,cs,NULL) != 1 ) break;
    Setac(box1,SECTION_SIGN,96);
    break; // paragraph=0xA7=167
  }
  return bc;
}

/* ----------------------- partx -------------------------------- */
/* ocr0px: rule tests for ligatures and further special characters on the
 * box sdata->box1.  Tests, in order: ff ligature, ae, AE, o/O with stroke,
 * cent sign, euro sign, c/C with cedilla, '#', bullet, '|', '%' and
 * capital Omega.  Each test is a for(...){...} block left with break/Break;
 * candidates are registered with Setac(box1, character, weight).
 * Returns bc (assigned only by the Omega test, otherwise UNKNOWN); several
 * tests return their character directly when the weight ad is >= 100.
 */
static wchar_t ocr0px(ocr0_shared_t *sdata){
  struct box *box1=sdata->box1;
  pix *bp=sdata->bp;
  int i,j,d,x,y,x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1;
  int dx=x1-x0+1,dy=y1-y0+1, /* size */
      i1,i2,i3,i4,j1,cs=sdata->cs; /* tmp-vars */
  int ya,ad; /* used to store significant points of the char */
  wchar_t ac,bc=UNKNOWN; // best letter
  int hchar; // char is higher than e
  int gchar; // char has ink lower than m3
  // --- hchar --- gchar -------------------------
  hchar=0;if(
2*y0<=2*box1->m2-(box1->m2-box1->m1) ) hchar=1; gchar=0;if( 2*y1>=2*box1->m3+(box1->m4-box1->m3) ) gchar=1; // if the char is slightly moved down correction can be done if ( y0m2 && y1>box1->m3 && 2*y1m3+box1->m4) // moved if( 2*(y0-(y1-box1->m3))<=2*box1->m2-(box1->m2-box1->m1) ) hchar=1; /* reserved for special chars, to test at the end */ // --- test 'ff' --------------------------------------------------- // ToDo: better check and call test 'f' and 'f' with subboxes if( bc==UNKNOWN ) for(ad=98;dx>4 && dy>6;){ // Dec00 body copied from H DBG( wchar_t c_ask='f'; ) if (sdata->holes.num > 2) break; /* tolerant against a tiny hole */ if( num_cross(0,dx-1, dy/4 , dy/4 ,bp,cs) != 2 && num_cross(0,dx-1,3*dy/16,3*dy/16,bp,cs) != 2 ) break; if( num_cross(0,dx-1,3*dy/4 ,3*dy/4 ,bp,cs) != 2 && num_cross(0,dx-1,3*dy/4+1,3*dy/4+1,bp,cs) != 2 ) break; if( loop(bp,0 ,dy/8,dx,cs,0,RI) + loop(bp,dx-1,dy/8,dx,cs,0,LE)>dx/2 ) break; // ~A for( j1=0,i=1,y=y0+dy/10; yp,x0 ,y,dx,cs,0,RI) +loop(box1->p,x1 ,y,dx,cs,0,LE); if( j>10*dx/16 ) i=0; if ( j>j1 ) j1=j; } if( !i ) break; for( x=dx/4; x 3*dy/8 ) break; if ( 10*y > dy ){ /* italic */ i=loop(bp,x ,dy-y,dx,cs,0,RI); if( i>1 && y+loop(bp,x+i-1,dy-y,dy,cs,0,UP)>3*dy/8 ) break; } } if( x>=dx/2 ) break; x=loop(box1->p,x0 ,y1-dy/8,dx,cs,0,RI) +loop(box1->p,x1 ,y1-dy/8,dx,cs,0,LE); for( i=1,y=dy/4; ydx/5 ) i=0; } if( !i ) break; // ~K Jul00 for( i=0,ya=y=y0+dy/4; yp,x0 ,y,dx,cs,0,RI); j=loop(box1->p,x0+j,y,dx,cs,1,RI); if( j>i ) { i=j; ya=y; } } if( i<=dx/2 ) break; ya-=y0; if( num_cross(0,dx-1,ya ,ya ,bp,cs) != 1 && num_cross(0,dx-1,ya+1,ya+1,bp,cs) != 1 ) break; /* Dec00 */ for( y=ya; y 2 && num_cross(0,dx-1,y+1,y+1,bp,cs) > 2 ) break; if ( yp,cs,1) == 0 ) i=0; } if( !i ) break; for(i=1,x=x0+dx/4;x<=x1-dx/4 && i;x++){ if( get_bw( x, x,y1-dy/4,y1 ,box1->p,cs,1) == 0 ) i=0; } if( i ) break; for(i=1,x=x0+dx/4;x<=x1-dx/4 && i;x++){ if( num_cross(x,x,y0+dy/8,y1-dy/8, box1->p,cs) == 1 ) i=0; } if( i ) break; for(i=1,y=y0;y<=y0+dy/4 && 
i;y++){ if( num_cross(x0,x1,y,y, box1->p,cs) == 2 ) i=0; } if( i ) break; for(i=1,y=y1-dy/4;y<=y1 && i;y++){ if( num_cross(x0,x1,y,y, box1->p,cs) == 2 ) i=0; } if( i ) break; if( num_cross(x0 ,x0+dx/8 ,y0+dy/8 ,y0 ,box1->p,cs) != 0 ) ad=96*ad/100; if( get_bw(x1-dx/8, x1 , y0, y0+dy/8,box1->p,cs,1) != 1 ) break; if( get_bw(x0 , x0+dx/8, y1-dy/8, y1,box1->p,cs,1) != 1 ) break; i1=loop(bp,dx-1, dy/4,dx,cs,0,LE); if(i1>dx/2) break; i2=loop(bp,dx-1, dy/2,dx,cs,0,LE); if(i2i1+dx/8) break; i3=loop(bp,dx-1,dy-1-dy/4,dx,cs,0,LE); if(i3i2+dx/8) break; if(abs(i1+i3-2*i2)>dx/16+1) break; if( num_hole(x0,x1,y0+dy/4,y1,box1->p,cs,NULL) != 0 ) break; if (!hchar) ad=96*ad/100; if (!gchar) ad=99*ad/100; ac=LATIN_SMALL_LIGATURE_FF; Setac(box1,ac,ad); break; } // --- test ae --------------------------------------------------- if( bc==UNKNOWN ) for(ad=98;dx>4 && dy>6;){ // provisorium DBG( wchar_t c_ask=LATIN_SMALL_LETTER_AE; ) if (sdata->holes.num > 4) Break; /* tolerant against a tiny hole */ if( num_cross( dx/4,dx-1,3*dy/16,3*dy/16,bp,cs) != 2 && num_cross(dx-1-dx/4,dx-1,3*dy/16,3*dy/16,bp,cs) != 1 ) Break; if( num_cross(0,dx-1,3*dy/ 4,3*dy/ 4,bp,cs) < 2 ) Break; if( num_cross(0,dx-1, 0, dy-1,bp,cs) < 3 ) Break; if( num_cross(dx-1,0, 0, dy-1,bp,cs) < 3 ) Break; if( num_cross(0,dx-1, dy/16, dy/16,bp,cs) < 2 ) if( num_cross(0,dx-1,1+dy/16,1+dy/16,bp,cs) < 2 ) Break; if( num_cross(0,dx-1,dy-1-dy/16,dy-1-dy/16,bp,cs) < 2 ) Break; for( x=0,i2=y=dy/4; y<3*dy/4; y++ ){ j=loop(bp,0,y,dx,cs,0,RI); if(j>x) { i2=y; x=j; } } if( x3*dx/4 ) Break; for( x=0,i4=y=dy/4; y<3*dy/4; y++ ){ j=loop(bp,dx-1,y,dx,cs,0,LE); if(j>x) { i4=y; x=j; } } if( x3*dx/4 ) Break; for( x=0,i4=y=dy/8; y<3*dy/4; y++ ){ j=loop(bp,dx-1 ,y,dx,cs,0,LE); j=loop(bp,dx-1-j,y,dx,cs,1,LE); if(j>x) { i4=y; x=j; } } if( xp,cs,NULL) != 1 ) Break; if( num_hole(x0+dx/2-1,x1,y0,y1-dy/4,box1->p,cs,NULL) != 1 ) Break; ac=LATIN_SMALL_LETTER_AE; Setac(box1,ac,ad); if (ad>=100) return ac; break; } // --- test AE 
--------------------------------------------------- if( bc==UNKNOWN ) for(ad=98;dx>5 && dy>6;){ // provisorium DBG( wchar_t c_ask=LATIN_CAPITAL_LETTER_AE; ) if (sdata->holes.num > 2) Break; /* tolerant against a tiny hole */ if( num_cross(0,dx-1,3*dy/16,3*dy/16,bp,cs) < 2 ) Break; if( num_cross(0,dx-1,3*dy/ 4,3*dy/ 4,bp,cs) < 2 ) Break; if( num_cross(0,dx-1, 0, dy-1,bp,cs) < 3 ) Break; if( num_cross(0,dx-1, dy/16, dy/16,bp,cs) != 1 && num_cross(0,dx-1, dy/32, dy/32,bp,cs) != 1 && num_cross(0,dx-1, 0, 0,bp,cs) != 1 ) Break; // check for upper horizontal line j=loop(bp,dx-1 ,0,dx,cs,0,LE); x=j; j=loop(bp,dx-1-j,0,dx,cs,1,LE); i=loop(bp,dx-1 ,1,dx,cs,0,LE); if (ij) j=i; if (x>dx/8) Break; if (jx) break; x=j; j=loop(bp, j,y,dx,cs,1,RI); if(j>i1) { i1=j; i2=y; } j=loop(bp,dx-1 ,y,dx,cs,0,LE); j=loop(bp,dx-1-j,y,dx,cs,1,LE); if(j>i3) { i3=j; i4=y; } } if( y<3*dy/4 || i1i1) { i1=j; } j=loop(bp,dx-1 ,dy-1-y,dx,cs,0,LE); j=loop(bp,dx-1-j,dy-1-y,dx,cs,1,LE); if(j>i3) { i3=j; } } if( i1<=dx/4 || i3<=dx/4 ) Break; for( x=dx-1-dx/8; x>dx/2; x-- ){ // look for right the E if( num_cross(x,x, 0,dy-1,bp,cs) == 3 ) if( num_cross(x,x, 0,dy/4,bp,cs) == 1 ) if( num_cross(x-1,dx-1-dx/8,3*dy/4,3*dy/4,bp,cs) == 0 ) if( num_cross(x,x,3*dy/4,dy-1,bp,cs) == 1 ) break; } if (x<=dx/2) Break; // not found if (sdata->holes.num != 1) Break; if( num_hole(x0,x0+3*dx/4,y0,y1-dy/4,box1->p,cs,NULL) != 1 ) Break; // if( num_hole(x0, x1,y0,y1 ,box1->p,cs,NULL) != 1 ) Break; ac=LATIN_CAPITAL_LETTER_AE; Setac(box1,ac,ad); if (ad>=100) return ac; break; } // --- test /0 /o /O O_WITH_STROKE ----------------------------------------- for(ad=99;dx>4 && dy>4;){ // provisorium DBG( wchar_t c_ask=LATIN_SMALL_LETTER_O_WITH_STROKE; ) if (sdata->holes.num > 3) Break; /* tolerant against a tiny hole */ if( num_cross( 0,dx-1,dy/2,dy/2,bp,cs) != 3 ) Break; if( num_cross(dx/2,dx/2, 0,dy-1,bp,cs) != 3 ) Break; if (loop(bp,dx-1,3*dy/8,dx,cs,0,RI)>dx/8) Break; if (loop(bp, 0,5*dy/8,dx,cs,0,RI)>dx/8) Break; if( num_cross( 
0,dx-1, 0, 0,bp,cs) > 2 ) Break; if( num_cross(dx/4,dx-1, 0, 0,bp,cs) > 2 ) Break; if( num_cross( 0,dx-1,dy-1,dy-1,bp,cs) > 2 ) Break; if( num_cross( 0,3*dx/4,dy-1,dy-1,bp,cs) > 2 ) Break; if( num_cross( 0, 0, 0,dy-1,bp,cs) > 2 ) Break; if( num_cross(dx-1,dx-1, 0,dy-1,bp,cs) > 2 ) Break; if( num_cross( 0, 0,dy/4,dy-1,bp,cs) > 2 ) Break; if( num_cross(dx-1,dx-1, 0,3*dy/4,bp,cs) > 2 ) Break; i1 =loop(bp,dx-1 , 0,dx,cs,0,LE); if( i1>dx/8 ) Break; i1+=loop(bp,dx-1-i1, 0,dx,cs,1,LE); if( i1>dx/3 ) Break; i1=dx-1-i1; i2 =loop(bp, 0,dy-1,dx,cs,0,RI); if( i2>dx/8 ) Break; for(y=1;y3*dx/16 ) break; } if( yholes.num != 2) Break; // if( num_hole(x0,x1,y0,y1,box1->p,cs,NULL) != 2 ) Break; if ( hchar && 2*y0m1+box1->m2 ) ac=LATIN_CAPITAL_LETTER_O_WITH_STROKE; else ac=LATIN_SMALL_LETTER_O_WITH_STROKE; Setac(box1,ac,ad); if (ad>=100) return ac; break; } // --- test /c /C C_WITH_STROKE CENT_SIGN -------------------------- // here only the version with a continuously vertical line (not broken variant) if( bc==UNKNOWN ) for(ad=98;dx>4 && dy>4;){ // provisorium DBG( wchar_t c_ask=CENT_SIGN; ) if (sdata->holes.num > 2) Break; /* tolerant against a tiny hole */ if( num_cross( 0,dx-1,dy/2,dy/2,bp,cs) != 2 ) Break; if( num_cross(0,dx-1-dx/4,dy/2,dy/2,bp,cs) != 2 ) Break; if( num_cross(dx/2,dx/2, 0,dy-1,bp,cs) != 3 ) Break; if( num_cross( 0,dx-1, 0, 0,bp,cs) > 2 ) Break; if( num_cross(dx/4,dx-1, 0, 0,bp,cs) > 2 ) Break; if( num_cross( 0,dx-1,dy-1,dy-1,bp,cs) > 2 ) Break; if( num_cross( 0,3*dx/4,dy-1,dy-1,bp,cs) > 2 ) Break; if( num_cross( 0, 0, 0,dy-1,bp,cs) > 2 ) Break; if( num_cross(dx-1,dx-1, 0,dy-1,bp,cs) > 3 ) Break; if( num_cross( 0, 0,dy/4,dy-1,bp,cs) > 2 ) Break; if( num_cross(dx-1,dx-1, 0,3*dy/4,bp,cs) > 3 ) Break; i1 =loop(bp,dx-1 , 0,dx,cs,0,LE); if( i1>dx/4 ) Break; i1+=loop(bp,dx-1-i1, 0,dx,cs,1,LE); if( i1>dx/4 ) Break; i1=dx-1-i1; i2 =loop(bp, 0,dy-1,dx,cs,0,RI); if( i2>dx/4 ) Break; for(y=0;ydx/16+1) x-=dx/16+1; j=loop(bp,x,y,dx,cs,0,RI); // fprintf(stderr,"\n x=%d 
j=%d",x,j); if( j>(dx+4)/8 ) ad=96*ad/100; if( j>(dx+2)/4 ) break; } if( yp,cs,NULL) != 1 ) Break; if (sdata->holes.num != 1) Break; ac=CENT_SIGN; Setac(box1,ac,ad); if (ad>=100) return ac; break; } // --- test EURO_CURRENCY_SIGN ----------------------------------------- if( bc==UNKNOWN ) for(ad=98;dx>4 && dy>6;){ // provisorium DBG( wchar_t c_ask='&'; ) if (sdata->holes.num > 1) break; /* tolerant against a tiny hole */ if( num_cross(dx/2,dx/2, 0,dy-1,bp,cs) != 4 ) break; if( num_cross( 0,dx-1, 0, 0,bp,cs) != 1 ) break; if( num_cross( 0,dx-1,dy-1,dy-1,bp,cs) != 1 ) break; if( num_cross( 0,dx-1,dy/2,dy/2,bp,cs) != 1 ) break; for(i=0,y=dy/4;ydx/4 ) break; j=loop(bp,x,y,dx,cs,1,RI); if( j>i ) i=j; } if( ydx/2 ) break; } if( y>=dy-dy/4-1 ) break; // if( num_hole(x0,x1,y0,y1,box1->p,cs,NULL) != 0 ) break; if (sdata->holes.num != 0) break; ac=EURO_CURRENCY_SIGN; Setac(box1,ac,ad); if (ad>=100) return ac; break; } // --- test LETTER_C_WITH_CEDILLA --------------------------------------------------- if (bc==UNKNOWN) if (gchar) for(ad=98;dx>3 && dy>6;){ // provisorium DBG( wchar_t c_ask='c'; ) if (sdata->holes.num > 0) break; /* no tolerant against tiny holes */ j=loop(bp,dx-1,dy/16 ,dy,cs,0,LE); x=loop(bp,dx-1,dy/16+1,dy,cs,0,LE); if (xdx) Break; // ~4 ocr-b if( num_cross(0,dx-1,3*dy/16,3*dy/16,bp,cs) > 2 ) break; if( num_cross(0,dx-1, 0, dy-1,bp,cs) < 2 ) break; if( num_cross(0,dx-1, dy/16, dy/16,bp,cs) > 2 ) break; for( x=dx,i2=y=dy/4; y<3*dy/4; y++ ){ j=loop(bp,0,y,dx,cs,0,RI); if(j0 ) break; i1=x; for( x=0,i4=y=dy/4; y<5*dy/8; y++ ){ j=loop(bp,dx-1,y,dx,cs,0,LE); if(j>x) { i4=y; x=j; } } if( xdy/4) break; j =loop(bp,dx/2,j,dy,cs,0,DO); if(j3*dx) break; j =loop(bp,dx-1-j/2,dy-1-dy/8,dy,cs,0,UP); if(j>dy/2) break; // ~() // if( num_hole(x0,x1,y0,y1,box1->p,cs,NULL) != 0 ) break; if (sdata->holes.num) break; if( hchar ) ac= LATIN_CAPITAL_LETTER_C_WITH_CEDILLA; else ac= LATIN_SMALL_LETTER_C_WITH_CEDILLA; Setac(box1,ac,ad); if (ad>=100) return ac; break; } // --- test # 
--------------------------------------------------- for(ad=99;dx>4 && dy>4;){ // never sure? DBG( wchar_t c_ask='#'; ) if (sdata->holes.num > 2) Break; /* tolerant against a tiny hole */ if (sdata->holes.num < 1) Break; if( num_cross(0,dx-1, dy/8, dy/8,bp,cs) != 2 ) Break; if( num_cross(0,dx-1,dy-1-dy/8,dy-1-dy/8,bp,cs) != 2 ) Break; if( num_cross(0,dx-1, dy/2, dy/2,bp,cs) != 2 ) Break; if( num_cross(0,dx/2, dy/2, dy/2,bp,cs) != 1 ) Break; /* fat "#" have only small ends on left and right side, we tolerate this */ j=loop(bp, 0,dy/8,dx,cs,0,RI); if(j<1 || j=dx/2) Break; if (j=dx/2) Break; if (j3*dx/4) { i1=0; break; } j=loop(bp,j, y,dx,cs,1,RI); if(j>i1) { i1=j; } j=loop(bp,0,dy-1-y,dx,cs,0,RI); if(j>3*dx/4) { i1=0; break; } j=loop(bp,j,dy-1-y,dx,cs,1,RI); if(j>i3) { i3=j; } } if (i1holes.num != 1) {ad=95*ad/100;} if( num_hole(x0+dx/8,x1-dx/8,y0+dy/8,y1-dy/8,box1->p,cs,NULL) != 1 ) Break; // if( num_hole(x0 ,x1 ,y0 ,y1 ,box1->p,cs,NULL) != 1 ) Break; ac=(wchar_t) '#'; if( gchar ) {ad=99*ad/100;} Setac(box1,ac,ad); if (ad>=100) return ac; break; } // --- test bullet, full_box, grabbed cursor, ZapfDingBats_156 if (bc==UNKNOWN) for(ad=96;dx>4 && dy>4 && 2*dx>dy;){ // provisorium DBG( wchar_t c_ask='#'; ) if( get_bw(x0,x1,y0,y1,box1->p,cs,2) != 0 ) break; ac=BULLET; if (gchar && !hchar) ad=80*ad/100; Setac(box1,ac,ad); if (ad>=100) return ac; break; } /* --- test | (vertical line, could be a I or l) --- */ for(ad=99;dy>4 && 2*dxp,cs,2) != 0 ) break; /* more unsure if the borders are not exact */ if( get_bw(x0 ,x0+dx/8,y0+dy/9,y1-dy/9,box1->p,cs,2) != 0 ) ad=99*ad/100; if( get_bw(x1-dx/8,x1 ,y0+dy/9,y1-dy/9,box1->p,cs,2) != 0 ) ad=99*ad/100; if( get_bw(x0+dx/8,x1-dx/8,y0 ,y0+dy/8,box1->p,cs,2) != 0 ) ad=99*ad/100; if( get_bw(x0+dx/8,x1-dx/8,y1-dy/8,y1 ,box1->p,cs,2) != 0 ) ad=99*ad/100; if (3*dxm2 && 2*y1> box1->m2+box1->m3) Break; if (box1->m2 && 3*y1>2*box1->m2+box1->m3) ad=95*ad/100; ac='|'; if (!hchar) ad=98*ad/100; Setac(box1,ac,ad); break; } // --- test % 
--------------------------------------------------- for(ad=100;dx>5 && dy>7;){ // provisorium DBG( wchar_t c_ask='%'; ) if (sdata->holes.num > 2) break; /* tolerant against a tiny hole */ if( num_cross(x0,x1 ,y0+dy/4,y0+dy/4,box1->p,cs) != 3 && num_cross(x0,x1 ,y0+dy/8,y0+dy/8,box1->p,cs) != 3 ) Break; if( num_cross(x0,x1+dx/4,y1-dy/4,y1-dy/4,box1->p,cs) != 3 && num_cross(x0,x1+dx/4,y1-dy/8,y1-dy/8,box1->p,cs) != 3 ) Break; if( num_cross(x0,x1, y0, y1,box1->p,cs) < 4 && num_cross(x0+dx/8,x1, y0, y1,box1->p,cs) < 4 && num_cross(x0,x1+dx/4, y0, y1,box1->p,cs) < 4 && dx>7 && dy>15) Break; if( num_cross(x0,x1, y0, y1,box1->p,cs) !=5 ) ad=99*ad/100; if (dx>7 && dy>12) { if( num_hole(x0 ,x1 ,y0,y1-dy/4,box1->p,cs,NULL) != 1 ) Break; if( num_hole(x0+dx/4,x1+dx/4,y0+dy/4,y1,box1->p,cs,NULL) != 1 ) Break; if( num_hole(x0 ,x1+dx/4,y0,y1 ,box1->p,cs,NULL) != 2 ) Break; } else ad=98*ad/100; // use box1->p instead of b, because % is a sum of 3 objects if ( loop(box1->p,x0,y0 ,dx,cs,0,RI) <= loop(box1->p,x0,y0+dy/16+1,dx,cs,0,RI) ) ad=96*ad/100; // X if ( loop(box1->p,x1,y1 ,dx,cs,0,LE) <= loop(box1->p,x1,y1-1-dy/16,dx,cs,0,LE) ) ad=96*ad/100; // X for (x=0;xp,cs,2) != 2 ) break; } if (x=100) return ac; break; } // --- test Omega --------------------------------------------------- for(ad=d=99;dx>7 && dy>7;){ // min 3x4 DBG( wchar_t c_ask=GREEK_CAPITAL_LETTER_OMEGA; ) if( get_bw(x0 , x0+dx/2,y0+dy/2 , y0+dy/2,box1->p,cs,1) != 1 ) Break; if( get_bw(x1-dx/2 , x1 ,y0+dy/2 , y0+dy/2,box1->p,cs,1) != 1 ) Break; if( get_bw(x0+dx/2 , x0+dx/2,y0 , y0+dy/2,box1->p,cs,1) != 1 ) Break; if( get_bw(x0+dx/2 , x0+dx/2,y0+dy/3 , y1-dy/3,box1->p,cs,1) != 0 ) Break; if( num_cross(x0+dx/2,x0+dx/2,y0 , y1-dy/3,box1->p,cs) != 1 ) Break; if( num_cross(x0+dx/3,x1-dx/3,y0 , y0 ,box1->p,cs) != 1 ) // AND if( num_cross(x0+dx/3,x1-dx/3,y0+1 , y0+1 ,box1->p,cs) != 1 ) Break; if( num_cross(x0+dx/3,x1-dx/3,y1 , y1 ,box1->p,cs) != 2 ) // against "rauschen" if( num_cross(x0+dx/3,x1-dx/3,y1-1 , y1-1 ,box1->p,cs) 
!= 2 ) Break; if( num_cross(x0 ,x0 ,y0+dy/3 , y1-dy/3,box1->p,cs) != 1 ) if( num_cross(x0+1 ,x0+1 ,y0+dy/3 , y1-dy/3,box1->p,cs) != 1 ) Break; if( num_cross(x1 ,x1 ,y0+dy/3 , y1-dy/3,box1->p,cs) != 1 ) if( num_cross(x1-1 ,x1-1 ,y0+dy/3 , y1-dy/3,box1->p,cs) != 1 ) Break; if (sdata->holes.num) Break; // if( num_hole(x0,x1,y0,y1,box1->p,cs,NULL) != 0 ) break; if( loop(bp,0 ,0 ,x1-x0,cs,0,RI)<= loop(bp,0 ,2 ,x1-x0,cs,0,RI) ) Break; if( loop(bp,dx/2,dy-dy/4,x1-x0,cs,0,RI)>dx/4 || loop(bp,dx/2,dy-dy/4,x1-x0,cs,0,LE)>dx/4 ) Break; if( loop(bp,dx/2,3*dy/8,x1-x0,cs,0,RI)dx/8) Break; x=loop(bp,i,dy-1-dy/16,x1-x0,cs,1,RI); i+=x; if(i<3*dx/8 || i>dx/2) Break; x=loop(bp,i,dy-1-dy/16,x1-x0,cs,0,RI); i+=x; if(i5*dx/8) Break; x=loop(bp,i,dy-1-dy/16,x1-x0,cs,1,RI); i+=x; if(i<7*dx/8) Break; /* look for a vertikal gap at lower end */ for( x=dx/4;x<3*dx/4;x++ ){ i=loop(bp,x,dy-1,y1-y0,cs,0,UP); if( i>3*dy/4 ) break; } if( x>=3*dx/4 ) Break; if( !hchar ) ad=60*ad/100; bc=GREEK_CAPITAL_LETTER_OMEGA; Setac(box1,bc,ad); break; } return bc; } // -------------------- OCR engine ;) ---------------------------- /* ocr0: entry point of the rule based engine for one character box. box1 = box to classify (its x0,x1,y0,y1, m1..m4, dots, modifier and frame vectors are read; box1->y1 and box1->c may be written). bp = pix pointer stored in sdata.bp for the sub-tests (used below with coordinates starting at 0, so presumably box-relative - TODO confirm). cs = value that pixels are compared against (see getpixel(...) compared with cs). Returns the accepted character, or UNKNOWN (the initial value of bc) when no test accepts. Order of work: derive hchar and gchar, find the 4 corner points aa[], count holes once via num_hole, run the punctuation tests (- _ = : ; . , quote, double quote, tilde, circumflex), then call the per-letter ocr0_* routines as long as IF_NOT_SURE holds. Candidates are registered through Setac(box1,c,ad); presumably ad is a weight in percent - verify against the definition of Setac. */ wchar_t ocr0(struct box *box1, pix *bp, int cs){ // pix p=*(box1->p); int i,j,d,x,y,x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1; int dx=x1-x0+1,dy=y1-y0+1, /* size */ rx,ry,r1,r2,i1,i2,ad; /* tmp-vars */ // ad,ac will be used in future wchar_t bc = UNKNOWN; // bestletter wchar_t um = SPACE; // modifier '" int hchar; // char is higher than e int gchar; // char has ink lower than m3 int aa[4][4]; /* corner points, see xX, (x,y,dist^2,vector_idx) v0.41 */ ocr0_shared_t sdata; // data used in all subfunctions sdata.box1=box1; sdata.bp=bp; sdata.cs=cs; // --- hchar --- gchar ------------------------- /* hchar is set when y0 is smaller than the midpoint between box1->m1 and box1->m2; gchar is set when y1 is larger than the midpoint between box1->m3 and box1->m4 */ hchar=0;if( y0 < box1->m2-(box1->m2-box1->m1)/2 ) hchar=1; gchar=0;if( y1 > box1->m3+(box1->m4-box1->m3)/2 ) gchar=1; // if the char is slightly moved down correction can be done /* NOTE(review): the next condition reads y0m2 and 2*y1m3 - it looks as if a comparison operator plus the box1 member access were lost when this copy was extracted; the same pattern recurs in later loop headers (inum_frame_vectors, 2*dym3, inum_ac). Compare with a pristine copy of this file before compiling. */ if ( y0m2 && y1>box1->m3 && 2*y1m3+box1->m4) // moved if( 2*(y0-(y1-box1->m3))<=2*box1->m2-(box1->m2-box1->m1)
) hchar=1; sdata.hchar=hchar; sdata.gchar=gchar; /* search for nearest points to the 4 corners, typical for xX */ /* this is faster than calling nearest_frame_vector 4 times */ aa[0][0]=aa[1][0]=aa[2][0]=aa[3][0]=(x0+x1)/2; /* set to center */ aa[0][1]=aa[1][1]=aa[2][1]=aa[3][1]=(y0+y1)/2; /* set to center */ aa[0][2]=aa[1][2]=aa[2][2]=aa[3][2]=2*sq(128); /* distance to box edges */ aa[0][3]=aa[1][3]=aa[2][3]=aa[3][3]=0; /* vector index */ /* searching for 4 diagonal line ends */ /* NOTE(review): the loop header below and the later if (d... look truncated in this copy - the loop bound comparison and the updates of aa[j][] for the corners appear to be missing; confirm against the pristine file */ for (i=0;inum_frame_vectors[0];i++) { x=box1->frame_vector[i][0]; /* take a vector */ y=box1->frame_vector[i][1]; /* distance to upper left end, normalized to 128 */ j=0; d=sq((x-x0)*128/dx)+sq((y-y0)*128/dy); // fprintf(stderr," setaa i= %2d xy= %3d %3d d=%5d aa[3]=%2d\n",i,x-x0,y-y0,d,aa[0][3]); if (dnum_frames>0) // speedup v0.42 num_hole(x0,x1,y0,y1,box1->p,cs,&sdata.holes); // call once // printf(" num_holes=%d\n",sdata.holes.num); /* after division of two glued chars, boundaries could be wrong, check this first (ToDo: only if a flag set?)
*/ if (2*y0 < box1->m2+box1->m3) if (box1->m4>box1->m3 && 2*box1->y1>box1->m4+box1->m3){ /* could be a "I" from divided "Ij" or "Ig" */ for(y=(box1->m3+box1->m2)/2;2*ym3+box1->m4;y++) if( get_bw(x0,x1,y,y,box1->p,cs,1)==0 ) break; if(2*ym3+box1->m4) if( get_bw((x0+x1)/2,(x0+x1)/2,y,box1->m4,box1->p,cs,1)==0 ){ /* be sure, ~_ */ /* cut the box at row y: the local y1 and box1->y1 are both overwritten. NOTE(review): dy was computed from the old y1 and is not refreshed here - confirm that this is intended */ if (y>y0) y1=box1->y1=y; } } DBG( IFV fprintf(stderr,"\nDBG L%d (%d,%d): ",__LINE__,box1->x0,box1->y0); ) DBG( IFV out_b(box1,sdata.bp,0,0,dx,dy,160); ) DBG( IFV fprintf(stderr,"# aa[] %d %d %d %d %d %d %d %d (4 corners)" " d= %d %d %d %d", aa[0][0]-x0,aa[0][1]-y0,aa[1][0]-x0,aa[1][1]-y0, aa[2][0]-x0,aa[2][1]-y0,aa[3][0]-x0,aa[3][1]-y0, aa[0][2], aa[1][2], aa[2][2], aa[3][2]);) DBG( IFV fprintf(stderr,"\n# holes %d gchar=%d hchar=%d",sdata.holes.num, gchar, hchar);) // --- test thin lines - --------------------------------- for( ad=100; 2*dym3-box1->m2 && 3*dx>=4*dy && dx>2; ){ // min 3x3 (small font) DBG( wchar_t c_ask='-'; ) if( get_bw(x0+dx/8+1,x1-dx/8-1,y0+dy/8+((dy>2)?1:0), y1-dy/8-((dy>2)?1:0),box1->p,cs,2)==2 ) break; if( box1->dots ) { Setac(box1,'=',97);break; } if (dx<=2*dy) ad=98*ad/100; if (dx<=3*dy) ad=99*ad/100; if (!box1->m4) ad=96*ad/100; else { if (y1>=box1->m3) { if ( dx<2*dy) ad=98*ad/100; if (2*dx<3*dy) ad=98*ad/100; Setac(box1,'_',ad); break; } } Setac(box1,'-',ad); if (ad>=100) return '-'; break; } // --- test thin lines = --------------------------------- for( ; dy>2 && dx>2; ){ // min 3x3 (small font) DBG( wchar_t c_ask='='; ) for( y=y0;yp,cs,1)==1 ) break; if( get_bw(x0+dx/10,x1-dx/10,y ,y ,box1->p,cs,2)==2 ) break; if( get_bw(x0 ,x1 ,(y+y1)/2,(y+y1)/2,box1->p,cs,1)==1 ) break; if( get_bw(x0+dx/10,x1-dx/10,y1 ,y1 ,box1->p,cs,2)==2 ) break; Setac(box1,'=',100); return '='; } // --- test dots : --------------------------------- for( ad=100; dy>2 && dy>=2*dx; ){ // max 3x3 (small font) DBG( wchar_t c_ask=':'; ) // check the gap hight for( i1=dy/16;i1p,cs,1)==0 ) break; if (i1>=dy/2) break; for( i2=dy/16;i2p,cs,1)==0 )
break; if (i2>=dy/2) Break; MSG(fprintf(stderr,"gap y12 %d %d",i1,i2);) if (box1->m3 && y1>box1->m3) ad=98*ad/100; // ~; if (box1->m3 && 2*y0> box1->m2+box1->m1) ad=98*ad/100; // ~i if (gchar) ad=99*ad/100; /* NOTE(review): integer arithmetic - i1 and i2 have both passed the checks against dy/2 above, so abs(i1-i2)/dy looks like it is always 0 and this line subtracts nothing; confirm whether abs(i1-i2)*20/dy was meant */ ad=ad-abs(i1-i2)/dy*20; if (abs(i1-dx)>dy/4) Break; // round or quadratic dots? if (abs(i1-dx)>dy/8) ad=98*ad/100; if (abs(i2-dx)>dy/4) Break; // round or quadratic dots? if (abs(i2-dx)>dy/8) ad=98*ad/100; if (box1->dots!=1) ad=96*ad/100; Setac(box1,':',ad); // dx<=3 ad-- if (ad>=100) return ':'; break; } // --- test dots ; --------------------------------- if( 2*y0> box1->m2+box1->m1 ) // ~i if( 4*y1>=3*box1->m3+box1->m2 ) // ~: for( ad=100; dy>5 && dx>1 && dy>2*dx; ){ // max 3x3 (small font) DBG( wchar_t c_ask=';'; ) // better would it be to detect round pixelcluster on top // check high of upper and lower dot for( i1=0;i1p,cs,1)==0 ) break; if (i1>=dy/2) break; for( i2=0;i2p,cs,1)==0 ) break; if (i2m3) ad=97*ad/100; if (i2-i1=100) return ';'; break; } // --- first test small dots . --------------------------------- if( 3*dym4-box1->m1 && abs(dx-dy)<(dx+dy)/4+2 && 3*y1>=(2*box1->m3+ box1->m2) // dot near baseline? && 5*y0>=(3*box1->m3+2*box1->m2) ){ // Jul00 DBG( wchar_t c_ask='.'; ) d=0; r1=60;r2=140; ad=99; for(x=x0;x<=x1;x++)for(y=y0;y<=y1;y++){ /* circle equation */ rx=100*(2*x-(x0+x1))/dx; // normalize to 15bit number ry=100*(2*y-(y0+y1))/dy; if( rx*rx + ry*ry < r1*r1 ) if( getpixel(box1->p,x,y)>=cs ){ d++;x=x1+1;y=y1+1; } if( rx*rx + ry*ry > r2*r2 ) if( getpixel(box1->p,x,y)< cs ){ d++;x=x1+1;y=y1+1; } // fprintf(stderr,"\nDBG .
x= %3d %3d r= %6d %6d %6d", rx, ry, rx*rx+ry*ry, r1*r1, r2*r2); } if(d==0) if( loop(box1->p,x0,y0,x1-x0,cs,0,RI) <= loop(box1->p,x0,y1,x1-x0,cs,0,RI) || loop(box1->p,x1,y0,x1-x0,cs,0,LE) >= loop(box1->p,x1,y1,x1-x0,cs,0,LE) ) { bc='.'; if (box1->dots) { Setac(box1,':',ad); ad=98*ad/100; } Setac(box1,bc,ad); } } // --- first test small dots , --------------------------------- if( 3*dy<2*(box1->m4-box1->m1) && 2*y0> box1->m2+box1->m3 && (2*dx<3*dy || get_bw(0,dx/2,dy/2,dy-1,bp,cs,1)==0) ){ // ocr-a-, DBG( wchar_t c_ask=','; ) ad=100; bc=','; if (dy==1 && dx==1) ad=98*ad/100; if (dy==2 && dx==1) ad=99*ad/100; // this is a problem case if (dx>=dy) ad=99*ad/100; if( 2*dy >= box1->m4-box1->m1) ad=98*ad/100; if( loop(box1->p,x0,y0,x1-x0,cs,0,RI) /* simple line */ > loop(box1->p,x0,y1,x1-x0,cs,0,RI) && loop(box1->p,x1,y0,x1-x0,cs,0,LE) < loop(box1->p,x1,y1,x1-x0,cs,0,LE) ) { ad=99*ad/100; } else { /* with upper circle */ if( loop(box1->p,x0,(y0+y1+1)/2,x1-x0,cs,0,RI)p,x1, y1 ,x1-x0,cs,0,LE)p,x0,y1-((dy>5)?1:0),x1-x0,cs,0,LE)>(dx+1)/2 ) if( loop(box1->p,x0, y1 ,x1-x0,cs,0,LE)>(dx+1)/2 ) ad=96*ad/100; } if(box1->dots==1) { Setac(box1,';',ad); ad=99*ad/100; } Setac(box1,bc,ad); } // --- first test small dots '" --------------------------------- if( 2*dy < box1->m4 -box1->m1+1 && 2*y0 < box1->m2 +box1->m3 && 3*y1 < box1->m2+2*box1->m3+2 ){ DBG( wchar_t c_ask='\''; ) ad=100; bc='\''; if (2*y1 >= box1->m2+box1->m3) { ad=96*ad/100; MSG({}) } // ~! if (3*y1>=2*box1->m2+box1->m3) { ad=96*ad/100; MSG({}) } if (get_bw(x0,x1,(box1->m2+box1->m3)/2,box1->m4,box1->p,cs,1)!=0) { ad=98*ad/100; MSG({}) } if (dx>4 && num_cross(x0,x1,y1,y1,box1->p,cs) == 2) { // " " bc='"'; // ocr-a-" has no gap!
if ( get_bw((x0+x1)/2,(x0+x1)/2,y0,y1,box1->p,cs,1)!=0 ) ad=96*ad/100; } else { if ( num_cross(x0,x1, y0 , y0 ,box1->p,cs)!=1) ad=96*ad/100; if ( num_cross(x0,x1,(y0+y1)/2,(y0+y1)/2,box1->p,cs)!=1) ad=98*ad/100; if (dx>dy) { ad=96*ad/100; MSG({}) } } if (2*y0 > box1->m1+box1->m2) ad=99*ad/100; Setac(box1,bc,ad); if (ad>=100) return bc; } // --- TILDE ~ --------------------------------- if( 2*dym4-box1->m1 && dx>=dy && dx>3 && dy>1 && 2*y0< box1->m1+box1->m2 && 3*y1<2*box1->m2+box1->m3 ){ if( loop(box1->p,x0,y0,dx,cs,0,RI) > loop(box1->p,x0,y1,dx,cs,0,RI) && loop(box1->p,x1,y0,dx,cs,0,LE) < loop(box1->p,x1,y1,dx,cs,0,LE) && num_cross(x0,x1,y0,y0,box1->p,cs) == 2 && num_cross(x0,x1,y1,y1,box1->p,cs) == 2 ) { DBG( wchar_t c_ask='~'; ) bc=TILDE; Setac(box1,bc,99); } } // --- CIRCUMFLEX, hat ^ --------------------------------- if( 2*dym4-box1->m1 && dx>=dy && dx>2 && dy>1 && 2*y0< box1->m1+box1->m2 && 3*y1<2*box1->m2+box1->m3 ){ DBG( wchar_t c_ask='^'; ) if( ( loop(box1->p,x0,y0 ,dx,cs,0,RI) > loop(box1->p,x0,y1 ,dx,cs,0,RI)-dx/8 || loop(box1->p,x0,y0 ,dx,cs,0,RI) > loop(box1->p,x0,y1-1,dx,cs,0,RI)-dx/8 ) && ( loop(box1->p,x1,y0 ,dx,cs,0,LE) > loop(box1->p,x1,y1 ,dx,cs,0,LE)-dx/8 || loop(box1->p,x1,y0 ,dx,cs,0,LE) > loop(box1->p,x1,y1-1,dx,cs,0,LE)-dx/8 ) && num_cross(x0,x1,y0 ,y0 ,box1->p,cs) == 1 && ( num_cross(x0,x1,y1 ,y1 ,box1->p,cs) == 2 || num_cross(x0,x1,y1-1,y1-1,box1->p,cs) == 2 )) { bc='^'; Setac(box1,bc,99); } } // ------------------------------------------------------ // if( dots==1 ){ um='\''; } #if 0 /* ToDo: change to vectors, call here or in whatletter */ if (box1->dots==0) { // i-dots ??? (if dots==0 is wrong) y=box1->m1; for(;yp,cs,1)==1) break; { i1=y; if( yp,cs,1)==0) break; if( ybox1->m2-box1->m1){ testumlaut(box1,cs,2,&um); // set modifier + new y0 ???
} } } #else um = box1->modifier; #endif /* when the modifier is DIAERESIS: walk from y1 towards y0 and, at the first row where get_bw(...) returns 0, move y0 to that row and recompute dy, so the tests below work on the part underneath - presumably a return of 0 means the row holds no ink; verify against get_bw */ if ( /* um==ACUTE_ACCENT || */ um==DIAERESIS){ for(y=y1;y>y0;y--) if( get_bw(x0,x1,y,y,box1->p,cs,1)==0) { y0=y; dy=y1-y0+1; break; } // scan "a "o "u } // --- test numbers 0..9 --- separated for faster compilation if( JOB->cfg.only_numbers ) return ocr0n(&sdata); // bc=ocr1(box1,bp,cs); if(bc!=UNKNOWN && box1->num_ac>0 && box1->wac[0]==100) return bc; // for fast compilable tests // ------ separated for faster compilation // ToDo: inser ocr0_shared_t here and split into a,b,cC,d,e,f,g9,... /* IF_NOT_SURE lets the following call run only while bc is UNKNOWN, no alternative is stored (num_ac==0), or box1->wac[0] is below 100; the ocr0_* routines are therefore tried in the order written until one of them leaves an entry with wac[0] of at least 100 */ #define IF_NOT_SURE if(bc==UNKNOWN || box1->num_ac==0 || box1->wac[0]<100) IF_NOT_SURE bc=ocr0_eE(&sdata); IF_NOT_SURE bc=ocr0_f(&sdata); IF_NOT_SURE bc=ocr0_bB(&sdata); IF_NOT_SURE bc=ocr0_dD(&sdata); IF_NOT_SURE bc=ocr0_F(&sdata); IF_NOT_SURE bc=ocr0_uU(&sdata); IF_NOT_SURE bc=ocr0_micro(&sdata); IF_NOT_SURE bc=ocr0_vV(&sdata); IF_NOT_SURE bc=ocr0_rR(&sdata); IF_NOT_SURE bc=ocr0_m(&sdata); IF_NOT_SURE bc=ocr0_tT(&sdata); IF_NOT_SURE bc=ocr0_sS(&sdata); IF_NOT_SURE bc=ocr0_gG(&sdata); IF_NOT_SURE bc=ocr0_xX(&sdata); IF_NOT_SURE bc=ocr0_yY(&sdata); IF_NOT_SURE bc=ocr0_zZ(&sdata); IF_NOT_SURE bc=ocr0_wW(&sdata); IF_NOT_SURE bc=ocr0_aA(&sdata); IF_NOT_SURE bc=ocr0_cC(&sdata); IF_NOT_SURE bc=ocr0_lL(&sdata); IF_NOT_SURE bc=ocr0_oO(&sdata); IF_NOT_SURE bc=ocr0_pP(&sdata); IF_NOT_SURE bc=ocr0_qQ(&sdata); IF_NOT_SURE bc=ocr0_iIjJ(&sdata); IF_NOT_SURE bc=ocr0_n(&sdata); IF_NOT_SURE bc=ocr0_M(&sdata); IF_NOT_SURE bc=ocr0_N(&sdata); IF_NOT_SURE bc=ocr0_h(&sdata); IF_NOT_SURE bc=ocr0_H(&sdata); IF_NOT_SURE bc=ocr0_k(&sdata); IF_NOT_SURE bc=ocr0_K(&sdata); IF_NOT_SURE bc=ocr0n(&sdata); IF_NOT_SURE bc=ocr0_brackets(&sdata); IF_NOT_SURE bc=ocr0p9(&sdata); IF_NOT_SURE bc=ocr0px(&sdata); /* NOTE(review): the fprintf below prints an empty string, so this check has no visible effect - looks like a leftover debugging hook */ if(box1->num_ac==0 && bc!=UNKNOWN) fprintf(stderr,""); /* if the first stored alternative has wac above 95 it replaces bc and is also written to box1->c */ if(box1->num_ac>0 && box1->wac[0]>95) box1->c=bc=box1->tac[0]; /* will be removed later, only fix old things */ for (i=0;inum_ac;i++) if (box1->tac[i]==bc) { bc=box1->tac[0]; } return bc; }